From 948114e3e3325af01c31697dee861459624ca805 Mon Sep 17 00:00:00 2001
From: Mark Ward
Date: Sun, 28 Apr 2024 16:41:38 +0000
Subject: [PATCH] fix sched to wait for the runner to terminate to ensure following vram check will be more accurate

---
 llm/server.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index 231100a0..c82c5099 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -899,7 +899,10 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
 func (s *llmServer) Close() error {
 	if s.cmd != nil {
 		slog.Debug("stopping llama server")
-		return s.cmd.Process.Kill()
+		if err := s.cmd.Process.Kill(); err != nil {
+			return err
+		}
+		return s.cmd.Wait()
 	}
 
 	return nil