From 948114e3e3325af01c31697dee861459624ca805 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Sun, 28 Apr 2024 16:41:38 +0000 Subject: [PATCH 1/6] fix sched to wait for the runner to terminate so that the following VRAM check is more accurate --- llm/server.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llm/server.go b/llm/server.go index 231100a0..c82c5099 100644 --- a/llm/server.go +++ b/llm/server.go @@ -899,7 +899,10 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error func (s *llmServer) Close() error { if s.cmd != nil { slog.Debug("stopping llama server") - return s.cmd.Process.Kill() + if err := s.cmd.Process.Kill(); err != nil { + return err + } + return s.cmd.Wait() } return nil From f4a73d57a4e3182a4dd25dae93a779a13423dfc0 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Sun, 28 Apr 2024 16:43:40 +0000 Subject: [PATCH 2/6] fix runner expiring during active use. Clear the expire timer when the runner is used, allowing the finish path to assign an expire timer so that the runner expires after a period of no use. 
--- server/sched.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/sched.go b/server/sched.go index 0a6738a2..482bc39a 100644 --- a/server/sched.go +++ b/server/sched.go @@ -296,6 +296,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm runner.refMu.Lock() defer runner.refMu.Unlock() runner.refCount++ + if runner.expireTimer != nil { + runner.expireTimer.Stop() + runner.expireTimer = nil + } runner.sessionDuration = pending.sessionDuration pending.successCh <- runner go func() { From 34a4a94f13d283ef794ba02f84ded96a794bb5d2 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Mon, 29 Apr 2024 15:05:54 +0000 Subject: [PATCH 3/6] ignore debug bin files --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e0362a19..0d826ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ ggml-metal.metal .idea test_data *.crt -llm/build \ No newline at end of file +llm/build +__debug_bin* \ No newline at end of file From 63c763685f1dc94f7efe4742b00b226be99505d0 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Mon, 29 Apr 2024 18:06:56 +0000 Subject: [PATCH 4/6] log when waiting for the process to stop, to help debug when other tasks execute during this wait. expire timer: clear the timer reference because it will not be reused. close will clean up expireTimer if the calling code has not already done so. 
--- llm/server.go | 7 ++++++- server/sched.go | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/llm/server.go b/llm/server.go index c82c5099..c9636a82 100644 --- a/llm/server.go +++ b/llm/server.go @@ -902,7 +902,12 @@ func (s *llmServer) Close() error { if err := s.cmd.Process.Kill(); err != nil { return err } - return s.cmd.Wait() + + if err := s.cmd.Wait(); err != nil { + return err + } + + slog.Debug("llama server stopped") } return nil diff --git a/server/sched.go b/server/sched.go index 482bc39a..61c5e1b3 100644 --- a/server/sched.go +++ b/server/sched.go @@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) { defer runner.refMu.Unlock() if runner.expireTimer != nil { runner.expireTimer.Stop() + runner.expireTimer = nil } s.expiredCh <- runner }) @@ -430,6 +431,10 @@ type runnerRef struct { // The refMu must already be held when calling unload func (runner *runnerRef) unload() { + if runner.expireTimer != nil { + runner.expireTimer.Stop() + runner.expireTimer = nil + } if runner.llama != nil { runner.llama.Close() } From ba26c7aa00635c78c2028aba680556801c02fd64 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Mon, 29 Apr 2024 22:48:14 +0000 Subject: [PATCH 5/6] discard the Wait() error: after Kill() it will always return an error --- llm/server.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llm/server.go b/llm/server.go index c9636a82..c7a2609f 100644 --- a/llm/server.go +++ b/llm/server.go @@ -903,9 +903,7 @@ func (s *llmServer) Close() error { return err } - if err := s.cmd.Wait(); err != nil { - return err - } + _ = s.cmd.Wait() slog.Debug("llama server stopped") } From 321d57e1a06d8e95346dd911f7473e6cf382fee7 Mon Sep 17 00:00:00 2001 From: Mark Ward Date: Wed, 1 May 2024 18:49:28 +0000 Subject: [PATCH 6/6] Remove the goroutine calling Wait() from load. 
--- llm/server.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llm/server.go b/llm/server.go index c7a2609f..b41f393d 100644 --- a/llm/server.go +++ b/llm/server.go @@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr continue } - // reap subprocess when it exits - go func() { - // Exit status managed via getServerStatus - _ = s.cmd.Wait() - }() - // TODO - make sure this is all wired up correctly // if err = s.WaitUntilRunning(); err != nil { // slog.Error("error starting llama server", "server", servers[i], "error", err)