Browse Source
Merge pull request #4682 from dhiltgen/more_time
Give the final model loading more time
language_support
Daniel Hiltgen
2 years ago
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with
8 additions and
2 deletions
-
llm/server.go
|
|
|
@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error { |
|
|
|
|
|
|
|
func (s *llmServer) WaitUntilRunning(ctx context.Context) error { |
|
|
|
start := time.Now() |
|
|
|
stallDuration := 60 * time.Second |
|
|
|
stallTimer := time.Now().Add(stallDuration) // give up if we stall for
|
|
|
|
stallDuration := 5 * time.Minute // If no progress happens
|
|
|
|
finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online
|
|
|
|
stallTimer := time.Now().Add(stallDuration) // give up if we stall
|
|
|
|
|
|
|
|
slog.Info("waiting for llama runner to start responding") |
|
|
|
var lastStatus ServerStatus = -1 |
|
|
|
fullyLoaded := false |
|
|
|
|
|
|
|
for { |
|
|
|
select { |
|
|
|
@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error { |
|
|
|
if priorProgress != s.loadProgress { |
|
|
|
slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress)) |
|
|
|
stallTimer = time.Now().Add(stallDuration) |
|
|
|
} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 { |
|
|
|
slog.Debug("model load completed, waiting for server to become available", "status", status.ToString()) |
|
|
|
stallTimer = time.Now().Add(finalLoadDuration) |
|
|
|
fullyLoaded = true |
|
|
|
} |
|
|
|
time.Sleep(time.Millisecond * 250) |
|
|
|
continue |
|
|
|
|