|
|
|
@ -26,7 +26,7 @@ func TestMain(m *testing.M) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestInitScheduler(t *testing.T) { |
|
|
|
ctx, done := context.WithCancel(context.Background()) |
|
|
|
ctx, done := context.WithCancel(t.Context()) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
s.loadedMu.Lock() |
|
|
|
@ -35,7 +35,7 @@ func TestInitScheduler(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestLoad(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 20*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
var f *ggml.GGML // value not used in tests
|
|
|
|
@ -167,7 +167,7 @@ func getCpuFn() discover.GpuInfoList { |
|
|
|
} |
|
|
|
|
|
|
|
func TestRequestsSameModelSameRequest(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
s.getGpuFn = getGpuFn |
|
|
|
@ -210,7 +210,7 @@ func TestRequestsSameModelSameRequest(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestRequestsSimpleReloadSameModel(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
s.getGpuFn = getGpuFn |
|
|
|
@ -258,7 +258,7 @@ func TestRequestsSimpleReloadSameModel(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestRequestsMultipleLoadedModels(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
s.getGpuFn = getGpuFn |
|
|
|
@ -355,7 +355,7 @@ func TestRequestsMultipleLoadedModels(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestGetRunner(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 3*time.Second) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 3*time.Second) |
|
|
|
defer done() |
|
|
|
|
|
|
|
a := newScenarioRequest(t, ctx, "ollama-model-1a", 10, &api.Duration{Duration: 2 * time.Millisecond}) |
|
|
|
@ -408,7 +408,7 @@ func TestGetRunner(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestExpireRunner(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 20*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
req := &LlmRequest{ |
|
|
|
@ -455,7 +455,7 @@ func TestExpireRunner(t *testing.T) { |
|
|
|
|
|
|
|
// TODO - add one scenario that triggers the bogus finished event with positive ref count
|
|
|
|
func TestPrematureExpired(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond) |
|
|
|
defer done() |
|
|
|
|
|
|
|
// Same model, same request
|
|
|
|
@ -502,7 +502,7 @@ func TestPrematureExpired(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestUseLoadedRunner(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
req := &LlmRequest{ |
|
|
|
ctx: ctx, |
|
|
|
opts: api.DefaultOptions(), |
|
|
|
@ -529,7 +529,7 @@ func TestUseLoadedRunner(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestUpdateFreeSpace(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
gpus := discover.GpuInfoList{ |
|
|
|
{ |
|
|
|
@ -562,7 +562,7 @@ func TestUpdateFreeSpace(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestFilterGPUsWithoutLoadingModels(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
gpus := discover.GpuInfoList{ |
|
|
|
{ |
|
|
|
@ -596,7 +596,7 @@ func TestFilterGPUsWithoutLoadingModels(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestFindRunnerToUnload(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
|
|
|
|
r1 := &runnerRef{refCount: 1, sessionDuration: 1, numParallel: 1} |
|
|
|
@ -616,7 +616,7 @@ func TestFindRunnerToUnload(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestNeedsReload(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
|
|
|
|
llm := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} |
|
|
|
@ -663,7 +663,7 @@ func TestNeedsReload(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestUnloadAllRunners(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
|
|
|
|
llm1 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} |
|
|
|
@ -695,7 +695,7 @@ func TestUnload(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestAlreadyCanceled(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond) |
|
|
|
defer done() |
|
|
|
dctx, done2 := context.WithCancel(ctx) |
|
|
|
done2() |
|
|
|
@ -712,7 +712,7 @@ func TestAlreadyCanceled(t *testing.T) { |
|
|
|
} |
|
|
|
|
|
|
|
func TestHomogeneousGPUs(t *testing.T) { |
|
|
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) |
|
|
|
ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) |
|
|
|
defer done() |
|
|
|
s := InitScheduler(ctx) |
|
|
|
|
|
|
|
|