|
|
|
@ -667,13 +667,19 @@ func (runner *runnerRef) waitForVRAMRecovery() chan any { |
|
|
|
return finished |
|
|
|
} |
|
|
|
|
|
|
|
// ByDuration is a slice of runner references that implements sort.Interface,
// ordering the runners by their session duration.
type ByDuration []*runnerRef |
|
|
|
|
|
|
|
func (a ByDuration) Len() int { return len(a) } |
|
|
|
func (a ByDuration) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
|
|
|
// Less reports whether the runner at index i has a smaller session duration
// than the runner at index j, with both durations compared as uint64 values.
func (a ByDuration) Less(i, j int) bool { |
|
|
|
// Converting to uint64 turns a negative duration (never unload) into a very
// large value, so such runners sort last.
|
|
|
|
return uint64(a[i].sessionDuration) < uint64(a[j].sessionDuration) |
|
|
|
// ByDurationAndName is a slice of runner references that implements
// sort.Interface, ordering the runners by session duration and then by
// model path.
type ByDurationAndName []*runnerRef |
|
|
|
|
|
|
|
func (a ByDurationAndName) Len() int { return len(a) } |
|
|
|
func (a ByDurationAndName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
|
|
|
func (a ByDurationAndName) Less(i, j int) bool { |
|
|
|
// Primary sort by session duration (uint64 to handle negatives)
|
|
|
|
d1 := uint64(a[i].sessionDuration) |
|
|
|
d2 := uint64(a[j].sessionDuration) |
|
|
|
if d1 != d2 { |
|
|
|
return d1 < d2 |
|
|
|
} |
|
|
|
// Secondary sort by model path lex order
|
|
|
|
return a[i].modelPath < a[j].modelPath |
|
|
|
} |
|
|
|
|
|
|
|
// TODO: consider picking runners based on size in the future
|
|
|
|
@ -775,7 +781,7 @@ func (s *Scheduler) findRunnerToUnload() *runnerRef { |
|
|
|
|
|
|
|
// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
|
|
|
|
// e.g., if we have multiple options, will one make room for the request?
|
|
|
|
sort.Sort(ByDuration(runnerList)) |
|
|
|
sort.Sort(ByDurationAndName(runnerList)) |
|
|
|
|
|
|
|
// First try to find a runner that's already idle
|
|
|
|
for _, runner := range runnerList { |
|
|
|
|