Browse Source

ggml: Use assigned layers when reporting loading stats

Reporting params.NumGPULayers can be misleading because it is the
requested number of layers, not the actual number that is loaded.
While they are often the same, there are cases where they might mismatch,
such as if the GPU backend is missing.
mxyng/quant
Jesse Gross 9 months ago
committed by Jesse Gross
parent
commit
acef9b4c1b
  1. 20
      ml/backend/ggml/ggml.go

20
ml/backend/ggml/ggml.go

@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
}
// Mimic llama runner logs summarizing layers and memory
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
gpuLayers := 0
for _, layer := range layers {
if C.ggml_backend_dev_type(layer.d) == C.GGML_BACKEND_DEVICE_TYPE_GPU {
gpuLayers++
}
}
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", gpuLayers))
switch C.ggml_backend_dev_type(output.d) {
case 0: // CPU
case C.GGML_BACKEND_DEVICE_TYPE_CPU:
slog.Info("offloading output layer to CPU")
case 1: // GPU
case C.GGML_BACKEND_DEVICE_TYPE_GPU:
slog.Info("offloading output layer to GPU")
gpuLayers++
case 2: // ACCEL
case C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
slog.Info("offloading output layer to ACCEL")
}
for _, layer := range layers {
if C.ggml_backend_dev_type(layer.d) == 1 {
gpuLayers++
}
}
slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
for bs := range maps.Values(bbs) {
slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
}

Loading…
Cancel
Save