|
|
|
@ -17,6 +17,7 @@ import ( |
|
|
|
"os/exec" |
|
|
|
"path/filepath" |
|
|
|
"runtime" |
|
|
|
"slices" |
|
|
|
"strconv" |
|
|
|
"strings" |
|
|
|
"sync" |
|
|
|
@ -30,9 +31,37 @@ import ( |
|
|
|
"github.com/ollama/ollama/format" |
|
|
|
"github.com/ollama/ollama/fs/ggml" |
|
|
|
"github.com/ollama/ollama/llama" |
|
|
|
"github.com/ollama/ollama/logutil" |
|
|
|
"github.com/ollama/ollama/model" |
|
|
|
) |
|
|
|
|
|
|
|
type filteredEnv []string |
|
|
|
|
|
|
|
func (e filteredEnv) LogValue() slog.Value { |
|
|
|
var attrs []slog.Attr |
|
|
|
for _, env := range e { |
|
|
|
if key, value, ok := strings.Cut(env, "="); ok { |
|
|
|
switch { |
|
|
|
case strings.HasPrefix(key, "OLLAMA_"), |
|
|
|
strings.HasPrefix(key, "CUDA_"), |
|
|
|
strings.HasPrefix(key, "ROCR_"), |
|
|
|
strings.HasPrefix(key, "ROCM_"), |
|
|
|
strings.HasPrefix(key, "HIP_"), |
|
|
|
strings.HasPrefix(key, "GPU_"), |
|
|
|
strings.HasPrefix(key, "HSA_"), |
|
|
|
strings.HasPrefix(key, "GGML_"), |
|
|
|
slices.Contains([]string{ |
|
|
|
"PATH", |
|
|
|
"LD_LIBRARY_PATH", |
|
|
|
"DYLD_LIBRARY_PATH", |
|
|
|
}, key): |
|
|
|
attrs = append(attrs, slog.String(key, value)) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
return slog.GroupValue(attrs...) |
|
|
|
} |
|
|
|
|
|
|
|
type LlamaServer interface { |
|
|
|
Ping(ctx context.Context) error |
|
|
|
WaitUntilRunning(ctx context.Context) error |
|
|
|
@ -148,10 +177,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a |
|
|
|
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU)) |
|
|
|
} |
|
|
|
|
|
|
|
if envconfig.Debug() { |
|
|
|
params = append(params, "--verbose") |
|
|
|
} |
|
|
|
|
|
|
|
if opts.MainGPU > 0 { |
|
|
|
params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU)) |
|
|
|
} |
|
|
|
@ -404,26 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a |
|
|
|
} |
|
|
|
|
|
|
|
slog.Info("starting llama server", "cmd", s.cmd) |
|
|
|
if envconfig.Debug() { |
|
|
|
filteredEnv := []string{} |
|
|
|
for _, ev := range s.cmd.Env { |
|
|
|
if strings.HasPrefix(ev, "OLLAMA_") || |
|
|
|
strings.HasPrefix(ev, "CUDA_") || |
|
|
|
strings.HasPrefix(ev, "ROCR_") || |
|
|
|
strings.HasPrefix(ev, "ROCM_") || |
|
|
|
strings.HasPrefix(ev, "HIP_") || |
|
|
|
strings.HasPrefix(ev, "GPU_") || |
|
|
|
strings.HasPrefix(ev, "HSA_") || |
|
|
|
strings.HasPrefix(ev, "GGML_") || |
|
|
|
strings.HasPrefix(ev, "PATH=") || |
|
|
|
strings.HasPrefix(ev, "LD_LIBRARY_PATH=") || |
|
|
|
strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") { |
|
|
|
filteredEnv = append(filteredEnv, ev) |
|
|
|
} |
|
|
|
} |
|
|
|
// Log at debug as the environment is inherited and might contain sensitive information
|
|
|
|
slog.Debug("subprocess", "environment", filteredEnv) |
|
|
|
} |
|
|
|
slog.Debug("subprocess", "", filteredEnv(s.cmd.Env)) |
|
|
|
|
|
|
|
if err = s.cmd.Start(); err != nil { |
|
|
|
var msg string |
|
|
|
@ -721,6 +727,9 @@ type CompletionResponse struct { |
|
|
|
} |
|
|
|
|
|
|
|
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error { |
|
|
|
slog.Debug("completion request", "images", len(req.Images), "prompt", len(req.Prompt), "format", string(req.Format)) |
|
|
|
slog.Log(ctx, logutil.LevelTrace, "completion request", "prompt", req.Prompt) |
|
|
|
|
|
|
|
if len(req.Format) > 0 { |
|
|
|
switch string(req.Format) { |
|
|
|
case `null`, `""`: |
|
|
|
@ -884,6 +893,8 @@ type EmbeddingResponse struct { |
|
|
|
} |
|
|
|
|
|
|
|
func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) { |
|
|
|
slog.Log(ctx, logutil.LevelTrace, "embedding request", "input", input) |
|
|
|
|
|
|
|
if err := s.sem.Acquire(ctx, 1); err != nil { |
|
|
|
if errors.Is(err, context.Canceled) { |
|
|
|
slog.Info("aborting embedding request due to client closing the connection") |
|
|
|
|