Jeffrey Morgan
9 months ago
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with
6 additions and
1 deletion
-
fs/ggml/ggml.go
|
|
|
@@ -555,7 +555,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri |
|
|
|
// vocab graph
|
|
|
|
4*batch*(embedding+vocab)+embedding*vocab*105/128, |
|
|
|
) |
|
|
|
case "gemma", "gemma2", "gemma3": |
|
|
|
case "gemma", "gemma2", "gemma3", "gemma3n": |
|
|
|
fullOffload = max( |
|
|
|
4*batch*(embedding+vocab), |
|
|
|
4*batch*(2+context+context*heads+2*embedding+2*embeddingHeadsK*heads), |
|
|
|
@@ -568,6 +568,11 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri |
|
|
|
embedding*embeddingHeadsK*heads*9/16, |
|
|
|
) |
|
|
|
|
|
|
|
if f.KV().Architecture() == "gemma3n" { |
|
|
|
fullOffload *= 4 |
|
|
|
partialOffload *= 4 |
|
|
|
} |
|
|
|
|
|
|
|
// Gemma2 also has sliding window attention but we only have an optimized implementation in the Ollama
|
|
|
|
// engine. Gemma3 always uses the Ollama engine.
|
|
|
|
if f.KV().Architecture() == "gemma3" { |
|
|
|
|