discover: CPU supports flash attention

We already run flash attention on CPUs in cases where we have partial offloading but were disabling it if running on pure CPU, which is unnecessary.
9 months ago · 8f4ec9ab28
1 changed files with 2 additions and 1 deletions
--- a/discover/types.go
+++ b/discover/types.go
@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
 // For each GPU, check if it does NOT support flash attention
 func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
-		supportsFA := gpu.Library == "metal" ||
+		supportsFA := gpu.Library == "cpu" ||
+			gpu.Library == "metal" ||
 			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
 			gpu.Library == "rocm"