Work around broken NVIDIA iGPU free VRAM data (#12490)

The CUDA APIs for reporting free VRAM are useless on NVIDIA iGPU systems, as they only return the kernel's actual free memory and ignore buff/cache allocations, which on a typical system will quickly fill up most of the free system memory. As a result, we incorrectly conclude that there is very little memory available for GPU allocations.
2025-10-05 16:22:53 +02:00 · 2025-10-03 12:17:21 -07:00
parent 2fa1e92a99
commit e4340667e3
1 changed files with 32 additions and 0 deletions
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		}
 	}

+	// Apply any iGPU workarounds
+	iGPUWorkarounds(devices)
+
 	return devices
 }

@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
 		}
 	}
 }
+
+func iGPUWorkarounds(devices []ml.DeviceInfo) { // overwrites FreeMemory in place on the integrated CUDA entries of devices
+	// Short circuit if we have no iGPUs, so GetCPUMem is only called when some device is marked Integrated
+	anyiGPU := false
+	for i := range devices {
+		if devices[i].Integrated {
+			anyiGPU = true
+			break
+		}
+	}
+	if !anyiGPU {
+		return
+	}
+
+	memInfo, err := GetCPUMem()
+	if err != nil {
+		slog.Debug("failed to fetch system memory information for iGPU", "error", err)
+		return // best effort: the failure is only logged at debug level and devices is left unmodified
+	}
+	for i := range devices {
+		if !devices[i].Integrated {
+			continue // discrete devices keep the free memory value they already carry
+		}
+		// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache, so substitute the FreeMemory value reported by GetCPUMem
+		if devices[i].Library == "CUDA" {
+			devices[i].FreeMemory = memInfo.FreeMemory
+		}
+	}
+}