From e4340667e33e0efa5dee471917d71ad6011e59ba Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 3 Oct 2025 12:17:21 -0700
Subject: [PATCH] Workaround broken NVIDIA iGPU free VRAM data (#12490)

The CUDA APIs for reporting free VRAM are useless on NVIDIA iGPU
systems as they only return the kernel's actual free memory and ignore
buff/cache allocations which on a typical system will quickly fill up
most of the free system memory. As a result, we incorrectly think
there's very little available for GPU allocations which is wrong.
---
 discover/runner.go | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/discover/runner.go b/discover/runner.go
index 8071111fa..f69680715 100644
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		}
 	}
 
+	// Apply any iGPU workarounds
+	iGPUWorkarounds(devices)
+
 	return devices
 }
 
@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
 		}
 	}
 }
+
+func iGPUWorkarounds(devices []ml.DeviceInfo) {
+	// short circuit if we have no iGPUs
+	anyiGPU := false
+	for i := range devices {
+		if devices[i].Integrated {
+			anyiGPU = true
+			break
+		}
+	}
+	if !anyiGPU {
+		return
+	}
+
+	memInfo, err := GetCPUMem()
+	if err != nil {
+		slog.Debug("failed to fetch system memory information for iGPU", "error", err)
+		return
+	}
+	for i := range devices {
+		if !devices[i].Integrated {
+			continue
+		}
+		// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache
+		if devices[i].Library == "CUDA" {
+			devices[i].FreeMemory = memInfo.FreeMemory
+		}
+	}
+}