mirror of
https://github.com/jmorganca/ollama
synced 2025-10-05 16:22:53 +02:00
Workaround broken NVIDIA iGPU free VRAM data (#12490)
The CUDA APIs for reporting free VRAM are useless on NVIDIA iGPU systems as they only return the kernels actual free memory and ignore buff/cache allocations which on a typical system will quickly fill up most of the free system memory. As a result, we incorrectly think there's very little available for GPU allocations which is wrong.
This commit is contained in:
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
||||
}
|
||||
}
|
||||
|
||||
// Apply any iGPU workarounds
|
||||
iGPUWorkarounds(devices)
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func iGPUWorkarounds(devices []ml.DeviceInfo) {
|
||||
// short circuit if we have no iGPUs
|
||||
anyiGPU := false
|
||||
for i := range devices {
|
||||
if devices[i].Integrated {
|
||||
anyiGPU = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !anyiGPU {
|
||||
return
|
||||
}
|
||||
|
||||
memInfo, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Debug("failed to fetch system memory information for iGPU", "error", err)
|
||||
return
|
||||
}
|
||||
for i := range devices {
|
||||
if !devices[i].Integrated {
|
||||
continue
|
||||
}
|
||||
// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache
|
||||
if devices[i].Library == "CUDA" {
|
||||
devices[i].FreeMemory = memInfo.FreeMemory
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user