mirror of
https://github.com/jmorganca/ollama
synced 2025-10-06 00:32:49 +02:00
This revamps how we discover GPUs in the system by leveraging the Ollama runner. This should eliminate inconsistency between our GPU discovery and the runner's capabilities at runtime, particularly for cases where we try to filter out unsupported GPUs. Now the runner does that implicitly based on the actual device list. In some cases free VRAM reporting can be unreliable, which can lead to scheduling mistakes, so this also includes a patch to leverage more reliable VRAM reporting libraries if available. Automatic workarounds have been removed, as only one GPU leveraged this, which is now documented. This GPU will soon fall off the support matrix with the next ROCm bump. Additional cleanup of the scheduler and discovery packages can be done in the future once we have switched on the new memory management code and removed support for the llama runner.
213 lines
5.9 KiB
Go
213 lines
5.9 KiB
Go
package discover
|
|
|
|
import (
|
|
"fmt"
|
|
"log/slog"
|
|
"syscall"
|
|
"unsafe"
|
|
)
|
|
|
|
// MEMORYSTATUSEX is the buffer handed by pointer to the kernel32
// GlobalMemoryStatusEx call. The field order and widths are part of that
// binary contract, so fields must not be reordered, resized or removed.
type MEMORYSTATUSEX struct {
	// length carries the size of this struct in bytes; it is filled in by the
	// caller before the call is made.
	length     uint32
	MemoryLoad uint32

	// Physical memory counters; GetCPUMem reports these as total and free
	// memory.
	TotalPhys uint64
	AvailPhys uint64

	// Page file counters; GetCPUMem reports AvailPageFile as free swap.
	TotalPageFile uint64
	AvailPageFile uint64

	// Remaining counters filled in by the call but not read in this file.
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}
|
|
|
|
var (
|
|
k32 = syscall.NewLazyDLL("kernel32.dll")
|
|
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
|
|
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
|
|
GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
|
|
)
|
|
|
|
func GetCPUMem() (memInfo, error) {
|
|
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
|
r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
|
|
if r1 == 0 {
|
|
return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
|
|
}
|
|
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
|
}
|
|
|
|
// LOGICAL_PROCESSOR_RELATIONSHIP identifies the kind of record found in the
// buffer filled by GetLogicalProcessorInformationEx, and is also the selector
// passed to that call.
type LOGICAL_PROCESSOR_RELATIONSHIP uint32

// Relationship kinds. The numeric values are spelled out because they are
// compared against values read straight out of the buffer returned by the
// operating system.
const (
	RelationProcessorCore    LOGICAL_PROCESSOR_RELATIONSHIP = 0
	RelationNumaNode         LOGICAL_PROCESSOR_RELATIONSHIP = 1
	RelationCache            LOGICAL_PROCESSOR_RELATIONSHIP = 2
	RelationProcessorPackage LOGICAL_PROCESSOR_RELATIONSHIP = 3
	RelationGroup            LOGICAL_PROCESSOR_RELATIONSHIP = 4
	RelationProcessorDie     LOGICAL_PROCESSOR_RELATIONSHIP = 5
	RelationNumaNodeEx       LOGICAL_PROCESSOR_RELATIONSHIP = 6
	RelationProcessorModule  LOGICAL_PROCESSOR_RELATIONSHIP = 7

	// RelationAll is the selector this file passes to
	// GetLogicalProcessorInformationEx to request every relationship kind.
	RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
)
|
|
|
|
// GROUP_AFFINITY describes a set of logical processors as a bitmask within one
// processor group. Values of this type are overlaid directly on the buffer
// returned by GetLogicalProcessorInformationEx, so the field order and sizes
// must stay exactly as declared.
type GROUP_AFFINITY struct {
	Mask     uintptr   // KAFFINITY
	Group    uint16    // processor group number the mask belongs to
	Reserved [3]uint16 // unused here; kept so the struct size matches
}
|
|
|
|
type PROCESSOR_RELATIONSHIP struct {
|
|
Flags byte
|
|
EfficiencyClass byte
|
|
Reserved [20]byte
|
|
GroupCount uint16
|
|
GroupMask [1]GROUP_AFFINITY // len GroupCount
|
|
}
|
|
|
|
// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
|
|
|
|
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
|
|
Relationship LOGICAL_PROCESSOR_RELATIONSHIP
|
|
Size uint32
|
|
U [1]byte // Union len Size
|
|
// PROCESSOR_RELATIONSHIP
|
|
// NUMA_NODE_RELATIONSHIP
|
|
// CACHE_RELATIONSHIP
|
|
// GROUP_RELATIONSHIP
|
|
}
|
|
|
|
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
|
|
if group == nil || target == nil {
|
|
return false
|
|
}
|
|
return group.Mask&target.Mask != 0
|
|
}
|
|
|
|
type winPackage struct {
|
|
groups []*GROUP_AFFINITY
|
|
coreCount int // performance cores = coreCount - efficiencyCoreCount
|
|
efficiencyCoreCount int
|
|
threadCount int
|
|
}
|
|
|
|
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
|
|
for _, group := range pkg.groups {
|
|
if group.IsMember(target) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func getLogicalProcessorInformationEx() ([]byte, error) {
|
|
buf := make([]byte, 1024)
|
|
bufSize := len(buf)
|
|
var err error
|
|
for range 3 {
|
|
var ret uintptr
|
|
ret, _, err = GetLogicalProcessorInformationEx.Call(
|
|
uintptr(RelationAll),
|
|
uintptr(unsafe.Pointer(&buf[0])),
|
|
uintptr(unsafe.Pointer(&bufSize)),
|
|
)
|
|
if ret == 1 && bufSize <= len(buf) {
|
|
return buf, nil
|
|
}
|
|
buf = make([]byte, bufSize)
|
|
}
|
|
return nil, fmt.Errorf("unable to determine CPU details: %w", err)
|
|
}
|
|
|
|
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
|
|
var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
|
// Find all the packages first
|
|
packages := []*winPackage{}
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorPackage {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
pkg := &winPackage{}
|
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
|
for j := range pr.GroupCount {
|
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
|
pkg.groups = append(pkg.groups, gm)
|
|
}
|
|
packages = append(packages, pkg)
|
|
}
|
|
|
|
slog.Info("packages", "count", len(packages))
|
|
|
|
// To identify efficiency cores we have to compare the relative values
|
|
// Larger values are "less efficient" (aka, more performant)
|
|
var maxEfficiencyClass byte
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorCore {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
if pr.EfficiencyClass > maxEfficiencyClass {
|
|
maxEfficiencyClass = pr.EfficiencyClass
|
|
}
|
|
}
|
|
if maxEfficiencyClass > 0 {
|
|
slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
|
|
}
|
|
|
|
// then match up the Cores to the Packages, count up cores, threads and efficiency cores
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorCore {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
|
for j := range pr.GroupCount {
|
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
|
for _, pkg := range packages {
|
|
if pkg.IsMember(gm) {
|
|
pkg.coreCount++
|
|
if pr.Flags == 0 {
|
|
pkg.threadCount++
|
|
} else {
|
|
pkg.threadCount += 2
|
|
}
|
|
if pr.EfficiencyClass < maxEfficiencyClass {
|
|
pkg.efficiencyCoreCount++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Summarize the results
|
|
for i, pkg := range packages {
|
|
slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
|
|
}
|
|
|
|
return packages
|
|
}
|
|
|
|
func GetCPUDetails() []CPU {
|
|
buf, err := getLogicalProcessorInformationEx()
|
|
if err != nil {
|
|
slog.Warn("failed to get CPU details", "error", err)
|
|
return nil
|
|
}
|
|
packages := processSystemLogicalProcessorInforationList(buf)
|
|
cpus := make([]CPU, len(packages))
|
|
|
|
for i, pkg := range packages {
|
|
cpus[i].CoreCount = pkg.coreCount
|
|
cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
|
|
cpus[i].ThreadCount = pkg.threadCount
|
|
}
|
|
return cpus
|
|
}
|
|
|
|
// IsNUMA always reports false in this file: NUMA support in ggml is
// Linux-only, so no detection is attempted here.
func IsNUMA() bool {
	return false
}
|