Skip to content

Commit

Permalink
Merge pull request #670 from dougsland/gpudetector
Browse files Browse the repository at this point in the history
Add ramalama gpu_detector
  • Loading branch information
rhatdan authored Feb 1, 2025
2 parents 38974ff + 8d27050 commit 4b34290
Show file tree
Hide file tree
Showing 2 changed files with 286 additions and 0 deletions.
56 changes: 56 additions & 0 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import os
import subprocess
import platform
import time
import ramalama.oci

Expand All @@ -21,6 +22,7 @@
from ramalama.shortnames import Shortnames
from ramalama.toml_parser import TOMLParser
from ramalama.version import version, print_version
from ramalama.gpu_detector import GPUDetector

shortnames = Shortnames()

Expand Down Expand Up @@ -239,6 +241,58 @@ def configure_subcommands(parser):
version_parser(subparsers)


def show_gpus_available_cli(args):
    """Detect and return available GPUs, with macOS support.

    Args:
        args: parsed CLI namespace; accepted for handler-signature
            consistency but not used by this function.

    Returns:
        dict with two keys:
          "Detected GPUs" - list of per-GPU info dicts (a single placeholder
              entry when nothing was detected)
          "INFO" - list of per-vendor error dicts, or the string "No errors"
    """
    gpu_detector = GPUDetector()
    gpu_info = []
    errors = []

    # One (vendor, probe, not-found message) triple per detection path.
    # Factored into a table to remove the previous three copy-pasted
    # try/except stanzas.
    if platform.system() == "Darwin":
        # macOS: system_profiler covers every GPU vendor in one call.
        probes = [("Apple", gpu_detector.get_macos_gpu, "No GPU detected on macOS.")]
    else:
        probes = [
            ("NVIDIA", gpu_detector.get_nvidia_gpu, "No NVIDIA GPU detected or drivers missing."),
            ("AMD", gpu_detector.get_amd_gpu, "No AMD GPU detected or drivers missing."),
            ("Intel", gpu_detector.get_intel_gpu, "No Intel GPU detected or drivers missing."),
        ]

    for vendor, probe, not_found_msg in probes:
        try:
            found = probe()
            if found:
                gpu_info.extend(found)
            else:
                errors.append({"Vendor": vendor, "INFO": not_found_msg})
        except Exception as e:
            # Detection failures are reported, not fatal: one vendor's
            # missing driver must not hide the other vendors' results.
            errors.append({"Vendor": vendor, "INFO": str(e)})

    return {
        "Detected GPUs": gpu_info if gpu_info else [{"GPU": "None", "VRAM": "N/A", "INFO": "No GPUs detected"}],
        "INFO": errors if errors else "No errors"
    }


def parse_arguments(parser):
    """Parse the process command line with *parser* and return the namespace."""
    parsed = parser.parse_args()
    return parsed
Expand Down Expand Up @@ -508,6 +562,8 @@ def info_cli(args):
if args.engine and len(args.engine) > 0:
info["Engine"]["Info"] = engine_info(args)

gpu_info = show_gpus_available_cli(args)
info["GPUs"] = gpu_info
print(json.dumps(info, sort_keys=True, indent=4))


Expand Down
230 changes: 230 additions & 0 deletions ramalama/gpu_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
"""
MIT License
(C) 2024-2025 ramalama developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
"""

import subprocess
import glob
import platform
import logging

# NOTE(review): basicConfig at import time configures the *root* logger for
# the whole process, which can surprise applications embedding this module —
# consider a module-level `logging.getLogger(__name__)` instead; confirm with
# how ramalama's CLI sets up logging.
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s - %(levelname)s - %(message)s"
)


class GPUDetector:
    """Detect GPUs from NVIDIA, AMD, Intel and Apple vendors.

    Each ``get_*_gpu`` method returns a list of GPU-info dicts (keys such as
    "GPU", "VRAM", "Env"). The detector also tracks the best GPU seen so far
    (highest VRAM above a 1 GiB minimum) in ``best_gpu`` / ``best_vram`` /
    ``best_env``.
    """

    def __init__(self):
        self.best_gpu = None   # identifier of the best GPU found so far
        self.best_vram = 0     # its VRAM in MiB
        self.best_env = None   # env var used to select it (e.g. CUDA_VISIBLE_DEVICES)

    def _update_best_gpu(self, memory_mib, gpu_name, env_var):
        """Record *gpu_name* as best if it has the most VRAM seen so far.

        GPUs with 1 GiB (1024 MiB) or less are ignored as too small.
        """
        if memory_mib > 1024 and memory_mib > self.best_vram:
            self.best_vram = memory_mib
            self.best_gpu = gpu_name
            self.best_env = env_var

    def get_nvidia_gpu(self):
        """Detect NVIDIA GPUs using nvidia-smi (Linux only).

        Returns:
            list[dict]: one {"GPU", "VRAM", "Env"} dict per detected GPU;
            an empty list on non-Linux platforms.

        Raises:
            RuntimeError: if nvidia-smi is missing, fails, or its output
            cannot be parsed.
        """
        if platform.system() != "Linux":
            # BUG FIX: was a bare `return` (None); callers iterate/extend
            # the result, so an empty list is required.
            return []

        try:
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=index,memory.total", "--format=csv,noheader,nounits"],
                capture_output=True, text=True, check=True
            )
        except FileNotFoundError:
            raise RuntimeError("`nvidia-smi` not found. No NVIDIA GPU detected or drivers missing.")
        except subprocess.CalledProcessError as e:
            error_msg = e.stderr.strip() if e.stderr else "Unknown error (check if NVIDIA drivers are loaded)."
            raise RuntimeError(f"Unable to detect NVIDIA GPU(s). Error: {error_msg}")

        gpus = []
        for line in result.stdout.strip().split('\n'):
            if not line.strip():
                continue  # tolerate empty nvidia-smi output
            try:
                index, memory_mib = line.split(',')
                memory_mib = int(memory_mib.strip())
            except ValueError:
                raise RuntimeError(f"Error parsing Nvidia GPU info: {line}")
            self._update_best_gpu(memory_mib, index.strip(), "CUDA_VISIBLE_DEVICES")
            # BUG FIX: previously no result was built/returned at all, so
            # callers always concluded "no NVIDIA GPU".
            gpus.append({"GPU": index.strip(), "VRAM": f"{memory_mib} MiB", "Env": "CUDA_VISIBLE_DEVICES"})
        return gpus

    def get_amd_gpu(self):
        """Detect AMD GPUs using sysfs on Linux or system_profiler on macOS.

        Returns:
            list[dict]: GPU-info dicts; empty when nothing usable was found.
        """
        if platform.system() == "Linux":
            info = self._read_gpu_memory(
                '/sys/bus/pci/devices/*/mem_info_vram_total', "AMD GPU", "HIP_VISIBLE_DEVICES"
            )
            # BUG FIX: the result was previously discarded (the method
            # returned None). Only report a GPU when VRAM was actually read.
            if info.get("VRAM") not in (None, "Unknown") and "Error" not in info:
                return [info]
            return []
        if platform.system() == "Darwin":
            return self.get_macos_gpu()  # macOS detection covers AMD GPUs
        return []

    def _read_gpu_memory(self, path_pattern, gpu_name, env_var):
        """Read GPU VRAM from the first sysfs file matching *path_pattern*.

        Returns:
            dict: {"GPU", "VRAM", "Env"}; "VRAM" is "Unknown" when no file
            matched, plus an "Error" key when a read failed.
        """
        try:
            for mem_file in glob.glob(path_pattern):
                with open(mem_file, "r") as f:
                    vram_total = int(f.read().strip()) // (1024 * 1024)  # bytes -> MiB
                # First matching device wins (original behavior preserved).
                return {"GPU": gpu_name, "VRAM": f"{vram_total} MiB", "Env": env_var}
        except Exception as e:
            return {"GPU": gpu_name, "VRAM": "Unknown", "Env": env_var, "Error": str(e)}
        return {"GPU": gpu_name, "VRAM": "Unknown", "Env": env_var}

    def get_intel_gpu(self):
        """Detect Intel GPUs using lspci plus sysfs VRAM info.

        Returns:
            list[dict]: one entry per lspci-detected Intel VGA controller
            (each merged with the sysfs VRAM info), or a single VRAM-info
            entry when lspci found nothing.
        """
        gpus = []

        # Step 1: lspci to find Intel VGA controllers. Uses a list argv
        # (no shell=True) and filters in Python instead of piping to grep.
        try:
            output = subprocess.check_output(["lspci"], text=True)
            for line in output.splitlines():
                if "vga compatible controller" in line.lower() and "Intel Corporation" in line:
                    gpus.append({"GPU": "Intel", "Details": line.strip()})
        except (subprocess.CalledProcessError, FileNotFoundError):
            pass  # lspci missing or failed -> fall through to sysfs only

        # Step 2: best-effort VRAM read from /sys/class/drm/.
        vram_info = self._read_gpu_memory(
            '/sys/class/drm/card*/device/mem_info_vram_total', "Intel GPU", "ONEAPI_DEVICE_SELECTOR"
        )

        if gpus:
            for gpu in gpus:
                gpu.update(vram_info)
        else:
            gpus.append(vram_info)  # no lspci match: return VRAM data anyway

        return gpus

    def get_macos_gpu(self):
        """Detect GPUs on macOS using `system_profiler SPDisplaysDataType`.

        Returns:
            list[dict]: one dict per GPU with keys among "GPU", "Cores",
            "Vendor", "Metal"; on failure a single {"GPU": "Unknown",
            "Error": ...} entry.
        """
        try:
            output = subprocess.check_output(
                ["system_profiler", "SPDisplaysDataType"], text=True
            )
            gpus = []
            gpu_info = {}
            inside_gpu_section = False  # tracks when we are inside a GPU block

            for line in output.splitlines():
                line = line.strip()

                # A trailing colon (other than the "Displays:" header) starts
                # a new GPU section.
                if line.endswith(":") and "Displays:" not in line:
                    if gpu_info:  # store the previous GPU before starting a new one
                        gpus.append(gpu_info)
                        gpu_info = {}
                    inside_gpu_section = True
                    gpu_info["GPU"] = line[:-1]  # drop the trailing colon

                elif inside_gpu_section:
                    if "Chipset Model:" in line:
                        gpu_info["GPU"] = line.split(":")[1].strip()
                    elif "Total Number of Cores:" in line:
                        gpu_info["Cores"] = line.split(":")[1].strip()
                    elif "Vendor:" in line:
                        gpu_info["Vendor"] = line.split(":")[1].strip()
                    elif "Metal Support:" in line:
                        gpu_info["Metal"] = line.split(":")[1].strip()

            # Flush the last GPU section.
            if gpu_info:
                gpus.append(gpu_info)

            if not gpus:
                logging.warning("No GPUs detected on macOS.")
                return [{"GPU": "Unknown", "Error": "No GPU detected on macOS"}]

            return gpus

        except subprocess.CalledProcessError as e:
            logging.error(f"Failed to detect GPU on macOS: {e}")
            return [{"GPU": "Unknown", "Error": "Failed to detect GPU on macOS"}]
        except Exception as e:
            logging.error(f"Unexpected error while detecting macOS GPU: {e}")
            return [{"GPU": "Unknown", "Error": str(e)}]

    @staticmethod
    def _vram_mib(gpu):
        """Best-effort parse of a GPU dict's "VRAM" field ("<n> MiB") to MiB.

        Returns 0 when the field is missing or unparsable. BUG FIX: the old
        inline `int(... .split()[0])` raised an uncaught ValueError on the
        legitimate "Unknown" value produced by _read_gpu_memory.
        """
        try:
            return int(str(gpu.get("VRAM", "0 MiB")).split()[0])
        except (ValueError, IndexError):
            return 0

    def detect_best_gpu(self, gpu_template):
        """Append the available GPU with the highest VRAM to *gpu_template*.

        Tries NVIDIA, AMD and Intel on Linux, or system_profiler on macOS.
        A failure in one vendor's detection is logged and does not stop the
        others.

        Returns:
            bool: True when a GPU entry was appended, False otherwise.

        Raises:
            RuntimeError: on platforms other than Linux and macOS.
        """
        system = platform.system()
        best_gpu = None
        best_vram = 0
        best_env = None  # CUDA, ROCm, ONEAPI_DEVICE_SELECTOR, Metal, ...

        if system == "Linux":
            probes = [
                (self.get_nvidia_gpu, "CUDA", "NVIDIA"),
                (self.get_amd_gpu, "ROCm", "AMD"),
                (self.get_intel_gpu, "ONEAPI_DEVICE_SELECTOR", "Intel"),
            ]
        elif system == "Darwin":
            # Apple uses Metal for GPU acceleration.
            probes = [(self.get_macos_gpu, "Metal", "macOS GPU")]
        else:
            raise RuntimeError(f"GPU detection is not supported on {system}.")

        for probe, env, label in probes:
            try:
                for gpu in probe():
                    vram = self._vram_mib(gpu)
                    if vram > best_vram:
                        best_gpu = gpu
                        best_vram = vram
                        best_env = env
            except RuntimeError as e:
                logging.warning(f"Warning: {label} detection failed: {e}")

        if best_gpu is not None:
            gpu_template.append({
                "index": best_gpu["GPU"],
                "vram": f"{best_vram} MiB",
                "env": best_env
            })
            return True  # GPU detected and added successfully

        logging.warning("No compatible GPUs found.")
        return False  # No GPU found

0 comments on commit 4b34290

Please sign in to comment.