WIP: add initial intel xe/i915 support

This commit is contained in:
Ilya Zlobintsev 2024-12-20 10:12:46 +02:00
parent 08a3d5b90e
commit 22bf7257f8
118 changed files with 538 additions and 81 deletions

View File

@ -545,6 +545,10 @@ impl AmdGpuController {
&& STEAM_DECK_IDS.contains(&info.device_pci_info.model_id.as_str()) && STEAM_DECK_IDS.contains(&info.device_pci_info.model_id.as_str())
}) })
} }
/// Returns the driver name reported by the underlying GPU handle.
///
/// This simply forwards to the handle; the returned string borrows from `self`.
pub fn get_driver(&self) -> &str {
self.handle.get_driver()
}
} }
impl GpuController for AmdGpuController { impl GpuController for AmdGpuController {
@ -572,19 +576,23 @@ impl GpuController for AmdGpuController {
self.handle.get_path() self.handle.get_path()
} }
fn get_info(&self) -> DeviceInfo { fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
let vulkan_info = self.pci_info.as_ref().and_then(|pci_info| { let vulkan_info = if include_vulkan {
match get_vulkan_info( self.pci_info.as_ref().and_then(|pci_info| {
&pci_info.device_pci_info.vendor_id, match get_vulkan_info(
&pci_info.device_pci_info.model_id, &pci_info.device_pci_info.vendor_id,
) { &pci_info.device_pci_info.model_id,
Ok(info) => Some(info), ) {
Err(err) => { Ok(info) => Some(info),
warn!("could not load vulkan info: {err}"); Err(err) => {
None warn!("could not load vulkan info: {err}");
None
}
} }
} })
}); } else {
None
};
let pci_info = self.pci_info.clone(); let pci_info = self.pci_info.clone();
let driver = self.handle.get_driver().to_owned(); let driver = self.handle.get_driver().to_owned();
let vbios_version = self.get_full_vbios_version(); let vbios_version = self.get_full_vbios_version();
@ -601,10 +609,6 @@ impl GpuController for AmdGpuController {
} }
} }
fn hw_monitors(&self) -> &[HwMon] {
&self.handle.hw_monitors
}
fn get_pci_slot_name(&self) -> Option<String> { fn get_pci_slot_name(&self) -> Option<String> {
self.handle.get_pci_slot_name().map(str::to_owned) self.handle.get_pci_slot_name().map(str::to_owned)
} }

View File

@ -0,0 +1,213 @@
use super::GpuController;
use crate::{config, server::vulkan::get_vulkan_info};
use amdgpu_sysfs::gpu_handle::power_profile_mode::PowerProfileModesTable;
use anyhow::anyhow;
use futures::future::LocalBoxFuture;
use lact_schema::{
ClocksInfo, ClockspeedStats, DeviceInfo, DeviceStats, GpuPciInfo, LinkInfo, PowerStates,
};
use std::{
fmt::Display,
fs,
path::{Path, PathBuf},
str::FromStr,
};
use tracing::{error, info, warn};
/// Kernel driver flavour backing an Intel GPU.
///
/// Derived from the driver name string when an `IntelGpuController` is created.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DriverType {
    /// The `xe` driver.
    Xe,
    /// The `i915` driver.
    I915,
}
/// Controller for an Intel GPU driven by either the `xe` or the `i915` driver.
pub struct IntelGpuController {
/// Sysfs directory of the device; returned as-is by `get_path`.
sysfs_path: PathBuf,
/// Driver name as passed to `new` (`"xe"` or `"i915"`); reported in `DeviceInfo`.
driver: String,
/// Parsed form of `driver`, determined once in `new`.
driver_type: DriverType,
/// PCI slot id, if known; used as the last component of the controller id.
pci_slot_id: Option<String>,
/// PCI vendor/model ids of the device and its subsystem.
pci_info: GpuPciInfo,
/// `tile*/gt*` directories found under `sysfs_path`; only populated for the `xe` driver.
tile_gts: Vec<PathBuf>,
}
impl IntelGpuController {
pub fn new(
sysfs_path: PathBuf,
driver: String,
pci_slot_id: Option<String>,
pci_info: GpuPciInfo,
) -> Self {
let driver_type = match driver.as_str() {
"xe" => DriverType::Xe,
"i915" => DriverType::I915,
_ => unreachable!(),
};
let mut tile_gts = vec![];
if let DriverType::Xe = driver_type {
for entry in fs::read_dir(&sysfs_path).into_iter().flatten().flatten() {
if let Some(name) = entry.file_name().to_str() {
if name.starts_with("tile") {
for gt_entry in fs::read_dir(entry.path()).into_iter().flatten().flatten() {
if let Some(gt_name) = gt_entry.file_name().to_str() {
if gt_name.starts_with("gt") {
tile_gts.push(gt_entry.path());
}
}
}
}
}
}
info!(
"initialized {} gt at '{}'",
tile_gts.len(),
sysfs_path.display()
);
}
Self {
sysfs_path,
driver,
driver_type,
pci_slot_id,
pci_info,
tile_gts,
}
}
}
impl GpuController for IntelGpuController {
    /// Builds the controller id in the form
    /// `<vendor>:<model>-<subsystem vendor>:<subsystem model>-<pci slot>`.
    ///
    /// `unknown-slot` is used as the last component when no PCI slot id is available.
    fn get_id(&self) -> anyhow::Result<String> {
        let device = &self.pci_info.device_pci_info;
        let subsystem = &self.pci_info.subsystem_pci_info;
        let slot = self.pci_slot_id.as_deref().unwrap_or("unknown-slot");

        Ok(format!(
            "{}:{}-{}:{}-{}",
            device.vendor_id, device.model_id, subsystem.vendor_id, subsystem.model_id, slot
        ))
    }

    /// Always returns the PCI info supplied at construction time.
    fn get_pci_info(&self) -> Option<&GpuPciInfo> {
        Some(&self.pci_info)
    }

    /// Returns the sysfs path supplied at construction time.
    fn get_path(&self) -> &Path {
        self.sysfs_path.as_path()
    }

    /// Assembles the device info.
    ///
    /// Vulkan info is only queried when `include_vulkan` is set; a failed query is logged
    /// as a warning and reported as `None`.
    fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
        let vulkan_info = include_vulkan
            .then(|| {
                let device = &self.pci_info.device_pci_info;
                get_vulkan_info(&device.vendor_id, &device.model_id)
                    .map_err(|err| warn!("could not load vulkan info: {err}"))
                    .ok()
            })
            .flatten();

        DeviceInfo {
            pci_info: Some(self.pci_info.clone()),
            vulkan_info,
            driver: self.driver.clone(),
            vbios_version: None,
            link_info: LinkInfo::default(),
            drm_info: None,
        }
    }

    /// Returns a copy of the PCI slot id, if one is known.
    fn get_pci_slot_name(&self) -> Option<String> {
        self.pci_slot_id.as_deref().map(str::to_owned)
    }

    /// No configuration is applied for this controller; the future resolves to `Ok(())`.
    fn apply_config<'a>(
        &'a self,
        _config: &'a config::Gpu,
    ) -> LocalBoxFuture<'a, anyhow::Result<()>> {
        Box::pin(async { Ok(()) })
    }

    /// Reports clockspeed stats read from the first GT's `freq0` directory; every other
    /// stat is left at its default.
    fn get_stats(&self, _gpu_config: Option<&config::Gpu>) -> DeviceStats {
        let cur_freq = self.read_gt_file("freq0/cur_freq");
        // Prefer a non-zero `act_freq` reading and fall back to `cur_freq` otherwise.
        let act_freq = self
            .read_gt_file::<u64>("freq0/act_freq")
            .filter(|&freq| freq != 0);

        let clockspeed = ClockspeedStats {
            gpu_clockspeed: act_freq.or_else(|| cur_freq.map(u64::from)),
            current_gfxclk: cur_freq,
            vram_clockspeed: None,
        };

        DeviceStats {
            clockspeed,
            ..Default::default()
        }
    }

    /// Not supported by this controller; always returns an error.
    fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
        anyhow::bail!("Not supported")
    }

    /// Returns the default (empty) set of power states.
    fn get_power_states(&self, _gpu_config: Option<&config::Gpu>) -> PowerStates {
        PowerStates::default()
    }

    /// Nothing to reset for this controller.
    fn reset_pmfw_settings(&self) {}

    /// Nothing to clean up for this controller; always succeeds.
    fn cleanup_clocks(&self) -> anyhow::Result<()> {
        Ok(())
    }

    /// Not supported by this controller; always returns an error.
    fn get_power_profile_modes(&self) -> anyhow::Result<PowerProfileModesTable> {
        anyhow::bail!("Not supported")
    }

    /// Not supported by this controller; always returns an error.
    fn vbios_dump(&self) -> anyhow::Result<Vec<u8>> {
        anyhow::bail!("Not supported")
    }
}
impl IntelGpuController {
    /// Returns the first discovered `tile*/gt*` directory, if any were found.
    fn first_tile_gt(&self) -> Option<&Path> {
        self.tile_gts.first().map(PathBuf::as_path)
    }

    /// Reads `file_name` relative to the first GT directory and parses its trimmed contents.
    ///
    /// Returns `None` when no GT directory is known, when the file does not exist, or when
    /// reading or parsing fails. A missing file is treated as "not supported" and is not
    /// logged; any other read failure and any parse failure is logged as an error.
    fn read_gt_file<T>(&self, file_name: &str) -> Option<T>
    where
        T: FromStr,
        T::Err: Display,
    {
        let file_path = self.first_tile_gt()?.join(file_name);

        // Read directly instead of checking `exists()` first: this avoids an extra stat per
        // poll and the race where the file disappears between the check and the read.
        let contents = match fs::read_to_string(&file_path) {
            Ok(contents) => contents,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return None,
            Err(err) => {
                error!("could not read file at '{}': {err}", file_path.display());
                return None;
            }
        };

        match contents.trim().parse() {
            Ok(value) => Some(value),
            Err(err) => {
                error!(
                    "could not parse value from '{}': {err}",
                    file_path.display()
                );
                None
            }
        }
    }
}

View File

@ -1,14 +1,15 @@
#![allow(clippy::module_name_repetitions)] #![allow(clippy::module_name_repetitions)]
mod amd; mod amd;
pub mod fan_control; pub mod fan_control;
mod intel;
mod nvidia; mod nvidia;
pub use amd::AmdGpuController; pub use amd::AmdGpuController;
pub use intel::IntelGpuController;
pub use nvidia::NvidiaGpuController; pub use nvidia::NvidiaGpuController;
use crate::config::{self}; use crate::config::{self};
use amdgpu_sysfs::gpu_handle::power_profile_mode::PowerProfileModesTable; use amdgpu_sysfs::gpu_handle::power_profile_mode::PowerProfileModesTable;
use amdgpu_sysfs::hw_mon::HwMon;
use futures::future::LocalBoxFuture; use futures::future::LocalBoxFuture;
use lact_schema::{ClocksInfo, DeviceInfo, DeviceStats, GpuPciInfo, PowerStates}; use lact_schema::{ClocksInfo, DeviceInfo, DeviceStats, GpuPciInfo, PowerStates};
use std::{path::Path, rc::Rc}; use std::{path::Path, rc::Rc};
@ -23,7 +24,7 @@ pub trait GpuController {
fn get_path(&self) -> &Path; fn get_path(&self) -> &Path;
fn get_info(&self) -> DeviceInfo; fn get_info(&self, include_vulkan: bool) -> DeviceInfo;
fn get_pci_slot_name(&self) -> Option<String>; fn get_pci_slot_name(&self) -> Option<String>;
@ -45,6 +46,4 @@ pub trait GpuController {
fn get_power_profile_modes(&self) -> anyhow::Result<PowerProfileModesTable>; fn get_power_profile_modes(&self) -> anyhow::Result<PowerProfileModesTable>;
fn vbios_dump(&self) -> anyhow::Result<Vec<u8>>; fn vbios_dump(&self) -> anyhow::Result<Vec<u8>>;
fn hw_monitors(&self) -> &[HwMon];
} }

View File

@ -4,10 +4,7 @@ use crate::{
}; };
use super::{fan_control::FanCurve, FanControlHandle, GpuController}; use super::{fan_control::FanCurve, FanControlHandle, GpuController};
use amdgpu_sysfs::{ use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature};
gpu_handle::power_profile_mode::PowerProfileModesTable,
hw_mon::{HwMon, Temperature},
};
use anyhow::{anyhow, Context}; use anyhow::{anyhow, Context};
use futures::future::LocalBoxFuture; use futures::future::LocalBoxFuture;
use lact_schema::{ use lact_schema::{
@ -32,11 +29,11 @@ use tokio::{select, sync::Notify, time::sleep};
use tracing::{debug, error, trace, warn}; use tracing::{debug, error, trace, warn};
pub struct NvidiaGpuController { pub struct NvidiaGpuController {
pub nvml: Rc<Nvml>, nvml: Rc<Nvml>,
pub pci_slot_id: String, pci_slot_id: String,
pub pci_info: GpuPciInfo, pci_info: GpuPciInfo,
pub sysfs_path: PathBuf, sysfs_path: PathBuf,
pub fan_control_handle: RefCell<Option<FanControlHandle>>, fan_control_handle: RefCell<Option<FanControlHandle>>,
last_applied_gpc_offset: Cell<Option<i32>>, last_applied_gpc_offset: Cell<Option<i32>>,
last_applied_mem_offset: Cell<Option<i32>>, last_applied_mem_offset: Cell<Option<i32>>,
@ -275,18 +272,22 @@ impl GpuController for NvidiaGpuController {
&self.sysfs_path &self.sysfs_path
} }
fn get_info(&self) -> DeviceInfo { fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
let device = self.device(); let device = self.device();
let vulkan_info = match get_vulkan_info( let vulkan_info = if include_vulkan {
&self.pci_info.device_pci_info.vendor_id, match get_vulkan_info(
&self.pci_info.device_pci_info.model_id, &self.pci_info.device_pci_info.vendor_id,
) { &self.pci_info.device_pci_info.model_id,
Ok(info) => Some(info), ) {
Err(err) => { Ok(info) => Some(info),
warn!("could not load vulkan info: {err}"); Err(err) => {
None warn!("could not load vulkan info: {err}");
None
}
} }
} else {
None
}; };
DeviceInfo { DeviceInfo {
@ -353,10 +354,6 @@ impl GpuController for NvidiaGpuController {
} }
} }
fn hw_monitors(&self) -> &[HwMon] {
&[]
}
fn get_pci_slot_name(&self) -> Option<String> { fn get_pci_slot_name(&self) -> Option<String> {
Some(self.pci_slot_id.clone()) Some(self.pci_slot_id.clone())
} }

View File

@ -6,13 +6,12 @@ use super::{
use crate::{ use crate::{
config::{self, default_fan_static_speed, Config, FanControlSettings, Profile}, config::{self, default_fan_static_speed, Config, FanControlSettings, Profile},
server::{ server::{
gpu_controller::{AmdGpuController, NvidiaGpuController}, gpu_controller::{AmdGpuController, IntelGpuController, NvidiaGpuController},
profiles, profiles,
}, },
}; };
use amdgpu_sysfs::{ use amdgpu_sysfs::gpu_handle::{
gpu_handle::{power_profile_mode::PowerProfileModesTable, PerformanceLevel, PowerLevelKind}, power_profile_mode::PowerProfileModesTable, PerformanceLevel, PowerLevelKind,
sysfs::SysFS,
}; };
use anyhow::{anyhow, bail, Context}; use anyhow::{anyhow, bail, Context};
use lact_schema::{ use lact_schema::{
@ -85,8 +84,6 @@ const SNAPSHOT_FAN_CTRL_FILES: &[&str] = &[
"fan_zero_rpm_enable", "fan_zero_rpm_enable",
"fan_zero_rpm_stop_temperature", "fan_zero_rpm_stop_temperature",
]; ];
const SNAPSHOT_HWMON_FILE_PREFIXES: &[&str] =
&["fan", "pwm", "power", "temp", "freq", "in", "name"];
#[derive(Clone)] #[derive(Clone)]
pub struct Handler { pub struct Handler {
@ -355,7 +352,7 @@ impl<'a> Handler {
} }
pub fn get_device_info(&'a self, id: &str) -> anyhow::Result<DeviceInfo> { pub fn get_device_info(&'a self, id: &str) -> anyhow::Result<DeviceInfo> {
Ok(self.controller_by_id(id)?.get_info()) Ok(self.controller_by_id(id)?.get_info(true))
} }
pub fn get_gpu_stats(&'a self, id: &str) -> anyhow::Result<DeviceStats> { pub fn get_gpu_stats(&'a self, id: &str) -> anyhow::Result<DeviceStats> {
@ -607,31 +604,33 @@ impl<'a> Handler {
} }
} }
let card_path = controller.get_path().parent().unwrap();
let card_files = fs::read_dir(card_path)
.context("Could not read device dir")?
.flatten();
for card_entry in card_files {
if let Ok(metadata) = card_entry.metadata() {
if metadata.is_file() {
let full_path = controller_path.join(card_entry.path());
add_path_to_archive(&mut archive, &full_path)?;
}
}
}
let gt_path = card_path.join("gt");
if gt_path.exists() {
add_path_recursively(&mut archive, &gt_path, card_path)?;
}
let fan_ctrl_path = controller_path.join("gpu_od").join("fan_ctrl"); let fan_ctrl_path = controller_path.join("gpu_od").join("fan_ctrl");
for fan_ctrl_file in SNAPSHOT_FAN_CTRL_FILES { for fan_ctrl_file in SNAPSHOT_FAN_CTRL_FILES {
let full_path = fan_ctrl_path.join(fan_ctrl_file); let full_path = fan_ctrl_path.join(fan_ctrl_file);
add_path_to_archive(&mut archive, &full_path)?; add_path_to_archive(&mut archive, &full_path)?;
} }
for hw_mon in controller.hw_monitors() { let hwmon_path = controller_path.join("hwmon");
let hw_mon_path = hw_mon.get_path(); if hwmon_path.exists() {
let hw_mon_entries = add_path_recursively(&mut archive, &hwmon_path, controller_path)?;
fs::read_dir(hw_mon_path).context("Could not read HwMon dir")?;
'entries: for entry in hw_mon_entries.flatten() {
if !entry.metadata().is_ok_and(|metadata| metadata.is_file()) {
continue;
}
if let Some(name) = entry.file_name().to_str() {
for prefix in SNAPSHOT_HWMON_FILE_PREFIXES {
if name.starts_with(prefix) {
add_path_to_archive(&mut archive, &entry.path())?;
continue 'entries;
}
}
}
}
} }
} }
@ -671,7 +670,7 @@ impl<'a> Handler {
let info = json!({ let info = json!({
"system_info": system_info, "system_info": system_info,
"initramfs_type": initramfs_type, "initramfs_type": initramfs_type,
"devices": self.generate_snapshot_device_info(), "devices": self.generate_snapshot_device_info(true),
}); });
let info_data = serde_json::to_vec_pretty(&info).unwrap(); let info_data = serde_json::to_vec_pretty(&info).unwrap();
@ -696,7 +695,10 @@ impl<'a> Handler {
Ok(out_path) Ok(out_path)
} }
pub(crate) fn generate_snapshot_device_info(&self) -> BTreeMap<String, serde_json::Value> { pub(crate) fn generate_snapshot_device_info(
&self,
include_vulkan: bool,
) -> BTreeMap<String, serde_json::Value> {
self.gpu_controllers self.gpu_controllers
.iter() .iter()
.map(|(id, controller)| { .map(|(id, controller)| {
@ -708,7 +710,7 @@ impl<'a> Handler {
let data = json!({ let data = json!({
"pci_info": controller.get_pci_info(), "pci_info": controller.get_pci_info(),
"info": controller.get_info(), "info": controller.get_info(include_vulkan),
"stats": controller.get_stats(gpu_config), "stats": controller.get_stats(gpu_config),
"clocks_info": controller.get_clocks_info().ok(), "clocks_info": controller.get_clocks_info().ok(),
"power_profile_modes": controller.get_power_profile_modes().ok(), "power_profile_modes": controller.get_power_profile_modes().ok(),
@ -965,6 +967,27 @@ fn load_controllers(
Ok(id) => { Ok(id) => {
let path = controller.get_path(); let path = controller.get_path();
if matches!(controller.get_driver(), "xe" | "i915") {
match controller.get_pci_info() {
Some(pci_info) => {
let controller = IntelGpuController::new(
path.to_owned(),
controller.get_driver().to_owned(),
controller.get_pci_slot_name(),
pci_info.clone(),
);
let id = controller.get_id().unwrap();
info!("initialized Intel controller {id} for path {path:?}");
controllers
.insert(id, Box::new(controller) as Box<dyn GpuController>);
continue;
}
None => {
error!("could not get PCI info for Intel GPU at {path:?}",);
}
}
}
if let Some(nvml) = nvml.clone() { if let Some(nvml) = nvml.clone() {
if let Some(pci_slot_id) = controller.get_pci_slot_name() { if let Some(pci_slot_id) = controller.get_pci_slot_name() {
match nvml.device_by_pci_bus_id(pci_slot_id.as_str()) { match nvml.device_by_pci_bus_id(pci_slot_id.as_str()) {
@ -1082,6 +1105,8 @@ fn add_path_to_archive(
warn!("file {full_path:?} exists, but could not be added to snapshot: {err}"); warn!("file {full_path:?} exists, but could not be added to snapshot: {err}");
} }
} }
} else {
trace!("{full_path:?} does not exist, not adding to snapshot");
} }
Ok(()) Ok(())
} }

View File

@ -0,0 +1 @@
226:0

View File

@ -0,0 +1,6 @@
DRIVER=i915
PCI_CLASS=30000
PCI_ID=8086:9B41
PCI_SUBSYS_ID=17AA:22B1
PCI_SLOT_NAME=0000:00:02.0
MODALIAS=pci:v00008086d00009B41sv000017AAsd000022B1bc03sc00i00

View File

@ -0,0 +1 @@
0x8086

View File

@ -0,0 +1 @@
1150

View File

@ -0,0 +1 @@
300

View File

@ -0,0 +1 @@
300

View File

@ -0,0 +1 @@
300

View File

@ -0,0 +1 @@
1150

View File

@ -0,0 +1 @@
300

View File

@ -0,0 +1 @@
1150

View File

@ -0,0 +1 @@
300

View File

@ -0,0 +1,4 @@
MAJOR=226
MINOR=0
DEVNAME=dri/card0
DEVTYPE=drm_minor

Some files were not shown because too many files have changed in this diff Show More