refactor: exclude all system-dependent info in tests with conditional compilation

This commit is contained in:
Ilya Zlobintsev 2025-01-01 09:45:50 +02:00
parent 0f42435a0a
commit bce6eb2043
9 changed files with 86 additions and 85 deletions

View File

@ -42,7 +42,6 @@ use tracing::{debug, error, info, trace, warn};
use {
lact_schema::DrmMemoryInfo,
libdrm_amdgpu_sys::AMDGPU::{DeviceHandle as DrmHandle, MetricsInfo, GPU_INFO},
std::{fs::OpenOptions, os::fd::IntoRawFd},
};
const GPU_CLOCKDOWN_TIMEOUT_SECS: u64 = 3;
@ -58,16 +57,14 @@ pub struct AmdGpuController {
}
impl AmdGpuController {
pub fn new_from_path(
sysfs_path: PathBuf,
pci_db: &Database,
skip_drm: bool,
) -> anyhow::Result<Self> {
pub fn new_from_path(sysfs_path: PathBuf, pci_db: &Database) -> anyhow::Result<Self> {
let handle = GpuHandle::new_from_path(sysfs_path)
.map_err(|error| anyhow!("failed to initialize gpu handle: {error}"))?;
#[allow(unused_mut)]
let mut drm_handle = None;
if matches!(handle.get_driver(), "amdgpu" | "radeon") && !skip_drm {
#[cfg(not(test))]
if matches!(handle.get_driver(), "amdgpu" | "radeon") {
match get_drm_handle(&handle) {
Ok(handle) => {
drm_handle = Some(handle);
@ -578,9 +575,8 @@ impl GpuController for AmdGpuController {
self.handle.get_path()
}
fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
let vulkan_info = if include_vulkan {
self.pci_info.as_ref().and_then(|pci_info| {
fn get_info(&self) -> DeviceInfo {
let vulkan_info = self.pci_info.as_ref().and_then(|pci_info| {
match get_vulkan_info(
&pci_info.device_pci_info.vendor_id,
&pci_info.device_pci_info.model_id,
@ -591,10 +587,7 @@ impl GpuController for AmdGpuController {
None
}
}
})
} else {
None
};
});
let pci_info = self.pci_info.clone();
let driver = self.handle.get_driver().to_owned();
let vbios_version = self.get_full_vbios_version();
@ -1038,12 +1031,15 @@ impl GpuController for AmdGpuController {
}
}
#[cfg(not(test))]
fn get_drm_handle(handle: &GpuHandle) -> anyhow::Result<DrmHandle> {
use std::os::fd::IntoRawFd;
let slot_name = handle
.get_pci_slot_name()
.context("Device has no PCI slot name")?;
let path = format!("/dev/dri/by-path/pci-{slot_name}-render");
let drm_file = OpenOptions::new()
let drm_file = fs::OpenOptions::new()
.read(true)
.write(true)
.open(&path)

View File

@ -122,9 +122,8 @@ impl GpuController for IntelGpuController {
&self.sysfs_path
}
fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
let vulkan_info = if include_vulkan {
match get_vulkan_info(
fn get_info(&self) -> DeviceInfo {
let vulkan_info = match get_vulkan_info(
&self.pci_info.device_pci_info.vendor_id,
&self.pci_info.device_pci_info.model_id,
) {
@ -133,9 +132,6 @@ impl GpuController for IntelGpuController {
warn!("could not load vulkan info: {err}");
None
}
}
} else {
None
};
let drm_info = DrmInfo {
@ -143,6 +139,7 @@ impl GpuController for IntelGpuController {
execution_units: self.drm_try(drm::drm_intel_get_eu_total),
subslices: self.drm_try(drm::drm_intel_get_subslice_total),
},
vram_clock_ratio: 1.0,
..Default::default()
};
@ -357,7 +354,11 @@ impl IntelGpuController {
}
}
#[cfg_attr(test, allow(unreachable_code, unused_variables))]
fn drm_try<T: Default>(&self, f: unsafe extern "C" fn(c_int, *mut T) -> c_int) -> Option<T> {
#[cfg(test)]
return None;
unsafe {
let mut out = T::default();
let result = f(self.drm_file.as_raw_fd(), &mut out);
@ -369,10 +370,14 @@ impl IntelGpuController {
}
}
#[cfg_attr(test, allow(unreachable_code, unused_variables))]
fn drm_try_2<T: Default, O: Default>(
&self,
f: unsafe extern "C" fn(c_int, *mut T, *mut O) -> c_int,
) -> Option<(T, O)> {
#[cfg(test)]
return None;
unsafe {
let mut a = T::default();
let mut b = O::default();

View File

@ -24,7 +24,7 @@ pub trait GpuController {
fn get_path(&self) -> &Path;
fn get_info(&self, include_vulkan: bool) -> DeviceInfo;
fn get_info(&self) -> DeviceInfo;
fn get_pci_slot_name(&self) -> Option<String>;

View File

@ -272,11 +272,10 @@ impl GpuController for NvidiaGpuController {
&self.sysfs_path
}
fn get_info(&self, include_vulkan: bool) -> DeviceInfo {
fn get_info(&self) -> DeviceInfo {
let device = self.device();
let vulkan_info = if include_vulkan {
match get_vulkan_info(
let vulkan_info = match get_vulkan_info(
&self.pci_info.device_pci_info.vendor_id,
&self.pci_info.device_pci_info.model_id,
) {
@ -285,9 +284,6 @@ impl GpuController for NvidiaGpuController {
warn!("could not load vulkan info: {err}");
None
}
}
} else {
None
};
DeviceInfo {

View File

@ -101,21 +101,17 @@ impl<'a> Handler {
Ok(custom_path) => PathBuf::from(custom_path),
Err(_) => PathBuf::from("/sys/class/drm"),
};
Self::with_base_path(&base_path, config, false).await
Self::with_base_path(&base_path, config).await
}
pub(crate) async fn with_base_path(
base_path: &Path,
config: Config,
sysfs_only: bool,
) -> anyhow::Result<Self> {
pub(crate) async fn with_base_path(base_path: &Path, config: Config) -> anyhow::Result<Self> {
let mut controllers = BTreeMap::new();
// Sometimes LACT starts too early in the boot process, before sysfs is initialized.
// For such scenarios there is retry logic that kicks in when no GPUs were found,
// or when some of the PCI devices don't have a drm entry yet.
for i in 1..=CONTROLLERS_LOAD_RETRY_ATTEMPTS {
controllers = load_controllers(base_path, sysfs_only)?;
controllers = load_controllers(base_path)?;
let mut should_retry = false;
if let Ok(devices) = fs::read_dir("/sys/bus/pci/devices") {
@ -352,7 +348,7 @@ impl<'a> Handler {
}
pub fn get_device_info(&'a self, id: &str) -> anyhow::Result<DeviceInfo> {
Ok(self.controller_by_id(id)?.get_info(true))
Ok(self.controller_by_id(id)?.get_info())
}
pub fn get_gpu_stats(&'a self, id: &str) -> anyhow::Result<DeviceStats> {
@ -670,7 +666,7 @@ impl<'a> Handler {
let info = json!({
"system_info": system_info,
"initramfs_type": initramfs_type,
"devices": self.generate_snapshot_device_info(true),
"devices": self.generate_snapshot_device_info(),
});
let info_data = serde_json::to_vec_pretty(&info).unwrap();
@ -695,10 +691,7 @@ impl<'a> Handler {
Ok(out_path)
}
pub(crate) fn generate_snapshot_device_info(
&self,
include_vulkan: bool,
) -> BTreeMap<String, serde_json::Value> {
pub(crate) fn generate_snapshot_device_info(&self) -> BTreeMap<String, serde_json::Value> {
self.gpu_controllers
.iter()
.map(|(id, controller)| {
@ -710,7 +703,7 @@ impl<'a> Handler {
let data = json!({
"pci_info": controller.get_pci_info(),
"info": controller.get_info(include_vulkan),
"info": controller.get_info(),
"stats": controller.get_stats(gpu_config),
"clocks_info": controller.get_clocks_info().ok(),
"power_profile_modes": controller.get_power_profile_modes().ok(),
@ -920,10 +913,7 @@ impl<'a> Handler {
}
/// Builds the map of GPU controllers from the entries found under `base_path`.
/// In test builds (`cfg(test)`) external data sources, such as libdrm and nvml, are not initialized.
fn load_controllers(
base_path: &Path,
sysfs_only: bool,
) -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>> {
fn load_controllers(base_path: &Path) -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>> {
let mut controllers = BTreeMap::new();
let pci_db = Database::read().unwrap_or_else(|err| {
@ -934,10 +924,10 @@ fn load_controllers(
}
});
let nvml = if sysfs_only {
None
} else {
match Nvml::init() {
#[cfg(test)]
let nvml: Option<Rc<Nvml>> = None;
#[cfg(not(test))]
let nvml = match Nvml::init() {
Ok(nvml) => {
info!("NVML initialized");
Some(Rc::new(nvml))
@ -946,7 +936,6 @@ fn load_controllers(
info!("Nvidia support disabled, {err}");
None
}
}
};
for entry in base_path
@ -962,7 +951,7 @@ fn load_controllers(
if name.starts_with("card") && !name.contains('-') {
trace!("trying gpu controller at {:?}", entry.path());
let device_path = entry.path().join("device");
match AmdGpuController::new_from_path(device_path, &pci_db, sysfs_only) {
match AmdGpuController::new_from_path(device_path, &pci_db) {
Ok(controller) => match controller.get_id() {
Ok(id) => {
let path = controller.get_path();

View File

@ -7,7 +7,11 @@ use vulkano::{
VulkanLibrary,
};
#[cfg_attr(test, allow(unreachable_code, unused_variables))]
pub fn get_vulkan_info<'a>(vendor_id: &'a str, device_id: &'a str) -> anyhow::Result<VulkanInfo> {
#[cfg(test)]
return Err(anyhow!("Not allowed in tests"));
trace!("Reading vulkan info");
let vendor_id = u32::from_str_radix(vendor_id, 16)?;
let device_id = u32::from_str_radix(device_id, 16)?;

View File

@ -17,11 +17,11 @@ async fn snapshot_everything() {
device_dir.file_name().to_string_lossy()
);
let handler = Handler::with_base_path(&device_dir.path(), Config::default(), true)
let handler = Handler::with_base_path(&device_dir.path(), Config::default())
.await
.unwrap();
let device_info = handler
.generate_snapshot_device_info(false)
.generate_snapshot_device_info()
.into_values()
.next()
.unwrap();

View File

@ -1,6 +1,7 @@
---
source: lact-daemon/src/tests/mod.rs
expression: device_info
snapshot_kind: text
---
{
"clocks_info": {
@ -19,6 +20,9 @@ expression: device_info
},
"info": {
"driver": "i915",
"drm_info": {
"vram_clock_ratio": 1.0
},
"link_info": {},
"pci_info": {
"device_pci_info": {
@ -49,7 +53,10 @@ expression: device_info
},
"power_profile_modes": null,
"stats": {
"clockspeed": {},
"clockspeed": {
"current_gfxclk": 300,
"gpu_clockspeed": 300
},
"fan": {
"control_enabled": false,
"pmfw_info": {}

View File

@ -1,6 +1,7 @@
---
source: lact-daemon/src/tests/mod.rs
expression: device_info
snapshot_kind: text
---
{
"clocks_info": {
@ -19,6 +20,9 @@ expression: device_info
},
"info": {
"driver": "xe",
"drm_info": {
"vram_clock_ratio": 1.0
},
"link_info": {},
"pci_info": {
"device_pci_info": {