feat: nvidia per-pstate clock offset

This commit is contained in:
Ilya Zlobintsev 2025-02-02 12:39:07 +02:00
parent cb34adf4cb
commit 90a4b504ef
6 changed files with 162 additions and 123 deletions

View File

@ -108,7 +108,7 @@ pub struct Gpu {
} }
#[skip_serializing_none] #[skip_serializing_none]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)] #[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ClocksConfiguration { pub struct ClocksConfiguration {
pub min_core_clock: Option<i32>, pub min_core_clock: Option<i32>,
pub min_memory_clock: Option<i32>, pub min_memory_clock: Option<i32>,
@ -116,6 +116,10 @@ pub struct ClocksConfiguration {
pub max_core_clock: Option<i32>, pub max_core_clock: Option<i32>,
pub max_memory_clock: Option<i32>, pub max_memory_clock: Option<i32>,
pub max_voltage: Option<i32>, pub max_voltage: Option<i32>,
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub gpu_clock_offsets: IndexMap<u32, i32>,
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub mem_clock_offsets: IndexMap<u32, i32>,
pub voltage_offset: Option<i32>, pub voltage_offset: Option<i32>,
} }
@ -135,6 +139,22 @@ impl Gpu {
ClockspeedType::MinMemoryClock => clocks.min_memory_clock = value, ClockspeedType::MinMemoryClock => clocks.min_memory_clock = value,
ClockspeedType::MinVoltage => clocks.min_voltage = value, ClockspeedType::MinVoltage => clocks.min_voltage = value,
ClockspeedType::VoltageOffset => clocks.voltage_offset = value, ClockspeedType::VoltageOffset => clocks.voltage_offset = value,
ClockspeedType::GpuClockOffset(pstate) => match value {
Some(value) => {
clocks.gpu_clock_offsets.insert(pstate, value);
}
None => {
clocks.gpu_clock_offsets.shift_remove(&pstate);
}
},
ClockspeedType::MemClockOffset(pstate) => match value {
Some(value) => {
clocks.mem_clock_offsets.insert(pstate, value);
}
None => {
clocks.mem_clock_offsets.shift_remove(&pstate);
}
},
ClockspeedType::Reset => { ClockspeedType::Reset => {
*clocks = ClocksConfiguration::default(); *clocks = ClocksConfiguration::default();
assert!(!self.is_core_clocks_used()); assert!(!self.is_core_clocks_used());
@ -234,6 +254,23 @@ impl Config {
} }
} }
} }
2 => {
for (id, gpu) in &mut self.gpus {
if id.starts_with(VENDOR_NVIDIA) {
gpu.clocks_configuration.max_core_clock = None;
gpu.clocks_configuration.max_memory_clock = None;
}
}
for profile in &mut self.profiles.values_mut() {
for (id, gpu) in &mut profile.gpus {
if id.starts_with(VENDOR_NVIDIA) {
gpu.clocks_configuration.max_core_clock = None;
gpu.clocks_configuration.max_memory_clock = None;
}
}
}
}
_ => break, _ => break,
} }
info!("migrated config version {} to {next_version}", self.version); info!("migrated config version {} to {next_version}", self.version);

View File

@ -7,19 +7,19 @@ use super::{fan_control::FanCurve, CommonControllerInfo, FanControlHandle, GpuCo
use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature}; use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature};
use anyhow::{anyhow, Context}; use anyhow::{anyhow, Context};
use futures::future::LocalBoxFuture; use futures::future::LocalBoxFuture;
use indexmap::IndexMap;
use lact_schema::{ use lact_schema::{
ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo, ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo,
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockInfo, NvidiaClocksTable, PmfwInfo, FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockOffset, NvidiaClocksTable,
PowerState, PowerStates, PowerStats, VoltageStats, VramStats, PmfwInfo, PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
}; };
use nvml_wrapper::{ use nvml_wrapper::{
bitmasks::device::ThrottleReasons, bitmasks::device::ThrottleReasons,
enum_wrappers::device::{Brand, Clock, TemperatureSensor, TemperatureThreshold}, enum_wrappers::device::{Clock, PerformanceState, TemperatureSensor, TemperatureThreshold},
enums::device::DeviceArchitecture,
Device, Nvml, Device, Nvml,
}; };
use std::{ use std::{
cell::{Cell, RefCell}, cell::RefCell,
collections::HashMap, collections::HashMap,
fmt::Write, fmt::Write,
rc::Rc, rc::Rc,
@ -33,8 +33,8 @@ pub struct NvidiaGpuController {
common: CommonControllerInfo, common: CommonControllerInfo,
fan_control_handle: RefCell<Option<FanControlHandle>>, fan_control_handle: RefCell<Option<FanControlHandle>>,
last_applied_gpc_offset: Cell<Option<i32>>, // Store last applied offsets as a workaround when the driver doesn't tell us the current offset
last_applied_mem_offset: Cell<Option<i32>>, last_applied_offsets: RefCell<HashMap<Clock, HashMap<PerformanceState, i32>>>,
} }
impl NvidiaGpuController { impl NvidiaGpuController {
@ -50,8 +50,7 @@ impl NvidiaGpuController {
nvml, nvml,
common, common,
fan_control_handle: RefCell::new(None), fan_control_handle: RefCell::new(None),
last_applied_gpc_offset: Cell::new(None), last_applied_offsets: RefCell::new(HashMap::new()),
last_applied_mem_offset: Cell::new(None),
}) })
} }
@ -243,20 +242,6 @@ impl NvidiaGpuController {
Ok(power_states) Ok(power_states)
} }
// See https://github.com/ilya-zlobintsev/LACT/issues/418
fn vram_offset_ratio(&self) -> i32 {
let device = self.device();
if let (Ok(brand), Ok(architecture)) = (device.brand(), device.architecture()) {
let ratio = match (brand, architecture) {
(Brand::GeForce, DeviceArchitecture::Ada) => 2,
// TODO: check others
_ => 1,
};
return ratio;
}
1
}
} }
impl GpuController for NvidiaGpuController { impl GpuController for NvidiaGpuController {
@ -461,47 +446,46 @@ impl GpuController for NvidiaGpuController {
fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> { fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
let device = self.device(); let device = self.device();
let mut gpc = None; let mut gpu_offsets = IndexMap::new();
let mut mem = None; let mut mem_offsets = IndexMap::new();
// Negative offset values are not correctly reported by NVML, so we have to use the last known applied value let supported_pstates = device.supported_performance_states()?;
// instead of the actual read when an unreasonable value appears.
if let Ok(max) = device.max_clock_info(Clock::Graphics) { let clock_types = [
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() { (Clock::Graphics, &mut gpu_offsets),
if let Some(offset) = self (Clock::Memory, &mut mem_offsets),
.last_applied_gpc_offset ];
.get()
.or_else(|| device.gpc_clk_vf_offset().ok()) for (clock_type, offsets) in clock_types {
{ for pstate in supported_pstates.iter().rev() {
gpc = Some(NvidiaClockInfo { if let Ok(offset) = device.clock_offset(clock_type, *pstate) {
max: max as i32, let mut offset = NvidiaClockOffset {
offset, current: offset.clock_offset_mhz,
offset_ratio: 1, min: offset.min_clock_offset_mhz,
offset_range, max: offset.max_clock_offset_mhz,
}); };
// On some driver versions, the applied offset values are not reported.
// In these scenarios we must store them manually for reporting.
if offset.current == 0 {
if let Some(applied_offsets) =
self.last_applied_offsets.borrow().get(&clock_type)
{
if let Some(applied_offset) = applied_offsets.get(pstate) {
offset.current = *applied_offset;
}
}
}
offsets.insert(pstate.as_c(), offset);
} }
} }
} }
if let Ok(max) = device.max_clock_info(Clock::Memory) { let table = NvidiaClocksTable {
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() { gpu_offsets,
if let Some(offset) = self mem_offsets,
.last_applied_mem_offset };
.get()
.or_else(|| device.mem_clk_vf_offset().ok())
{
mem = Some(NvidiaClockInfo {
max: max as i32,
offset,
offset_ratio: self.vram_offset_ratio(),
offset_range,
});
}
}
}
let table = NvidiaClocksTable { gpc, mem };
Ok(ClocksInfo { Ok(ClocksInfo {
table: Some(ClocksTable::Nvidia(table)), table: Some(ClocksTable::Nvidia(table)),
@ -564,34 +548,38 @@ impl GpuController for NvidiaGpuController {
self.cleanup_clocks()?; self.cleanup_clocks()?;
if let Some(max_gpu_clock) = config.clocks_configuration.max_core_clock { for (pstate, offset) in &config.clocks_configuration.gpu_clock_offsets {
let default_max_clock = device let pstate = PerformanceState::try_from(*pstate)
.max_clock_info(Clock::Graphics) .map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
.context("Could not read max graphics clock")?; debug!("applying offset {offset} for GPU pstate {pstate:?}");
let offset = max_gpu_clock - default_max_clock as i32;
debug!(
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
);
device device
.set_gpc_clk_vf_offset(offset) .set_clock_offset(Clock::Graphics, pstate, *offset)
.context("Could not set graphics clock offset")?; .with_context(|| {
format!("Could not set clock offset {offset} for GPU pstate {pstate:?}")
})?;
self.last_applied_gpc_offset.set(Some(offset)); self.last_applied_offsets
.borrow_mut()
.entry(Clock::Graphics)
.or_default()
.insert(pstate, *offset);
} }
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock { for (pstate, offset) in &config.clocks_configuration.mem_clock_offsets {
let default_max_clock = device let pstate = PerformanceState::try_from(*pstate)
.max_clock_info(Clock::Memory) .map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
.context("Could not read max memory clock")?; debug!("applying offset {offset} for VRAM pstate {pstate:?}");
let offset = (max_mem_clock - default_max_clock as i32) * self.vram_offset_ratio();
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");
device device
.set_mem_clk_vf_offset(offset) .set_clock_offset(Clock::Memory, pstate, *offset)
.context("Could not set memory clock offset")?; .with_context(|| {
format!("Could not set clock offset {offset} for VRAM pstate {pstate:?}")
})?;
self.last_applied_mem_offset.set(Some(offset)); self.last_applied_offsets
.borrow_mut()
.entry(Clock::Memory)
.or_default()
.insert(pstate, *offset);
} }
if config.fan_control_enabled { if config.fan_control_enabled {
@ -633,23 +621,33 @@ impl GpuController for NvidiaGpuController {
fn cleanup_clocks(&self) -> anyhow::Result<()> { fn cleanup_clocks(&self) -> anyhow::Result<()> {
let device = self.device(); let device = self.device();
if let Ok(current_offset) = device.gpc_clk_vf_offset() { if let Ok(supported_pstates) = device.supported_performance_states() {
if current_offset != 0 { for pstate in supported_pstates {
device for clock_type in [Clock::Graphics, Clock::Memory] {
.set_gpc_clk_vf_offset(0) if let Ok(current_offset) = device.clock_offset(clock_type, pstate) {
.context("Could not reset graphics clock offset")?; if current_offset.clock_offset_mhz != 0
|| self
.last_applied_offsets
.borrow()
.get(&clock_type)
.and_then(|applied_offsets| applied_offsets.get(&pstate))
.is_some_and(|offset| *offset != 0)
{
debug!("resetting clock offset for {clock_type:?} pstate {pstate:?}");
device
.set_clock_offset(clock_type, pstate, 0)
.with_context(|| {
format!("Could not reset {clock_type:?} pstate {pstate:?}")
})?;
}
}
self.last_applied_gpc_offset.set(None); if let Some(applied_offsets) =
} self.last_applied_offsets.borrow_mut().get_mut(&clock_type)
} {
applied_offsets.remove(&pstate);
if let Ok(current_offset) = device.mem_clk_vf_offset() { }
if current_offset != 0 { }
device
.set_mem_clk_vf_offset(0)
.context("Could not reset memory clock offset")?;
self.last_applied_mem_offset.set(None);
} }
} }

View File

@ -12,7 +12,7 @@ use gtk::{
}; };
use lact_schema::{ use lact_schema::{
request::{ClockspeedType, SetClocksCommand}, request::{ClockspeedType, SetClocksCommand},
ClocksTable, IntelClocksTable, NvidiaClockInfo, NvidiaClocksTable, ClocksTable, IntelClocksTable, NvidiaClockOffset, NvidiaClocksTable,
}; };
use relm4::{factory::FactoryHashMap, ComponentParts, ComponentSender, RelmWidgetExt}; use relm4::{factory::FactoryHashMap, ComponentParts, ComponentSender, RelmWidgetExt};
@ -190,16 +190,16 @@ impl ClocksFrame {
} }
fn set_nvidia_table(&mut self, table: NvidiaClocksTable) { fn set_nvidia_table(&mut self, table: NvidiaClocksTable) {
if let Some(gpc_info) = &table.gpc { for (pstate, offset) in table.gpu_offsets {
self.clocks.insert( self.clocks.insert(
ClockspeedType::MaxCoreClock, ClockspeedType::GpuClockOffset(pstate),
nvidia_clock_offset_to_data(gpc_info), nvidia_clock_offset_to_data(&offset),
); );
} }
if let Some(mem_info) = &table.mem { for (pstate, offset) in table.mem_offsets {
self.clocks.insert( self.clocks.insert(
ClockspeedType::MaxMemoryClock, ClockspeedType::MemClockOffset(pstate),
nvidia_clock_offset_to_data(mem_info), nvidia_clock_offset_to_data(&offset),
); );
} }
} }
@ -241,10 +241,10 @@ impl ClocksFrame {
} }
} }
fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockInfo) -> ClocksData { fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockOffset) -> ClocksData {
ClocksData { ClocksData {
current: clock_info.max + (clock_info.offset / clock_info.offset_ratio), current: clock_info.current,
min: clock_info.max + clock_info.offset_range.0, min: clock_info.min,
max: clock_info.max + clock_info.offset_range.1, max: clock_info.max,
} }
} }

View File

@ -41,14 +41,16 @@ impl FactoryComponent for ClockAdjustmentRow {
gtk::Label { gtk::Label {
set_width_request: 185, set_width_request: 185,
set_xalign: 0.0, set_xalign: 0.0,
set_label: match self.clock_type { set_label: &match self.clock_type {
ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)", ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)".to_owned(),
ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)", ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)".to_owned(),
ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)", ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)".to_owned(),
ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)", ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)".to_owned(),
ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)", ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)".to_owned(),
ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)", ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)".to_owned(),
ClockspeedType::VoltageOffset => "GPU voltage offset (mV)", ClockspeedType::VoltageOffset => "GPU voltage offset (mV)".to_owned(),
ClockspeedType::GpuClockOffset(pstate) => format!("GPU Clock offset at P-State {pstate} (MHz)"),
ClockspeedType::MemClockOffset(pstate) => format!("VRAM Clock offset at P-State {pstate} (MHz)"),
ClockspeedType::Reset => unreachable!(), ClockspeedType::Reset => unreachable!(),
}, },
}, },

View File

@ -167,11 +167,12 @@ pub enum ClocksTable {
Intel(IntelClocksTable), Intel(IntelClocksTable),
} }
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Default, Debug, Clone)] #[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClocksTable { pub struct NvidiaClocksTable {
pub gpc: Option<NvidiaClockInfo>, #[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub mem: Option<NvidiaClockInfo>, pub gpu_offsets: IndexMap<u32, NvidiaClockOffset>,
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub mem_offsets: IndexMap<u32, NvidiaClockOffset>,
} }
/// Doc from `xe_gt_freq.c` /// Doc from `xe_gt_freq.c`
@ -188,11 +189,10 @@ pub struct IntelClocksTable {
} }
#[derive(Serialize, Deserialize, Default, Debug, Clone)] #[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClockInfo { pub struct NvidiaClockOffset {
pub current: i32,
pub min: i32,
pub max: i32, pub max: i32,
pub offset: i32,
pub offset_ratio: i32,
pub offset_range: (i32, i32),
} }
impl From<AmdClocksTableGen> for ClocksInfo { impl From<AmdClocksTableGen> for ClocksInfo {

View File

@ -125,6 +125,8 @@ pub enum ClockspeedType {
MinMemoryClock, MinMemoryClock,
MinVoltage, MinVoltage,
VoltageOffset, VoltageOffset,
GpuClockOffset(u32),
MemClockOffset(u32),
Reset, Reset,
} }