feat: nvidia per-pstate clock offset

This commit is contained in:
Ilya Zlobintsev 2025-02-02 12:39:07 +02:00
parent cb34adf4cb
commit 90a4b504ef
6 changed files with 162 additions and 123 deletions

View File

@ -108,7 +108,7 @@ pub struct Gpu {
}
#[skip_serializing_none]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ClocksConfiguration {
pub min_core_clock: Option<i32>,
pub min_memory_clock: Option<i32>,
@ -116,6 +116,10 @@ pub struct ClocksConfiguration {
pub max_core_clock: Option<i32>,
pub max_memory_clock: Option<i32>,
pub max_voltage: Option<i32>,
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub gpu_clock_offsets: IndexMap<u32, i32>,
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub mem_clock_offsets: IndexMap<u32, i32>,
pub voltage_offset: Option<i32>,
}
@ -135,6 +139,22 @@ impl Gpu {
ClockspeedType::MinMemoryClock => clocks.min_memory_clock = value,
ClockspeedType::MinVoltage => clocks.min_voltage = value,
ClockspeedType::VoltageOffset => clocks.voltage_offset = value,
ClockspeedType::GpuClockOffset(pstate) => match value {
Some(value) => {
clocks.gpu_clock_offsets.insert(pstate, value);
}
None => {
clocks.gpu_clock_offsets.shift_remove(&pstate);
}
},
ClockspeedType::MemClockOffset(pstate) => match value {
Some(value) => {
clocks.mem_clock_offsets.insert(pstate, value);
}
None => {
clocks.mem_clock_offsets.shift_remove(&pstate);
}
},
ClockspeedType::Reset => {
*clocks = ClocksConfiguration::default();
assert!(!self.is_core_clocks_used());
@ -234,6 +254,23 @@ impl Config {
}
}
}
2 => {
for (id, gpu) in &mut self.gpus {
if id.starts_with(VENDOR_NVIDIA) {
gpu.clocks_configuration.max_core_clock = None;
gpu.clocks_configuration.max_memory_clock = None;
}
}
for profile in &mut self.profiles.values_mut() {
for (id, gpu) in &mut profile.gpus {
if id.starts_with(VENDOR_NVIDIA) {
gpu.clocks_configuration.max_core_clock = None;
gpu.clocks_configuration.max_memory_clock = None;
}
}
}
}
_ => break,
}
info!("migrated config version {} to {next_version}", self.version);

View File

@ -7,19 +7,19 @@ use super::{fan_control::FanCurve, CommonControllerInfo, FanControlHandle, GpuCo
use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature};
use anyhow::{anyhow, Context};
use futures::future::LocalBoxFuture;
use indexmap::IndexMap;
use lact_schema::{
ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo,
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockInfo, NvidiaClocksTable, PmfwInfo,
PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockOffset, NvidiaClocksTable,
PmfwInfo, PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
};
use nvml_wrapper::{
bitmasks::device::ThrottleReasons,
enum_wrappers::device::{Brand, Clock, TemperatureSensor, TemperatureThreshold},
enums::device::DeviceArchitecture,
enum_wrappers::device::{Clock, PerformanceState, TemperatureSensor, TemperatureThreshold},
Device, Nvml,
};
use std::{
cell::{Cell, RefCell},
cell::RefCell,
collections::HashMap,
fmt::Write,
rc::Rc,
@ -33,8 +33,8 @@ pub struct NvidiaGpuController {
common: CommonControllerInfo,
fan_control_handle: RefCell<Option<FanControlHandle>>,
last_applied_gpc_offset: Cell<Option<i32>>,
last_applied_mem_offset: Cell<Option<i32>>,
// Store last applied offsets as a workaround when the driver doesn't tell us the current offset
last_applied_offsets: RefCell<HashMap<Clock, HashMap<PerformanceState, i32>>>,
}
impl NvidiaGpuController {
@ -50,8 +50,7 @@ impl NvidiaGpuController {
nvml,
common,
fan_control_handle: RefCell::new(None),
last_applied_gpc_offset: Cell::new(None),
last_applied_mem_offset: Cell::new(None),
last_applied_offsets: RefCell::new(HashMap::new()),
})
}
@ -243,20 +242,6 @@ impl NvidiaGpuController {
Ok(power_states)
}
/// Returns the multiplier used when converting between a VRAM clock delta and
/// the memory clock offset handed to the driver.
///
/// Yields 2 when the device reports the GeForce brand together with the Ada
/// architecture and 1 for every other brand/architecture pair. If either the
/// brand or the architecture query fails, the result is also 1.
// NOTE(review): the hunk header preceding this function (-243,20 +242,6)
// suggests these lines are the ones dropped by the commit — confirm against
// the repository before relying on this helper.
// See https://github.com/ilya-zlobintsev/LACT/issues/418
fn vram_offset_ratio(&self) -> i32 {
let device = self.device();
// Both queries must succeed for the lookup below; any failure falls through
// to the trailing default.
if let (Ok(brand), Ok(architecture)) = (device.brand(), device.architecture()) {
let ratio = match (brand, architecture) {
(Brand::GeForce, DeviceArchitecture::Ada) => 2,
// TODO: check others
_ => 1,
};
return ratio;
}
1
}
}
impl GpuController for NvidiaGpuController {
@ -461,47 +446,46 @@ impl GpuController for NvidiaGpuController {
fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
let device = self.device();
let mut gpc = None;
let mut mem = None;
let mut gpu_offsets = IndexMap::new();
let mut mem_offsets = IndexMap::new();
// Negative offset values are not correctly reported by NVML, so we have to use the last known applied value
// instead of the actual read when an unreasonable value appears.
let supported_pstates = device.supported_performance_states()?;
if let Ok(max) = device.max_clock_info(Clock::Graphics) {
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
if let Some(offset) = self
.last_applied_gpc_offset
.get()
.or_else(|| device.gpc_clk_vf_offset().ok())
{
gpc = Some(NvidiaClockInfo {
max: max as i32,
offset,
offset_ratio: 1,
offset_range,
});
let clock_types = [
(Clock::Graphics, &mut gpu_offsets),
(Clock::Memory, &mut mem_offsets),
];
for (clock_type, offsets) in clock_types {
for pstate in supported_pstates.iter().rev() {
if let Ok(offset) = device.clock_offset(clock_type, *pstate) {
let mut offset = NvidiaClockOffset {
current: offset.clock_offset_mhz,
min: offset.min_clock_offset_mhz,
max: offset.max_clock_offset_mhz,
};
// On some driver versions, the applied offset values are not reported.
// In these scenarios we must store them manually for reporting.
if offset.current == 0 {
if let Some(applied_offsets) =
self.last_applied_offsets.borrow().get(&clock_type)
{
if let Some(applied_offset) = applied_offsets.get(pstate) {
offset.current = *applied_offset;
}
}
}
offsets.insert(pstate.as_c(), offset);
}
}
}
if let Ok(max) = device.max_clock_info(Clock::Memory) {
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
if let Some(offset) = self
.last_applied_mem_offset
.get()
.or_else(|| device.mem_clk_vf_offset().ok())
{
mem = Some(NvidiaClockInfo {
max: max as i32,
offset,
offset_ratio: self.vram_offset_ratio(),
offset_range,
});
}
}
}
let table = NvidiaClocksTable { gpc, mem };
let table = NvidiaClocksTable {
gpu_offsets,
mem_offsets,
};
Ok(ClocksInfo {
table: Some(ClocksTable::Nvidia(table)),
@ -564,34 +548,38 @@ impl GpuController for NvidiaGpuController {
self.cleanup_clocks()?;
if let Some(max_gpu_clock) = config.clocks_configuration.max_core_clock {
let default_max_clock = device
.max_clock_info(Clock::Graphics)
.context("Could not read max graphics clock")?;
let offset = max_gpu_clock - default_max_clock as i32;
debug!(
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
);
for (pstate, offset) in &config.clocks_configuration.gpu_clock_offsets {
let pstate = PerformanceState::try_from(*pstate)
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
debug!("applying offset {offset} for GPU pstate {pstate:?}");
device
.set_gpc_clk_vf_offset(offset)
.context("Could not set graphics clock offset")?;
.set_clock_offset(Clock::Graphics, pstate, *offset)
.with_context(|| {
format!("Could not set clock offset {offset} for GPU pstate {pstate:?}")
})?;
self.last_applied_gpc_offset.set(Some(offset));
self.last_applied_offsets
.borrow_mut()
.entry(Clock::Graphics)
.or_default()
.insert(pstate, *offset);
}
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
let default_max_clock = device
.max_clock_info(Clock::Memory)
.context("Could not read max memory clock")?;
let offset = (max_mem_clock - default_max_clock as i32) * self.vram_offset_ratio();
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");
for (pstate, offset) in &config.clocks_configuration.mem_clock_offsets {
let pstate = PerformanceState::try_from(*pstate)
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
debug!("applying offset {offset} for VRAM pstate {pstate:?}");
device
.set_mem_clk_vf_offset(offset)
.context("Could not set memory clock offset")?;
.set_clock_offset(Clock::Memory, pstate, *offset)
.with_context(|| {
format!("Could not set clock offset {offset} for VRAM pstate {pstate:?}")
})?;
self.last_applied_mem_offset.set(Some(offset));
self.last_applied_offsets
.borrow_mut()
.entry(Clock::Memory)
.or_default()
.insert(pstate, *offset);
}
if config.fan_control_enabled {
@ -633,23 +621,33 @@ impl GpuController for NvidiaGpuController {
fn cleanup_clocks(&self) -> anyhow::Result<()> {
let device = self.device();
if let Ok(current_offset) = device.gpc_clk_vf_offset() {
if current_offset != 0 {
device
.set_gpc_clk_vf_offset(0)
.context("Could not reset graphics clock offset")?;
if let Ok(supported_pstates) = device.supported_performance_states() {
for pstate in supported_pstates {
for clock_type in [Clock::Graphics, Clock::Memory] {
if let Ok(current_offset) = device.clock_offset(clock_type, pstate) {
if current_offset.clock_offset_mhz != 0
|| self
.last_applied_offsets
.borrow()
.get(&clock_type)
.and_then(|applied_offsets| applied_offsets.get(&pstate))
.is_some_and(|offset| *offset != 0)
{
debug!("resetting clock offset for {clock_type:?} pstate {pstate:?}");
device
.set_clock_offset(clock_type, pstate, 0)
.with_context(|| {
format!("Could not reset {clock_type:?} pstate {pstate:?}")
})?;
}
}
self.last_applied_gpc_offset.set(None);
}
}
if let Ok(current_offset) = device.mem_clk_vf_offset() {
if current_offset != 0 {
device
.set_mem_clk_vf_offset(0)
.context("Could not reset memory clock offset")?;
self.last_applied_mem_offset.set(None);
if let Some(applied_offsets) =
self.last_applied_offsets.borrow_mut().get_mut(&clock_type)
{
applied_offsets.remove(&pstate);
}
}
}
}

View File

@ -12,7 +12,7 @@ use gtk::{
};
use lact_schema::{
request::{ClockspeedType, SetClocksCommand},
ClocksTable, IntelClocksTable, NvidiaClockInfo, NvidiaClocksTable,
ClocksTable, IntelClocksTable, NvidiaClockOffset, NvidiaClocksTable,
};
use relm4::{factory::FactoryHashMap, ComponentParts, ComponentSender, RelmWidgetExt};
@ -190,16 +190,16 @@ impl ClocksFrame {
}
/// Fills `self.clocks` from an NVIDIA clocks table.
///
/// Every `(pstate, offset)` pair in `table.gpu_offsets` is inserted under
/// `ClockspeedType::GpuClockOffset(pstate)`, and every pair in
/// `table.mem_offsets` under `ClockspeedType::MemClockOffset(pstate)`. The
/// stored value is whatever `nvidia_clock_offset_to_data` produces for that
/// offset.
// NOTE(review): this listing shows the `if let Some(..) = &table.gpc` /
// `&table.mem` variants (keyed by `MaxCoreClock` / `MaxMemoryClock`) next to
// the `for` loops that supersede them — presumably the diff markers were lost
// when the page was rendered; confirm against the repository.
fn set_nvidia_table(&mut self, table: NvidiaClocksTable) {
if let Some(gpc_info) = &table.gpc {
for (pstate, offset) in table.gpu_offsets {
self.clocks.insert(
ClockspeedType::MaxCoreClock,
nvidia_clock_offset_to_data(gpc_info),
ClockspeedType::GpuClockOffset(pstate),
nvidia_clock_offset_to_data(&offset),
);
}
if let Some(mem_info) = &table.mem {
for (pstate, offset) in table.mem_offsets {
self.clocks.insert(
ClockspeedType::MaxMemoryClock,
nvidia_clock_offset_to_data(mem_info),
ClockspeedType::MemClockOffset(pstate),
nvidia_clock_offset_to_data(&offset),
);
}
}
@ -241,10 +241,10 @@ impl ClocksFrame {
}
}
/// Converts an NVIDIA clock record into the `ClocksData` that
/// `set_nvidia_table` inserts into `self.clocks`.
///
/// With the `&NvidiaClockOffset` signature the `current`, `min` and `max`
/// fields are copied through unchanged. With the `&NvidiaClockInfo` signature
/// the values were derived from `max`: `current` added `offset / offset_ratio`,
/// while `min` and `max` added the two bounds of `offset_range`.
// NOTE(review): both signatures and both sets of field initialisers appear
// below because this listing interleaves the replaced and replacing lines —
// presumably only the `NvidiaClockOffset` form is live; confirm.
fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockInfo) -> ClocksData {
fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockOffset) -> ClocksData {
ClocksData {
current: clock_info.max + (clock_info.offset / clock_info.offset_ratio),
min: clock_info.max + clock_info.offset_range.0,
max: clock_info.max + clock_info.offset_range.1,
current: clock_info.current,
min: clock_info.min,
max: clock_info.max,
}
}

View File

@ -41,14 +41,16 @@ impl FactoryComponent for ClockAdjustmentRow {
gtk::Label {
set_width_request: 185,
set_xalign: 0.0,
set_label: match self.clock_type {
ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)",
ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)",
ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)",
ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)",
ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)",
ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)",
ClockspeedType::VoltageOffset => "GPU voltage offset (mV)",
set_label: &match self.clock_type {
ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)".to_owned(),
ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)".to_owned(),
ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)".to_owned(),
ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)".to_owned(),
ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)".to_owned(),
ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)".to_owned(),
ClockspeedType::VoltageOffset => "GPU voltage offset (mV)".to_owned(),
ClockspeedType::GpuClockOffset(pstate) => format!("GPU Clock offset at P-State {pstate} (MHz)"),
ClockspeedType::MemClockOffset(pstate) => format!("VRAM Clock offset at P-State {pstate} (MHz)"),
ClockspeedType::Reset => unreachable!(),
},
},

View File

@ -167,11 +167,12 @@ pub enum ClocksTable {
Intel(IntelClocksTable),
}
/// Clock information reported for an NVIDIA GPU, carried inside
/// `ClocksTable::Nvidia`.
///
/// The two maps default to empty when absent from the input and are left out
/// of the serialized form when empty (see the `serde` attributes on each).
// NOTE(review): the `gpc` / `mem` fields and the offset maps are listed
// together here because the replaced and replacing lines are interleaved —
// presumably only the maps remain after the commit; confirm.
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClocksTable {
pub gpc: Option<NvidiaClockInfo>,
pub mem: Option<NvidiaClockInfo>,
/// Offsets for `Clock::Graphics`; the NVIDIA controller's `get_clocks_info`
/// keys each entry by `pstate.as_c()`.
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub gpu_offsets: IndexMap<u32, NvidiaClockOffset>,
/// Offsets for `Clock::Memory`, keyed the same way as `gpu_offsets`.
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
pub mem_offsets: IndexMap<u32, NvidiaClockOffset>,
}
/// Doc from `xe_gt_freq.c`
@ -188,11 +189,10 @@ pub struct IntelClocksTable {
}
/// Clock offset of one clock type at a single performance state.
///
/// The NVIDIA controller's `get_clocks_info` builds these from the result of
/// `device.clock_offset(clock_type, pstate)`.
// NOTE(review): two struct header lines and two field sets appear below
// because the replaced (`NvidiaClockInfo`) and replacing (`NvidiaClockOffset`)
// lines are interleaved in this listing — confirm which fields are live.
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClockInfo {
pub struct NvidiaClockOffset {
/// Offset currently in effect, taken from `clock_offset_mhz`. When that
/// reads 0, `get_clocks_info` substitutes the last offset it applied for the
/// same clock type and pstate, if one is recorded.
pub current: i32,
/// Lower bound of the offset, taken from `min_clock_offset_mhz`.
pub min: i32,
/// In the `NvidiaClockOffset` form: upper bound of the offset, taken from
/// `max_clock_offset_mhz`. In the `NvidiaClockInfo` form this was filled from
/// `device.max_clock_info(..)` instead.
pub max: i32,
// The remaining fields are the ones the `NvidiaClockInfo` form was built with
// (`offset`, `offset_ratio`, `offset_range`).
pub offset: i32,
pub offset_ratio: i32,
pub offset_range: (i32, i32),
}
impl From<AmdClocksTableGen> for ClocksInfo {

View File

@ -125,6 +125,8 @@ pub enum ClockspeedType {
MinMemoryClock,
MinVoltage,
VoltageOffset,
GpuClockOffset(u32),
MemClockOffset(u32),
Reset,
}