mirror of
https://github.com/ilya-zlobintsev/LACT.git
synced 2025-02-25 18:55:26 -06:00
feat: nvidia per-pstate clock offset
This commit is contained in:
parent
cb34adf4cb
commit
90a4b504ef
@ -108,7 +108,7 @@ pub struct Gpu {
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
|
||||
pub struct ClocksConfiguration {
|
||||
pub min_core_clock: Option<i32>,
|
||||
pub min_memory_clock: Option<i32>,
|
||||
@ -116,6 +116,10 @@ pub struct ClocksConfiguration {
|
||||
pub max_core_clock: Option<i32>,
|
||||
pub max_memory_clock: Option<i32>,
|
||||
pub max_voltage: Option<i32>,
|
||||
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub gpu_clock_offsets: IndexMap<u32, i32>,
|
||||
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub mem_clock_offsets: IndexMap<u32, i32>,
|
||||
pub voltage_offset: Option<i32>,
|
||||
}
|
||||
|
||||
@ -135,6 +139,22 @@ impl Gpu {
|
||||
ClockspeedType::MinMemoryClock => clocks.min_memory_clock = value,
|
||||
ClockspeedType::MinVoltage => clocks.min_voltage = value,
|
||||
ClockspeedType::VoltageOffset => clocks.voltage_offset = value,
|
||||
ClockspeedType::GpuClockOffset(pstate) => match value {
|
||||
Some(value) => {
|
||||
clocks.gpu_clock_offsets.insert(pstate, value);
|
||||
}
|
||||
None => {
|
||||
clocks.gpu_clock_offsets.shift_remove(&pstate);
|
||||
}
|
||||
},
|
||||
ClockspeedType::MemClockOffset(pstate) => match value {
|
||||
Some(value) => {
|
||||
clocks.mem_clock_offsets.insert(pstate, value);
|
||||
}
|
||||
None => {
|
||||
clocks.mem_clock_offsets.shift_remove(&pstate);
|
||||
}
|
||||
},
|
||||
ClockspeedType::Reset => {
|
||||
*clocks = ClocksConfiguration::default();
|
||||
assert!(!self.is_core_clocks_used());
|
||||
@ -234,6 +254,23 @@ impl Config {
|
||||
}
|
||||
}
|
||||
}
|
||||
2 => {
|
||||
for (id, gpu) in &mut self.gpus {
|
||||
if id.starts_with(VENDOR_NVIDIA) {
|
||||
gpu.clocks_configuration.max_core_clock = None;
|
||||
gpu.clocks_configuration.max_memory_clock = None;
|
||||
}
|
||||
}
|
||||
|
||||
for profile in &mut self.profiles.values_mut() {
|
||||
for (id, gpu) in &mut profile.gpus {
|
||||
if id.starts_with(VENDOR_NVIDIA) {
|
||||
gpu.clocks_configuration.max_core_clock = None;
|
||||
gpu.clocks_configuration.max_memory_clock = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
info!("migrated config version {} to {next_version}", self.version);
|
||||
|
@ -7,19 +7,19 @@ use super::{fan_control::FanCurve, CommonControllerInfo, FanControlHandle, GpuCo
|
||||
use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature};
|
||||
use anyhow::{anyhow, Context};
|
||||
use futures::future::LocalBoxFuture;
|
||||
use indexmap::IndexMap;
|
||||
use lact_schema::{
|
||||
ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo,
|
||||
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockInfo, NvidiaClocksTable, PmfwInfo,
|
||||
PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
|
||||
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockOffset, NvidiaClocksTable,
|
||||
PmfwInfo, PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
|
||||
};
|
||||
use nvml_wrapper::{
|
||||
bitmasks::device::ThrottleReasons,
|
||||
enum_wrappers::device::{Brand, Clock, TemperatureSensor, TemperatureThreshold},
|
||||
enums::device::DeviceArchitecture,
|
||||
enum_wrappers::device::{Clock, PerformanceState, TemperatureSensor, TemperatureThreshold},
|
||||
Device, Nvml,
|
||||
};
|
||||
use std::{
|
||||
cell::{Cell, RefCell},
|
||||
cell::RefCell,
|
||||
collections::HashMap,
|
||||
fmt::Write,
|
||||
rc::Rc,
|
||||
@ -33,8 +33,8 @@ pub struct NvidiaGpuController {
|
||||
common: CommonControllerInfo,
|
||||
fan_control_handle: RefCell<Option<FanControlHandle>>,
|
||||
|
||||
last_applied_gpc_offset: Cell<Option<i32>>,
|
||||
last_applied_mem_offset: Cell<Option<i32>>,
|
||||
// Store last applied offsets as a workaround when the driver doesn't tell us the current offset
|
||||
last_applied_offsets: RefCell<HashMap<Clock, HashMap<PerformanceState, i32>>>,
|
||||
}
|
||||
|
||||
impl NvidiaGpuController {
|
||||
@ -50,8 +50,7 @@ impl NvidiaGpuController {
|
||||
nvml,
|
||||
common,
|
||||
fan_control_handle: RefCell::new(None),
|
||||
last_applied_gpc_offset: Cell::new(None),
|
||||
last_applied_mem_offset: Cell::new(None),
|
||||
last_applied_offsets: RefCell::new(HashMap::new()),
|
||||
})
|
||||
}
|
||||
|
||||
@ -243,20 +242,6 @@ impl NvidiaGpuController {
|
||||
|
||||
Ok(power_states)
|
||||
}
|
||||
|
||||
// See https://github.com/ilya-zlobintsev/LACT/issues/418
|
||||
fn vram_offset_ratio(&self) -> i32 {
|
||||
let device = self.device();
|
||||
if let (Ok(brand), Ok(architecture)) = (device.brand(), device.architecture()) {
|
||||
let ratio = match (brand, architecture) {
|
||||
(Brand::GeForce, DeviceArchitecture::Ada) => 2,
|
||||
// TODO: check others
|
||||
_ => 1,
|
||||
};
|
||||
return ratio;
|
||||
}
|
||||
1
|
||||
}
|
||||
}
|
||||
|
||||
impl GpuController for NvidiaGpuController {
|
||||
@ -461,47 +446,46 @@ impl GpuController for NvidiaGpuController {
|
||||
fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
|
||||
let device = self.device();
|
||||
|
||||
let mut gpc = None;
|
||||
let mut mem = None;
|
||||
let mut gpu_offsets = IndexMap::new();
|
||||
let mut mem_offsets = IndexMap::new();
|
||||
|
||||
// Negative offset values are not correctly reported by NVML, so we have to use the last known applied value
|
||||
// instead of the actual read when an unreasonable value appears.
|
||||
let supported_pstates = device.supported_performance_states()?;
|
||||
|
||||
if let Ok(max) = device.max_clock_info(Clock::Graphics) {
|
||||
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
|
||||
if let Some(offset) = self
|
||||
.last_applied_gpc_offset
|
||||
.get()
|
||||
.or_else(|| device.gpc_clk_vf_offset().ok())
|
||||
{
|
||||
gpc = Some(NvidiaClockInfo {
|
||||
max: max as i32,
|
||||
offset,
|
||||
offset_ratio: 1,
|
||||
offset_range,
|
||||
});
|
||||
let clock_types = [
|
||||
(Clock::Graphics, &mut gpu_offsets),
|
||||
(Clock::Memory, &mut mem_offsets),
|
||||
];
|
||||
|
||||
for (clock_type, offsets) in clock_types {
|
||||
for pstate in supported_pstates.iter().rev() {
|
||||
if let Ok(offset) = device.clock_offset(clock_type, *pstate) {
|
||||
let mut offset = NvidiaClockOffset {
|
||||
current: offset.clock_offset_mhz,
|
||||
min: offset.min_clock_offset_mhz,
|
||||
max: offset.max_clock_offset_mhz,
|
||||
};
|
||||
|
||||
// On some driver versions, the applied offset values are not reported.
|
||||
// In these scenarios we must store them manually for reporting.
|
||||
if offset.current == 0 {
|
||||
if let Some(applied_offsets) =
|
||||
self.last_applied_offsets.borrow().get(&clock_type)
|
||||
{
|
||||
if let Some(applied_offset) = applied_offsets.get(pstate) {
|
||||
offset.current = *applied_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
offsets.insert(pstate.as_c(), offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(max) = device.max_clock_info(Clock::Memory) {
|
||||
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
|
||||
if let Some(offset) = self
|
||||
.last_applied_mem_offset
|
||||
.get()
|
||||
.or_else(|| device.mem_clk_vf_offset().ok())
|
||||
{
|
||||
mem = Some(NvidiaClockInfo {
|
||||
max: max as i32,
|
||||
offset,
|
||||
offset_ratio: self.vram_offset_ratio(),
|
||||
offset_range,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let table = NvidiaClocksTable { gpc, mem };
|
||||
let table = NvidiaClocksTable {
|
||||
gpu_offsets,
|
||||
mem_offsets,
|
||||
};
|
||||
|
||||
Ok(ClocksInfo {
|
||||
table: Some(ClocksTable::Nvidia(table)),
|
||||
@ -564,34 +548,38 @@ impl GpuController for NvidiaGpuController {
|
||||
|
||||
self.cleanup_clocks()?;
|
||||
|
||||
if let Some(max_gpu_clock) = config.clocks_configuration.max_core_clock {
|
||||
let default_max_clock = device
|
||||
.max_clock_info(Clock::Graphics)
|
||||
.context("Could not read max graphics clock")?;
|
||||
let offset = max_gpu_clock - default_max_clock as i32;
|
||||
debug!(
|
||||
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
|
||||
);
|
||||
|
||||
for (pstate, offset) in &config.clocks_configuration.gpu_clock_offsets {
|
||||
let pstate = PerformanceState::try_from(*pstate)
|
||||
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
|
||||
debug!("applying offset {offset} for GPU pstate {pstate:?}");
|
||||
device
|
||||
.set_gpc_clk_vf_offset(offset)
|
||||
.context("Could not set graphics clock offset")?;
|
||||
.set_clock_offset(Clock::Graphics, pstate, *offset)
|
||||
.with_context(|| {
|
||||
format!("Could not set clock offset {offset} for GPU pstate {pstate:?}")
|
||||
})?;
|
||||
|
||||
self.last_applied_gpc_offset.set(Some(offset));
|
||||
self.last_applied_offsets
|
||||
.borrow_mut()
|
||||
.entry(Clock::Graphics)
|
||||
.or_default()
|
||||
.insert(pstate, *offset);
|
||||
}
|
||||
|
||||
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
|
||||
let default_max_clock = device
|
||||
.max_clock_info(Clock::Memory)
|
||||
.context("Could not read max memory clock")?;
|
||||
let offset = (max_mem_clock - default_max_clock as i32) * self.vram_offset_ratio();
|
||||
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");
|
||||
|
||||
for (pstate, offset) in &config.clocks_configuration.mem_clock_offsets {
|
||||
let pstate = PerformanceState::try_from(*pstate)
|
||||
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
|
||||
debug!("applying offset {offset} for VRAM pstate {pstate:?}");
|
||||
device
|
||||
.set_mem_clk_vf_offset(offset)
|
||||
.context("Could not set memory clock offset")?;
|
||||
.set_clock_offset(Clock::Memory, pstate, *offset)
|
||||
.with_context(|| {
|
||||
format!("Could not set clock offset {offset} for VRAM pstate {pstate:?}")
|
||||
})?;
|
||||
|
||||
self.last_applied_mem_offset.set(Some(offset));
|
||||
self.last_applied_offsets
|
||||
.borrow_mut()
|
||||
.entry(Clock::Memory)
|
||||
.or_default()
|
||||
.insert(pstate, *offset);
|
||||
}
|
||||
|
||||
if config.fan_control_enabled {
|
||||
@ -633,23 +621,33 @@ impl GpuController for NvidiaGpuController {
|
||||
fn cleanup_clocks(&self) -> anyhow::Result<()> {
|
||||
let device = self.device();
|
||||
|
||||
if let Ok(current_offset) = device.gpc_clk_vf_offset() {
|
||||
if current_offset != 0 {
|
||||
device
|
||||
.set_gpc_clk_vf_offset(0)
|
||||
.context("Could not reset graphics clock offset")?;
|
||||
if let Ok(supported_pstates) = device.supported_performance_states() {
|
||||
for pstate in supported_pstates {
|
||||
for clock_type in [Clock::Graphics, Clock::Memory] {
|
||||
if let Ok(current_offset) = device.clock_offset(clock_type, pstate) {
|
||||
if current_offset.clock_offset_mhz != 0
|
||||
|| self
|
||||
.last_applied_offsets
|
||||
.borrow()
|
||||
.get(&clock_type)
|
||||
.and_then(|applied_offsets| applied_offsets.get(&pstate))
|
||||
.is_some_and(|offset| *offset != 0)
|
||||
{
|
||||
debug!("resetting clock offset for {clock_type:?} pstate {pstate:?}");
|
||||
device
|
||||
.set_clock_offset(clock_type, pstate, 0)
|
||||
.with_context(|| {
|
||||
format!("Could not reset {clock_type:?} pstate {pstate:?}")
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
self.last_applied_gpc_offset.set(None);
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(current_offset) = device.mem_clk_vf_offset() {
|
||||
if current_offset != 0 {
|
||||
device
|
||||
.set_mem_clk_vf_offset(0)
|
||||
.context("Could not reset memory clock offset")?;
|
||||
|
||||
self.last_applied_mem_offset.set(None);
|
||||
if let Some(applied_offsets) =
|
||||
self.last_applied_offsets.borrow_mut().get_mut(&clock_type)
|
||||
{
|
||||
applied_offsets.remove(&pstate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ use gtk::{
|
||||
};
|
||||
use lact_schema::{
|
||||
request::{ClockspeedType, SetClocksCommand},
|
||||
ClocksTable, IntelClocksTable, NvidiaClockInfo, NvidiaClocksTable,
|
||||
ClocksTable, IntelClocksTable, NvidiaClockOffset, NvidiaClocksTable,
|
||||
};
|
||||
use relm4::{factory::FactoryHashMap, ComponentParts, ComponentSender, RelmWidgetExt};
|
||||
|
||||
@ -190,16 +190,16 @@ impl ClocksFrame {
|
||||
}
|
||||
|
||||
fn set_nvidia_table(&mut self, table: NvidiaClocksTable) {
|
||||
if let Some(gpc_info) = &table.gpc {
|
||||
for (pstate, offset) in table.gpu_offsets {
|
||||
self.clocks.insert(
|
||||
ClockspeedType::MaxCoreClock,
|
||||
nvidia_clock_offset_to_data(gpc_info),
|
||||
ClockspeedType::GpuClockOffset(pstate),
|
||||
nvidia_clock_offset_to_data(&offset),
|
||||
);
|
||||
}
|
||||
if let Some(mem_info) = &table.mem {
|
||||
for (pstate, offset) in table.mem_offsets {
|
||||
self.clocks.insert(
|
||||
ClockspeedType::MaxMemoryClock,
|
||||
nvidia_clock_offset_to_data(mem_info),
|
||||
ClockspeedType::MemClockOffset(pstate),
|
||||
nvidia_clock_offset_to_data(&offset),
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -241,10 +241,10 @@ impl ClocksFrame {
|
||||
}
|
||||
}
|
||||
|
||||
fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockInfo) -> ClocksData {
|
||||
fn nvidia_clock_offset_to_data(clock_info: &NvidiaClockOffset) -> ClocksData {
|
||||
ClocksData {
|
||||
current: clock_info.max + (clock_info.offset / clock_info.offset_ratio),
|
||||
min: clock_info.max + clock_info.offset_range.0,
|
||||
max: clock_info.max + clock_info.offset_range.1,
|
||||
current: clock_info.current,
|
||||
min: clock_info.min,
|
||||
max: clock_info.max,
|
||||
}
|
||||
}
|
||||
|
@ -41,14 +41,16 @@ impl FactoryComponent for ClockAdjustmentRow {
|
||||
gtk::Label {
|
||||
set_width_request: 185,
|
||||
set_xalign: 0.0,
|
||||
set_label: match self.clock_type {
|
||||
ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)",
|
||||
ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)",
|
||||
ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)",
|
||||
ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)",
|
||||
ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)",
|
||||
ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)",
|
||||
ClockspeedType::VoltageOffset => "GPU voltage offset (mV)",
|
||||
set_label: &match self.clock_type {
|
||||
ClockspeedType::MaxCoreClock => "Maximum GPU Clock (MHz)".to_owned(),
|
||||
ClockspeedType::MaxMemoryClock => "Maximum VRAM Clock (MHz)".to_owned(),
|
||||
ClockspeedType::MaxVoltage => "Maximum GPU voltage (mV)".to_owned(),
|
||||
ClockspeedType::MinCoreClock => "Minimum GPU Clock (MHz)".to_owned(),
|
||||
ClockspeedType::MinMemoryClock => "Minimum VRAM Clock (MHz)".to_owned(),
|
||||
ClockspeedType::MinVoltage => "Minimum GPU voltage (mV)".to_owned(),
|
||||
ClockspeedType::VoltageOffset => "GPU voltage offset (mV)".to_owned(),
|
||||
ClockspeedType::GpuClockOffset(pstate) => format!("GPU Clock offset at P-State {pstate} (MHz)"),
|
||||
ClockspeedType::MemClockOffset(pstate) => format!("VRAM Clock offset at P-State {pstate} (MHz)"),
|
||||
ClockspeedType::Reset => unreachable!(),
|
||||
},
|
||||
},
|
||||
|
@ -167,11 +167,12 @@ pub enum ClocksTable {
|
||||
Intel(IntelClocksTable),
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
|
||||
pub struct NvidiaClocksTable {
|
||||
pub gpc: Option<NvidiaClockInfo>,
|
||||
pub mem: Option<NvidiaClockInfo>,
|
||||
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub gpu_offsets: IndexMap<u32, NvidiaClockOffset>,
|
||||
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub mem_offsets: IndexMap<u32, NvidiaClockOffset>,
|
||||
}
|
||||
|
||||
/// Doc from `xe_gt_freq.c`
|
||||
@ -188,11 +189,10 @@ pub struct IntelClocksTable {
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
|
||||
pub struct NvidiaClockInfo {
|
||||
pub struct NvidiaClockOffset {
|
||||
pub current: i32,
|
||||
pub min: i32,
|
||||
pub max: i32,
|
||||
pub offset: i32,
|
||||
pub offset_ratio: i32,
|
||||
pub offset_range: (i32, i32),
|
||||
}
|
||||
|
||||
impl From<AmdClocksTableGen> for ClocksInfo {
|
||||
|
@ -125,6 +125,8 @@ pub enum ClockspeedType {
|
||||
MinMemoryClock,
|
||||
MinVoltage,
|
||||
VoltageOffset,
|
||||
GpuClockOffset(u32),
|
||||
MemClockOffset(u32),
|
||||
Reset,
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user