feat: implement clocks control on Nvidia (#398)

* feat: initial support for clocks control on nvidia

* feat: store last applied offset, use it for reporting when needed

* chore: remove unused file
This commit is contained in:
Ilya Zlobintsev 2024-11-01 23:28:57 +02:00 committed by GitHub
parent d0f593e655
commit bc594da95b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 221 additions and 59 deletions

6
Cargo.lock generated
View File

@ -1651,7 +1651,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]
[[package]]
@ -1829,7 +1829,7 @@ dependencies = [
[[package]]
name = "nvml-wrapper"
version = "0.10.0"
source = "git+https://github.com/ilya-zlobintsev/nvml-wrapper?branch=lact#8e9f9c5738e167d2ef1e776ac86f388f23ee6d12"
source = "git+https://github.com/ilya-zlobintsev/nvml-wrapper?branch=lact#2ca2a6ea5dcd9270677722520cdb32537b79cd2d"
dependencies = [
"bitflags 2.6.0",
"libloading 0.8.5",
@ -1842,7 +1842,7 @@ dependencies = [
[[package]]
name = "nvml-wrapper-sys"
version = "0.8.0"
source = "git+https://github.com/ilya-zlobintsev/nvml-wrapper?branch=lact#8e9f9c5738e167d2ef1e776ac86f388f23ee6d12"
source = "git+https://github.com/ilya-zlobintsev/nvml-wrapper?branch=lact#2ca2a6ea5dcd9270677722520cdb32537b79cd2d"
dependencies = [
"libloading 0.8.5",
]

View File

@ -11,13 +11,13 @@ use amdgpu_sysfs::{
use anyhow::{anyhow, Context};
use futures::future::LocalBoxFuture;
use lact_schema::{
ClocksInfo, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo, FanControlMode,
FanStats, GpuPciInfo, LinkInfo, PmfwInfo, PowerState, PowerStates, PowerStats, VoltageStats,
VramStats,
ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo,
FanControlMode, FanStats, GpuPciInfo, LinkInfo, NvidiaClockInfo, NvidiaClocksTable, PmfwInfo,
PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
};
use nvml_wrapper::{
bitmasks::device::ThrottleReasons,
enum_wrappers::device::{Clock, ClockType, TemperatureSensor, TemperatureThreshold},
enum_wrappers::device::{Clock, TemperatureSensor, TemperatureThreshold},
Device, Nvml,
};
use std::{
@ -27,6 +27,7 @@ use std::{
fmt::Write,
path::{Path, PathBuf},
rc::Rc,
sync::atomic::{AtomicI32, Ordering},
time::{Duration, Instant},
};
use tokio::{select, sync::Notify, time::sleep};
@ -38,9 +39,29 @@ pub struct NvidiaGpuController {
pub pci_info: GpuPciInfo,
pub sysfs_path: PathBuf,
pub fan_control_handle: RefCell<Option<FanControlHandle>>,
last_applied_gpc_offset: Rc<AtomicI32>,
last_applied_mem_offset: Rc<AtomicI32>,
}
impl NvidiaGpuController {
/// Builds a controller for the GPU identified by `pci_slot_id`.
///
/// The controller starts without a fan control handle, and both remembered clock offsets
/// (graphics and memory) start at zero.
pub fn new(
    nvml: Rc<Nvml>,
    pci_slot_id: String,
    pci_info: GpuPciInfo,
    sysfs_path: PathBuf,
) -> Self {
    // Each remembered offset gets its own counter, initialised to "no offset applied".
    let zero_offset = || Rc::new(AtomicI32::new(0));
    let fan_control_handle = RefCell::new(None);

    Self {
        last_applied_gpc_offset: zero_offset(),
        last_applied_mem_offset: zero_offset(),
        fan_control_handle,
        sysfs_path,
        pci_info,
        pci_slot_id,
        nvml,
    }
}
fn device(&self) -> Device<'_> {
self.nvml
.device_by_pci_bus_id(self.pci_slot_id.as_str())
@ -195,7 +216,7 @@ impl NvidiaGpuController {
for pstate in supported_states {
let (gpu_min, gpu_max) = device
.min_max_clock_of_pstate(ClockType::Graphics, pstate)
.min_max_clock_of_pstate(Clock::Graphics, pstate)
.context("Could not read GPU pstates")?;
power_states.core.push(PowerState {
@ -211,7 +232,7 @@ impl NvidiaGpuController {
});
let (mem_min, mem_max) = device
.min_max_clock_of_pstate(ClockType::Mem, pstate)
.min_max_clock_of_pstate(Clock::Memory, pstate)
.context("Could not read memory pstates")?;
power_states.vram.push(PowerState {
@ -456,8 +477,54 @@ impl GpuController for NvidiaGpuController {
}
}
#[allow(clippy::cast_possible_wrap)]
fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
Ok(ClocksInfo::default())
let device = self.device();
let mut gpc = None;
let mut mem = None;
// Negative offset values are not correctly reported by NVML, so we have to use the last known applied value
// instead of the actual read when an unreasonable value appears.
if let Ok(max) = device.max_clock_info(Clock::Graphics) {
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.gpc_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_gpc_offset.load(Ordering::SeqCst);
}
gpc = Some(NvidiaClockInfo {
max: max as i32,
offset,
offset_range,
});
}
}
}
if let Ok(max) = device.max_clock_info(Clock::Memory) {
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.mem_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_mem_offset.load(Ordering::SeqCst);
}
mem = Some(NvidiaClockInfo {
max: max as i32,
offset,
offset_range,
});
}
}
}
let table = NvidiaClocksTable { gpc, mem };
Ok(ClocksInfo {
table: Some(ClocksTable::Nvidia(table)),
..Default::default()
})
}
fn get_power_states(&self, _gpu_config: Option<&config::Gpu>) -> PowerStates {
@ -477,6 +544,7 @@ impl GpuController for NvidiaGpuController {
Err(anyhow!("Not supported on Nvidia"))
}
#[allow(clippy::cast_possible_wrap)]
fn apply_config<'a>(
&'a self,
config: &'a config::Gpu,
@ -512,6 +580,36 @@ impl GpuController for NvidiaGpuController {
}
}
self.cleanup_clocks()?;
if let Some(max_gpu_clock) = config.clocks_configuration.max_core_clock {
let default_max_clock = device
.max_clock_info(Clock::Graphics)
.context("Could not read max graphics clock")?;
let offset = max_gpu_clock - default_max_clock as i32;
debug!("Using graphics clock offset {offset}");
device
.set_gpc_clk_vf_offset(offset)
.context("Could not set graphics clock offset")?;
self.last_applied_gpc_offset.store(offset, Ordering::SeqCst);
}
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
let default_max_clock = device
.max_clock_info(Clock::Memory)
.context("Could not read max memory clock")?;
let offset = max_mem_clock - default_max_clock as i32;
debug!("Using mem clock offset {offset}");
device
.set_mem_clk_vf_offset(offset)
.context("Could not set memory clock offset")?;
self.last_applied_mem_offset.store(offset, Ordering::SeqCst);
}
if config.fan_control_enabled {
let settings = config
.fan_control_settings
@ -549,6 +647,28 @@ impl GpuController for NvidiaGpuController {
}
/// Resets the graphics and memory clock offsets back to zero when they are currently non-zero.
///
/// An offset that cannot be read is left untouched. A failed reset is returned as an error,
/// and the remembered "last applied" value is only cleared after a successful reset.
fn cleanup_clocks(&self) -> anyhow::Result<()> {
    let device = self.device();

    let gpc_offset_active = matches!(device.gpc_clk_vf_offset(), Ok(offset) if offset != 0);
    if gpc_offset_active {
        device
            .set_gpc_clk_vf_offset(0)
            .context("Could not reset graphics clock offset")?;
        self.last_applied_gpc_offset.store(0, Ordering::SeqCst);
    }

    let mem_offset_active = matches!(device.mem_clk_vf_offset(), Ok(offset) if offset != 0);
    if mem_offset_active {
        device
            .set_mem_clk_vf_offset(0)
            .context("Could not reset memory clock offset")?;
        self.last_applied_mem_offset.store(0, Ordering::SeqCst);
    }

    Ok(())
}
}

View File

@ -797,15 +797,14 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>
if let Some(pci_slot_id) = controller.get_pci_slot_name() {
match nvml.device_by_pci_bus_id(pci_slot_id.as_str()) {
Ok(_) => {
let controller = NvidiaGpuController {
let controller = NvidiaGpuController::new(
nvml,
pci_slot_id,
pci_info: controller.get_pci_info().expect(
controller.get_pci_info().expect(
"Initialized NVML device without PCI info somehow",
).clone(),
sysfs_path: path.to_owned(),
fan_control_handle: RefCell::default(),
};
path.to_owned(),
);
match controller.get_id() {
Ok(id) => {
info!("initialized Nvidia GPU controller {id} for path {path:?}");

View File

@ -3,10 +3,11 @@ mod adjustment_row;
use crate::app::page_section::PageSection;
use crate::app::root_stack::oc_adjustment::OcAdjustment;
use adjustment_row::AdjustmentRow;
use amdgpu_sysfs::gpu_handle::overdrive::{ClocksTable, ClocksTableGen};
use amdgpu_sysfs::gpu_handle::overdrive::{ClocksTable as _, ClocksTableGen as AmdClocksTable};
use glib::clone;
use gtk::prelude::*;
use gtk::*;
use lact_schema::{ClocksTable, NvidiaClockInfo, NvidiaClocksTable};
use subclass::prelude::ObjectSubclassIsExt;
use tracing::debug;
@ -144,11 +145,38 @@ impl ClocksFrame {
frame
}
pub fn set_table(&self, table: ClocksTableGen) -> anyhow::Result<()> {
/// Populates the adjustment rows from a vendor-specific clocks table.
///
/// Every row is hidden first; the vendor-specific setter then shows only the rows it has
/// data for. All rows are refreshed afterwards.
pub fn set_table(&self, table: ClocksTable) -> anyhow::Result<()> {
    debug!("using clocks table {table:?}");

    let rows = [
        &self.min_sclk_adjustment,
        &self.min_mclk_adjustment,
        &self.min_voltage_adjustment,
        &self.max_sclk_adjustment,
        &self.max_mclk_adjustment,
        &self.max_voltage_adjustment,
        &self.voltage_offset_adjustment,
    ];

    // Start from a clean slate so rows left over from a previous table do not stay visible.
    rows.iter().for_each(|row| row.set_visible(false));

    match table {
        ClocksTable::Nvidia(nvidia_table) => self.set_nvidia_table(nvidia_table),
        ClocksTable::Amd(amd_table) => self.set_amd_table(amd_table),
    }

    rows.iter().for_each(|row| row.refresh());

    Ok(())
}
fn set_amd_table(&self, table: AmdClocksTable) {
if let Some((current_sclk_min, sclk_min, sclk_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(
table.get_current_sclk_range().min,
table.get_min_sclk_range(),
@ -161,12 +189,10 @@ impl ClocksFrame {
min_sclk_adjustment.set_initial_value(current_sclk_min.into());
self.min_sclk_adjustment.set_visible(true);
} else {
self.min_sclk_adjustment.set_visible(false);
}
if let Some((current_mclk_min, mclk_min, mclk_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(
table.get_current_mclk_range().min,
table.get_min_mclk_range(),
@ -179,12 +205,10 @@ impl ClocksFrame {
min_mclk_adjustment.set_initial_value(current_mclk_min.into());
self.min_mclk_adjustment.set_visible(true);
} else {
self.min_mclk_adjustment.set_visible(false);
}
if let Some((current_min_voltage, voltage_min, voltage_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(
table
.get_current_voltage_range()
@ -200,12 +224,10 @@ impl ClocksFrame {
min_voltage_adjustment.set_value(current_min_voltage.into());
self.min_voltage_adjustment.set_visible(true);
} else {
self.min_voltage_adjustment.set_visible(false);
}
if let Some((current_sclk_max, sclk_min, sclk_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(table.get_max_sclk(), table.get_max_sclk_range())
})
{
@ -216,12 +238,10 @@ impl ClocksFrame {
max_sclk_adjustment.set_value(current_sclk_max.into());
self.max_sclk_adjustment.set_visible(true);
} else {
self.max_sclk_adjustment.set_visible(false);
}
if let Some((current_mclk_max, mclk_min, mclk_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(table.get_max_mclk(), table.get_max_mclk_range())
})
{
@ -231,12 +251,10 @@ impl ClocksFrame {
max_mclk_adjustment.set_value(current_mclk_max.into());
self.max_mclk_adjustment.set_visible(true);
} else {
self.max_mclk_adjustment.set_visible(false);
}
if let Some((current_voltage_max, voltage_min, voltage_max)) =
extract_value_and_range(&table, |table| {
extract_value_and_range_amd(&table, |table| {
(table.get_max_sclk_voltage(), table.get_max_voltage_range())
})
{
@ -246,11 +264,9 @@ impl ClocksFrame {
max_voltage_adjustment.set_value(current_voltage_max.into());
self.max_voltage_adjustment.set_visible(true);
} else {
self.max_voltage_adjustment.set_visible(false);
}
if let ClocksTableGen::Vega20(table) = table {
if let AmdClocksTable::Vega20(table) = table {
if let Some(offset) = table.voltage_offset {
let (min_offset, max_offset) = table
.od_range
@ -264,22 +280,17 @@ impl ClocksFrame {
voltage_offset_adjustment.set_value(offset.into());
self.voltage_offset_adjustment.set_visible(true);
} else {
self.voltage_offset_adjustment.set_visible(false);
}
} else {
self.voltage_offset_adjustment.set_visible(false);
}
}
self.min_sclk_adjustment.refresh();
self.min_mclk_adjustment.refresh();
self.min_voltage_adjustment.refresh();
self.max_sclk_adjustment.refresh();
self.max_mclk_adjustment.refresh();
self.max_voltage_adjustment.refresh();
self.voltage_offset_adjustment.refresh();
Ok(())
/// Shows and configures the max core/memory clock rows from an Nvidia clocks table.
///
/// A row is only touched when the table carries information for the matching clock.
fn set_nvidia_table(&self, table: NvidiaClocksTable) {
    let targets = [
        (table.gpc.as_ref(), &self.max_sclk_adjustment),
        (table.mem.as_ref(), &self.max_mclk_adjustment),
    ];

    for (clock_info, row) in targets {
        if let Some(clock_info) = clock_info {
            set_nvidia_clock_offset(clock_info, row);
        }
    }
}
pub fn show(&self) {
@ -379,10 +390,10 @@ impl ClocksFrame {
}
}
fn extract_value_and_range(
table: &ClocksTableGen,
fn extract_value_and_range_amd(
table: &AmdClocksTable,
f: fn(
&ClocksTableGen,
&AmdClocksTable,
) -> (
Option<i32>,
Option<amdgpu_sysfs::gpu_handle::overdrive::Range>,
@ -404,3 +415,12 @@ pub struct ClocksSettings {
pub max_voltage: Option<i32>,
pub voltage_offset: Option<i32>,
}
/// Configures an adjustment row from Nvidia clock information and makes it visible.
///
/// The row works in absolute clock values: its bounds are the maximum clock plus the lowest
/// and highest allowed offset, and its value is the maximum clock plus the current offset.
fn set_nvidia_clock_offset(clock_info: &NvidiaClockInfo, adjustment_row: &AdjustmentRow) {
    let base_clock = clock_info.max;
    let (lowest_offset, highest_offset) = clock_info.offset_range;

    let adjustment = &adjustment_row.imp().adjustment;
    adjustment.set_lower(f64::from(base_clock + lowest_offset));
    adjustment.set_upper(f64::from(base_clock + highest_offset));
    adjustment.set_value(f64::from(base_clock + clock_info.offset));

    adjustment_row.set_visible(true);
}

View File

@ -7,12 +7,13 @@ mod power_states;
use self::power_cap_section::PowerCapSection;
use self::power_states::power_states_frame::PowerStatesFrame;
use amdgpu_sysfs::gpu_handle::{overdrive::ClocksTableGen, PerformanceLevel, PowerLevelKind};
use amdgpu_sysfs::gpu_handle::{PerformanceLevel, PowerLevelKind};
use clocks_frame::ClocksFrame;
use gpu_stats_section::GpuStatsSection;
use gtk::*;
use gtk::{glib::clone, prelude::*};
use lact_client::schema::{DeviceInfo, DeviceStats, SystemInfo};
use lact_schema::ClocksTable;
use performance_frame::PerformanceFrame;
// use power_cap_frame::PowerCapFrame;
use std::collections::HashMap;
@ -124,7 +125,7 @@ impl OcPage {
self.clocks_frame.set_vram_clock_ratio(vram_clock_ratio);
}
pub fn set_clocks_table(&self, table: Option<ClocksTableGen>) {
pub fn set_clocks_table(&self, table: Option<ClocksTable>) {
match table {
Some(table) => match self.clocks_frame.set_table(table) {
Ok(()) => {

View File

@ -12,7 +12,7 @@ pub use response::Response;
use amdgpu_sysfs::{
gpu_handle::{
fan_control::FanInfo,
overdrive::{ClocksTable, ClocksTableGen},
overdrive::{ClocksTable as _, ClocksTableGen as AmdClocksTableGen},
PerformanceLevel,
},
hw_mon::Temperature,
@ -126,16 +126,38 @@ pub struct DrmMemoryInfo {
pub resizeable_bar: Option<bool>,
}
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct ClocksInfo {
pub max_sclk: Option<i32>,
pub max_mclk: Option<i32>,
pub max_voltage: Option<i32>,
pub table: Option<ClocksTableGen>,
pub table: Option<ClocksTable>,
}
impl From<ClocksTableGen> for ClocksInfo {
fn from(table: ClocksTableGen) -> Self {
/// Vendor-specific clocks table.
///
/// Serialized as an adjacently tagged enum: the variant name (in snake_case) is stored under
/// the `type` key and the variant's payload under the `value` key.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", content = "value", rename_all = "snake_case")]
pub enum ClocksTable {
/// Overdrive clocks table of an AMD GPU (`ClocksTableGen` from `amdgpu_sysfs`).
Amd(AmdClocksTableGen),
/// Clock and clock offset information of an Nvidia GPU.
Nvidia(NvidiaClocksTable),
}
/// Clock information of an Nvidia GPU.
///
/// Each entry is `None` when the corresponding information is not available; `None` entries
/// are left out of the serialized form (`skip_serializing_none`).
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClocksTable {
/// Graphics clock information.
pub gpc: Option<NvidiaClockInfo>,
/// Memory clock information.
pub mem: Option<NvidiaClockInfo>,
}
/// A single Nvidia clock together with its adjustable offset.
#[derive(Serialize, Deserialize, Default, Debug, Clone)]
pub struct NvidiaClockInfo {
/// Maximum clock as reported by the driver; offsets are expressed relative to this value.
/// NOTE(review): presumably in MHz — confirm against the NVML documentation.
pub max: i32,
/// Offset currently in effect, relative to `max`.
pub offset: i32,
/// Allowed offset range as `(min, max)`.
pub offset_range: (i32, i32),
}
impl From<AmdClocksTableGen> for ClocksInfo {
fn from(table: AmdClocksTableGen) -> Self {
let max_sclk = table.get_max_sclk();
let max_mclk = table.get_max_mclk();
let max_voltage = table.get_max_sclk_voltage();
@ -143,7 +165,7 @@ impl From<ClocksTableGen> for ClocksInfo {
max_sclk,
max_mclk,
max_voltage,
table: Some(table),
table: Some(ClocksTable::Amd(table)),
}
}
}