From 01eebc6d1fa1e94f03de63ea1546edfa2dd1a1aa Mon Sep 17 00:00:00 2001 From: Ilya Zlobintsev Date: Mon, 21 Nov 2022 15:42:14 +0200 Subject: [PATCH] feat: support setting the fan curve --- Cargo.lock | 5 +- lact-daemon/Cargo.toml | 4 +- .../src/server/gpu_controller/fan_control.rs | 100 ++++++++++++------ lact-daemon/src/server/gpu_controller/mod.rs | 67 ++++++------ lact-daemon/src/server/handler.rs | 74 ++++++++----- lact-daemon/src/server/mod.rs | 4 +- lact-gui/src/app/mod.rs | 6 +- lact-gui/src/client.rs | 13 ++- lact-schema/Cargo.toml | 4 +- lact-schema/src/request.rs | 20 +++- 10 files changed, 187 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f769842..5302eb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,8 +25,9 @@ dependencies = [ [[package]] name = "amdgpu-sysfs" -version = "0.5.0" -source = "git+https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs?branch=error#80cf493d20c36e27a54e5b34162d05767ec6c305" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a5ed5bf383b6aea853667b97646ce7daeae53f83b82dcc3cdd3390614e3128" dependencies = [ "serde", ] diff --git a/lact-daemon/Cargo.toml b/lact-daemon/Cargo.toml index 5a81eed..569f506 100644 --- a/lact-daemon/Cargo.toml +++ b/lact-daemon/Cargo.toml @@ -6,9 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [ - "serde", -] } +amdgpu-sysfs = { version = "0.6.1", features = ["serde"] } anyhow = "1.0" bincode = "1.3" nix = "0.25" diff --git a/lact-daemon/src/server/gpu_controller/fan_control.rs b/lact-daemon/src/server/gpu_controller/fan_control.rs index da7221d..76e0182 100644 --- a/lact-daemon/src/server/gpu_controller/fan_control.rs +++ b/lact-daemon/src/server/gpu_controller/fan_control.rs @@ -1,21 +1,22 @@ use amdgpu_sysfs::hw_mon::Temperature; +use anyhow::anyhow; use lact_schema::FanCurveMap; use serde::{Deserialize, Serialize}; -use tracing::error; +use tracing::warn; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct FanCurve(pub FanCurveMap); impl FanCurve { - pub fn rpm_at_temp(&self, temp: Temperature, min_rpm: u32, max_rpm: u32) -> u32 { + pub fn pwm_at_temp(&self, temp: Temperature) -> u8 { let current = temp.current.expect("No current temp"); // This scenario is most likely unreachable as the kernel shuts down the GPU when it reaches critical temperature if temp.crit.filter(|crit| current > *crit).is_some() || temp.crit_hyst.filter(|hyst| current < *hyst).is_some() { - error!("GPU temperature is beyond critical values! {current}°C"); - return max_rpm; + warn!("GPU temperature is beyond critical values! {current}°C"); + return u8::MAX; } let current = current as i32; @@ -25,6 +26,7 @@ impl FanCurve { let percentage = match (maybe_lower, maybe_higher) { (Some((lower_temp, lower_speed)), Some((higher_temp, higher_speed))) => { let speed_ratio = (current - lower_temp) as f32 / (higher_temp - lower_temp) as f32; + println!("RATIO for temp {current}: {speed_ratio}"); lower_speed + (higher_speed - lower_speed) * speed_ratio } (Some((_, lower_speed)), None) => *lower_speed, @@ -32,7 +34,18 @@ impl FanCurve { (None, None) => panic!("Could not find fan speed on the curve! This is a bug."), }; - ((max_rpm - min_rpm) as f32 * percentage) as u32 + (u8::MAX as f32 * percentage) as u8 + } +} + +impl FanCurve { + pub fn validate(&self) -> anyhow::Result<()> { + for percentage in self.0.values() { + if !(0.0..=1.0).contains(percentage) { + return Err(anyhow!("Fan speed percentage must be between 0 and 1")); + } + } + Ok(()) } } @@ -57,44 +70,44 @@ mod tests { use super::FanCurve; use amdgpu_sysfs::hw_mon::Temperature; - fn simple_rpm(temp: f32, min_rpm: u32, max_rpm: u32) -> u32 { + fn simple_pwm(temp: f32) -> u8 { let curve = FanCurve([(0, 0.0), (100, 1.0)].into()); let temp = Temperature { current: Some(temp), crit: Some(150.0), crit_hyst: Some(-100.0), }; - curve.rpm_at_temp(temp, min_rpm, max_rpm) + curve.pwm_at_temp(temp) } #[test] fn simple_curve_middle() { - let rpm = simple_rpm(45.0, 0, 200); - assert_eq!(rpm, 90); + let pwm = simple_pwm(45.0); + assert_eq!(pwm, 114); } #[test] fn simple_curve_start() { - let rpm = simple_rpm(0.0, 0, 200); - assert_eq!(rpm, 0); + let pwm = simple_pwm(0.0); + assert_eq!(pwm, 0); } #[test] fn simple_curve_end() { - let rpm = simple_rpm(100.0, 0, 200); - assert_eq!(rpm, 200); + let pwm = simple_pwm(100.0); + assert_eq!(pwm, 255); } #[test] fn simple_curve_before() { - let rpm = simple_rpm(-5.0, 0, 200); - assert_eq!(rpm, 0); + let pwm = simple_pwm(-5.0); + assert_eq!(pwm, 0); } #[test] fn simple_curve_after() { - let rpm = simple_rpm(105.0, 0, 200); - assert_eq!(rpm, 200); + let pwm = simple_pwm(105.0); + assert_eq!(pwm, 255); } #[test] @@ -105,30 +118,51 @@ mod tests { crit: Some(90.0), crit_hyst: Some(0.0), }; - let rpm = curve.rpm_at_temp(temp, 0, 200); - assert_eq!(rpm, 200); + let pwm = curve.pwm_at_temp(temp); + assert_eq!(pwm, 255); } #[test] - fn default_curve() { - let curve = FanCurve::default(); - let rpm_at_temp = |current: f32| { + fn uneven_curve() { + let curve = FanCurve([(30, 0.0), (40, 0.1), (55, 0.9), (61, 1.0)].into()); + let pwm_at_temp = |current: f32| { let temp = Temperature { current: Some(current), crit: Some(90.0), crit_hyst: Some(0.0), }; - curve.rpm_at_temp(temp, 0, 1000) + curve.pwm_at_temp(temp) }; - assert_eq!(rpm_at_temp(20.0), 0); - assert_eq!(rpm_at_temp(30.0), 0); - assert_eq!(rpm_at_temp(33.0), 60); - assert_eq!(rpm_at_temp(60.0), 500); - assert_eq!(rpm_at_temp(65.0), 625); - assert_eq!(rpm_at_temp(70.0), 750); - assert_eq!(rpm_at_temp(79.0), 975); - assert_eq!(rpm_at_temp(85.0), 1000); - assert_eq!(rpm_at_temp(100.0), 1000); - assert_eq!(rpm_at_temp(-5.0), 1000); + + assert_eq!(pwm_at_temp(30.0), 0); + assert_eq!(pwm_at_temp(35.0), 12); + assert_eq!(pwm_at_temp(40.0), 25); + assert_eq!(pwm_at_temp(47.0), 120); + assert_eq!(pwm_at_temp(52.0), 188); + assert_eq!(pwm_at_temp(53.0), 202); + assert_eq!(pwm_at_temp(54.0), 215); + } + + #[test] + fn default_curve() { + let curve = FanCurve::default(); + let pwm_at_temp = |current: f32| { + let temp = Temperature { + current: Some(current), + crit: Some(90.0), + crit_hyst: Some(0.0), + }; + curve.pwm_at_temp(temp) + }; + assert_eq!(pwm_at_temp(20.0), 0); + assert_eq!(pwm_at_temp(30.0), 0); + assert_eq!(pwm_at_temp(33.0), 15); + assert_eq!(pwm_at_temp(60.0), 127); + assert_eq!(pwm_at_temp(65.0), 159); + assert_eq!(pwm_at_temp(70.0), 191); + assert_eq!(pwm_at_temp(79.0), 248); + assert_eq!(pwm_at_temp(85.0), 255); + assert_eq!(pwm_at_temp(100.0), 255); + assert_eq!(pwm_at_temp(-5.0), 255); } } diff --git a/lact-daemon/src/server/gpu_controller/mod.rs b/lact-daemon/src/server/gpu_controller/mod.rs index c1c445f..ace82a9 100644 --- a/lact-daemon/src/server/gpu_controller/mod.rs +++ b/lact-daemon/src/server/gpu_controller/mod.rs @@ -21,14 +21,14 @@ use std::{ time::Duration, }; use tokio::{select, sync::Notify, task::JoinHandle, time::sleep}; -use tracing::{error, info, trace, warn}; +use tracing::{debug, error, info, trace, warn}; type FanControlHandle = (Arc, JoinHandle<()>, FanCurve); pub struct GpuController { pub handle: GpuHandle, pub pci_info: Option, - pub fan_control_handle: Arc>>, + pub fan_control_handle: Mutex>, } impl GpuController { @@ -88,7 +88,7 @@ impl GpuController { Ok(Self { handle, pci_info, - fan_control_handle: Arc::new(Mutex::new(None)), + fan_control_handle: Mutex::new(None), }) } @@ -177,12 +177,15 @@ impl GpuController { self.handle.hw_monitors.first().map(f) } - pub fn start_fan_control( + pub async fn start_fan_control( &self, curve: FanCurve, temp_key: String, interval: Duration, ) -> anyhow::Result<()> { + // Stop existing task to re-apply new curve + self.stop_fan_control(false).await?; + let hw_mon = self .handle .hw_monitors @@ -193,50 +196,36 @@ impl GpuController { .set_fan_control_method(FanControlMethod::Manual) .context("Could not set fan control method")?; - let max_rpm = hw_mon.get_fan_max().context("Could not get min RPM")?; - let min_rpm = hw_mon.get_fan_min().context("Could not get max RPM")?; - let mut notify_guard = self .fan_control_handle .lock() .map_err(|err| anyhow!("Lock error: {err}"))?; - if notify_guard.is_some() { - return Ok(()); - } - let notify = Arc::new(Notify::new()); let task_notify = notify.clone(); let task_curve = curve.clone(); + debug!("Using curve {curve:?}"); - let notify_handle = self.fan_control_handle.clone(); let handle = tokio::spawn(async move { loop { - let mut temps = hw_mon.get_temps(); - let temp = temps - .remove(&temp_key) - .expect("Could not get temperature by given key"); - let target_rpm = task_curve.rpm_at_temp(temp, min_rpm, max_rpm); - trace!("Fan control tick: setting rpm to {target_rpm}"); - - if let Err(err) = hw_mon.set_fan_target(target_rpm) { - error!("Could not set fan speed: {err}, disabling fan control"); - break; - } - select! { _ = sleep(interval) => (), _ = task_notify.notified() => break, } + + let mut temps = hw_mon.get_temps(); + let temp = temps + .remove(&temp_key) + .expect("Could not get temperature by given key"); + let target_pwm = task_curve.pwm_at_temp(temp); + trace!("Fan control tick: setting pwm to {target_pwm}"); + + if let Err(err) = hw_mon.set_fan_pwm(target_pwm) { + error!("Could not set fan speed: {err}, disabling fan control"); + break; + } } - info!("Shutting down fan control"); - if let Err(err) = hw_mon.set_fan_control_method(FanControlMethod::Auto) { - error!("Could not set fan control back to automatic: {err}"); - } - notify_handle - .lock() - .expect("Fan control mutex error") - .take(); + info!("Exited fan control task"); }); *notify_guard = Some((notify, handle, curve)); @@ -249,7 +238,7 @@ impl GpuController { Ok(()) } - pub async fn stop_fan_control(&self) -> anyhow::Result<()> { + pub async fn stop_fan_control(&self, reset_mode: bool) -> anyhow::Result<()> { let maybe_notify = self .fan_control_handle .lock() @@ -259,6 +248,18 @@ impl GpuController { notify.notify_one(); handle.await?; } + + if reset_mode { + let hw_mon = self + .handle + .hw_monitors + .first() + .cloned() + .context("This GPU has no monitor")?; + hw_mon + .set_fan_control_method(FanControlMethod::Auto) + .context("Could not set fan control back to automatic: {err}")?; + } Ok(()) } } diff --git a/lact-daemon/src/server/handler.rs b/lact-daemon/src/server/handler.rs index a9c71ff..7043e4a 100644 --- a/lact-daemon/src/server/handler.rs +++ b/lact-daemon/src/server/handler.rs @@ -2,7 +2,7 @@ use super::gpu_controller::{fan_control::FanCurve, GpuController}; use crate::config::{Config, FanControlSettings, GpuConfig}; use amdgpu_sysfs::sysfs::SysFS; use anyhow::{anyhow, Context}; -use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats}; +use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap}; use std::{ collections::HashMap, path::PathBuf, @@ -77,11 +77,13 @@ impl<'a> Handler { "Fan control is enabled but no settings are defined (invalid config?)", )?; let interval = Duration::from_millis(settings.interval_ms); - controller.start_fan_control( - settings.curve.clone(), - settings.temperature_key.clone(), - interval, - )?; + controller + .start_fan_control( + settings.curve.clone(), + settings.temperature_key.clone(), + interval, + ) + .await?; } if let Some(power_cap) = gpu_config.power_cap { @@ -147,30 +149,50 @@ impl<'a> Handler { self.controller_by_id(id)?.get_stats() } - pub async fn set_fan_control(&'a self, id: &str, enabled: bool) -> anyhow::Result<()> { - if enabled { - let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?; - let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default(); - let settings = - gpu_config - .fan_control_settings - .get_or_insert_with(|| FanControlSettings { - curve: FanCurve::default(), + pub async fn set_fan_control( + &'a self, + id: &str, + enabled: bool, + curve: Option, + ) -> anyhow::Result<()> { + let settings = if enabled { + let settings = { + let curve = curve.map_or_else(FanCurve::default, |curve| FanCurve(curve)); + curve.validate()?; + + let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?; + let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default(); + + if let Some(mut existing_settings) = gpu_config.fan_control_settings.clone() { + existing_settings.curve = curve; + existing_settings + } else { + FanControlSettings { + curve, temperature_key: "edge".to_owned(), interval_ms: 500, - }); + } + } + }; let interval = Duration::from_millis(settings.interval_ms); - self.controller_by_id(id)?.start_fan_control( - settings.curve.clone(), - settings.temperature_key.clone(), - interval, - )?; - gpu_config.fan_control_enabled = true; - config_guard.save().context("Could not save config") + self.controller_by_id(id)? + .start_fan_control( + settings.curve.clone(), + settings.temperature_key.clone(), + interval, + ) + .await?; + Some(settings) } else { - self.controller_by_id(id)?.stop_fan_control().await - } + self.controller_by_id(id)?.stop_fan_control(true).await?; + None + }; + + self.edit_gpu_config(id.to_owned(), |config| { + config.fan_control_enabled = enabled; + config.fan_control_settings = settings + }) } pub fn set_power_cap(&'a self, id: &str, maybe_cap: Option) -> anyhow::Result<()> { @@ -199,7 +221,7 @@ impl<'a> Handler { if gpu_config.fan_control_enabled { debug!("Stopping fan control"); controller - .stop_fan_control() + .stop_fan_control(true) .await .expect("Could not stop fan control"); } diff --git a/lact-daemon/src/server/mod.rs b/lact-daemon/src/server/mod.rs index bc447ec..9d0686f 100644 --- a/lact-daemon/src/server/mod.rs +++ b/lact-daemon/src/server/mod.rs @@ -83,8 +83,8 @@ async fn handle_request<'a>(request: Request<'a>, handler: &'a Handler) -> anyho Request::ListDevices => ok_response(handler.list_devices()), Request::DeviceInfo { id } => ok_response(handler.get_device_info(id)?), Request::DeviceStats { id } => ok_response(handler.get_gpu_stats(id)?), - Request::SetFanControl { id, enabled } => { - ok_response(handler.set_fan_control(id, enabled).await?) + Request::SetFanControl { id, enabled, curve } => { + ok_response(handler.set_fan_control(id, enabled, curve).await?) } Request::SetPowerCap { id, cap } => ok_response(handler.set_power_cap(id, cap)?), } diff --git a/lact-gui/src/app/mod.rs b/lact-gui/src/app/mod.rs index 5986979..c4e5d1d 100644 --- a/lact-gui/src/app/mod.rs +++ b/lact-gui/src/app/mod.rs @@ -141,7 +141,11 @@ impl App { let thermals_settings = app.root_stack.thermals_page.get_thermals_settings(); app.daemon_client - .set_fan_control(&gpu_id, thermals_settings.automatic_fan_control_enabled) + .set_fan_control( + &gpu_id, + thermals_settings.automatic_fan_control_enabled, + None, + ) .expect("Could not set fan control"); // TODO diff --git a/lact-gui/src/client.rs b/lact-gui/src/client.rs index f3ef27e..ad9be34 100644 --- a/lact-gui/src/client.rs +++ b/lact-gui/src/client.rs @@ -1,5 +1,7 @@ use anyhow::{anyhow, Context}; -use lact_schema::{request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats}; +use lact_schema::{ + request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap, +}; use nix::unistd::getuid; use serde::Deserialize; use std::{ @@ -56,8 +58,13 @@ impl DaemonClient { self.make_request(Request::ListDevices) } - pub fn set_fan_control(&self, id: &str, enabled: bool) -> anyhow::Result<()> { - self.make_request::<()>(Request::SetFanControl { id, enabled })? + pub fn set_fan_control( + &self, + id: &str, + enabled: bool, + curve: Option, + ) -> anyhow::Result<()> { + self.make_request::<()>(Request::SetFanControl { id, enabled, curve })? .inner()?; Ok(()) } diff --git a/lact-schema/Cargo.toml b/lact-schema/Cargo.toml index 117fb2c..75fbdea 100644 --- a/lact-schema/Cargo.toml +++ b/lact-schema/Cargo.toml @@ -4,9 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [ - "serde", -] } +amdgpu-sysfs = { version = "*", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } indexmap = { version = "1.9", features = ["serde"] } diff --git a/lact-schema/src/request.rs b/lact-schema/src/request.rs index 4129c82..220add5 100644 --- a/lact-schema/src/request.rs +++ b/lact-schema/src/request.rs @@ -1,3 +1,4 @@ +use crate::FanCurveMap; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, PartialEq)] @@ -5,8 +6,19 @@ use serde::{Deserialize, Serialize}; pub enum Request<'a> { Ping, ListDevices, - DeviceInfo { id: &'a str }, - DeviceStats { id: &'a str }, - SetFanControl { id: &'a str, enabled: bool }, - SetPowerCap { id: &'a str, cap: Option }, + DeviceInfo { + id: &'a str, + }, + DeviceStats { + id: &'a str, + }, + SetFanControl { + id: &'a str, + enabled: bool, + curve: Option, + }, + SetPowerCap { + id: &'a str, + cap: Option, + }, }