feat: support setting the fan curve

This commit is contained in:
Ilya Zlobintsev
2022-11-21 15:42:14 +02:00
parent 9be48ceb3c
commit 01eebc6d1f
10 changed files with 187 additions and 110 deletions

5
Cargo.lock generated
View File

@@ -25,8 +25,9 @@ dependencies = [
[[package]]
name = "amdgpu-sysfs"
version = "0.5.0"
source = "git+https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs?branch=error#80cf493d20c36e27a54e5b34162d05767ec6c305"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5a5ed5bf383b6aea853667b97646ce7daeae53f83b82dcc3cdd3390614e3128"
dependencies = [
"serde",
]

View File

@@ -6,9 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [
"serde",
] }
amdgpu-sysfs = { version = "0.6.1", features = ["serde"] }
anyhow = "1.0"
bincode = "1.3"
nix = "0.25"

View File

@@ -1,21 +1,22 @@
use amdgpu_sysfs::hw_mon::Temperature;
use anyhow::anyhow;
use lact_schema::FanCurveMap;
use serde::{Deserialize, Serialize};
use tracing::error;
use tracing::warn;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FanCurve(pub FanCurveMap);
impl FanCurve {
pub fn rpm_at_temp(&self, temp: Temperature, min_rpm: u32, max_rpm: u32) -> u32 {
pub fn pwm_at_temp(&self, temp: Temperature) -> u8 {
let current = temp.current.expect("No current temp");
// This scenario is most likely unreachable as the kernel shuts down the GPU when it reaches critical temperature
if temp.crit.filter(|crit| current > *crit).is_some()
|| temp.crit_hyst.filter(|hyst| current < *hyst).is_some()
{
error!("GPU temperature is beyond critical values! {current}°C");
return max_rpm;
warn!("GPU temperature is beyond critical values! {current}°C");
return u8::MAX;
}
let current = current as i32;
@@ -25,6 +26,7 @@ impl FanCurve {
let percentage = match (maybe_lower, maybe_higher) {
(Some((lower_temp, lower_speed)), Some((higher_temp, higher_speed))) => {
let speed_ratio = (current - lower_temp) as f32 / (higher_temp - lower_temp) as f32;
println!("RATIO for temp {current}: {speed_ratio}");
lower_speed + (higher_speed - lower_speed) * speed_ratio
}
(Some((_, lower_speed)), None) => *lower_speed,
@@ -32,7 +34,18 @@ impl FanCurve {
(None, None) => panic!("Could not find fan speed on the curve! This is a bug."),
};
((max_rpm - min_rpm) as f32 * percentage) as u32
(u8::MAX as f32 * percentage) as u8
}
}
impl FanCurve {
    /// Checks that every speed stored on the curve lies in the inclusive range `0.0..=1.0`.
    ///
    /// # Errors
    ///
    /// Returns an error if any value of the underlying map falls outside that range.
    pub fn validate(&self) -> anyhow::Result<()> {
        // `all` stops at the first offending value, just like an early return would.
        let every_ratio_in_range = self
            .0
            .values()
            .all(|ratio| (0.0..=1.0).contains(ratio));

        if every_ratio_in_range {
            Ok(())
        } else {
            Err(anyhow!("Fan speed percentage must be between 0 and 1"))
        }
    }
}
@@ -57,44 +70,44 @@ mod tests {
use super::FanCurve;
use amdgpu_sysfs::hw_mon::Temperature;
fn simple_rpm(temp: f32, min_rpm: u32, max_rpm: u32) -> u32 {
fn simple_pwm(temp: f32) -> u8 {
let curve = FanCurve([(0, 0.0), (100, 1.0)].into());
let temp = Temperature {
current: Some(temp),
crit: Some(150.0),
crit_hyst: Some(-100.0),
};
curve.rpm_at_temp(temp, min_rpm, max_rpm)
curve.pwm_at_temp(temp)
}
#[test]
fn simple_curve_middle() {
let rpm = simple_rpm(45.0, 0, 200);
assert_eq!(rpm, 90);
let pwm = simple_pwm(45.0);
assert_eq!(pwm, 114);
}
#[test]
fn simple_curve_start() {
let rpm = simple_rpm(0.0, 0, 200);
assert_eq!(rpm, 0);
let pwm = simple_pwm(0.0);
assert_eq!(pwm, 0);
}
#[test]
fn simple_curve_end() {
let rpm = simple_rpm(100.0, 0, 200);
assert_eq!(rpm, 200);
let pwm = simple_pwm(100.0);
assert_eq!(pwm, 255);
}
#[test]
fn simple_curve_before() {
let rpm = simple_rpm(-5.0, 0, 200);
assert_eq!(rpm, 0);
let pwm = simple_pwm(-5.0);
assert_eq!(pwm, 0);
}
#[test]
fn simple_curve_after() {
let rpm = simple_rpm(105.0, 0, 200);
assert_eq!(rpm, 200);
let pwm = simple_pwm(105.0);
assert_eq!(pwm, 255);
}
#[test]
@@ -105,30 +118,51 @@ mod tests {
crit: Some(90.0),
crit_hyst: Some(0.0),
};
let rpm = curve.rpm_at_temp(temp, 0, 200);
assert_eq!(rpm, 200);
let pwm = curve.pwm_at_temp(temp);
assert_eq!(pwm, 255);
}
#[test]
fn default_curve() {
let curve = FanCurve::default();
let rpm_at_temp = |current: f32| {
fn uneven_curve() {
let curve = FanCurve([(30, 0.0), (40, 0.1), (55, 0.9), (61, 1.0)].into());
let pwm_at_temp = |current: f32| {
let temp = Temperature {
current: Some(current),
crit: Some(90.0),
crit_hyst: Some(0.0),
};
curve.rpm_at_temp(temp, 0, 1000)
curve.pwm_at_temp(temp)
};
assert_eq!(rpm_at_temp(20.0), 0);
assert_eq!(rpm_at_temp(30.0), 0);
assert_eq!(rpm_at_temp(33.0), 60);
assert_eq!(rpm_at_temp(60.0), 500);
assert_eq!(rpm_at_temp(65.0), 625);
assert_eq!(rpm_at_temp(70.0), 750);
assert_eq!(rpm_at_temp(79.0), 975);
assert_eq!(rpm_at_temp(85.0), 1000);
assert_eq!(rpm_at_temp(100.0), 1000);
assert_eq!(rpm_at_temp(-5.0), 1000);
assert_eq!(pwm_at_temp(30.0), 0);
assert_eq!(pwm_at_temp(35.0), 12);
assert_eq!(pwm_at_temp(40.0), 25);
assert_eq!(pwm_at_temp(47.0), 120);
assert_eq!(pwm_at_temp(52.0), 188);
assert_eq!(pwm_at_temp(53.0), 202);
assert_eq!(pwm_at_temp(54.0), 215);
}
/// Pins the PWM values the default curve yields for a set of sample temperatures.
#[test]
fn default_curve() {
    let curve = FanCurve::default();

    // (current temperature, expected PWM); every sample uses the same
    // `crit` = 90.0 and `crit_hyst` = 0.0 thresholds.
    let expectations: [(f32, u8); 10] = [
        (20.0, 0),
        (30.0, 0),
        (33.0, 15),
        (60.0, 127),
        (65.0, 159),
        (70.0, 191),
        (79.0, 248),
        (85.0, 255),
        (100.0, 255),
        (-5.0, 255),
    ];

    for (current, expected_pwm) in expectations {
        let temp = Temperature {
            current: Some(current),
            crit: Some(90.0),
            crit_hyst: Some(0.0),
        };
        assert_eq!(curve.pwm_at_temp(temp), expected_pwm);
    }
}
}

View File

@@ -21,14 +21,14 @@ use std::{
time::Duration,
};
use tokio::{select, sync::Notify, task::JoinHandle, time::sleep};
use tracing::{error, info, trace, warn};
use tracing::{debug, error, info, trace, warn};
type FanControlHandle = (Arc<Notify>, JoinHandle<()>, FanCurve);
pub struct GpuController {
pub handle: GpuHandle,
pub pci_info: Option<GpuPciInfo>,
pub fan_control_handle: Arc<Mutex<Option<FanControlHandle>>>,
pub fan_control_handle: Mutex<Option<FanControlHandle>>,
}
impl GpuController {
@@ -88,7 +88,7 @@ impl GpuController {
Ok(Self {
handle,
pci_info,
fan_control_handle: Arc::new(Mutex::new(None)),
fan_control_handle: Mutex::new(None),
})
}
@@ -177,12 +177,15 @@ impl GpuController {
self.handle.hw_monitors.first().map(f)
}
pub fn start_fan_control(
pub async fn start_fan_control(
&self,
curve: FanCurve,
temp_key: String,
interval: Duration,
) -> anyhow::Result<()> {
// Stop existing task to re-apply new curve
self.stop_fan_control(false).await?;
let hw_mon = self
.handle
.hw_monitors
@@ -193,50 +196,36 @@ impl GpuController {
.set_fan_control_method(FanControlMethod::Manual)
.context("Could not set fan control method")?;
let max_rpm = hw_mon.get_fan_max().context("Could not get min RPM")?;
let min_rpm = hw_mon.get_fan_min().context("Could not get max RPM")?;
let mut notify_guard = self
.fan_control_handle
.lock()
.map_err(|err| anyhow!("Lock error: {err}"))?;
if notify_guard.is_some() {
return Ok(());
}
let notify = Arc::new(Notify::new());
let task_notify = notify.clone();
let task_curve = curve.clone();
debug!("Using curve {curve:?}");
let notify_handle = self.fan_control_handle.clone();
let handle = tokio::spawn(async move {
loop {
let mut temps = hw_mon.get_temps();
let temp = temps
.remove(&temp_key)
.expect("Could not get temperature by given key");
let target_rpm = task_curve.rpm_at_temp(temp, min_rpm, max_rpm);
trace!("Fan control tick: setting rpm to {target_rpm}");
if let Err(err) = hw_mon.set_fan_target(target_rpm) {
error!("Could not set fan speed: {err}, disabling fan control");
break;
}
select! {
_ = sleep(interval) => (),
_ = task_notify.notified() => break,
}
let mut temps = hw_mon.get_temps();
let temp = temps
.remove(&temp_key)
.expect("Could not get temperature by given key");
let target_pwm = task_curve.pwm_at_temp(temp);
trace!("Fan control tick: setting pwm to {target_pwm}");
if let Err(err) = hw_mon.set_fan_pwm(target_pwm) {
error!("Could not set fan speed: {err}, disabling fan control");
break;
}
}
info!("Shutting down fan control");
if let Err(err) = hw_mon.set_fan_control_method(FanControlMethod::Auto) {
error!("Could not set fan control back to automatic: {err}");
}
notify_handle
.lock()
.expect("Fan control mutex error")
.take();
info!("Exited fan control task");
});
*notify_guard = Some((notify, handle, curve));
@@ -249,7 +238,7 @@ impl GpuController {
Ok(())
}
pub async fn stop_fan_control(&self) -> anyhow::Result<()> {
pub async fn stop_fan_control(&self, reset_mode: bool) -> anyhow::Result<()> {
let maybe_notify = self
.fan_control_handle
.lock()
@@ -259,6 +248,18 @@ impl GpuController {
notify.notify_one();
handle.await?;
}
if reset_mode {
let hw_mon = self
.handle
.hw_monitors
.first()
.cloned()
.context("This GPU has no monitor")?;
hw_mon
.set_fan_control_method(FanControlMethod::Auto)
.context("Could not set fan control back to automatic: {err}")?;
}
Ok(())
}
}

View File

@@ -2,7 +2,7 @@ use super::gpu_controller::{fan_control::FanCurve, GpuController};
use crate::config::{Config, FanControlSettings, GpuConfig};
use amdgpu_sysfs::sysfs::SysFS;
use anyhow::{anyhow, Context};
use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats};
use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap};
use std::{
collections::HashMap,
path::PathBuf,
@@ -77,11 +77,13 @@ impl<'a> Handler {
"Fan control is enabled but no settings are defined (invalid config?)",
)?;
let interval = Duration::from_millis(settings.interval_ms);
controller.start_fan_control(
settings.curve.clone(),
settings.temperature_key.clone(),
interval,
)?;
controller
.start_fan_control(
settings.curve.clone(),
settings.temperature_key.clone(),
interval,
)
.await?;
}
if let Some(power_cap) = gpu_config.power_cap {
@@ -147,30 +149,50 @@ impl<'a> Handler {
self.controller_by_id(id)?.get_stats()
}
pub async fn set_fan_control(&'a self, id: &str, enabled: bool) -> anyhow::Result<()> {
if enabled {
let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?;
let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default();
let settings =
gpu_config
.fan_control_settings
.get_or_insert_with(|| FanControlSettings {
curve: FanCurve::default(),
pub async fn set_fan_control(
&'a self,
id: &str,
enabled: bool,
curve: Option<FanCurveMap>,
) -> anyhow::Result<()> {
let settings = if enabled {
let settings = {
let curve = curve.map_or_else(FanCurve::default, |curve| FanCurve(curve));
curve.validate()?;
let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?;
let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default();
if let Some(mut existing_settings) = gpu_config.fan_control_settings.clone() {
existing_settings.curve = curve;
existing_settings
} else {
FanControlSettings {
curve,
temperature_key: "edge".to_owned(),
interval_ms: 500,
});
}
}
};
let interval = Duration::from_millis(settings.interval_ms);
self.controller_by_id(id)?.start_fan_control(
settings.curve.clone(),
settings.temperature_key.clone(),
interval,
)?;
gpu_config.fan_control_enabled = true;
config_guard.save().context("Could not save config")
self.controller_by_id(id)?
.start_fan_control(
settings.curve.clone(),
settings.temperature_key.clone(),
interval,
)
.await?;
Some(settings)
} else {
self.controller_by_id(id)?.stop_fan_control().await
}
self.controller_by_id(id)?.stop_fan_control(true).await?;
None
};
self.edit_gpu_config(id.to_owned(), |config| {
config.fan_control_enabled = enabled;
config.fan_control_settings = settings
})
}
pub fn set_power_cap(&'a self, id: &str, maybe_cap: Option<f64>) -> anyhow::Result<()> {
@@ -199,7 +221,7 @@ impl<'a> Handler {
if gpu_config.fan_control_enabled {
debug!("Stopping fan control");
controller
.stop_fan_control()
.stop_fan_control(true)
.await
.expect("Could not stop fan control");
}

View File

@@ -83,8 +83,8 @@ async fn handle_request<'a>(request: Request<'a>, handler: &'a Handler) -> anyho
Request::ListDevices => ok_response(handler.list_devices()),
Request::DeviceInfo { id } => ok_response(handler.get_device_info(id)?),
Request::DeviceStats { id } => ok_response(handler.get_gpu_stats(id)?),
Request::SetFanControl { id, enabled } => {
ok_response(handler.set_fan_control(id, enabled).await?)
Request::SetFanControl { id, enabled, curve } => {
ok_response(handler.set_fan_control(id, enabled, curve).await?)
}
Request::SetPowerCap { id, cap } => ok_response(handler.set_power_cap(id, cap)?),
}

View File

@@ -141,7 +141,11 @@ impl App {
let thermals_settings = app.root_stack.thermals_page.get_thermals_settings();
app.daemon_client
.set_fan_control(&gpu_id, thermals_settings.automatic_fan_control_enabled)
.set_fan_control(
&gpu_id,
thermals_settings.automatic_fan_control_enabled,
None,
)
.expect("Could not set fan control");
// TODO

View File

@@ -1,5 +1,7 @@
use anyhow::{anyhow, Context};
use lact_schema::{request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats};
use lact_schema::{
request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap,
};
use nix::unistd::getuid;
use serde::Deserialize;
use std::{
@@ -56,8 +58,13 @@ impl DaemonClient {
self.make_request(Request::ListDevices)
}
pub fn set_fan_control(&self, id: &str, enabled: bool) -> anyhow::Result<()> {
self.make_request::<()>(Request::SetFanControl { id, enabled })?
/// Sends a `SetFanControl` request for the given device.
///
/// The request carries the device `id`, the `enabled` flag and the optional
/// `curve` exactly as passed in.
///
/// # Errors
///
/// Propagates any error returned by `make_request` or by `inner()` on its result.
pub fn set_fan_control(
    &self,
    id: &str,
    enabled: bool,
    curve: Option<FanCurveMap>,
) -> anyhow::Result<()> {
    let request = Request::SetFanControl { id, enabled, curve };
    let response = self.make_request::<()>(request)?;
    response.inner()?;
    Ok(())
}

View File

@@ -4,9 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [
"serde",
] }
amdgpu-sysfs = { version = "*", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
indexmap = { version = "1.9", features = ["serde"] }

View File

@@ -1,3 +1,4 @@
use crate::FanCurveMap;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, PartialEq)]
@@ -5,8 +6,19 @@ use serde::{Deserialize, Serialize};
pub enum Request<'a> {
Ping,
ListDevices,
DeviceInfo { id: &'a str },
DeviceStats { id: &'a str },
SetFanControl { id: &'a str, enabled: bool },
SetPowerCap { id: &'a str, cap: Option<f64> },
DeviceInfo {
id: &'a str,
},
DeviceStats {
id: &'a str,
},
SetFanControl {
id: &'a str,
enabled: bool,
curve: Option<FanCurveMap>,
},
SetPowerCap {
id: &'a str,
cap: Option<f64>,
},
}