mirror of
https://github.com/ilya-zlobintsev/LACT.git
synced 2025-02-25 18:55:26 -06:00
feat: support setting the fan curve
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -25,8 +25,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "amdgpu-sysfs"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs?branch=error#80cf493d20c36e27a54e5b34162d05767ec6c305"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5a5ed5bf383b6aea853667b97646ce7daeae53f83b82dcc3cdd3390614e3128"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
@@ -6,9 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [
|
||||
"serde",
|
||||
] }
|
||||
amdgpu-sysfs = { version = "0.6.1", features = ["serde"] }
|
||||
anyhow = "1.0"
|
||||
bincode = "1.3"
|
||||
nix = "0.25"
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
use amdgpu_sysfs::hw_mon::Temperature;
|
||||
use anyhow::anyhow;
|
||||
use lact_schema::FanCurveMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::error;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct FanCurve(pub FanCurveMap);
|
||||
|
||||
impl FanCurve {
|
||||
pub fn rpm_at_temp(&self, temp: Temperature, min_rpm: u32, max_rpm: u32) -> u32 {
|
||||
pub fn pwm_at_temp(&self, temp: Temperature) -> u8 {
|
||||
let current = temp.current.expect("No current temp");
|
||||
|
||||
// This scenario is most likely unreachable as the kernel shuts down the GPU when it reaches critical temperature
|
||||
if temp.crit.filter(|crit| current > *crit).is_some()
|
||||
|| temp.crit_hyst.filter(|hyst| current < *hyst).is_some()
|
||||
{
|
||||
error!("GPU temperature is beyond critical values! {current}°C");
|
||||
return max_rpm;
|
||||
warn!("GPU temperature is beyond critical values! {current}°C");
|
||||
return u8::MAX;
|
||||
}
|
||||
|
||||
let current = current as i32;
|
||||
@@ -25,6 +26,7 @@ impl FanCurve {
|
||||
let percentage = match (maybe_lower, maybe_higher) {
|
||||
(Some((lower_temp, lower_speed)), Some((higher_temp, higher_speed))) => {
|
||||
let speed_ratio = (current - lower_temp) as f32 / (higher_temp - lower_temp) as f32;
|
||||
println!("RATIO for temp {current}: {speed_ratio}");
|
||||
lower_speed + (higher_speed - lower_speed) * speed_ratio
|
||||
}
|
||||
(Some((_, lower_speed)), None) => *lower_speed,
|
||||
@@ -32,7 +34,18 @@ impl FanCurve {
|
||||
(None, None) => panic!("Could not find fan speed on the curve! This is a bug."),
|
||||
};
|
||||
|
||||
((max_rpm - min_rpm) as f32 * percentage) as u32
|
||||
(u8::MAX as f32 * percentage) as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl FanCurve {
|
||||
pub fn validate(&self) -> anyhow::Result<()> {
|
||||
for percentage in self.0.values() {
|
||||
if !(0.0..=1.0).contains(percentage) {
|
||||
return Err(anyhow!("Fan speed percentage must be between 0 and 1"));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,44 +70,44 @@ mod tests {
|
||||
use super::FanCurve;
|
||||
use amdgpu_sysfs::hw_mon::Temperature;
|
||||
|
||||
fn simple_rpm(temp: f32, min_rpm: u32, max_rpm: u32) -> u32 {
|
||||
fn simple_pwm(temp: f32) -> u8 {
|
||||
let curve = FanCurve([(0, 0.0), (100, 1.0)].into());
|
||||
let temp = Temperature {
|
||||
current: Some(temp),
|
||||
crit: Some(150.0),
|
||||
crit_hyst: Some(-100.0),
|
||||
};
|
||||
curve.rpm_at_temp(temp, min_rpm, max_rpm)
|
||||
curve.pwm_at_temp(temp)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_curve_middle() {
|
||||
let rpm = simple_rpm(45.0, 0, 200);
|
||||
assert_eq!(rpm, 90);
|
||||
let pwm = simple_pwm(45.0);
|
||||
assert_eq!(pwm, 114);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_curve_start() {
|
||||
let rpm = simple_rpm(0.0, 0, 200);
|
||||
assert_eq!(rpm, 0);
|
||||
let pwm = simple_pwm(0.0);
|
||||
assert_eq!(pwm, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_curve_end() {
|
||||
let rpm = simple_rpm(100.0, 0, 200);
|
||||
assert_eq!(rpm, 200);
|
||||
let pwm = simple_pwm(100.0);
|
||||
assert_eq!(pwm, 255);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_curve_before() {
|
||||
let rpm = simple_rpm(-5.0, 0, 200);
|
||||
assert_eq!(rpm, 0);
|
||||
let pwm = simple_pwm(-5.0);
|
||||
assert_eq!(pwm, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_curve_after() {
|
||||
let rpm = simple_rpm(105.0, 0, 200);
|
||||
assert_eq!(rpm, 200);
|
||||
let pwm = simple_pwm(105.0);
|
||||
assert_eq!(pwm, 255);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -105,30 +118,51 @@ mod tests {
|
||||
crit: Some(90.0),
|
||||
crit_hyst: Some(0.0),
|
||||
};
|
||||
let rpm = curve.rpm_at_temp(temp, 0, 200);
|
||||
assert_eq!(rpm, 200);
|
||||
let pwm = curve.pwm_at_temp(temp);
|
||||
assert_eq!(pwm, 255);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_curve() {
|
||||
let curve = FanCurve::default();
|
||||
let rpm_at_temp = |current: f32| {
|
||||
fn uneven_curve() {
|
||||
let curve = FanCurve([(30, 0.0), (40, 0.1), (55, 0.9), (61, 1.0)].into());
|
||||
let pwm_at_temp = |current: f32| {
|
||||
let temp = Temperature {
|
||||
current: Some(current),
|
||||
crit: Some(90.0),
|
||||
crit_hyst: Some(0.0),
|
||||
};
|
||||
curve.rpm_at_temp(temp, 0, 1000)
|
||||
curve.pwm_at_temp(temp)
|
||||
};
|
||||
assert_eq!(rpm_at_temp(20.0), 0);
|
||||
assert_eq!(rpm_at_temp(30.0), 0);
|
||||
assert_eq!(rpm_at_temp(33.0), 60);
|
||||
assert_eq!(rpm_at_temp(60.0), 500);
|
||||
assert_eq!(rpm_at_temp(65.0), 625);
|
||||
assert_eq!(rpm_at_temp(70.0), 750);
|
||||
assert_eq!(rpm_at_temp(79.0), 975);
|
||||
assert_eq!(rpm_at_temp(85.0), 1000);
|
||||
assert_eq!(rpm_at_temp(100.0), 1000);
|
||||
assert_eq!(rpm_at_temp(-5.0), 1000);
|
||||
|
||||
assert_eq!(pwm_at_temp(30.0), 0);
|
||||
assert_eq!(pwm_at_temp(35.0), 12);
|
||||
assert_eq!(pwm_at_temp(40.0), 25);
|
||||
assert_eq!(pwm_at_temp(47.0), 120);
|
||||
assert_eq!(pwm_at_temp(52.0), 188);
|
||||
assert_eq!(pwm_at_temp(53.0), 202);
|
||||
assert_eq!(pwm_at_temp(54.0), 215);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_curve() {
|
||||
let curve = FanCurve::default();
|
||||
let pwm_at_temp = |current: f32| {
|
||||
let temp = Temperature {
|
||||
current: Some(current),
|
||||
crit: Some(90.0),
|
||||
crit_hyst: Some(0.0),
|
||||
};
|
||||
curve.pwm_at_temp(temp)
|
||||
};
|
||||
assert_eq!(pwm_at_temp(20.0), 0);
|
||||
assert_eq!(pwm_at_temp(30.0), 0);
|
||||
assert_eq!(pwm_at_temp(33.0), 15);
|
||||
assert_eq!(pwm_at_temp(60.0), 127);
|
||||
assert_eq!(pwm_at_temp(65.0), 159);
|
||||
assert_eq!(pwm_at_temp(70.0), 191);
|
||||
assert_eq!(pwm_at_temp(79.0), 248);
|
||||
assert_eq!(pwm_at_temp(85.0), 255);
|
||||
assert_eq!(pwm_at_temp(100.0), 255);
|
||||
assert_eq!(pwm_at_temp(-5.0), 255);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,14 +21,14 @@ use std::{
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{select, sync::Notify, task::JoinHandle, time::sleep};
|
||||
use tracing::{error, info, trace, warn};
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
|
||||
type FanControlHandle = (Arc<Notify>, JoinHandle<()>, FanCurve);
|
||||
|
||||
pub struct GpuController {
|
||||
pub handle: GpuHandle,
|
||||
pub pci_info: Option<GpuPciInfo>,
|
||||
pub fan_control_handle: Arc<Mutex<Option<FanControlHandle>>>,
|
||||
pub fan_control_handle: Mutex<Option<FanControlHandle>>,
|
||||
}
|
||||
|
||||
impl GpuController {
|
||||
@@ -88,7 +88,7 @@ impl GpuController {
|
||||
Ok(Self {
|
||||
handle,
|
||||
pci_info,
|
||||
fan_control_handle: Arc::new(Mutex::new(None)),
|
||||
fan_control_handle: Mutex::new(None),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -177,12 +177,15 @@ impl GpuController {
|
||||
self.handle.hw_monitors.first().map(f)
|
||||
}
|
||||
|
||||
pub fn start_fan_control(
|
||||
pub async fn start_fan_control(
|
||||
&self,
|
||||
curve: FanCurve,
|
||||
temp_key: String,
|
||||
interval: Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
// Stop existing task to re-apply new curve
|
||||
self.stop_fan_control(false).await?;
|
||||
|
||||
let hw_mon = self
|
||||
.handle
|
||||
.hw_monitors
|
||||
@@ -193,50 +196,36 @@ impl GpuController {
|
||||
.set_fan_control_method(FanControlMethod::Manual)
|
||||
.context("Could not set fan control method")?;
|
||||
|
||||
let max_rpm = hw_mon.get_fan_max().context("Could not get min RPM")?;
|
||||
let min_rpm = hw_mon.get_fan_min().context("Could not get max RPM")?;
|
||||
|
||||
let mut notify_guard = self
|
||||
.fan_control_handle
|
||||
.lock()
|
||||
.map_err(|err| anyhow!("Lock error: {err}"))?;
|
||||
|
||||
if notify_guard.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let notify = Arc::new(Notify::new());
|
||||
let task_notify = notify.clone();
|
||||
let task_curve = curve.clone();
|
||||
debug!("Using curve {curve:?}");
|
||||
|
||||
let notify_handle = self.fan_control_handle.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
loop {
|
||||
let mut temps = hw_mon.get_temps();
|
||||
let temp = temps
|
||||
.remove(&temp_key)
|
||||
.expect("Could not get temperature by given key");
|
||||
let target_rpm = task_curve.rpm_at_temp(temp, min_rpm, max_rpm);
|
||||
trace!("Fan control tick: setting rpm to {target_rpm}");
|
||||
|
||||
if let Err(err) = hw_mon.set_fan_target(target_rpm) {
|
||||
error!("Could not set fan speed: {err}, disabling fan control");
|
||||
break;
|
||||
}
|
||||
|
||||
select! {
|
||||
_ = sleep(interval) => (),
|
||||
_ = task_notify.notified() => break,
|
||||
}
|
||||
|
||||
let mut temps = hw_mon.get_temps();
|
||||
let temp = temps
|
||||
.remove(&temp_key)
|
||||
.expect("Could not get temperature by given key");
|
||||
let target_pwm = task_curve.pwm_at_temp(temp);
|
||||
trace!("Fan control tick: setting pwm to {target_pwm}");
|
||||
|
||||
if let Err(err) = hw_mon.set_fan_pwm(target_pwm) {
|
||||
error!("Could not set fan speed: {err}, disabling fan control");
|
||||
break;
|
||||
}
|
||||
}
|
||||
info!("Shutting down fan control");
|
||||
if let Err(err) = hw_mon.set_fan_control_method(FanControlMethod::Auto) {
|
||||
error!("Could not set fan control back to automatic: {err}");
|
||||
}
|
||||
notify_handle
|
||||
.lock()
|
||||
.expect("Fan control mutex error")
|
||||
.take();
|
||||
info!("Exited fan control task");
|
||||
});
|
||||
|
||||
*notify_guard = Some((notify, handle, curve));
|
||||
@@ -249,7 +238,7 @@ impl GpuController {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn stop_fan_control(&self) -> anyhow::Result<()> {
|
||||
pub async fn stop_fan_control(&self, reset_mode: bool) -> anyhow::Result<()> {
|
||||
let maybe_notify = self
|
||||
.fan_control_handle
|
||||
.lock()
|
||||
@@ -259,6 +248,18 @@ impl GpuController {
|
||||
notify.notify_one();
|
||||
handle.await?;
|
||||
}
|
||||
|
||||
if reset_mode {
|
||||
let hw_mon = self
|
||||
.handle
|
||||
.hw_monitors
|
||||
.first()
|
||||
.cloned()
|
||||
.context("This GPU has no monitor")?;
|
||||
hw_mon
|
||||
.set_fan_control_method(FanControlMethod::Auto)
|
||||
.context("Could not set fan control back to automatic: {err}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use super::gpu_controller::{fan_control::FanCurve, GpuController};
|
||||
use crate::config::{Config, FanControlSettings, GpuConfig};
|
||||
use amdgpu_sysfs::sysfs::SysFS;
|
||||
use anyhow::{anyhow, Context};
|
||||
use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats};
|
||||
use lact_schema::{DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::PathBuf,
|
||||
@@ -77,11 +77,13 @@ impl<'a> Handler {
|
||||
"Fan control is enabled but no settings are defined (invalid config?)",
|
||||
)?;
|
||||
let interval = Duration::from_millis(settings.interval_ms);
|
||||
controller.start_fan_control(
|
||||
settings.curve.clone(),
|
||||
settings.temperature_key.clone(),
|
||||
interval,
|
||||
)?;
|
||||
controller
|
||||
.start_fan_control(
|
||||
settings.curve.clone(),
|
||||
settings.temperature_key.clone(),
|
||||
interval,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
if let Some(power_cap) = gpu_config.power_cap {
|
||||
@@ -147,30 +149,50 @@ impl<'a> Handler {
|
||||
self.controller_by_id(id)?.get_stats()
|
||||
}
|
||||
|
||||
pub async fn set_fan_control(&'a self, id: &str, enabled: bool) -> anyhow::Result<()> {
|
||||
if enabled {
|
||||
let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?;
|
||||
let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default();
|
||||
let settings =
|
||||
gpu_config
|
||||
.fan_control_settings
|
||||
.get_or_insert_with(|| FanControlSettings {
|
||||
curve: FanCurve::default(),
|
||||
pub async fn set_fan_control(
|
||||
&'a self,
|
||||
id: &str,
|
||||
enabled: bool,
|
||||
curve: Option<FanCurveMap>,
|
||||
) -> anyhow::Result<()> {
|
||||
let settings = if enabled {
|
||||
let settings = {
|
||||
let curve = curve.map_or_else(FanCurve::default, |curve| FanCurve(curve));
|
||||
curve.validate()?;
|
||||
|
||||
let mut config_guard = self.config.write().map_err(|err| anyhow!("{err}"))?;
|
||||
let gpu_config = config_guard.gpus.entry(id.to_owned()).or_default();
|
||||
|
||||
if let Some(mut existing_settings) = gpu_config.fan_control_settings.clone() {
|
||||
existing_settings.curve = curve;
|
||||
existing_settings
|
||||
} else {
|
||||
FanControlSettings {
|
||||
curve,
|
||||
temperature_key: "edge".to_owned(),
|
||||
interval_ms: 500,
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
let interval = Duration::from_millis(settings.interval_ms);
|
||||
|
||||
self.controller_by_id(id)?.start_fan_control(
|
||||
settings.curve.clone(),
|
||||
settings.temperature_key.clone(),
|
||||
interval,
|
||||
)?;
|
||||
gpu_config.fan_control_enabled = true;
|
||||
config_guard.save().context("Could not save config")
|
||||
self.controller_by_id(id)?
|
||||
.start_fan_control(
|
||||
settings.curve.clone(),
|
||||
settings.temperature_key.clone(),
|
||||
interval,
|
||||
)
|
||||
.await?;
|
||||
Some(settings)
|
||||
} else {
|
||||
self.controller_by_id(id)?.stop_fan_control().await
|
||||
}
|
||||
self.controller_by_id(id)?.stop_fan_control(true).await?;
|
||||
None
|
||||
};
|
||||
|
||||
self.edit_gpu_config(id.to_owned(), |config| {
|
||||
config.fan_control_enabled = enabled;
|
||||
config.fan_control_settings = settings
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set_power_cap(&'a self, id: &str, maybe_cap: Option<f64>) -> anyhow::Result<()> {
|
||||
@@ -199,7 +221,7 @@ impl<'a> Handler {
|
||||
if gpu_config.fan_control_enabled {
|
||||
debug!("Stopping fan control");
|
||||
controller
|
||||
.stop_fan_control()
|
||||
.stop_fan_control(true)
|
||||
.await
|
||||
.expect("Could not stop fan control");
|
||||
}
|
||||
|
||||
@@ -83,8 +83,8 @@ async fn handle_request<'a>(request: Request<'a>, handler: &'a Handler) -> anyho
|
||||
Request::ListDevices => ok_response(handler.list_devices()),
|
||||
Request::DeviceInfo { id } => ok_response(handler.get_device_info(id)?),
|
||||
Request::DeviceStats { id } => ok_response(handler.get_gpu_stats(id)?),
|
||||
Request::SetFanControl { id, enabled } => {
|
||||
ok_response(handler.set_fan_control(id, enabled).await?)
|
||||
Request::SetFanControl { id, enabled, curve } => {
|
||||
ok_response(handler.set_fan_control(id, enabled, curve).await?)
|
||||
}
|
||||
Request::SetPowerCap { id, cap } => ok_response(handler.set_power_cap(id, cap)?),
|
||||
}
|
||||
|
||||
@@ -141,7 +141,11 @@ impl App {
|
||||
let thermals_settings = app.root_stack.thermals_page.get_thermals_settings();
|
||||
|
||||
app.daemon_client
|
||||
.set_fan_control(&gpu_id, thermals_settings.automatic_fan_control_enabled)
|
||||
.set_fan_control(
|
||||
&gpu_id,
|
||||
thermals_settings.automatic_fan_control_enabled,
|
||||
None,
|
||||
)
|
||||
.expect("Could not set fan control");
|
||||
|
||||
// TODO
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use lact_schema::{request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats};
|
||||
use lact_schema::{
|
||||
request::Request, response::Response, DeviceInfo, DeviceListEntry, DeviceStats, FanCurveMap,
|
||||
};
|
||||
use nix::unistd::getuid;
|
||||
use serde::Deserialize;
|
||||
use std::{
|
||||
@@ -56,8 +58,13 @@ impl DaemonClient {
|
||||
self.make_request(Request::ListDevices)
|
||||
}
|
||||
|
||||
pub fn set_fan_control(&self, id: &str, enabled: bool) -> anyhow::Result<()> {
|
||||
self.make_request::<()>(Request::SetFanControl { id, enabled })?
|
||||
pub fn set_fan_control(
|
||||
&self,
|
||||
id: &str,
|
||||
enabled: bool,
|
||||
curve: Option<FanCurveMap>,
|
||||
) -> anyhow::Result<()> {
|
||||
self.make_request::<()>(Request::SetFanControl { id, enabled, curve })?
|
||||
.inner()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -4,9 +4,7 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
amdgpu-sysfs = { git = "https://github.com/ilya-zlobintsev/amdgpu-sysfs-rs", branch = "error", features = [
|
||||
"serde",
|
||||
] }
|
||||
amdgpu-sysfs = { version = "*", features = ["serde"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
indexmap = { version = "1.9", features = ["serde"] }
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::FanCurveMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq)]
|
||||
@@ -5,8 +6,19 @@ use serde::{Deserialize, Serialize};
|
||||
pub enum Request<'a> {
|
||||
Ping,
|
||||
ListDevices,
|
||||
DeviceInfo { id: &'a str },
|
||||
DeviceStats { id: &'a str },
|
||||
SetFanControl { id: &'a str, enabled: bool },
|
||||
SetPowerCap { id: &'a str, cap: Option<f64> },
|
||||
DeviceInfo {
|
||||
id: &'a str,
|
||||
},
|
||||
DeviceStats {
|
||||
id: &'a str,
|
||||
},
|
||||
SetFanControl {
|
||||
id: &'a str,
|
||||
enabled: bool,
|
||||
curve: Option<FanCurveMap>,
|
||||
},
|
||||
SetPowerCap {
|
||||
id: &'a str,
|
||||
cap: Option<f64>,
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user