From 4db593c73f7b46a8d1466bf5734e9a764c43afb7 Mon Sep 17 00:00:00 2001 From: Ilya Zlobintsev Date: Sun, 16 Jun 2024 00:06:51 +0300 Subject: [PATCH] fix: defer committing clocks/fan settings changes until all the values have been written (#340) * feat: defer committing settings until all the values have been written * fix: reset fan curve after setting pmfw values * chore: switch to amdgpu-sysfs tag --- Cargo.lock | 4 +- Cargo.toml | 2 +- lact-daemon/src/server/gpu_controller/mod.rs | 105 +++++++++++-------- 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f87f0c..035d01d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,9 +53,9 @@ checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] name = "amdgpu-sysfs" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f4211f5102236299dcbd91a8ed27e8a0cb91cb194fa273abcf828797f6cac86" +checksum = "64b277cf650bff281b49b12f4a2a4a004485e0b3ecfd34961c5f9b258800ea09" dependencies = [ "enum_dispatch", "serde", diff --git a/Cargo.toml b/Cargo.toml index 99dc1fc..1cb6b66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ ] [workspace.dependencies] -amdgpu-sysfs = { version = "0.14.0", features = ["serde"] } +amdgpu-sysfs = { version = "0.15.0", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } serde_with = { version = "3.5.0", default-features = false, features = [ "macros", diff --git a/lact-daemon/src/server/gpu_controller/mod.rs b/lact-daemon/src/server/gpu_controller/mod.rs index ee86d28..34c9238 100644 --- a/lact-daemon/src/server/gpu_controller/mod.rs +++ b/lact-daemon/src/server/gpu_controller/mod.rs @@ -8,7 +8,7 @@ use amdgpu_sysfs::{ gpu_handle::{ fan_control::FanCurve as PmfwCurve, overdrive::{ClocksTable, ClocksTableGen}, - GpuHandle, PerformanceLevel, PowerLevelKind, PowerLevels, + CommitHandle, GpuHandle, PerformanceLevel, PowerLevelKind, PowerLevels, }, hw_mon::{FanControlMethod, HwMon}, sysfs::SysFS, @@ -357,7 +357,10 @@ impl GpuController { self.handle.hw_monitors.first().map(f) } - async fn set_static_fan_control(&self, static_speed: f64) -> anyhow::Result<()> { + async fn set_static_fan_control( + &self, + static_speed: f64, + ) -> anyhow::Result> { // Stop existing task to set static speed self.stop_fan_control(false).await?; @@ -383,17 +386,14 @@ impl GpuController { allowed_ranges: Some(allowed_ranges), }; - if new_curve == current_curve { - debug!("fan curve unchanged"); - } else { - debug!("setting static curve {new_curve:?}"); + debug!("setting static curve {new_curve:?}"); - self.handle - .set_fan_curve(&new_curve) - .context("Could not set fan curve")?; - } + let commit_handle = self + .handle + .set_fan_curve(&new_curve) + .context("Could not set fan curve")?; - Ok(()) + Ok(Some(commit_handle)) } else { let hw_mon = self .handle @@ -415,7 +415,7 @@ impl GpuController { debug!("set fan speed to {}", static_speed); - Ok(()) + Ok(None) } } @@ -423,28 +423,25 @@ impl GpuController { &self, curve: FanCurve, settings: FanControlSettings, - ) -> anyhow::Result<()> { + ) -> anyhow::Result> { // Use the PMFW curve functionality when it is available // Otherwise, fall back to manual fan control via a task - match self.handle.get_fan_curve() { - Ok(current_curve) => { - let new_curve = curve - .into_pmfw_curve(current_curve.clone()) - .context("Invalid fan curve")?; + if let Ok(current_curve) = self.handle.get_fan_curve() { + let new_curve = curve + .into_pmfw_curve(current_curve.clone()) + .context("Invalid fan curve")?; - if new_curve == current_curve { - debug!("fan curve unchanged"); - } else { - debug!("setting pmfw curve {new_curve:?}"); + debug!("setting pmfw curve {new_curve:?}"); - self.handle - .set_fan_curve(&new_curve) - .context("Could not set fan curve")?; - } + let commit_handle = self + .handle + .set_fan_curve(&new_curve) + .context("Could not set fan curve")?; - Ok(()) - } - Err(_) => self.start_curve_fan_control_task(curve, settings).await, + Ok(Some(commit_handle)) + } else { + self.start_curve_fan_control_task(curve, settings).await?; + Ok(None) } } @@ -737,6 +734,8 @@ impl GpuController { } } + let mut commit_handles = Vec::new(); + // Reset the clocks table in case the settings get reverted back to not having a clocks value configured self.handle.reset_clocks_table().ok(); @@ -762,10 +761,12 @@ impl GpuController { .context("Failed to get table commands")? ); - self.handle + let handle = self + .handle .set_clocks_table(&table) .context("Could not write clocks table") .with_context(|| format!("Clocks table commands: {:?}", table.get_commands()))?; + commit_handles.push(handle); } if let Some(level) = config.performance_level { @@ -803,18 +804,26 @@ impl GpuController { if let Some(ref settings) = config.fan_control_settings { match settings.mode { lact_schema::FanControlMode::Static => { - self.set_static_fan_control(settings.static_speed) + if let Some(commit_handle) = self + .set_static_fan_control(settings.static_speed) .await - .context("Failed to set static fan control")?; + .context("Failed to set static fan control")? + { + commit_handles.push(commit_handle); + } } lact_schema::FanControlMode::Curve => { if settings.curve.0.is_empty() { return Err(anyhow!("Cannot use empty fan curve")); } - self.start_curve_fan_control(settings.curve.clone(), settings.clone()) + if let Some(commit_handle) = self + .start_curve_fan_control(settings.curve.clone(), settings.clone()) .await - .context("Failed to set curve fan control")?; + .context("Failed to set curve fan control")? + { + commit_handles.push(commit_handle); + } } } } else { @@ -823,10 +832,6 @@ impl GpuController { )); } } else { - self.stop_fan_control(true) - .await - .context("Failed to stop fan control")?; - let pmfw = &config.pmfw_options; if let Some(acoustic_limit) = pmfw.acoustic_limit { if self @@ -836,9 +841,11 @@ impl GpuController { .current != acoustic_limit { - self.handle + let commit_handle = self + .handle .set_fan_acoustic_limit(acoustic_limit) .context("Could not set acoustic limit")?; + commit_handles.push(commit_handle); } } if let Some(acoustic_target) = pmfw.acoustic_target { @@ -849,9 +856,11 @@ impl GpuController { .current != acoustic_target { - self.handle + let commit_handle = self + .handle .set_fan_acoustic_target(acoustic_target) .context("Could not set acoustic target")?; + commit_handles.push(commit_handle); } } if let Some(target_temperature) = pmfw.target_temperature { @@ -862,9 +871,11 @@ impl GpuController { .current != target_temperature { - self.handle + let commit_handle = self + .handle .set_fan_target_temperature(target_temperature) .context("Could not set target temperature")?; + commit_handles.push(commit_handle); } } if let Some(minimum_pwm) = pmfw.minimum_pwm { @@ -875,11 +886,21 @@ impl GpuController { .current != minimum_pwm { - self.handle + let commit_handle = self + .handle .set_fan_minimum_pwm(minimum_pwm) .context("Could not set minimum pwm")?; + commit_handles.push(commit_handle); } } + + self.stop_fan_control(true) + .await + .context("Failed to stop fan control")?; + } + + for handle in commit_handles { + handle.commit()?; } Ok(())