feat: set the GPU to the lowest power state before applying a power limit if current consumption exceeds the new limit (#233)

* feat: attempt to clock down the GPU when setting a power limit lower than the current power consumption

* add clippy settings
This commit is contained in:
Ilya Zlobintsev 2023-12-25 18:51:11 +02:00 committed by GitHub
parent 10f1e8c25a
commit f5cd0dc25f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 1 deletions

1
clippy.toml Normal file
View File

@ -0,0 +1 @@
too-many-lines-threshold = 150

View File

@ -29,7 +29,12 @@ use std::{
str::FromStr,
time::Duration,
};
use tokio::{select, sync::Notify, task::JoinHandle, time::sleep};
use tokio::{
select,
sync::Notify,
task::JoinHandle,
time::{sleep, timeout},
};
use tracing::{debug, error, trace, warn};
#[cfg(feature = "libdrm_amdgpu_sys")]
use {
@ -40,6 +45,8 @@ use {
type FanControlHandle = (Rc<Notify>, JoinHandle<()>);
const GPU_CLOCKDOWN_TIMEOUT_SECS: u64 = 3;
pub struct GpuController {
pub(super) handle: GpuHandle,
#[cfg(feature = "libdrm_amdgpu_sys")]
@ -493,7 +500,54 @@ impl GpuController {
if let Some(cap) = config.power_cap {
let hw_mon = self.first_hw_mon()?;
let current_usage = hw_mon
.get_power_input()
.or_else(|_| hw_mon.get_power_average())
.context("Could not get current power usage")?;
// When applying a power limit that's lower than the current power consumption,
// try to downclock the GPU first by forcing it into the lowest performance level.
// Workaround for behaviour described in https://github.com/ilya-zlobintsev/LACT/issues/207
let mut original_performance_level = None;
if current_usage > cap {
if let Ok(performance_level) = self.handle.get_power_force_performance_level() {
if self
.handle
.set_power_force_performance_level(PerformanceLevel::Low)
.is_ok()
{
debug!(
"waiting for the GPU to clock down before applying a new power limit"
);
match timeout(
Duration::from_secs(GPU_CLOCKDOWN_TIMEOUT_SECS),
wait_until_lowest_clock_level(&self.handle),
)
.await
{
Ok(()) => {
debug!("GPU clocked down successfully");
}
Err(_) => {
warn!("GPU did not clock down after {GPU_CLOCKDOWN_TIMEOUT_SECS}");
}
}
original_performance_level = Some(performance_level);
}
}
}
hw_mon.set_power_cap(cap)?;
// Reapply old power level
if let Some(level) = original_performance_level {
self.handle
.set_power_force_performance_level(level)
.context("Could not reapply original performance level")?;
}
} else if let Ok(hw_mon) = self.first_hw_mon() {
if let Ok(default_cap) = hw_mon.get_power_cap_default() {
hw_mon.set_power_cap(default_cap)?;
@ -601,3 +655,21 @@ impl ClocksConfiguration {
Ok(())
}
}
async fn wait_until_lowest_clock_level(handle: &GpuHandle) {
loop {
match handle.get_core_clock_levels() {
Ok(levels) => {
if levels.active == Some(0) {
break;
}
sleep(Duration::from_millis(250)).await;
}
Err(err) => {
warn!("could not get core clock levels: {err}");
break;
}
}
}
}