fix: always use applied clock offset values instead of reading them from NVML, bigger grace period for self-inflicted config changes

This commit is contained in:
Ilya Zlobintsev
2024-11-24 16:45:25 +02:00
parent 490929f94d
commit 73adb1aba2
3 changed files with 39 additions and 38 deletions

View File

@@ -21,6 +21,8 @@ const FILE_NAME: &str = "config.yaml";
const DEFAULT_ADMIN_GROUPS: [&str; 2] = ["wheel", "sudo"];
/// Minimum amount of time between separate config reloads
const CONFIG_RELOAD_INTERVAL_MILLIS: u64 = 50;
/// Period when config changes are ignored after LACT itself has edited the config
const SELF_CONFIG_EDIT_PERIOD_MILLIS: u64 = 1000;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Config {
@@ -175,11 +177,16 @@ impl Config {
}
}
pub fn save(&self) -> anyhow::Result<()> {
pub fn save(&self, config_last_saved: &Mutex<Instant>) -> anyhow::Result<()> {
let path = get_path();
debug!("saving config to {path:?}");
let raw_config = serde_yaml::to_string(self)?;
fs::write(path, raw_config).context("Could not write config")
let mut instant_guard = config_last_saved.lock().unwrap();
fs::write(path, raw_config).context("Could not write config")?;
*instant_guard = Instant::now();
Ok(())
}
pub fn load_or_create() -> anyhow::Result<Self> {
@@ -187,7 +194,7 @@ impl Config {
Ok(config)
} else {
let config = Config::default();
config.save()?;
config.save(&Mutex::new(Instant::now()))?;
Ok(config)
}
}
@@ -268,7 +275,7 @@ pub fn start_watcher(config_last_saved: Arc<Mutex<Instant>>) -> mpsc::UnboundedR
event.kind
{
if config_last_saved.lock().unwrap().elapsed()
< Duration::from_millis(CONFIG_RELOAD_INTERVAL_MILLIS)
< Duration::from_millis(SELF_CONFIG_EDIT_PERIOD_MILLIS)
{
debug!("ignoring fs event after self-inflicted config change");
continue;

View File

@@ -21,12 +21,11 @@ use nvml_wrapper::{
Device, Nvml,
};
use std::{
cell::RefCell,
cell::{Cell, RefCell},
collections::HashMap,
fmt::Write,
path::{Path, PathBuf},
rc::Rc,
sync::atomic::{AtomicI32, Ordering},
time::{Duration, Instant},
};
use tokio::{select, sync::Notify, time::sleep};
@@ -39,8 +38,8 @@ pub struct NvidiaGpuController {
pub sysfs_path: PathBuf,
pub fan_control_handle: RefCell<Option<FanControlHandle>>,
last_applied_gpc_offset: Rc<AtomicI32>,
last_applied_mem_offset: Rc<AtomicI32>,
last_applied_gpc_offset: Cell<Option<i32>>,
last_applied_mem_offset: Cell<Option<i32>>,
}
impl NvidiaGpuController {
@@ -56,8 +55,8 @@ impl NvidiaGpuController {
pci_info,
sysfs_path,
fan_control_handle: RefCell::new(None),
last_applied_gpc_offset: Rc::new(AtomicI32::new(0)),
last_applied_mem_offset: Rc::new(AtomicI32::new(0)),
last_applied_gpc_offset: Cell::new(None),
last_applied_mem_offset: Cell::new(None),
}
}
@@ -488,11 +487,11 @@ impl GpuController for NvidiaGpuController {
if let Ok(max) = device.max_clock_info(Clock::Graphics) {
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.gpc_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_gpc_offset.load(Ordering::SeqCst);
}
if let Some(offset) = self
.last_applied_gpc_offset
.get()
.or_else(|| device.gpc_clk_vf_offset().ok())
{
gpc = Some(NvidiaClockInfo {
max: max as i32,
offset,
@@ -504,11 +503,11 @@ impl GpuController for NvidiaGpuController {
if let Ok(max) = device.max_clock_info(Clock::Memory) {
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.mem_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_mem_offset.load(Ordering::SeqCst);
}
if let Some(offset) = self
.last_applied_mem_offset
.get()
.or_else(|| device.mem_clk_vf_offset().ok())
{
mem = Some(NvidiaClockInfo {
max: max as i32,
offset,
@@ -586,13 +585,15 @@ impl GpuController for NvidiaGpuController {
.max_clock_info(Clock::Graphics)
.context("Could not read max graphics clock")?;
let offset = max_gpu_clock - default_max_clock as i32;
debug!("Using graphics clock offset {offset}");
debug!(
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
);
device
.set_gpc_clk_vf_offset(offset)
.context("Could not set graphics clock offset")?;
self.last_applied_gpc_offset.store(offset, Ordering::SeqCst);
self.last_applied_gpc_offset.set(Some(offset));
}
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
@@ -600,13 +601,13 @@ impl GpuController for NvidiaGpuController {
.max_clock_info(Clock::Memory)
.context("Could not read max memory clock")?;
let offset = max_mem_clock - default_max_clock as i32;
debug!("Using mem clock offset {offset}");
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");
device
.set_mem_clk_vf_offset(offset)
.context("Could not set memory clock offset")?;
self.last_applied_mem_offset.store(offset, Ordering::SeqCst);
self.last_applied_mem_offset.set(Some(offset));
}
if config.fan_control_enabled {
@@ -654,7 +655,7 @@ impl GpuController for NvidiaGpuController {
.set_gpc_clk_vf_offset(0)
.context("Could not reset graphics clock offset")?;
self.last_applied_gpc_offset.store(0, Ordering::SeqCst);
self.last_applied_gpc_offset.set(None);
}
}
@@ -664,7 +665,7 @@ impl GpuController for NvidiaGpuController {
.set_mem_clk_vf_offset(0)
.context("Could not reset memory clock offset")?;
self.last_applied_mem_offset.store(0, Ordering::SeqCst);
self.last_applied_mem_offset.set(None);
}
}

View File

@@ -251,8 +251,6 @@ impl<'a> Handler {
match result {
Ok(ConfirmCommand::Confirm) => {
info!("saving updated config");
*handler.config_last_saved.lock().unwrap() = Instant::now();
let mut config_guard = handler.config.borrow_mut();
match config_guard.gpus_mut() {
Ok(gpus) => {
@@ -261,11 +259,9 @@ impl<'a> Handler {
Err(err) => error!("{err:#}"),
}
if let Err(err) = config_guard.save() {
if let Err(err) = config_guard.save(&handler.config_last_saved) {
error!("{err:#}");
}
*handler.config_last_saved.lock().unwrap() = Instant::now();
}
Ok(ConfirmCommand::Revert) | Err(_) => {
if let Err(err) = controller.apply_config(&previous_config).await {
@@ -668,7 +664,7 @@ impl<'a> Handler {
self.config.borrow_mut().current_profile = name;
self.apply_current_config().await?;
self.config.borrow_mut().save()?;
self.config.borrow_mut().save(&self.config_last_saved)?;
Ok(())
}
@@ -685,7 +681,7 @@ impl<'a> Handler {
ProfileBase::Profile(name) => config.profile(&name)?.clone(),
};
config.profiles.insert(name, profile);
config.save()?;
config.save(&self.config_last_saved)?;
Ok(())
}
@@ -694,7 +690,7 @@ impl<'a> Handler {
self.set_profile(None).await?;
}
self.config.borrow_mut().profiles.shift_remove(&name);
self.config.borrow().save()?;
self.config.borrow().save(&self.config_last_saved)?;
Ok(())
}
@@ -718,12 +714,9 @@ impl<'a> Handler {
let mut config = self.config.borrow_mut();
config.clear();
*self.config_last_saved.lock().unwrap() = Instant::now();
if let Err(err) = config.save() {
if let Err(err) = config.save(&self.config_last_saved) {
error!("could not save config: {err:#}");
}
*self.config_last_saved.lock().unwrap() = Instant::now();
}
pub async fn cleanup(&self) {