Config support for multi-GPU setups

This commit is contained in:
Ilya Zlobintsev 2020-11-12 14:12:22 +02:00
parent d5d0829e4e
commit 6a9a4a12ca
5 changed files with 141 additions and 52 deletions

View File

@ -8,7 +8,7 @@ edition = "2018"
[dependencies]
bincode = "1.3"
serde = { version = "1.0", features = ["derive"] }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
vulkano = "0.19"
log = "0.4"

View File

@ -1,5 +1,5 @@
use serde::{Deserialize, Serialize};
use std::{collections::BTreeMap, fs, io, path::PathBuf};
use std::{collections::{BTreeMap, HashMap}, fs, io, path::PathBuf};
#[derive(Debug)]
pub enum ConfigError {
@ -19,13 +19,27 @@ impl From<serde_json::Error> for ConfigError {
}
}
/// Identifies a GPU by its PCI slot and model strings, together with the
/// device path it was found at.
///
/// Equality and hashing consider only `pci_id`, `card_model` and `gpu_model`;
/// `path` is carried along but never compared.
/// NOTE(review): presumably `path` is excluded because the sysfs path of a
/// card can differ between boots — confirm.
#[derive(Deserialize, Serialize, Debug, Clone, Eq)]
pub struct GpuIdentifier {
    /// PCI slot name of the device.
    pub pci_id: String,
    /// Model string of the card.
    pub card_model: String,
    /// Model string of the GPU.
    pub gpu_model: String,
    /// Device path this identifier was created from; ignored by `==` and `Hash`.
    pub path: PathBuf,
}

impl PartialEq for GpuIdentifier {
    fn eq(&self, other: &Self) -> bool {
        self.pci_id == other.pci_id
            && self.gpu_model == other.gpu_model
            && self.card_model == other.card_model
    }
}

// `Hash` is implemented by hand instead of derived: a derived impl would also
// hash `path`, so two identifiers that compare equal could hash differently,
// violating the `k1 == k2 => hash(k1) == hash(k2)` contract that hashed
// collections rely on.
impl std::hash::Hash for GpuIdentifier {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&self.pci_id, state);
        std::hash::Hash::hash(&self.gpu_model, state);
        std::hash::Hash::hash(&self.card_model, state);
    }
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Config {
pub struct GpuConfig {
pub fan_control_enabled: bool,
pub fan_curve: BTreeMap<i32, f64>,
}
impl Config {
impl GpuConfig {
pub fn new() -> Self {
let mut fan_curve: BTreeMap<i32, f64> = BTreeMap::new();
fan_curve.insert(20, 0f64);
@ -34,11 +48,28 @@ impl Config {
fan_curve.insert(80, 80f64);
fan_curve.insert(100, 100f64);
Config {
GpuConfig {
fan_curve,
fan_control_enabled: false,
}
}
}
/// Top-level daemon configuration, serialized to and from JSON.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Config {
/// Per-GPU settings keyed by a `u32` id; each entry pairs the GPU's
/// identifier with the configuration stored for it.
pub gpu_configs: HashMap<u32, (GpuIdentifier, GpuConfig)>,
/// Path of the file backing this configuration.
pub config_path: PathBuf,
}
impl Config {
/// Creates a configuration with no per-GPU entries, remembering
/// `config_path` (copied) as the file location.
pub fn new(config_path: &PathBuf) -> Self {
    Config {
        gpu_configs: HashMap::new(),
        config_path: config_path.to_path_buf(),
    }
}
pub fn read_from_file(path: &PathBuf) -> Result<Self, ConfigError> {
let json = fs::read_to_string(path)?;
@ -46,10 +77,11 @@ impl Config {
Ok(serde_json::from_str::<Config>(&json)?)
}
pub fn save(&self, path: &PathBuf) -> Result<(), ConfigError> {
pub fn save(&self) -> Result<(), ConfigError> {
let json = serde_json::json!(self);
log::info!("saving {}", json.to_string());
Ok(fs::write(path, &json.to_string())?)
Ok(fs::write(&self.config_path, &json.to_string())?)
}
}

View File

@ -1,4 +1,4 @@
use crate::config::Config;
use crate::config::{Config, GpuConfig, GpuIdentifier};
use crate::hw_mon::{HWMon, HWMonError};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
@ -24,13 +24,12 @@ pub struct FanControlInfo {
pub curve: BTreeMap<i32, f64>,
}
#[derive(Clone)]
#[derive(Deserialize, Serialize)]
pub struct GpuController {
hw_path: PathBuf,
pub hw_path: PathBuf,
hw_mon: Option<HWMon>,
pub gpu_info: GpuInfo,
config: Config,
config_path: PathBuf,
config: GpuConfig,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
@ -54,10 +53,11 @@ pub struct GpuInfo {
pub link_speed: String,
pub link_width: u8,
pub vulkan_info: VulkanInfo,
pub pci_slot: String,
}
impl GpuController {
pub fn new(hw_path: PathBuf, config: Config, config_path: PathBuf) -> Self {
pub fn new(hw_path: PathBuf, config: GpuConfig) -> Self {
let hw_mon = match fs::read_dir(&hw_path.join("hwmon")) {
Ok(mut path) => {
let path = path.next().unwrap().unwrap().path();
@ -71,19 +71,45 @@ impl GpuController {
_ => None,
};
let mut controller = GpuController {
hw_path: hw_path.clone(),
hw_mon,
gpu_info: Default::default(),
config,
config_path,
};
controller.gpu_info = controller.get_info();
log::trace!("{:?}", controller.gpu_info);
controller
}
/// Applies `config` to this controller.
///
/// Rebuilds the `HWMon` handle from the first entry under `<hw_path>/hwmon`
/// using the fan settings in `config`, then stores `config` on the controller
/// so that `get_config` reports what was actually loaded.
///
/// If the `hwmon` directory cannot be read, is empty, or its first entry
/// cannot be read, `hw_mon` is set to `None` instead of panicking.
pub fn load_config(&mut self, config: GpuConfig) {
    // First entry of the hwmon directory, if there is a readable one.
    let hwmon_dir = fs::read_dir(self.hw_path.join("hwmon"))
        .ok()
        .and_then(|mut entries| entries.next())
        .and_then(Result::ok)
        .map(|entry| entry.path());

    self.hw_mon = hwmon_dir.map(|path| {
        HWMon::new(&path, config.fan_control_enabled, config.fan_curve.clone())
    });

    // Without this the controller keeps the config it was constructed with,
    // and any later `get_config()` would hand back those stale values.
    self.config = config;
}
pub fn get_config(&self) -> GpuConfig {
self.config.clone()
}
/// Builds a `GpuIdentifier` for this controller from its cached `gpu_info`
/// (PCI slot, card model, GPU model) and its hardware path.
pub fn get_identifier(&self) -> GpuIdentifier {
    let info = &self.gpu_info;
    let pci_id = info.pci_slot.clone();
    let card_model = info.card_model.clone();
    let gpu_model = info.gpu_model.clone();
    let path = self.hw_path.clone();
    GpuIdentifier {
        pci_id,
        card_model,
        gpu_model,
        path,
    }
}
fn get_info(&self) -> GpuInfo {
let uevent =
fs::read_to_string(self.hw_path.join("uevent")).expect("Failed to read uevent");
@ -93,6 +119,7 @@ impl GpuController {
let mut model_id = String::new();
let mut card_vendor_id = String::new();
let mut card_model_id = String::new();
let mut pci_slot = String::new();
for line in uevent.split('\n') {
let split = line.split('=').collect::<Vec<&str>>();
@ -102,12 +129,13 @@ impl GpuController {
let ids = split.last().expect("failed to get split").split(':').collect::<Vec<&str>>();
vendor_id = ids.get(0).unwrap().to_string();
model_id = ids.get(0).unwrap().to_string();
}
},
&"PCI_SUBSYS_ID" => {
let ids = split.last().expect("failed to get split").split(':').collect::<Vec<&str>>();
card_vendor_id = ids.get(0).unwrap().to_string();
card_model_id = ids.get(1).unwrap().to_string();
}
},
&"PCI_SLOT_NAME" => pci_slot = split.get(1).unwrap().to_string(),
_ => (),
}
}
@ -194,6 +222,7 @@ impl GpuController {
link_speed,
link_width,
vulkan_info,
pci_slot,
}
}
@ -233,9 +262,6 @@ impl GpuController {
match hw_mon.start_fan_control() {
Ok(_) => {
self.config.fan_control_enabled = true;
self.config
.save(&self.config_path)
.expect("Failed to save config");
Ok(())
}
Err(e) => Err(e),
@ -251,9 +277,6 @@ impl GpuController {
match hw_mon.stop_fan_control() {
Ok(_) => {
self.config.fan_control_enabled = false;
self.config
.save(&self.config_path)
.expect("Failed to save config");
Ok(())
}
Err(e) => Err(e),
@ -282,9 +305,6 @@ impl GpuController {
Some(hw_mon) => {
hw_mon.set_fan_curve(curve.clone());
self.config.fan_curve = curve;
self.config
.save(&self.config_path)
.expect("Failed to save config");
Ok(())
},
None => Err(HWMonError::NoHWMon),

View File

@ -18,7 +18,7 @@ pub enum HWMonError {
NoHWMon,
}
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HWMon {
hwmon_path: PathBuf,
pub fan_max_speed: i32,

View File

@ -3,7 +3,7 @@ pub mod daemon_connection;
pub mod gpu_controller;
pub mod hw_mon;
use config::Config;
use config::{Config, GpuConfig, GpuIdentifier};
use serde::{Deserialize, Serialize};
use std::{collections::{BTreeMap, HashMap}, fs};
use std::os::unix::net::{UnixListener, UnixStream};
@ -22,6 +22,7 @@ pub const SOCK_PATH: &str = "/tmp/amdgpu-configurator.sock";
pub struct Daemon {
gpu_controllers: HashMap<u32, GpuController>,
listener: UnixListener,
config: Config,
}
#[derive(Serialize, Deserialize, Debug)]
@ -38,7 +39,7 @@ pub enum Action {
}
impl Daemon {
pub fn new(unpriveleged: bool) -> Daemon {
pub fn new(unprivileged: bool) -> Daemon {
if fs::metadata(SOCK_PATH).is_ok() {
fs::remove_file(SOCK_PATH).expect("Failed to take control over socket");
}
@ -52,14 +53,14 @@ impl Daemon {
.expect("Failed to chmod");
let config_path = PathBuf::from("/etc/lact.json");
let config = if unpriveleged {
Config::new()
let mut config = if unprivileged {
Config::new(&config_path)
} else {
match Config::read_from_file(&config_path) {
Ok(c) => c,
Err(_) => {
let c = Config::new();
c.save(&config_path).expect("Failed to save config");
let c = Config::new(&config_path);
//c.save().unwrap();
c
}
}
@ -69,35 +70,58 @@ impl Daemon {
let mut gpu_controllers: HashMap<u32, GpuController> = HashMap::new();
for entry in fs::read_dir("/sys/class/drm").expect("Could not open /sys/class/drm") {
/*for (gpu_identifier, gpu_config) in &config.gpu_configs {
let mut controller = GpuController::new(gpu_identifier.path.clone(), GpuConfig::new());
if controller.gpu_info.pci_slot == gpu_identifier.pci_id && controller.gpu_info.card_model == gpu_identifier.card_model && controller.gpu_info.gpu_model == gpu_identifier.gpu_model {
controller.load_config(gpu_config.clone());
gpu_controllers.insert(gpu_identifier.id, controller);
}
}*/
'entries: for entry in fs::read_dir("/sys/class/drm").expect("Could not open /sys/class/drm") {
let entry = entry.unwrap();
if entry.file_name().len() == 5 {
if entry.file_name().to_str().unwrap().split_at(4).0 == "card" {
log::info!("Initializing {:?}", entry.path());
loop {
let id: u32 = random();
if !gpu_controllers.contains_key(&id) {
gpu_controllers.insert(id, GpuController::new(entry.path().join("device"), config.clone(), config_path.clone()));
break;
let mut controller = GpuController::new(entry.path().join("device"), GpuConfig::new());
for (id, (gpu_identifier, gpu_config)) in &config.gpu_configs {
if controller.gpu_info.pci_slot == gpu_identifier.pci_id && controller.gpu_info.card_model == gpu_identifier.card_model && controller.gpu_info.gpu_model == gpu_identifier.gpu_model {
controller.load_config(gpu_config.clone());
gpu_controllers.insert(id.clone(), controller);
log::info!("already known");
continue 'entries;
}
}
log::info!("initializing for the first time");
let id: u32 = random();
config.gpu_configs.insert(id, (controller.get_identifier(), controller.get_config()));
gpu_controllers.insert(id, controller);
}
}
}
config.save().unwrap();
Daemon {
listener,
gpu_controllers,
config,
}
}
pub fn listen(mut self) {
for stream in self.listener.incoming() {
let listener = self.listener.try_clone().expect("couldn't try_clone");
for stream in listener.incoming() {
match stream {
Ok(stream) => {
//let mut controller = self.gpu_controller.clone();
//thread::spawn(move || Daemon::handle_connection(&mut controller, stream));
Daemon::handle_connection(&mut self.gpu_controllers, stream);
//Daemon::handle_connection(&mut self.gpu_controllers, stream);
Daemon::handle_connection(&mut self, stream);
}
Err(err) => {
log::error!("Error: {}", err);
@ -107,7 +131,8 @@ impl Daemon {
}
}
fn handle_connection(gpu_controllers: &mut HashMap<u32, GpuController>, mut stream: UnixStream) {
//fn handle_connection(gpu_controllers: &mut HashMap<u32, GpuController>, mut stream: UnixStream) {
fn handle_connection(&mut self, mut stream: UnixStream) {
log::trace!("Reading buffer");
let mut buffer = Vec::<u8>::new();
stream.read_to_end(&mut buffer).unwrap();
@ -122,48 +147,60 @@ impl Daemon {
Action::CheckAlive => Ok(DaemonResponse::OK),
Action::GetGpus => {
let mut gpus: HashMap<u32, String> = HashMap::new();
for controller in gpu_controllers {
for controller in &self.gpu_controllers {
gpus.insert(*controller.0, controller.1.gpu_info.gpu_model.clone());
}
Ok(DaemonResponse::Gpus(gpus))
},
Action::GetStats(i) => match gpu_controllers.get(&i) {
Action::GetStats(i) => match self.gpu_controllers.get(&i) {
Some(controller) => Ok(DaemonResponse::GpuStats(controller.get_stats())),
None => Err(DaemonError::InvalidID),
},
Action::GetInfo(i) => match gpu_controllers.get(&i) {
Action::GetInfo(i) => match self.gpu_controllers.get(&i) {
Some(controller) => Ok(DaemonResponse::GpuInfo(controller.gpu_info.clone())),
None => Err(DaemonError::InvalidID),
},
Action::StartFanControl(i) => match gpu_controllers.get_mut(&i) {
Action::StartFanControl(i) => match self.gpu_controllers.get_mut(&i) {
Some(controller) => match controller.start_fan_control() {
Ok(_) => Ok(DaemonResponse::OK),
Ok(_) => {
self.config.gpu_configs.insert(i, (controller.get_identifier(), controller.get_config()));
self.config.save().unwrap();
Ok(DaemonResponse::OK)
},
Err(_) => Err(DaemonError::HWMonError),
}
None => Err(DaemonError::InvalidID),
},
Action::StopFanControl(i) => match gpu_controllers.get_mut(&i) {
Action::StopFanControl(i) => match self.gpu_controllers.get_mut(&i) {
Some(controller) => match controller.stop_fan_control() {
Ok(_) => Ok(DaemonResponse::OK),
Ok(_) => {
self.config.gpu_configs.insert(i, (controller.get_identifier(), controller.get_config()));
self.config.save().unwrap();
Ok(DaemonResponse::OK)
},
Err(_) => Err(DaemonError::HWMonError),
},
None => Err(DaemonError::InvalidID),
},
Action::GetFanControl(i) => match gpu_controllers.get(&i) {
Action::GetFanControl(i) => match self.gpu_controllers.get(&i) {
Some(controller) => match controller.get_fan_control() {
Ok(info) => Ok(DaemonResponse::FanControlInfo(info)),
Err(_) => Err(DaemonError::HWMonError),
}
None => Err(DaemonError::InvalidID),
}
Action::SetFanCurve(i, curve) => match gpu_controllers.get_mut(&i) {
Action::SetFanCurve(i, curve) => match self.gpu_controllers.get_mut(&i) {
Some(controller) => {
let mut buffer = Vec::new();
stream.read_to_end(&mut buffer).unwrap();
match controller.set_fan_curve(curve) {
Ok(_) => Ok(DaemonResponse::OK),
Ok(_) => {
self.config.gpu_configs.insert(i, (controller.get_identifier(), controller.get_config()));
self.config.save().unwrap();
Ok(DaemonResponse::OK)
},
Err(_) => Err(DaemonError::HWMonError),
}