mirror of
https://github.com/ilya-zlobintsev/LACT.git
synced 2025-02-25 18:55:26 -06:00
fix: segfault caused by incorrect usage of fork
in the embedded daemon (#259)
Fixes https://github.com/ilya-zlobintsev/LACT/issues/204
This commit is contained in:
parent
1bbea44dc2
commit
e672ed4621
@ -17,7 +17,7 @@ serde = { workspace = true }
|
||||
serde_with = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
nix = { workspace = true, features = ["user", "fs", "process"] }
|
||||
nix = { workspace = true, features = ["user", "fs"] }
|
||||
|
||||
bincode = "1.3"
|
||||
pciid-parser = { version = "0.7", features = ["serde"] }
|
||||
|
@ -1,100 +0,0 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use nix::{
|
||||
sys::wait::waitpid,
|
||||
unistd::{fork, ForkResult},
|
||||
};
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
use std::{
|
||||
fmt::Debug,
|
||||
io::{BufReader, Read, Write},
|
||||
mem::size_of,
|
||||
os::unix::net::UnixStream,
|
||||
};
|
||||
use tracing::trace;
|
||||
|
||||
pub unsafe fn run_forked<T, F>(f: F) -> anyhow::Result<T>
|
||||
where
|
||||
T: Serialize + DeserializeOwned + Debug,
|
||||
F: FnOnce() -> Result<T, String>,
|
||||
{
|
||||
let (rx, mut tx) = UnixStream::pair()?;
|
||||
let mut rx = BufReader::new(rx);
|
||||
|
||||
match fork()? {
|
||||
ForkResult::Parent { child } => {
|
||||
trace!("waiting for message from child");
|
||||
|
||||
let mut size_buf = [0u8; size_of::<usize>()];
|
||||
rx.read_exact(&mut size_buf)?;
|
||||
let size = usize::from_ne_bytes(size_buf);
|
||||
|
||||
let mut data_buf = vec![0u8; size];
|
||||
rx.read_exact(&mut data_buf)?;
|
||||
|
||||
trace!("received {} data bytes from child", data_buf.len());
|
||||
|
||||
waitpid(child, None)?;
|
||||
|
||||
let data: Result<T, String> = bincode::deserialize(&data_buf)
|
||||
.context("Could not deserialize response from child")?;
|
||||
|
||||
data.map_err(|err| anyhow!("{err}"))
|
||||
}
|
||||
ForkResult::Child => {
|
||||
let response = f();
|
||||
trace!("sending response to parent: {response:?}");
|
||||
|
||||
let send_result = (|| {
|
||||
let data = bincode::serialize(&response)?;
|
||||
tx.write_all(&data.len().to_ne_bytes())?;
|
||||
tx.write_all(&data)?;
|
||||
Ok::<_, anyhow::Error>(())
|
||||
})();
|
||||
|
||||
let exit_code = match send_result {
|
||||
Ok(()) => 0,
|
||||
Err(_) => 1,
|
||||
};
|
||||
trace!("exiting child with code {exit_code}");
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::run_forked;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let response = unsafe { run_forked(|| Ok(String::from("hello"))).unwrap() };
|
||||
assert_eq!(response, "hello");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error() {
|
||||
let response =
|
||||
unsafe { run_forked::<(), _>(|| Err("something went wrong".to_owned())) }.unwrap_err();
|
||||
assert_eq!(response.to_string(), "something went wrong");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vec() {
|
||||
let response = unsafe {
|
||||
run_forked(|| {
|
||||
let data = ["hello", "world", "123"].map(str::to_owned);
|
||||
Ok(data)
|
||||
})
|
||||
.unwrap()
|
||||
};
|
||||
assert_eq!(response, ["hello", "world", "123"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pci_db() {
|
||||
let db = unsafe {
|
||||
run_forked(|| pciid_parser::Database::read().map_err(|err| err.to_string())).unwrap()
|
||||
};
|
||||
assert_ne!(db.classes.len(), 0);
|
||||
}
|
||||
}
|
@ -2,7 +2,6 @@
|
||||
#![allow(clippy::missing_panics_doc)]
|
||||
|
||||
mod config;
|
||||
mod fork;
|
||||
mod server;
|
||||
mod socket;
|
||||
mod suspend;
|
||||
|
@ -2,10 +2,7 @@ pub mod fan_control;
|
||||
|
||||
use self::fan_control::FanCurve;
|
||||
use super::vulkan::get_vulkan_info;
|
||||
use crate::{
|
||||
config::{self, ClocksConfiguration},
|
||||
fork::run_forked,
|
||||
};
|
||||
use crate::config::{self, ClocksConfiguration};
|
||||
use amdgpu_sysfs::{
|
||||
error::Error,
|
||||
gpu_handle::{
|
||||
@ -58,7 +55,7 @@ pub struct GpuController {
|
||||
}
|
||||
|
||||
impl GpuController {
|
||||
pub fn new_from_path(sysfs_path: PathBuf) -> anyhow::Result<Self> {
|
||||
pub fn new_from_path(sysfs_path: PathBuf, pci_db: &Database) -> anyhow::Result<Self> {
|
||||
let handle = GpuHandle::new_from_path(sysfs_path)
|
||||
.map_err(|error| anyhow!("failed to initialize gpu handle: {error}"))?;
|
||||
|
||||
@ -83,34 +80,22 @@ impl GpuController {
|
||||
});
|
||||
|
||||
if let Some((subsys_vendor_id, subsys_model_id)) = handle.get_pci_subsys_id() {
|
||||
let (new_device_info, new_subsystem_info) = unsafe {
|
||||
run_forked(|| {
|
||||
let pci_db = Database::read().map_err(|err| err.to_string())?;
|
||||
let pci_device_info = pci_db.get_device_info(
|
||||
vendor_id,
|
||||
model_id,
|
||||
subsys_vendor_id,
|
||||
subsys_model_id,
|
||||
);
|
||||
let pci_device_info =
|
||||
pci_db.get_device_info(vendor_id, model_id, subsys_vendor_id, subsys_model_id);
|
||||
|
||||
let device_pci_info = PciInfo {
|
||||
vendor_id: vendor_id.to_owned(),
|
||||
vendor: pci_device_info.vendor_name.map(str::to_owned),
|
||||
model_id: model_id.to_owned(),
|
||||
model: pci_device_info.device_name.map(str::to_owned),
|
||||
};
|
||||
let subsystem_pci_info = PciInfo {
|
||||
vendor_id: subsys_vendor_id.to_owned(),
|
||||
vendor: pci_device_info.subvendor_name.map(str::to_owned),
|
||||
model_id: subsys_model_id.to_owned(),
|
||||
model: pci_device_info.subdevice_name.map(str::to_owned),
|
||||
};
|
||||
Ok((device_pci_info, subsystem_pci_info))
|
||||
})?
|
||||
};
|
||||
device_pci_info = Some(new_device_info);
|
||||
subsystem_pci_info = Some(new_subsystem_info);
|
||||
}
|
||||
device_pci_info = Some(PciInfo {
|
||||
vendor_id: vendor_id.to_owned(),
|
||||
vendor: pci_device_info.vendor_name.map(str::to_owned),
|
||||
model_id: model_id.to_owned(),
|
||||
model: pci_device_info.device_name.map(str::to_owned),
|
||||
});
|
||||
subsystem_pci_info = Some(PciInfo {
|
||||
vendor_id: subsys_vendor_id.to_owned(),
|
||||
vendor: pci_device_info.subvendor_name.map(str::to_owned),
|
||||
model_id: subsys_model_id.to_owned(),
|
||||
model: pci_device_info.subdevice_name.map(str::to_owned),
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
let pci_info = device_pci_info.and_then(|device_pci_info| {
|
||||
|
@ -15,11 +15,13 @@ use lact_schema::{
|
||||
PowerStates,
|
||||
};
|
||||
use libflate::gzip;
|
||||
use nix::libc;
|
||||
use os_release::OS_RELEASE;
|
||||
use pciid_parser::Database;
|
||||
use serde_json::json;
|
||||
use std::{
|
||||
cell::RefCell,
|
||||
collections::BTreeMap,
|
||||
collections::{BTreeMap, HashMap},
|
||||
env,
|
||||
fs::{File, Permissions},
|
||||
io::{BufWriter, Cursor, Write},
|
||||
@ -101,6 +103,13 @@ impl<'a> Handler {
|
||||
};
|
||||
handler.load_config().await;
|
||||
|
||||
// Eagerly release memory
|
||||
// `load_controllers` allocates and deallocates the entire PCI ID database,
|
||||
// this tells the os to release it right away, lowering measured memory usage (the actual usage is low regardless as it was already deallocated)
|
||||
unsafe {
|
||||
libc::malloc_trim(0);
|
||||
}
|
||||
|
||||
Ok(handler)
|
||||
}
|
||||
|
||||
@ -567,6 +576,14 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, GpuController>> {
|
||||
Err(_) => PathBuf::from("/sys/class/drm"),
|
||||
};
|
||||
|
||||
let pci_db = Database::read().unwrap_or_else(|err| {
|
||||
warn!("could not read PCI ID database: {err}, device information will be limited");
|
||||
Database {
|
||||
vendors: HashMap::new(),
|
||||
classes: HashMap::new(),
|
||||
}
|
||||
});
|
||||
|
||||
for entry in base_path
|
||||
.read_dir()
|
||||
.map_err(|error| anyhow!("Failed to read sysfs: {error}"))?
|
||||
@ -580,7 +597,7 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, GpuController>> {
|
||||
if name.starts_with("card") && !name.contains('-') {
|
||||
trace!("trying gpu controller at {:?}", entry.path());
|
||||
let device_path = entry.path().join("device");
|
||||
match GpuController::new_from_path(device_path) {
|
||||
match GpuController::new_from_path(device_path, &pci_db) {
|
||||
Ok(controller) => match controller.get_id() {
|
||||
Ok(id) => {
|
||||
let path = controller.get_path();
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::fork::run_forked;
|
||||
use anyhow::{anyhow, Context};
|
||||
use lact_schema::{VulkanDriverInfo, VulkanInfo};
|
||||
use std::borrow::Cow;
|
||||
use tracing::trace;
|
||||
@ -12,47 +12,45 @@ pub fn get_vulkan_info<'a>(vendor_id: &'a str, device_id: &'a str) -> anyhow::Re
|
||||
let vendor_id = u32::from_str_radix(vendor_id, 16)?;
|
||||
let device_id = u32::from_str_radix(device_id, 16)?;
|
||||
|
||||
unsafe {
|
||||
run_forked(|| {
|
||||
let library = VulkanLibrary::new().map_err(|err| err.to_string())?;
|
||||
let instance = Instance::new(library, InstanceCreateInfo::default())
|
||||
.map_err(|err| err.to_string())?;
|
||||
let enabled_layers = instance.enabled_layers().to_vec();
|
||||
let devices = instance
|
||||
.enumerate_physical_devices()
|
||||
.map_err(|err| err.to_string())?;
|
||||
let library = VulkanLibrary::new().context("Could not create vulkan library")?;
|
||||
let instance = Instance::new(library, InstanceCreateInfo::default())
|
||||
.context("Could not create vulkan instance")?;
|
||||
let enabled_layers = instance.enabled_layers().to_vec();
|
||||
let devices = instance
|
||||
.enumerate_physical_devices()
|
||||
.context("Could not enumerate vulkan devices")?;
|
||||
|
||||
for device in devices {
|
||||
let properties = device.properties();
|
||||
for device in devices {
|
||||
let properties = device.properties();
|
||||
|
||||
// Not sure how this works with systems that have multiple identical GPUs
|
||||
if (properties.vendor_id, properties.device_id) == (vendor_id, device_id) {
|
||||
let info = VulkanInfo {
|
||||
device_name: properties.device_name.clone(),
|
||||
api_version: device.api_version().to_string(),
|
||||
driver: VulkanDriverInfo {
|
||||
version: properties.driver_version,
|
||||
name: properties.driver_name.clone(),
|
||||
info: properties.driver_info.clone(),
|
||||
driver_version: None,
|
||||
},
|
||||
features: device
|
||||
.supported_features()
|
||||
.into_iter()
|
||||
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
|
||||
.collect(),
|
||||
extensions: device
|
||||
.supported_extensions()
|
||||
.into_iter()
|
||||
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
|
||||
.collect(),
|
||||
enabled_layers,
|
||||
};
|
||||
return Ok(info);
|
||||
}
|
||||
}
|
||||
|
||||
Err("Could not find a vulkan device with matching pci ids".to_owned())
|
||||
})
|
||||
// Not sure how this works with systems that have multiple identical GPUs
|
||||
if (properties.vendor_id, properties.device_id) == (vendor_id, device_id) {
|
||||
let info = VulkanInfo {
|
||||
device_name: properties.device_name.clone(),
|
||||
api_version: device.api_version().to_string(),
|
||||
driver: VulkanDriverInfo {
|
||||
version: properties.driver_version,
|
||||
name: properties.driver_name.clone(),
|
||||
info: properties.driver_info.clone(),
|
||||
driver_version: None,
|
||||
},
|
||||
features: device
|
||||
.supported_features()
|
||||
.into_iter()
|
||||
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
|
||||
.collect(),
|
||||
extensions: device
|
||||
.supported_extensions()
|
||||
.into_iter()
|
||||
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
|
||||
.collect(),
|
||||
enabled_layers,
|
||||
};
|
||||
return Ok(info);
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"Could not find a vulkan device with matching pci ids"
|
||||
))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user