fix: segfault caused by incorrect usage of fork in the embedded daemon (#259)

Fixes https://github.com/ilya-zlobintsev/LACT/issues/204
This commit is contained in:
Ilya Zlobintsev 2024-02-06 08:37:20 +02:00 committed by GitHub
parent 1bbea44dc2
commit e672ed4621
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 76 additions and 177 deletions

View File

@ -17,7 +17,7 @@ serde = { workspace = true }
serde_with = { workspace = true }
serde_json = { workspace = true }
tracing-subscriber = { workspace = true }
nix = { workspace = true, features = ["user", "fs", "process"] }
nix = { workspace = true, features = ["user", "fs"] }
bincode = "1.3"
pciid-parser = { version = "0.7", features = ["serde"] }

View File

@ -1,100 +0,0 @@
use anyhow::{anyhow, Context};
use nix::{
sys::wait::waitpid,
unistd::{fork, ForkResult},
};
use serde::{de::DeserializeOwned, Serialize};
use std::{
fmt::Debug,
io::{BufReader, Read, Write},
mem::size_of,
os::unix::net::UnixStream,
};
use tracing::trace;
pub unsafe fn run_forked<T, F>(f: F) -> anyhow::Result<T>
where
T: Serialize + DeserializeOwned + Debug,
F: FnOnce() -> Result<T, String>,
{
let (rx, mut tx) = UnixStream::pair()?;
let mut rx = BufReader::new(rx);
match fork()? {
ForkResult::Parent { child } => {
trace!("waiting for message from child");
let mut size_buf = [0u8; size_of::<usize>()];
rx.read_exact(&mut size_buf)?;
let size = usize::from_ne_bytes(size_buf);
let mut data_buf = vec![0u8; size];
rx.read_exact(&mut data_buf)?;
trace!("received {} data bytes from child", data_buf.len());
waitpid(child, None)?;
let data: Result<T, String> = bincode::deserialize(&data_buf)
.context("Could not deserialize response from child")?;
data.map_err(|err| anyhow!("{err}"))
}
ForkResult::Child => {
let response = f();
trace!("sending response to parent: {response:?}");
let send_result = (|| {
let data = bincode::serialize(&response)?;
tx.write_all(&data.len().to_ne_bytes())?;
tx.write_all(&data)?;
Ok::<_, anyhow::Error>(())
})();
let exit_code = match send_result {
Ok(()) => 0,
Err(_) => 1,
};
trace!("exiting child with code {exit_code}");
std::process::exit(exit_code);
}
}
}
#[cfg(test)]
mod tests {
use super::run_forked;
#[test]
fn basic() {
let response = unsafe { run_forked(|| Ok(String::from("hello"))).unwrap() };
assert_eq!(response, "hello");
}
#[test]
fn error() {
let response =
unsafe { run_forked::<(), _>(|| Err("something went wrong".to_owned())) }.unwrap_err();
assert_eq!(response.to_string(), "something went wrong");
}
#[test]
fn vec() {
let response = unsafe {
run_forked(|| {
let data = ["hello", "world", "123"].map(str::to_owned);
Ok(data)
})
.unwrap()
};
assert_eq!(response, ["hello", "world", "123"]);
}
#[test]
fn pci_db() {
let db = unsafe {
run_forked(|| pciid_parser::Database::read().map_err(|err| err.to_string())).unwrap()
};
assert_ne!(db.classes.len(), 0);
}
}

View File

@ -2,7 +2,6 @@
#![allow(clippy::missing_panics_doc)]
mod config;
mod fork;
mod server;
mod socket;
mod suspend;

View File

@ -2,10 +2,7 @@ pub mod fan_control;
use self::fan_control::FanCurve;
use super::vulkan::get_vulkan_info;
use crate::{
config::{self, ClocksConfiguration},
fork::run_forked,
};
use crate::config::{self, ClocksConfiguration};
use amdgpu_sysfs::{
error::Error,
gpu_handle::{
@ -58,7 +55,7 @@ pub struct GpuController {
}
impl GpuController {
pub fn new_from_path(sysfs_path: PathBuf) -> anyhow::Result<Self> {
pub fn new_from_path(sysfs_path: PathBuf, pci_db: &Database) -> anyhow::Result<Self> {
let handle = GpuHandle::new_from_path(sysfs_path)
.map_err(|error| anyhow!("failed to initialize gpu handle: {error}"))?;
@ -83,34 +80,22 @@ impl GpuController {
});
if let Some((subsys_vendor_id, subsys_model_id)) = handle.get_pci_subsys_id() {
let (new_device_info, new_subsystem_info) = unsafe {
run_forked(|| {
let pci_db = Database::read().map_err(|err| err.to_string())?;
let pci_device_info = pci_db.get_device_info(
vendor_id,
model_id,
subsys_vendor_id,
subsys_model_id,
);
let pci_device_info =
pci_db.get_device_info(vendor_id, model_id, subsys_vendor_id, subsys_model_id);
let device_pci_info = PciInfo {
vendor_id: vendor_id.to_owned(),
vendor: pci_device_info.vendor_name.map(str::to_owned),
model_id: model_id.to_owned(),
model: pci_device_info.device_name.map(str::to_owned),
};
let subsystem_pci_info = PciInfo {
vendor_id: subsys_vendor_id.to_owned(),
vendor: pci_device_info.subvendor_name.map(str::to_owned),
model_id: subsys_model_id.to_owned(),
model: pci_device_info.subdevice_name.map(str::to_owned),
};
Ok((device_pci_info, subsystem_pci_info))
})?
};
device_pci_info = Some(new_device_info);
subsystem_pci_info = Some(new_subsystem_info);
}
device_pci_info = Some(PciInfo {
vendor_id: vendor_id.to_owned(),
vendor: pci_device_info.vendor_name.map(str::to_owned),
model_id: model_id.to_owned(),
model: pci_device_info.device_name.map(str::to_owned),
});
subsystem_pci_info = Some(PciInfo {
vendor_id: subsys_vendor_id.to_owned(),
vendor: pci_device_info.subvendor_name.map(str::to_owned),
model_id: subsys_model_id.to_owned(),
model: pci_device_info.subdevice_name.map(str::to_owned),
});
};
}
let pci_info = device_pci_info.and_then(|device_pci_info| {

View File

@ -15,11 +15,13 @@ use lact_schema::{
PowerStates,
};
use libflate::gzip;
use nix::libc;
use os_release::OS_RELEASE;
use pciid_parser::Database;
use serde_json::json;
use std::{
cell::RefCell,
collections::BTreeMap,
collections::{BTreeMap, HashMap},
env,
fs::{File, Permissions},
io::{BufWriter, Cursor, Write},
@ -101,6 +103,13 @@ impl<'a> Handler {
};
handler.load_config().await;
// Eagerly release memory
// `load_controllers` allocates and deallocates the entire PCI ID database,
// this tells the os to release it right away, lowering measured memory usage (the actual usage is low regardless as it was already deallocated)
unsafe {
libc::malloc_trim(0);
}
Ok(handler)
}
@ -567,6 +576,14 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, GpuController>> {
Err(_) => PathBuf::from("/sys/class/drm"),
};
let pci_db = Database::read().unwrap_or_else(|err| {
warn!("could not read PCI ID database: {err}, device information will be limited");
Database {
vendors: HashMap::new(),
classes: HashMap::new(),
}
});
for entry in base_path
.read_dir()
.map_err(|error| anyhow!("Failed to read sysfs: {error}"))?
@ -580,7 +597,7 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, GpuController>> {
if name.starts_with("card") && !name.contains('-') {
trace!("trying gpu controller at {:?}", entry.path());
let device_path = entry.path().join("device");
match GpuController::new_from_path(device_path) {
match GpuController::new_from_path(device_path, &pci_db) {
Ok(controller) => match controller.get_id() {
Ok(id) => {
let path = controller.get_path();

View File

@ -1,4 +1,4 @@
use crate::fork::run_forked;
use anyhow::{anyhow, Context};
use lact_schema::{VulkanDriverInfo, VulkanInfo};
use std::borrow::Cow;
use tracing::trace;
@ -12,47 +12,45 @@ pub fn get_vulkan_info<'a>(vendor_id: &'a str, device_id: &'a str) -> anyhow::Re
let vendor_id = u32::from_str_radix(vendor_id, 16)?;
let device_id = u32::from_str_radix(device_id, 16)?;
unsafe {
run_forked(|| {
let library = VulkanLibrary::new().map_err(|err| err.to_string())?;
let instance = Instance::new(library, InstanceCreateInfo::default())
.map_err(|err| err.to_string())?;
let enabled_layers = instance.enabled_layers().to_vec();
let devices = instance
.enumerate_physical_devices()
.map_err(|err| err.to_string())?;
let library = VulkanLibrary::new().context("Could not create vulkan library")?;
let instance = Instance::new(library, InstanceCreateInfo::default())
.context("Could not create vulkan instance")?;
let enabled_layers = instance.enabled_layers().to_vec();
let devices = instance
.enumerate_physical_devices()
.context("Could not enumerate vulkan devices")?;
for device in devices {
let properties = device.properties();
for device in devices {
let properties = device.properties();
// Not sure how this works with systems that have multiple identical GPUs
if (properties.vendor_id, properties.device_id) == (vendor_id, device_id) {
let info = VulkanInfo {
device_name: properties.device_name.clone(),
api_version: device.api_version().to_string(),
driver: VulkanDriverInfo {
version: properties.driver_version,
name: properties.driver_name.clone(),
info: properties.driver_info.clone(),
driver_version: None,
},
features: device
.supported_features()
.into_iter()
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
.collect(),
extensions: device
.supported_extensions()
.into_iter()
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
.collect(),
enabled_layers,
};
return Ok(info);
}
}
Err("Could not find a vulkan device with matching pci ids".to_owned())
})
// Not sure how this works with systems that have multiple identical GPUs
if (properties.vendor_id, properties.device_id) == (vendor_id, device_id) {
let info = VulkanInfo {
device_name: properties.device_name.clone(),
api_version: device.api_version().to_string(),
driver: VulkanDriverInfo {
version: properties.driver_version,
name: properties.driver_name.clone(),
info: properties.driver_info.clone(),
driver_version: None,
},
features: device
.supported_features()
.into_iter()
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
.collect(),
extensions: device
.supported_extensions()
.into_iter()
.map(|(name, enabled)| (Cow::Borrowed(name), enabled))
.collect(),
enabled_layers,
};
return Ok(info);
}
}
Err(anyhow!(
"Could not find a vulkan device with matching pci ids"
))
}