From e61b42b9cc42809f59c1009c7b27f38cf604cb5c Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 14 Aug 2024 09:52:45 -0500 Subject: [PATCH] Implement `bpf_map_delete_batch` for clearing eBPF maps (particularly the hot cache clear). --- src/rust/lqos_sys/src/bpf_map.rs | 410 +++++++++++++----------- src/rust/lqos_sys/src/ip_mapping/mod.rs | 2 +- 2 files changed, 215 insertions(+), 197 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf_map.rs b/src/rust/lqos_sys/src/bpf_map.rs index 29bb8c24..23026cd1 100644 --- a/src/rust/lqos_sys/src/bpf_map.rs +++ b/src/rust/lqos_sys/src/bpf_map.rs @@ -1,229 +1,247 @@ #![allow(dead_code)] -use anyhow::{Error, Result}; -use libbpf_sys::{ - bpf_map_delete_elem, bpf_map_get_next_key, bpf_map_lookup_elem, - bpf_map_update_elem, bpf_obj_get, BPF_NOEXIST, -}; use std::{ - ffi::{c_void, CString}, - marker::PhantomData, - ptr::null_mut, + ffi::{c_void, CString}, + marker::PhantomData, + ptr::null_mut, }; +use anyhow::{Error, Result}; +use libbpf_sys::{bpf_map_delete_elem, bpf_map_get_next_key, bpf_map_lookup_elem, bpf_map_update_elem, BPF_NOEXIST, bpf_obj_get}; + +use crate::lqos_kernel::bpf::bpf_map_delete_batch; + /// Represents an underlying BPF map, accessed via the filesystem. /// `BpfMap` *only* talks to shared (not PER-CPU) variants of maps. /// /// `K` is the *key* type, indexing the map. /// `V` is the *value* type, and must exactly match the underlying C data type. pub struct BpfMap { - fd: i32, - _key_phantom: PhantomData, - _val_phantom: PhantomData, + fd: i32, + _key_phantom: PhantomData, + _val_phantom: PhantomData, } impl BpfMap where - K: Default + Clone, - V: Default + Clone, + K: Default + Clone, + V: Default + Clone, { - /// Connect to a BPF map via a filename. Connects the internal - /// file descriptor, which is held until the structure is - /// dropped. - pub fn from_path(filename: &str) -> Result { - let filename_c = CString::new(filename)?; - let fd = unsafe { bpf_obj_get(filename_c.as_ptr()) }; - if fd < 0 { - Err(Error::msg("Unable to open BPF map")) - } else { - Ok(Self { fd, _key_phantom: PhantomData, _val_phantom: PhantomData }) - } - } - - /// Iterates the undlering BPF map, and adds the results - /// to a vector. Each entry contains a `key, value` tuple. - /// - /// This has performance issues due to excessive cloning - pub fn dump_vec(&self) -> Vec<(K, V)> { - let mut result = Vec::new(); - - let mut prev_key: *mut K = null_mut(); - let mut key: K = K::default(); - let key_ptr: *mut K = &mut key; - let mut value = V::default(); - let value_ptr: *mut V = &mut value; - - unsafe { - while bpf_map_get_next_key( - self.fd, - prev_key as *mut c_void, - key_ptr as *mut c_void, - ) == 0 - { - bpf_map_lookup_elem( - self.fd, - key_ptr as *mut c_void, - value_ptr as *mut c_void, - ); - result.push((key.clone(), value.clone())); - prev_key = key_ptr; - } + /// Connect to a BPF map via a filename. Connects the internal + /// file descriptor, which is held until the structure is + /// dropped. + pub fn from_path(filename: &str) -> Result { + let filename_c = CString::new(filename)?; + let fd = unsafe { bpf_obj_get(filename_c.as_ptr()) }; + if fd < 0 { + Err(Error::msg("Unable to open BPF map")) + } else { + Ok(Self { fd, _key_phantom: PhantomData, _val_phantom: PhantomData }) + } } - result - } + /// Iterates the underlying BPF map, and adds the results + /// to a vector. Each entry contains a `key, value` tuple. + /// + /// This has performance issues due to excessive cloning + pub fn dump_vec(&self) -> Vec<(K, V)> { + let mut result = Vec::new(); - /// Inserts an entry into a BPF map. - /// Use this sparingly, because it briefly pauses XDP access to the - /// underlying map (through internal locking we can't reach from - /// userland). - /// - /// ## Arguments - /// - /// * `key` - the key to insert. - /// * `value` - the value to insert. - /// - /// Returns Ok if insertion succeeded, a generic error (no details yet) - /// if it fails. - pub fn insert(&mut self, key: &mut K, value: &mut V) -> Result<()> { - let key_ptr: *mut K = key; - let val_ptr: *mut V = value; - let err = unsafe { - bpf_map_update_elem( - self.fd, - key_ptr as *mut c_void, - val_ptr as *mut c_void, - BPF_NOEXIST.into(), - ) - }; - if err != 0 { - Err(Error::msg(format!("Unable to insert into map ({err})"))) - } else { - Ok(()) + let mut prev_key: *mut K = null_mut(); + let mut key: K = K::default(); + let key_ptr: *mut K = &mut key; + let mut value = V::default(); + let value_ptr: *mut V = &mut value; + + unsafe { + while bpf_map_get_next_key( + self.fd, + prev_key as *mut c_void, + key_ptr as *mut c_void, + ) == 0 + { + bpf_map_lookup_elem( + self.fd, + key_ptr as *mut c_void, + value_ptr as *mut c_void, + ); + result.push((key.clone(), value.clone())); + prev_key = key_ptr; + } + } + + result } - } - /// Inserts an entry into a BPF map. - /// Use this sparingly, because it briefly pauses XDP access to the - /// underlying map (through internal locking we can't reach from - /// userland). - /// - /// ## Arguments - /// - /// * `key` - the key to insert. - /// * `value` - the value to insert. - /// - /// Returns Ok if insertion succeeded, a generic error (no details yet) - /// if it fails. - pub fn insert_or_update(&mut self, key: &mut K, value: &mut V) -> Result<()> { - let key_ptr: *mut K = key; - let val_ptr: *mut V = value; - let err = unsafe { - bpf_map_update_elem( - self.fd, - key_ptr as *mut c_void, - val_ptr as *mut c_void, - 0, - ) - }; - if err != 0 { - Err(Error::msg(format!("Unable to insert into map ({err})"))) - } else { - Ok(()) + /// Inserts an entry into a BPF map. + /// Use this sparingly, because it briefly pauses XDP access to the + /// underlying map (through internal locking we can't reach from + /// userland). + /// + /// ## Arguments + /// + /// * `key` - the key to insert. + /// * `value` - the value to insert. + /// + /// Returns Ok if insertion succeeded, a generic error (no details yet) + /// if it fails. + pub fn insert(&mut self, key: &mut K, value: &mut V) -> Result<()> { + let key_ptr: *mut K = key; + let val_ptr: *mut V = value; + let err = unsafe { + bpf_map_update_elem( + self.fd, + key_ptr as *mut c_void, + val_ptr as *mut c_void, + BPF_NOEXIST.into(), + ) + }; + if err != 0 { + Err(Error::msg(format!("Unable to insert into map ({err})"))) + } else { + Ok(()) + } } - } - /// Deletes an entry from the underlying eBPF map. - /// Use this sparingly, it locks the underlying map in the - /// kernel. This can cause *long* delays under heavy load. - /// - /// ## Arguments - /// - /// * `key` - the key to delete. - /// - /// Return `Ok` if deletion succeeded. - pub fn delete(&mut self, key: &mut K) -> Result<()> { - let key_ptr: *mut K = key; - let err = unsafe { bpf_map_delete_elem(self.fd, key_ptr as *mut c_void) }; - if err != 0 { - if err == -2 { - // ENOEXIST : not actually an error, just nothing to do + /// Inserts an entry into a BPF map. + /// Use this sparingly, because it briefly pauses XDP access to the + /// underlying map (through internal locking we can't reach from + /// userland). + /// + /// ## Arguments + /// + /// * `key` - the key to insert. + /// * `value` - the value to insert. + /// + /// Returns Ok if insertion succeeded, a generic error (no details yet) + /// if it fails. + pub fn insert_or_update(&mut self, key: &mut K, value: &mut V) -> Result<()> { + let key_ptr: *mut K = key; + let val_ptr: *mut V = value; + let err = unsafe { + bpf_map_update_elem( + self.fd, + key_ptr as *mut c_void, + val_ptr as *mut c_void, + 0, + ) + }; + if err != 0 { + Err(Error::msg(format!("Unable to insert into map ({err})"))) + } else { + Ok(()) + } + } + + /// Deletes an entry from the underlying eBPF map. + /// Use this sparingly, it locks the underlying map in the + /// kernel. This can cause *long* delays under heavy load. + /// + /// ## Arguments + /// + /// * `key` - the key to delete. + /// + /// Return `Ok` if deletion succeeded. + pub fn delete(&mut self, key: &mut K) -> Result<()> { + let key_ptr: *mut K = key; + let err = unsafe { bpf_map_delete_elem(self.fd, key_ptr as *mut c_void) }; + if err != 0 { + if err == -2 { + // ENOEXIST : not actually an error, just nothing to do + Ok(()) + } else { + Err(Error::msg("Unable to delete from map")) + } + } else { + Ok(()) + } + } + + /// Delete all entries in the underlying eBPF map. + /// Use this sparingly, it locks the underlying map. Under + /// heavy load, it WILL eventually terminate - but it might + /// take a very long time. Only use this for cleaning up + /// sparsely allocated map data. + pub fn clear(&mut self) -> Result<()> { + loop { + let mut key = K::default(); + let mut prev_key: *mut K = null_mut(); + unsafe { + let key_ptr: *mut K = &mut key; + while bpf_map_get_next_key( + self.fd, + prev_key as *mut c_void, + key_ptr as *mut c_void, + ) == 0 + { + bpf_map_delete_elem(self.fd, key_ptr as *mut c_void); + prev_key = key_ptr; + } + } + + key = K::default(); + prev_key = null_mut(); + unsafe { + let key_ptr: *mut K = &mut key; + if bpf_map_get_next_key( + self.fd, + prev_key as *mut c_void, + key_ptr as *mut c_void, + ) != 0 + { + break; + } + } + } Ok(()) - } else { - Err(Error::msg("Unable to delete from map")) - } - } else { - Ok(()) } - } - /// Delete all entries in the underlying eBPF map. - /// Use this sparingly, it locks the underlying map. Under - /// heavy load, it WILL eventually terminate - but it might - /// take a very long time. Only use this for cleaning up - /// sparsely allocated map data. - pub fn clear(&mut self) -> Result<()> { - loop { - let mut key = K::default(); - let mut prev_key: *mut K = null_mut(); - unsafe { - let key_ptr: *mut K = &mut key; - while bpf_map_get_next_key( - self.fd, - prev_key as *mut c_void, - key_ptr as *mut c_void, - ) == 0 - { - bpf_map_delete_elem(self.fd, key_ptr as *mut c_void); - prev_key = key_ptr; + /// Delete all entries in the underlying eBPF map. + /// Use this sparingly, it locks the underlying map. Under + /// heavy load, it WILL eventually terminate - but it might + /// take a very long time. Only use this for cleaning up + /// sparsely allocated map data. + /// + /// This version skips the "did it really clear?" repeat + /// found in the main version. + pub fn clear_no_repeat(&mut self) -> Result<()> { + let mut key = K::default(); + let mut prev_key: *mut K = null_mut(); + unsafe { + let key_ptr: *mut K = &mut key; + while bpf_map_get_next_key( + self.fd, + prev_key as *mut c_void, + key_ptr as *mut c_void, + ) == 0 + { + bpf_map_delete_elem(self.fd, key_ptr as *mut c_void); + prev_key = key_ptr; + } } - } + Ok(()) + } - key = K::default(); - prev_key = null_mut(); - unsafe { - let key_ptr: *mut K = &mut key; - if bpf_map_get_next_key( - self.fd, - prev_key as *mut c_void, - key_ptr as *mut c_void, - ) != 0 - { - break; + /// Clears an eBPF map using `bpf_map_delete_batch`, which + /// has better locking semantics than per-row. + pub fn clear_bulk(&mut self) -> Result<()> { + let mut keys: Vec = self.dump_vec().iter().map(|(k, _)| { + k.clone() + }).collect(); + let mut count = keys.len() as u32; + loop { + let ret = unsafe { + bpf_map_delete_batch(self.fd, keys.as_mut_ptr() as *mut c_void, &mut count, null_mut()) + }; + if ret != 0 || count == 0 { + break; + } } - } + Ok(()) } - Ok(()) - } - - /// Delete all entries in the underlying eBPF map. - /// Use this sparingly, it locks the underlying map. Under - /// heavy load, it WILL eventually terminate - but it might - /// take a very long time. Only use this for cleaning up - /// sparsely allocated map data. - /// - /// This version skips the "did it really clear?" repeat - /// found in the main version. - pub fn clear_no_repeat(&mut self) -> Result<()> { - let mut key = K::default(); - let mut prev_key: *mut K = null_mut(); - unsafe { - let key_ptr: *mut K = &mut key; - while bpf_map_get_next_key( - self.fd, - prev_key as *mut c_void, - key_ptr as *mut c_void, - ) == 0 - { - bpf_map_delete_elem(self.fd, key_ptr as *mut c_void); - prev_key = key_ptr; - } - } - Ok(()) - } } impl Drop for BpfMap { - fn drop(&mut self) { - let _ = nix::unistd::close(self.fd); - } + fn drop(&mut self) { + let _ = nix::unistd::close(self.fd); + } } diff --git a/src/rust/lqos_sys/src/ip_mapping/mod.rs b/src/rust/lqos_sys/src/ip_mapping/mod.rs index cc0cd706..05107294 100644 --- a/src/rust/lqos_sys/src/ip_mapping/mod.rs +++ b/src/rust/lqos_sys/src/ip_mapping/mod.rs @@ -100,6 +100,6 @@ pub fn list_mapped_ips() -> Result> { /// destinations. pub fn clear_hot_cache() -> Result<()> { let mut bpf_map = BpfMap::::from_path("/sys/fs/bpf/ip_to_cpu_and_tc_hotcache")?; - bpf_map.clear()?; + bpf_map.clear_bulk()?; Ok(()) } \ No newline at end of file