Add a hashmap for looking up ASNs by number. Refactor the ASN-count code; it should be MUCH faster and gentler now.

This commit is contained in:
Herbert Wolverson 2024-07-27 08:20:16 -05:00
parent a73e3af50a
commit b4d1d5deff
2 changed files with 27 additions and 12 deletions

View File

@ -3,6 +3,7 @@
use std::{io::Read, net::IpAddr, path::Path};
use fxhash::FxHashMap;
use serde::Deserialize;
#[derive(Deserialize, Clone, Debug)]
@ -44,6 +45,7 @@ struct Geobin {
/// Lookup tables assembled from the deserialized geo database.
pub struct GeoTable {
// IP-network table holding `AsnEncoded` entries.
asn_trie: ip_network_table::IpNetworkTable<AsnEncoded>,
// IP-network table holding `GeoIpLocation` entries.
geo_trie: ip_network_table::IpNetworkTable<GeoIpLocation>,
// ASN number -> organization name, filled while the ASN entries are loaded;
// consulted by `find_name_by_id`.
asn_lookup: FxHashMap<u32, String>,
}
impl GeoTable {
@ -78,10 +80,13 @@ impl GeoTable {
flate2::read::GzDecoder::new(file).read_to_end(&mut buffer)?;
let geobin: Geobin = bincode::deserialize(&buffer)?;
// Build the ASN trie
// Build the ASN trie and ASN lookup map
let mut asn_lookup = FxHashMap::default();
log::info!("Building ASN trie");
let mut asn_trie = ip_network_table::IpNetworkTable::<AsnEncoded>::new();
for entry in geobin.asn {
asn_lookup.insert(entry.asn, entry.organization.clone());
let (ip, prefix) = match entry.network {
IpAddr::V4(ip) => (ip.to_ipv6_mapped(), entry.prefix+96 ),
IpAddr::V6(ip) => (ip, entry.prefix),
@ -109,6 +114,7 @@ impl GeoTable {
Ok(Self {
asn_trie,
geo_trie,
asn_lookup,
})
}
@ -170,12 +176,7 @@ impl GeoTable {
}
pub fn find_name_by_id(&self, id: u32) -> String {
for (_, entry) in self.asn_trie.iter() {
if entry.asn == id {
return entry.organization.clone();
}
}
"Unknown".to_string()
self.asn_lookup.get(&id).cloned().unwrap_or_else(|| "Unknown".to_string())
}
}

View File

@ -281,14 +281,28 @@ impl TimeBuffer {
/// Builds a list of all ASNs with recent data, and how many flows they have.
pub fn asn_list(&self) -> Vec<AsnListEntry> {
let buffer = self.buffer.lock().unwrap();
buffer
.iter()
// 1: Clone: large operation, don't keep the buffer locked longer than we have to
let buffer = {
let buffer = self.buffer.lock().unwrap();
buffer.clone()
};
// Filter out short flows and reduce to the ASN ID# only
let mut buffer: Vec<_> = buffer
.into_iter()
.filter(|flow| {
// Total flow time > 2 seconds
flow.data.1.last_seen - flow.data.1.start_time > 2_000_000_000
// Total flow time > 3 seconds
flow.data.1.last_seen - flow.data.1.start_time > 3_000_000_000
})
.map(|flow| flow.data.2.asn_id.0)
.collect();
// Sort the buffer
buffer.sort_unstable();
// Deduplicate and count, decorate with name
buffer
.into_iter()
.sorted()
.dedup_with_count()
.map(|(count, asn)| AsnListEntry {