From f7c9e82173839069bc72c240076c1f1a769cde1e Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 5 Feb 2024 11:28:31 -0600 Subject: [PATCH 001/103] Just starting - display some packet info --- src/rust/lqos_sys/src/bpf/common/dissector.h | 6 +- src/rust/lqos_sys/src/bpf/common/lpm.h | 65 +++++++------------- src/rust/lqos_sys/src/bpf/lqos_kern.c | 53 ++++++++++++++-- 3 files changed, 73 insertions(+), 51 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/dissector.h b/src/rust/lqos_sys/src/bpf/common/dissector.h index cd43eb0d..39b73eda 100644 --- a/src/rust/lqos_sys/src/bpf/common/dissector.h +++ b/src/rust/lqos_sys/src/bpf/common/dissector.h @@ -278,11 +278,11 @@ static __always_inline bool dissector_find_l3_offset( static __always_inline struct tcphdr *get_tcp_header(struct dissector_t *dissector) { - if (dissector->eth_type == ETH_P_IP) + if (dissector->eth_type == ETH_P_IP && dissector->ip_header.iph->protocol == IPPROTO_TCP) { return (struct tcphdr *)((char *)dissector->ip_header.iph + (dissector->ip_header.iph->ihl * 4)); } - else if (dissector->eth_type == ETH_P_IPV6) + else if (dissector->eth_type == ETH_P_IPV6 && dissector->ip_header.ip6h->nexthdr == IPPROTO_TCP) { return (struct tcphdr *)(dissector->ip_header.ip6h + 1); } @@ -424,4 +424,4 @@ static __always_inline bool dissector_find_ip_header( default: return false; } -} \ No newline at end of file +} diff --git a/src/rust/lqos_sys/src/bpf/common/lpm.h b/src/rust/lqos_sys/src/bpf/common/lpm.h index 3975bb19..f539fac9 100644 --- a/src/rust/lqos_sys/src/bpf/common/lpm.h +++ b/src/rust/lqos_sys/src/bpf/common/lpm.h @@ -47,60 +47,39 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } map_ip_to_cpu_and_tc_recip SEC(".maps"); +// Determine the effective direction of a packet +static __always_inline int determine_effective_direction(int direction, __be16 internet_vlan, struct dissector_t * dissector) { + if (direction < 3) { + return direction; + } else { + if (dissector->current_vlan == internet_vlan) { + return 1; + } else { + return 2; + } + } +} + // Performs an LPM lookup for an `ip_hash.h` encoded address, taking // into account redirection and "on a stick" setup. static __always_inline struct ip_hash_info * setup_lookup_key_and_tc_cpu( - // The "direction" constant from the main program. 1 = Internet, - // 2 = LAN, 3 = Figure it out from VLAN tags + // This must have been pre-calculated by `determine_effective_direction`. int direction, // Pointer to the "lookup key", which should contain the IP address // to search for. Prefix length will be set for you. struct ip_hash_key * lookup_key, // Pointer to the traffic dissector. - struct dissector_t * dissector, - // Which VLAN represents the Internet, in redirection scenarios? (i.e. - // when direction == 3) - __be16 internet_vlan, - // Out variable setting the real "direction" of traffic when it has to - // be calculated. - int * out_effective_direction + struct dissector_t * dissector ) { lookup_key->prefixlen = 128; - // Normal preset 2-interface setup, no need to calculate any direction - // related VLANs. - if (direction < 3) { - lookup_key->address = (direction == 1) ? dissector->dst_ip : - dissector->src_ip; - *out_effective_direction = direction; - struct ip_hash_info * ip_info = bpf_map_lookup_elem( - &map_ip_to_cpu_and_tc, - lookup_key - ); - return ip_info; - } else { - if (dissector->current_vlan == internet_vlan) { - // Packet is coming IN from the Internet. - // Therefore it is download. 
- lookup_key->address = dissector->dst_ip; - *out_effective_direction = 1; - struct ip_hash_info * ip_info = bpf_map_lookup_elem( - &map_ip_to_cpu_and_tc, - lookup_key - ); - return ip_info; - } else { - // Packet is coming IN from the ISP. - // Therefore it is UPLOAD. - lookup_key->address = dissector->src_ip; - *out_effective_direction = 2; - struct ip_hash_info * ip_info = bpf_map_lookup_elem( - &map_ip_to_cpu_and_tc_recip, - lookup_key - ); - return ip_info; - } - } + lookup_key->address = (direction == 1) ? dissector->dst_ip : + dissector->src_ip; + struct ip_hash_info * ip_info = bpf_map_lookup_elem( + &map_ip_to_cpu_and_tc, + lookup_key + ); + return ip_info; } // For the TC side, the dissector is different. Operates similarly to diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 06a323c2..5b545105 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -98,19 +98,62 @@ int xdp_prog(struct xdp_md *ctx) // is requested. if (!dissector_find_l3_offset(&dissector, vlan_redirect)) return XDP_PASS; if (!dissector_find_ip_header(&dissector)) return XDP_PASS; + int effective_direction = determine_effective_direction( + direction, + internet_vlan, + &dissector + ); + + // Per-Flow RTT Tracking + //bpf_debug("Checking for TCP"); + struct tcphdr * tcp = get_tcp_header(&dissector); + if (tcp != NULL) { + if (tcp + 1 < dissector.end) + { + //bpf_debug("TCP found"); + if (tcp->syn) { + // We've found a SYN packet, so the connection is just starting. + if (effective_direction == 1) { + bpf_debug("SYN->WAN, %d <-> %d", tcp->seq, tcp->ack_seq); + } else { + bpf_debug("SYN->LAN, %d <-> %d", tcp->seq, tcp->ack_seq); + } + } else if (tcp->fin) { + // We've found a FIN packet, so the connection is expecting to end. + bpf_debug("FIN packet, %d", effective_direction); + } else if (tcp->rst) { + // We've found a RST packet, so the connection is being reset. + bpf_debug("RST packet, %d", effective_direction); + } else if (tcp->ack) { + // We've found an ACK packet, so the connection is established. 
+ + void *nh_pos = (tcp + 1) + (tcp->doff << 2); + bool is_valid = nh_pos - dissector.start < ctx->data_end - ctx->data; + + if (is_valid) { + //bpf_debug("ACK packet"); + if (effective_direction == 1) { + // To the internet + bpf_debug("ACK->WAN, %d <-> %d", tcp->seq, tcp->ack_seq); + } else { + // To the LAN + bpf_debug("ACK->LAN, %d <-> %d", tcp->seq, tcp->ack_seq); + } + } + } + } + } + #ifdef VERBOSE bpf_debug("(XDP) Spotted VLAN: %u", dissector.current_vlan); #endif // Determine the lookup key by direction struct ip_hash_key lookup_key; - int effective_direction = 0; struct ip_hash_info * ip_info = setup_lookup_key_and_tc_cpu( - direction, + effective_direction, &lookup_key, - &dissector, - internet_vlan, - &effective_direction + &dissector ); #ifdef VERBOSE bpf_debug("(XDP) Effective direction: %d", effective_direction); From d412851560bdd8284780cc1da570a0ad559960a6 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 8 Feb 2024 08:32:58 -0600 Subject: [PATCH 002/103] WIP --- src/rust/lqos_sys/src/bpf/common/lpm.h | 4 +- src/rust/lqos_sys/src/bpf/lqos_kern.c | 52 ++++---------------------- src/rust/remove_pinned_maps.sh | 1 + 3 files changed, 11 insertions(+), 46 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/lpm.h b/src/rust/lqos_sys/src/bpf/common/lpm.h index f539fac9..1a8296ed 100644 --- a/src/rust/lqos_sys/src/bpf/common/lpm.h +++ b/src/rust/lqos_sys/src/bpf/common/lpm.h @@ -48,7 +48,7 @@ struct { } map_ip_to_cpu_and_tc_recip SEC(".maps"); // Determine the effective direction of a packet -static __always_inline int determine_effective_direction(int direction, __be16 internet_vlan, struct dissector_t * dissector) { +static __always_inline u_int8_t determine_effective_direction(int direction, __be16 internet_vlan, struct dissector_t * dissector) { if (direction < 3) { return direction; } else { @@ -64,7 +64,7 @@ static __always_inline int determine_effective_direction(int direction, __be16 i // into account redirection and "on a stick" setup. static __always_inline struct ip_hash_info * setup_lookup_key_and_tc_cpu( // This must have been pre-calculated by `determine_effective_direction`. - int direction, + u_int8_t direction, // Pointer to the "lookup key", which should contain the IP address // to search for. Prefix length will be set for you. struct ip_hash_key * lookup_key, diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 5b545105..07bbd6c3 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -18,6 +18,7 @@ #include "common/tcp_rtt.h" #include "common/bifrost.h" #include "common/heimdall.h" +#include "common/flows.h" //#define VERBOSE 1 @@ -98,56 +99,22 @@ int xdp_prog(struct xdp_md *ctx) // is requested. if (!dissector_find_l3_offset(&dissector, vlan_redirect)) return XDP_PASS; if (!dissector_find_ip_header(&dissector)) return XDP_PASS; - int effective_direction = determine_effective_direction( + u_int8_t effective_direction = determine_effective_direction( direction, internet_vlan, &dissector ); - - // Per-Flow RTT Tracking - //bpf_debug("Checking for TCP"); - struct tcphdr * tcp = get_tcp_header(&dissector); - if (tcp != NULL) { - if (tcp + 1 < dissector.end) - { - //bpf_debug("TCP found"); - if (tcp->syn) { - // We've found a SYN packet, so the connection is just starting. 
- if (effective_direction == 1) { - bpf_debug("SYN->WAN, %d <-> %d", tcp->seq, tcp->ack_seq); - } else { - bpf_debug("SYN->LAN, %d <-> %d", tcp->seq, tcp->ack_seq); - } - } else if (tcp->fin) { - // We've found a FIN packet, so the connection is expecting to end. - bpf_debug("FIN packet, %d", effective_direction); - } else if (tcp->rst) { - // We've found a RST packet, so the connection is being reset. - bpf_debug("RST packet, %d", effective_direction); - } else if (tcp->ack) { - // We've found an ACK packet, so the connection is established. - - void *nh_pos = (tcp + 1) + (tcp->doff << 2); - bool is_valid = nh_pos - dissector.start < ctx->data_end - ctx->data; - - if (is_valid) { - //bpf_debug("ACK packet"); - if (effective_direction == 1) { - // To the internet - bpf_debug("ACK->WAN, %d <-> %d", tcp->seq, tcp->ack_seq); - } else { - // To the LAN - bpf_debug("ACK->LAN, %d <-> %d", tcp->seq, tcp->ack_seq); - } - } - } - } - } +#ifdef VERBOSE + bpf_debug("(XDP) Effective direction: %d", effective_direction); +#endif #ifdef VERBOSE bpf_debug("(XDP) Spotted VLAN: %u", dissector.current_vlan); #endif + // Per-Flow RTT Tracking + track_flows(&dissector, effective_direction); + // Determine the lookup key by direction struct ip_hash_key lookup_key; struct ip_hash_info * ip_info = setup_lookup_key_and_tc_cpu( @@ -155,9 +122,6 @@ int xdp_prog(struct xdp_md *ctx) &lookup_key, &dissector ); -#ifdef VERBOSE - bpf_debug("(XDP) Effective direction: %d", effective_direction); -#endif // Find the desired TC handle and CPU target __u32 tc_handle = 0; diff --git a/src/rust/remove_pinned_maps.sh b/src/rust/remove_pinned_maps.sh index 44e2ecf7..38b7a3e8 100755 --- a/src/rust/remove_pinned_maps.sh +++ b/src/rust/remove_pinned_maps.sh @@ -13,3 +13,4 @@ rm -v /sys/fs/bpf/bifrost_vlan_map rm -v /sys/fs/bpf/heimdall rm -v /sys/fs/bpf/heimdall_config rm -v /sys/fs/bpf/heimdall_watching +rm -v /sys/fs/bpf/flowbee \ No newline at end of file From 295caaad91567b615bbd6b9e471be2d201dc3389 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 14 Feb 2024 13:51:55 -0600 Subject: [PATCH 003/103] Very early days - kinda/sorta working per-flow work. --- src/rust/lqos_sys/src/bpf/common/flows.h | 225 +++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 src/rust/lqos_sys/src/bpf/common/flows.h diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h new file mode 100644 index 00000000..5a5e9f43 --- /dev/null +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -0,0 +1,225 @@ +// TCP flow monitor system + +#include +#include +#include "dissector.h" +#include "debug.h" + +// Defines a TCP connection flow key +struct tcp_flow_key_t { + struct in6_addr src; + struct in6_addr dst; + __u16 src_port; + __u16 dst_port; +}; + +// TCP connection flow entry +struct tcp_flow_data_t { + __u64 last_seen_a; + __u64 last_seen_b; + __u64 bytes_sent; + __u64 bytes_received; + __u32 time_a; + __u32 time_b; + __u64 last_rtt; + __u64 packets_sent; + __u64 packets_received; +}; + +// Map for tracking TCP flow progress. +// This is pinned and not per-CPU, because half the data appears on either side of the bridge. 
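+// BPF_MAP_TYPE_LRU_HASH evicts the least-recently-used entry once max_entries
+// is reached, so long-idle flows age out on their own, and LIBBPF_PIN_BY_NAME
+// pins the map under /sys/fs/bpf (see remove_pinned_maps.sh).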
+struct +{ + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, struct tcp_flow_key_t); + __type(value, struct tcp_flow_data_t); + __uint(max_entries, MAX_FLOWS); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} flowbee SEC(".maps"); + +static __always_inline struct tcp_flow_key_t build_flow_key( + struct dissector_t *dissector, // The packet dissector from the previous step + u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) +) { + if (direction == 1) { + return (struct tcp_flow_key_t) { + .src = dissector->src_ip, + .dst = dissector->dst_ip, + .src_port = dissector->src_port, + .dst_port = dissector->dst_port + }; + } else { + return (struct tcp_flow_key_t) { + .src = dissector->dst_ip, + .dst = dissector->src_ip, + .src_port = dissector->dst_port, + .dst_port = dissector->src_port + }; + } +} + +static __always_inline void debug_ip( + struct in6_addr *ip +) { + bpf_debug("%d.%d.%d", ip->s6_addr[13], ip->s6_addr[14], ip->s6_addr[15]); +} + +static __always_inline bool get_timestamps( + u_int32_t * out_tsval, + u_int32_t * out_tsecr, + struct tcphdr * tcp, + struct dissector_t * dissector, + void * end_opts +) { + u_int8_t *pos = (u_int8_t *)(tcp + 1); // Current pos in TCP options + u_int8_t len; + + // This 8 should be 10, but we're running out of space + for (u_int8_t i = 0; i<8; i++) { + if (pos + 2 > dissector->end) { + return false; + } + switch (*pos) { + case 0: return false; // End of options + case 1: pos++; break; // NOP + case 8: { + if (pos + 10 > dissector->end) { + return false; + } + *out_tsval = bpf_ntohl(*(__u32 *)(pos + 2)); + *out_tsecr = bpf_ntohl(*(__u32 *)(pos + 6)); + return true; + } + default: { + len = *(pos + 1); + pos += len; + } + } + } + + return false; +} + +static __always_inline void track_flows( + struct dissector_t *dissector, // The packet dissector from the previous step + u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) +) { + struct tcphdr * tcp = get_tcp_header(dissector); + if (tcp == NULL) { + // Bail out if it's not a TCP packet + return; + } + + // Bail out if we've exceeded the packet size and there is no payload + // This keeps the safety checker happy and is generally a good idea + if (tcp + 1 >= dissector->end) { + return; + } + + // Determine the key for the flow. Since we know direction, there's + // no need to consider "reverse keys" and their ilk. + struct tcp_flow_key_t key = build_flow_key(dissector, direction); + + // Only care about connections that originate locally + __u64 now = bpf_ktime_get_ns(); + if (tcp->syn && direction == 1) { + // SYN packet sent to the Internet. We are establishing a new connection. + // We need to add this flow to the tracking table. 
+ bpf_debug("New TCP connection detected"); + struct tcp_flow_data_t data = { + .last_seen_a = now, + .last_seen_b = now, + .bytes_sent = dissector->skb_len, + .bytes_received = 0, + .time_a = 0, + .time_b = 0, + .last_rtt = 0, + .packets_sent = 1, + .packets_received = 0 + }; + bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY); + } + + // Update the flow's last seen time + struct tcp_flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) { + return; + } + __u64 last_seen = data->last_seen_a; + if (direction == 1) { + data->last_seen_a = now; + data->bytes_sent += dissector->skb_len; + data->packets_sent++; + } else { + data->last_seen_b = now; + data->bytes_received += dissector->skb_len; + data->packets_received++; + } + //bpf_debug("Dir: %d, Sent/Received: [%d]/[%d]", direction, data->bytes_sent, data->bytes_received); + + // Parse the TCP options + __u32 tsval = 0; + __u32 tsecr = 0; + void *end_opts = (tcp + 1) + (tcp->doff << 2); + bool has_data = end_opts - dissector->start < dissector->skb_len; + get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts); + + if ( tcp->ack && has_data) { + //bpf_debug("Direction %d", direction); + //bpf_debug("to 192.168.66.%d => SEQ %d <-> %d", dissector->dst_ip.in6_u.u6_addr8[15], bpf_ntohs(tcp->seq), bpf_ntohs(tcp->ack_seq)); + __u32 sequence = bpf_ntohl(tcp->seq); + + if (direction == 1) { + // Going TO the Internet. We're acknowledging a packet. + // We don't need to record an RTT measurement and check for issues. + //bpf_debug("%d / %d", data->time_a, data->time_b); + + if (data->time_a != 0 && sequence < data->time_a) { + // This is a retransmission + bpf_debug("DIR 1 Retransmission (or out of order) detected"); + bpf_debug("to 192.168.66.%d => SEQ %d < %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_a); + } + + data->time_a = sequence; + } else { + // Coming FROM the Internet. They are acknowledging a packet. + // We need to record an RTT measurement, but we can check for issues. + //bpf_debug("%d / %d", data->time_a, data->time_b); + + __u64 rtt = now - last_seen; + //bpf_debug("RTT: %d nanos", rtt); + data->last_rtt = rtt; + + if (data->time_b != 0 && sequence < data->time_b) { + // This is a retransmission + bpf_debug("DIR 2 Retransmission (or out of order) detected"); + bpf_debug("to 192.168.66.%d => SEQ %d > %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_b); + } + + data->time_b = sequence; + } + + + //bpf_debug("to 192.168.66.%d => TS %d <-> %d", dissector->dst_ip.in6_u.u6_addr8[15], bpf_ntohs(tsval), bpf_ntohs(tsecr)); + } else if ( tcp->fin) { + // FIN packet. We are closing a connection. + // We need to remove this flow from the tracking table. + bpf_debug("TCP connection closed"); + // TODO: Submit the result somewhere + bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); + bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); + bpf_debug(" RTT : %d", data->last_rtt); + // /TODO + bpf_map_delete_elem(&flowbee, &key); + } else if ( tcp->rst ) { + // RST packet. We are resetting a connection. + // We need to remove this flow from the tracking table. 
+ bpf_debug("TCP connection reset"); + // TODO: Submit the result somewhere + bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); + bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); + bpf_debug(" RTT : %d", data->last_rtt); + // /TODO + bpf_map_delete_elem(&flowbee, &key); + } +} From 1ca595b16cb7418e63a6b48997bf13f0b735126e Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 14 Feb 2024 14:40:58 -0600 Subject: [PATCH 004/103] Hacking away - remove the fin/reset flags because they shouldn't be needed. --- src/rust/lqos_sys/src/bpf/common/flows.h | 26 +++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 5a5e9f43..b446e1e0 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -15,6 +15,7 @@ struct tcp_flow_key_t { // TCP connection flow entry struct tcp_flow_data_t { + __u64 start_time; __u64 last_seen_a; __u64 last_seen_b; __u64 bytes_sent; @@ -24,6 +25,8 @@ struct tcp_flow_data_t { __u64 last_rtt; __u64 packets_sent; __u64 packets_received; + __u64 retries_a; + __u64 retries_b; }; // Map for tracking TCP flow progress. @@ -127,6 +130,7 @@ static __always_inline void track_flows( // We need to add this flow to the tracking table. bpf_debug("New TCP connection detected"); struct tcp_flow_data_t data = { + .start_time = now, .last_seen_a = now, .last_seen_b = now, .bytes_sent = dissector->skb_len, @@ -135,7 +139,9 @@ static __always_inline void track_flows( .time_b = 0, .last_rtt = 0, .packets_sent = 1, - .packets_received = 0 + .packets_received = 0, + .retries_a = 0, + .retries_b = 0 }; bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY); } @@ -176,8 +182,9 @@ static __always_inline void track_flows( if (data->time_a != 0 && sequence < data->time_a) { // This is a retransmission - bpf_debug("DIR 1 Retransmission (or out of order) detected"); - bpf_debug("to 192.168.66.%d => SEQ %d < %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_a); + //bpf_debug("DIR 1 Retransmission (or out of order) detected"); + //bpf_debug("to 192.168.66.%d => SEQ %d < %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_a); + data->retries_a++; } data->time_a = sequence; @@ -192,8 +199,9 @@ static __always_inline void track_flows( if (data->time_b != 0 && sequence < data->time_b) { // This is a retransmission - bpf_debug("DIR 2 Retransmission (or out of order) detected"); - bpf_debug("to 192.168.66.%d => SEQ %d > %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_b); + //bpf_debug("DIR 2 Retransmission (or out of order) detected"); + //bpf_debug("to 192.168.66.%d => SEQ %d > %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_b); + data->retries_b++; } data->time_b = sequence; @@ -206,9 +214,11 @@ static __always_inline void track_flows( // We need to remove this flow from the tracking table. 
bpf_debug("TCP connection closed"); // TODO: Submit the result somewhere + bpf_debug(" Flow Lifetime: %u nanos", now - data->start_time); bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); - bpf_debug(" RTT : %d", data->last_rtt); + bpf_debug(" RTT : %d nanos", data->last_rtt); + bpf_debug(" RETRIES : %d / %d", data->retries_a, data->retries_b); // /TODO bpf_map_delete_elem(&flowbee, &key); } else if ( tcp->rst ) { @@ -216,9 +226,11 @@ static __always_inline void track_flows( // We need to remove this flow from the tracking table. bpf_debug("TCP connection reset"); // TODO: Submit the result somewhere + bpf_debug(" Flow Lifetime: %u nanos", now - data->start_time); bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); - bpf_debug(" RTT : %d", data->last_rtt); + bpf_debug(" RTT : %d nanos", data->last_rtt); + bpf_debug(" RETRIES : %d / %d", data->retries_a, data->retries_b); // /TODO bpf_map_delete_elem(&flowbee, &key); } From 23487f3328e99b1e5a2058d2c8a365f7e09b14bf Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 14 Feb 2024 14:56:46 -0600 Subject: [PATCH 005/103] The flows now include their very own rate estimator, and only count RTT when rate exceeds a threshold - 5mbps right now, which is probably not a good choice. --- src/rust/lqos_sys/src/bpf/common/flows.h | 50 ++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index b446e1e0..b67a7abb 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -5,6 +5,8 @@ #include "dissector.h" #include "debug.h" +#define SECOND_IN_NANOS 1000000000 + // Defines a TCP connection flow key struct tcp_flow_key_t { struct in6_addr src; @@ -27,6 +29,11 @@ struct tcp_flow_data_t { __u64 packets_received; __u64 retries_a; __u64 retries_b; + + __u64 last_count_time; + __u64 next_count_time; + __u64 next_count_bytes; + __u64 rate_estimate; }; // Map for tracking TCP flow progress. @@ -141,7 +148,11 @@ static __always_inline void track_flows( .packets_sent = 1, .packets_received = 0, .retries_a = 0, - .retries_b = 0 + .retries_b = 0, + .next_count_time = now + SECOND_IN_NANOS, + .next_count_bytes = dissector->skb_len, + .rate_estimate = 0, + .last_count_time = now }; bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY); } @@ -180,6 +191,23 @@ static __always_inline void track_flows( // We don't need to record an RTT measurement and check for issues. 
//bpf_debug("%d / %d", data->time_a, data->time_b); + if (now > data->next_count_time) { + // Calculate the rate estimate + __u64 bytes = data->bytes_sent + data->bytes_received - data->next_count_bytes; + __u64 time = now - data->last_count_time; + data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1000000; + data->next_count_time = now + SECOND_IN_NANOS; + data->next_count_bytes = data->bytes_sent + data->bytes_received; + data->last_count_time = now; + bpf_debug("Rate estimate: %u mbits/sec", data->rate_estimate); + + if (data->rate_estimate > 5) { + __u64 rtt = now - last_seen; + bpf_debug("RTT: %d nanos", rtt); + data->last_rtt = rtt; + } + } + if (data->time_a != 0 && sequence < data->time_a) { // This is a retransmission //bpf_debug("DIR 1 Retransmission (or out of order) detected"); @@ -193,9 +221,23 @@ static __always_inline void track_flows( // We need to record an RTT measurement, but we can check for issues. //bpf_debug("%d / %d", data->time_a, data->time_b); - __u64 rtt = now - last_seen; - //bpf_debug("RTT: %d nanos", rtt); - data->last_rtt = rtt; + if (now > data->next_count_time) { + // Calculate the rate estimate + __u64 bytes = data->bytes_sent + data->bytes_received - data->next_count_bytes; + __u64 time = now - data->last_count_time; + data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1000000; + data->next_count_time = now + SECOND_IN_NANOS; + data->next_count_bytes = data->bytes_sent + data->bytes_received; + data->last_count_time = now; + bpf_debug("Rate estimate: %u mbits/sec", data->rate_estimate); + + if (data->rate_estimate > 5) { + __u64 rtt = now - last_seen; + bpf_debug("RTT: %d nanos", rtt); + data->last_rtt = rtt; + } + } + if (data->time_b != 0 && sequence < data->time_b) { // This is a retransmission From 0fd6b29e6c42c657f4b4d6b41c55d3d27a108610 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 26 Feb 2024 20:36:37 -0600 Subject: [PATCH 006/103] Considerably cleaned up flow tracking system. --- src/rust/lqos_sys/build.rs | 2 +- src/rust/lqos_sys/src/bpf/common/flows.h | 351 ++++++++++++++++++--- src/rust/lqos_sys/src/bpf/common/tcp_rtt.h | 81 ++++- src/rust/lqos_sys/src/bpf/lqos_kern.c | 2 +- 4 files changed, 395 insertions(+), 41 deletions(-) diff --git a/src/rust/lqos_sys/build.rs b/src/rust/lqos_sys/build.rs index fb8213e5..bc862464 100644 --- a/src/rust/lqos_sys/build.rs +++ b/src/rust/lqos_sys/build.rs @@ -123,7 +123,7 @@ fn main() { .header(&wrapper_target) // Tell cargo to invalidate the built crate whenever any of the // included header files changed. - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) // Finish the builder and generate the bindings. .generate() // Unwrap the Result and panic on failure. diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index b67a7abb..8be2f1dc 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -6,6 +6,14 @@ #include "debug.h" #define SECOND_IN_NANOS 1000000000 +#define TIMESTAMP_INTERVAL_NANOS 2000000000 + +// Some helpers to make understanding direction easier +// for readability. 
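+// Each value is defined under two names so call sites can describe the same
+// direction from either endpoint's viewpoint (FROM_INTERNET == TO_LOCAL,
+// TO_INTERNET == FROM_LOCAL).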
+#define TO_INTERNET 2 +#define FROM_INTERNET 1 +#define TO_LOCAL 1 +#define FROM_LOCAL 2 // Defines a TCP connection flow key struct tcp_flow_key_t { @@ -17,23 +25,35 @@ struct tcp_flow_key_t { // TCP connection flow entry struct tcp_flow_data_t { + // Time (nanos) when the connection was established __u64 start_time; - __u64 last_seen_a; - __u64 last_seen_b; - __u64 bytes_sent; - __u64 bytes_received; - __u32 time_a; - __u32 time_b; - __u64 last_rtt; - __u64 packets_sent; - __u64 packets_received; - __u64 retries_a; - __u64 retries_b; - - __u64 last_count_time; - __u64 next_count_time; - __u64 next_count_bytes; - __u64 rate_estimate; + // Time (nanos) when the connection was last seen + __u64 last_seen; + // Bytes transmitted + __u64 bytes_sent[2]; + // Packets transmitted + __u64 packets_sent[2]; + // Clock for the next rate estimate + __u64 next_count_time[2]; + // Clock for the previous rate estimate + __u64 last_count_time[2]; + // Bytes at the next rate estimate + __u64 next_count_bytes[2]; + // Rate estimate + __u64 rate_estimate[2]; + // Sequence number of the last packet + __u32 last_sequence[2]; + // Acknowledgement number of the last packet + __u32 last_ack[2]; + // Retry Counters + __u32 retries[2]; + // Timestamp values + __u32 tsval[2]; + __u32 tsecr[2]; + __u64 ts_change_time[2]; + __u64 ts_calc_time[2]; + // Most recent RTT + __u64 last_rtt[2]; }; // Map for tracking TCP flow progress. @@ -47,23 +67,24 @@ struct __uint(pinning, LIBBPF_PIN_BY_NAME); } flowbee SEC(".maps"); -static __always_inline struct tcp_flow_key_t build_flow_key( +static __always_inline struct tcp_flow_key_t build_tcp_flow_key( struct dissector_t *dissector, // The packet dissector from the previous step + struct tcphdr *tcp, // The TCP header u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { - if (direction == 1) { + if (direction == FROM_INTERNET) { return (struct tcp_flow_key_t) { .src = dissector->src_ip, .dst = dissector->dst_ip, - .src_port = dissector->src_port, - .dst_port = dissector->dst_port + .src_port = tcp->source, + .dst_port = tcp->dest, }; } else { return (struct tcp_flow_key_t) { .src = dissector->dst_ip, .dst = dissector->src_ip, - .src_port = dissector->dst_port, - .dst_port = dissector->src_port + .src_port = tcp->dest, + .dst_port = tcp->source, }; } } @@ -84,8 +105,8 @@ static __always_inline bool get_timestamps( u_int8_t *pos = (u_int8_t *)(tcp + 1); // Current pos in TCP options u_int8_t len; - // This 8 should be 10, but we're running out of space - for (u_int8_t i = 0; i<8; i++) { + // This should be 10, but we're running out of space + for (u_int8_t i = 0; i<6; i++) { if (pos + 2 > dissector->end) { return false; } @@ -110,9 +131,253 @@ static __always_inline bool get_timestamps( return false; } +// Handle Per-Flow ICMP Analysis +static __always_inline void process_icmp( + struct dissector_t *dissector, + u_int8_t direction, + struct icmphdr *icmp +) { + +} + +// Handle Per-Flow UDP Analysis +static __always_inline void process_udp( + struct dissector_t *dissector, + u_int8_t direction, + struct udphdr *udp +) { + +} + +// Handle Per-Flow TCP Analysis +static __always_inline void process_tcp( + struct dissector_t *dissector, + u_int8_t direction, + struct tcphdr *tcp, + u_int64_t now +) { + if ((tcp->syn && !tcp->ack && direction == TO_INTERNET) || (tcp->syn && tcp->ack && direction == FROM_INTERNET)) { + // A customer is requesting a new TCP connection. That means + // we need to start tracking this flow. 
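+        // Either an outbound SYN (no ACK yet) or the answering SYN-ACK from
+        // the Internet marks a connection originated on the local side, so a
+        // fresh entry is inserted into the flowbee map.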
+ bpf_debug("[FLOWS] New TCP Connection Detected (%u)", direction); + struct tcp_flow_key_t key = build_tcp_flow_key(dissector, tcp, direction); + struct tcp_flow_data_t data = { + .start_time = now, + .bytes_sent = { 0, 0 }, + .packets_sent = { 0, 0 }, + .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, + .last_count_time = { now, now }, + .next_count_bytes = { dissector->skb_len, dissector->skb_len }, + .rate_estimate = { 0, 0 }, + .last_sequence = { 0, 0 }, + .last_ack = { 0, 0 }, + .retries = { 0, 0 }, + .tsval = { 0, 0 }, + .tsecr = { 0, 0 }, + .ts_change_time = { 0, 0 }, + .ts_calc_time = { now + TIMESTAMP_INTERVAL_NANOS, now + TIMESTAMP_INTERVAL_NANOS }, + .last_rtt = { 0, 0 } + }; + if (bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY) != 0) { + bpf_debug("[FLOWS] Failed to add new flow to map"); + } + return; + } + + // Build the flow key + struct tcp_flow_key_t key = build_tcp_flow_key(dissector, tcp, direction); + struct tcp_flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) { + // If it isn't a flow we're tracking, bail out now + return; + } + + // Update last seen to now + data->last_seen = now; + + // Update bytes and packets sent + if (direction == TO_INTERNET) { + data->bytes_sent[0] += dissector->skb_len; + data->packets_sent[0]++; + + if (now > data->next_count_time[0]) { + // Calculate the rate estimate + __u64 bits = (data->bytes_sent[0] - data->next_count_bytes[0])*8; + __u64 time = (now - data->last_count_time[0]) / 1000000000; // Seconds + data->rate_estimate[0] = bits/time; + //bpf_debug("[FLOWS][%d] Rate Estimate: %u mbits / second", direction, data->rate_estimate[0] / 1000000); + data->next_count_time[0] = now + SECOND_IN_NANOS; + data->next_count_bytes[0] = data->bytes_sent[0]; + data->last_count_time[0] = now; + } + } else { + data->bytes_sent[1] += dissector->skb_len; + data->packets_sent[1]++; + + if (now > data->next_count_time[1]) { + // Calculate the rate estimate + __u64 bits = (data->bytes_sent[1] - data->next_count_bytes[1])*8; + __u64 time = (now - data->last_count_time[1]) / 1000000000; // Seconds + data->rate_estimate[1] = bits/time; + //bpf_debug("[FLOWS][%d] Rate Estimate: %u mbits / second", direction, data->rate_estimate[1] / 1000000); + data->next_count_time[1] = now + SECOND_IN_NANOS; + data->next_count_bytes[1] = data->bytes_sent[1]; + data->last_count_time[1] = now; + } + } + + // Sequence and Acknowledgement numbers + __u32 sequence = bpf_ntohl(tcp->seq); + __u32 ack_seq = bpf_ntohl(tcp->ack_seq); + if (direction == TO_INTERNET) { + if (data->last_sequence[0] != 0 && sequence < data->last_sequence[0]) { + // This is a retransmission + //bpf_debug("[FLOWS] Retransmission detected (%u)", direction); + data->retries[0]++; + } + + data->last_sequence[0] = sequence; + data->last_ack[0] = ack_seq; + } else { + if (data->last_sequence[1] != 0 && sequence < data->last_sequence[1]) { + // This is a retransmission + //bpf_debug("[FLOWS] Retransmission detected (%u)", direction); + data->retries[1]++; + } + + data->last_sequence[1] = sequence; + data->last_ack[1] = ack_seq; + } + //bpf_debug("[FLOWS][%d] Sequence: %u Ack: %u", direction, sequence, ack_seq); + + // Timestamps to calculate RTT + u_int32_t tsval = 0; + u_int32_t tsecr = 0; + void *end_opts = (tcp + 1) + (tcp->doff << 2); + if (tcp->ack && get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts)) { + //bpf_debug("[FLOWS][%d] TSVal %u TSecr %u", direction, tsval, tsecr); + if (direction == TO_INTERNET) { + if (tsval != data->tsval[0] || tsecr != 
data->tsecr[0]) { + + if (tsval == data->tsecr[1]) { + //bpf_debug("%d Matched!", direction); + __u64 elapsed = now - data->ts_change_time[1]; + data->last_rtt[0] = elapsed; + //bpf_debug("%d TS Change (RTT): %u nanos", direction, elapsed); + // TODO: Do something with the RTT + } + + //bpf_debug("%d TSVal Changed", direction); + data->ts_change_time[0] = now; + data->tsval[0] = tsval; + data->tsecr[0] = tsecr; + } + } else { + if (tsval != data->tsval[1] || tsecr != data->tsecr[1]) { + + if (tsval == data->tsecr[0]) { + //bpf_debug("%d Matched!", direction); + __u64 elapsed = now - data->ts_change_time[0]; + data->last_rtt[1] = elapsed; + //bpf_debug("%d TS Change (RTT): %u nanos", direction, elapsed); + // TODO: Do something with the RTT + } + + //bpf_debug("%d TSVal Changed", direction); + data->ts_change_time[1] = now; + data->tsval[1] = tsval; + data->tsecr[1] = tsecr; + } + } + + + /*else { + if (tsval == data->tsecr[0]) { + //if (tsval == data->tsecr[0] && now > data->ts_calc_time[1]) { + __u64 elapsed = now - data->ts_change_time[0]; + bpf_debug("[FLOWS][%d] TS Change (RTT): %u nanos", direction, elapsed); + data->ts_calc_time[1] = now + TIMESTAMP_INTERVAL_NANOS; + } + if (tsval != data->tsval[1]) { + data->ts_change_time[1] = now; + } + data->tsval[1] = tsval; + data->tsecr[1] = tsecr; + }*/ + } + + // Has the connection ended? + if (tcp->fin || tcp->rst) { + __u64 lifetime = now - data->start_time; + bpf_debug("[FLOWS] TCP Connection Ended [%d / %d]. Lasted %u nanos.", data->bytes_sent[0], data->bytes_sent[1], lifetime); + bpf_debug("[FLOWS] Rate Estimate (Mbps): %u / %u", data->rate_estimate[0] / 1000000, data->rate_estimate[1] / 1000000); + bpf_debug("[FLOWS] Retries: %u / %u", data->retries[0], data->retries[1]); + bpf_debug("[FLOWS] RTT: %u / %u (nanos)", data->last_rtt[0], data->last_rtt[1]); + bpf_map_delete_elem(&flowbee, &key); + } +} + +// Note that this duplicates a lot of what we do for "snoop" - we're hoping +// to replace both it and the old RTT system. 
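+// track_flows() dispatches on dissector->ip_protocol: TCP packets get full
+// per-flow tracking via process_tcp(), while the UDP and ICMP handlers above
+// are still empty placeholders.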
static __always_inline void track_flows( struct dissector_t *dissector, // The packet dissector from the previous step u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) +) { + //bpf_debug("[FLOWS] Packet detected"); + __u64 now = bpf_ktime_get_ns(); + switch (dissector->ip_protocol) + { + case IPPROTO_TCP: { + struct tcphdr * tcp = get_tcp_header(dissector); + if (tcp == NULL) { + // Bail out if it's not a TCP packet + return; + } + // Bail out if we've exceeded the packet size and there is no payload + // This keeps the safety checker happy and is generally a good idea + if (tcp + 1 >= dissector->end) { + return; + } + //bpf_debug("[FLOWS] TCP packet detected"); + process_tcp(dissector, direction, tcp, now); + } break; + case IPPROTO_UDP: { + struct udphdr *udp = get_udp_header(dissector); + if (udp == NULL) { + // Bail out if it's not a UDP packet + return; + } + // Bail out if we've exceeded the packet size and there is no payload + // This keeps the safety checker happy and is generally a good idea + if (udp + 1 >= dissector->end) { + return; + } + bpf_debug("[FLOWS] UDP packet detected"); + process_udp(dissector, direction, udp); + } break; + case IPPROTO_ICMP: { + struct icmphdr *icmp = get_icmp_header(dissector); + if (icmp == NULL) { + // Bail out if it's not an ICMP packet + return; + } + // Bail out if we've exceeded the packet size and there is no payload + // This keeps the safety checker happy and is generally a good idea + if (icmp + 1 >= dissector->end) { + return; + } + bpf_debug("[FLOWS] ICMP packet detected"); + process_icmp(dissector, direction, icmp); + } break; + default: { + bpf_debug("[FLOWS] Unsupported protocol: %d", dissector->ip_protocol); + } + } +} + +/*static __always_inline void track_flows( + struct dissector_t *dissector, // The packet dissector from the previous step + u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { struct tcphdr * tcp = get_tcp_header(dissector); if (tcp == NULL) { @@ -175,39 +440,49 @@ static __always_inline void track_flows( //bpf_debug("Dir: %d, Sent/Received: [%d]/[%d]", direction, data->bytes_sent, data->bytes_received); // Parse the TCP options - __u32 tsval = 0; - __u32 tsecr = 0; + //__u32 tsval = 0; + //__u32 tsecr = 0; void *end_opts = (tcp + 1) + (tcp->doff << 2); bool has_data = end_opts - dissector->start < dissector->skb_len; - get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts); + //if (get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts)) { + //bpf_debug("[%d] => TSVal %u TSecr %u", direction, tsval, tsecr); + //bpf_debug("[%d] => Seq %u AckSeq %u", direction, tcp->seq, tcp->ack_seq); + //} if ( tcp->ack && has_data) { - //bpf_debug("Direction %d", direction); - //bpf_debug("to 192.168.66.%d => SEQ %d <-> %d", dissector->dst_ip.in6_u.u6_addr8[15], bpf_ntohs(tcp->seq), bpf_ntohs(tcp->ack_seq)); + //bpf_debug("Direction %d", direction); __u32 sequence = bpf_ntohl(tcp->seq); + __u32 ack_seq = bpf_ntohl(tcp->ack_seq); if (direction == 1) { // Going TO the Internet. We're acknowledging a packet. // We don't need to record an RTT measurement and check for issues. 
- //bpf_debug("%d / %d", data->time_a, data->time_b); + bpf_debug("%u, A: %u / B: %u", sequence, data->time_a, data->time_b); + bpf_debug("%u", ack_seq); if (now > data->next_count_time) { // Calculate the rate estimate __u64 bytes = data->bytes_sent + data->bytes_received - data->next_count_bytes; __u64 time = now - data->last_count_time; - data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1000000; + data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1048576; data->next_count_time = now + SECOND_IN_NANOS; data->next_count_bytes = data->bytes_sent + data->bytes_received; data->last_count_time = now; - bpf_debug("Rate estimate: %u mbits/sec", data->rate_estimate); + bpf_debug("[1] Rate estimate: %u mbits/sec", data->rate_estimate); - if (data->rate_estimate > 5) { + if (data->rate_estimate > 5 && tcp->ack_seq >= data->time_a) { __u64 rtt = now - last_seen; - bpf_debug("RTT: %d nanos", rtt); + bpf_debug("RTT: %d nanos (%u - %u)", rtt, tcp->ack_seq, data->time_a); data->last_rtt = rtt; } } + if (data->rate_estimate > 5 && ack_seq >= data->time_b) { + __u64 rtt = now - last_seen; + bpf_debug("[1] RTT: %d nanos (%u - %u)", rtt, sequence, data->time_b); + data->last_rtt = rtt; + } + if (data->time_a != 0 && sequence < data->time_a) { // This is a retransmission //bpf_debug("DIR 1 Retransmission (or out of order) detected"); @@ -229,11 +504,11 @@ static __always_inline void track_flows( data->next_count_time = now + SECOND_IN_NANOS; data->next_count_bytes = data->bytes_sent + data->bytes_received; data->last_count_time = now; - bpf_debug("Rate estimate: %u mbits/sec", data->rate_estimate); + bpf_debug("[2] Rate estimate: %u mbits/sec", data->rate_estimate); - if (data->rate_estimate > 5) { + if (data->rate_estimate > 5 && tcp->ack_seq >= data->time_b) { __u64 rtt = now - last_seen; - bpf_debug("RTT: %d nanos", rtt); + bpf_debug("[2] RTT: %d nanos", rtt); data->last_rtt = rtt; } } @@ -276,4 +551,4 @@ static __always_inline void track_flows( // /TODO bpf_map_delete_elem(&flowbee, &key); } -} +}*/ diff --git a/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h b/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h index 7fca9d35..8ec8e463 100644 --- a/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h +++ b/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h @@ -553,6 +553,7 @@ static __always_inline void pping_match_packet(struct flow_state *f_state, return; __u64 rtt = (p_info->time - *p_ts) / NS_PER_MS_TIMES_100; + bpf_debug("RTT (from TC): %u", p_info->time - *p_ts); // Delete timestamp entry as soon as RTT is calculated if (bpf_map_delete_elem(&packet_ts, &p_info->reply_pid) == 0) @@ -715,4 +716,82 @@ static __always_inline void tc_pping_start(struct parsing_context *context) pping_parsed_packet(context, &p_info); } -#endif /* __TC_CLASSIFY_KERN_PPING_H */ \ No newline at end of file +#endif /* __TC_CLASSIFY_KERN_PPING_H */ + +/* + +Understanding how this works (psuedocode): + +1. Parsing context is passed into tc_pping_start + 1. We lookup the rotating_performance map for the active host (local side). + 1. If it exists, we check to see if we are in "next entry" time window yet. + 2. If we are, and the current time exceeds the "recycle time", we reset the + performance map and set the "recycle time" to the current time plus the + recycle interval. We exit the function. + 2. We then check to see if the packet is TCP. If it is not, we exit the function. + 3. We then check to see if the packet is complete. If it is not, we exit the function. + 4. We then parse the packet identifier. 
If we are unable to parse the packet identifier, + we exit the function. (the `parse_packet_identifier` function). + 1. We set the packet time to the current time. + 2. We set the flow type to either AF_INET or AF_INET6. + 3. We set the source and destination IP addresses. + 4. We call `parse_tcp_identifier` to parse the TCP identifier. + 1. We use `parse_tcp_ts` to extract the TSval and TSecr from the TCP header. + These are stored in `proto_info.pid` and `proto_info.reply_pid`. + If we fail to parse the TCP identifier, we exit the function. + 2. We set "pid_valid" to true if the next header position is less than the end of the packet + or if the packet is a SYN packet. (i.e. ignore packets with no payload). + 3. We set "reply_pid_valid" to true if the packet is an ACK packet. + 4. RST events are set to "FLOW_EVENT_CLOSING_BOTH", FIN events are set to "FLOW_EVENT_CLOSING", + and SYN events are set to "FLOW_EVENT_OPENING". + 5. We set the source and destination ports. + 5. If we failed to parse the TCP identifier, we exit the function. + 6. We set "pid.identifier" to "proto_info.pid" and "reply_pid.identifier" to "proto_info.reply_pid". + 7. We set "pid_valid" to "proto_info.pid_valid" and "reply_pid_valid" to "proto_info.reply_pid_valid". + 8. We set "event_type" to "proto_info.event_type". + 9. We bail if the protocol is not AF_INET or AF_INET6. + 10. We set "pid_flow_is_dfkey" to "is_dualflow_key(&p_info->pid.flow)". + 1. Compare the source and destination addresses and return true when it + encounters a packet with the source address less than the destination address. + 2. This appears to be a way to sort the flow keys. + 11. We call `reverse_flow` with the reply flow and the forward flow. + 1.Reverse flow sets the destination to the source. + 5. We then call pping_parsed_packet with the parsing context and the packet info. + 1. We call `lookup_or_create_dualflow_state` and return it if we found one. + 1. We call `get_dualflow_key_from_packet` to get the flow key from the packet. + 1. + 2. If `pid_valid` is false, or the event type is "FLOW_EVENT_CLOSING" or "FLOW_EVENT_CLOSING_BOTH", + we return NULL. + 3. If we still haven't got a flow state, we call `create_dualflow_state` with the parsing context, + the packet info, and a pointer to new_flow. + 1. We call `get_dualflow_key_from_packet` to get the flow key from the packet. + 1. If "pid_flow_is_dfkey" we return pid.flow, otherwise reply_pid.flow. + 2. We call `init_dualflow_state` with the new state and the packet info. + 3. We create a new state in the flow state map (or return an existing one). + 4. We set `fw_flow` with `get_flowstate_from_packet` and the packet info. + 1. This in turns calls `fstate_from_dfkey` with the dual flow state and the packet info. + 1. If the packet flow is the dual flow key, we return dir1, otherwise dir2. + 5. We call `update_forward_flowstate` with the packet info. + 1. If the connection state is empty and the packet identifier is valid, we call `init_flowstate` + with the flow state and the packet info. + 1. `init_flowstate` sets the connection state to "WAITOPEN" and the last timestamp to the packet time. + 6. We call `pping_timestamp_packet` with the forward flow, the parsing context, the packet info, and new_flow. + 1. If the flow state is not active, or the packet identifier is not valid, we return. + 2. If the flow state is not new and the identifier is not new, we return. + 3. If the flow state is not new and the packet is rate limited, we return. + 4. 
We set the last timestamp to the packet time. + 7. We set `rev_flow` with `get_reverse_flowstate_from_packet` and the packet info. + 1. + 8. We call `update_reverse_flowstate` with the parsing context, the packet info, and the reverse flow. + 1. + 9. We call `pping_match_packet` with the reverse flow, the packet info, and the active host. + 1. If the flow state is not active, or the reply packet identifier is not valid, we return. + 2. If the flow state has no outstanding timestamps, we return. + 3. We call `bpf_map_lookup_elem` with the packet timestamp map and the reply packet identifier. + 1. If the lookup fails, or the packet time is less than the timestamp, we return. + 4. We calculate the round trip time. + 5. We call `bpf_map_delete_elem` with the packet timestamp map and the reply packet identifier. + 1. If the delete is successful, we decrement the outstanding timestamps. + 10. We call `close_and_delete_flows` with the parsing context, the packet info, the forward flow, and the reverse flow. + 1. +*/ \ No newline at end of file diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 07bbd6c3..9fb07d06 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -233,7 +233,7 @@ int tc_iphash_to_cpu(struct __sk_buff *skb) context.tcp = NULL; context.dissector = &dissector; context.active_host = &lookup_key.address; - tc_pping_start(&context); + //tc_pping_start(&context); // Commented out for comparison if (ip_info && ip_info->tc_handle != 0) { // We found a matching mapped TC flow From c7df905e24f05c8f4501612856e36fee7ef92e96 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 10:30:48 -0600 Subject: [PATCH 007/103] WIP - cleanup done, TCP is mostly working on the server-side. --- src/rust/lqos_sys/src/bpf/common/dissector.h | 36 +- src/rust/lqos_sys/src/bpf/common/flows.h | 405 ++++--------------- src/rust/lqos_sys/src/bpf/lqos_kern.c | 7 +- 3 files changed, 115 insertions(+), 333 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/dissector.h b/src/rust/lqos_sys/src/bpf/common/dissector.h index 39b73eda..fe43f7e2 100644 --- a/src/rust/lqos_sys/src/bpf/common/dissector.h +++ b/src/rust/lqos_sys/src/bpf/common/dissector.h @@ -52,6 +52,8 @@ struct dissector_t __u16 window; __u32 tsval; __u32 tsecr; + __u32 sequence; + __u32 ack_seq; }; // Representation of the VLAN header type. 
@@ -114,6 +116,8 @@ static __always_inline bool dissector_new( dissector->src_port = 0; dissector->dst_port = 0; dissector->tos = 0; + dissector->sequence = 0; + dissector->ack_seq = 0; // Check that there's room for an ethernet header if SKB_OVERFLOW (dissector->start, dissector->end, ethhdr) @@ -315,6 +319,17 @@ static __always_inline struct icmphdr *get_icmp_header(struct dissector_t *disse return NULL; } +#define DIS_TCP_FIN 1 +#define DIS_TCP_SYN 2 +#define DIS_TCP_RST 4 +#define DIS_TCP_PSH 8 +#define DIS_TCP_ACK 16 +#define DIS_TCP_URG 32 +#define DIS_TCP_ECE 64 +#define DIS_TCP_CWR 128 + +#define BITCHECK(flag) (dissector->tcp_flags & flag) + static __always_inline void snoop(struct dissector_t *dissector) { switch (dissector->ip_protocol) @@ -331,17 +346,19 @@ static __always_inline void snoop(struct dissector_t *dissector) dissector->src_port = hdr->source; dissector->dst_port = hdr->dest; __u8 flags = 0; - if (hdr->fin) flags |= 1; - if (hdr->syn) flags |= 2; - if (hdr->rst) flags |= 4; - if (hdr->psh) flags |= 8; - if (hdr->ack) flags |= 16; - if (hdr->urg) flags |= 32; - if (hdr->ece) flags |= 64; - if (hdr->cwr) flags |= 128; + if (hdr->fin) flags |= DIS_TCP_FIN; + if (hdr->syn) flags |= DIS_TCP_SYN; + if (hdr->rst) flags |= DIS_TCP_RST; + if (hdr->psh) flags |= DIS_TCP_PSH; + if (hdr->ack) flags |= DIS_TCP_ACK; + if (hdr->urg) flags |= DIS_TCP_URG; + if (hdr->ece) flags |= DIS_TCP_ECE; + if (hdr->cwr) flags |= DIS_TCP_CWR; dissector->tcp_flags = flags; dissector->window = hdr->window; + dissector->sequence = hdr->seq; + dissector->ack_seq = hdr->ack_seq; parse_tcp_ts(hdr, dissector->end, &dissector->tsval, &dissector->tsecr); } @@ -399,6 +416,7 @@ static __always_inline bool dissector_find_ip_header( dissector->ip_protocol = dissector->ip_header.iph->protocol; dissector->tos = dissector->ip_header.iph->tos; snoop(dissector); + return true; } break; @@ -416,7 +434,7 @@ static __always_inline bool dissector_find_ip_header( encode_ipv6(&dissector->ip_header.ip6h->saddr, &dissector->src_ip); encode_ipv6(&dissector->ip_header.ip6h->daddr, &dissector->dst_ip); dissector->ip_protocol = dissector->ip_header.ip6h->nexthdr; - dissector->ip_header.ip6h->flow_lbl[0]; // Is this right? + dissector->tos = dissector->ip_header.ip6h->flow_lbl[0]; // Is this right? snoop(dissector); return true; } diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 8be2f1dc..e0e7e19e 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -5,6 +5,7 @@ #include "dissector.h" #include "debug.h" + #define SECOND_IN_NANOS 1000000000 #define TIMESTAMP_INTERVAL_NANOS 2000000000 @@ -16,15 +17,17 @@ #define FROM_LOCAL 2 // Defines a TCP connection flow key -struct tcp_flow_key_t { +struct flow_key_t { struct in6_addr src; struct in6_addr dst; __u16 src_port; __u16 dst_port; + __u8 protocol; + __u8 pad; }; // TCP connection flow entry -struct tcp_flow_data_t { +struct flow_data_t { // Time (nanos) when the connection was established __u64 start_time; // Time (nanos) when the connection was last seen @@ -40,7 +43,7 @@ struct tcp_flow_data_t { // Bytes at the next rate estimate __u64 next_count_bytes[2]; // Rate estimate - __u64 rate_estimate[2]; + __u64 rate_estimate_bps[2]; // Sequence number of the last packet __u32 last_sequence[2]; // Acknowledgement number of the last packet @@ -50,10 +53,15 @@ struct tcp_flow_data_t { // Timestamp values __u32 tsval[2]; __u32 tsecr[2]; + // When did the timestamp change? 
__u64 ts_change_time[2]; + // When should we calculate RTT (to avoid flooding) __u64 ts_calc_time[2]; // Most recent RTT __u64 last_rtt[2]; + // Has the connection ended? + // 0 = Alive, 1 = FIN, 2 = RST + __u32 end_status; }; // Map for tracking TCP flow progress. @@ -61,76 +69,62 @@ struct tcp_flow_data_t { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, struct tcp_flow_key_t); - __type(value, struct tcp_flow_data_t); + __type(key, struct flow_key_t); + __type(value, struct flow_data_t); __uint(max_entries, MAX_FLOWS); __uint(pinning, LIBBPF_PIN_BY_NAME); } flowbee SEC(".maps"); -static __always_inline struct tcp_flow_key_t build_tcp_flow_key( +static __always_inline struct flow_data_t new_flow_data( + __u64 now, + struct dissector_t *dissector +) { + struct flow_data_t data = { + .start_time = now, + .bytes_sent = { 0, 0 }, + .packets_sent = { 0, 0 }, + .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, + .last_count_time = { now, now }, + .next_count_bytes = { dissector->skb_len, dissector->skb_len }, + .rate_estimate_bps = { 0, 0 }, + .last_sequence = { 0, 0 }, + .last_ack = { 0, 0 }, + .retries = { 0, 0 }, + .tsval = { 0, 0 }, + .tsecr = { 0, 0 }, + .ts_change_time = { 0, 0 }, + .ts_calc_time = { now, now }, // Get a first number quickly + .last_rtt = { 0, 0 }, + .end_status = 0 + }; + return data; +} + +static __always_inline struct flow_key_t build_flow_key( struct dissector_t *dissector, // The packet dissector from the previous step - struct tcphdr *tcp, // The TCP header u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { if (direction == FROM_INTERNET) { - return (struct tcp_flow_key_t) { + return (struct flow_key_t) { .src = dissector->src_ip, .dst = dissector->dst_ip, - .src_port = tcp->source, - .dst_port = tcp->dest, + .src_port = dissector->src_port, + .dst_port = dissector->dst_port, + .protocol = dissector->ip_protocol, + .pad = 0 }; } else { - return (struct tcp_flow_key_t) { + return (struct flow_key_t) { .src = dissector->dst_ip, .dst = dissector->src_ip, - .src_port = tcp->dest, - .dst_port = tcp->source, + .src_port = dissector->dst_port, + .dst_port = dissector->src_port, + .protocol = dissector->ip_protocol, + .pad = 0 }; } } -static __always_inline void debug_ip( - struct in6_addr *ip -) { - bpf_debug("%d.%d.%d", ip->s6_addr[13], ip->s6_addr[14], ip->s6_addr[15]); -} - -static __always_inline bool get_timestamps( - u_int32_t * out_tsval, - u_int32_t * out_tsecr, - struct tcphdr * tcp, - struct dissector_t * dissector, - void * end_opts -) { - u_int8_t *pos = (u_int8_t *)(tcp + 1); // Current pos in TCP options - u_int8_t len; - - // This should be 10, but we're running out of space - for (u_int8_t i = 0; i<6; i++) { - if (pos + 2 > dissector->end) { - return false; - } - switch (*pos) { - case 0: return false; // End of options - case 1: pos++; break; // NOP - case 8: { - if (pos + 10 > dissector->end) { - return false; - } - *out_tsval = bpf_ntohl(*(__u32 *)(pos + 2)); - *out_tsecr = bpf_ntohl(*(__u32 *)(pos + 6)); - return true; - } - default: { - len = *(pos + 1); - pos += len; - } - } - } - - return false; -} - // Handle Per-Flow ICMP Analysis static __always_inline void process_icmp( struct dissector_t *dissector, @@ -153,31 +147,17 @@ static __always_inline void process_udp( static __always_inline void process_tcp( struct dissector_t *dissector, u_int8_t direction, - struct tcphdr *tcp, u_int64_t now ) { - if ((tcp->syn && !tcp->ack && direction == TO_INTERNET) || (tcp->syn && tcp->ack 
&& direction == FROM_INTERNET)) { + if ((BITCHECK(DIS_TCP_SYN) && !BITCHECK(DIS_TCP_ACK) && direction == TO_INTERNET) || + (BITCHECK(DIS_TCP_SYN) && BITCHECK(DIS_TCP_ACK) && direction == FROM_INTERNET)) { // A customer is requesting a new TCP connection. That means // we need to start tracking this flow. + #ifdef VERBOSE bpf_debug("[FLOWS] New TCP Connection Detected (%u)", direction); - struct tcp_flow_key_t key = build_tcp_flow_key(dissector, tcp, direction); - struct tcp_flow_data_t data = { - .start_time = now, - .bytes_sent = { 0, 0 }, - .packets_sent = { 0, 0 }, - .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, - .last_count_time = { now, now }, - .next_count_bytes = { dissector->skb_len, dissector->skb_len }, - .rate_estimate = { 0, 0 }, - .last_sequence = { 0, 0 }, - .last_ack = { 0, 0 }, - .retries = { 0, 0 }, - .tsval = { 0, 0 }, - .tsecr = { 0, 0 }, - .ts_change_time = { 0, 0 }, - .ts_calc_time = { now + TIMESTAMP_INTERVAL_NANOS, now + TIMESTAMP_INTERVAL_NANOS }, - .last_rtt = { 0, 0 } - }; + #endif + struct flow_key_t key = build_flow_key(dissector, direction); + struct flow_data_t data = new_flow_data(now, dissector); if (bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY) != 0) { bpf_debug("[FLOWS] Failed to add new flow to map"); } @@ -185,8 +165,8 @@ static __always_inline void process_tcp( } // Build the flow key - struct tcp_flow_key_t key = build_tcp_flow_key(dissector, tcp, direction); - struct tcp_flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); + struct flow_key_t key = build_flow_key(dissector, direction); + struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { // If it isn't a flow we're tracking, bail out now return; @@ -204,8 +184,7 @@ static __always_inline void process_tcp( // Calculate the rate estimate __u64 bits = (data->bytes_sent[0] - data->next_count_bytes[0])*8; __u64 time = (now - data->last_count_time[0]) / 1000000000; // Seconds - data->rate_estimate[0] = bits/time; - //bpf_debug("[FLOWS][%d] Rate Estimate: %u mbits / second", direction, data->rate_estimate[0] / 1000000); + data->rate_estimate_bps[0] = bits/time; data->next_count_time[0] = now + SECOND_IN_NANOS; data->next_count_bytes[0] = data->bytes_sent[0]; data->last_count_time[0] = now; @@ -218,8 +197,7 @@ static __always_inline void process_tcp( // Calculate the rate estimate __u64 bits = (data->bytes_sent[1] - data->next_count_bytes[1])*8; __u64 time = (now - data->last_count_time[1]) / 1000000000; // Seconds - data->rate_estimate[1] = bits/time; - //bpf_debug("[FLOWS][%d] Rate Estimate: %u mbits / second", direction, data->rate_estimate[1] / 1000000); + data->rate_estimate_bps[1] = bits/time; data->next_count_time[1] = now + SECOND_IN_NANOS; data->next_count_bytes[1] = data->bytes_sent[1]; data->last_count_time[1] = now; @@ -227,12 +205,11 @@ static __always_inline void process_tcp( } // Sequence and Acknowledgement numbers - __u32 sequence = bpf_ntohl(tcp->seq); - __u32 ack_seq = bpf_ntohl(tcp->ack_seq); + __u32 sequence = bpf_ntohl(dissector->sequence); + __u32 ack_seq = bpf_ntohl(dissector->ack_seq); if (direction == TO_INTERNET) { if (data->last_sequence[0] != 0 && sequence < data->last_sequence[0]) { // This is a retransmission - //bpf_debug("[FLOWS] Retransmission detected (%u)", direction); data->retries[0]++; } @@ -241,33 +218,28 @@ static __always_inline void process_tcp( } else { if (data->last_sequence[1] != 0 && sequence < data->last_sequence[1]) { // This is a retransmission - //bpf_debug("[FLOWS] Retransmission detected 
(%u)", direction); data->retries[1]++; } data->last_sequence[1] = sequence; data->last_ack[1] = ack_seq; } - //bpf_debug("[FLOWS][%d] Sequence: %u Ack: %u", direction, sequence, ack_seq); // Timestamps to calculate RTT - u_int32_t tsval = 0; - u_int32_t tsecr = 0; - void *end_opts = (tcp + 1) + (tcp->doff << 2); - if (tcp->ack && get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts)) { - //bpf_debug("[FLOWS][%d] TSVal %u TSecr %u", direction, tsval, tsecr); + u_int32_t tsval = dissector->tsval; + u_int32_t tsecr = dissector->tsecr; + if (BITCHECK(DIS_TCP_ACK) && tsval != 0) { if (direction == TO_INTERNET) { if (tsval != data->tsval[0] || tsecr != data->tsecr[0]) { if (tsval == data->tsecr[1]) { - //bpf_debug("%d Matched!", direction); - __u64 elapsed = now - data->ts_change_time[1]; - data->last_rtt[0] = elapsed; - //bpf_debug("%d TS Change (RTT): %u nanos", direction, elapsed); - // TODO: Do something with the RTT + if (now > data->ts_calc_time[0]) { + __u64 elapsed = now - data->ts_change_time[1]; + data->ts_calc_time[0] = now + TIMESTAMP_INTERVAL_NANOS; + data->last_rtt[0] = elapsed; + } } - //bpf_debug("%d TSVal Changed", direction); data->ts_change_time[0] = now; data->tsval[0] = tsval; data->tsecr[0] = tsecr; @@ -276,44 +248,25 @@ static __always_inline void process_tcp( if (tsval != data->tsval[1] || tsecr != data->tsecr[1]) { if (tsval == data->tsecr[0]) { - //bpf_debug("%d Matched!", direction); - __u64 elapsed = now - data->ts_change_time[0]; - data->last_rtt[1] = elapsed; - //bpf_debug("%d TS Change (RTT): %u nanos", direction, elapsed); - // TODO: Do something with the RTT + if (now > data->ts_calc_time[1]) { + __u64 elapsed = now - data->ts_change_time[0]; + data->ts_calc_time[1] = now + TIMESTAMP_INTERVAL_NANOS; + data->last_rtt[1] = elapsed; + } } - //bpf_debug("%d TSVal Changed", direction); data->ts_change_time[1] = now; data->tsval[1] = tsval; data->tsecr[1] = tsecr; } } - - - /*else { - if (tsval == data->tsecr[0]) { - //if (tsval == data->tsecr[0] && now > data->ts_calc_time[1]) { - __u64 elapsed = now - data->ts_change_time[0]; - bpf_debug("[FLOWS][%d] TS Change (RTT): %u nanos", direction, elapsed); - data->ts_calc_time[1] = now + TIMESTAMP_INTERVAL_NANOS; - } - if (tsval != data->tsval[1]) { - data->ts_change_time[1] = now; - } - data->tsval[1] = tsval; - data->tsecr[1] = tsecr; - }*/ } // Has the connection ended? - if (tcp->fin || tcp->rst) { - __u64 lifetime = now - data->start_time; - bpf_debug("[FLOWS] TCP Connection Ended [%d / %d]. 
Lasted %u nanos.", data->bytes_sent[0], data->bytes_sent[1], lifetime); - bpf_debug("[FLOWS] Rate Estimate (Mbps): %u / %u", data->rate_estimate[0] / 1000000, data->rate_estimate[1] / 1000000); - bpf_debug("[FLOWS] Retries: %u / %u", data->retries[0], data->retries[1]); - bpf_debug("[FLOWS] RTT: %u / %u (nanos)", data->last_rtt[0], data->last_rtt[1]); - bpf_map_delete_elem(&flowbee, &key); + if (BITCHECK(DIS_TCP_FIN)) { + data->end_status = 1; + } else if (BITCHECK(DIS_TCP_RST)) { + data->end_status = 2; } } @@ -323,24 +276,12 @@ static __always_inline void track_flows( struct dissector_t *dissector, // The packet dissector from the previous step u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { - //bpf_debug("[FLOWS] Packet detected"); - __u64 now = bpf_ktime_get_ns(); + __u64 now = bpf_ktime_get_boot_ns(); + + // Pass to the appropriate protocol handler switch (dissector->ip_protocol) { - case IPPROTO_TCP: { - struct tcphdr * tcp = get_tcp_header(dissector); - if (tcp == NULL) { - // Bail out if it's not a TCP packet - return; - } - // Bail out if we've exceeded the packet size and there is no payload - // This keeps the safety checker happy and is generally a good idea - if (tcp + 1 >= dissector->end) { - return; - } - //bpf_debug("[FLOWS] TCP packet detected"); - process_tcp(dissector, direction, tcp, now); - } break; + case IPPROTO_TCP: process_tcp(dissector, direction, now); break; case IPPROTO_UDP: { struct udphdr *udp = get_udp_header(dissector); if (udp == NULL) { @@ -352,7 +293,6 @@ static __always_inline void track_flows( if (udp + 1 >= dissector->end) { return; } - bpf_debug("[FLOWS] UDP packet detected"); process_udp(dissector, direction, udp); } break; case IPPROTO_ICMP: { @@ -366,189 +306,12 @@ static __always_inline void track_flows( if (icmp + 1 >= dissector->end) { return; } - bpf_debug("[FLOWS] ICMP packet detected"); process_icmp(dissector, direction, icmp); } break; default: { + #ifdef VERBOSE bpf_debug("[FLOWS] Unsupported protocol: %d", dissector->ip_protocol); + #endif } } } - -/*static __always_inline void track_flows( - struct dissector_t *dissector, // The packet dissector from the previous step - u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) -) { - struct tcphdr * tcp = get_tcp_header(dissector); - if (tcp == NULL) { - // Bail out if it's not a TCP packet - return; - } - - // Bail out if we've exceeded the packet size and there is no payload - // This keeps the safety checker happy and is generally a good idea - if (tcp + 1 >= dissector->end) { - return; - } - - // Determine the key for the flow. Since we know direction, there's - // no need to consider "reverse keys" and their ilk. - struct tcp_flow_key_t key = build_flow_key(dissector, direction); - - // Only care about connections that originate locally - __u64 now = bpf_ktime_get_ns(); - if (tcp->syn && direction == 1) { - // SYN packet sent to the Internet. We are establishing a new connection. - // We need to add this flow to the tracking table. 
- bpf_debug("New TCP connection detected"); - struct tcp_flow_data_t data = { - .start_time = now, - .last_seen_a = now, - .last_seen_b = now, - .bytes_sent = dissector->skb_len, - .bytes_received = 0, - .time_a = 0, - .time_b = 0, - .last_rtt = 0, - .packets_sent = 1, - .packets_received = 0, - .retries_a = 0, - .retries_b = 0, - .next_count_time = now + SECOND_IN_NANOS, - .next_count_bytes = dissector->skb_len, - .rate_estimate = 0, - .last_count_time = now - }; - bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY); - } - - // Update the flow's last seen time - struct tcp_flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); - if (data == NULL) { - return; - } - __u64 last_seen = data->last_seen_a; - if (direction == 1) { - data->last_seen_a = now; - data->bytes_sent += dissector->skb_len; - data->packets_sent++; - } else { - data->last_seen_b = now; - data->bytes_received += dissector->skb_len; - data->packets_received++; - } - //bpf_debug("Dir: %d, Sent/Received: [%d]/[%d]", direction, data->bytes_sent, data->bytes_received); - - // Parse the TCP options - //__u32 tsval = 0; - //__u32 tsecr = 0; - void *end_opts = (tcp + 1) + (tcp->doff << 2); - bool has_data = end_opts - dissector->start < dissector->skb_len; - //if (get_timestamps(&tsval, &tsecr, tcp, dissector, end_opts)) { - //bpf_debug("[%d] => TSVal %u TSecr %u", direction, tsval, tsecr); - //bpf_debug("[%d] => Seq %u AckSeq %u", direction, tcp->seq, tcp->ack_seq); - //} - - if ( tcp->ack && has_data) { - //bpf_debug("Direction %d", direction); - __u32 sequence = bpf_ntohl(tcp->seq); - __u32 ack_seq = bpf_ntohl(tcp->ack_seq); - - if (direction == 1) { - // Going TO the Internet. We're acknowledging a packet. - // We don't need to record an RTT measurement and check for issues. - bpf_debug("%u, A: %u / B: %u", sequence, data->time_a, data->time_b); - bpf_debug("%u", ack_seq); - - if (now > data->next_count_time) { - // Calculate the rate estimate - __u64 bytes = data->bytes_sent + data->bytes_received - data->next_count_bytes; - __u64 time = now - data->last_count_time; - data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1048576; - data->next_count_time = now + SECOND_IN_NANOS; - data->next_count_bytes = data->bytes_sent + data->bytes_received; - data->last_count_time = now; - bpf_debug("[1] Rate estimate: %u mbits/sec", data->rate_estimate); - - if (data->rate_estimate > 5 && tcp->ack_seq >= data->time_a) { - __u64 rtt = now - last_seen; - bpf_debug("RTT: %d nanos (%u - %u)", rtt, tcp->ack_seq, data->time_a); - data->last_rtt = rtt; - } - } - - if (data->rate_estimate > 5 && ack_seq >= data->time_b) { - __u64 rtt = now - last_seen; - bpf_debug("[1] RTT: %d nanos (%u - %u)", rtt, sequence, data->time_b); - data->last_rtt = rtt; - } - - if (data->time_a != 0 && sequence < data->time_a) { - // This is a retransmission - //bpf_debug("DIR 1 Retransmission (or out of order) detected"); - //bpf_debug("to 192.168.66.%d => SEQ %d < %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_a); - data->retries_a++; - } - - data->time_a = sequence; - } else { - // Coming FROM the Internet. They are acknowledging a packet. - // We need to record an RTT measurement, but we can check for issues. 
- //bpf_debug("%d / %d", data->time_a, data->time_b); - - if (now > data->next_count_time) { - // Calculate the rate estimate - __u64 bytes = data->bytes_sent + data->bytes_received - data->next_count_bytes; - __u64 time = now - data->last_count_time; - data->rate_estimate = ((bytes * SECOND_IN_NANOS / time)*8)/1000000; - data->next_count_time = now + SECOND_IN_NANOS; - data->next_count_bytes = data->bytes_sent + data->bytes_received; - data->last_count_time = now; - bpf_debug("[2] Rate estimate: %u mbits/sec", data->rate_estimate); - - if (data->rate_estimate > 5 && tcp->ack_seq >= data->time_b) { - __u64 rtt = now - last_seen; - bpf_debug("[2] RTT: %d nanos", rtt); - data->last_rtt = rtt; - } - } - - - if (data->time_b != 0 && sequence < data->time_b) { - // This is a retransmission - //bpf_debug("DIR 2 Retransmission (or out of order) detected"); - //bpf_debug("to 192.168.66.%d => SEQ %d > %d", dissector->dst_ip.in6_u.u6_addr8[15], sequence, data->time_b); - data->retries_b++; - } - - data->time_b = sequence; - } - - - //bpf_debug("to 192.168.66.%d => TS %d <-> %d", dissector->dst_ip.in6_u.u6_addr8[15], bpf_ntohs(tsval), bpf_ntohs(tsecr)); - } else if ( tcp->fin) { - // FIN packet. We are closing a connection. - // We need to remove this flow from the tracking table. - bpf_debug("TCP connection closed"); - // TODO: Submit the result somewhere - bpf_debug(" Flow Lifetime: %u nanos", now - data->start_time); - bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); - bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); - bpf_debug(" RTT : %d nanos", data->last_rtt); - bpf_debug(" RETRIES : %d / %d", data->retries_a, data->retries_b); - // /TODO - bpf_map_delete_elem(&flowbee, &key); - } else if ( tcp->rst ) { - // RST packet. We are resetting a connection. - // We need to remove this flow from the tracking table. - bpf_debug("TCP connection reset"); - // TODO: Submit the result somewhere - bpf_debug(" Flow Lifetime: %u nanos", now - data->start_time); - bpf_debug(" BYTES : %d / %d", data->bytes_sent, data->bytes_received); - bpf_debug(" PACKETS : %d / %d", data->packets_sent, data->packets_received); - bpf_debug(" RTT : %d nanos", data->last_rtt); - bpf_debug(" RETRIES : %d / %d", data->retries_a, data->retries_b); - // /TODO - bpf_map_delete_elem(&flowbee, &key); - } -}*/ diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 9fb07d06..da334232 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -112,9 +112,6 @@ int xdp_prog(struct xdp_md *ctx) bpf_debug("(XDP) Spotted VLAN: %u", dissector.current_vlan); #endif - // Per-Flow RTT Tracking - track_flows(&dissector, effective_direction); - // Determine the lookup key by direction struct ip_hash_key lookup_key; struct ip_hash_info * ip_info = setup_lookup_key_and_tc_cpu( @@ -130,6 +127,10 @@ int xdp_prog(struct xdp_md *ctx) tc_handle = ip_info->tc_handle; cpu = ip_info->cpu; } + + // Per-Flow RTT Tracking + track_flows(&dissector, effective_direction); + // Update the traffic tracking buffers track_traffic( effective_direction, From bfc9b8227c9b40a50bac6e4ae20387d91f5d969d Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 10:38:17 -0600 Subject: [PATCH 008/103] Skeleton for including UDP and ICMP flow data. 
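
A rough sketch of the once-per-second, per-direction arithmetic that the new shared update_flow_rates() helper performs (illustrative Rust only; the function name and stand-alone form are invented here, and the real implementation is the eBPF C in flows.h below):

    // Illustrative sketch, not part of the patch: mirrors the rate estimate in
    // update_flow_rates(), assuming nanosecond clock values.
    const SECOND_IN_NANOS: u64 = 1_000_000_000;

    fn rate_estimate_bps(
        bytes_sent_now: u64,         // data->bytes_sent[dir]
        bytes_at_last_estimate: u64, // data->next_count_bytes[dir]
        now_ns: u64,                 // current kernel clock
        last_count_ns: u64,          // data->last_count_time[dir]
    ) -> u64 {
        let bits = (bytes_sent_now - bytes_at_last_estimate) * 8;
        let seconds = (now_ns - last_count_ns) / SECOND_IN_NANOS;
        if seconds == 0 {
            return 0; // guard for the sketch; the kernel path waits a full second first
        }
        bits / seconds
    }

The kernel path only recomputes once next_count_time has passed, i.e. at least SECOND_IN_NANOS after the previous estimate, so the elapsed time there is always at least one second.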
--- src/rust/lqos_sys/src/bpf/common/flows.h | 128 ++++++++++++----------- 1 file changed, 67 insertions(+), 61 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index e0e7e19e..e5e61489 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -125,22 +125,82 @@ static __always_inline struct flow_key_t build_flow_key( } } +static __always_inline void update_flow_rates( + struct dissector_t *dissector, + u_int8_t direction, + struct flow_data_t *data, + __u64 now +) { + data->last_seen = now; + + // Update bytes and packets sent + if (direction == TO_INTERNET) { + data->bytes_sent[0] += dissector->skb_len; + data->packets_sent[0]++; + + if (now > data->next_count_time[0]) { + // Calculate the rate estimate + __u64 bits = (data->bytes_sent[0] - data->next_count_bytes[0])*8; + __u64 time = (now - data->last_count_time[0]) / 1000000000; // Seconds + data->rate_estimate_bps[0] = bits/time; + data->next_count_time[0] = now + SECOND_IN_NANOS; + data->next_count_bytes[0] = data->bytes_sent[0]; + data->last_count_time[0] = now; + } + } else { + data->bytes_sent[1] += dissector->skb_len; + data->packets_sent[1]++; + + if (now > data->next_count_time[1]) { + // Calculate the rate estimate + __u64 bits = (data->bytes_sent[1] - data->next_count_bytes[1])*8; + __u64 time = (now - data->last_count_time[1]) / 1000000000; // Seconds + data->rate_estimate_bps[1] = bits/time; + data->next_count_time[1] = now + SECOND_IN_NANOS; + data->next_count_bytes[1] = data->bytes_sent[1]; + data->last_count_time[1] = now; + } + } +} + // Handle Per-Flow ICMP Analysis static __always_inline void process_icmp( struct dissector_t *dissector, u_int8_t direction, - struct icmphdr *icmp + u_int64_t now ) { - + struct flow_key_t key = build_flow_key(dissector, direction); + struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) { + // There isn't a flow, so we need to make one + struct flow_data_t new_data = new_flow_data(now, dissector); + if (bpf_map_update_elem(&flowbee, &key, &new_data, BPF_ANY) != 0) { + bpf_debug("[FLOWS] Failed to add new flow to map"); + return; + } + data = bpf_map_lookup_elem(&flowbee, &key); + } + update_flow_rates(dissector, direction, data, now); } // Handle Per-Flow UDP Analysis static __always_inline void process_udp( struct dissector_t *dissector, u_int8_t direction, - struct udphdr *udp + u_int64_t now ) { - + struct flow_key_t key = build_flow_key(dissector, direction); + struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) { + // There isn't a flow, so we need to make one + struct flow_data_t new_data = new_flow_data(now, dissector); + if (bpf_map_update_elem(&flowbee, &key, &new_data, BPF_ANY) != 0) { + bpf_debug("[FLOWS] Failed to add new flow to map"); + return; + } + data = bpf_map_lookup_elem(&flowbee, &key); + } + update_flow_rates(dissector, direction, data, now); } // Handle Per-Flow TCP Analysis @@ -172,37 +232,7 @@ static __always_inline void process_tcp( return; } - // Update last seen to now - data->last_seen = now; - - // Update bytes and packets sent - if (direction == TO_INTERNET) { - data->bytes_sent[0] += dissector->skb_len; - data->packets_sent[0]++; - - if (now > data->next_count_time[0]) { - // Calculate the rate estimate - __u64 bits = (data->bytes_sent[0] - data->next_count_bytes[0])*8; - __u64 time = (now - data->last_count_time[0]) / 1000000000; // Seconds - data->rate_estimate_bps[0] = bits/time; - 
data->next_count_time[0] = now + SECOND_IN_NANOS; - data->next_count_bytes[0] = data->bytes_sent[0]; - data->last_count_time[0] = now; - } - } else { - data->bytes_sent[1] += dissector->skb_len; - data->packets_sent[1]++; - - if (now > data->next_count_time[1]) { - // Calculate the rate estimate - __u64 bits = (data->bytes_sent[1] - data->next_count_bytes[1])*8; - __u64 time = (now - data->last_count_time[1]) / 1000000000; // Seconds - data->rate_estimate_bps[1] = bits/time; - data->next_count_time[1] = now + SECOND_IN_NANOS; - data->next_count_bytes[1] = data->bytes_sent[1]; - data->last_count_time[1] = now; - } - } + update_flow_rates(dissector, direction, data, now); // Sequence and Acknowledgement numbers __u32 sequence = bpf_ntohl(dissector->sequence); @@ -282,32 +312,8 @@ static __always_inline void track_flows( switch (dissector->ip_protocol) { case IPPROTO_TCP: process_tcp(dissector, direction, now); break; - case IPPROTO_UDP: { - struct udphdr *udp = get_udp_header(dissector); - if (udp == NULL) { - // Bail out if it's not a UDP packet - return; - } - // Bail out if we've exceeded the packet size and there is no payload - // This keeps the safety checker happy and is generally a good idea - if (udp + 1 >= dissector->end) { - return; - } - process_udp(dissector, direction, udp); - } break; - case IPPROTO_ICMP: { - struct icmphdr *icmp = get_icmp_header(dissector); - if (icmp == NULL) { - // Bail out if it's not an ICMP packet - return; - } - // Bail out if we've exceeded the packet size and there is no payload - // This keeps the safety checker happy and is generally a good idea - if (icmp + 1 >= dissector->end) { - return; - } - process_icmp(dissector, direction, icmp); - } break; + case IPPROTO_UDP: process_udp(dissector, direction, now); break; + case IPPROTO_ICMP: process_icmp(dissector, direction, now); break; default: { #ifdef VERBOSE bpf_debug("[FLOWS] Unsupported protocol: %d", dissector->ip_protocol); From 8fa53782c60ed561c9571991ddacde1ef7ad3018 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 11:53:18 -0600 Subject: [PATCH 009/103] Work in progress. Add endian conversions. Add Rust interface for iterating the flows list. Add a temporary interface that dumps flow data to the console to prove that it works. 
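
On the Rust side, each fixed-size key/value record read back from the kernel iterator is mapped onto the FlowbeeKey / FlowbeeData structs introduced in flowbee_data.rs. A minimal sketch of a per-flow callback (hypothetical consumer code; in this commit the temporary iterate_flows() still hard-wires its own logging callback):

    // Illustrative sketch, not part of the patch. Assumes only the public
    // flowbee_data types and the log crate already used elsewhere in lqos_sys.
    use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey};

    fn print_flow(key: &FlowbeeKey, data: &FlowbeeData) {
        log::info!(
            "proto {} flow {:?} -> {:?}: {}/{} bytes, {}/{} packets",
            key.ip_protocol,
            key.remote_ip,
            key.local_ip,
            data.bytes_sent[0],
            data.bytes_sent[1],
            data.packets_sent[0],
            data.packets_sent[1]
        );
    }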
--- src/rust/lqos_sys/src/bpf/common/flows.h | 22 ++++++--- src/rust/lqos_sys/src/bpf/lqos_kern.c | 19 +++++++ src/rust/lqos_sys/src/bpf_iterator.rs | 50 +++++++++++++++++-- src/rust/lqos_sys/src/flowbee_data.rs | 63 ++++++++++++++++++++++++ src/rust/lqos_sys/src/lib.rs | 1 + 5 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 src/rust/lqos_sys/src/flowbee_data.rs diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index e5e61489..05f52692 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + // TCP flow monitor system #include @@ -24,6 +26,8 @@ struct flow_key_t { __u16 dst_port; __u8 protocol; __u8 pad; + __u8 pad1; + __u8 pad2; }; // TCP connection flow entry @@ -108,19 +112,23 @@ static __always_inline struct flow_key_t build_flow_key( return (struct flow_key_t) { .src = dissector->src_ip, .dst = dissector->dst_ip, - .src_port = dissector->src_port, - .dst_port = dissector->dst_port, + .src_port = bpf_htons(dissector->src_port), + .dst_port = bpf_htons(dissector->dst_port), .protocol = dissector->ip_protocol, - .pad = 0 + .pad = 0, + .pad1 = 0, + .pad2 = 0 }; } else { return (struct flow_key_t) { .src = dissector->dst_ip, .dst = dissector->src_ip, - .src_port = dissector->dst_port, - .dst_port = dissector->src_port, + .src_port = bpf_htons(dissector->dst_port), + .dst_port = bpf_htons(dissector->src_port), .protocol = dissector->ip_protocol, - .pad = 0 + .pad = 0, + .pad1 = 0, + .pad2 = 0 }; } } @@ -179,6 +187,7 @@ static __always_inline void process_icmp( return; } data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) return; } update_flow_rates(dissector, direction, data, now); } @@ -199,6 +208,7 @@ static __always_inline void process_udp( return; } data = bpf_map_lookup_elem(&flowbee, &key); + if (data == NULL) return; } update_flow_rates(dissector, direction, data, now); } diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index da334232..5abaa397 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -422,4 +422,23 @@ int heimdall_reader(struct bpf_iter__bpf_map_elem *ctx) { return 0; } +SEC("iter/bpf_map_elem") +int flow_reader(struct bpf_iter__bpf_map_elem *ctx) +{ + // The sequence file + struct seq_file *seq = ctx->meta->seq; + struct flow_data_t *counter = ctx->value; + struct flow_key_t *ip = ctx->key; + + // Bail on end + if (counter == NULL || ip == NULL) { + return 0; + } + + //BPF_SEQ_PRINTF(seq, "%d %d\n", counter->next_entry, counter->rtt[0]); + bpf_seq_write(seq, ip, sizeof(struct flow_key_t)); + bpf_seq_write(seq, counter, sizeof(struct flow_data_t)); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index f3c77c0e..63ec24ef 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -1,6 +1,5 @@ use crate::{ - kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter, - RttTrackingEntry, heimdall_data::{HeimdallKey, HeimdallData}, + flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter, RttTrackingEntry }; use lqos_utils::XdpIpAddress; use once_cell::sync::Lazy; @@ -149,7 +148,17 @@ where let (_head, values, _tail) = unsafe { &value_slice.align_to::() }; - callback(&key[0], 
&values[0]); + if !key.is_empty() && !values.is_empty() { + callback(&key[0], &values[0]); + } else { + log::error!("Empty key or value found in iterator"); + if key.is_empty() { + log::error!("Empty key"); + } + if values.is_empty() { + log::error!("Empty value"); + } + } index += Self::KEY_SIZE + Self::VALUE_SIZE; } @@ -191,6 +200,10 @@ static mut HEIMDALL_TRACKER: Lazy< Option>, > = Lazy::new(|| None); +static mut FLOWBEE_TRACKER: Lazy< + Option>, +> = Lazy::new(|| None); + pub unsafe fn iterate_throughput( callback: &mut dyn FnMut(&XdpIpAddress, &[HostCounter]), ) { @@ -235,6 +248,9 @@ pub unsafe fn iterate_rtt( if let Some(iter) = RTT_TRACKER.as_mut() { let _ = iter.for_each(callback); } + + // TEMPORARY + iterate_flows(); } /// Iterate through the heimdall map and call the callback for each entry. @@ -261,4 +277,32 @@ pub fn iterate_heimdall( let _ = iter.for_each_per_cpu(callback); } } +} + +/// Iterate through the Flows 2 system tracker, retrieving all flows +pub fn iterate_flows() { + unsafe { + if FLOWBEE_TRACKER.is_none() { + let lock = BPF_SKELETON.lock().unwrap(); + if let Some(skeleton) = lock.as_ref() { + let skeleton = skeleton.get_ptr(); + if let Ok(iter) = unsafe { + BpfMapIterator::new( + (*skeleton).progs.flow_reader, + (*skeleton).maps.flowbee, + ) + } { + *FLOWBEE_TRACKER = Some(iter); + } + } + } + + let mut callback = |key: &FlowbeeKey, data: &FlowbeeData| { + log::info!("Flow: {:#?} -> {:#?}", key, data); + }; + + if let Some(iter) = FLOWBEE_TRACKER.as_mut() { + let _ = iter.for_each(&mut callback); + } + } } \ No newline at end of file diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs new file mode 100644 index 00000000..692c19bf --- /dev/null +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -0,0 +1,63 @@ +use lqos_utils::XdpIpAddress; +use zerocopy::FromBytes; + +/// Representation of the eBPF `flow_key_t` type. +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, FromBytes)] +#[repr(C)] +pub struct FlowbeeKey { + /// Mapped `XdpIpAddress` source for the flow. + pub remote_ip: XdpIpAddress, + /// Mapped `XdpIpAddress` destination for the flow + pub local_ip: XdpIpAddress, + /// Source port number, or ICMP type. + pub src_port: u16, + /// Destination port number. + pub dst_port: u16, + /// IP protocol (see the Linux kernel!) + pub ip_protocol: u8, + /// Padding to align the structure to 16 bytes. + padding: u8, + padding1: u8, + padding2: u8, +} + +/// Mapped representation of the eBPF `flow_data_t` type. +#[derive(Debug, Clone, Default, FromBytes)] +#[repr(C)] +pub struct FlowbeeData { + /// Time (nanos) when the connection was established + pub start_time: u64, + /// Time (nanos) when the connection was last seen + pub last_seen: u64, + /// Bytes transmitted + pub bytes_sent: [u64; 2], + /// Packets transmitted + pub packets_sent: [u64; 2], + /// Clock for the next rate estimate + pub next_count_time: [u64; 2], + /// Clock for the previous rate estimate + pub last_count_time: [u64; 2], + /// Bytes at the next rate estimate + pub next_count_bytes: [u64; 2], + /// Rate estimate + pub rate_estimate_bps: [u64; 2], + /// Sequence number of the last packet + pub last_sequence: [u32; 2], + /// Acknowledgement number of the last packet + pub last_ack: [u32; 2], + /// Retry Counters + pub retries: [u32; 2], + /// Timestamp values + pub tsval: [u32; 2], + /// Timestamp echo values + pub tsecr: [u32; 2], + /// When did the timestamp change? 
+ pub ts_change_time: [u64; 2], + /// When should we calculate RTT (to avoid flooding) + pub ts_calc_time: [u64; 2], + /// Most recent RTT + pub last_rtt: [u64; 2], + /// Has the connection ended? + /// 0 = Alive, 1 = FIN, 2 = RST + pub end_status: u32, +} \ No newline at end of file diff --git a/src/rust/lqos_sys/src/lib.rs b/src/rust/lqos_sys/src/lib.rs index d8ac2c3e..a957f198 100644 --- a/src/rust/lqos_sys/src/lib.rs +++ b/src/rust/lqos_sys/src/lib.rs @@ -22,6 +22,7 @@ mod bpf_iterator; /// Data shared between eBPF and Heimdall that needs local access /// for map control. pub mod heimdall_data; +pub mod flowbee_data; pub use ip_mapping::{ add_ip_to_tc, clear_ips_from_tc, del_ip_from_tc, list_mapped_ips, From b7c02d251d7cdd51fc110ee4fe438f56399376b2 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 11:56:36 -0600 Subject: [PATCH 010/103] Warning fix --- src/rust/lqos_sys/src/bpf_iterator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 63ec24ef..20b7d36a 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -286,7 +286,7 @@ pub fn iterate_flows() { let lock = BPF_SKELETON.lock().unwrap(); if let Some(skeleton) = lock.as_ref() { let skeleton = skeleton.get_ptr(); - if let Ok(iter) = unsafe { + if let Ok(iter) = { BpfMapIterator::new( (*skeleton).progs.flow_reader, (*skeleton).maps.flowbee, From f33d22faa00495798a35b387ab034a714bd9bc6c Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 14:06:13 -0600 Subject: [PATCH 011/103] In progress. The RTT data from the flows system is integrated into the high-level per-IP RTT tracker. --- src/rust/lqos_sys/src/bpf_iterator.rs | 15 ++++++----- src/rust/lqos_sys/src/flowbee_data.rs | 2 ++ src/rust/lqos_sys/src/lib.rs | 2 +- .../src/throughput_tracker/tracking_data.rs | 26 +++++++++++++++++-- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 20b7d36a..27378e49 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -250,7 +250,10 @@ pub unsafe fn iterate_rtt( } // TEMPORARY - iterate_flows(); + let mut callback = |key: &FlowbeeKey, data: &FlowbeeData| { + println!("{:?} {:?}", key, data); + }; + iterate_flows(&mut callback); } /// Iterate through the heimdall map and call the callback for each entry. @@ -280,7 +283,9 @@ pub fn iterate_heimdall( } /// Iterate through the Flows 2 system tracker, retrieving all flows -pub fn iterate_flows() { +pub fn iterate_flows( + callback: &mut dyn FnMut(&FlowbeeKey, &FlowbeeData) +) { unsafe { if FLOWBEE_TRACKER.is_none() { let lock = BPF_SKELETON.lock().unwrap(); @@ -297,12 +302,8 @@ pub fn iterate_flows() { } } - let mut callback = |key: &FlowbeeKey, data: &FlowbeeData| { - log::info!("Flow: {:#?} -> {:#?}", key, data); - }; - if let Some(iter) = FLOWBEE_TRACKER.as_mut() { - let _ = iter.for_each(&mut callback); + let _ = iter.for_each(callback); } } } \ No newline at end of file diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs index 692c19bf..cebf79b5 100644 --- a/src/rust/lqos_sys/src/flowbee_data.rs +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -1,3 +1,5 @@ +//! Data structures for the Flowbee eBPF program. 
+ use lqos_utils::XdpIpAddress; use zerocopy::FromBytes; diff --git a/src/rust/lqos_sys/src/lib.rs b/src/rust/lqos_sys/src/lib.rs index a957f198..d2870968 100644 --- a/src/rust/lqos_sys/src/lib.rs +++ b/src/rust/lqos_sys/src/lib.rs @@ -32,4 +32,4 @@ pub use linux::num_possible_cpus; pub use lqos_kernel::max_tracked_ips; pub use tcp_rtt::{rtt_for_each, RttTrackingEntry}; pub use throughput::{throughput_for_each, HostCounter}; -pub use bpf_iterator::iterate_heimdall; \ No newline at end of file +pub use bpf_iterator::{iterate_heimdall, iterate_flows}; \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 246332f8..b8383ff6 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -3,7 +3,7 @@ use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH use super::{throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; -use lqos_sys::{rtt_for_each, throughput_for_each}; +use lqos_sys::{iterate_flows, throughput_for_each}; use lqos_utils::XdpIpAddress; pub struct ThroughputTracker { @@ -170,7 +170,7 @@ impl ThroughputTracker { pub(crate) fn apply_rtt_data(&self) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); - rtt_for_each(&mut |ip, rtt| { + /*rtt_for_each(&mut |ip, rtt| { if rtt.has_fresh_data != 0 { if let Some(mut tracker) = self.raw_data.get_mut(ip) { tracker.recent_rtt_data = rtt.rtt; @@ -183,6 +183,28 @@ impl ThroughputTracker { } } } + });*/ + + iterate_flows(&mut |key, data| { + // 6 is TCP, not expired + if key.ip_protocol == 6 && data.end_status == 0 { + if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { + let rtt_as_nanos = data.last_rtt[0]; + let data_as_ms_times_10 = rtt_as_nanos / 10000; + // Shift left + for i in 1..60 { + tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; + } + tracker.recent_rtt_data[0] = data_as_ms_times_10 as u32; + tracker.last_fresh_rtt_data_cycle = self_cycle; + if let Some(parents) = &tracker.network_json_parents { + let net_json = NETWORK_JSON.write().unwrap(); + if let Some(rtt) = tracker.median_latency() { + net_json.add_rtt_cycle(parents, rtt); + } + } + } + } }); } From df2b9dfe326d1151d036d59640eb223591f392b9 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 14:54:29 -0600 Subject: [PATCH 012/103] Integrate the flow data into the moving average system for RTTs, giving a better spread of results. 
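
With tcp_rtt.h gone, per-flow rates and RTTs come straight out of FlowbeeData. A small sketch of the unit conversions a consumer might apply (helper names invented here; assumes the units documented in flowbee_data.rs: rate_estimate_bps in bits per second, last_rtt in nanoseconds, index 0 covering the TO_INTERNET direction and index 1 the reverse):

    // Illustrative sketch, not part of the patch.
    use lqos_sys::flowbee_data::FlowbeeData;

    // Per-direction throughput in Mbps, matching the Mbps debug prints removed
    // earlier in the series (bps / 1,000,000).
    fn mbps(data: &FlowbeeData, direction: usize) -> u64 {
        data.rate_estimate_bps[direction] / 1_000_000
    }

    // Most recent RTT for one direction, in milliseconds.
    fn rtt_millis(data: &FlowbeeData, direction: usize) -> f64 {
        data.last_rtt[direction] as f64 / 1_000_000.0
    }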
--- src/rust/lqos_map_perf/src/main.rs | 4 +- src/rust/lqos_sys/src/bpf/common/flows.h | 23 +- src/rust/lqos_sys/src/bpf/common/tcp_rtt.h | 797 ------------------ src/rust/lqos_sys/src/bpf/lqos_kern.c | 28 - src/rust/lqos_sys/src/bpf_iterator.rs | 36 +- src/rust/lqos_sys/src/flowbee_data.rs | 2 - src/rust/lqos_sys/src/lib.rs | 2 - src/rust/lqos_sys/src/tcp_rtt.rs | 38 - .../src/throughput_tracker/tracking_data.rs | 45 +- 9 files changed, 37 insertions(+), 938 deletions(-) delete mode 100644 src/rust/lqos_sys/src/bpf/common/tcp_rtt.h delete mode 100644 src/rust/lqos_sys/src/tcp_rtt.rs diff --git a/src/rust/lqos_map_perf/src/main.rs b/src/rust/lqos_map_perf/src/main.rs index c31c1433..4baabc47 100644 --- a/src/rust/lqos_map_perf/src/main.rs +++ b/src/rust/lqos_map_perf/src/main.rs @@ -1,6 +1,6 @@ use std::time::Instant; -use lqos_sys::{rtt_for_each, throughput_for_each}; +use lqos_sys::{iterate_flows, throughput_for_each}; fn main() { println!("LibreQoS Map Performance Tool"); @@ -8,7 +8,7 @@ fn main() { // Test the RTT map let mut rtt_count = 0; let now = Instant::now(); - rtt_for_each(&mut |_rtt, _tracker| { + iterate_flows(&mut |_rtt, _tracker| { rtt_count += 1; }); let elapsed = now.elapsed(); diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 05f52692..03e5135b 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -9,7 +9,7 @@ #define SECOND_IN_NANOS 1000000000 -#define TIMESTAMP_INTERVAL_NANOS 2000000000 +//#define TIMESTAMP_INTERVAL_NANOS 10000000 // Some helpers to make understanding direction easier // for readability. @@ -59,8 +59,6 @@ struct flow_data_t { __u32 tsecr[2]; // When did the timestamp change? __u64 ts_change_time[2]; - // When should we calculate RTT (to avoid flooding) - __u64 ts_calc_time[2]; // Most recent RTT __u64 last_rtt[2]; // Has the connection ended? @@ -97,7 +95,6 @@ static __always_inline struct flow_data_t new_flow_data( .tsval = { 0, 0 }, .tsecr = { 0, 0 }, .ts_change_time = { 0, 0 }, - .ts_calc_time = { now, now }, // Get a first number quickly .last_rtt = { 0, 0 }, .end_status = 0 }; @@ -220,7 +217,7 @@ static __always_inline void process_tcp( u_int64_t now ) { if ((BITCHECK(DIS_TCP_SYN) && !BITCHECK(DIS_TCP_ACK) && direction == TO_INTERNET) || - (BITCHECK(DIS_TCP_SYN) && BITCHECK(DIS_TCP_ACK) && direction == FROM_INTERNET)) { + (BITCHECK(DIS_TCP_SYN) && !BITCHECK(DIS_TCP_ACK) && direction == FROM_INTERNET)) { // A customer is requesting a new TCP connection. That means // we need to start tracking this flow. 
#ifdef VERBOSE @@ -273,11 +270,9 @@ static __always_inline void process_tcp( if (tsval != data->tsval[0] || tsecr != data->tsecr[0]) { if (tsval == data->tsecr[1]) { - if (now > data->ts_calc_time[0]) { - __u64 elapsed = now - data->ts_change_time[1]; - data->ts_calc_time[0] = now + TIMESTAMP_INTERVAL_NANOS; - data->last_rtt[0] = elapsed; - } + __u64 elapsed = now - data->ts_change_time[1]; + data->last_rtt[0] = elapsed; + //bpf_debug("[FLOWS][0] RTT: %llu", elapsed); } data->ts_change_time[0] = now; @@ -288,11 +283,9 @@ static __always_inline void process_tcp( if (tsval != data->tsval[1] || tsecr != data->tsecr[1]) { if (tsval == data->tsecr[0]) { - if (now > data->ts_calc_time[1]) { - __u64 elapsed = now - data->ts_change_time[0]; - data->ts_calc_time[1] = now + TIMESTAMP_INTERVAL_NANOS; - data->last_rtt[1] = elapsed; - } + __u64 elapsed = now - data->ts_change_time[0]; + data->last_rtt[1] = elapsed; + //bpf_debug("[FLOWS][1] RTT: %llu", elapsed); } data->ts_change_time[1] = now; diff --git a/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h b/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h deleted file mode 100644 index 8ec8e463..00000000 --- a/src/rust/lqos_sys/src/bpf/common/tcp_rtt.h +++ /dev/null @@ -1,797 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* -Based on the GPLv2 xdp-pping project -(https://github.com/xdp-project/bpf-examples/tree/master/pping) - -xdp_pping is based on the ideas in Dr. Kathleen Nichols' pping -utility: https://github.com/pollere/pping - and the papers around "Listening to Networks": -http://www.pollere.net/Pdfdocs/ListeningGoog.pdf - -My modifications are Copyright 2022, Herbert Wolverson -(Bracket Productions) -*/ -/* Shared structures between userspace and kernel space - */ - -/* Implementation of pping inside the kernel - * classifier - */ -#ifndef __TC_CLASSIFY_KERN_PPING_H -#define __TC_CLASSIFY_KERN_PPING_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tc_classify_kern_pping_common.h" -#include "maximums.h" -#include "debug.h" -#include "ip_hash.h" -#include "dissector_tc.h" -#include "tcp_opts.h" - -#define MAX_MEMCMP_SIZE 128 - -struct parsing_context -{ - struct tcphdr *tcp; - __u64 now; - struct tc_dissector_t * dissector; - struct in6_addr * active_host; -}; - -/* Event type recorded for a packet flow */ -enum __attribute__((__packed__)) flow_event_type -{ - FLOW_EVENT_NONE, - FLOW_EVENT_OPENING, - FLOW_EVENT_CLOSING, - FLOW_EVENT_CLOSING_BOTH -}; - -enum __attribute__((__packed__)) connection_state -{ - CONNECTION_STATE_EMPTY, - CONNECTION_STATE_WAITOPEN, - CONNECTION_STATE_OPEN, - CONNECTION_STATE_CLOSED -}; - -struct flow_state -{ - __u64 last_timestamp; - __u32 last_id; - __u32 outstanding_timestamps; - enum connection_state conn_state; - __u8 reserved[2]; -}; - -/* - * Stores flowstate for both direction (src -> dst and dst -> src) of a flow - * - * Uses two named members instead of array of size 2 to avoid hassels with - * convincing verifier that member access is not out of bounds - */ -struct dual_flow_state -{ - struct flow_state dir1; - struct flow_state dir2; -}; - -/* - * Struct filled in by parse_packet_id. - * - * Note: As long as parse_packet_id is successful, the flow-parts of pid - * and reply_pid should be valid, regardless of value for pid_valid and - * reply_pid valid. The *pid_valid members are there to indicate that the - * identifier part of *pid are valid and can be used for timestamping/lookup. 
- * The reason for not keeping the flow parts as an entirely separate members - * is to save some performance by avoid doing a copy for lookup/insertion - * in the packet_ts map. - */ -struct packet_info -{ - __u64 time; // Arrival time of packet - //__u32 payload; // Size of packet data (excluding headers) - struct packet_id pid; // flow + identifier to timestamp (ex. TSval) - struct packet_id reply_pid; // rev. flow + identifier to match against (ex. TSecr) - //__u32 ingress_ifindex; // Interface packet arrived on (if is_ingress, otherwise not valid) - bool pid_flow_is_dfkey; // Used to determine which member of dualflow state to use for forward direction - bool pid_valid; // identifier can be used to timestamp packet - bool reply_pid_valid; // reply_identifier can be used to match packet - enum flow_event_type event_type; // flow event triggered by packet -}; - -/* - * Struct filled in by protocol id parsers (ex. parse_tcp_identifier) - */ -struct protocol_info -{ - __u32 pid; - __u32 reply_pid; - bool pid_valid; - bool reply_pid_valid; - enum flow_event_type event_type; -}; - - - -/* Map Definitions */ -struct -{ - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, struct packet_id); - __type(value, __u64); - __uint(max_entries, MAX_PACKETS); - __uint(pinning, LIBBPF_PIN_BY_NAME); -// __uint(map_flags, BPF_F_NO_PREALLOC); -} packet_ts SEC(".maps"); - -struct -{ - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, struct network_tuple); - __type(value, struct dual_flow_state); - __uint(max_entries, MAX_FLOWS); - __uint(pinning, LIBBPF_PIN_BY_NAME); -// __uint(map_flags, BPF_F_NO_PREALLOC); -} flow_state SEC(".maps"); - -struct -{ - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, struct in6_addr); // Keyed to the IP address - __type(value, struct rotating_performance); - __uint(max_entries, IP_HASH_ENTRIES_MAX); - __uint(pinning, LIBBPF_PIN_BY_NAME); -// __uint(map_flags, BPF_F_NO_PREALLOC); - -} rtt_tracker SEC(".maps"); - -// Mask for IPv6 flowlabel + traffic class - used in fib lookup -#define IPV6_FLOWINFO_MASK __cpu_to_be32(0x0FFFFFFF) - -#ifndef AF_INET -#define AF_INET 2 -#endif -#ifndef AF_INET6 -#define AF_INET6 10 -#endif - -#define MAX_TCP_OPTIONS 10 - -/* Functions */ - -/* - * Convenience function for getting the corresponding reverse flow. - * PPing needs to keep track of flow in both directions, and sometimes - * also needs to reverse the flow to report the "correct" (consistent - * with Kathie's PPing) src and dest address. - */ -static __always_inline void reverse_flow( - struct network_tuple *dest, - struct network_tuple *src -) { - dest->ipv = src->ipv; - dest->proto = src->proto; - dest->saddr = src->daddr; - dest->daddr = src->saddr; - dest->reserved = 0; -} - -/* - * Can't seem to get __builtin_memcmp to work, so hacking my own - * - * Based on https://githubhot.com/repo/iovisor/bcc/issues/3559, - * __builtin_memcmp should work constant size but I still get the "failed to - * find BTF for extern" error. - */ -static __always_inline int my_memcmp( - const void *s1_, - const void *s2_, - __u32 size -) { - const __u8 *s1 = (const __u8 *)s1_, *s2 = (const __u8 *)s2_; - int i; - - for (i = 0; i < MAX_MEMCMP_SIZE && i < size; i++) - { - if (s1[i] != s2[i]) - return s1[i] > s2[i] ? 
1 : -1; - } - - return 0; -} - -static __always_inline bool is_dualflow_key(struct network_tuple *flow) -{ - return my_memcmp(&flow->saddr, &flow->daddr, sizeof(flow->saddr)) <= 0; -} - -static __always_inline struct flow_state *fstate_from_dfkey( - struct dual_flow_state *df_state, - bool is_dfkey -) { - if (!df_state) { - return (struct flow_state *)NULL; - } - - return is_dfkey ? &df_state->dir1 : &df_state->dir2; -} - -/* - * Attempts to fetch an identifier for TCP packets, based on the TCP timestamp - * option. - * - * Will use the TSval as pid and TSecr as reply_pid, and the TCP source and dest - * as port numbers. - * - * If successful, tcph, sport, dport and proto_info will be set - * appropriately and 0 will be returned. - * On failure -1 will be returned (and arguments will not be set). - */ -static __always_inline int parse_tcp_identifier( - struct parsing_context *context, - __u16 *sport, - __u16 *dport, - struct protocol_info *proto_info -) { - if (parse_tcp_ts(context->tcp, context->dissector->end, &proto_info->pid, - &proto_info->reply_pid) < 0) { - return -1; // Possible TODO, fall back on seq/ack instead - } - - // Do not timestamp pure ACKs (no payload) - void *nh_pos = (context->tcp + 1) + (context->tcp->doff << 2); - proto_info->pid_valid = nh_pos - context->dissector->start < context->dissector->ctx->len || context->tcp->syn; - - // Do not match on non-ACKs (TSecr not valid) - proto_info->reply_pid_valid = context->tcp->ack; - - // Check if connection is opening/closing - if (context->tcp->rst) - { - proto_info->event_type = FLOW_EVENT_CLOSING_BOTH; - } - else if (context->tcp->fin) - { - proto_info->event_type = FLOW_EVENT_CLOSING; - } - else if (context->tcp->syn) - { - proto_info->event_type = FLOW_EVENT_OPENING; - } - else - { - proto_info->event_type = FLOW_EVENT_NONE; - } - - *sport = bpf_ntohs(context->tcp->dest); - *dport = bpf_ntohs(context->tcp->source); - - return 0; -} - -/* This is a bit of a hackjob from the original */ -static __always_inline int parse_packet_identifier( - struct parsing_context *context, - struct packet_info *p_info -) { - p_info->time = context->now; - if (context->dissector->eth_type == ETH_P_IP) - { - p_info->pid.flow.ipv = AF_INET; - p_info->pid.flow.saddr.ip = context->dissector->src_ip; - p_info->pid.flow.daddr.ip = context->dissector->dst_ip; - } - else if (context->dissector->eth_type == ETH_P_IPV6) - { - p_info->pid.flow.ipv = AF_INET6; - p_info->pid.flow.saddr.ip = context->dissector->src_ip; - p_info->pid.flow.daddr.ip = context->dissector->dst_ip; - } - else - { - bpf_debug("Unknown protocol"); - return -1; - } - //bpf_debug("IPs: %u %u", p_info->pid.flow.saddr.ip.in6_u.u6_addr32[3], p_info->pid.flow.daddr.ip.in6_u.u6_addr32[3]); - - struct protocol_info proto_info; - int err = parse_tcp_identifier(context, - &p_info->pid.flow.saddr.port, - &p_info->pid.flow.daddr.port, - &proto_info); - if (err) - return -1; - //bpf_debug("Ports: %u %u", p_info->pid.flow.saddr.port, p_info->pid.flow.daddr.port); - - // Sucessfully parsed packet identifier - fill in remaining members and return - p_info->pid.identifier = proto_info.pid; - p_info->pid_valid = proto_info.pid_valid; - p_info->reply_pid.identifier = proto_info.reply_pid; - p_info->reply_pid_valid = proto_info.reply_pid_valid; - p_info->event_type = proto_info.event_type; - - if (p_info->pid.flow.ipv == AF_INET && p_info->pid.flow.ipv == AF_INET6) { - bpf_debug("Unknown internal protocol"); - return -1; - } - - p_info->pid_flow_is_dfkey = is_dualflow_key(&p_info->pid.flow); - - 
reverse_flow(&p_info->reply_pid.flow, &p_info->pid.flow); - - return 0; -} - -static __always_inline struct network_tuple * -get_dualflow_key_from_packet(struct packet_info *p_info) -{ - return p_info->pid_flow_is_dfkey ? &p_info->pid.flow : &p_info->reply_pid.flow; -} - -/* - * Initilizes an "empty" flow state based on the forward direction of the - * current packet - */ -static __always_inline void init_flowstate(struct flow_state *f_state, - struct packet_info *p_info) -{ - f_state->conn_state = CONNECTION_STATE_WAITOPEN; - f_state->last_timestamp = p_info->time; -} - -static __always_inline void init_empty_flowstate(struct flow_state *f_state) -{ - f_state->conn_state = CONNECTION_STATE_EMPTY; -} - -static __always_inline struct flow_state * -get_flowstate_from_packet(struct dual_flow_state *df_state, - struct packet_info *p_info) -{ - return fstate_from_dfkey(df_state, p_info->pid_flow_is_dfkey); -} - -static __always_inline struct flow_state * -get_reverse_flowstate_from_packet(struct dual_flow_state *df_state, - struct packet_info *p_info) -{ - return fstate_from_dfkey(df_state, !p_info->pid_flow_is_dfkey); -} - -/* - * Initilize a new (assumed 0-initlized) dual flow state based on the current - * packet. - */ -static __always_inline void init_dualflow_state( - struct dual_flow_state *df_state, - struct packet_info *p_info -) { - struct flow_state *fw_state = - get_flowstate_from_packet(df_state, p_info); - struct flow_state *rev_state = - get_reverse_flowstate_from_packet(df_state, p_info); - - init_flowstate(fw_state, p_info); - init_empty_flowstate(rev_state); -} - -static __always_inline struct dual_flow_state * -create_dualflow_state( - struct parsing_context *ctx, - struct packet_info *p_info, - bool *new_flow -) { - struct network_tuple *key = get_dualflow_key_from_packet(p_info); - struct dual_flow_state new_state = {0}; - - init_dualflow_state(&new_state, p_info); - //new_state.dir1.tc_handle.handle = ctx->tc_handle; - //new_state.dir2.tc_handle.handle = ctx->tc_handle; - - if (bpf_map_update_elem(&flow_state, key, &new_state, BPF_NOEXIST) == - 0) - { - if (new_flow) - *new_flow = true; - } - else - { - return (struct dual_flow_state *)NULL; - } - - return (struct dual_flow_state *)bpf_map_lookup_elem(&flow_state, key); -} - -static __always_inline struct dual_flow_state * -lookup_or_create_dualflow_state( - struct parsing_context *ctx, - struct packet_info *p_info, - bool *new_flow -) { - struct dual_flow_state *df_state; - - struct network_tuple *key = get_dualflow_key_from_packet(p_info); - df_state = (struct dual_flow_state *)bpf_map_lookup_elem(&flow_state, key); - - if (df_state) - { - return df_state; - } - - // Only try to create new state if we have a valid pid - if (!p_info->pid_valid || p_info->event_type == FLOW_EVENT_CLOSING || - p_info->event_type == FLOW_EVENT_CLOSING_BOTH) - return (struct dual_flow_state *)NULL; - - return create_dualflow_state(ctx, p_info, new_flow); -} - -static __always_inline bool is_flowstate_active(struct flow_state *f_state) -{ - return f_state->conn_state != CONNECTION_STATE_EMPTY && - f_state->conn_state != CONNECTION_STATE_CLOSED; -} - -static __always_inline void update_forward_flowstate( - struct packet_info *p_info, - struct flow_state *f_state, - bool *new_flow -) { - // "Create" flowstate if it's empty - if (f_state->conn_state == CONNECTION_STATE_EMPTY && - p_info->pid_valid) - { - init_flowstate(f_state, p_info); - if (new_flow) - *new_flow = true; - } -} - -static __always_inline void update_reverse_flowstate( - void *ctx, 
- struct packet_info *p_info, - struct flow_state *f_state -) { - if (!is_flowstate_active(f_state)) - return; - - // First time we see reply for flow? - if (f_state->conn_state == CONNECTION_STATE_WAITOPEN && - p_info->event_type != FLOW_EVENT_CLOSING_BOTH) - { - f_state->conn_state = CONNECTION_STATE_OPEN; - } -} - -static __always_inline bool is_new_identifier( - struct packet_id *pid, - struct flow_state *f_state -) { - if (pid->flow.proto == IPPROTO_TCP) - /* TCP timestamps should be monotonically non-decreasing - * Check that pid > last_ts (considering wrap around) by - * checking 0 < pid - last_ts < 2^31 as specified by - * RFC7323 Section 5.2*/ - return pid->identifier - f_state->last_id > 0 && - pid->identifier - f_state->last_id < 1UL << 31; - - return pid->identifier != f_state->last_id; -} - -static __always_inline bool is_rate_limited(__u64 now, __u64 last_ts) -{ - if (now < last_ts) - return true; - - // Static rate limit - //return now - last_ts < DELAY_BETWEEN_RTT_REPORTS_MS * NS_PER_MS; - return false; // Max firehose drinking speed -} - -/* - * Attempt to create a timestamp-entry for packet p_info for flow in f_state - */ -static __always_inline void pping_timestamp_packet( - struct flow_state *f_state, - void *ctx, - struct packet_info *p_info, - bool new_flow -) { - if (!is_flowstate_active(f_state) || !p_info->pid_valid) - return; - - // Check if identfier is new - if (!new_flow && !is_new_identifier(&p_info->pid, f_state)) - return; - f_state->last_id = p_info->pid.identifier; - - // Check rate-limit - if (!new_flow && is_rate_limited(p_info->time, f_state->last_timestamp)) - return; - - /* - * Updates attempt at creating timestamp, even if creation of timestamp - * fails (due to map being full). This should make the competition for - * the next available map slot somewhat fairer between heavy and sparse - * flows. 
- */ - f_state->last_timestamp = p_info->time; - - if (bpf_map_update_elem(&packet_ts, &p_info->pid, &p_info->time, - BPF_NOEXIST) == 0) - __sync_fetch_and_add(&f_state->outstanding_timestamps, 1); -} - -/* - * Attempt to match packet in p_info with a timestamp from flow in f_state - */ -static __always_inline void pping_match_packet(struct flow_state *f_state, - struct packet_info *p_info, - struct in6_addr *active_host) -{ - __u64 *p_ts; - - if (!is_flowstate_active(f_state) || !p_info->reply_pid_valid) - return; - - if (f_state->outstanding_timestamps == 0) - return; - - p_ts = (__u64 *)bpf_map_lookup_elem(&packet_ts, &p_info->reply_pid); - if (!p_ts || p_info->time < *p_ts) - return; - - __u64 rtt = (p_info->time - *p_ts) / NS_PER_MS_TIMES_100; - bpf_debug("RTT (from TC): %u", p_info->time - *p_ts); - - // Delete timestamp entry as soon as RTT is calculated - if (bpf_map_delete_elem(&packet_ts, &p_info->reply_pid) == 0) - { - __sync_fetch_and_add(&f_state->outstanding_timestamps, -1); - } - - // Update the most performance map to include this data - struct rotating_performance *perf = - (struct rotating_performance *)bpf_map_lookup_elem( - &rtt_tracker, active_host); - if (perf == NULL) return; - __sync_fetch_and_add(&perf->next_entry, 1); - __u32 next_entry = perf->next_entry; - if (next_entry < MAX_PERF_SECONDS) { - __sync_fetch_and_add(&perf->rtt[next_entry], rtt); - perf->has_fresh_data = 1; - } -} - -static __always_inline void close_and_delete_flows( - void *ctx, - struct packet_info *p_info, - struct flow_state *fw_flow, - struct flow_state *rev_flow -) { - // Forward flow closing - if (p_info->event_type == FLOW_EVENT_CLOSING || - p_info->event_type == FLOW_EVENT_CLOSING_BOTH) - { - fw_flow->conn_state = CONNECTION_STATE_CLOSED; - } - - // Reverse flow closing - if (p_info->event_type == FLOW_EVENT_CLOSING_BOTH) - { - rev_flow->conn_state = CONNECTION_STATE_CLOSED; - } - - // Delete flowstate entry if neither flow is open anymore - if (!is_flowstate_active(fw_flow) && !is_flowstate_active(rev_flow)) - { - bpf_map_delete_elem(&flow_state, get_dualflow_key_from_packet(p_info)); - } -} - -/* - * Contains the actual pping logic that is applied after a packet has been - * parsed and deemed to contain some valid identifier. - * Looks up and updates flowstate (in both directions), tries to save a - * timestamp of the packet, tries to match packet against previous timestamps, - * calculates RTTs and pushes messages to userspace as appropriate. - */ -static __always_inline void pping_parsed_packet( - struct parsing_context *context, - struct packet_info *p_info -) { - struct dual_flow_state *df_state; - struct flow_state *fw_flow, *rev_flow; - bool new_flow = false; - - df_state = lookup_or_create_dualflow_state(context, p_info, &new_flow); - if (!df_state) - { - // bpf_debug("No flow state - stop"); - return; - } - - fw_flow = get_flowstate_from_packet(df_state, p_info); - update_forward_flowstate(p_info, fw_flow, &new_flow); - pping_timestamp_packet(fw_flow, context, p_info, new_flow); - - rev_flow = get_reverse_flowstate_from_packet(df_state, p_info); - update_reverse_flowstate(context, p_info, rev_flow); - pping_match_packet(rev_flow, p_info, context->active_host); - - close_and_delete_flows(context, p_info, fw_flow, rev_flow); -} - -/* Entry poing for running pping in the tc context */ -static __always_inline void tc_pping_start(struct parsing_context *context) -{ - // Check to see if we can store perf info. Bail if we've hit the limit. 
- // Copying occurs because otherwise the validator complains. - struct rotating_performance *perf = - (struct rotating_performance *)bpf_map_lookup_elem( - &rtt_tracker, context->active_host); - if (perf) { - if (perf->next_entry >= MAX_PERF_SECONDS-1) { - //bpf_debug("Flow has max samples. Not sampling further until next reset."); - //for (int i=0; irtt[i]); - //} - if (context->now > perf->recycle_time) { - // If the time-to-live for the sample is exceeded, recycle it to be - // usable again. - //bpf_debug("Recycling flow, %u > %u", context->now, perf->recycle_time); - __builtin_memset(perf->rtt, 0, sizeof(__u32) * MAX_PERF_SECONDS); - perf->recycle_time = context->now + RECYCLE_RTT_INTERVAL; - perf->next_entry = 0; - perf->has_fresh_data = 0; - } - return; - } - } - - // Populate the TCP Header - if (context->dissector->eth_type == ETH_P_IP) - { - // If its not TCP, stop - if (context->dissector->ip_header.iph + 1 > context->dissector->end) - return; // Stops the error checking from crashing - if (context->dissector->ip_header.iph->protocol != IPPROTO_TCP) - { - return; - } - context->tcp = (struct tcphdr *)((char *)context->dissector->ip_header.iph + (context->dissector->ip_header.iph->ihl * 4)); - } - else if (context->dissector->eth_type == ETH_P_IPV6) - { - // If its not TCP, stop - if (context->dissector->ip_header.ip6h + 1 > context->dissector->end) - return; // Stops the error checking from crashing - if (context->dissector->ip_header.ip6h->nexthdr != IPPROTO_TCP) - { - return; - } - context->tcp = (struct tcphdr *)(context->dissector->ip_header.ip6h + 1); - } - else - { - bpf_debug("UNKNOWN PROTOCOL TYPE"); - return; - } - - // Bail out if the packet is incomplete - if (context->tcp + 1 > context->dissector->end) - { - return; - } - - // If we didn't get a handle, make one - if (perf == NULL) - { - struct rotating_performance new_perf = {0}; - new_perf.recycle_time = context->now + RECYCLE_RTT_INTERVAL; - new_perf.has_fresh_data = 0; - if (bpf_map_update_elem(&rtt_tracker, context->active_host, &new_perf, BPF_NOEXIST) != 0) return; - } - - - // Start the parsing process - struct packet_info p_info = {0}; - if (parse_packet_identifier(context, &p_info) < 0) - { - //bpf_debug("Unable to parse packet identifier"); - return; - } - - pping_parsed_packet(context, &p_info); -} - -#endif /* __TC_CLASSIFY_KERN_PPING_H */ - -/* - -Understanding how this works (psuedocode): - -1. Parsing context is passed into tc_pping_start - 1. We lookup the rotating_performance map for the active host (local side). - 1. If it exists, we check to see if we are in "next entry" time window yet. - 2. If we are, and the current time exceeds the "recycle time", we reset the - performance map and set the "recycle time" to the current time plus the - recycle interval. We exit the function. - 2. We then check to see if the packet is TCP. If it is not, we exit the function. - 3. We then check to see if the packet is complete. If it is not, we exit the function. - 4. We then parse the packet identifier. If we are unable to parse the packet identifier, - we exit the function. (the `parse_packet_identifier` function). - 1. We set the packet time to the current time. - 2. We set the flow type to either AF_INET or AF_INET6. - 3. We set the source and destination IP addresses. - 4. We call `parse_tcp_identifier` to parse the TCP identifier. - 1. We use `parse_tcp_ts` to extract the TSval and TSecr from the TCP header. - These are stored in `proto_info.pid` and `proto_info.reply_pid`. 
- If we fail to parse the TCP identifier, we exit the function. - 2. We set "pid_valid" to true if the next header position is less than the end of the packet - or if the packet is a SYN packet. (i.e. ignore packets with no payload). - 3. We set "reply_pid_valid" to true if the packet is an ACK packet. - 4. RST events are set to "FLOW_EVENT_CLOSING_BOTH", FIN events are set to "FLOW_EVENT_CLOSING", - and SYN events are set to "FLOW_EVENT_OPENING". - 5. We set the source and destination ports. - 5. If we failed to parse the TCP identifier, we exit the function. - 6. We set "pid.identifier" to "proto_info.pid" and "reply_pid.identifier" to "proto_info.reply_pid". - 7. We set "pid_valid" to "proto_info.pid_valid" and "reply_pid_valid" to "proto_info.reply_pid_valid". - 8. We set "event_type" to "proto_info.event_type". - 9. We bail if the protocol is not AF_INET or AF_INET6. - 10. We set "pid_flow_is_dfkey" to "is_dualflow_key(&p_info->pid.flow)". - 1. Compare the source and destination addresses and return true when it - encounters a packet with the source address less than the destination address. - 2. This appears to be a way to sort the flow keys. - 11. We call `reverse_flow` with the reply flow and the forward flow. - 1.Reverse flow sets the destination to the source. - 5. We then call pping_parsed_packet with the parsing context and the packet info. - 1. We call `lookup_or_create_dualflow_state` and return it if we found one. - 1. We call `get_dualflow_key_from_packet` to get the flow key from the packet. - 1. - 2. If `pid_valid` is false, or the event type is "FLOW_EVENT_CLOSING" or "FLOW_EVENT_CLOSING_BOTH", - we return NULL. - 3. If we still haven't got a flow state, we call `create_dualflow_state` with the parsing context, - the packet info, and a pointer to new_flow. - 1. We call `get_dualflow_key_from_packet` to get the flow key from the packet. - 1. If "pid_flow_is_dfkey" we return pid.flow, otherwise reply_pid.flow. - 2. We call `init_dualflow_state` with the new state and the packet info. - 3. We create a new state in the flow state map (or return an existing one). - 4. We set `fw_flow` with `get_flowstate_from_packet` and the packet info. - 1. This in turns calls `fstate_from_dfkey` with the dual flow state and the packet info. - 1. If the packet flow is the dual flow key, we return dir1, otherwise dir2. - 5. We call `update_forward_flowstate` with the packet info. - 1. If the connection state is empty and the packet identifier is valid, we call `init_flowstate` - with the flow state and the packet info. - 1. `init_flowstate` sets the connection state to "WAITOPEN" and the last timestamp to the packet time. - 6. We call `pping_timestamp_packet` with the forward flow, the parsing context, the packet info, and new_flow. - 1. If the flow state is not active, or the packet identifier is not valid, we return. - 2. If the flow state is not new and the identifier is not new, we return. - 3. If the flow state is not new and the packet is rate limited, we return. - 4. We set the last timestamp to the packet time. - 7. We set `rev_flow` with `get_reverse_flowstate_from_packet` and the packet info. - 1. - 8. We call `update_reverse_flowstate` with the parsing context, the packet info, and the reverse flow. - 1. - 9. We call `pping_match_packet` with the reverse flow, the packet info, and the active host. - 1. If the flow state is not active, or the reply packet identifier is not valid, we return. - 2. If the flow state has no outstanding timestamps, we return. - 3. 
We call `bpf_map_lookup_elem` with the packet timestamp map and the reply packet identifier. - 1. If the lookup fails, or the packet time is less than the timestamp, we return. - 4. We calculate the round trip time. - 5. We call `bpf_map_delete_elem` with the packet timestamp map and the reply packet identifier. - 1. If the delete is successful, we decrement the outstanding timestamps. - 10. We call `close_and_delete_flows` with the parsing context, the packet info, the forward flow, and the reverse flow. - 1. -*/ \ No newline at end of file diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 5abaa397..8653c819 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -15,7 +15,6 @@ #include "common/throughput.h" #include "common/lpm.h" #include "common/cpu_map.h" -#include "common/tcp_rtt.h" #include "common/bifrost.h" #include "common/heimdall.h" #include "common/flows.h" @@ -228,14 +227,6 @@ int tc_iphash_to_cpu(struct __sk_buff *skb) bpf_debug("(TC) effective direction: %d", effective_direction); #endif - // Call pping to obtain RTT times - struct parsing_context context = {0}; - context.now = bpf_ktime_get_ns(); - context.tcp = NULL; - context.dissector = &dissector; - context.active_host = &lookup_key.address; - //tc_pping_start(&context); // Commented out for comparison - if (ip_info && ip_info->tc_handle != 0) { // We found a matching mapped TC flow #ifdef VERBOSE @@ -375,25 +366,6 @@ int throughput_reader(struct bpf_iter__bpf_map_elem *ctx) return 0; } -SEC("iter/bpf_map_elem") -int rtt_reader(struct bpf_iter__bpf_map_elem *ctx) -{ - // The sequence file - struct seq_file *seq = ctx->meta->seq; - struct rotating_performance *counter = ctx->value; - struct in6_addr *ip = ctx->key; - - // Bail on end - if (counter == NULL || ip == NULL) { - return 0; - } - - //BPF_SEQ_PRINTF(seq, "%d %d\n", counter->next_entry, counter->rtt[0]); - bpf_seq_write(seq, ip, sizeof(struct in6_addr)); - bpf_seq_write(seq, counter, sizeof(struct rotating_performance)); - return 0; -} - SEC("iter/bpf_map_elem") int heimdall_reader(struct bpf_iter__bpf_map_elem *ctx) { // The sequence file diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 27378e49..828c2436 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -1,5 +1,6 @@ use crate::{ - flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter, RttTrackingEntry + flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, + kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter }; use lqos_utils::XdpIpAddress; use once_cell::sync::Lazy; @@ -192,10 +193,6 @@ static mut MAP_TRAFFIC: Lazy< Option>, > = Lazy::new(|| None); -static mut RTT_TRACKER: Lazy< - Option>, -> = Lazy::new(|| None); - static mut HEIMDALL_TRACKER: Lazy< Option>, > = Lazy::new(|| None); @@ -227,35 +224,6 @@ pub unsafe fn iterate_throughput( } } -pub unsafe fn iterate_rtt( - callback: &mut dyn FnMut(&XdpIpAddress, &RttTrackingEntry), -) { - if RTT_TRACKER.is_none() { - let lock = BPF_SKELETON.lock().unwrap(); - if let Some(skeleton) = lock.as_ref() { - let skeleton = skeleton.get_ptr(); - if let Ok(iter) = unsafe { - BpfMapIterator::new( - (*skeleton).progs.rtt_reader, - (*skeleton).maps.rtt_tracker, - ) - } { - *RTT_TRACKER = Some(iter); - } - } - } - - if let Some(iter) = RTT_TRACKER.as_mut() { - let _ 
= iter.for_each(callback); - } - - // TEMPORARY - let mut callback = |key: &FlowbeeKey, data: &FlowbeeData| { - println!("{:?} {:?}", key, data); - }; - iterate_flows(&mut callback); -} - /// Iterate through the heimdall map and call the callback for each entry. pub fn iterate_heimdall( callback: &mut dyn FnMut(&HeimdallKey, &[HeimdallData]), diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs index cebf79b5..9feb13b6 100644 --- a/src/rust/lqos_sys/src/flowbee_data.rs +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -55,8 +55,6 @@ pub struct FlowbeeData { pub tsecr: [u32; 2], /// When did the timestamp change? pub ts_change_time: [u64; 2], - /// When should we calculate RTT (to avoid flooding) - pub ts_calc_time: [u64; 2], /// Most recent RTT pub last_rtt: [u64; 2], /// Has the connection ended? diff --git a/src/rust/lqos_sys/src/lib.rs b/src/rust/lqos_sys/src/lib.rs index d2870968..92d6d71a 100644 --- a/src/rust/lqos_sys/src/lib.rs +++ b/src/rust/lqos_sys/src/lib.rs @@ -15,7 +15,6 @@ mod cpu_map; mod ip_mapping; mod kernel_wrapper; mod lqos_kernel; -mod tcp_rtt; mod throughput; mod linux; mod bpf_iterator; @@ -30,6 +29,5 @@ pub use ip_mapping::{ pub use kernel_wrapper::LibreQoSKernels; pub use linux::num_possible_cpus; pub use lqos_kernel::max_tracked_ips; -pub use tcp_rtt::{rtt_for_each, RttTrackingEntry}; pub use throughput::{throughput_for_each, HostCounter}; pub use bpf_iterator::{iterate_heimdall, iterate_flows}; \ No newline at end of file diff --git a/src/rust/lqos_sys/src/tcp_rtt.rs b/src/rust/lqos_sys/src/tcp_rtt.rs deleted file mode 100644 index 467440f1..00000000 --- a/src/rust/lqos_sys/src/tcp_rtt.rs +++ /dev/null @@ -1,38 +0,0 @@ -use lqos_utils::XdpIpAddress; -use zerocopy::FromBytes; -use crate::bpf_iterator::iterate_rtt; - -/// Entry from the XDP rtt_tracker map. -#[repr(C)] -#[derive(Clone, Copy, Debug, FromBytes)] -pub struct RttTrackingEntry { - /// Array containing TCP round-trip times. Convert to an `f32` and divide by `100.0` for actual numbers. - pub rtt: [u32; 60], - - /// Used internally by the XDP program to store the current position in the storage array. Do not modify. - next_entry: u32, - - /// Used internally by the XDP program to determine when it is time to recycle and reuse a record. Do not modify. - recycle_time: u64, - - /// Flag indicating that an entry has been updated recently (last 30 seconds by default). - pub has_fresh_data: u32, -} - -impl Default for RttTrackingEntry { - fn default() -> Self { - Self { rtt: [0; 60], next_entry: 0, recycle_time: 0, has_fresh_data: 0 } - } -} - -/// Queries the active XDP/TC programs for TCP round-trip time tracking -/// data (from the `rtt_tracker` pinned eBPF map). -/// -/// Only IP addresses facing the ISP Network side are tracked. -/// -/// Executes `callback` for each entry. 
-pub fn rtt_for_each(callback: &mut dyn FnMut(&XdpIpAddress, &RttTrackingEntry)) { - unsafe { - iterate_rtt(callback); - } -} diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index b8383ff6..ee701308 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -1,10 +1,10 @@ -use std::sync::atomic::AtomicU64; +use std::{sync::atomic::AtomicU64, time::Duration}; use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP}}; use super::{throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; use lqos_sys::{iterate_flows, throughput_for_each}; -use lqos_utils::XdpIpAddress; +use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; pub struct ThroughputTracker { pub(crate) cycle: AtomicU64, @@ -185,27 +185,32 @@ impl ThroughputTracker { } });*/ - iterate_flows(&mut |key, data| { - // 6 is TCP, not expired - if key.ip_protocol == 6 && data.end_status == 0 { - if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { - let rtt_as_nanos = data.last_rtt[0]; - let data_as_ms_times_10 = rtt_as_nanos / 10000; - // Shift left - for i in 1..60 { - tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; - } - tracker.recent_rtt_data[0] = data_as_ms_times_10 as u32; - tracker.last_fresh_rtt_data_cycle = self_cycle; - if let Some(parents) = &tracker.network_json_parents { - let net_json = NETWORK_JSON.write().unwrap(); - if let Some(rtt) = tracker.median_latency() { - net_json.add_rtt_cycle(parents, rtt); + if let Ok(now) = time_since_boot() { + let since_boot = Duration::from(now); + let expire = (since_boot - Duration::from_secs(60)).as_nanos() as u64; + iterate_flows(&mut |key, data| { + // 6 is TCP, not expired + if key.ip_protocol == 6 && data.last_seen > expire && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { + if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { + // Shift left + for i in 1..60 { + tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; + } + tracker.recent_rtt_data[0] = u32::max( + (data.last_rtt[0] / 10000) as u32, + (data.last_rtt[1] / 10000) as u32, + ); + tracker.last_fresh_rtt_data_cycle = self_cycle; + if let Some(parents) = &tracker.network_json_parents { + let net_json = NETWORK_JSON.write().unwrap(); + if let Some(rtt) = tracker.median_latency() { + net_json.add_rtt_cycle(parents, rtt); + } } } } - } - }); + }); + } } #[inline(always)] From e98a1864ad72c8e775682490900cb1f2a3bd3344 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 27 Feb 2024 16:22:22 -0600 Subject: [PATCH 013/103] Add a new api call - api/flows/dump_all - that lists all recent flows that have been collected. Intended for debugging. 
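As a usage note: a minimal sketch of consuming the new dump over the local bus, mirroring the node-manager handler added below. The free-standing helper and its name are illustrative assumptions; `FlowbeeData` is the bus-level type this patch introduces, and lqosd must already be running for the request to succeed.

    use lqos_bus::{bus_request, BusRequest, BusResponse, FlowbeeData};

    // Sketch only: ask lqosd for every tracked flow and hand back a copy.
    // Any response other than AllActiveFlows is treated as "no flows".
    async fn dump_all_flows() -> Vec<FlowbeeData> {
        let responses = bus_request(vec![BusRequest::DumpActiveFlows])
            .await
            .unwrap();
        match responses.first() {
            Some(BusResponse::AllActiveFlows(flows)) => flows.to_owned(),
            _ => Vec::new(),
        }
    }

Because the dump walks the whole flow buffer, it suits occasional debugging better than per-second polling.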
--- src/rust/lqos_bus/src/bus/request.rs | 4 ++ src/rust/lqos_bus/src/bus/response.rs | 5 +- src/rust/lqos_bus/src/ip_stats.rs | 28 ++++++++ src/rust/lqos_bus/src/lib.rs | 2 +- .../lqos_node_manager/src/flow_monitor.rs | 15 +++++ src/rust/lqos_node_manager/src/main.rs | 3 + src/rust/lqos_sys/src/bpf/common/flows.h | 9 +-- src/rust/lqosd/src/main.rs | 3 + .../lqosd/src/throughput_tracker/flow_data.rs | 7 ++ src/rust/lqosd/src/throughput_tracker/mod.rs | 29 +++++++- .../src/throughput_tracker/tracking_data.rs | 66 +++++++++---------- 11 files changed, 129 insertions(+), 42 deletions(-) create mode 100644 src/rust/lqos_node_manager/src/flow_monitor.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data.rs diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index d86e8f3b..8fecf94f 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -152,6 +152,10 @@ pub enum BusRequest { /// display a "run bandwidht test" link. #[cfg(feature = "equinix_tests")] RequestLqosEquinixTest, + + /// Request a dump of all active flows. This can be a lot of data. + /// so this is intended for debugging + DumpActiveFlows, } /// Specific requests from the long-term stats system diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index 229d8be1..4400d184 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -1,6 +1,6 @@ use super::QueueStoreTransit; use crate::{ - ip_stats::PacketHeader, FlowTransport, IpMapping, IpStats, XdpPpingResult, + ip_stats::{FlowbeeData, PacketHeader}, FlowTransport, IpMapping, IpStats, XdpPpingResult, }; use lts_client::transport_data::{StatsTotals, StatsHost, StatsTreeNode}; use serde::{Deserialize, Serialize}; @@ -116,4 +116,7 @@ pub enum BusResponse { /// Long-term stats tree LongTermTree(Vec), + + /// All Active Flows (Not Recommended - Debug Use) + AllActiveFlows(Vec), } diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 9e9cbcc6..0f8ceefe 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -143,4 +143,32 @@ pub struct PacketHeader { pub tcp_tsval: u32, /// TCP ECR val pub tcp_tsecr: u32, +} + +/// Flowbee: a complete flow data, combining key and data. +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct FlowbeeData { + /// Mapped `XdpIpAddress` source for the flow. + pub remote_ip: String, + /// Mapped `XdpIpAddress` destination for the flow + pub local_ip: String, + /// Source port number, or ICMP type. + pub src_port: u16, + /// Destination port number. + pub dst_port: u16, + /// IP protocol (see the Linux kernel!) + pub ip_protocol: u8, + /// Bytes transmitted + pub bytes_sent: [u64; 2], + /// Packets transmitted + pub packets_sent: [u64; 2], + /// Rate estimate + pub rate_estimate_bps: [u64; 2], + /// Retry Counters + pub retries: [u32; 2], + /// Most recent RTT + pub last_rtt: [u64; 2], + /// Has the connection ended? 
+ /// 0 = Alive, 1 = FIN, 2 = RST + pub end_status: u32, } \ No newline at end of file diff --git a/src/rust/lqos_bus/src/lib.rs b/src/rust/lqos_bus/src/lib.rs index 88c95be1..fb18fb16 100644 --- a/src/rust/lqos_bus/src/lib.rs +++ b/src/rust/lqos_bus/src/lib.rs @@ -14,7 +14,7 @@ mod bus; mod ip_stats; pub use ip_stats::{ tos_parser, FlowProto, FlowTransport, IpMapping, IpStats, PacketHeader, - XdpPpingResult, + XdpPpingResult, FlowbeeData }; mod tc_handle; pub use bus::{ diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs new file mode 100644 index 00000000..2b56a26f --- /dev/null +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -0,0 +1,15 @@ +use lqos_bus::{bus_request, BusRequest, BusResponse, FlowbeeData}; +use rocket::serde::json::Json; +use crate::cache_control::NoCache; + +#[get("/api/flows/dump_all")] +pub async fn all_flows_debug_dump() -> NoCache>> { + let responses = + bus_request(vec![BusRequest::DumpActiveFlows]).await.unwrap(); + let result = match &responses[0] { + BusResponse::AllActiveFlows(flowbee) => flowbee.to_owned(), + _ => Vec::new(), + }; + + NoCache::new(Json(result)) +} \ No newline at end of file diff --git a/src/rust/lqos_node_manager/src/main.rs b/src/rust/lqos_node_manager/src/main.rs index d680eb58..77845aed 100644 --- a/src/rust/lqos_node_manager/src/main.rs +++ b/src/rust/lqos_node_manager/src/main.rs @@ -12,6 +12,7 @@ mod config_control; mod network_tree; mod queue_info; mod toasts; +mod flow_monitor; // Use JemAllocator only on supported platforms #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -109,6 +110,8 @@ fn rocket() -> _ { // Front page toast checks toasts::version_check, toasts::stats_check, + // Flowbee System + flow_monitor::all_flows_debug_dump, ], ); diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 03e5135b..9d3bdd25 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -266,10 +266,11 @@ static __always_inline void process_tcp( u_int32_t tsval = dissector->tsval; u_int32_t tsecr = dissector->tsecr; if (BITCHECK(DIS_TCP_ACK) && tsval != 0) { + //bpf_debug("[FLOWS][%d] TSVAL: %u, TSECR: %u", direction, tsval, tsecr); if (direction == TO_INTERNET) { - if (tsval != data->tsval[0] || tsecr != data->tsecr[0]) { + if (tsval != data->tsval[0] && tsecr != data->tsecr[0]) { - if (tsval == data->tsecr[1]) { + if (tsval > data->tsecr[1]) { __u64 elapsed = now - data->ts_change_time[1]; data->last_rtt[0] = elapsed; //bpf_debug("[FLOWS][0] RTT: %llu", elapsed); @@ -280,9 +281,9 @@ static __always_inline void process_tcp( data->tsecr[0] = tsecr; } } else { - if (tsval != data->tsval[1] || tsecr != data->tsecr[1]) { + if (tsval != data->tsval[1] && tsecr != data->tsecr[1]) { - if (tsval == data->tsecr[0]) { + if (tsval > data->tsecr[0]) { __u64 elapsed = now - data->ts_change_time[0]; data->last_rtt[1] = elapsed; //bpf_debug("[FLOWS][1] RTT: %llu", elapsed); diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index d537d65a..00f6d213 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -223,6 +223,9 @@ fn handle_bus_requests( BusRequest::GetLongTermStats(StatsRequest::Tree) => { long_term_stats::get_stats_tree() } + BusRequest::DumpActiveFlows => { + throughput_tracker::dump_active_flows() + } }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data.rs b/src/rust/lqosd/src/throughput_tracker/flow_data.rs new file mode 100644 index 
00000000..f8280deb --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data.rs @@ -0,0 +1,7 @@ +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use once_cell::sync::Lazy; +use std::sync::Mutex; + +pub static ALL_FLOWS: Lazy>> = + Lazy::new(|| Mutex::new(Vec::with_capacity(128_000))); + diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 15d46809..5375660b 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -1,6 +1,7 @@ mod heimdall_data; mod throughput_entry; mod tracking_data; +pub mod flow_data; use crate::{ shaped_devices_tracker::{NETWORK_JSON, STATS_NEEDS_NEW_SHAPED_DEVICES, SHAPED_DEVICES}, stats::TIME_TO_POLL_HOSTS, throughput_tracker::tracking_data::ThroughputTracker, long_term_stats::get_network_tree, @@ -16,6 +17,8 @@ use tokio::{ time::{Duration, Instant}, }; +use self::flow_data::ALL_FLOWS; + const RETIRE_AFTER_SECONDS: u64 = 30; pub static THROUGHPUT_TRACKER: Lazy = Lazy::new(ThroughputTracker::new); @@ -48,7 +51,7 @@ async fn throughput_task(interval_ms: u64, long_term_stats_tx: Sender BusResponse { ) .collect(); BusResponse::AllUnknownIps(result) + } + + /// For debugging: dump all active flows! + pub fn dump_active_flows() -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + let mut result = Vec::with_capacity(lock.len()); + + for (ip, flow) in lock.iter() { + result.push(lqos_bus::FlowbeeData { + remote_ip: ip.remote_ip.as_ip().to_string(), + local_ip: ip.local_ip.as_ip().to_string(), + src_port: ip.src_port, + dst_port: ip.dst_port, + ip_protocol: ip.ip_protocol, + bytes_sent: flow.bytes_sent, + packets_sent: flow.packets_sent, + rate_estimate_bps: flow.rate_estimate_bps, + retries: flow.retries, + last_rtt: flow.last_rtt, + end_status: flow.end_status, + }); + } + + BusResponse::AllActiveFlows(result) } \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index ee701308..59e67983 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -1,6 +1,6 @@ use std::{sync::atomic::AtomicU64, time::Duration}; use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP}}; -use super::{throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; +use super::{flow_data::ALL_FLOWS, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; use lqos_sys::{iterate_flows, throughput_for_each}; @@ -168,48 +168,44 @@ impl ThroughputTracker { }); } - pub(crate) fn apply_rtt_data(&self) { + pub(crate) fn apply_flow_data(&self) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); - /*rtt_for_each(&mut |ip, rtt| { - if rtt.has_fresh_data != 0 { - if let Some(mut tracker) = self.raw_data.get_mut(ip) { - tracker.recent_rtt_data = rtt.rtt; - tracker.last_fresh_rtt_data_cycle = self_cycle; - if let Some(parents) = &tracker.network_json_parents { - let net_json = NETWORK_JSON.write().unwrap(); - if let Some(rtt) = tracker.median_latency() { - net_json.add_rtt_cycle(parents, rtt); - } - } - } - } - });*/ if let Ok(now) = time_since_boot() { let since_boot = Duration::from(now); let expire = (since_boot - Duration::from_secs(60)).as_nanos() as u64; - iterate_flows(&mut |key, data| { - // 6 is TCP, not expired - if key.ip_protocol == 6 && data.last_seen > expire && 
(data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { - if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { - // Shift left - for i in 1..60 { - tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; - } - tracker.recent_rtt_data[0] = u32::max( - (data.last_rtt[0] / 10000) as u32, - (data.last_rtt[1] / 10000) as u32, - ); - tracker.last_fresh_rtt_data_cycle = self_cycle; - if let Some(parents) = &tracker.network_json_parents { - let net_json = NETWORK_JSON.write().unwrap(); - if let Some(rtt) = tracker.median_latency() { - net_json.add_rtt_cycle(parents, rtt); + if let Ok(mut flow_lock) = ALL_FLOWS.try_lock() { + flow_lock.clear(); // Remove all previous values + iterate_flows(&mut |key, data| { + if data.last_seen > expire { + // We have a valid flow, so it needs to be tracked + flow_lock.push((key.clone(), data.clone())); + + // TCP - we have RTT data? 6 is TCP + if key.ip_protocol == 6 && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { + if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { + // Shift left + for i in 1..60 { + tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; + } + tracker.recent_rtt_data[0] = u32::max( + (data.last_rtt[0] / 10000) as u32, + (data.last_rtt[1] / 10000) as u32, + ); + tracker.last_fresh_rtt_data_cycle = self_cycle; + if let Some(parents) = &tracker.network_json_parents { + let net_json = NETWORK_JSON.write().unwrap(); + if let Some(rtt) = tracker.median_latency() { + net_json.add_rtt_cycle(parents, rtt); + } + } } } } - } - }); + }); + } else { + log::warn!("Failed to lock ALL_FLOWS"); + } } } From 29b0e078673d9db970c05a5982a11464551a7c6e Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 09:29:20 -0600 Subject: [PATCH 014/103] Massive improvement in RTT tracking accuracy. 
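The accuracy gain comes from matching TCP timestamp echoes across directions: an RTT sample is only taken when a packet's TSecr echoes the TSval most recently recorded for the opposite direction, so the time elapsed since that record approximates one round trip. The following is a simplified, symmetric model of that bookkeeping, written in Rust purely for illustration; field names mirror `flow_data_t`, while the in-kernel C below guards the two directions slightly differently.

    // Illustrative sketch only, not the eBPF code. Directions 0 and 1 follow
    // the same pairing convention as the arrays in flow_data_t.
    struct TsTracker {
        tsval: [u32; 2],          // last TSval seen per direction
        tsecr: [u32; 2],          // last TSecr seen per direction
        ts_change_time: [u64; 2], // when each direction's timestamps last changed (ns)
        last_rtt: [u64; 2],       // latest RTT estimate per direction (ns)
    }

    impl TsTracker {
        fn on_packet(&mut self, dir: usize, tsval: u32, tsecr: u32, now: u64) {
            debug_assert!(dir < 2);
            if tsval == 0 {
                return; // no TCP timestamp option present
            }
            let other = 1 - dir;
            // Only sample when this direction's timestamps actually move on.
            if tsval != self.tsval[dir] && tsecr != self.tsecr[dir] {
                // The echo must match what the other direction last sent for
                // the elapsed time to represent a full round trip.
                if tsecr == self.tsval[other] {
                    self.last_rtt[dir] = now - self.ts_change_time[other];
                }
                self.ts_change_time[dir] = now;
                self.tsval[dir] = tsval;
                self.tsecr[dir] = tsecr;
            }
        }
    }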
--- src/rust/lqos_sys/src/bpf/common/flows.h | 12 ++++++------ src/rust/lqos_sys/src/bpf/lqos_kern.c | 11 +++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 9d3bdd25..bb408784 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -265,15 +265,15 @@ static __always_inline void process_tcp( // Timestamps to calculate RTT u_int32_t tsval = dissector->tsval; u_int32_t tsecr = dissector->tsecr; - if (BITCHECK(DIS_TCP_ACK) && tsval != 0) { + if (tsval != 0) { //bpf_debug("[FLOWS][%d] TSVAL: %u, TSECR: %u", direction, tsval, tsecr); if (direction == TO_INTERNET) { if (tsval != data->tsval[0] && tsecr != data->tsecr[0]) { - if (tsval > data->tsecr[1]) { + if (tsecr == data->tsval[1]) { __u64 elapsed = now - data->ts_change_time[1]; data->last_rtt[0] = elapsed; - //bpf_debug("[FLOWS][0] RTT: %llu", elapsed); + //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); } data->ts_change_time[0] = now; @@ -281,12 +281,12 @@ static __always_inline void process_tcp( data->tsecr[0] = tsecr; } } else { - if (tsval != data->tsval[1] && tsecr != data->tsecr[1]) { + if (tsval != data->tsecr[1] && tsecr != data->tsval[1]) { - if (tsval > data->tsecr[0]) { + if (tsval == data->tsecr[0]) { __u64 elapsed = now - data->ts_change_time[0]; data->last_rtt[1] = elapsed; - //bpf_debug("[FLOWS][1] RTT: %llu", elapsed); + //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); } data->ts_change_time[1] = now; diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 8653c819..631805e3 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -15,6 +15,7 @@ #include "common/throughput.h" #include "common/lpm.h" #include "common/cpu_map.h" +//#include "common/tcp_rtt.h" #include "common/bifrost.h" #include "common/heimdall.h" #include "common/flows.h" @@ -227,6 +228,16 @@ int tc_iphash_to_cpu(struct __sk_buff *skb) bpf_debug("(TC) effective direction: %d", effective_direction); #endif +/* + // Call pping to obtain RTT times + struct parsing_context context = {0}; + context.now = bpf_ktime_get_ns(); + context.tcp = NULL; + context.dissector = &dissector; + context.active_host = &lookup_key.address; + tc_pping_start(&context); +*/ + if (ip_info && ip_info->tc_handle != 0) { // We found a matching mapped TC flow #ifdef VERBOSE From ff4c070b7d6ea38a4f2babf28e8c8cb9969b025c Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 09:51:43 -0600 Subject: [PATCH 015/103] Improve the JSON transfer data by listing protocol explicitly for flowbee. 
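Carrying the protocol as an enum means the JSON output reads as a protocol name rather than a bare IP protocol number. A short sketch of the resulting behaviour, assuming `serde_json` is available for the serialization; the fallback to ICMP is exactly what the new `From` conversion below does.

    use lqos_bus::FlowbeeProtocol;

    fn main() {
        // The conversion added here: 6 maps to TCP, 17 to UDP, and anything
        // else currently falls back to ICMP.
        assert_eq!(FlowbeeProtocol::from(6u8), FlowbeeProtocol::TCP);
        assert_eq!(FlowbeeProtocol::from(17u8), FlowbeeProtocol::UDP);
        assert_eq!(FlowbeeProtocol::from(1u8), FlowbeeProtocol::ICMP);

        // With the serde derives, a unit variant serializes as its name.
        let json = serde_json::to_string(&FlowbeeProtocol::TCP).unwrap();
        assert_eq!(json, "\"TCP\"");
    }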
--- src/rust/lqos_bus/src/ip_stats.rs | 23 +++++++++++++++++++- src/rust/lqos_bus/src/lib.rs | 2 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 4 ++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 0f8ceefe..b9a08cfa 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -145,6 +145,27 @@ pub struct PacketHeader { pub tcp_tsecr: u32, } +/// Flowbee protocol enumeration +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub enum FlowbeeProtocol { + /// TCP (type 6) + TCP, + /// UDP (type 17) + UDP, + /// ICMP (type 1) + ICMP +} + +impl From for FlowbeeProtocol { + fn from(value: u8) -> Self { + match value { + 6 => Self::TCP, + 17 => Self::UDP, + _ => Self::ICMP, + } + } +} + /// Flowbee: a complete flow data, combining key and data. #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct FlowbeeData { @@ -157,7 +178,7 @@ pub struct FlowbeeData { /// Destination port number. pub dst_port: u16, /// IP protocol (see the Linux kernel!) - pub ip_protocol: u8, + pub ip_protocol: FlowbeeProtocol, /// Bytes transmitted pub bytes_sent: [u64; 2], /// Packets transmitted diff --git a/src/rust/lqos_bus/src/lib.rs b/src/rust/lqos_bus/src/lib.rs index fb18fb16..0e2cac8f 100644 --- a/src/rust/lqos_bus/src/lib.rs +++ b/src/rust/lqos_bus/src/lib.rs @@ -14,7 +14,7 @@ mod bus; mod ip_stats; pub use ip_stats::{ tos_parser, FlowProto, FlowTransport, IpMapping, IpStats, PacketHeader, - XdpPpingResult, FlowbeeData + XdpPpingResult, FlowbeeData, FlowbeeProtocol }; mod tc_handle; pub use bus::{ diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 5375660b..f2572dab 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -8,7 +8,7 @@ use crate::{ }; pub use heimdall_data::get_flow_stats; use log::{info, warn}; -use lqos_bus::{BusResponse, IpStats, TcHandle, XdpPpingResult}; +use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, XdpPpingResult}; use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; use lts_client::collector::{StatsUpdateMessage, ThroughputSummary, HostSummary}; use once_cell::sync::Lazy; @@ -458,7 +458,7 @@ pub fn all_unknown_ips() -> BusResponse { local_ip: ip.local_ip.as_ip().to_string(), src_port: ip.src_port, dst_port: ip.dst_port, - ip_protocol: ip.ip_protocol, + ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), bytes_sent: flow.bytes_sent, packets_sent: flow.packets_sent, rate_estimate_bps: flow.rate_estimate_bps, From 22d56a71bff04ad69661190493e5e43dfcb9c177 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 09:56:59 -0600 Subject: [PATCH 016/103] Correct the ordering of src/dst ports in the flow map. 
--- src/rust/lqos_sys/src/bpf/common/flows.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index bb408784..a75e0a7f 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -109,8 +109,8 @@ static __always_inline struct flow_key_t build_flow_key( return (struct flow_key_t) { .src = dissector->src_ip, .dst = dissector->dst_ip, - .src_port = bpf_htons(dissector->src_port), - .dst_port = bpf_htons(dissector->dst_port), + .src_port = bpf_htons(dissector->dst_port), + .dst_port = bpf_htons(dissector->src_port), .protocol = dissector->ip_protocol, .pad = 0, .pad1 = 0, @@ -120,8 +120,8 @@ static __always_inline struct flow_key_t build_flow_key( return (struct flow_key_t) { .src = dissector->dst_ip, .dst = dissector->src_ip, - .src_port = bpf_htons(dissector->dst_port), - .dst_port = bpf_htons(dissector->src_port), + .src_port = bpf_htons(dissector->src_port), + .dst_port = bpf_htons(dissector->dst_port), .protocol = dissector->ip_protocol, .pad = 0, .pad1 = 0, From 95b7c9ac5cc4b875001dbf8f86919a04a5ea2d9a Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 10:32:02 -0600 Subject: [PATCH 017/103] No more second stage dissector. --- .../lqos_sys/src/bpf/common/dissector_tc.h | 223 ------------------ src/rust/lqos_sys/src/bpf/lqos_kern.c | 1 - 2 files changed, 224 deletions(-) delete mode 100644 src/rust/lqos_sys/src/bpf/common/dissector_tc.h diff --git a/src/rust/lqos_sys/src/bpf/common/dissector_tc.h b/src/rust/lqos_sys/src/bpf/common/dissector_tc.h deleted file mode 100644 index 5074085a..00000000 --- a/src/rust/lqos_sys/src/bpf/common/dissector_tc.h +++ /dev/null @@ -1,223 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include "../common/skb_safety.h" -#include "../common/debug.h" -#include "../common/ip_hash.h" -#include "dissector.h" - -// Structure holding packet dissection information (obtained at the TC level) -struct tc_dissector_t -{ - // Pointer to the SKB context. - struct __sk_buff *ctx; - // Pointer to the data start - void *start; - // Pointer to the data end - void *end; - // Pointer to the Ethernet header once obtained (NULL until then) - struct ethhdr *ethernet_header; - // Ethernet packet type, once obtained - __u16 eth_type; - // Start of layer-3 data, once obtained - __u32 l3offset; - // IP header (either v4 or v6), once obtained. - union iph_ptr ip_header; - // Source IP, encoded by `ip_hash.h` functions. - struct in6_addr src_ip; - // Destination IP, encoded by `ip_hash.h` functions. - struct in6_addr dst_ip; - // Current VLAN detected. - // TODO: This can probably be removed since the packet dissector - // now finds this. - __be16 current_vlan; -}; - -// Constructor for a dissector -// Connects XDP/TC SKB structure to a dissector structure. 
-// Arguments: -// * ctx - an xdp_md structure, passed from the entry-point -// * dissector - pointer to a local dissector object to be initialized -// -// Returns TRUE if all is good, FALSE if the process cannot be completed -static __always_inline bool tc_dissector_new( - struct __sk_buff *ctx, - struct tc_dissector_t *dissector -) { - dissector->ctx = ctx; - dissector->start = (void *)(long)ctx->data; - dissector->end = (void *)(long)ctx->data_end; - dissector->ethernet_header = (struct ethhdr *)NULL; - dissector->l3offset = 0; - dissector->current_vlan = bpf_htons(ctx->vlan_tci); - - // Check that there's room for an ethernet header - if SKB_OVERFLOW (dissector->start, dissector->end, ethhdr) - { - return false; - } - dissector->ethernet_header = (struct ethhdr *)dissector->start; - - return true; -} - -// Search a context to find the layer-3 offset. -static __always_inline bool tc_dissector_find_l3_offset( - struct tc_dissector_t *dissector -) { - if (dissector->ethernet_header == NULL) - { - bpf_debug("Ethernet header is NULL, still called offset check."); - return false; - } - __u32 offset = sizeof(struct ethhdr); - __u16 eth_type = bpf_ntohs(dissector->ethernet_header->h_proto); - - // Fast return for unwrapped IP - if (eth_type == ETH_P_IP || eth_type == ETH_P_IPV6) - { - dissector->eth_type = eth_type; - dissector->l3offset = offset; - return true; - } - - // Fast return for ARP or non-802.3 ether types - if (eth_type == ETH_P_ARP || eth_type < ETH_P_802_3_MIN) - { - return false; - } - - // Walk the headers until we find IP - __u8 i = 0; - while (i < 10 && !is_ip(eth_type)) - { - switch (eth_type) - { - // Read inside VLAN headers - case ETH_P_8021AD: - case ETH_P_8021Q: - { - if SKB_OVERFLOW_OFFSET (dissector->start, dissector->end, - offset, vlan_hdr) - { - return false; - } - //bpf_debug("TC Found VLAN"); - struct vlan_hdr *vlan = (struct vlan_hdr *) - (dissector->start + offset); - // Calculated from the SKB - //dissector->current_vlan = vlan->h_vlan_TCI; - eth_type = bpf_ntohs(vlan->h_vlan_encapsulated_proto); - offset += sizeof(struct vlan_hdr); - } - break; - - // Handle PPPoE - case ETH_P_PPP_SES: - { - if SKB_OVERFLOW_OFFSET (dissector->start, dissector->end, - offset, pppoe_proto) - { - return false; - } - struct pppoe_proto *pppoe = (struct pppoe_proto *) - (dissector->start + offset); - __u16 proto = bpf_ntohs(pppoe->proto); - switch (proto) - { - case PPP_IP: - eth_type = ETH_P_IP; - break; - case PPP_IPV6: - eth_type = ETH_P_IPV6; - break; - default: - return false; - } - offset += PPPOE_SES_HLEN; - } - break; - - // WARNING/TODO: Here be dragons; this needs testing. - case ETH_P_MPLS_UC: - case ETH_P_MPLS_MC: { - if SKB_OVERFLOW_OFFSET(dissector->start, dissector-> end, - offset, mpls_label) - { - return false; - } - struct mpls_label * mpls = (struct mpls_label *) - (dissector->start + offset); - // Are we at the bottom of the stack? 
- offset += 4; // 32-bits - if (mpls->entry & MPLS_LS_S_MASK) { - // We've hit the bottom - if SKB_OVERFLOW_OFFSET(dissector->start, dissector->end, - offset, iphdr) - { - return false; - } - struct iphdr * iph = (struct iphdr *)(dissector->start + offset); - switch (iph->version) { - case 4: eth_type = ETH_P_IP; break; - case 6: eth_type = ETH_P_IPV6; break; - default: return false; - } - } - } break; - - // We found something we don't know how to handle - bail out - default: - return false; - } - ++i; - } - - dissector->l3offset = offset; - dissector->eth_type = eth_type; - return true; -} - -// Locate the IP header if present -static __always_inline bool tc_dissector_find_ip_header( - struct tc_dissector_t *dissector -) { - switch (dissector->eth_type) - { - case ETH_P_IP: - { - if (dissector->start + dissector->l3offset + - sizeof(struct iphdr) > dissector->end) { - return false; - } - dissector->ip_header.iph = dissector->start + dissector->l3offset; - if (dissector->ip_header.iph + 1 > dissector->end) { - return false; - } - encode_ipv4(dissector->ip_header.iph->saddr, &dissector->src_ip); - encode_ipv4(dissector->ip_header.iph->daddr, &dissector->dst_ip); - return true; - } - break; - case ETH_P_IPV6: - { - if (dissector->start + dissector->l3offset + - sizeof(struct ipv6hdr) > dissector->end) { - return false; - } - dissector->ip_header.ip6h = dissector->start + dissector->l3offset; - if (dissector->ip_header.iph + 1 > dissector->end) - return false; - encode_ipv6(&dissector->ip_header.ip6h->saddr, &dissector->src_ip); - encode_ipv6(&dissector->ip_header.ip6h->daddr, &dissector->dst_ip); - return true; - } - break; - default: - return false; - } -} \ No newline at end of file diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 631805e3..983461a3 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -10,7 +10,6 @@ #include /* TC_H_MAJ + TC_H_MIN */ #include "common/debug.h" #include "common/dissector.h" -#include "common/dissector_tc.h" #include "common/maximums.h" #include "common/throughput.h" #include "common/lpm.h" From 393c3adc2a1e28fcc68f0186d1b691b5df4b3f20 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 12:52:04 -0600 Subject: [PATCH 018/103] * Restore the tc_dissector, I didn't mean to remove that. * Add SKB metadata support for pass-through and eliding the second LPM check if we can. Checks at run-time to see if it is possible. --- .../lqos_sys/src/bpf/common/dissector_tc.h | 223 ++++++++++++++++++ src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- src/rust/lqos_sys/src/bpf/common/lpm.h | 1 - src/rust/lqos_sys/src/bpf/lqos_kern.c | 77 +++++- src/rust/lqos_sys/src/lqos_kernel.rs | 3 +- 5 files changed, 302 insertions(+), 4 deletions(-) create mode 100644 src/rust/lqos_sys/src/bpf/common/dissector_tc.h diff --git a/src/rust/lqos_sys/src/bpf/common/dissector_tc.h b/src/rust/lqos_sys/src/bpf/common/dissector_tc.h new file mode 100644 index 00000000..5074085a --- /dev/null +++ b/src/rust/lqos_sys/src/bpf/common/dissector_tc.h @@ -0,0 +1,223 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "../common/skb_safety.h" +#include "../common/debug.h" +#include "../common/ip_hash.h" +#include "dissector.h" + +// Structure holding packet dissection information (obtained at the TC level) +struct tc_dissector_t +{ + // Pointer to the SKB context. 
+ struct __sk_buff *ctx; + // Pointer to the data start + void *start; + // Pointer to the data end + void *end; + // Pointer to the Ethernet header once obtained (NULL until then) + struct ethhdr *ethernet_header; + // Ethernet packet type, once obtained + __u16 eth_type; + // Start of layer-3 data, once obtained + __u32 l3offset; + // IP header (either v4 or v6), once obtained. + union iph_ptr ip_header; + // Source IP, encoded by `ip_hash.h` functions. + struct in6_addr src_ip; + // Destination IP, encoded by `ip_hash.h` functions. + struct in6_addr dst_ip; + // Current VLAN detected. + // TODO: This can probably be removed since the packet dissector + // now finds this. + __be16 current_vlan; +}; + +// Constructor for a dissector +// Connects XDP/TC SKB structure to a dissector structure. +// Arguments: +// * ctx - an xdp_md structure, passed from the entry-point +// * dissector - pointer to a local dissector object to be initialized +// +// Returns TRUE if all is good, FALSE if the process cannot be completed +static __always_inline bool tc_dissector_new( + struct __sk_buff *ctx, + struct tc_dissector_t *dissector +) { + dissector->ctx = ctx; + dissector->start = (void *)(long)ctx->data; + dissector->end = (void *)(long)ctx->data_end; + dissector->ethernet_header = (struct ethhdr *)NULL; + dissector->l3offset = 0; + dissector->current_vlan = bpf_htons(ctx->vlan_tci); + + // Check that there's room for an ethernet header + if SKB_OVERFLOW (dissector->start, dissector->end, ethhdr) + { + return false; + } + dissector->ethernet_header = (struct ethhdr *)dissector->start; + + return true; +} + +// Search a context to find the layer-3 offset. +static __always_inline bool tc_dissector_find_l3_offset( + struct tc_dissector_t *dissector +) { + if (dissector->ethernet_header == NULL) + { + bpf_debug("Ethernet header is NULL, still called offset check."); + return false; + } + __u32 offset = sizeof(struct ethhdr); + __u16 eth_type = bpf_ntohs(dissector->ethernet_header->h_proto); + + // Fast return for unwrapped IP + if (eth_type == ETH_P_IP || eth_type == ETH_P_IPV6) + { + dissector->eth_type = eth_type; + dissector->l3offset = offset; + return true; + } + + // Fast return for ARP or non-802.3 ether types + if (eth_type == ETH_P_ARP || eth_type < ETH_P_802_3_MIN) + { + return false; + } + + // Walk the headers until we find IP + __u8 i = 0; + while (i < 10 && !is_ip(eth_type)) + { + switch (eth_type) + { + // Read inside VLAN headers + case ETH_P_8021AD: + case ETH_P_8021Q: + { + if SKB_OVERFLOW_OFFSET (dissector->start, dissector->end, + offset, vlan_hdr) + { + return false; + } + //bpf_debug("TC Found VLAN"); + struct vlan_hdr *vlan = (struct vlan_hdr *) + (dissector->start + offset); + // Calculated from the SKB + //dissector->current_vlan = vlan->h_vlan_TCI; + eth_type = bpf_ntohs(vlan->h_vlan_encapsulated_proto); + offset += sizeof(struct vlan_hdr); + } + break; + + // Handle PPPoE + case ETH_P_PPP_SES: + { + if SKB_OVERFLOW_OFFSET (dissector->start, dissector->end, + offset, pppoe_proto) + { + return false; + } + struct pppoe_proto *pppoe = (struct pppoe_proto *) + (dissector->start + offset); + __u16 proto = bpf_ntohs(pppoe->proto); + switch (proto) + { + case PPP_IP: + eth_type = ETH_P_IP; + break; + case PPP_IPV6: + eth_type = ETH_P_IPV6; + break; + default: + return false; + } + offset += PPPOE_SES_HLEN; + } + break; + + // WARNING/TODO: Here be dragons; this needs testing. 
+ case ETH_P_MPLS_UC: + case ETH_P_MPLS_MC: { + if SKB_OVERFLOW_OFFSET(dissector->start, dissector-> end, + offset, mpls_label) + { + return false; + } + struct mpls_label * mpls = (struct mpls_label *) + (dissector->start + offset); + // Are we at the bottom of the stack? + offset += 4; // 32-bits + if (mpls->entry & MPLS_LS_S_MASK) { + // We've hit the bottom + if SKB_OVERFLOW_OFFSET(dissector->start, dissector->end, + offset, iphdr) + { + return false; + } + struct iphdr * iph = (struct iphdr *)(dissector->start + offset); + switch (iph->version) { + case 4: eth_type = ETH_P_IP; break; + case 6: eth_type = ETH_P_IPV6; break; + default: return false; + } + } + } break; + + // We found something we don't know how to handle - bail out + default: + return false; + } + ++i; + } + + dissector->l3offset = offset; + dissector->eth_type = eth_type; + return true; +} + +// Locate the IP header if present +static __always_inline bool tc_dissector_find_ip_header( + struct tc_dissector_t *dissector +) { + switch (dissector->eth_type) + { + case ETH_P_IP: + { + if (dissector->start + dissector->l3offset + + sizeof(struct iphdr) > dissector->end) { + return false; + } + dissector->ip_header.iph = dissector->start + dissector->l3offset; + if (dissector->ip_header.iph + 1 > dissector->end) { + return false; + } + encode_ipv4(dissector->ip_header.iph->saddr, &dissector->src_ip); + encode_ipv4(dissector->ip_header.iph->daddr, &dissector->dst_ip); + return true; + } + break; + case ETH_P_IPV6: + { + if (dissector->start + dissector->l3offset + + sizeof(struct ipv6hdr) > dissector->end) { + return false; + } + dissector->ip_header.ip6h = dissector->start + dissector->l3offset; + if (dissector->ip_header.iph + 1 > dissector->end) + return false; + encode_ipv6(&dissector->ip_header.ip6h->saddr, &dissector->src_ip); + encode_ipv6(&dissector->ip_header.ip6h->daddr, &dissector->dst_ip); + return true; + } + break; + default: + return false; + } +} \ No newline at end of file diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index a75e0a7f..bbca5ebd 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -70,7 +70,7 @@ struct flow_data_t { // This is pinned and not per-CPU, because half the data appears on either side of the bridge. struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(type, BPF_MAP_TYPE_LRU_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? 
__type(key, struct flow_key_t); __type(value, struct flow_data_t); __uint(max_entries, MAX_FLOWS); diff --git a/src/rust/lqos_sys/src/bpf/common/lpm.h b/src/rust/lqos_sys/src/bpf/common/lpm.h index 1a8296ed..61200695 100644 --- a/src/rust/lqos_sys/src/bpf/common/lpm.h +++ b/src/rust/lqos_sys/src/bpf/common/lpm.h @@ -11,7 +11,6 @@ #include "maximums.h" #include "debug.h" #include "dissector.h" -#include "dissector_tc.h" // Data structure used for map_ip_hash struct ip_hash_info { diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 983461a3..c98fe0e9 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -10,6 +10,7 @@ #include /* TC_H_MAJ + TC_H_MIN */ #include "common/debug.h" #include "common/dissector.h" +#include "common/dissector_tc.h" #include "common/maximums.h" #include "common/throughput.h" #include "common/lpm.h" @@ -54,6 +55,18 @@ int direction = 255; __be16 internet_vlan = 0; // Note: turn these into big-endian __be16 isp_vlan = 0; +// Helpers from https://elixir.bootlin.com/linux/v5.4.153/source/tools/testing/selftests/bpf/progs/test_xdp_meta.c#L37 +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem + + +// Structure for passing metadata from XDP to TC +struct metadata_pass_t { + __u32 tc_handle; + __u32 cpu; +}; + // XDP Entry Point SEC("xdp") int xdp_prog(struct xdp_md *ctx) @@ -138,7 +151,6 @@ int xdp_prog(struct xdp_md *ctx) tc_handle ); - // Send on its way if (tc_handle != 0) { // Send data to Heimdall @@ -159,6 +171,32 @@ int xdp_prog(struct xdp_md *ctx) } __u32 cpu_dest = *cpu_lookup; + // Experimental: can we adjust the metadata? + int ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, sizeof(struct metadata_pass_t))); + if (ret < 0) { + #ifdef VERBOSE + bpf_debug("Error: unable to adjust metadata, ret: %d", ret); + #endif + } else { + #ifdef VERBOSE + bpf_debug("Metadata adjusted, ret: %d", ret); + #endif + + __u8 *data_meta = ctx_ptr(ctx, data_meta); + __u8 *data_end = ctx_ptr(ctx, data_end); + __u8 *data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || data_meta + round_up(ETH_ALEN, 4) > data) { + bpf_debug("Bounds error on the metadata"); + return XDP_DROP; + } + struct metadata_pass_t meta = (struct metadata_pass_t) { + .tc_handle = tc_handle, + .cpu = cpu + }; + __builtin_memcpy(data_meta, &meta, sizeof(struct metadata_pass_t)); + } + // Redirect based on CPU #ifdef VERBOSE bpf_debug("(XDP) Zooming to CPU: %u", cpu_dest); @@ -203,6 +241,43 @@ int tc_iphash_to_cpu(struct __sk_buff *skb) } } // Scope to remove tcq_cfg when done with it + // Do we have metadata? 
+ if (skb->data != skb->data_meta) { + #ifdef VERBOSE + bpf_debug("(TC) Metadata is present"); + #endif + int size = skb->data_meta - skb->data; + if (size < sizeof(struct metadata_pass_t)) { + bpf_debug("(TC) Metadata too small"); + } else { + // Use it here + __u8 *data_meta = ctx_ptr(skb, data_meta); + __u8 *data_end = ctx_ptr(skb, data_end); + __u8 *data = ctx_ptr(skb, data); + + if (data + ETH_ALEN > data_end || data_meta + round_up(ETH_ALEN, 4) > data) + { + bpf_debug("(TC) Bounds error on the metadata"); + return TC_ACT_SHOT; + } + + struct metadata_pass_t *meta = (struct metadata_pass_t *)data_meta; + #ifdef VERBOSE + bpf_debug("(TC) Metadata: CPU: %u, TC: %u", meta->cpu, meta->tc_handle); + #endif + if (meta->tc_handle != 0) { + // We can short-circuit the redirect and bypass the second + // LPM lookup! Yay! + skb->priority = meta->tc_handle; + return TC_ACT_OK; + } + } + } else { + #ifdef VERBOSE + bpf_debug("(TC) No metadata present"); + #endif + } + // Once again parse the packet // Note that we are returning OK on failure, which is a little odd. // The reasoning being that if its a packet we don't know how to handle, diff --git a/src/rust/lqos_sys/src/lqos_kernel.rs b/src/rust/lqos_sys/src/lqos_kernel.rs index 66746369..11805c93 100644 --- a/src/rust/lqos_sys/src/lqos_kernel.rs +++ b/src/rust/lqos_sys/src/lqos_kernel.rs @@ -99,7 +99,8 @@ unsafe fn open_kernel() -> Result<*mut bpf::lqos_kern> { unsafe fn load_kernel(skeleton: *mut bpf::lqos_kern) -> Result<()> { let error = bpf::lqos_kern_load(skeleton); if error != 0 { - Err(Error::msg("Unable to load the XDP/TC kernel")) + let error = format!("Unable to load the XDP/TC kernel ({error})"); + Err(Error::msg(error)) } else { Ok(()) } From 751671587478eb55d750c68926e29305bd159c06 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 14:44:20 -0600 Subject: [PATCH 019/103] Add a real-time flow counter to the display and API. --- src/rust/lqos_bus/src/bus/request.rs | 3 +++ src/rust/lqos_bus/src/bus/response.rs | 3 +++ src/rust/lqos_node_manager/src/flow_monitor.rs | 12 ++++++++++++ src/rust/lqos_node_manager/src/main.rs | 1 + src/rust/lqos_node_manager/static/main.html | 10 +++++++++- src/rust/lqosd/src/main.rs | 3 +++ src/rust/lqosd/src/throughput_tracker/mod.rs | 6 ++++++ 7 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index 8fecf94f..b477d653 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -156,6 +156,9 @@ pub enum BusRequest { /// Request a dump of all active flows. This can be a lot of data. /// so this is intended for debugging DumpActiveFlows, + + /// Count the nubmer of active flows. 
+ CountActiveFlows, } /// Specific requests from the long-term stats system diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index 4400d184..a9c30219 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -119,4 +119,7 @@ pub enum BusResponse { /// All Active Flows (Not Recommended - Debug Use) AllActiveFlows(Vec), + + /// Count active flows + CountActiveFlows(u64), } diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs index 2b56a26f..e4dd1c56 100644 --- a/src/rust/lqos_node_manager/src/flow_monitor.rs +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -11,5 +11,17 @@ pub async fn all_flows_debug_dump() -> NoCache>> { _ => Vec::new(), }; + NoCache::new(Json(result)) +} + +#[get("/api/flows/count")] +pub async fn count_flows() -> NoCache> { + let responses = + bus_request(vec![BusRequest::CountActiveFlows]).await.unwrap(); + let result = match &responses[0] { + BusResponse::CountActiveFlows(count) => *count, + _ => 0, + }; + NoCache::new(Json(result)) } \ No newline at end of file diff --git a/src/rust/lqos_node_manager/src/main.rs b/src/rust/lqos_node_manager/src/main.rs index 77845aed..2b8ba55d 100644 --- a/src/rust/lqos_node_manager/src/main.rs +++ b/src/rust/lqos_node_manager/src/main.rs @@ -112,6 +112,7 @@ fn rocket() -> _ { toasts::stats_check, // Flowbee System flow_monitor::all_flows_debug_dump, + flow_monitor::count_flows, ], ); diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index 61366e25..e1f7b1d3 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -66,7 +66,7 @@
-                        Current Throughput
+                        Current Throughput <span id="flowCount">?</span>
@@ -190,6 +190,12 @@ }); } + function updateFlowCounter() { + $.get("/api/flows/count", (data) => { + $("#flowCount").text(data + " flows"); + }); + } + function updateCurrentThroughput() { msgPackGet("/api/current_throughput", (tp) => { const bits = 0; @@ -316,6 +322,7 @@ function OneSecondCadence() { updateCurrentThroughput(); + updateFlowCounter(); updateSiteFunnel(); if (tickCount % 5 == 0) { @@ -342,6 +349,7 @@ colorReloadButton(); fillCurrentThroughput(); + updateFlowCounter(); updateCpu(); updateRam(); updateTop10(); diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 00f6d213..e4d455b1 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -226,6 +226,9 @@ fn handle_bus_requests( BusRequest::DumpActiveFlows => { throughput_tracker::dump_active_flows() } + BusRequest::CountActiveFlows => { + throughput_tracker::count_active_flows() + } }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index f2572dab..2c83a6c1 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -469,4 +469,10 @@ pub fn all_unknown_ips() -> BusResponse { } BusResponse::AllActiveFlows(result) + } + + /// Count active flows + pub fn count_active_flows() -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + BusResponse::CountActiveFlows(lock.len() as u64) } \ No newline at end of file From f84798885b9cd8d8eb34ae8ea2db1d9f0d1ef98d Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 28 Feb 2024 15:03:03 -0600 Subject: [PATCH 020/103] Add a stub for listing top flows by current bitrate. --- src/rust/Cargo.lock | 14 ++++++-- src/rust/lqos_bus/src/bus/request.rs | 3 ++ src/rust/lqos_bus/src/bus/response.rs | 3 ++ .../lqos_node_manager/src/flow_monitor.rs | 12 +++++++ src/rust/lqos_node_manager/src/main.rs | 1 + src/rust/lqosd/Cargo.toml | 1 + src/rust/lqosd/src/main.rs | 1 + src/rust/lqosd/src/throughput_tracker/mod.rs | 32 +++++++++++++++++++ 8 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 205f2544..9a9fff9c 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -556,7 +556,7 @@ dependencies = [ "criterion-plot", "futures", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -578,7 +578,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -1354,6 +1354,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.10" @@ -1676,6 +1685,7 @@ dependencies = [ "anyhow", "dashmap", "env_logger", + "itertools 0.12.1", "jemallocator", "log", "lqos_bus", diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index b477d653..52d76607 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -159,6 +159,9 @@ pub enum BusRequest { /// Count the nubmer of active flows. 
CountActiveFlows, + + /// Top Flows Reports + TopFlows{ n: u32 }, } /// Specific requests from the long-term stats system diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index a9c30219..80c26bc1 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -122,4 +122,7 @@ pub enum BusResponse { /// Count active flows CountActiveFlows(u64), + + /// Top Flopws + TopFlows(Vec), } diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs index e4dd1c56..61ed159f 100644 --- a/src/rust/lqos_node_manager/src/flow_monitor.rs +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -23,5 +23,17 @@ pub async fn count_flows() -> NoCache> { _ => 0, }; + NoCache::new(Json(result)) +} + +#[get("/api/flows/top5")] +pub async fn top_5_flows() -> NoCache>> { + let responses = + bus_request(vec![BusRequest::TopFlows { n: 5 }]).await.unwrap(); + let result = match &responses[0] { + BusResponse::TopFlows(flowbee) => flowbee.to_owned(), + _ => Vec::new(), + }; + NoCache::new(Json(result)) } \ No newline at end of file diff --git a/src/rust/lqos_node_manager/src/main.rs b/src/rust/lqos_node_manager/src/main.rs index 2b8ba55d..d9d3bb4c 100644 --- a/src/rust/lqos_node_manager/src/main.rs +++ b/src/rust/lqos_node_manager/src/main.rs @@ -113,6 +113,7 @@ fn rocket() -> _ { // Flowbee System flow_monitor::all_flows_debug_dump, flow_monitor::count_flows, + flow_monitor::top_5_flows, ], ); diff --git a/src/rust/lqosd/Cargo.toml b/src/rust/lqosd/Cargo.toml index a5115991..10899c39 100644 --- a/src/rust/lqosd/Cargo.toml +++ b/src/rust/lqosd/Cargo.toml @@ -29,6 +29,7 @@ sysinfo = "0" dashmap = "5" num-traits = "0.2" thiserror = "1" +itertools = "0.12.1" # Support JemAlloc on supported platforms [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index e4d455b1..114fad76 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -229,6 +229,7 @@ fn handle_bus_requests( BusRequest::CountActiveFlows => { throughput_tracker::count_active_flows() } + BusRequest::TopFlows { n } => throughput_tracker::top_flows(*n), }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 2c83a6c1..72c3dc44 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -475,4 +475,36 @@ pub fn all_unknown_ips() -> BusResponse { pub fn count_active_flows() -> BusResponse { let lock = ALL_FLOWS.lock().unwrap(); BusResponse::CountActiveFlows(lock.len() as u64) + } + + /// Top Flows Report + pub fn top_flows(n: u32) -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + let mut table = lock.clone(); + table.sort_by(|a, b| { + let a_total = a.1.rate_estimate_bps[0] + a.1.rate_estimate_bps[1]; + let b_total = b.1.rate_estimate_bps[0] + b.1.rate_estimate_bps[1]; + b_total.cmp(&a_total) + }); + let result = table + .iter() + .take(n as usize) + .map(|(ip, flow)| { + lqos_bus::FlowbeeData { + remote_ip: ip.remote_ip.as_ip().to_string(), + local_ip: ip.local_ip.as_ip().to_string(), + src_port: ip.src_port, + dst_port: ip.dst_port, + ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), + bytes_sent: flow.bytes_sent, + packets_sent: flow.packets_sent, + rate_estimate_bps: flow.rate_estimate_bps, + retries: flow.retries, + last_rtt: flow.last_rtt, + end_status: flow.end_status, + } + }) + .collect(); + + 
BusResponse::TopFlows(result) } \ No newline at end of file From 9de53853e773e30023005b9094a3927659f623d4 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 08:28:33 -0600 Subject: [PATCH 021/103] Improve comments on metadata --- src/rust/lqos_sys/src/bpf/lqos_kern.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index c98fe0e9..3e6e1ca0 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -171,7 +171,10 @@ int xdp_prog(struct xdp_md *ctx) } __u32 cpu_dest = *cpu_lookup; - // Experimental: can we adjust the metadata? + // Can we adjust the metadata? We'll try to do so, and if we can store the + // needed info there. Not all drivers support this, so it has to remain + // optional. This call invalidates the ctx->data pointer, so it has to be + // done last. int ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, sizeof(struct metadata_pass_t))); if (ret < 0) { #ifdef VERBOSE From 0659cda22503a433bcc3e24087b71b3768992ece Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 08:45:41 -0600 Subject: [PATCH 022/103] Managed to reduce the amount of space required in the XDP metadata buffer to 32 bits. --- src/rust/lqos_sys/src/bpf/lqos_kern.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 3e6e1ca0..79307cdd 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -63,8 +63,7 @@ __be16 isp_vlan = 0; // Structure for passing metadata from XDP to TC struct metadata_pass_t { - __u32 tc_handle; - __u32 cpu; + __u32 tc_handle; // The encoded TC handle }; // XDP Entry Point @@ -195,7 +194,6 @@ int xdp_prog(struct xdp_md *ctx) } struct metadata_pass_t meta = (struct metadata_pass_t) { .tc_handle = tc_handle, - .cpu = cpu }; __builtin_memcpy(data_meta, &meta, sizeof(struct metadata_pass_t)); } From 8f343b7c3b4b3510006a8e071b34b70b2aa9a74d Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 09:10:31 -0600 Subject: [PATCH 023/103] More flexible API for 'top N' calculations based on flow buffer. 
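With the report type expressed as an enum, callers choose the sort key instead of being limited to the bitrate estimate. A hedged sketch of driving the report from the bus side, mirroring the node-manager route below; the Tokio entry point is an assumption made only so the example stands alone.

    use lqos_bus::{bus_request, BusRequest, BusResponse, TopFlowType};

    #[tokio::main]
    async fn main() {
        // Ask for the ten flows that have moved the most bytes in total.
        let responses = bus_request(vec![BusRequest::TopFlows {
            flow_type: TopFlowType::Bytes,
            n: 10,
        }])
        .await
        .unwrap();

        if let Some(BusResponse::TopFlows(flows)) = responses.first() {
            for flow in flows {
                println!(
                    "{} -> {} proto {:?} bytes {:?} rtt {:?}",
                    flow.remote_ip, flow.local_ip, flow.ip_protocol,
                    flow.bytes_sent, flow.last_rtt
                );
            }
        }
    }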
--- src/rust/lqos_bus/src/bus/mod.rs | 2 +- src/rust/lqos_bus/src/bus/request.rs | 22 ++++++++- src/rust/lqos_bus/src/lib.rs | 2 +- .../lqos_node_manager/src/flow_monitor.rs | 15 ++++-- src/rust/lqosd/src/main.rs | 2 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 49 ++++++++++++++++--- 6 files changed, 78 insertions(+), 14 deletions(-) diff --git a/src/rust/lqos_bus/src/bus/mod.rs b/src/rust/lqos_bus/src/bus/mod.rs index 13e394d1..5196b933 100644 --- a/src/rust/lqos_bus/src/bus/mod.rs +++ b/src/rust/lqos_bus/src/bus/mod.rs @@ -10,7 +10,7 @@ pub use client::bus_request; use log::error; pub use persistent_client::BusClient; pub use reply::BusReply; -pub use request::{BusRequest, StatsRequest}; +pub use request::{BusRequest, StatsRequest, TopFlowType}; pub use response::BusResponse; pub use session::BusSession; use thiserror::Error; diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index 52d76607..97199222 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -161,7 +161,27 @@ pub enum BusRequest { CountActiveFlows, /// Top Flows Reports - TopFlows{ n: u32 }, + TopFlows{ + /// The type of top report to request + flow_type: TopFlowType, + /// The number of flows to return + n: u32 + }, +} + +/// Defines the type of "top" flow being requested +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Copy)] +pub enum TopFlowType { + /// Top flows by current estimated bandwidth use + RateEstimate, + /// Top flows by total bytes transferred + Bytes, + /// Top flows by total packets transferred + Packets, + /// Top flows by total drops + Drops, + /// Top flows by round-trip time estimate + RoundTripTime, } /// Specific requests from the long-term stats system diff --git a/src/rust/lqos_bus/src/lib.rs b/src/rust/lqos_bus/src/lib.rs index 0e2cac8f..3cbcf416 100644 --- a/src/rust/lqos_bus/src/lib.rs +++ b/src/rust/lqos_bus/src/lib.rs @@ -21,7 +21,7 @@ pub use bus::{ bus_request, decode_request, decode_response, encode_request, encode_response, BusClient, BusReply, BusRequest, BusResponse, BusSession, CakeDiffTinTransit, CakeDiffTransit, CakeTransit, QueueStoreTransit, - UnixSocketServer, BUS_SOCKET_PATH, StatsRequest + UnixSocketServer, BUS_SOCKET_PATH, StatsRequest, TopFlowType }; pub use tc_handle::TcHandle; diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs index 61ed159f..36c3f5a7 100644 --- a/src/rust/lqos_node_manager/src/flow_monitor.rs +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -26,10 +26,19 @@ pub async fn count_flows() -> NoCache> { NoCache::new(Json(result)) } -#[get("/api/flows/top5")] -pub async fn top_5_flows() -> NoCache>> { +#[get("/api/flows/top//")] +pub async fn top_5_flows(top_n: u32, flow_type: String) -> NoCache>> { + let flow_type = match flow_type.as_str() { + "rate" => lqos_bus::TopFlowType::RateEstimate, + "bytes" => lqos_bus::TopFlowType::Bytes, + "packets" => lqos_bus::TopFlowType::Packets, + "drops" => lqos_bus::TopFlowType::Drops, + "rtt" => lqos_bus::TopFlowType::RoundTripTime, + _ => lqos_bus::TopFlowType::RateEstimate, + }; + let responses = - bus_request(vec![BusRequest::TopFlows { n: 5 }]).await.unwrap(); + bus_request(vec![BusRequest::TopFlows { n: top_n, flow_type }]).await.unwrap(); let result = match &responses[0] { BusResponse::TopFlows(flowbee) => flowbee.to_owned(), _ => Vec::new(), diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 114fad76..f34ce1d4 100644 --- 
a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -229,7 +229,7 @@ fn handle_bus_requests( BusRequest::CountActiveFlows => { throughput_tracker::count_active_flows() } - BusRequest::TopFlows { n } => throughput_tracker::top_flows(*n), + BusRequest::TopFlows { n, flow_type } => throughput_tracker::top_flows(*n, *flow_type), }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 72c3dc44..f70e73d1 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -8,7 +8,7 @@ use crate::{ }; pub use heimdall_data::get_flow_stats; use log::{info, warn}; -use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, XdpPpingResult}; +use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, TopFlowType, XdpPpingResult}; use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; use lts_client::collector::{StatsUpdateMessage, ThroughputSummary, HostSummary}; use once_cell::sync::Lazy; @@ -478,14 +478,49 @@ pub fn all_unknown_ips() -> BusResponse { } /// Top Flows Report - pub fn top_flows(n: u32) -> BusResponse { + pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { let lock = ALL_FLOWS.lock().unwrap(); let mut table = lock.clone(); - table.sort_by(|a, b| { - let a_total = a.1.rate_estimate_bps[0] + a.1.rate_estimate_bps[1]; - let b_total = b.1.rate_estimate_bps[0] + b.1.rate_estimate_bps[1]; - b_total.cmp(&a_total) - }); + + + match flow_type { + TopFlowType::RateEstimate => { + table.sort_by(|a, b| { + let a_total = a.1.rate_estimate_bps[0] + a.1.rate_estimate_bps[1]; + let b_total = b.1.rate_estimate_bps[0] + b.1.rate_estimate_bps[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Bytes => { + table.sort_by(|a, b| { + let a_total = a.1.bytes_sent[0] + a.1.bytes_sent[1]; + let b_total = b.1.bytes_sent[0] + b.1.bytes_sent[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Packets => { + table.sort_by(|a, b| { + let a_total = a.1.packets_sent[0] + a.1.packets_sent[1]; + let b_total = b.1.packets_sent[0] + b.1.packets_sent[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Drops => { + table.sort_by(|a, b| { + let a_total = a.1.retries[0] + a.1.retries[1]; + let b_total = b.1.retries[0] + b.1.retries[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::RoundTripTime => { + table.sort_by(|a, b| { + let a_total = a.1.last_rtt[0] + a.1.last_rtt[1]; + let b_total = b.1.last_rtt[0] + b.1.last_rtt[1]; + b_total.cmp(&a_total) + }); + } + } + let result = table .iter() .take(n as usize) From 7d6cbd417a5f8d9684c63ef643a8e6b7133b7532 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 10:17:28 -0600 Subject: [PATCH 024/103] * Remove most code duplication by tracking rate_index and its reciprocal. * Significantly nicer comments. * I used a ternary operator. Please shoot me. Thanks to Simon Sundberg for this recommendation. --- src/rust/lqos_sys/src/bpf/common/flows.h | 142 +++++++++++------------ 1 file changed, 70 insertions(+), 72 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index bbca5ebd..3e0b0b8c 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -77,8 +77,11 @@ struct __uint(pinning, LIBBPF_PIN_BY_NAME); } flowbee SEC(".maps"); +// Construct an empty flow_data_t structure, using default values. 
static __always_inline struct flow_data_t new_flow_data( + // The current time in nanoseconds, from bpf_ktime_get_boot_ns __u64 now, + // The packet dissector from the previous step struct dissector_t *dissector ) { struct flow_data_t data = { @@ -101,70 +104,55 @@ static __always_inline struct flow_data_t new_flow_data( return data; } +// Construct a flow_key_t structure from a dissector_t. This represents the +// unique key for a flow in the flowbee map. static __always_inline struct flow_key_t build_flow_key( struct dissector_t *dissector, // The packet dissector from the previous step u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { - if (direction == FROM_INTERNET) { - return (struct flow_key_t) { - .src = dissector->src_ip, - .dst = dissector->dst_ip, - .src_port = bpf_htons(dissector->dst_port), - .dst_port = bpf_htons(dissector->src_port), - .protocol = dissector->ip_protocol, - .pad = 0, - .pad1 = 0, - .pad2 = 0 - }; - } else { - return (struct flow_key_t) { - .src = dissector->dst_ip, - .dst = dissector->src_ip, - .src_port = bpf_htons(dissector->src_port), - .dst_port = bpf_htons(dissector->dst_port), - .protocol = dissector->ip_protocol, - .pad = 0, - .pad1 = 0, - .pad2 = 0 - }; - } + __u16 src_port = direction == FROM_INTERNET ? bpf_htons(dissector->src_port) : bpf_htons(dissector->dst_port); + __u16 dst_port = direction == FROM_INTERNET ? bpf_htons(dissector->dst_port) : bpf_htons(dissector->src_port); + + return (struct flow_key_t) { + .src = dissector->src_ip, + .dst = dissector->dst_ip, + .src_port = src_port, + .dst_port = dst_port, + .protocol = dissector->ip_protocol, + .pad = 0, + .pad1 = 0, + .pad2 = 0 + }; } +// Update the flow data with the current packet's information. +// * Update the timestamp of the last seen packet +// * Update the bytes and packets sent +// * Update the rate estimate (if it is time to do so) static __always_inline void update_flow_rates( + // The packet dissector from the previous step struct dissector_t *dissector, - u_int8_t direction, + // The rate index (0 = to internet, 1 = to local network) + u_int8_t rate_index, + // The flow data structure to update struct flow_data_t *data, + // The current time in nanoseconds, from bpf_ktime_get_boot_ns __u64 now ) { data->last_seen = now; // Update bytes and packets sent - if (direction == TO_INTERNET) { - data->bytes_sent[0] += dissector->skb_len; - data->packets_sent[0]++; + data->bytes_sent[rate_index] += dissector->skb_len; + data->packets_sent[rate_index]++; - if (now > data->next_count_time[0]) { - // Calculate the rate estimate - __u64 bits = (data->bytes_sent[0] - data->next_count_bytes[0])*8; - __u64 time = (now - data->last_count_time[0]) / 1000000000; // Seconds - data->rate_estimate_bps[0] = bits/time; - data->next_count_time[0] = now + SECOND_IN_NANOS; - data->next_count_bytes[0] = data->bytes_sent[0]; - data->last_count_time[0] = now; - } - } else { - data->bytes_sent[1] += dissector->skb_len; - data->packets_sent[1]++; - - if (now > data->next_count_time[1]) { - // Calculate the rate estimate - __u64 bits = (data->bytes_sent[1] - data->next_count_bytes[1])*8; - __u64 time = (now - data->last_count_time[1]) / 1000000000; // Seconds - data->rate_estimate_bps[1] = bits/time; - data->next_count_time[1] = now + SECOND_IN_NANOS; - data->next_count_bytes[1] = data->bytes_sent[1]; - data->last_count_time[1] = now; - } + if (now > data->next_count_time[rate_index]) { + // Calculate the rate estimate + __u64 bits = (data->bytes_sent[rate_index] - 
data->next_count_bytes[rate_index])*8; + __u64 time = (now - data->last_count_time[rate_index]) / 1000000000; // Seconds + data->rate_estimate_bps[rate_index] = bits/time; + data->next_count_time[rate_index] = now + SECOND_IN_NANOS; + data->next_count_bytes[rate_index] = data->bytes_sent[0]; + data->last_count_time[rate_index] = now; } } @@ -172,6 +160,8 @@ static __always_inline void update_flow_rates( static __always_inline void process_icmp( struct dissector_t *dissector, u_int8_t direction, + u_int8_t rate_index, + u_int8_t other_rate_index, u_int64_t now ) { struct flow_key_t key = build_flow_key(dissector, direction); @@ -186,13 +176,15 @@ static __always_inline void process_icmp( data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) return; } - update_flow_rates(dissector, direction, data, now); + update_flow_rates(dissector, rate_index, data, now); } // Handle Per-Flow UDP Analysis static __always_inline void process_udp( struct dissector_t *dissector, u_int8_t direction, + u_int8_t rate_index, + u_int8_t other_rate_index, u_int64_t now ) { struct flow_key_t key = build_flow_key(dissector, direction); @@ -207,15 +199,19 @@ static __always_inline void process_udp( data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) return; } - update_flow_rates(dissector, direction, data, now); + update_flow_rates(dissector, rate_index, data, now); } // Handle Per-Flow TCP Analysis static __always_inline void process_tcp( struct dissector_t *dissector, u_int8_t direction, + u_int8_t rate_index, + u_int8_t other_rate_index, u_int64_t now ) { + // SYN packet indicating the start of a conversation. We are explicitly ignoring + // SYN-ACK packets, we just want to catch the opening of a new connection. if ((BITCHECK(DIS_TCP_SYN) && !BITCHECK(DIS_TCP_ACK) && direction == TO_INTERNET) || (BITCHECK(DIS_TCP_SYN) && !BITCHECK(DIS_TCP_ACK) && direction == FROM_INTERNET)) { // A customer is requesting a new TCP connection. 
That means @@ -231,7 +227,7 @@ static __always_inline void process_tcp( return; } - // Build the flow key + // Build the flow key to uniquely identify this flow struct flow_key_t key = build_flow_key(dissector, direction); struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { @@ -239,29 +235,21 @@ static __always_inline void process_tcp( return; } - update_flow_rates(dissector, direction, data, now); + // Update the flow data with the current packet's information + update_flow_rates(dissector, rate_index, data, now); // Sequence and Acknowledgement numbers __u32 sequence = bpf_ntohl(dissector->sequence); __u32 ack_seq = bpf_ntohl(dissector->ack_seq); - if (direction == TO_INTERNET) { - if (data->last_sequence[0] != 0 && sequence < data->last_sequence[0]) { - // This is a retransmission - data->retries[0]++; - } - - data->last_sequence[0] = sequence; - data->last_ack[0] = ack_seq; - } else { - if (data->last_sequence[1] != 0 && sequence < data->last_sequence[1]) { - // This is a retransmission - data->retries[1]++; - } - - data->last_sequence[1] = sequence; - data->last_ack[1] = ack_seq; + if (data->last_sequence[rate_index] != 0 && sequence < data->last_sequence[rate_index]) { + // This is a retransmission + data->retries[rate_index]++; } + // Store the sequence and ack numbers for the next packet + data->last_sequence[rate_index] = sequence; + data->last_ack[rate_index] = ack_seq; + // Timestamps to calculate RTT u_int32_t tsval = dissector->tsval; u_int32_t tsecr = dissector->tsecr; @@ -312,12 +300,22 @@ static __always_inline void track_flows( ) { __u64 now = bpf_ktime_get_boot_ns(); + u_int8_t rate_index; + u_int8_t other_rate_index; + if (direction == TO_INTERNET) { + rate_index = 0; + other_rate_index = 1; + } else { + rate_index = 1; + other_rate_index = 0; + } + // Pass to the appropriate protocol handler switch (dissector->ip_protocol) { - case IPPROTO_TCP: process_tcp(dissector, direction, now); break; - case IPPROTO_UDP: process_udp(dissector, direction, now); break; - case IPPROTO_ICMP: process_icmp(dissector, direction, now); break; + case IPPROTO_TCP: process_tcp(dissector, direction, rate_index, other_rate_index, now); break; + case IPPROTO_UDP: process_udp(dissector, direction, rate_index, other_rate_index, now); break; + case IPPROTO_ICMP: process_icmp(dissector, direction, rate_index, other_rate_index, now); break; default: { #ifdef VERBOSE bpf_debug("[FLOWS] Unsupported protocol: %d", dissector->ip_protocol); From 23f2cfb9d82860e6545312685e108328a104654e Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 11:05:37 -0600 Subject: [PATCH 025/103] Reduce the flow rate estimation frequency from 1 second to 1 ms. This reduces round-error issues and is generally more accurate - at the expense of slightly higher CPU usage and a "more bouncy" rate - that is, for a widely variable flow such as Netflix it jumps around a lot more. I remain unconvinced that 1ms is the right number - will continue to test. Thanks again to Simon Sundberg for this suggestion. 
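The rounding issue is easiest to see with concrete numbers: the per-second estimator divides the elapsed window by whole seconds, so a window that actually lasted 1.9 seconds truncates to 1 and the flow appears almost twice as fast as it really is. The snippet below is a small userspace model of that arithmetic (plain Rust, not the eBPF code; the byte count is invented purely for illustration):

    // Model of the estimator's integer arithmetic. divisor_nanos selects the
    // unit the elapsed time is truncated to: one second or one millisecond.
    fn rate_bps(bytes_in_window: u64, elapsed_nanos: u64, divisor_nanos: u64) -> u64 {
        let bits = bytes_in_window * 8;
        let units = (elapsed_nanos / divisor_nanos).max(1); // truncating division
        (bits / units) * (1_000_000_000 / divisor_nanos)    // scale back to per-second
    }

    fn main() {
        let bytes = 2_375_000; // ~19 Mbit carried over a 1.9 s window, so 10 Mbps in reality
        // Whole-second truncation: 1.9 s counts as 1 s, reported as ~19 Mbps.
        println!("{}", rate_bps(bytes, 1_900_000_000, 1_000_000_000));
        // Millisecond truncation: 1900 ms, reported as 10 Mbps.
        println!("{}", rate_bps(bytes, 1_900_000_000, 1_000_000));
    }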
--- src/rust/lqos_sys/src/bpf/common/flows.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 3e0b0b8c..a3a53383 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -9,6 +9,7 @@ #define SECOND_IN_NANOS 1000000000 +#define MS_IN_NANOS 1000000 //#define TIMESTAMP_INTERVAL_NANOS 10000000 // Some helpers to make understanding direction easier @@ -88,7 +89,9 @@ static __always_inline struct flow_data_t new_flow_data( .start_time = now, .bytes_sent = { 0, 0 }, .packets_sent = { 0, 0 }, - .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, + // Track flow rates at an MS scale rather than per-second + // to minimize rounding errors. + .next_count_time = { now + MS_IN_NANOS, now + MS_IN_NANOS }, .last_count_time = { now, now }, .next_count_bytes = { dissector->skb_len, dissector->skb_len }, .rate_estimate_bps = { 0, 0 }, @@ -148,10 +151,10 @@ static __always_inline void update_flow_rates( if (now > data->next_count_time[rate_index]) { // Calculate the rate estimate __u64 bits = (data->bytes_sent[rate_index] - data->next_count_bytes[rate_index])*8; - __u64 time = (now - data->last_count_time[rate_index]) / 1000000000; // Seconds - data->rate_estimate_bps[rate_index] = bits/time; - data->next_count_time[rate_index] = now + SECOND_IN_NANOS; - data->next_count_bytes[rate_index] = data->bytes_sent[0]; + __u64 time = (now - data->last_count_time[rate_index]) / 1000000; // Milliseconds + data->rate_estimate_bps[rate_index] = (bits/time) * 1000; // bits per second + data->next_count_time[rate_index] = now + MS_IN_NANOS; + data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; data->last_count_time[rate_index] = now; } } From 370b65fa67c3da6511a62bf043912eb5096d3901 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 11:19:35 -0600 Subject: [PATCH 026/103] Compromise on 10ms sampling time for rate estimation. Giving pretty accurate results on my tests, and not hitting the CPU too hard. --- src/rust/lqos_sys/src/bpf/common/flows.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index a3a53383..c637691f 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -9,7 +9,7 @@ #define SECOND_IN_NANOS 1000000000 -#define MS_IN_NANOS 1000000 +#define MS_IN_NANOS_T10 10000000 //#define TIMESTAMP_INTERVAL_NANOS 10000000 // Some helpers to make understanding direction easier @@ -91,7 +91,7 @@ static __always_inline struct flow_data_t new_flow_data( .packets_sent = { 0, 0 }, // Track flow rates at an MS scale rather than per-second // to minimize rounding errors. 
- .next_count_time = { now + MS_IN_NANOS, now + MS_IN_NANOS }, + .next_count_time = { now + MS_IN_NANOS_T10, now + MS_IN_NANOS_T10 }, .last_count_time = { now, now }, .next_count_bytes = { dissector->skb_len, dissector->skb_len }, .rate_estimate_bps = { 0, 0 }, @@ -151,11 +151,12 @@ static __always_inline void update_flow_rates( if (now > data->next_count_time[rate_index]) { // Calculate the rate estimate __u64 bits = (data->bytes_sent[rate_index] - data->next_count_bytes[rate_index])*8; - __u64 time = (now - data->last_count_time[rate_index]) / 1000000; // Milliseconds - data->rate_estimate_bps[rate_index] = (bits/time) * 1000; // bits per second - data->next_count_time[rate_index] = now + MS_IN_NANOS; + __u64 time = (now - data->last_count_time[rate_index]) / 100000; // 10 Milliseconds + data->rate_estimate_bps[rate_index] = (bits/time); // bits per second + data->next_count_time[rate_index] = now + MS_IN_NANOS_T10; data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; data->last_count_time[rate_index] = now; + bpf_debug("[FLOWS] Rate Estimate: %llu", data->rate_estimate_bps[rate_index]); } } From 2be8ce4aa9a80e9db580ee813240380af760fdf1 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 11:35:25 -0600 Subject: [PATCH 027/103] Add wraparound logic to handle ACK sequence wraparound. --- src/rust/lqos_sys/src/bpf/common/flows.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index c637691f..3d1d194a 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -245,7 +245,14 @@ static __always_inline void process_tcp( // Sequence and Acknowledgement numbers __u32 sequence = bpf_ntohl(dissector->sequence); __u32 ack_seq = bpf_ntohl(dissector->ack_seq); - if (data->last_sequence[rate_index] != 0 && sequence < data->last_sequence[rate_index]) { + if ( + data->last_sequence[rate_index] != 0 && // We have a previous sequence number + sequence < data->last_sequence[rate_index] && // This is a retransmission + ( + data->last_sequence[rate_index] > 0x10000 && // Wrap around possible + sequence > data->last_sequence[rate_index] - 0x10000 // Wrap around didn't occur + ) + ) { // This is a retransmission data->retries[rate_index]++; } From 28ebecc44d1b336eebfeb385d2598018135e97ea Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 11:53:10 -0600 Subject: [PATCH 028/103] Forgot to comment out a debug line. --- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 3d1d194a..187c6961 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -156,7 +156,7 @@ static __always_inline void update_flow_rates( data->next_count_time[rate_index] = now + MS_IN_NANOS_T10; data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; data->last_count_time[rate_index] = now; - bpf_debug("[FLOWS] Rate Estimate: %llu", data->rate_estimate_bps[rate_index]); + //bpf_debug("[FLOWS] Rate Estimate: %llu", data->rate_estimate_bps[rate_index]); } } From 0173798981edf2146f22b9757f23a30b5fed389f Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 12:16:45 -0600 Subject: [PATCH 029/103] WIP - Fix my botch at deduping the flow key code. 
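For reference, the reason the key code matters: the flowbee map is meant to hold one entry per conversation, so packets travelling in either direction need to resolve to the same key. The deduplicated helper from the earlier patch kept swapping the ports but no longer swapped the addresses; the diff that follows restores the swap for traffic heading to the Internet. A simplified userspace sketch of the keying idea (stand-in types only; the real code works on in6_addr and keeps the ports in network byte order):

    #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
    struct FlowKey {
        src: [u8; 16],   // stand-in for in6_addr
        dst: [u8; 16],
        src_port: u16,
        dst_port: u16,
        protocol: u8,
    }

    enum Direction { FromInternet, ToInternet }

    // Always store the tuple "as seen from the Internet" so that upload and
    // download packets of one conversation land on the same map entry.
    fn build_flow_key(
        src: [u8; 16], dst: [u8; 16],
        src_port: u16, dst_port: u16,
        protocol: u8, direction: Direction,
    ) -> FlowKey {
        match direction {
            // Packet arriving from the Internet: keep the tuple as seen.
            Direction::FromInternet => FlowKey { src, dst, src_port, dst_port, protocol },
            // Packet heading to the Internet: swap it so both directions match.
            Direction::ToInternet => FlowKey {
                src: dst, dst: src,
                src_port: dst_port, dst_port: src_port,
                protocol,
            },
        }
    }

    fn main() {
        // A download packet and the corresponding reply must produce the same
        // key, otherwise the flow would be tracked as two separate entries.
        let download = build_flow_key([1; 16], [2; 16], 443, 50_000, 6, Direction::FromInternet);
        let reply = build_flow_key([2; 16], [1; 16], 50_000, 443, 6, Direction::ToInternet);
        assert_eq!(download, reply);
        println!("{:?}", download);
    }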
--- src/rust/lqos_sys/src/bpf/common/flows.h | 52 +++++++++++++++--------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 187c6961..57e4979e 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -115,10 +115,12 @@ static __always_inline struct flow_key_t build_flow_key( ) { __u16 src_port = direction == FROM_INTERNET ? bpf_htons(dissector->src_port) : bpf_htons(dissector->dst_port); __u16 dst_port = direction == FROM_INTERNET ? bpf_htons(dissector->dst_port) : bpf_htons(dissector->src_port); + struct in6_addr src = direction == FROM_INTERNET ? dissector->src_ip : dissector->dst_ip; + struct in6_addr dst = direction == FROM_INTERNET ? dissector->dst_ip : dissector->src_ip; return (struct flow_key_t) { - .src = dissector->src_ip, - .dst = dissector->dst_ip, + .src = src, + .dst = dst, .src_port = src_port, .dst_port = dst_port, .protocol = dissector->ip_protocol, @@ -206,6 +208,33 @@ static __always_inline void process_udp( update_flow_rates(dissector, rate_index, data, now); } +// Store the most recent sequence and ack numbers, and detect retransmissions. +// This will also trigger on duplicate packets, and out-of-order - but those +// are both an indication that you have issues anyway. So that's ok by me! +static __always_inline void detect_retries( + struct dissector_t *dissector, + u_int8_t rate_index, + struct flow_data_t *data +) { + __u32 sequence = bpf_ntohl(dissector->sequence); + __u32 ack_seq = bpf_ntohl(dissector->ack_seq); + if ( + data->last_sequence[rate_index] != 0 && // We have a previous sequence number + sequence < data->last_sequence[rate_index] && // This is a retransmission + ( + data->last_sequence[rate_index] > 0x10000 && // Wrap around possible + sequence > data->last_sequence[rate_index] - 0x10000 // Wrap around didn't occur + ) + ) { + // This is a retransmission + data->retries[rate_index]++; + } + + // Store the sequence and ack numbers for the next packet + data->last_sequence[rate_index] = sequence; + data->last_ack[rate_index] = ack_seq; +} + // Handle Per-Flow TCP Analysis static __always_inline void process_tcp( struct dissector_t *dissector, @@ -236,6 +265,7 @@ static __always_inline void process_tcp( struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { // If it isn't a flow we're tracking, bail out now + bpf_debug("Bailing"); return; } @@ -243,23 +273,7 @@ static __always_inline void process_tcp( update_flow_rates(dissector, rate_index, data, now); // Sequence and Acknowledgement numbers - __u32 sequence = bpf_ntohl(dissector->sequence); - __u32 ack_seq = bpf_ntohl(dissector->ack_seq); - if ( - data->last_sequence[rate_index] != 0 && // We have a previous sequence number - sequence < data->last_sequence[rate_index] && // This is a retransmission - ( - data->last_sequence[rate_index] > 0x10000 && // Wrap around possible - sequence > data->last_sequence[rate_index] - 0x10000 // Wrap around didn't occur - ) - ) { - // This is a retransmission - data->retries[rate_index]++; - } - - // Store the sequence and ack numbers for the next packet - data->last_sequence[rate_index] = sequence; - data->last_ack[rate_index] = ack_seq; + detect_retries(dissector, rate_index, data); // Timestamps to calculate RTT u_int32_t tsval = dissector->tsval; From ff0b3973952eff990cfbda194fc6474250f72088 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 29 Feb 2024 12:26:31 -0600 Subject: 
[PATCH 030/103] Dedupe the RTT estimation code. --- src/rust/lqos_sys/src/bpf/common/flows.h | 31 ++++++------------------ 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 57e4979e..cf6fd502 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -280,32 +280,17 @@ static __always_inline void process_tcp( u_int32_t tsecr = dissector->tsecr; if (tsval != 0) { //bpf_debug("[FLOWS][%d] TSVAL: %u, TSECR: %u", direction, tsval, tsecr); - if (direction == TO_INTERNET) { - if (tsval != data->tsval[0] && tsecr != data->tsecr[0]) { + if (tsval != data->tsval[rate_index] && tsecr != data->tsecr[rate_index]) { - if (tsecr == data->tsval[1]) { - __u64 elapsed = now - data->ts_change_time[1]; - data->last_rtt[0] = elapsed; - //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); - } - - data->ts_change_time[0] = now; - data->tsval[0] = tsval; - data->tsecr[0] = tsecr; + if (tsecr == data->tsval[other_rate_index]) { + __u64 elapsed = now - data->ts_change_time[other_rate_index]; + data->last_rtt[rate_index] = elapsed; + //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); } - } else { - if (tsval != data->tsecr[1] && tsecr != data->tsval[1]) { - if (tsval == data->tsecr[0]) { - __u64 elapsed = now - data->ts_change_time[0]; - data->last_rtt[1] = elapsed; - //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); - } - - data->ts_change_time[1] = now; - data->tsval[1] = tsval; - data->tsecr[1] = tsecr; - } + data->ts_change_time[rate_index] = now; + data->tsval[rate_index] = tsval; + data->tsecr[rate_index] = tsecr; } } From 28e861aeee990bc63d4ec754f5fe65141cd13325 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 5 Mar 2024 08:44:57 -0600 Subject: [PATCH 031/103] First attempt at adding NetFlow v5 support. --- src/rust/lqos_bus/src/ip_stats.rs | 6 +- src/rust/lqos_config/src/etc/mod.rs | 1 - src/rust/lqos_config/src/etc/v15/flows.rs | 26 +++ src/rust/lqos_config/src/etc/v15/mod.rs | 1 + .../lqos_config/src/etc/v15/top_config.rs | 4 + src/rust/lqos_sys/src/bifrost_maps.rs | 4 +- src/rust/lqos_sys/src/bpf/common/flows.h | 15 +- src/rust/lqos_sys/src/bpf_iterator.rs | 21 +- src/rust/lqos_sys/src/flowbee_data.rs | 8 +- src/rust/lqos_sys/src/lib.rs | 2 +- src/rust/lqos_utils/src/xdp_ip_address.rs | 3 +- src/rust/lqosd/src/main.rs | 5 +- .../lqosd/src/throughput_tracker/flow_data.rs | 198 +++++++++++++++++- src/rust/lqosd/src/throughput_tracker/mod.rs | 48 ++++- .../src/throughput_tracker/tracking_data.rs | 42 +++- 15 files changed, 361 insertions(+), 23 deletions(-) create mode 100644 src/rust/lqos_config/src/etc/v15/flows.rs diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index b9a08cfa..0f2a89c2 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -191,5 +191,9 @@ pub struct FlowbeeData { pub last_rtt: [u64; 2], /// Has the connection ended? 
/// 0 = Alive, 1 = FIN, 2 = RST - pub end_status: u32, + pub end_status: u8, + /// Raw IP TOS + pub tos: u8, + /// Raw TCP flags + pub flags: u8, } \ No newline at end of file diff --git a/src/rust/lqos_config/src/etc/mod.rs b/src/rust/lqos_config/src/etc/mod.rs index 58a1df95..74af9730 100644 --- a/src/rust/lqos_config/src/etc/mod.rs +++ b/src/rust/lqos_config/src/etc/mod.rs @@ -45,7 +45,6 @@ pub fn load_config() -> Result { *lock = Some(config_result.unwrap()); } - log::info!("Returning cached config"); Ok(lock.as_ref().unwrap().clone()) } diff --git a/src/rust/lqos_config/src/etc/v15/flows.rs b/src/rust/lqos_config/src/etc/v15/flows.rs new file mode 100644 index 00000000..fe3a00f3 --- /dev/null +++ b/src/rust/lqos_config/src/etc/v15/flows.rs @@ -0,0 +1,26 @@ +//! Provides netflow support for tracking network flows. +//! +//! You can enable them by adding a `[flows]` section to your configuration file. + +use serde::{Serialize, Deserialize}; + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct FlowConfig { + pub flow_timeout_seconds: u64, + pub netflow_enabled: bool, + pub netflow_port: Option, + pub netflow_ip: Option, + pub netflow_version: Option, +} + +impl Default for FlowConfig { + fn default() -> Self { + Self { + flow_timeout_seconds: 30, + netflow_enabled: false, + netflow_port: None, + netflow_ip: None, + netflow_version: None, + } + } +} diff --git a/src/rust/lqos_config/src/etc/v15/mod.rs b/src/rust/lqos_config/src/etc/v15/mod.rs index 44567b47..a84906e1 100644 --- a/src/rust/lqos_config/src/etc/v15/mod.rs +++ b/src/rust/lqos_config/src/etc/v15/mod.rs @@ -14,6 +14,7 @@ mod uisp_integration; mod powercode_integration; mod sonar_integration; mod influxdb; +mod flows; pub use bridge::*; pub use long_term_stats::LongTermStats; pub use tuning::Tunables; \ No newline at end of file diff --git a/src/rust/lqos_config/src/etc/v15/top_config.rs b/src/rust/lqos_config/src/etc/v15/top_config.rs index 5d7175d1..f96f3d56 100644 --- a/src/rust/lqos_config/src/etc/v15/top_config.rs +++ b/src/rust/lqos_config/src/etc/v15/top_config.rs @@ -51,6 +51,9 @@ pub struct Config { /// IP Range definitions pub ip_ranges: super::ip_ranges::IpRanges, + /// Network flows configuration + pub flows: Option, + /// Integration Common Variables pub integration_common: super::integration_common::IntegrationConfig, @@ -133,6 +136,7 @@ impl Default for Config { influxdb: super::influxdb::InfluxDbConfig::default(), packet_capture_time: 10, queue_check_period_ms: 1000, + flows: None, } } } diff --git a/src/rust/lqos_sys/src/bifrost_maps.rs b/src/rust/lqos_sys/src/bifrost_maps.rs index 68de6ccb..5a296907 100644 --- a/src/rust/lqos_sys/src/bifrost_maps.rs +++ b/src/rust/lqos_sys/src/bifrost_maps.rs @@ -84,7 +84,7 @@ pub(crate) fn map_single_interface_mode( // VLANs - Internet let mut key: u32 = (interface_name_to_index(&interface)? << 16) | internet_vlan; - let mut val = BifrostVlan { redirect_to: mapping.redirect_to }; + let mut val = BifrostVlan { redirect_to: lan_vlan }; vlan_map.insert(&mut key, &mut val)?; info!( "Mapped bifrost VLAN: {}:{} => {}", @@ -94,7 +94,7 @@ pub(crate) fn map_single_interface_mode( // VLANs - LAN let mut key: u32 = (interface_name_to_index(&interface)? 
<< 16) | lan_vlan; - let mut val = BifrostVlan { redirect_to: mapping.redirect_to }; + let mut val = BifrostVlan { redirect_to: internet_vlan }; vlan_map.insert(&mut key, &mut val)?; info!( "Mapped bifrost VLAN: {}:{} => {}", diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index cf6fd502..63c76478 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -64,7 +64,13 @@ struct flow_data_t { __u64 last_rtt[2]; // Has the connection ended? // 0 = Alive, 1 = FIN, 2 = RST - __u32 end_status; + __u8 end_status; + // TOS + __u8 tos; + // IP Flags + __u8 ip_flags; + // Padding + __u8 pad; }; // Map for tracking TCP flow progress. @@ -102,7 +108,9 @@ static __always_inline struct flow_data_t new_flow_data( .tsecr = { 0, 0 }, .ts_change_time = { 0, 0 }, .last_rtt = { 0, 0 }, - .end_status = 0 + .end_status = 0, + .tos = 0, + .ip_flags = 0, }; return data; } @@ -145,6 +153,7 @@ static __always_inline void update_flow_rates( __u64 now ) { data->last_seen = now; + data->end_status = 0; // Reset the end status // Update bytes and packets sent data->bytes_sent[rate_index] += dissector->skb_len; @@ -254,6 +263,8 @@ static __always_inline void process_tcp( #endif struct flow_key_t key = build_flow_key(dissector, direction); struct flow_data_t data = new_flow_data(now, dissector); + data.tos = dissector->tos; + data.ip_flags = 0; // Obtain these if (bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY) != 0) { bpf_debug("[FLOWS] Failed to add new flow to map"); } diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 828c2436..31cd4036 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -1,6 +1,5 @@ use crate::{ - flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, - kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter + bpf_map::BpfMap, flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter }; use lqos_utils::XdpIpAddress; use once_cell::sync::Lazy; @@ -274,4 +273,22 @@ pub fn iterate_flows( let _ = iter.for_each(callback); } } +} + +/// Adjust flows to have status 2 - already processed +/// +// Arguments: the list of flow keys to expire +pub fn end_flows(flows: &mut [FlowbeeKey]) -> anyhow::Result<()> { + let mut map = BpfMap::::from_path("/sys/fs/bpf/flowbee")?; + + let mut dead_flow = FlowbeeData { + end_status: 2, + ..Default::default() + }; + + for flow in flows { + map.insert_or_update(flow, &mut dead_flow)?; + } + + Ok(()) } \ No newline at end of file diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs index 9feb13b6..78461977 100644 --- a/src/rust/lqos_sys/src/flowbee_data.rs +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -59,5 +59,11 @@ pub struct FlowbeeData { pub last_rtt: [u64; 2], /// Has the connection ended? /// 0 = Alive, 1 = FIN, 2 = RST - pub end_status: u32, + pub end_status: u8, + /// Raw IP TOS + pub tos: u8, + /// Raw TCP flags + pub flags: u8, + /// Padding. 
+ pub padding: u8, } \ No newline at end of file diff --git a/src/rust/lqos_sys/src/lib.rs b/src/rust/lqos_sys/src/lib.rs index 92d6d71a..482b925e 100644 --- a/src/rust/lqos_sys/src/lib.rs +++ b/src/rust/lqos_sys/src/lib.rs @@ -30,4 +30,4 @@ pub use kernel_wrapper::LibreQoSKernels; pub use linux::num_possible_cpus; pub use lqos_kernel::max_tracked_ips; pub use throughput::{throughput_for_each, HostCounter}; -pub use bpf_iterator::{iterate_heimdall, iterate_flows}; \ No newline at end of file +pub use bpf_iterator::{iterate_heimdall, iterate_flows, end_flows}; \ No newline at end of file diff --git a/src/rust/lqos_utils/src/xdp_ip_address.rs b/src/rust/lqos_utils/src/xdp_ip_address.rs index 8580f783..40d61628 100644 --- a/src/rust/lqos_utils/src/xdp_ip_address.rs +++ b/src/rust/lqos_utils/src/xdp_ip_address.rs @@ -42,7 +42,8 @@ impl XdpIpAddress { result } - fn is_v4(&self) -> bool { + /// Checks if the `XdpIpAddress` is an IPv4 address + pub fn is_v4(&self) -> bool { self.0[0] == 0xFF && self.0[1] == 0xFF && self.0[2] == 0xFF diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index f34ce1d4..200b7bbf 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -12,7 +12,7 @@ mod long_term_stats; use std::net::IpAddr; use crate::{ file_lock::FileLock, - ip_mapping::{clear_ip_flows, del_ip_flow, list_mapped_ips, map_ip_to_flow}, + ip_mapping::{clear_ip_flows, del_ip_flow, list_mapped_ips, map_ip_to_flow}, throughput_tracker::flow_data::setup_netflow_tracker, }; use anyhow::Result; use log::{info, warn}; @@ -73,13 +73,14 @@ async fn main() -> Result<()> { // Spawn tracking sub-systems let long_term_stats_tx = start_long_term_stats().await; + let flow_tx = setup_netflow_tracker(); join!( start_heimdall(), spawn_queue_structure_monitor(), shaped_devices_tracker::shaped_devices_watcher(), shaped_devices_tracker::network_json_watcher(), anonymous_usage::start_anonymous_usage(), - throughput_tracker::spawn_throughput_monitor(long_term_stats_tx.clone()), + throughput_tracker::spawn_throughput_monitor(long_term_stats_tx.clone(), flow_tx), ); spawn_queue_monitor(); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data.rs b/src/rust/lqosd/src/throughput_tracker/flow_data.rs index f8280deb..9e6e28a8 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data.rs @@ -1,7 +1,203 @@ use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use lqos_utils::unix_time::time_since_boot; +use nix::sys::time::TimeValLike; use once_cell::sync::Lazy; -use std::sync::Mutex; +use std::{net::{IpAddr, UdpSocket}, sync::{mpsc::{channel, Sender}, Mutex}}; pub static ALL_FLOWS: Lazy>> = Lazy::new(|| Mutex::new(Vec::with_capacity(128_000))); +// Creates the netflow tracker and returns the sender +pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { + let (tx, rx) = channel::<(FlowbeeKey, FlowbeeData)>(); + let config = lqos_config::load_config().unwrap(); + + std::thread::spawn(move || { + log::info!("Starting the network flow tracker back-end"); + + // Build the endpoints list + let mut endpoints: Vec> = Vec::new(); + if let Some(flow_config) = config.flows { + if let (Some(ip), Some(port), Some(version)) = (flow_config.netflow_ip, flow_config.netflow_port, flow_config.netflow_version) + { + log::info!("Setting up netflow target: {ip}:{port}, version: {version}"); + let target = format!("{ip}:{port}", ip = ip, port = port); + match version { + 5 => { + let endpoint = Netflow5::new(target).unwrap(); + 
endpoints.push(Box::new(endpoint)); + log::info!("Netflow 5 endpoint added"); + } + _ => log::error!("Unsupported netflow version: {version}"), + } + } + + } + + // Send to all endpoints upon receipt + while let Ok((key, value)) = rx.recv() { + endpoints.iter_mut().for_each(|f| f.send(key.clone(), value.clone())); + } + log::info!("Network flow tracker back-end has stopped") + }); + + tx +} + +trait FlowbeeRecipient { + fn send(&mut self, key: FlowbeeKey, data: FlowbeeData); +} + +struct Netflow5 { + socket: UdpSocket, + sequence: u32, + target: String, +} + +impl Netflow5 { + fn new(target: String) -> anyhow::Result { + let socket = UdpSocket::bind("0.0.0.0:12212")?; + Ok(Self { socket, sequence: 0, target }) + } +} + +impl FlowbeeRecipient for Netflow5 { + fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { + if let Ok((packet1, packet2)) = to_netflow_5(&key, &data) { + let header = Netflow5Header::new(self.sequence); + let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; + let packet1_bytes = unsafe { std::slice::from_raw_parts(&packet1 as *const _ as *const u8, std::mem::size_of::()) }; + let packet2_bytes = unsafe { std::slice::from_raw_parts(&packet2 as *const _ as *const u8, std::mem::size_of::()) }; + let mut buffer = Vec::with_capacity(header_bytes.len() + packet1_bytes.len() + packet2_bytes.len()); + buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(packet1_bytes); + buffer.extend_from_slice(packet2_bytes); + + log::debug!("Sending netflow packet to {target}", target = self.target); + self.socket.send_to(&buffer, &self.target).unwrap(); + + self.sequence = self.sequence.wrapping_add(2); + } + } +} + +#[repr(C)] +struct Netflow5Header { + version: u16, + count: u16, + sys_uptime: u32, + unix_secs: u32, + unix_nsecs: u32, + flow_sequence: u32, + engine_type: u8, + engine_id: u8, + sampling_interval: u16, +} + +impl Netflow5Header { + fn new(flow_sequence: u32) -> Self { + let uptime = time_since_boot().unwrap(); + + Self { + version: 5, + count: 2, + sys_uptime: uptime.num_milliseconds() as u32, + unix_secs: uptime.num_seconds() as u32, + unix_nsecs: 0, + flow_sequence, + engine_type: 0, + engine_id: 0, + sampling_interval: 0, + } + } + +} + +#[repr(C)] +struct Netflow5Record { + src_addr: u32, + dst_addr: u32, + next_hop: u32, + input: u16, + output: u16, + d_pkts: u32, + d_octets: u32, + first: u32, + last: u32, + src_port: u16, + dst_port: u16, + pad1: u8, + tcp_flags: u8, + prot: u8, + tos: u8, + src_as: u16, + dst_as: u16, + src_mask: u8, + dst_mask: u8, + pad2: u16, +} + +fn to_netflow_5(key: &FlowbeeKey, data: &FlowbeeData) -> anyhow::Result<(Netflow5Record, Netflow5Record)> { + // TODO: Detect overflow + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { + let src_ip = u32::from_ne_bytes(local.octets()); + let dst_ip = u32::from_ne_bytes(remote.octets()); + // Convert d_pkts to network order + let d_pkts = (data.packets_sent[0] as u32).to_be(); + let d_octets = (data.bytes_sent[0] as u32).to_be(); + let d_pkts2 = (data.packets_sent[1] as u32).to_be(); + let d_octets2 = (data.bytes_sent[1] as u32).to_be(); + + let record = Netflow5Record { + src_addr: src_ip, + dst_addr: dst_ip, + next_hop: 0, + input: 0, + output: 1, + d_pkts, + d_octets, + first: data.start_time as u32, // Convert to milliseconds + last: data.last_seen as u32, // Convert to milliseconds + src_port: key.src_port.to_be(), + dst_port: 
key.dst_port.to_be(), + pad1: 0, + tcp_flags: 0, + prot: key.ip_protocol.to_be(), + tos: 0, + src_as: 0, + dst_as: 0, + src_mask: 0, + dst_mask: 0, + pad2: 0, + }; + + let record2 = Netflow5Record { + src_addr: dst_ip, + dst_addr: src_ip, + next_hop: 0, + input: 1, + output: 0, + d_pkts: d_pkts2, + d_octets: d_octets2, + first: data.start_time as u32, // Convert to milliseconds + last: data.last_seen as u32, // Convert to milliseconds + src_port: key.dst_port.to_be(), + dst_port: key.src_port.to_be(), + pad1: 0, + tcp_flags: 0, + prot: key.ip_protocol.to_be(), + tos: 0, + src_as: 0, + dst_as: 0, + src_mask: 0, + dst_mask: 0, + pad2: 0, + }; + + Ok((record, record2)) + } else { + Err(anyhow::anyhow!("Only IPv4 is supported")) + } +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index f70e73d1..b6164d81 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -9,6 +9,7 @@ use crate::{ pub use heimdall_data::get_flow_stats; use log::{info, warn}; use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, TopFlowType, XdpPpingResult}; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; use lts_client::collector::{StatsUpdateMessage, ThroughputSummary, HostSummary}; use once_cell::sync::Lazy; @@ -30,19 +31,50 @@ pub static THROUGHPUT_TRACKER: Lazy = Lazy::new(ThroughputTra /// /// * `long_term_stats_tx` - an optional MPSC sender to notify the /// collection thread that there is fresh data. -pub async fn spawn_throughput_monitor(long_term_stats_tx: Sender) { +pub async fn spawn_throughput_monitor( + long_term_stats_tx: Sender, + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)>, +) { info!("Starting the bandwidth monitor thread."); let interval_ms = 1000; // 1 second info!("Bandwidth check period set to {interval_ms} ms."); - tokio::spawn(throughput_task(interval_ms, long_term_stats_tx)); + tokio::spawn(throughput_task(interval_ms, long_term_stats_tx, netflow_sender)); } -async fn throughput_task(interval_ms: u64, long_term_stats_tx: Sender) { +async fn throughput_task( + interval_ms: u64, + long_term_stats_tx: Sender, + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)> +) { + // Obtain the flow timeout from the config, default to 30 seconds + let timeout_seconds = if let Ok(config) = lqos_config::load_config() { + if let Some(flow_config) = config.flows { + flow_config.flow_timeout_seconds + } else { + 30 + } + } else { + 30 + }; + + // Obtain the netflow_enabled from the config, default to false + let netflow_enabled = if let Ok(config) = lqos_config::load_config() { + if let Some(flow_config) = config.flows { + flow_config.netflow_enabled + } else { + false + } + } else { + false + }; + + loop { let start = Instant::now(); // Perform the stats collection in a blocking thread, ensuring that // the tokio runtime is not blocked. 
+ let my_netflow_sender = netflow_sender.clone(); if let Err(e) = tokio::task::spawn_blocking(move || { { @@ -51,7 +83,11 @@ async fn throughput_task(interval_ms: u64, long_term_stats_tx: Sender BusResponse { retries: flow.retries, last_rtt: flow.last_rtt, end_status: flow.end_status, + tos: flow.tos, + flags: flow.flags, }); } @@ -537,6 +575,8 @@ pub fn all_unknown_ips() -> BusResponse { retries: flow.retries, last_rtt: flow.last_rtt, end_status: flow.end_status, + tos: flow.tos, + flags: flow.flags, } }) .collect(); diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 59e67983..dddcea01 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -3,7 +3,7 @@ use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH use super::{flow_data::ALL_FLOWS, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; -use lqos_sys::{iterate_flows, throughput_for_each}; +use lqos_sys::{flowbee_data::{FlowbeeData, FlowbeeKey}, iterate_flows, throughput_for_each}; use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; pub struct ThroughputTracker { @@ -168,16 +168,41 @@ impl ThroughputTracker { }); } - pub(crate) fn apply_flow_data(&self) { + pub(crate) fn apply_flow_data( + &self, + timeout_seconds: u64, + netflow_enabled: bool, + sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)>, + ) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); if let Ok(now) = time_since_boot() { let since_boot = Duration::from(now); - let expire = (since_boot - Duration::from_secs(60)).as_nanos() as u64; + let expire = (since_boot - Duration::from_secs(timeout_seconds)).as_nanos() as u64; + + // Track the expired keys + let mut expired_keys = Vec::new(); + if let Ok(mut flow_lock) = ALL_FLOWS.try_lock() { flow_lock.clear(); // Remove all previous values + + // Track through all the flows iterate_flows(&mut |key, data| { - if data.last_seen > expire { + + if data.end_status == 2 { + // The flow has been handled already and should be ignored + return; + } + + if data.last_seen < expire { + // This flow has expired. Add it to the list to be cleaned + expired_keys.push(key.clone()); + + // Send it off to netperf for analysis if we are supporting doing so. + if netflow_enabled { + let _ = sender.send((key.clone(), data.clone())); + } + } else { // We have a valid flow, so it needs to be tracked flow_lock.push((key.clone(), data.clone())); @@ -202,7 +227,14 @@ impl ThroughputTracker { } } } - }); + }); // End flow iterator + + if !expired_keys.is_empty() { + let ret = lqos_sys::end_flows(&mut expired_keys); + if let Err(e) = ret { + log::warn!("Failed to end flows: {:?}", e); + } + } } else { log::warn!("Failed to lock ALL_FLOWS"); } From f0ddbe62f8d225b76cdfcb74f668368611e45c6f Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 5 Mar 2024 09:31:49 -0600 Subject: [PATCH 032/103] Netflow V5 is largely working. Still a few kinks to work out, but the exporter sends them out - and they are received correctly by my test ehnt setup. 
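One detail worth spelling out: NetFlow v5 is a big-endian wire format, and the exporter writes the header and records by copying the structs' raw bytes onto the socket, so on a little-endian host every multi-byte field has to be stored already byte-swapped. That is what the to_be() calls added below do. A tiny stand-alone illustration (not part of the patch):

    fn main() {
        // The version field of the v5 header, stored the way the patch now stores it.
        let version: u16 = 5;
        let on_wire = version.to_be();       // byte-swapped on little-endian hosts
        let bytes = on_wire.to_ne_bytes();   // the bytes a raw memcpy would send
        assert_eq!(bytes, [0x00, 0x05]);     // big-endian representation of 5
        println!("{:02x?}", bytes);
    }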
--- .../lqosd/src/throughput_tracker/flow_data.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data.rs b/src/rust/lqosd/src/throughput_tracker/flow_data.rs index 9e6e28a8..f82e0995 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data.rs @@ -73,7 +73,7 @@ impl FlowbeeRecipient for Netflow5 { buffer.extend_from_slice(packet1_bytes); buffer.extend_from_slice(packet2_bytes); - log::debug!("Sending netflow packet to {target}", target = self.target); + //log::debug!("Sending netflow packet to {target}", target = self.target); self.socket.send_to(&buffer, &self.target).unwrap(); self.sequence = self.sequence.wrapping_add(2); @@ -99,10 +99,10 @@ impl Netflow5Header { let uptime = time_since_boot().unwrap(); Self { - version: 5, - count: 2, - sys_uptime: uptime.num_milliseconds() as u32, - unix_secs: uptime.num_seconds() as u32, + version: (5u16).to_be(), + count: (2u16).to_be(), + sys_uptime: (uptime.num_milliseconds() as u32).to_be(), + unix_secs: (uptime.num_seconds() as u32).to_be(), unix_nsecs: 0, flow_sequence, engine_type: 0, @@ -154,12 +154,12 @@ fn to_netflow_5(key: &FlowbeeKey, data: &FlowbeeData) -> anyhow::Result<(Netflow src_addr: src_ip, dst_addr: dst_ip, next_hop: 0, - input: 0, - output: 1, + input: (0u16).to_be(), + output: (1u16).to_be(), d_pkts, d_octets, - first: data.start_time as u32, // Convert to milliseconds - last: data.last_seen as u32, // Convert to milliseconds + first: ((data.start_time / 1_000_000) as u32).to_be(), // Convert to milliseconds + last: ((data.last_seen / 1_000_000) as u32).to_be(), // Convert to milliseconds src_port: key.src_port.to_be(), dst_port: key.dst_port.to_be(), pad1: 0, From a5bef2851a0232b67a82c553e7eb621cc00630d9 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 5 Mar 2024 11:02:47 -0600 Subject: [PATCH 033/103] Refactor to clean code up. 
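The refactor that follows splits the old flow_data.rs into a flow tracker, a dispatcher module, and a netflow5 module, with export targets hidden behind the FlowbeeRecipient trait and held as a list of boxed trait objects. One practical upshot is that another export target is just another impl. A hypothetical example, purely to show the shape of the trait (not part of the patch; a real recipient would sit alongside netflow5 under flow_data/):

    use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey};
    use super::FlowbeeRecipient;

    // Hypothetical recipient that just logs expired flows instead of exporting them.
    struct LogOnly;

    impl FlowbeeRecipient for LogOnly {
        fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) {
            log::info!(
                "flow {}:{} <-> {}:{} proto {} moved {} / {} bytes",
                key.local_ip.as_ip(), key.src_port,
                key.remote_ip.as_ip(), key.dst_port,
                key.ip_protocol,
                data.bytes_sent[0], data.bytes_sent[1]
            );
        }
    }

    // Registration would mirror the Netflow5 case in setup_netflow_tracker():
    //     endpoints.push(Box::new(LogOnly));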
--- .../lqosd/src/throughput_tracker/flow_data.rs | 203 ------------------ .../flow_data/flow_tracker.rs | 9 + .../src/throughput_tracker/flow_data/mod.rs | 51 +++++ .../flow_data/netflow5/mod.rs | 39 ++++ .../flow_data/netflow5/protocol.rs | 131 +++++++++++ 5 files changed, 230 insertions(+), 203 deletions(-) delete mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data.rs b/src/rust/lqosd/src/throughput_tracker/flow_data.rs deleted file mode 100644 index f82e0995..00000000 --- a/src/rust/lqosd/src/throughput_tracker/flow_data.rs +++ /dev/null @@ -1,203 +0,0 @@ -use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; -use lqos_utils::unix_time::time_since_boot; -use nix::sys::time::TimeValLike; -use once_cell::sync::Lazy; -use std::{net::{IpAddr, UdpSocket}, sync::{mpsc::{channel, Sender}, Mutex}}; - -pub static ALL_FLOWS: Lazy>> = - Lazy::new(|| Mutex::new(Vec::with_capacity(128_000))); - -// Creates the netflow tracker and returns the sender -pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { - let (tx, rx) = channel::<(FlowbeeKey, FlowbeeData)>(); - let config = lqos_config::load_config().unwrap(); - - std::thread::spawn(move || { - log::info!("Starting the network flow tracker back-end"); - - // Build the endpoints list - let mut endpoints: Vec> = Vec::new(); - if let Some(flow_config) = config.flows { - if let (Some(ip), Some(port), Some(version)) = (flow_config.netflow_ip, flow_config.netflow_port, flow_config.netflow_version) - { - log::info!("Setting up netflow target: {ip}:{port}, version: {version}"); - let target = format!("{ip}:{port}", ip = ip, port = port); - match version { - 5 => { - let endpoint = Netflow5::new(target).unwrap(); - endpoints.push(Box::new(endpoint)); - log::info!("Netflow 5 endpoint added"); - } - _ => log::error!("Unsupported netflow version: {version}"), - } - } - - } - - // Send to all endpoints upon receipt - while let Ok((key, value)) = rx.recv() { - endpoints.iter_mut().for_each(|f| f.send(key.clone(), value.clone())); - } - log::info!("Network flow tracker back-end has stopped") - }); - - tx -} - -trait FlowbeeRecipient { - fn send(&mut self, key: FlowbeeKey, data: FlowbeeData); -} - -struct Netflow5 { - socket: UdpSocket, - sequence: u32, - target: String, -} - -impl Netflow5 { - fn new(target: String) -> anyhow::Result { - let socket = UdpSocket::bind("0.0.0.0:12212")?; - Ok(Self { socket, sequence: 0, target }) - } -} - -impl FlowbeeRecipient for Netflow5 { - fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { - if let Ok((packet1, packet2)) = to_netflow_5(&key, &data) { - let header = Netflow5Header::new(self.sequence); - let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; - let packet1_bytes = unsafe { std::slice::from_raw_parts(&packet1 as *const _ as *const u8, std::mem::size_of::()) }; - let packet2_bytes = unsafe { std::slice::from_raw_parts(&packet2 as *const _ as *const u8, std::mem::size_of::()) }; - let mut buffer = Vec::with_capacity(header_bytes.len() + packet1_bytes.len() + packet2_bytes.len()); - buffer.extend_from_slice(header_bytes); - 
buffer.extend_from_slice(packet1_bytes); - buffer.extend_from_slice(packet2_bytes); - - //log::debug!("Sending netflow packet to {target}", target = self.target); - self.socket.send_to(&buffer, &self.target).unwrap(); - - self.sequence = self.sequence.wrapping_add(2); - } - } -} - -#[repr(C)] -struct Netflow5Header { - version: u16, - count: u16, - sys_uptime: u32, - unix_secs: u32, - unix_nsecs: u32, - flow_sequence: u32, - engine_type: u8, - engine_id: u8, - sampling_interval: u16, -} - -impl Netflow5Header { - fn new(flow_sequence: u32) -> Self { - let uptime = time_since_boot().unwrap(); - - Self { - version: (5u16).to_be(), - count: (2u16).to_be(), - sys_uptime: (uptime.num_milliseconds() as u32).to_be(), - unix_secs: (uptime.num_seconds() as u32).to_be(), - unix_nsecs: 0, - flow_sequence, - engine_type: 0, - engine_id: 0, - sampling_interval: 0, - } - } - -} - -#[repr(C)] -struct Netflow5Record { - src_addr: u32, - dst_addr: u32, - next_hop: u32, - input: u16, - output: u16, - d_pkts: u32, - d_octets: u32, - first: u32, - last: u32, - src_port: u16, - dst_port: u16, - pad1: u8, - tcp_flags: u8, - prot: u8, - tos: u8, - src_as: u16, - dst_as: u16, - src_mask: u8, - dst_mask: u8, - pad2: u16, -} - -fn to_netflow_5(key: &FlowbeeKey, data: &FlowbeeData) -> anyhow::Result<(Netflow5Record, Netflow5Record)> { - // TODO: Detect overflow - let local = key.local_ip.as_ip(); - let remote = key.remote_ip.as_ip(); - if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { - let src_ip = u32::from_ne_bytes(local.octets()); - let dst_ip = u32::from_ne_bytes(remote.octets()); - // Convert d_pkts to network order - let d_pkts = (data.packets_sent[0] as u32).to_be(); - let d_octets = (data.bytes_sent[0] as u32).to_be(); - let d_pkts2 = (data.packets_sent[1] as u32).to_be(); - let d_octets2 = (data.bytes_sent[1] as u32).to_be(); - - let record = Netflow5Record { - src_addr: src_ip, - dst_addr: dst_ip, - next_hop: 0, - input: (0u16).to_be(), - output: (1u16).to_be(), - d_pkts, - d_octets, - first: ((data.start_time / 1_000_000) as u32).to_be(), // Convert to milliseconds - last: ((data.last_seen / 1_000_000) as u32).to_be(), // Convert to milliseconds - src_port: key.src_port.to_be(), - dst_port: key.dst_port.to_be(), - pad1: 0, - tcp_flags: 0, - prot: key.ip_protocol.to_be(), - tos: 0, - src_as: 0, - dst_as: 0, - src_mask: 0, - dst_mask: 0, - pad2: 0, - }; - - let record2 = Netflow5Record { - src_addr: dst_ip, - dst_addr: src_ip, - next_hop: 0, - input: 1, - output: 0, - d_pkts: d_pkts2, - d_octets: d_octets2, - first: data.start_time as u32, // Convert to milliseconds - last: data.last_seen as u32, // Convert to milliseconds - src_port: key.dst_port.to_be(), - dst_port: key.src_port.to_be(), - pad1: 0, - tcp_flags: 0, - prot: key.ip_protocol.to_be(), - tos: 0, - src_as: 0, - dst_as: 0, - src_mask: 0, - dst_mask: 0, - pad2: 0, - }; - - Ok((record, record2)) - } else { - Err(anyhow::anyhow!("Only IPv4 is supported")) - } -} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs new file mode 100644 index 00000000..71afc16e --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs @@ -0,0 +1,9 @@ +//! Provides a globally accessible vector of all flows. This is used to store +//! all flows for the purpose of tracking and data-services. 
+ +use std::sync::Mutex; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use once_cell::sync::Lazy; + +pub static ALL_FLOWS: Lazy>> = + Lazy::new(|| Mutex::new(Vec::with_capacity(128_000))); \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs new file mode 100644 index 00000000..34de1deb --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -0,0 +1,51 @@ +//! Provides tracking and data-services for per-flow data. Includes implementations +//! of netflow protocols. + +mod netflow5; +mod flow_tracker; + +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use std::sync::mpsc::{channel, Sender}; +pub(crate) use flow_tracker::ALL_FLOWS; +use crate::throughput_tracker::flow_data::netflow5::Netflow5; + +trait FlowbeeRecipient { + fn send(&mut self, key: FlowbeeKey, data: FlowbeeData); +} + +// Creates the netflow tracker and returns the sender +pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { + let (tx, rx) = channel::<(FlowbeeKey, FlowbeeData)>(); + let config = lqos_config::load_config().unwrap(); + + std::thread::spawn(move || { + log::info!("Starting the network flow tracker back-end"); + + // Build the endpoints list + let mut endpoints: Vec> = Vec::new(); + if let Some(flow_config) = config.flows { + if let (Some(ip), Some(port), Some(version)) = (flow_config.netflow_ip, flow_config.netflow_port, flow_config.netflow_version) + { + log::info!("Setting up netflow target: {ip}:{port}, version: {version}"); + let target = format!("{ip}:{port}", ip = ip, port = port); + match version { + 5 => { + let endpoint = Netflow5::new(target).unwrap(); + endpoints.push(Box::new(endpoint)); + log::info!("Netflow 5 endpoint added"); + } + _ => log::error!("Unsupported netflow version: {version}"), + } + } + + } + + // Send to all endpoints upon receipt + while let Ok((key, value)) = rx.recv() { + endpoints.iter_mut().for_each(|f| f.send(key.clone(), value.clone())); + } + log::info!("Network flow tracker back-end has stopped") + }); + + tx +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs new file mode 100644 index 00000000..c9db0792 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs @@ -0,0 +1,39 @@ +//! 
Support for the Netflow 5 protocol +mod protocol; +use std::net::UdpSocket; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use super::FlowbeeRecipient; +pub(crate) use protocol::*; + +pub(crate) struct Netflow5 { + socket: UdpSocket, + sequence: u32, + target: String, +} + +impl Netflow5 { + pub(crate) fn new(target: String) -> anyhow::Result { + let socket = UdpSocket::bind("0.0.0.0:12212")?; + Ok(Self { socket, sequence: 0, target }) + } +} + +impl FlowbeeRecipient for Netflow5 { + fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { + if let Ok((packet1, packet2)) = to_netflow_5(&key, &data) { + let header = Netflow5Header::new(self.sequence); + let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; + let packet1_bytes = unsafe { std::slice::from_raw_parts(&packet1 as *const _ as *const u8, std::mem::size_of::()) }; + let packet2_bytes = unsafe { std::slice::from_raw_parts(&packet2 as *const _ as *const u8, std::mem::size_of::()) }; + let mut buffer = Vec::with_capacity(header_bytes.len() + packet1_bytes.len() + packet2_bytes.len()); + buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(packet1_bytes); + buffer.extend_from_slice(packet2_bytes); + + //log::debug!("Sending netflow packet to {target}", target = self.target); + self.socket.send_to(&buffer, &self.target).unwrap(); + + self.sequence = self.sequence.wrapping_add(2); + } + } +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs new file mode 100644 index 00000000..12f6fefb --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs @@ -0,0 +1,131 @@ +//! Definitions for the actual netflow 5 protocol + +use std::net::IpAddr; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use lqos_utils::unix_time::time_since_boot; +use nix::sys::time::TimeValLike; + +/// Standard Netflow 5 header +#[repr(C)] +pub(crate) struct Netflow5Header { + pub(crate) version: u16, + pub(crate) count: u16, + pub(crate) sys_uptime: u32, + pub(crate) unix_secs: u32, + pub(crate) unix_nsecs: u32, + pub(crate) flow_sequence: u32, + pub(crate) engine_type: u8, + pub(crate) engine_id: u8, + pub(crate) sampling_interval: u16, +} + +impl Netflow5Header { + /// Create a new Netflow 5 header + pub(crate) fn new(flow_sequence: u32) -> Self { + let uptime = time_since_boot().unwrap(); + + Self { + version: (5u16).to_be(), + count: (2u16).to_be(), + sys_uptime: (uptime.num_milliseconds() as u32).to_be(), + unix_secs: (uptime.num_seconds() as u32).to_be(), + unix_nsecs: 0, + flow_sequence, + engine_type: 0, + engine_id: 0, + sampling_interval: 0, + } + } + +} + +/// Standard Netflow 5 record +#[repr(C)] +pub(crate) struct Netflow5Record { + pub(crate) src_addr: u32, + pub(crate) dst_addr: u32, + pub(crate) next_hop: u32, + pub(crate) input: u16, + pub(crate) output: u16, + pub(crate) d_pkts: u32, + pub(crate) d_octets: u32, + pub(crate) first: u32, + pub(crate) last: u32, + pub(crate) src_port: u16, + pub(crate) dst_port: u16, + pub(crate) pad1: u8, + pub(crate) tcp_flags: u8, + pub(crate) prot: u8, + pub(crate) tos: u8, + pub(crate) src_as: u16, + pub(crate) dst_as: u16, + pub(crate) src_mask: u8, + pub(crate) dst_mask: u8, + pub(crate) pad2: u16, +} + +/// Convert a Flowbee key and data to a pair of Netflow 5 records +pub(crate) fn to_netflow_5(key: &FlowbeeKey, data: &FlowbeeData) -> anyhow::Result<(Netflow5Record, Netflow5Record)> { + // 
TODO: Detect overflow + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { + let src_ip = u32::from_ne_bytes(local.octets()); + let dst_ip = u32::from_ne_bytes(remote.octets()); + // Convert d_pkts to network order + let d_pkts = (data.packets_sent[0] as u32).to_be(); + let d_octets = (data.bytes_sent[0] as u32).to_be(); + let d_pkts2 = (data.packets_sent[1] as u32).to_be(); + let d_octets2 = (data.bytes_sent[1] as u32).to_be(); + + let record = Netflow5Record { + src_addr: src_ip, + dst_addr: dst_ip, + next_hop: 0, + input: (0u16).to_be(), + output: (1u16).to_be(), + d_pkts, + d_octets, + first: ((data.start_time / 1_000_000) as u32).to_be(), // Convert to milliseconds + last: ((data.last_seen / 1_000_000) as u32).to_be(), // Convert to milliseconds + src_port: key.src_port.to_be(), + dst_port: key.dst_port.to_be(), + pad1: 0, + tcp_flags: 0, + prot: key.ip_protocol.to_be(), + tos: 0, + src_as: 0, + dst_as: 0, + src_mask: 0, + dst_mask: 0, + pad2: 0, + }; + + let record2 = Netflow5Record { + src_addr: dst_ip, + dst_addr: src_ip, + next_hop: 0, + input: 1, + output: 0, + d_pkts: d_pkts2, + d_octets: d_octets2, + first: data.start_time as u32, // Convert to milliseconds + last: data.last_seen as u32, // Convert to milliseconds + src_port: key.dst_port.to_be(), + dst_port: key.src_port.to_be(), + pad1: 0, + tcp_flags: 0, + prot: key.ip_protocol.to_be(), + tos: 0, + src_as: 0, + dst_as: 0, + src_mask: 0, + dst_mask: 0, + pad2: 0, + }; + + Ok((record, record2)) + } else { + Err(anyhow::anyhow!("Only IPv4 is supported")) + } +} \ No newline at end of file From b649f7004efa0208f69fc1a8ff0872a7a678c29a Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 5 Mar 2024 14:05:18 -0600 Subject: [PATCH 034/103] Netflow v9. Probably doesn't work yet, committing before I change PC. --- .../src/throughput_tracker/flow_data/mod.rs | 8 +- .../flow_data/netflow5/protocol.rs | 1 - .../flow_data/netflow9/mod.rs | 56 ++++ .../flow_data/netflow9/protocol.rs | 246 ++++++++++++++++++ 4 files changed, 309 insertions(+), 2 deletions(-) create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 34de1deb..06ad03ce 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -2,12 +2,13 @@ //! of netflow protocols. 
mod netflow5; +mod netflow9; mod flow_tracker; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use std::sync::mpsc::{channel, Sender}; pub(crate) use flow_tracker::ALL_FLOWS; -use crate::throughput_tracker::flow_data::netflow5::Netflow5; +use crate::throughput_tracker::flow_data::{netflow5::Netflow5, netflow9::Netflow9}; trait FlowbeeRecipient { fn send(&mut self, key: FlowbeeKey, data: FlowbeeData); @@ -34,6 +35,11 @@ pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { endpoints.push(Box::new(endpoint)); log::info!("Netflow 5 endpoint added"); } + 9 => { + let endpoint = Netflow9::new(target).unwrap(); + endpoints.push(Box::new(endpoint)); + log::info!("Netflow 9 endpoint added"); + } _ => log::error!("Unsupported netflow version: {version}"), } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs index 12f6fefb..da7ee620 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs @@ -36,7 +36,6 @@ impl Netflow5Header { sampling_interval: 0, } } - } /// Standard Netflow 5 record diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs new file mode 100644 index 00000000..0db9c094 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs @@ -0,0 +1,56 @@ +use std::{net::UdpSocket, time::Instant}; + +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; + +use self::protocol::{to_netflow_9, Netflow9Header}; + +use super::FlowbeeRecipient; + +mod protocol; + +pub(crate) struct Netflow9 { + socket: UdpSocket, + sequence: u32, + target: String, + last_sent_template: Option, +} + +impl Netflow9 { + pub(crate) fn new(target: String) -> anyhow::Result { + let socket = UdpSocket::bind("0.0.0.0:12212")?; + Ok(Self { socket, sequence: 0, target, last_sent_template: None}) + } +} + +impl FlowbeeRecipient for Netflow9 { + fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { + let mut needs_template = false; + if let Some(last_sent_template) = self.last_sent_template { + if last_sent_template.elapsed().as_secs() > 60 { + needs_template = true; + } + } else { + needs_template = true; + } + + if needs_template { + let template = protocol::template_data_ipv4(self.sequence); + self.socket.send_to(&template, &self.target).unwrap(); + self.last_sent_template = Some(Instant::now()); + } + + if let Ok((packet1, packet2)) = to_netflow_9(&key, &data) { + let header = Netflow9Header::new(self.sequence); + let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; + let mut buffer = Vec::with_capacity(header_bytes.len() + packet1.len() + packet2.len()); + buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(&packet1); + buffer.extend_from_slice(&packet2); + + //log::debug!("Sending netflow packet to {target}", target = self.target); + self.socket.send_to(&buffer, &self.target).unwrap(); + + self.sequence = self.sequence.wrapping_add(2); + } + } +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs new file mode 100644 index 00000000..86e51e96 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs @@ -0,0 +1,246 @@ +//! 
Protocol definitions for Netflow v9 Data. + +use std::net::IpAddr; + +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use lqos_utils::unix_time::time_since_boot; +use nix::sys::time::TimeValLike; + +pub(crate) struct Netflow9Header { + pub(crate) version: u16, + pub(crate) count: u16, + pub(crate) sys_uptime: u32, + pub(crate) unix_secs: u32, + pub(crate) package_sequence: u32, + pub(crate) source_id: u32, +} + +impl Netflow9Header { + /// Create a new Netflow 9 header + pub(crate) fn new(flow_sequence: u32) -> Self { + let uptime = time_since_boot().unwrap(); + + Self { + version: (9u16).to_be(), + count: (2u16).to_be(), + sys_uptime: (uptime.num_milliseconds() as u32).to_be(), + unix_secs: (uptime.num_seconds() as u32).to_be(), + package_sequence: flow_sequence, + source_id: 0, + } + } +} + +fn add_field(bytes: &mut Vec, field_type: u16, field_length: u16) { + bytes.extend_from_slice(field_type.to_be_bytes().as_ref()); + bytes.extend_from_slice(field_length.to_be_bytes().as_ref()); +} + +pub fn template_data_ipv4(sequence: u32) -> Vec { + const FIELDS: [(u16, u16); 8] = [ + (1, 4), // IN_BYTES + (2, 4), // IN_PKTS + (4, 1), // PROTOCOL + (7, 4), // L4_SRC_PORT + (8, 4), // IPV4_SRC_ADDR + (11, 4), // L4_DST_PORT + (12, 4), // IPV4_DST_ADDR + (15, 1), // TOS + ]; + + // Build the header + let mut bytes = Vec::new(); + + // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) + // 16 + bytes.push(0); + bytes.push(0); + + // Add the length of the flowset, 4 bytes + const LENGTH: u16 = 4; // TODO: Fixme + bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); + + // Add the TemplateID. We're going to use 256 for IPv4. + const TEMPLATE_ID: u16 = 256; + bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); + + // Add the number of fields in the template + const FIELD_COUNT: u16 = FIELDS.len() as u16; + bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); + + for (field_type, field_length) in FIELDS.iter() { + add_field(&mut bytes, *field_type, *field_length); + } + + bytes +} + +pub fn template_data_ipv6(sequence: u32) -> Vec { + const FIELDS: [(u16, u16); 8] = [ + (1, 4), // IN_BYTES + (2, 4), // IN_PKTS + (4, 1), // PROTOCOL + (7, 4), // L4_SRC_PORT + (27, 16), // IPV6_SRC_ADDR + (11, 4), // L4_DST_PORT + (28, 16), // IPV6_DST_ADDR + (15, 1), // TOS + ]; + + // Build the header + let mut bytes = Vec::new(); + + // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) + // 16 + bytes.push(0); + bytes.push(0); + + // Add the length of the flowset, 4 bytes + const LENGTH: u16 = 4; // TODO: Fixme + bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); + + // Add the TemplateID. We're going to use 257 for IPv6. 
+ const TEMPLATE_ID: u16 = 257; + bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); + + // Add the number of fields in the template + const FIELD_COUNT: u16 = FIELDS.len() as u16; + bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); + + for (field_type, field_length) in FIELDS.iter() { + add_field(&mut bytes, *field_type, *field_length); + } + + bytes +} + +pub(crate) fn to_netflow_9( + key: &FlowbeeKey, + data: &FlowbeeData, +) -> anyhow::Result<(Vec, Vec)> { + if key.local_ip.is_v4() && key.remote_ip.is_v4() { + // Return IPv4 records + Ok((ipv4_record(key, data, 0)?, ipv4_record(key, data, 1)?)) + } else if (!key.local_ip.is_v4()) && (!key.remote_ip.is_v4()) { + // Return IPv6 records + Ok((ipv6_record(key, data, 0)?, ipv6_record(key, data, 1)?)) + } else { + anyhow::bail!("Mixing IPv4 and IPv6 is not supported"); + } +} + +fn ipv4_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { + // Configure IP directions + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { + let src_ip = u32::from_ne_bytes(local.octets()); + let dst_ip = u32::from_ne_bytes(remote.octets()); + + // Build the field values + let mut field_bytes: Vec = Vec::new(); + + // Bytes Sent + field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); + + // Packet Sent + field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); + + // Add the protocol + field_bytes.push(key.ip_protocol); + + // Add the source port + field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); + + // Add the source address + if direction == 0 { + field_bytes.extend_from_slice(&src_ip.to_be_bytes()); + } else { + field_bytes.extend_from_slice(&dst_ip.to_be_bytes()); + } + + // Add the destination port + field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); + + // Add the destination address + if direction == 0 { + field_bytes.extend_from_slice(&dst_ip.to_be_bytes()); + } else { + field_bytes.extend_from_slice(&src_ip.to_be_bytes()); + } + + // Add the TOS + field_bytes.push(0); + + // Build the actual record + let mut bytes = Vec::new(); + // Add the flowset_id. Template ID is 256 + bytes.extend_from_slice(&(256u16).to_be_bytes()); + + // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field + // itself. That's odd. 
+ bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); + + Ok(bytes) + } else { + anyhow::bail!("IPv6 data in an IPv4 function was a bad idea"); + } +} + +fn ipv6_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { + // Configure IP directions + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V6(local), IpAddr::V6(remote)) = (local, remote) { + let src_ip = local.octets(); + let dst_ip = remote.octets(); + + // Build the field values + let mut field_bytes: Vec = Vec::new(); + + // Bytes Sent + field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); + + // Packet Sent + field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); + + // Add the protocol + field_bytes.push(key.ip_protocol); + + // Add the source port + field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); + + // Add the source address + if direction == 0 { + field_bytes.extend_from_slice(&src_ip); + } else { + field_bytes.extend_from_slice(&dst_ip); + } + + // Add the destination port + field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); + + // Add the destination address + if direction == 0 { + field_bytes.extend_from_slice(&dst_ip); + } else { + field_bytes.extend_from_slice(&src_ip); + } + + // Add the TOS + field_bytes.push(0); + + // Build the actual record + let mut bytes = Vec::new(); + // Add the flowset_id. Template ID is 257 + bytes.extend_from_slice(&(257u16).to_be_bytes()); + + // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field + // itself. That's odd. + bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); + + Ok(bytes) + } else { + anyhow::bail!("IPv4 data in an IPv6 function was a bad idea"); + } +} \ No newline at end of file From 10c56f9353b580eaf55b22076bfa32f260974321 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 5 Mar 2024 14:54:57 -0600 Subject: [PATCH 035/103] This time with a theoretically valid header... --- .../src/throughput_tracker/flow_data/netflow9/mod.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs index 0db9c094..3396c1b4 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs @@ -34,8 +34,14 @@ impl FlowbeeRecipient for Netflow9 { } if needs_template { - let template = protocol::template_data_ipv4(self.sequence); - self.socket.send_to(&template, &self.target).unwrap(); + // Get the header, ipv4 template and ipv6 templates and send them all. + let header = Netflow9Header::new(self.sequence); + let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; + let mut buffer = Vec::with_capacity(header_bytes.len()); + buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(&protocol::template_data_ipv4(self.sequence)); + buffer.extend_from_slice(&protocol::template_data_ipv6(self.sequence)); + self.socket.send_to(&buffer, &self.target).unwrap(); self.last_sent_template = Some(Instant::now()); } From 34a2ec7b888018142332a6fbba6ae35ba28ef4f3 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 7 Mar 2024 11:19:02 -0600 Subject: [PATCH 036/103] Refactor netflow v9 into readable code, and the IPv4 version is at least somewhat working now. 
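For reference, the wire sizes implied by the new field_types.rs are easy to sanity-check (back-of-the-envelope arithmetic, not measured output): a template flowset is 4 bytes of flowset header (flowset id 0 plus length), 4 bytes of template id plus field count, and 4 bytes per field spec, so the 8-field IPv4 template comes to 4 + 4 + 8*4 = 40 bytes. The matching data flowset is 4 bytes of header plus the encoded fields (8 + 8 + 1 + 2 + 4 + 2 + 4 + 1 = 30 bytes for the IPv4 field set), padded to a 32-bit boundary, so 36 bytes on the wire, because the flowset length counts the whole flowset including its own header (the "That's odd" comments in the record builders). A minimal, illustrative-only sketch of the same arithmetic, not code from this patch:

    fn main() {
        // Widths copied from FIELDS_IPV4 / field_types.rs below; the sketch
        // assumes them rather than checking them.
        let widths: [u16; 8] = [8, 8, 1, 2, 4, 2, 4, 1];
        let data: u16 = widths.iter().sum();             // 30 bytes of field data
        let template = 4 + 4 + widths.len() as u16 * 4;  // 40-byte template flowset
        let record = (data + 4 + 3) & !3;                // 36-byte padded data flowset
        println!("template={template} record={record}");
    }
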
--- .../flow_data/netflow9/mod.rs | 38 +-- .../flow_data/netflow9/protocol.rs | 246 ------------------ .../netflow9/protocol/field_encoder.rs | 70 +++++ .../netflow9/protocol/field_types.rs | 82 ++++++ .../flow_data/netflow9/protocol/header.rs | 28 ++ .../flow_data/netflow9/protocol/mod.rs | 131 ++++++++++ .../netflow9/protocol/template_ipv4.rs | 42 +++ .../netflow9/protocol/template_ipv6.rs | 40 +++ 8 files changed, 401 insertions(+), 276 deletions(-) delete mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_encoder.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_types.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/header.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv4.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv6.rs diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs index 3396c1b4..76b5d3e0 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs @@ -1,59 +1,37 @@ -use std::{net::UdpSocket, time::Instant}; - +use std::net::UdpSocket; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use crate::throughput_tracker::flow_data::netflow9::protocol::{header::Netflow9Header, template_ipv4::template_data_ipv4, template_ipv6::template_data_ipv6}; -use self::protocol::{to_netflow_9, Netflow9Header}; - +use self::protocol::to_netflow_9; use super::FlowbeeRecipient; - mod protocol; pub(crate) struct Netflow9 { socket: UdpSocket, sequence: u32, target: String, - last_sent_template: Option, } impl Netflow9 { pub(crate) fn new(target: String) -> anyhow::Result { let socket = UdpSocket::bind("0.0.0.0:12212")?; - Ok(Self { socket, sequence: 0, target, last_sent_template: None}) + Ok(Self { socket, sequence: 0, target }) } } impl FlowbeeRecipient for Netflow9 { fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { - let mut needs_template = false; - if let Some(last_sent_template) = self.last_sent_template { - if last_sent_template.elapsed().as_secs() > 60 { - needs_template = true; - } - } else { - needs_template = true; - } - - if needs_template { - // Get the header, ipv4 template and ipv6 templates and send them all. 
- let header = Netflow9Header::new(self.sequence); - let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; - let mut buffer = Vec::with_capacity(header_bytes.len()); - buffer.extend_from_slice(header_bytes); - buffer.extend_from_slice(&protocol::template_data_ipv4(self.sequence)); - buffer.extend_from_slice(&protocol::template_data_ipv6(self.sequence)); - self.socket.send_to(&buffer, &self.target).unwrap(); - self.last_sent_template = Some(Instant::now()); - } - if let Ok((packet1, packet2)) = to_netflow_9(&key, &data) { - let header = Netflow9Header::new(self.sequence); + let header = Netflow9Header::new(self.sequence, 4); let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; let mut buffer = Vec::with_capacity(header_bytes.len() + packet1.len() + packet2.len()); buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(&template_data_ipv4()); + buffer.extend_from_slice(&template_data_ipv6()); buffer.extend_from_slice(&packet1); buffer.extend_from_slice(&packet2); - //log::debug!("Sending netflow packet to {target}", target = self.target); + log::debug!("Sending netflow9 packet of size {} to {}", buffer.len(), self.target); self.socket.send_to(&buffer, &self.target).unwrap(); self.sequence = self.sequence.wrapping_add(2); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs deleted file mode 100644 index 86e51e96..00000000 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol.rs +++ /dev/null @@ -1,246 +0,0 @@ -//! Protocol definitions for Netflow v9 Data. - -use std::net::IpAddr; - -use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; -use lqos_utils::unix_time::time_since_boot; -use nix::sys::time::TimeValLike; - -pub(crate) struct Netflow9Header { - pub(crate) version: u16, - pub(crate) count: u16, - pub(crate) sys_uptime: u32, - pub(crate) unix_secs: u32, - pub(crate) package_sequence: u32, - pub(crate) source_id: u32, -} - -impl Netflow9Header { - /// Create a new Netflow 9 header - pub(crate) fn new(flow_sequence: u32) -> Self { - let uptime = time_since_boot().unwrap(); - - Self { - version: (9u16).to_be(), - count: (2u16).to_be(), - sys_uptime: (uptime.num_milliseconds() as u32).to_be(), - unix_secs: (uptime.num_seconds() as u32).to_be(), - package_sequence: flow_sequence, - source_id: 0, - } - } -} - -fn add_field(bytes: &mut Vec, field_type: u16, field_length: u16) { - bytes.extend_from_slice(field_type.to_be_bytes().as_ref()); - bytes.extend_from_slice(field_length.to_be_bytes().as_ref()); -} - -pub fn template_data_ipv4(sequence: u32) -> Vec { - const FIELDS: [(u16, u16); 8] = [ - (1, 4), // IN_BYTES - (2, 4), // IN_PKTS - (4, 1), // PROTOCOL - (7, 4), // L4_SRC_PORT - (8, 4), // IPV4_SRC_ADDR - (11, 4), // L4_DST_PORT - (12, 4), // IPV4_DST_ADDR - (15, 1), // TOS - ]; - - // Build the header - let mut bytes = Vec::new(); - - // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) - // 16 - bytes.push(0); - bytes.push(0); - - // Add the length of the flowset, 4 bytes - const LENGTH: u16 = 4; // TODO: Fixme - bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); - - // Add the TemplateID. We're going to use 256 for IPv4. 
- const TEMPLATE_ID: u16 = 256; - bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); - - // Add the number of fields in the template - const FIELD_COUNT: u16 = FIELDS.len() as u16; - bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); - - for (field_type, field_length) in FIELDS.iter() { - add_field(&mut bytes, *field_type, *field_length); - } - - bytes -} - -pub fn template_data_ipv6(sequence: u32) -> Vec { - const FIELDS: [(u16, u16); 8] = [ - (1, 4), // IN_BYTES - (2, 4), // IN_PKTS - (4, 1), // PROTOCOL - (7, 4), // L4_SRC_PORT - (27, 16), // IPV6_SRC_ADDR - (11, 4), // L4_DST_PORT - (28, 16), // IPV6_DST_ADDR - (15, 1), // TOS - ]; - - // Build the header - let mut bytes = Vec::new(); - - // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) - // 16 - bytes.push(0); - bytes.push(0); - - // Add the length of the flowset, 4 bytes - const LENGTH: u16 = 4; // TODO: Fixme - bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); - - // Add the TemplateID. We're going to use 257 for IPv6. - const TEMPLATE_ID: u16 = 257; - bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); - - // Add the number of fields in the template - const FIELD_COUNT: u16 = FIELDS.len() as u16; - bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); - - for (field_type, field_length) in FIELDS.iter() { - add_field(&mut bytes, *field_type, *field_length); - } - - bytes -} - -pub(crate) fn to_netflow_9( - key: &FlowbeeKey, - data: &FlowbeeData, -) -> anyhow::Result<(Vec, Vec)> { - if key.local_ip.is_v4() && key.remote_ip.is_v4() { - // Return IPv4 records - Ok((ipv4_record(key, data, 0)?, ipv4_record(key, data, 1)?)) - } else if (!key.local_ip.is_v4()) && (!key.remote_ip.is_v4()) { - // Return IPv6 records - Ok((ipv6_record(key, data, 0)?, ipv6_record(key, data, 1)?)) - } else { - anyhow::bail!("Mixing IPv4 and IPv6 is not supported"); - } -} - -fn ipv4_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { - // Configure IP directions - let local = key.local_ip.as_ip(); - let remote = key.remote_ip.as_ip(); - if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { - let src_ip = u32::from_ne_bytes(local.octets()); - let dst_ip = u32::from_ne_bytes(remote.octets()); - - // Build the field values - let mut field_bytes: Vec = Vec::new(); - - // Bytes Sent - field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); - - // Packet Sent - field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); - - // Add the protocol - field_bytes.push(key.ip_protocol); - - // Add the source port - field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); - - // Add the source address - if direction == 0 { - field_bytes.extend_from_slice(&src_ip.to_be_bytes()); - } else { - field_bytes.extend_from_slice(&dst_ip.to_be_bytes()); - } - - // Add the destination port - field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); - - // Add the destination address - if direction == 0 { - field_bytes.extend_from_slice(&dst_ip.to_be_bytes()); - } else { - field_bytes.extend_from_slice(&src_ip.to_be_bytes()); - } - - // Add the TOS - field_bytes.push(0); - - // Build the actual record - let mut bytes = Vec::new(); - // Add the flowset_id. Template ID is 256 - bytes.extend_from_slice(&(256u16).to_be_bytes()); - - // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field - // itself. That's odd. 
- bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); - - Ok(bytes) - } else { - anyhow::bail!("IPv6 data in an IPv4 function was a bad idea"); - } -} - -fn ipv6_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { - // Configure IP directions - let local = key.local_ip.as_ip(); - let remote = key.remote_ip.as_ip(); - if let (IpAddr::V6(local), IpAddr::V6(remote)) = (local, remote) { - let src_ip = local.octets(); - let dst_ip = remote.octets(); - - // Build the field values - let mut field_bytes: Vec = Vec::new(); - - // Bytes Sent - field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); - - // Packet Sent - field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); - - // Add the protocol - field_bytes.push(key.ip_protocol); - - // Add the source port - field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); - - // Add the source address - if direction == 0 { - field_bytes.extend_from_slice(&src_ip); - } else { - field_bytes.extend_from_slice(&dst_ip); - } - - // Add the destination port - field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); - - // Add the destination address - if direction == 0 { - field_bytes.extend_from_slice(&dst_ip); - } else { - field_bytes.extend_from_slice(&src_ip); - } - - // Add the TOS - field_bytes.push(0); - - // Build the actual record - let mut bytes = Vec::new(); - // Add the flowset_id. Template ID is 257 - bytes.extend_from_slice(&(257u16).to_be_bytes()); - - // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field - // itself. That's odd. - bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); - - Ok(bytes) - } else { - anyhow::bail!("IPv4 data in an IPv6 function was a bad idea"); - } -} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_encoder.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_encoder.rs new file mode 100644 index 00000000..2d5054fa --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_encoder.rs @@ -0,0 +1,70 @@ +use std::net::IpAddr; + +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use super::field_types::*; + +pub(crate) fn encode_fields_from_template(template: &[(u16, u16)], direction: usize, key: &FlowbeeKey, data: &FlowbeeData) -> anyhow::Result> { + let src_port = if direction == 0 { key.src_port } else { key.dst_port }; + let dst_port = if direction == 0 { key.dst_port } else { key.src_port }; + + let total_size: u16 = template.iter().map(|(_, size)| size).sum(); + let mut result = Vec::with_capacity(total_size as usize); + for (field_type, field_length) in template.iter() { + match (*field_type, *field_length) { + IN_BYTES => encode_u64(data.bytes_sent[direction], &mut result), + IN_PKTS => encode_u64(data.packets_sent[direction], &mut result), + PROTOCOL => result.push(key.ip_protocol), + L4_SRC_PORT => encode_u16(src_port, &mut result), + L4_DST_PORT => encode_u16(dst_port, &mut result), + DST_TOS => result.push(data.tos), + IPV4_SRC_ADDR => encode_ipv4(0, key, &mut result)?, + IPV4_DST_ADDR => encode_ipv4(1, key, &mut result)?, + IPV6_SRC_ADDR => encode_ipv6(0, key, &mut result)?, + IPV6_DST_ADDR => encode_ipv6(1, key, &mut result)?, + _ => anyhow::bail!("Don't know how to encode field type {} yet", field_type), + } + } + Ok(result) +} + +fn encode_u64(value: u64, target: &mut Vec) { + target.extend_from_slice(&value.to_be_bytes()); +} + 
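+// Note: each helper in this module writes exactly as many bytes as the field
+// width declared in field_types.rs (8-byte IN_BYTES/IN_PKTS counters, 2-byte
+// ports, 4-byte IPv4 and 16-byte IPv6 addresses), which keeps the encoded
+// records in step with the advertised templates.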
+fn encode_u16(value: u16, target: &mut Vec) { + target.extend_from_slice(&value.to_be_bytes()); +} + +fn encode_ipv4(direction: usize, key: &FlowbeeKey, target: &mut Vec) -> anyhow::Result<()> { + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V4(local), IpAddr::V4(remote)) = (local, remote) { + let src_ip = u32::from_ne_bytes(local.octets()); + let dst_ip = u32::from_ne_bytes(remote.octets()); + if direction == 0 { + target.extend_from_slice(&src_ip.to_be_bytes()); + } else { + target.extend_from_slice(&dst_ip.to_be_bytes()); + } + } else { + anyhow::bail!("Expected IPv4 addresses, got {:?}", (local, remote)); + } + Ok(()) +} + +fn encode_ipv6(direction: usize, key: &FlowbeeKey, target: &mut Vec) -> anyhow::Result<()> { + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V6(local), IpAddr::V6(remote)) = (local, remote) { + let src_ip = local.octets(); + let dst_ip = remote.octets(); + if direction == 0 { + target.extend_from_slice(&src_ip); + } else { + target.extend_from_slice(&dst_ip); + } + } else { + anyhow::bail!("Expected IPv6 addresses, got {:?}", (local, remote)); + } + Ok(()) +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_types.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_types.rs new file mode 100644 index 00000000..9cfae8f5 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/field_types.rs @@ -0,0 +1,82 @@ +// Extracted from https://netflow.caligare.com/netflow_v9.htm +#![allow(dead_code)] + +pub(crate) const IN_BYTES:(u16, u16) = (1, 8); +pub(crate) const IN_PKTS:(u16, u16) = (2, 8); +pub(crate) const FLOWS:(u16, u16) = (3, 4); +pub(crate) const PROTOCOL:(u16, u16) = (4, 1); +pub(crate) const SRC_TOS:(u16, u16) = (5, 1); +pub(crate) const TCP_FLAGS:(u16, u16) = (6, 1); +pub(crate) const L4_SRC_PORT:(u16, u16) = (7, 2); +pub(crate) const IPV4_SRC_ADDR:(u16, u16) = (8, 4); +pub(crate) const SRC_MASK:(u16, u16) = (9, 1); +pub(crate) const INPUT_SNMP:(u16, u16) = (10, 2); +pub(crate) const L4_DST_PORT:(u16, u16) = (11, 2); +pub(crate) const IPV4_DST_ADDR:(u16, u16) = (12, 4); +pub(crate) const DST_MASK:(u16, u16) = (13, 1); +pub(crate) const OUTPUT_SNMP:(u16, u16) = (14, 2); +pub(crate) const IPV4_NEXT_HOP:(u16, u16) = (15, 4); +pub(crate) const SRC_AS:(u16, u16) = (16, 2); +pub(crate) const DST_AS:(u16, u16) = (17, 2); +pub(crate) const BGP_IPV4_NEXT_HOP:(u16, u16) = (18, 4); +pub(crate) const MUL_DST_PKTS:(u16, u16) = (19, 4); +pub(crate) const MUL_DST_BYTES:(u16, u16) = (20, 4); +pub(crate) const LAST_SWITCHED:(u16, u16) = (21, 4); +pub(crate) const FIRST_SWITCHED:(u16, u16) = (22, 4); +pub(crate) const OUT_BYTES:(u16, u16) = (23, 4); +pub(crate) const OUT_PKTS:(u16, u16) = (24, 4); +pub(crate) const MIN_PKT_LNGTH:(u16, u16) = (25, 2); +pub(crate) const MAX_PKT_LNGTH:(u16, u16) = (26, 2); +pub(crate) const IPV6_SRC_ADDR:(u16, u16) = (27, 16); +pub(crate) const IPV6_DST_ADDR:(u16, u16) = (28, 16); +pub(crate) const IPV6_SRC_MASK:(u16, u16) = (29, 1); +pub(crate) const IPV6_DST_MASK:(u16, u16) = (30, 1); +pub(crate) const IPV6_FLOW_LABEL:(u16, u16) = (31, 3); +pub(crate) const ICMP_TYPE:(u16, u16) = (32, 2); +pub(crate) const MUL_IGMP_TYPE:(u16, u16) = (33, 1); +pub(crate) const SAMPLING_INTERVAL:(u16, u16) = (34, 4); +pub(crate) const SAMPLING_ALGORITHM:(u16, u16) = (35, 1); +pub(crate) const FLOW_ACTIVE_TIMEOUT:(u16, u16) = (36, 2); +pub(crate) const 
FLOW_INACTIVE_TIMEOUT:(u16, u16) = (37, 2); +pub(crate) const ENGINE_TYPE:(u16, u16) = (38, 1); +pub(crate) const ENGINE_ID:(u16, u16) = (39, 1); +pub(crate) const TOTAL_BYTES_EXP:(u16, u16) = (40, 4); +pub(crate) const TOTAL_PKTS_EXP:(u16, u16) = (41, 4); +pub(crate) const TOTAL_FLOWS_EXP:(u16, u16) = (42, 4); +pub(crate) const IPV4_SRC_PREFIX:(u16, u16) = (44, 4); +pub(crate) const IPV4_DST_PREFIX:(u16, u16) = (45, 4); +pub(crate) const MPLS_TOP_LABEL_TYPE:(u16, u16) = (46, 1); +pub(crate) const MPLS_TOP_LABEL_IP_ADDR:(u16, u16) = (47, 4); +pub(crate) const FLOW_SAMPLER_ID:(u16, u16) = (48, 1); +pub(crate) const FLOW_SAMPLER_MODE:(u16, u16) = (49, 1); +pub(crate) const FLOW_SAMPLER_RANDOM_INTERVAL:(u16, u16) = (50, 4); +pub(crate) const MIN_TTL:(u16, u16) = (52, 1); +pub(crate) const MAX_TTL:(u16, u16) = (53, 1); +pub(crate) const IPV4_IDENT:(u16, u16) = (54, 2); +pub(crate) const DST_TOS:(u16, u16) = (55, 1); +pub(crate) const IN_SRC_MAC:(u16, u16) = (56, 6); +pub(crate) const OUT_DST_MAC:(u16, u16) = (57, 6); +pub(crate) const SRC_VLAN:(u16, u16) = (58, 2); +pub(crate) const DST_VLAN:(u16, u16) = (59, 2); +pub(crate) const IP_PROTOCOL_VERSION:(u16, u16) = (60, 1); +pub(crate) const DIRECTION:(u16, u16) = (61, 1); +pub(crate) const IPV6_NEXT_HOP:(u16, u16) = (62, 16); +pub(crate) const BPG_IPV6_NEXT_HOP:(u16, u16) = (63, 16); +pub(crate) const IPV6_OPTION_HEADERS:(u16, u16) = (64, 4); +pub(crate) const MPLS_LABEL_1:(u16, u16) = (70, 3); +pub(crate) const MPLS_LABEL_2:(u16, u16) = (71, 3); +pub(crate) const MPLS_LABEL_3:(u16, u16) = (72, 3); +pub(crate) const MPLS_LABEL_4:(u16, u16) = (73, 3); +pub(crate) const MPLS_LABEL_5:(u16, u16) = (74, 3); +pub(crate) const MPLS_LABEL_6:(u16, u16) = (75, 3); +pub(crate) const MPLS_LABEL_7:(u16, u16) = (76, 3); +pub(crate) const MPLS_LABEL_8:(u16, u16) = (77, 3); +pub(crate) const MPLS_LABEL_9:(u16, u16) = (78, 3); +pub(crate) const MPLS_LABEL_10:(u16, u16) = (79, 3); +pub(crate) const IN_DST_MAC:(u16, u16) = (80, 6); +pub(crate) const OUT_SRC_MAC:(u16, u16) = (81, 6); +pub(crate) const IF_NAME:(u16, u16) = (82, 0); +pub(crate) const IF_DESC:(u16, u16) = (83, 0); +pub(crate) const SAMPLER_NAME:(u16, u16) = (84, 0); +pub(crate) const IN_PERMANENT_BYTES:(u16, u16) = (85, 4); +pub(crate) const IN_PERMANENT_PKTS:(u16, u16) = (86, 4); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/header.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/header.rs new file mode 100644 index 00000000..fec54c27 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/header.rs @@ -0,0 +1,28 @@ +use lqos_utils::unix_time::time_since_boot; +use nix::sys::time::TimeValLike; + +#[repr(C)] +pub(crate) struct Netflow9Header { + pub(crate) version: u16, + pub(crate) count: u16, + pub(crate) sys_uptime: u32, + pub(crate) unix_secs: u32, + pub(crate) package_sequence: u32, + pub(crate) source_id: u32, +} + +impl Netflow9Header { + /// Create a new Netflow 9 header + pub(crate) fn new(flow_sequence: u32, record_count_including_templates: u16) -> Self { + let uptime = time_since_boot().unwrap(); + + Self { + version: (9u16).to_be(), + count: record_count_including_templates.to_be(), + sys_uptime: (uptime.num_milliseconds() as u32).to_be(), + unix_secs: (uptime.num_seconds() as u32).to_be(), + package_sequence: flow_sequence.to_be(), + source_id: 0, + } + } +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs 
b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs new file mode 100644 index 00000000..60fbc3fc --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs @@ -0,0 +1,131 @@ +//! Protocol definitions for Netflow v9 Data. +//! Mostly derived from https://netflow.caligare.com/netflow_v9.htm + +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use std::net::IpAddr; +mod field_types; +use field_types::*; +pub(crate) mod field_encoder; +pub(crate) mod header; +pub(crate) mod template_ipv4; +pub(crate) mod template_ipv6; + +fn add_field(bytes: &mut Vec, field_type: u16, field_length: u16) { + bytes.extend_from_slice(field_type.to_be_bytes().as_ref()); + bytes.extend_from_slice(field_length.to_be_bytes().as_ref()); +} + +pub(crate) fn to_netflow_9( + key: &FlowbeeKey, + data: &FlowbeeData, +) -> anyhow::Result<(Vec, Vec)> { + if key.local_ip.is_v4() && key.remote_ip.is_v4() { + // Return IPv4 records + Ok((ipv4_record(key, data, 0)?, ipv4_record(key, data, 1)?)) + } else if (!key.local_ip.is_v4()) && (!key.remote_ip.is_v4()) { + // Return IPv6 records + Ok((ipv6_record(key, data, 0)?, ipv6_record(key, data, 1)?)) + } else { + anyhow::bail!("Mixing IPv4 and IPv6 is not supported"); + } +} + +fn ipv4_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { + let field_bytes = field_encoder::encode_fields_from_template( + &template_ipv4::FIELDS_IPV4, + direction, + key, + data, + )?; + + // Build the actual record + let mut bytes = Vec::new(); + // Add the flowset_id. Template ID is 256 + bytes.extend_from_slice(&(256u16).to_be_bytes()); + + // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field + // itself. That's odd. + let padding = (field_bytes.len() + 4) % 4; + let size = (bytes.len() + field_bytes.len() + padding + 2) as u16; + bytes.extend_from_slice(&size.to_be_bytes()); + + // Add the data itself + bytes.extend_from_slice(&field_bytes); + + println!("Padding: {}", padding); + println!("IPv4 data {} = {}", bytes.len(), size); + println!("Field bytes was: {}", field_bytes.len()); + + // Pad to 32-bits + for _ in 0..padding { + bytes.push(0); + } + + Ok(bytes) +} + +fn ipv6_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { + // Configure IP directions + let local = key.local_ip.as_ip(); + let remote = key.remote_ip.as_ip(); + if let (IpAddr::V6(local), IpAddr::V6(remote)) = (local, remote) { + let src_ip = local.octets(); + let dst_ip = remote.octets(); + + // Build the field values + let mut field_bytes: Vec = Vec::new(); + + // Bytes Sent + field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); + + // Packet Sent + field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); + + // Add the protocol + field_bytes.push(key.ip_protocol); + + // Add the source port + field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); + + // Add the source address + if direction == 0 { + field_bytes.extend_from_slice(&src_ip); + } else { + field_bytes.extend_from_slice(&dst_ip); + } + + // Add the destination port + field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); + + // Add the destination address + if direction == 0 { + field_bytes.extend_from_slice(&dst_ip); + } else { + field_bytes.extend_from_slice(&src_ip); + } + + // Add the TOS + field_bytes.push(0); + + // Build the actual record + let mut bytes = Vec::new(); + // Add the flowset_id. 
Template ID is 257 + bytes.extend_from_slice(&(257u16).to_be_bytes()); + + // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field + // itself. That's odd. + bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); + + // Add the data itself + bytes.extend_from_slice(&field_bytes); + + // Pad to 32-bits + while bytes.len() % 4 != 0 { + bytes.push(0); + } + + Ok(bytes) + } else { + anyhow::bail!("IPv4 data in an IPv6 function was a bad idea"); + } +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv4.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv4.rs new file mode 100644 index 00000000..2eaae91c --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv4.rs @@ -0,0 +1,42 @@ +use crate::throughput_tracker::flow_data::netflow9::protocol::*; + +pub(crate) const FIELDS_IPV4: [(u16, u16); 8] = [ + IN_BYTES, + IN_PKTS, + PROTOCOL, + L4_SRC_PORT, + IPV4_SRC_ADDR, + L4_DST_PORT, + IPV4_DST_ADDR, + DST_TOS, +]; + +pub fn template_data_ipv4() -> Vec { + // Build the header + let mut bytes = Vec::new(); + + // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) + // 16 + bytes.push(0); + bytes.push(0); + + // Add the length of the flowset, 4 bytes + const LENGTH: u16 = 8 + (FIELDS_IPV4.len() * 4) as u16; // TODO: Fixme + bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); + + // Add the TemplateID. We're going to use 256 for IPv4. + const TEMPLATE_ID: u16 = 256; + bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); + + // Add the number of fields in the template + const FIELD_COUNT: u16 = FIELDS_IPV4.len() as u16; + bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); + + for (field_type, field_length) in FIELDS_IPV4.iter() { + add_field(&mut bytes, *field_type, *field_length); + } + + println!("Templatev4 Size {} = {}", bytes.len(), 8 + (FIELDS_IPV4.len() * 2)); + + bytes +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv6.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv6.rs new file mode 100644 index 00000000..a27dfba7 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/template_ipv6.rs @@ -0,0 +1,40 @@ +use crate::throughput_tracker::flow_data::netflow9::protocol::*; + +pub(crate) const FIELDS_IPV6: [(u16, u16); 8] = [ + IN_BYTES, + IN_PKTS, + PROTOCOL, + L4_SRC_PORT, + IPV6_SRC_ADDR, + L4_DST_PORT, + IPV6_DST_ADDR, + DST_TOS, +]; + +pub fn template_data_ipv6() -> Vec { + // Build the header + let mut bytes = Vec::new(); + + // Add the flowset_id, id is zero. (See https://netflow.caligare.com/netflow_v9.htm) + // 16 + bytes.push(0); + bytes.push(0); + + // Add the length of the flowset, 4 bytes + const LENGTH: u16 = 8 + (FIELDS_IPV6.len() * 4) as u16; // TODO: Fixme + bytes.extend_from_slice(LENGTH.to_be_bytes().as_ref()); + + // Add the TemplateID. We're going to use 257 for IPv6. 
+ const TEMPLATE_ID: u16 = 257; + bytes.extend_from_slice(TEMPLATE_ID.to_be_bytes().as_ref()); + + // Add the number of fields in the template + const FIELD_COUNT: u16 = FIELDS_IPV6.len() as u16; + bytes.extend_from_slice(FIELD_COUNT.to_be_bytes().as_ref()); + + for (field_type, field_length) in FIELDS_IPV6.iter() { + add_field(&mut bytes, *field_type, *field_length); + } + + bytes +} \ No newline at end of file From 33c1efdd2cc24d6704eda2e77d54e76c4004a3cf Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 7 Mar 2024 11:27:33 -0600 Subject: [PATCH 037/103] IPv6 encoding matches the same pattern, needs testing. --- .../flow_data/netflow9/protocol/mod.rs | 80 +++++-------------- 1 file changed, 22 insertions(+), 58 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs index 60fbc3fc..4446644a 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs @@ -65,67 +65,31 @@ fn ipv4_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow } fn ipv6_record(key: &FlowbeeKey, data: &FlowbeeData, direction: usize) -> anyhow::Result> { - // Configure IP directions - let local = key.local_ip.as_ip(); - let remote = key.remote_ip.as_ip(); - if let (IpAddr::V6(local), IpAddr::V6(remote)) = (local, remote) { - let src_ip = local.octets(); - let dst_ip = remote.octets(); + let field_bytes = field_encoder::encode_fields_from_template( + &template_ipv6::FIELDS_IPV6, + direction, + key, + data, + )?; - // Build the field values - let mut field_bytes: Vec = Vec::new(); + // Build the actual record + let mut bytes = Vec::new(); + // Add the flowset_id. Template ID is 257 + bytes.extend_from_slice(&(257u16).to_be_bytes()); - // Bytes Sent - field_bytes.extend_from_slice(&data.bytes_sent[direction].to_be_bytes()); + // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field + // itself. That's odd. + let padding = (field_bytes.len() + 4) % 4; + let size = (bytes.len() + field_bytes.len() + padding + 2) as u16; + bytes.extend_from_slice(&size.to_be_bytes()); - // Packet Sent - field_bytes.extend_from_slice(&data.packets_sent[direction].to_be_bytes()); + // Add the data itself + bytes.extend_from_slice(&field_bytes); - // Add the protocol - field_bytes.push(key.ip_protocol); - - // Add the source port - field_bytes.extend_from_slice(&key.src_port.to_be_bytes()); - - // Add the source address - if direction == 0 { - field_bytes.extend_from_slice(&src_ip); - } else { - field_bytes.extend_from_slice(&dst_ip); - } - - // Add the destination port - field_bytes.extend_from_slice(&key.dst_port.to_be_bytes()); - - // Add the destination address - if direction == 0 { - field_bytes.extend_from_slice(&dst_ip); - } else { - field_bytes.extend_from_slice(&src_ip); - } - - // Add the TOS - field_bytes.push(0); - - // Build the actual record - let mut bytes = Vec::new(); - // Add the flowset_id. Template ID is 257 - bytes.extend_from_slice(&(257u16).to_be_bytes()); - - // Add the length. Length includes 2 bytes for flowset and 2 bytes for the length field - // itself. That's odd. 
- bytes.extend_from_slice(&((field_bytes.len() as u16 + 4).to_be_bytes())); - - // Add the data itself - bytes.extend_from_slice(&field_bytes); - - // Pad to 32-bits - while bytes.len() % 4 != 0 { - bytes.push(0); - } - - Ok(bytes) - } else { - anyhow::bail!("IPv4 data in an IPv6 function was a bad idea"); + // Pad to 32-bits + while bytes.len() % 4 != 0 { + bytes.push(0); } + + Ok(bytes) } From 04b0cd42464373e61971d78dbfdfc543ec611750 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 7 Mar 2024 11:38:12 -0600 Subject: [PATCH 038/103] Fix warning --- .../src/throughput_tracker/flow_data/netflow9/protocol/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs index 4446644a..97772b94 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/protocol/mod.rs @@ -2,7 +2,6 @@ //! Mostly derived from https://netflow.caligare.com/netflow_v9.htm use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; -use std::net::IpAddr; mod field_types; use field_types::*; pub(crate) mod field_encoder; From b7d43567fffaf7c381bec4bec01056669c165e03 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 7 Mar 2024 12:51:08 -0600 Subject: [PATCH 039/103] Reworked the NetFlow code to batch packets into single submissions of up to 30 packets at a time. --- .../src/throughput_tracker/flow_data/mod.rs | 32 ++++--- .../flow_data/netflow5/mod.rs | 93 ++++++++++++++----- .../flow_data/netflow5/protocol.rs | 4 +- .../flow_data/netflow9/mod.rs | 77 ++++++++++----- 4 files changed, 149 insertions(+), 57 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 06ad03ce..133b05f5 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -1,17 +1,20 @@ //! Provides tracking and data-services for per-flow data. Includes implementations //! of netflow protocols. 
+mod flow_tracker; mod netflow5; mod netflow9; -mod flow_tracker; -use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; -use std::sync::mpsc::{channel, Sender}; -pub(crate) use flow_tracker::ALL_FLOWS; use crate::throughput_tracker::flow_data::{netflow5::Netflow5, netflow9::Netflow9}; +pub(crate) use flow_tracker::ALL_FLOWS; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use std::sync::{ + mpsc::{channel, Sender}, + Arc, +}; trait FlowbeeRecipient { - fn send(&mut self, key: FlowbeeKey, data: FlowbeeData); + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData); } // Creates the netflow tracker and returns the sender @@ -23,32 +26,37 @@ pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { log::info!("Starting the network flow tracker back-end"); // Build the endpoints list - let mut endpoints: Vec> = Vec::new(); + let mut endpoints: Vec> = Vec::new(); if let Some(flow_config) = config.flows { - if let (Some(ip), Some(port), Some(version)) = (flow_config.netflow_ip, flow_config.netflow_port, flow_config.netflow_version) - { + if let (Some(ip), Some(port), Some(version)) = ( + flow_config.netflow_ip, + flow_config.netflow_port, + flow_config.netflow_version, + ) { log::info!("Setting up netflow target: {ip}:{port}, version: {version}"); let target = format!("{ip}:{port}", ip = ip, port = port); match version { 5 => { let endpoint = Netflow5::new(target).unwrap(); - endpoints.push(Box::new(endpoint)); + endpoints.push(endpoint); log::info!("Netflow 5 endpoint added"); } 9 => { let endpoint = Netflow9::new(target).unwrap(); - endpoints.push(Box::new(endpoint)); + endpoints.push(endpoint); log::info!("Netflow 9 endpoint added"); } _ => log::error!("Unsupported netflow version: {version}"), } } - } // Send to all endpoints upon receipt while let Ok((key, value)) = rx.recv() { - endpoints.iter_mut().for_each(|f| f.send(key.clone(), value.clone())); + endpoints.iter_mut().for_each(|f| { + log::debug!("Enqueueing flow data for {key:?}"); + f.enqueue(key.clone(), value.clone()); + }); } log::info!("Network flow tracker back-end has stopped") }); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs index c9db0792..e73395b0 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs @@ -1,39 +1,90 @@ //! Support for the Netflow 5 protocol +//! 
Mostly taken from: https://netflow.caligare.com/netflow_v5.htm mod protocol; -use std::net::UdpSocket; -use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use super::FlowbeeRecipient; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; pub(crate) use protocol::*; +use std::{ + net::UdpSocket, + sync::{atomic::AtomicU32, Arc, Mutex}, +}; pub(crate) struct Netflow5 { socket: UdpSocket, - sequence: u32, + sequence: AtomicU32, target: String, + send_queue: Mutex>, } impl Netflow5 { - pub(crate) fn new(target: String) -> anyhow::Result { + pub(crate) fn new(target: String) -> anyhow::Result> { let socket = UdpSocket::bind("0.0.0.0:12212")?; - Ok(Self { socket, sequence: 0, target }) + let result = Arc::new(Self { + socket, + sequence: AtomicU32::new(0), + target, + send_queue: Mutex::new(Vec::new()), + }); + let thread_result = result.clone(); + std::thread::spawn(move || thread_result.queue_handler()); + Ok(result) + } + + fn queue_handler(&self) { + loop { + let mut lock = self.send_queue.lock().unwrap(); + if lock.is_empty() { + std::thread::sleep(std::time::Duration::from_millis(100)); + continue; + } + + let send_chunks = lock.chunks(15); + for to_send in send_chunks { + let num_records = (to_send.len() * 2) as u16; + let sequence = self.sequence.load(std::sync::atomic::Ordering::Relaxed); + let header = Netflow5Header::new(sequence, num_records); + let header_bytes = unsafe { + std::slice::from_raw_parts( + &header as *const _ as *const u8, + std::mem::size_of::(), + ) + }; + + let mut buffer = Vec::with_capacity( + header_bytes.len() + to_send.len() * 2 * std::mem::size_of::(), + ); + + buffer.extend_from_slice(header_bytes); + for (key, data) in to_send { + if let Ok((packet1, packet2)) = to_netflow_5(key, data) { + let packet1_bytes = unsafe { + std::slice::from_raw_parts( + &packet1 as *const _ as *const u8, + std::mem::size_of::(), + ) + }; + let packet2_bytes = unsafe { + std::slice::from_raw_parts( + &packet2 as *const _ as *const u8, + std::mem::size_of::(), + ) + }; + buffer.extend_from_slice(packet1_bytes); + buffer.extend_from_slice(packet2_bytes); + } + } + + self.socket.send_to(&buffer, &self.target).unwrap(); + self.sequence.fetch_add(num_records as u32, std::sync::atomic::Ordering::Relaxed); + } + lock.clear(); + } } } impl FlowbeeRecipient for Netflow5 { - fn send(&mut self, key: FlowbeeKey, data: FlowbeeData) { - if let Ok((packet1, packet2)) = to_netflow_5(&key, &data) { - let header = Netflow5Header::new(self.sequence); - let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; - let packet1_bytes = unsafe { std::slice::from_raw_parts(&packet1 as *const _ as *const u8, std::mem::size_of::()) }; - let packet2_bytes = unsafe { std::slice::from_raw_parts(&packet2 as *const _ as *const u8, std::mem::size_of::()) }; - let mut buffer = Vec::with_capacity(header_bytes.len() + packet1_bytes.len() + packet2_bytes.len()); - buffer.extend_from_slice(header_bytes); - buffer.extend_from_slice(packet1_bytes); - buffer.extend_from_slice(packet2_bytes); - - //log::debug!("Sending netflow packet to {target}", target = self.target); - self.socket.send_to(&buffer, &self.target).unwrap(); - - self.sequence = self.sequence.wrapping_add(2); - } + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData) { + let mut lock = self.send_queue.lock().unwrap(); + lock.push((key, data)); } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs 
b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs index da7ee620..98d6a2d6 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/protocol.rs @@ -21,12 +21,12 @@ pub(crate) struct Netflow5Header { impl Netflow5Header { /// Create a new Netflow 5 header - pub(crate) fn new(flow_sequence: u32) -> Self { + pub(crate) fn new(flow_sequence: u32, num_records: u16) -> Self { let uptime = time_since_boot().unwrap(); Self { version: (5u16).to_be(), - count: (2u16).to_be(), + count: num_records.to_be(), sys_uptime: (uptime.num_milliseconds() as u32).to_be(), unix_secs: (uptime.num_seconds() as u32).to_be(), unix_nsecs: 0, diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs index 76b5d3e0..0de36ef7 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs @@ -1,6 +1,8 @@ -use std::net::UdpSocket; +use crate::throughput_tracker::flow_data::netflow9::protocol::{ + header::Netflow9Header, template_ipv4::template_data_ipv4, template_ipv6::template_data_ipv6, +}; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; -use crate::throughput_tracker::flow_data::netflow9::protocol::{header::Netflow9Header, template_ipv4::template_data_ipv4, template_ipv6::template_data_ipv6}; +use std::{net::UdpSocket, sync::{atomic::AtomicU32, Arc, Mutex}}; use self::protocol::to_netflow_9; use super::FlowbeeRecipient; @@ -8,33 +10,64 @@ mod protocol; pub(crate) struct Netflow9 { socket: UdpSocket, - sequence: u32, + sequence: AtomicU32, target: String, + send_queue: Mutex>, } impl Netflow9 { - pub(crate) fn new(target: String) -> anyhow::Result { + pub(crate) fn new(target: String) -> anyhow::Result> { let socket = UdpSocket::bind("0.0.0.0:12212")?; - Ok(Self { socket, sequence: 0, target }) + let result = Arc::new(Self { + socket, + sequence: AtomicU32::new(0), + target, + send_queue: Mutex::new(Vec::new()), + }); + let thread_result = result.clone(); + std::thread::spawn(move || thread_result.queue_handler()); + Ok(result) + } + + fn queue_handler(&self) { + loop { + let mut lock = self.send_queue.lock().unwrap(); + if lock.is_empty() { + std::thread::sleep(std::time::Duration::from_millis(100)); + continue; + } + + let send_chunks = lock.chunks(14); + for to_send in send_chunks { + let num_records = (to_send.len() * 2) as u16 + 2; // +2 to include templates + let sequence = self.sequence.load(std::sync::atomic::Ordering::Relaxed); + let header = Netflow9Header::new(sequence, num_records); + let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; + let template1 = template_data_ipv4(); + let template2 = template_data_ipv6(); + let mut buffer = Vec::with_capacity(header_bytes.len() + template1.len() + template2.len() + (num_records as usize) * 140); + buffer.extend_from_slice(header_bytes); + buffer.extend_from_slice(&template1); + buffer.extend_from_slice(&template2); + + for (key, data) in to_send { + if let Ok((packet1, packet2)) = to_netflow_9(key, data) { + buffer.extend_from_slice(&packet1); + buffer.extend_from_slice(&packet2); + } + } + self.socket.send_to(&buffer, &self.target).unwrap(); + self.sequence.fetch_add(num_records as u32, std::sync::atomic::Ordering::Relaxed); + } + lock.clear(); + } + } } impl FlowbeeRecipient for Netflow9 { - fn send(&mut self, key: 
FlowbeeKey, data: FlowbeeData) { - if let Ok((packet1, packet2)) = to_netflow_9(&key, &data) { - let header = Netflow9Header::new(self.sequence, 4); - let header_bytes = unsafe { std::slice::from_raw_parts(&header as *const _ as *const u8, std::mem::size_of::()) }; - let mut buffer = Vec::with_capacity(header_bytes.len() + packet1.len() + packet2.len()); - buffer.extend_from_slice(header_bytes); - buffer.extend_from_slice(&template_data_ipv4()); - buffer.extend_from_slice(&template_data_ipv6()); - buffer.extend_from_slice(&packet1); - buffer.extend_from_slice(&packet2); - - log::debug!("Sending netflow9 packet of size {} to {}", buffer.len(), self.target); - self.socket.send_to(&buffer, &self.target).unwrap(); - - self.sequence = self.sequence.wrapping_add(2); - } + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData) { + let mut lock = self.send_queue.lock().unwrap(); + lock.push((key, data)); } -} \ No newline at end of file +} From 9c7a9849ada7ff5c3300649738fc05f36ed19f38 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 09:32:15 -0600 Subject: [PATCH 040/103] Replace mutex locked vector with a dashmap for flow data, to ease sharing and update rather than replace cycle. --- .../flow_data/flow_tracker.rs | 6 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 50 +++++----- .../src/throughput_tracker/tracking_data.rs | 97 ++++++++++--------- 3 files changed, 80 insertions(+), 73 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs index 71afc16e..c81cc912 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs @@ -1,9 +1,9 @@ //! Provides a globally accessible vector of all flows. This is used to store //! all flows for the purpose of tracking and data-services. -use std::sync::Mutex; +use dashmap::DashMap; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -pub static ALL_FLOWS: Lazy>> = - Lazy::new(|| Mutex::new(Vec::with_capacity(128_000))); \ No newline at end of file +pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); + diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index b6164d81..995bd7b9 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -485,41 +485,41 @@ pub fn all_unknown_ips() -> BusResponse { /// For debugging: dump all active flows! 
pub fn dump_active_flows() -> BusResponse { - let lock = ALL_FLOWS.lock().unwrap(); - let mut result = Vec::with_capacity(lock.len()); - - for (ip, flow) in lock.iter() { - result.push(lqos_bus::FlowbeeData { - remote_ip: ip.remote_ip.as_ip().to_string(), - local_ip: ip.local_ip.as_ip().to_string(), - src_port: ip.src_port, - dst_port: ip.dst_port, - ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), - bytes_sent: flow.bytes_sent, - packets_sent: flow.packets_sent, - rate_estimate_bps: flow.rate_estimate_bps, - retries: flow.retries, - last_rtt: flow.last_rtt, - end_status: flow.end_status, - tos: flow.tos, - flags: flow.flags, - }); - } + let result: Vec = ALL_FLOWS.iter().map(|row| { + lqos_bus::FlowbeeData { + remote_ip: row.key().remote_ip.as_ip().to_string(), + local_ip: row.key().local_ip.as_ip().to_string(), + src_port: row.key().src_port, + dst_port: row.key().dst_port, + ip_protocol: FlowbeeProtocol::from(row.key().ip_protocol), + bytes_sent: row.value().bytes_sent, + packets_sent: row.value().packets_sent, + rate_estimate_bps: row.value().rate_estimate_bps, + retries: row.value().retries, + last_rtt: row.value().last_rtt, + end_status: row.value().end_status, + tos: row.value().tos, + flags: row.value().flags, + } + }).collect(); BusResponse::AllActiveFlows(result) } /// Count active flows pub fn count_active_flows() -> BusResponse { - let lock = ALL_FLOWS.lock().unwrap(); - BusResponse::CountActiveFlows(lock.len() as u64) + BusResponse::CountActiveFlows(ALL_FLOWS.len() as u64) } /// Top Flows Report pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { - let lock = ALL_FLOWS.lock().unwrap(); - let mut table = lock.clone(); - + let mut table: Vec<(FlowbeeKey, FlowbeeData)> = ALL_FLOWS + .iter() + .map(|row| ( + row.key().clone(), + row.value().clone(), + )) + .collect(); match flow_type { TopFlowType::RateEstimate => { diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index dddcea01..9a5c973c 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -183,60 +183,67 @@ impl ThroughputTracker { // Track the expired keys let mut expired_keys = Vec::new(); - if let Ok(mut flow_lock) = ALL_FLOWS.try_lock() { - flow_lock.clear(); // Remove all previous values - // Track through all the flows - iterate_flows(&mut |key, data| { + // Track through all the flows + iterate_flows(&mut |key, data| { - if data.end_status == 2 { - // The flow has been handled already and should be ignored - return; + if data.end_status == 2 { + // The flow has been handled already and should be ignored + return; + } + + if data.last_seen < expire { + // This flow has expired. Add it to the list to be cleaned + expired_keys.push(key.clone()); + + // Send it off to netperf for analysis if we are supporting doing so. + if netflow_enabled { + let _ = sender.send((key.clone(), data.clone())); } + } else { + // We have a valid flow, so it needs to be tracked + let mut this_flow = ALL_FLOWS.entry(key.clone()).or_insert(data.clone()); + this_flow.last_seen = data.last_seen; + this_flow.bytes_sent = data.bytes_sent; + this_flow.packets_sent = data.packets_sent; + this_flow.rate_estimate_bps = data.rate_estimate_bps; + this_flow.retries = data.retries; + this_flow.last_rtt = data.last_rtt; + this_flow.end_status = data.end_status; + this_flow.tos = data.tos; + this_flow.flags = data.flags; - if data.last_seen < expire { - // This flow has expired. 
Add it to the list to be cleaned - expired_keys.push(key.clone()); - - // Send it off to netperf for analysis if we are supporting doing so. - if netflow_enabled { - let _ = sender.send((key.clone(), data.clone())); - } - } else { - // We have a valid flow, so it needs to be tracked - flow_lock.push((key.clone(), data.clone())); - - // TCP - we have RTT data? 6 is TCP - if key.ip_protocol == 6 && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { - if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { - // Shift left - for i in 1..60 { - tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; - } - tracker.recent_rtt_data[0] = u32::max( - (data.last_rtt[0] / 10000) as u32, - (data.last_rtt[1] / 10000) as u32, - ); - tracker.last_fresh_rtt_data_cycle = self_cycle; - if let Some(parents) = &tracker.network_json_parents { - let net_json = NETWORK_JSON.write().unwrap(); - if let Some(rtt) = tracker.median_latency() { - net_json.add_rtt_cycle(parents, rtt); - } + // TCP - we have RTT data? 6 is TCP + if key.ip_protocol == 6 && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { + if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { + // Shift left + for i in 1..60 { + tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; + } + tracker.recent_rtt_data[0] = u32::max( + (data.last_rtt[0] / 10000) as u32, + (data.last_rtt[1] / 10000) as u32, + ); + tracker.last_fresh_rtt_data_cycle = self_cycle; + if let Some(parents) = &tracker.network_json_parents { + let net_json = NETWORK_JSON.write().unwrap(); + if let Some(rtt) = tracker.median_latency() { + net_json.add_rtt_cycle(parents, rtt); } } } } - }); // End flow iterator - - if !expired_keys.is_empty() { - let ret = lqos_sys::end_flows(&mut expired_keys); - if let Err(e) = ret { - log::warn!("Failed to end flows: {:?}", e); - } } - } else { - log::warn!("Failed to lock ALL_FLOWS"); + }); // End flow iterator + + if !expired_keys.is_empty() { + let ret = lqos_sys::end_flows(&mut expired_keys); + if let Err(e) = ret { + log::warn!("Failed to end flows: {:?}", e); + } + for key in expired_keys { + ALL_FLOWS.remove(&key); + } } } } From b1cd8e5ed4507245d84bc927297f4028a590154d Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 12:11:37 -0600 Subject: [PATCH 041/103] Setup initial stage of ASN analysis - downloading and loading the table, periodic upgrades. 
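A quick sketch of the lookup idea used by the new AsnTable in this patch — not part of the patch itself, with illustrative type and field names (the real table is deserialized from ip2asn-combined.tsv):

    use std::cmp::Ordering;
    use std::net::IpAddr;

    // Illustrative row type: one ASN-owned address range.
    struct IpRange {
        start_ip: IpAddr,
        end_ip: IpAddr,
        asn: u32,
    }

    // Rows are kept sorted by start_ip. The comparator is deliberately inverted:
    // a probe whose range starts above `ip` reports Greater (search moves left),
    // one whose range ends below `ip` reports Less, and a containing range is Equal.
    fn find_asn(table: &[IpRange], ip: IpAddr) -> Option<u32> {
        table
            .binary_search_by(|probe| {
                if ip < probe.start_ip {
                    Ordering::Greater
                } else if ip > probe.end_ip {
                    Ordering::Less
                } else {
                    Ordering::Equal
                }
            })
            .ok()
            .map(|idx| table[idx].asn)
    }
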
--- src/rust/Cargo.lock | 3 + src/rust/lqosd/Cargo.toml | 3 + src/rust/lqosd/src/main.rs | 1 + .../flow_data/flow_analysis/asn.rs | 101 ++++++++++++++++++ .../flow_data/flow_analysis/mod.rs | 43 ++++++++ .../src/throughput_tracker/flow_data/mod.rs | 2 + .../src/throughput_tracker/tracking_data.rs | 25 +++-- 7 files changed, 168 insertions(+), 10 deletions(-) create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 07b302af..3cd9aebe 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -1738,8 +1738,10 @@ name = "lqosd" version = "0.1.0" dependencies = [ "anyhow", + "csv", "dashmap", "env_logger", + "flate2", "itertools 0.12.1", "jemallocator", "log", @@ -1753,6 +1755,7 @@ dependencies = [ "nix", "num-traits", "once_cell", + "reqwest", "serde", "serde_json", "signal-hook", diff --git a/src/rust/lqosd/Cargo.toml b/src/rust/lqosd/Cargo.toml index 10899c39..ae1cd6df 100644 --- a/src/rust/lqosd/Cargo.toml +++ b/src/rust/lqosd/Cargo.toml @@ -30,6 +30,9 @@ dashmap = "5" num-traits = "0.2" thiserror = "1" itertools = "0.12.1" +csv = "1" +reqwest = { version = "0.11.24", features = ["blocking"] } +flate2 = "1.0" # Support JemAlloc on supported platforms [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 200b7bbf..48f11c24 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -74,6 +74,7 @@ async fn main() -> Result<()> { // Spawn tracking sub-systems let long_term_stats_tx = start_long_term_stats().await; let flow_tx = setup_netflow_tracker(); + let _ = throughput_tracker::flow_data::setup_flow_analysis(); join!( start_heimdall(), spawn_queue_structure_monitor(), diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs new file mode 100644 index 00000000..ebaa503f --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs @@ -0,0 +1,101 @@ +use std::{io::Read, net::IpAddr, path::Path}; +use serde::Deserialize; + +/// Structure to represent the on-disk structure for files +/// from: https://iptoasn.com/ +/// Specifically: https://iptoasn.com/data/ip2asn-combined.tsv.gz +#[derive(Deserialize, Debug, Clone)] +pub struct Ip2AsnRow { + pub start_ip: IpAddr, + pub end_ip: IpAddr, + pub asn: u32, + pub country: String, + pub owners: String, +} + +pub struct AsnTable { + asn_table: Vec, +} + +impl AsnTable { + pub fn new() -> anyhow::Result { + if !Self::exists() { + Self::download()?; + } + let asn_table = Self::build_asn_table()?; + log::info!("Setup ASN Table with {} entries.", asn_table.len()); + Ok(Self { + asn_table, + }) + } + + fn file_path() -> std::path::PathBuf { + Path::new(&lqos_config::load_config().unwrap().lqos_directory) + .join("ip2asn-combined.tsv") + } + + fn download() -> anyhow::Result<()> { + log::info!("Downloading ASN-IP Table"); + let file_path = Self::file_path(); + let url = "https://iptoasn.com/data/ip2asn-combined.tsv.gz"; + let response = reqwest::blocking::get(url)?; + let content = response.bytes()?; + let bytes = &content[0..]; + let mut decompresser = flate2::read::GzDecoder::new(bytes); + let mut buf = Vec::new(); + decompresser.read_to_end(&mut buf)?; + std::fs::write(file_path, buf)?; + Ok(()) + } + + fn exists() -> bool { + 
Self::file_path().exists() + } + + fn build_asn_table() -> anyhow::Result> { + let file_path = Self::file_path(); + + let mut retries = 0; + while retries < 3 { + if file_path.exists() { + break; + } + Self::download()?; + retries += 1; + } + + if !file_path.exists() { + anyhow::bail!("IP to ASN file not found: {:?}", file_path); + } + let in_file = std::fs::File::open(file_path)?; + + let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .double_quote(false) + .escape(Some(b'\\')) + .flexible(true) + .comment(Some(b'#')) + .from_reader(in_file); + + let mut output = Vec::new(); + for result in rdr.deserialize() { + let record: Ip2AsnRow = result?; + output.push(record); + } + output.sort_by(|a, b| a.start_ip.cmp(&b.start_ip)); + Ok(output) + } + + pub fn find_asn(&self, ip: IpAddr) -> Option { + self.asn_table.binary_search_by(|probe| { + if ip < probe.start_ip { + std::cmp::Ordering::Greater + } else if ip > probe.end_ip { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Equal + } + }).map(|idx| self.asn_table[idx].clone()).ok() + } +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs new file mode 100644 index 00000000..fa4eaf67 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -0,0 +1,43 @@ +use std::sync::Mutex; +use once_cell::sync::Lazy; + +use self::asn::AsnTable; +mod asn; + +static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysis::new()); + +pub struct FlowAnalysis { + asn_table: Mutex>, +} + +impl FlowAnalysis { + pub fn new() -> Self { + // Periodically update the ASN table + std::thread::spawn(|| { + loop { + let result = AsnTable::new(); + match result { + Ok(table) => { + ANALYSIS.asn_table.lock().unwrap().replace(table); + } + Err(e) => { + log::error!("Failed to update ASN table: {e}"); + } + } + std::thread::sleep(std::time::Duration::from_secs(60 * 60 * 24)); + } + }); + + Self { + asn_table: Mutex::new(None), + } + } +} + +pub fn setup_flow_analysis() -> anyhow::Result<()> { + let e = ANALYSIS.asn_table.lock(); + if e.is_err() { + anyhow::bail!("Failed to lock ASN table"); + } + Ok(()) +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 133b05f5..7e8ce22d 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -4,6 +4,7 @@ mod flow_tracker; mod netflow5; mod netflow9; +mod flow_analysis; use crate::throughput_tracker::flow_data::{netflow5::Netflow5, netflow9::Netflow9}; pub(crate) use flow_tracker::ALL_FLOWS; @@ -12,6 +13,7 @@ use std::sync::{ mpsc::{channel, Sender}, Arc, }; +pub(crate) use flow_analysis::setup_flow_analysis; trait FlowbeeRecipient { fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData); diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 9a5c973c..bf5818fe 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -202,16 +202,21 @@ impl ThroughputTracker { } } else { // We have a valid flow, so it needs to be tracked - let mut this_flow = ALL_FLOWS.entry(key.clone()).or_insert(data.clone()); - this_flow.last_seen = data.last_seen; - this_flow.bytes_sent = data.bytes_sent; - this_flow.packets_sent = data.packets_sent; - this_flow.rate_estimate_bps = 
data.rate_estimate_bps; - this_flow.retries = data.retries; - this_flow.last_rtt = data.last_rtt; - this_flow.end_status = data.end_status; - this_flow.tos = data.tos; - this_flow.flags = data.flags; + if let Some(mut this_flow) = ALL_FLOWS.get_mut(&key) { + this_flow.last_seen = data.last_seen; + this_flow.bytes_sent = data.bytes_sent; + this_flow.packets_sent = data.packets_sent; + this_flow.rate_estimate_bps = data.rate_estimate_bps; + this_flow.retries = data.retries; + this_flow.last_rtt = data.last_rtt; + this_flow.end_status = data.end_status; + this_flow.tos = data.tos; + this_flow.flags = data.flags; + } else { + // Insert it into the map + ALL_FLOWS.insert(key.clone(), data.clone()); + // TODO: Submit it for analysis + } // TCP - we have RTT data? 6 is TCP if key.ip_protocol == 6 && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { From 6b384b2a5f6080ce0b728aa3ec8cc287f80a49ca Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 13:15:49 -0600 Subject: [PATCH 042/103] Include ASN assessment in flows data. --- src/rust/lqos_bus/src/ip_stats.rs | 6 ++ .../flow_data/flow_analysis/asn.rs | 4 ++ .../flow_data/flow_analysis/mod.rs | 36 +++++++++- .../flow_data/flow_tracker.rs | 5 +- .../src/throughput_tracker/flow_data/mod.rs | 5 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 65 +++++++++++-------- .../src/throughput_tracker/tracking_data.rs | 24 +++---- 7 files changed, 102 insertions(+), 43 deletions(-) diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 0f2a89c2..9c0e8b46 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -196,4 +196,10 @@ pub struct FlowbeeData { pub tos: u8, /// Raw TCP flags pub flags: u8, + /// Remote ASN + pub remote_asn: u32, + /// Remote ASN Name + pub remote_asn_name: String, + /// Remote ASN Country + pub remote_asn_country: String, } \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs index ebaa503f..82b89d9e 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs @@ -98,4 +98,8 @@ impl AsnTable { } }).map(|idx| self.asn_table[idx].clone()).ok() } + + pub fn find_asn_by_id(&self, asn: u32) -> Option { + self.asn_table.iter().find(|row| row.asn == asn).map(|row| row.clone()) + } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index fa4eaf67..0497b5be 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -1,4 +1,4 @@ -use std::sync::Mutex; +use std::{net::IpAddr, sync::Mutex}; use once_cell::sync::Lazy; use self::asn::AsnTable; @@ -41,3 +41,37 @@ pub fn setup_flow_analysis() -> anyhow::Result<()> { } Ok(()) } + +pub fn lookup_asn_id(ip: IpAddr) -> Option { + let table_lock = ANALYSIS.asn_table.lock(); + if table_lock.is_err() { + return None; + } + let table = table_lock.unwrap(); + if table.is_none() { + return None; + } + let table = table.as_ref().unwrap(); + if let Some(asn) = table.find_asn(ip) { + Some(asn.asn) + } else { + None + } +} + +pub fn get_asn_name_and_country(asn: u32) -> (String, String) { + let table_lock = ANALYSIS.asn_table.lock(); + if table_lock.is_err() { + return ("".to_string(), "".to_string()); + } + 
let table = table_lock.unwrap(); + if table.is_none() { + return ("".to_string(), "".to_string()); + } + let table = table.as_ref().unwrap(); + if let Some(row) = table.find_asn_by_id(asn) { + (row.owners.clone(), row.country.clone()) + } else { + ("".to_string(), "".to_string()) + } +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs index c81cc912..b3dfaafd 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs @@ -5,5 +5,8 @@ use dashmap::DashMap; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct AsnId(pub u32); + +pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 7e8ce22d..4b7df971 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -7,13 +7,14 @@ mod netflow9; mod flow_analysis; use crate::throughput_tracker::flow_data::{netflow5::Netflow5, netflow9::Netflow9}; -pub(crate) use flow_tracker::ALL_FLOWS; +pub(crate) use flow_tracker::{ALL_FLOWS, AsnId}; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use std::sync::{ mpsc::{channel, Sender}, Arc, }; -pub(crate) use flow_analysis::setup_flow_analysis; +pub(crate) use flow_analysis::{setup_flow_analysis, lookup_asn_id, get_asn_name_and_country}; + trait FlowbeeRecipient { fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData); diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 995bd7b9..f331e4c0 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -18,7 +18,7 @@ use tokio::{ time::{Duration, Instant}, }; -use self::flow_data::ALL_FLOWS; +use self::flow_data::{get_asn_name_and_country, AsnId, ALL_FLOWS}; const RETIRE_AFTER_SECONDS: u64 = 30; @@ -486,20 +486,25 @@ pub fn all_unknown_ips() -> BusResponse { /// For debugging: dump all active flows! 
pub fn dump_active_flows() -> BusResponse { let result: Vec = ALL_FLOWS.iter().map(|row| { + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.value().1.0); + lqos_bus::FlowbeeData { remote_ip: row.key().remote_ip.as_ip().to_string(), local_ip: row.key().local_ip.as_ip().to_string(), src_port: row.key().src_port, dst_port: row.key().dst_port, ip_protocol: FlowbeeProtocol::from(row.key().ip_protocol), - bytes_sent: row.value().bytes_sent, - packets_sent: row.value().packets_sent, - rate_estimate_bps: row.value().rate_estimate_bps, - retries: row.value().retries, - last_rtt: row.value().last_rtt, - end_status: row.value().end_status, - tos: row.value().tos, - flags: row.value().flags, + bytes_sent: row.value().0.bytes_sent, + packets_sent: row.value().0.packets_sent, + rate_estimate_bps: row.value().0.rate_estimate_bps, + retries: row.value().0.retries, + last_rtt: row.value().0.last_rtt, + end_status: row.value().0.end_status, + tos: row.value().0.tos, + flags: row.value().0.flags, + remote_asn: row.value().1.0, + remote_asn_name, + remote_asn_country, } }).collect(); @@ -513,7 +518,7 @@ pub fn all_unknown_ips() -> BusResponse { /// Top Flows Report pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { - let mut table: Vec<(FlowbeeKey, FlowbeeData)> = ALL_FLOWS + let mut table: Vec<(FlowbeeKey, (FlowbeeData, AsnId))> = ALL_FLOWS .iter() .map(|row| ( row.key().clone(), @@ -524,36 +529,36 @@ pub fn all_unknown_ips() -> BusResponse { match flow_type { TopFlowType::RateEstimate => { table.sort_by(|a, b| { - let a_total = a.1.rate_estimate_bps[0] + a.1.rate_estimate_bps[1]; - let b_total = b.1.rate_estimate_bps[0] + b.1.rate_estimate_bps[1]; + let a_total = a.1.0.rate_estimate_bps[0] + a.1.0.rate_estimate_bps[1]; + let b_total = b.1.0.rate_estimate_bps[0] + b.1.0.rate_estimate_bps[1]; b_total.cmp(&a_total) }); } TopFlowType::Bytes => { table.sort_by(|a, b| { - let a_total = a.1.bytes_sent[0] + a.1.bytes_sent[1]; - let b_total = b.1.bytes_sent[0] + b.1.bytes_sent[1]; + let a_total = a.1.0.bytes_sent[0] + a.1.0.bytes_sent[1]; + let b_total = b.1.0.bytes_sent[0] + b.1.0.bytes_sent[1]; b_total.cmp(&a_total) }); } TopFlowType::Packets => { table.sort_by(|a, b| { - let a_total = a.1.packets_sent[0] + a.1.packets_sent[1]; - let b_total = b.1.packets_sent[0] + b.1.packets_sent[1]; + let a_total = a.1.0.packets_sent[0] + a.1.0.packets_sent[1]; + let b_total = b.1.0.packets_sent[0] + b.1.0.packets_sent[1]; b_total.cmp(&a_total) }); } TopFlowType::Drops => { table.sort_by(|a, b| { - let a_total = a.1.retries[0] + a.1.retries[1]; - let b_total = b.1.retries[0] + b.1.retries[1]; + let a_total = a.1.0.retries[0] + a.1.0.retries[1]; + let b_total = b.1.0.retries[0] + b.1.0.retries[1]; b_total.cmp(&a_total) }); } TopFlowType::RoundTripTime => { table.sort_by(|a, b| { - let a_total = a.1.last_rtt[0] + a.1.last_rtt[1]; - let b_total = b.1.last_rtt[0] + b.1.last_rtt[1]; + let a_total = a.1.0.last_rtt[0] + a.1.0.last_rtt[1]; + let b_total = b.1.0.last_rtt[0] + b.1.0.last_rtt[1]; b_total.cmp(&a_total) }); } @@ -563,20 +568,24 @@ pub fn all_unknown_ips() -> BusResponse { .iter() .take(n as usize) .map(|(ip, flow)| { + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.0); lqos_bus::FlowbeeData { remote_ip: ip.remote_ip.as_ip().to_string(), local_ip: ip.local_ip.as_ip().to_string(), src_port: ip.src_port, dst_port: ip.dst_port, ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), - bytes_sent: flow.bytes_sent, - packets_sent: flow.packets_sent, - 
rate_estimate_bps: flow.rate_estimate_bps, - retries: flow.retries, - last_rtt: flow.last_rtt, - end_status: flow.end_status, - tos: flow.tos, - flags: flow.flags, + bytes_sent: flow.0.bytes_sent, + packets_sent: flow.0.packets_sent, + rate_estimate_bps: flow.0.rate_estimate_bps, + retries: flow.0.retries, + last_rtt: flow.0.last_rtt, + end_status: flow.0.end_status, + tos: flow.0.tos, + flags: flow.0.flags, + remote_asn: flow.1.0, + remote_asn_name, + remote_asn_country, } }) .collect(); diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index bf5818fe..7f0ab16a 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -1,6 +1,6 @@ use std::{sync::atomic::AtomicU64, time::Duration}; use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP}}; -use super::{flow_data::ALL_FLOWS, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; +use super::{flow_data::{lookup_asn_id, AsnId, ALL_FLOWS}, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; use lqos_sys::{flowbee_data::{FlowbeeData, FlowbeeKey}, iterate_flows, throughput_for_each}; @@ -203,18 +203,20 @@ impl ThroughputTracker { } else { // We have a valid flow, so it needs to be tracked if let Some(mut this_flow) = ALL_FLOWS.get_mut(&key) { - this_flow.last_seen = data.last_seen; - this_flow.bytes_sent = data.bytes_sent; - this_flow.packets_sent = data.packets_sent; - this_flow.rate_estimate_bps = data.rate_estimate_bps; - this_flow.retries = data.retries; - this_flow.last_rtt = data.last_rtt; - this_flow.end_status = data.end_status; - this_flow.tos = data.tos; - this_flow.flags = data.flags; + this_flow.0.last_seen = data.last_seen; + this_flow.0.bytes_sent = data.bytes_sent; + this_flow.0.packets_sent = data.packets_sent; + this_flow.0.rate_estimate_bps = data.rate_estimate_bps; + this_flow.0.retries = data.retries; + this_flow.0.last_rtt = data.last_rtt; + this_flow.0.end_status = data.end_status; + this_flow.0.tos = data.tos; + this_flow.0.flags = data.flags; } else { // Insert it into the map - ALL_FLOWS.insert(key.clone(), data.clone()); + let asn = lookup_asn_id(key.remote_ip.as_ip()).unwrap_or(0); + + ALL_FLOWS.insert(key.clone(), (data.clone(), AsnId(asn))); // TODO: Submit it for analysis } From 5202f447dceeef3363859eb15164dbda4582fabe Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 14:03:29 -0600 Subject: [PATCH 043/103] Begin adding a little UI --- src/rust/lqos_node_manager/static/main.html | 56 ++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index e1f7b1d3..fd7e098b 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -143,8 +143,13 @@
-
Top 10 Downloaders
-
+
+ Top 10 Downloaders + + +
+
+
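The updateTop10Flows() helper added in the next hunk polls /api/flows/top/10/rate, which is served by the top_flows() handler shown earlier in this series. As a rough sketch of that sort-and-truncate step — illustrative types only, not the real (FlowbeeKey, FlowbeeData) pairs:

    use std::cmp::Reverse;

    // Illustrative flow record; only the per-direction rate estimate matters here.
    struct Flow {
        rate_estimate_bps: [u64; 2],
    }

    // Sort descending by the combined rate estimate, then keep the first `n` flows.
    fn top_flows_by_rate(mut flows: Vec<Flow>, n: usize) -> Vec<Flow> {
        flows.sort_by_key(|f| Reverse(f.rate_estimate_bps[0] + f.rate_estimate_bps[1]));
        flows.truncate(n);
        flows
    }
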
@@ -306,6 +311,52 @@ }); } + function updateTop10Flows() { + $.get("/api/flows/top/10/rate", data => { + let html = "
Packets/Second
"; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + for (var i = 0; i"; + html += ""; + html += ""; + html += ""; + // TODO: Check scaling + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + } + html += "
ProtocolLocal IPRemote IPPortsUL ⬆️DL ⬇️UL RTTDL RTTRemote ASNCountry
" + data[i].local_ip + "" + data[i].remote_ip + "" + data[i].src_port + " / " + data[i].dst_port + "" + scaleNumber(data[i].rate_estimate_bps[0] * 8) + "" + scaleNumber(data[i].rate_estimate_bps[1] * 8) + "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + "" + (data[i].last_rtt[1] / 1000000).toFixed(2) + "" + data[i].remote_asn_name + "" + data[i].remote_asn_country + "
"; + $("#top10flows").html(html); + }); + } + + function showCircuits() { + $("#top10dl").show(); + $("#top10flows").hide(); + } + + function showFlows() { + $("#top10dl").hide(); + $("#top10flows").show(); + } + var rttGraph = new RttHistogram(); function updateHistogram() { @@ -329,6 +380,7 @@ updateHistogram(); updateWorst10(); updateTop10(); + updateTop10Flows(); } if (tickCount % 10 == 0) { From 1fb151aa1b7be16e66348a046b5295dfb4a22e87 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 14:52:02 -0600 Subject: [PATCH 044/103] Minimal protocol analysis beginnings. --- src/rust/lqos_bus/src/ip_stats.rs | 2 + src/rust/lqos_node_manager/static/main.html | 4 +- .../flow_data/flow_analysis/mod.rs | 30 +++++- .../flow_data/flow_analysis/protocol.rs | 93 +++++++++++++++++++ .../flow_data/flow_tracker.rs | 3 +- .../src/throughput_tracker/flow_data/mod.rs | 2 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 14 +-- .../src/throughput_tracker/tracking_data.rs | 6 +- 8 files changed, 136 insertions(+), 18 deletions(-) create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/protocol.rs diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 9c0e8b46..63730b42 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -202,4 +202,6 @@ pub struct FlowbeeData { pub remote_asn_name: String, /// Remote ASN Country pub remote_asn_country: String, + /// Analysis + pub analysis: String, } \ No newline at end of file diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index fd7e098b..b90cdc45 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -318,7 +318,6 @@ html += "Protocol"; html += "Local IP"; html += "Remote IP"; - html += "Ports"; html += "UL ⬆️"; html += "DL ⬇️"; html += "UL RTT"; @@ -329,10 +328,9 @@ for (var i = 0; i"; + html += "" + data[i].analysis + ""; html += "" + data[i].local_ip + ""; html += "" + data[i].remote_ip + ""; - html += "" + data[i].src_port + " / " + data[i].dst_port + ""; // TODO: Check scaling html += "" + scaleNumber(data[i].rate_estimate_bps[0] * 8) + ""; html += "" + scaleNumber(data[i].rate_estimate_bps[1] * 8) + ""; diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index 0497b5be..66625bd8 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -1,16 +1,20 @@ use std::{net::IpAddr, sync::Mutex}; +use lqos_sys::flowbee_data::FlowbeeKey; use once_cell::sync::Lazy; - use self::asn::AsnTable; mod asn; +mod protocol; +pub use protocol::FlowProtocol; -static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysis::new()); +use super::AsnId; -pub struct FlowAnalysis { +static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysisSystem::new()); + +pub struct FlowAnalysisSystem { asn_table: Mutex>, } -impl FlowAnalysis { +impl FlowAnalysisSystem { pub fn new() -> Self { // Periodically update the ASN table std::thread::spawn(|| { @@ -42,6 +46,24 @@ pub fn setup_flow_analysis() -> anyhow::Result<()> { Ok(()) } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FlowAnalysis { + pub asn_id: AsnId, + pub protocol_analysis: FlowProtocol, +} + +impl FlowAnalysis { + pub fn new(key: &FlowbeeKey) -> Self { + let asn_id = lookup_asn_id(key.remote_ip.as_ip()); + let protocol_analysis = 
FlowProtocol::new(key); + Self { + asn_id: AsnId(asn_id.unwrap_or(0)), + protocol_analysis, + } + } +} + + pub fn lookup_asn_id(ip: IpAddr) -> Option { let table_lock = ANALYSIS.asn_table.lock(); if table_lock.is_err() { diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/protocol.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/protocol.rs new file mode 100644 index 00000000..4a07c8ab --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/protocol.rs @@ -0,0 +1,93 @@ +use std::fmt::Display; + +use lqos_sys::flowbee_data::FlowbeeKey; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FlowProtocol { + Smtp, + Ftp, + Http, + Https, + Ssh, + Telnet, + Imap, + Rdp, + Dns, + Pop3, + Quic, + Other { proto: u8, src_port: u16, dst_port: u16 } +} + +impl FlowProtocol { + pub fn new(key: &FlowbeeKey) -> Self { + match key.ip_protocol { + 6 => Self::tcp(key), + 17 => Self::udp(key), + _ => Self::Other { + proto: key.ip_protocol, + src_port: key.src_port, + dst_port: key.dst_port, + } + } + } + + fn tcp(key: &FlowbeeKey) -> Self { + match key.src_port { + 25 => Self::Smtp, + 80 => Self::Http, + 443 => Self::Https, + 21 | 20 => Self::Ftp, + 22 => Self::Ssh, + 23 => Self::Telnet, + 3389 => Self::Rdp, + 143 => Self::Imap, + 53 => Self::Dns, + 110 => Self::Pop3, + _ => Self::Other { + proto: key.ip_protocol, + src_port: key.src_port, + dst_port: key.dst_port, + } + } + } + + fn udp(key: &FlowbeeKey) -> Self { + match key.src_port { + 53 => Self::Dns, + 80 | 443 => Self::Quic, + _ => Self::Other { + proto: key.ip_protocol, + src_port: key.src_port, + dst_port: key.dst_port, + } + } + } +} + +impl Display for FlowProtocol { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Smtp => write!(f, "SMTP"), + Self::Ftp => write!(f, "FTP"), + Self::Http => write!(f, "HTTP"), + Self::Https => write!(f, "HTTPS"), + Self::Ssh => write!(f, "SSH"), + Self::Telnet => write!(f, "Telnet"), + Self::Imap => write!(f, "IMAP"), + Self::Rdp => write!(f, "RDP"), + Self::Dns => write!(f, "DNS"), + Self::Pop3 => write!(f, "POP3"), + Self::Quic => write!(f, "QUIC"), + Self::Other { proto, src_port, dst_port } => write!(f, "{} {}/{}", proto_name(proto), src_port, dst_port), + } + } +} + +fn proto_name(proto: &u8) -> &'static str { + match proto { + 6 => "TCP", + 17 => "UDP", + 1 => "ICMP", + _ => "Other", + } +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs index b3dfaafd..5b8a3778 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs @@ -4,9 +4,10 @@ use dashmap::DashMap; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; +use super::flow_analysis::FlowAnalysis; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct AsnId(pub u32); -pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); +pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 4b7df971..651df996 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -13,7 +13,7 @@ use std::sync::{ mpsc::{channel, Sender}, Arc, }; -pub(crate) use flow_analysis::{setup_flow_analysis, lookup_asn_id, 
get_asn_name_and_country}; +pub(crate) use flow_analysis::{setup_flow_analysis, lookup_asn_id, get_asn_name_and_country, FlowAnalysis}; trait FlowbeeRecipient { diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index f331e4c0..237198c2 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -18,7 +18,7 @@ use tokio::{ time::{Duration, Instant}, }; -use self::flow_data::{get_asn_name_and_country, AsnId, ALL_FLOWS}; +use self::flow_data::{get_asn_name_and_country, AsnId, FlowAnalysis, ALL_FLOWS}; const RETIRE_AFTER_SECONDS: u64 = 30; @@ -486,7 +486,7 @@ pub fn all_unknown_ips() -> BusResponse { /// For debugging: dump all active flows! pub fn dump_active_flows() -> BusResponse { let result: Vec = ALL_FLOWS.iter().map(|row| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.value().1.0); + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.value().1.asn_id.0); lqos_bus::FlowbeeData { remote_ip: row.key().remote_ip.as_ip().to_string(), @@ -502,9 +502,10 @@ pub fn all_unknown_ips() -> BusResponse { end_status: row.value().0.end_status, tos: row.value().0.tos, flags: row.value().0.flags, - remote_asn: row.value().1.0, + remote_asn: row.value().1.asn_id.0, remote_asn_name, remote_asn_country, + analysis: row.value().1.protocol_analysis.to_string(), } }).collect(); @@ -518,7 +519,7 @@ pub fn all_unknown_ips() -> BusResponse { /// Top Flows Report pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { - let mut table: Vec<(FlowbeeKey, (FlowbeeData, AsnId))> = ALL_FLOWS + let mut table: Vec<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> = ALL_FLOWS .iter() .map(|row| ( row.key().clone(), @@ -568,7 +569,7 @@ pub fn all_unknown_ips() -> BusResponse { .iter() .take(n as usize) .map(|(ip, flow)| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.0); + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.asn_id.0); lqos_bus::FlowbeeData { remote_ip: ip.remote_ip.as_ip().to_string(), local_ip: ip.local_ip.as_ip().to_string(), @@ -583,9 +584,10 @@ pub fn all_unknown_ips() -> BusResponse { end_status: flow.0.end_status, tos: flow.0.tos, flags: flow.0.flags, - remote_asn: flow.1.0, + remote_asn: flow.1.asn_id.0, remote_asn_name, remote_asn_country, + analysis: flow.1.protocol_analysis.to_string(), } }) .collect(); diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 7f0ab16a..6fdd611a 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -1,6 +1,6 @@ use std::{sync::atomic::AtomicU64, time::Duration}; use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP}}; -use super::{flow_data::{lookup_asn_id, AsnId, ALL_FLOWS}, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; +use super::{flow_data::{lookup_asn_id, AsnId, FlowAnalysis, ALL_FLOWS}, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; use lqos_sys::{flowbee_data::{FlowbeeData, FlowbeeKey}, iterate_flows, throughput_for_each}; @@ -214,9 +214,9 @@ impl ThroughputTracker { this_flow.0.flags = data.flags; } else { // Insert it into the map - let asn = lookup_asn_id(key.remote_ip.as_ip()).unwrap_or(0); + let flow_analysis = FlowAnalysis::new(&key); - 
ALL_FLOWS.insert(key.clone(), (data.clone(), AsnId(asn))); + ALL_FLOWS.insert(key.clone(), (data.clone(), flow_analysis)); // TODO: Submit it for analysis } From 79fa1d42a8dff84d37ed8b468c4adce1dec99d90 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Fri, 8 Mar 2024 14:53:49 -0600 Subject: [PATCH 045/103] Revert per-ms timings for now. --- src/rust/lqos_sys/src/bpf/common/flows.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 63c76478..55952e81 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -97,7 +97,7 @@ static __always_inline struct flow_data_t new_flow_data( .packets_sent = { 0, 0 }, // Track flow rates at an MS scale rather than per-second // to minimize rounding errors. - .next_count_time = { now + MS_IN_NANOS_T10, now + MS_IN_NANOS_T10 }, + .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, .last_count_time = { now, now }, .next_count_bytes = { dissector->skb_len, dissector->skb_len }, .rate_estimate_bps = { 0, 0 }, @@ -162,9 +162,9 @@ static __always_inline void update_flow_rates( if (now > data->next_count_time[rate_index]) { // Calculate the rate estimate __u64 bits = (data->bytes_sent[rate_index] - data->next_count_bytes[rate_index])*8; - __u64 time = (now - data->last_count_time[rate_index]) / 100000; // 10 Milliseconds + __u64 time = (now - data->last_count_time[rate_index]) / 10000000; // 1 Second data->rate_estimate_bps[rate_index] = (bits/time); // bits per second - data->next_count_time[rate_index] = now + MS_IN_NANOS_T10; + data->next_count_time[rate_index] = now + SECOND_IN_NANOS; data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; data->last_count_time[rate_index] = now; //bpf_debug("[FLOWS] Rate Estimate: %llu", data->rate_estimate_bps[rate_index]); From 4a8be30c09b224069da7de332f85b8aac85b3d0f Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Sat, 9 Mar 2024 10:29:49 -0600 Subject: [PATCH 046/103] First update of the 'recently finished flows' mechanism for tracking the last 5 minutes of data. 
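The five-minute window described above reduces to a mutex-guarded vector of timestamped entries whose expiry is a retain() pass. A hedged, self-contained sketch with illustrative names — the real TimeBuffer in this patch stores the full key/data/analysis tuple:

    use std::sync::Mutex;
    use std::time::{SystemTime, UNIX_EPOCH};

    fn unix_now() -> u64 {
        SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
    }

    struct TimeBuffer<T> {
        buffer: Mutex<Vec<(u64, T)>>,
    }

    impl<T> TimeBuffer<T> {
        // Record an entry stamped with the current wall-clock second.
        fn push(&self, value: T) {
            self.buffer.lock().unwrap().push((unix_now(), value));
        }

        // Drop everything older than the window (300 seconds for "last 5 minutes").
        fn expire_older_than(&self, window_secs: u64) {
            let now = unix_now();
            self.buffer
                .lock()
                .unwrap()
                .retain(|(t, _)| now.saturating_sub(*t) < window_secs);
        }
    }
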
--- .../flow_data/flow_analysis/asn.rs | 14 +- .../flow_data/flow_analysis/finished_flows.rs | 152 ++++++++++++++++++ .../flow_data/flow_analysis/mod.rs | 3 +- .../src/throughput_tracker/flow_data/mod.rs | 17 +- .../flow_data/netflow5/mod.rs | 4 +- .../flow_data/netflow9/mod.rs | 4 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 4 +- .../src/throughput_tracker/tracking_data.rs | 20 +-- 8 files changed, 188 insertions(+), 30 deletions(-) create mode 100644 src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs index 82b89d9e..d06e8141 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs @@ -55,13 +55,15 @@ impl AsnTable { fn build_asn_table() -> anyhow::Result> { let file_path = Self::file_path(); - let mut retries = 0; - while retries < 3 { - if file_path.exists() { - break; + if !file_path.exists() { + let mut retries = 0; + while retries < 3 { + if file_path.exists() { + break; + } + Self::download()?; + retries += 1; } - Self::download()?; - retries += 1; } if !file_path.exists() { diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs new file mode 100644 index 00000000..27004079 --- /dev/null +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -0,0 +1,152 @@ +use super::{get_asn_name_and_country, FlowAnalysis}; +use crate::throughput_tracker::flow_data::FlowbeeRecipient; +use dashmap::DashMap; +use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; +use once_cell::sync::Lazy; +use std::{ + collections::HashMap, + fmt::Debug, + sync::{Arc, Mutex}, +}; + +struct TimeBuffer { + buffer: Mutex>, +} + +struct TimeEntry { + time: u64, + data: (FlowbeeKey, FlowbeeData, FlowAnalysis), +} + +impl TimeBuffer { + fn new() -> Self { + Self { + buffer: Mutex::new(Vec::new()), + } + } + + fn expire_over_five_minutes(&self) { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + let mut buffer = self.buffer.lock().unwrap(); + buffer.retain(|v| now - v.time < 300); + } + + fn push(&self, entry: TimeEntry) { + let mut buffer = self.buffer.lock().unwrap(); + buffer.push(entry); + } + + fn country_summary(&self) -> Vec<(String, [u64; 2], [f32; 2])> { + let buffer = self.buffer.lock().unwrap(); + let mut my_buffer = buffer + .iter() + .map(|v| { + let (_key, data, analysis) = &v.data; + let (_name, country) = get_asn_name_and_country(analysis.asn_id.0); + let rtt = [ + (data.last_rtt[0] / 1000000) as f32, + (data.last_rtt[1] / 1000000) as f32, + ]; + (country, data.bytes_sent, rtt) + }) + .collect::>(); + + // Sort by country + my_buffer.sort_by(|a, b| a.0.cmp(&b.0)); + + // Summarize by country + let mut country_summary = Vec::new(); + let mut last_country = String::new(); + let mut total_bytes = [0, 0]; + let mut total_rtt = [0.0f64, 0.0f64]; + let mut rtt_count = [0, 0]; + for (country, bytes, rtt) in my_buffer { + if last_country != country { + if !last_country.is_empty() { + // Store the country + let rtt = [ + if total_rtt[0] > 0.0 { + (total_rtt[0] / rtt_count[0] as f64) as f32 + } else { + 0.0 + }, + if total_rtt[1] > 0.0 { + (total_rtt[1] / rtt_count[1] as f64) as f32 + } else { + 0.0 + }, + ]; + + 
country_summary.push((last_country, total_bytes, rtt)); + } + + last_country = country.to_string(); + total_bytes = [0, 0]; + total_rtt = [0.0, 0.0]; + rtt_count = [0, 0]; + } + total_bytes[0] += bytes[0]; + total_bytes[1] += bytes[1]; + total_rtt[0] += rtt[0] as f64; + total_rtt[1] += rtt[1] as f64; + rtt_count[0] += 1; + rtt_count[1] += 1; + } + + // Store the last country + let rtt = [ + if total_rtt[0] > 0.0 { + (total_rtt[0] / rtt_count[0] as f64) as f32 + } else { + 0.0 + }, + if total_rtt[1] > 0.0 { + (total_rtt[1] / rtt_count[1] as f64) as f32 + } else { + 0.0 + }, + ]; + + country_summary.push((last_country, total_bytes, rtt)); + + // Sort by bytes descending + country_summary.sort_by(|a, b| b.1[0].cmp(&a.1[0])); + + country_summary + } +} + +static RECENT_FLOWS: Lazy = Lazy::new(|| TimeBuffer::new()); + +pub struct FinishedFlowAnalysis {} + +impl FinishedFlowAnalysis { + pub fn new() -> Arc { + log::debug!("Created Flow Analysis Endpoint"); + + std::thread::spawn(|| loop { + RECENT_FLOWS.expire_over_five_minutes(); + std::thread::sleep(std::time::Duration::from_secs(60 * 5)); + }); + + Arc::new(Self {}) + } +} + +impl FlowbeeRecipient for FinishedFlowAnalysis { + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData, analysis: FlowAnalysis) { + log::info!("Finished flow analysis"); + RECENT_FLOWS.push(TimeEntry { + time: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(), + data: (key, data, analysis), + }); + + println!("{:?}", RECENT_FLOWS.country_summary()); + } +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index 66625bd8..7800fc88 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -5,8 +5,9 @@ use self::asn::AsnTable; mod asn; mod protocol; pub use protocol::FlowProtocol; - use super::AsnId; +mod finished_flows; +pub use finished_flows::FinishedFlowAnalysis; static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysisSystem::new()); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 651df996..f4225318 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -6,7 +6,7 @@ mod netflow5; mod netflow9; mod flow_analysis; -use crate::throughput_tracker::flow_data::{netflow5::Netflow5, netflow9::Netflow9}; +use crate::throughput_tracker::flow_data::{flow_analysis::FinishedFlowAnalysis, netflow5::Netflow5, netflow9::Netflow9}; pub(crate) use flow_tracker::{ALL_FLOWS, AsnId}; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use std::sync::{ @@ -17,12 +17,12 @@ pub(crate) use flow_analysis::{setup_flow_analysis, lookup_asn_id, get_asn_name_ trait FlowbeeRecipient { - fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData); + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData, analysis: FlowAnalysis); } // Creates the netflow tracker and returns the sender -pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { - let (tx, rx) = channel::<(FlowbeeKey, FlowbeeData)>(); +pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> { + let (tx, rx) = channel::<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>(); let config = lqos_config::load_config().unwrap(); std::thread::spawn(move || { @@ -30,6 +30,8 @@ pub fn setup_netflow_tracker() -> 
Sender<(FlowbeeKey, FlowbeeData)> { // Build the endpoints list let mut endpoints: Vec> = Vec::new(); + endpoints.push(FinishedFlowAnalysis::new()); + if let Some(flow_config) = config.flows { if let (Some(ip), Some(port), Some(version)) = ( flow_config.netflow_ip, @@ -53,12 +55,13 @@ pub fn setup_netflow_tracker() -> Sender<(FlowbeeKey, FlowbeeData)> { } } } + log::info!("Flow Endpoints: {}", endpoints.len()); // Send to all endpoints upon receipt - while let Ok((key, value)) = rx.recv() { + while let Ok((key, (value, analysis))) = rx.recv() { endpoints.iter_mut().for_each(|f| { - log::debug!("Enqueueing flow data for {key:?}"); - f.enqueue(key.clone(), value.clone()); + //log::debug!("Enqueueing flow data for {key:?}"); + f.enqueue(key.clone(), value.clone(), analysis.clone()); }); } log::info!("Network flow tracker back-end has stopped") diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs index e73395b0..ac8fc6f6 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow5/mod.rs @@ -1,7 +1,7 @@ //! Support for the Netflow 5 protocol //! Mostly taken from: https://netflow.caligare.com/netflow_v5.htm mod protocol; -use super::FlowbeeRecipient; +use super::{FlowAnalysis, FlowbeeRecipient}; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; pub(crate) use protocol::*; use std::{ @@ -83,7 +83,7 @@ impl Netflow5 { } impl FlowbeeRecipient for Netflow5 { - fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData) { + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData, _analysis: FlowAnalysis) { let mut lock = self.send_queue.lock().unwrap(); lock.push((key, data)); } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs index 0de36ef7..3366d213 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/netflow9/mod.rs @@ -5,7 +5,7 @@ use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use std::{net::UdpSocket, sync::{atomic::AtomicU32, Arc, Mutex}}; use self::protocol::to_netflow_9; -use super::FlowbeeRecipient; +use super::{FlowAnalysis, FlowbeeRecipient}; mod protocol; pub(crate) struct Netflow9 { @@ -66,7 +66,7 @@ impl Netflow9 { } impl FlowbeeRecipient for Netflow9 { - fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData) { + fn enqueue(&self, key: FlowbeeKey, data: FlowbeeData, _analysis: FlowAnalysis) { let mut lock = self.send_queue.lock().unwrap(); lock.push((key, data)); } diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 237198c2..590b2bac 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -33,7 +33,7 @@ pub static THROUGHPUT_TRACKER: Lazy = Lazy::new(ThroughputTra /// collection thread that there is fresh data. 
pub async fn spawn_throughput_monitor( long_term_stats_tx: Sender, - netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)>, + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { info!("Starting the bandwidth monitor thread."); let interval_ms = 1000; // 1 second @@ -44,7 +44,7 @@ pub async fn spawn_throughput_monitor( async fn throughput_task( interval_ms: u64, long_term_stats_tx: Sender, - netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)> + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> ) { // Obtain the flow timeout from the config, default to 30 seconds let timeout_seconds = if let Ok(config) = lqos_config::load_config() { diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 6fdd611a..0de2c1e3 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -172,7 +172,7 @@ impl ThroughputTracker { &self, timeout_seconds: u64, netflow_enabled: bool, - sender: std::sync::mpsc::Sender<(FlowbeeKey, FlowbeeData)>, + sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); @@ -182,7 +182,6 @@ impl ThroughputTracker { // Track the expired keys let mut expired_keys = Vec::new(); - // Track through all the flows iterate_flows(&mut |key, data| { @@ -194,12 +193,7 @@ impl ThroughputTracker { if data.last_seen < expire { // This flow has expired. Add it to the list to be cleaned - expired_keys.push(key.clone()); - - // Send it off to netperf for analysis if we are supporting doing so. - if netflow_enabled { - let _ = sender.send((key.clone(), data.clone())); - } + expired_keys.push(key.clone()); } else { // We have a valid flow, so it needs to be tracked if let Some(mut this_flow) = ALL_FLOWS.get_mut(&key) { @@ -217,7 +211,6 @@ impl ThroughputTracker { let flow_analysis = FlowAnalysis::new(&key); ALL_FLOWS.insert(key.clone(), (data.clone(), flow_analysis)); - // TODO: Submit it for analysis } // TCP - we have RTT data? 6 is TCP @@ -249,7 +242,14 @@ impl ThroughputTracker { log::warn!("Failed to end flows: {:?}", e); } for key in expired_keys { - ALL_FLOWS.remove(&key); + // Send it off to netperf for analysis if we are supporting doing so. + if netflow_enabled { + if let Some(d) = ALL_FLOWS.get(&key) { + let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); + } + } + + //ALL_FLOWS.remove(&key); } } } From 82ecd5eb174472dab3271005015ccd568e409114 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Sun, 10 Mar 2024 21:11:55 -0500 Subject: [PATCH 047/103] Eliminate all time fetching calls except one, and store the result in the dissector. Minor speed improvement. 
--- src/rust/lqos_node_manager/static/main.html | 2 + src/rust/lqos_sys/src/bpf/common/dissector.h | 2 + src/rust/lqos_sys/src/bpf/common/flows.h | 55 ++++++++----------- src/rust/lqos_sys/src/bpf/common/heimdall.h | 6 +- src/rust/lqos_sys/src/bpf/common/throughput.h | 7 ++- src/rust/lqos_sys/src/bpf/lqos_kern.c | 13 +---- 6 files changed, 36 insertions(+), 49 deletions(-) diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index b90cdc45..ebde62f2 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -322,6 +322,7 @@ html += "DL ⬇️"; html += "UL RTT"; html += "DL RTT"; + html += "TCP Retries"; html += "Remote ASN"; html += "Country"; html += ""; @@ -336,6 +337,7 @@ html += "" + scaleNumber(data[i].rate_estimate_bps[1] * 8) + ""; html += "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + ""; html += "" + (data[i].last_rtt[1] / 1000000).toFixed(2) + ""; + html += "" + data[i].retries[0] + "/" + data[i].retries[1] + ""; html += "" + data[i].remote_asn_name + ""; html += "" + data[i].remote_asn_country + ""; html += ""; diff --git a/src/rust/lqos_sys/src/bpf/common/dissector.h b/src/rust/lqos_sys/src/bpf/common/dissector.h index fe43f7e2..1e65e651 100644 --- a/src/rust/lqos_sys/src/bpf/common/dissector.h +++ b/src/rust/lqos_sys/src/bpf/common/dissector.h @@ -54,6 +54,7 @@ struct dissector_t __u32 tsecr; __u32 sequence; __u32 ack_seq; + __u64 now; }; // Representation of the VLAN header type. @@ -118,6 +119,7 @@ static __always_inline bool dissector_new( dissector->tos = 0; dissector->sequence = 0; dissector->ack_seq = 0; + dissector->now = bpf_ktime_get_boot_ns(); // Check that there's room for an ethernet header if SKB_OVERFLOW (dissector->start, dissector->end, ethhdr) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 55952e81..9c2cd02a 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -86,19 +86,17 @@ struct // Construct an empty flow_data_t structure, using default values. static __always_inline struct flow_data_t new_flow_data( - // The current time in nanoseconds, from bpf_ktime_get_boot_ns - __u64 now, // The packet dissector from the previous step struct dissector_t *dissector ) { struct flow_data_t data = { - .start_time = now, + .start_time = dissector->now, .bytes_sent = { 0, 0 }, .packets_sent = { 0, 0 }, // Track flow rates at an MS scale rather than per-second // to minimize rounding errors. 
- .next_count_time = { now + SECOND_IN_NANOS, now + SECOND_IN_NANOS }, - .last_count_time = { now, now }, + .next_count_time = { dissector->now + SECOND_IN_NANOS, dissector->now + SECOND_IN_NANOS }, + .last_count_time = { dissector->now, dissector->now }, .next_count_bytes = { dissector->skb_len, dissector->skb_len }, .rate_estimate_bps = { 0, 0 }, .last_sequence = { 0, 0 }, @@ -148,25 +146,23 @@ static __always_inline void update_flow_rates( // The rate index (0 = to internet, 1 = to local network) u_int8_t rate_index, // The flow data structure to update - struct flow_data_t *data, - // The current time in nanoseconds, from bpf_ktime_get_boot_ns - __u64 now + struct flow_data_t *data ) { - data->last_seen = now; + data->last_seen = dissector->now; data->end_status = 0; // Reset the end status // Update bytes and packets sent data->bytes_sent[rate_index] += dissector->skb_len; data->packets_sent[rate_index]++; - if (now > data->next_count_time[rate_index]) { + if (dissector->now > data->next_count_time[rate_index]) { // Calculate the rate estimate __u64 bits = (data->bytes_sent[rate_index] - data->next_count_bytes[rate_index])*8; - __u64 time = (now - data->last_count_time[rate_index]) / 10000000; // 1 Second + __u64 time = (dissector->now - data->last_count_time[rate_index]) / 10000000; // 1 Second data->rate_estimate_bps[rate_index] = (bits/time); // bits per second - data->next_count_time[rate_index] = now + SECOND_IN_NANOS; + data->next_count_time[rate_index] = dissector->now + SECOND_IN_NANOS; data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; - data->last_count_time[rate_index] = now; + data->last_count_time[rate_index] = dissector->now; //bpf_debug("[FLOWS] Rate Estimate: %llu", data->rate_estimate_bps[rate_index]); } } @@ -176,14 +172,13 @@ static __always_inline void process_icmp( struct dissector_t *dissector, u_int8_t direction, u_int8_t rate_index, - u_int8_t other_rate_index, - u_int64_t now + u_int8_t other_rate_index ) { struct flow_key_t key = build_flow_key(dissector, direction); struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { // There isn't a flow, so we need to make one - struct flow_data_t new_data = new_flow_data(now, dissector); + struct flow_data_t new_data = new_flow_data(dissector); if (bpf_map_update_elem(&flowbee, &key, &new_data, BPF_ANY) != 0) { bpf_debug("[FLOWS] Failed to add new flow to map"); return; @@ -191,7 +186,7 @@ static __always_inline void process_icmp( data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) return; } - update_flow_rates(dissector, rate_index, data, now); + update_flow_rates(dissector, rate_index, data); } // Handle Per-Flow UDP Analysis @@ -199,14 +194,13 @@ static __always_inline void process_udp( struct dissector_t *dissector, u_int8_t direction, u_int8_t rate_index, - u_int8_t other_rate_index, - u_int64_t now + u_int8_t other_rate_index ) { struct flow_key_t key = build_flow_key(dissector, direction); struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { // There isn't a flow, so we need to make one - struct flow_data_t new_data = new_flow_data(now, dissector); + struct flow_data_t new_data = new_flow_data(dissector); if (bpf_map_update_elem(&flowbee, &key, &new_data, BPF_ANY) != 0) { bpf_debug("[FLOWS] Failed to add new flow to map"); return; @@ -214,7 +208,7 @@ static __always_inline void process_udp( data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) return; } - update_flow_rates(dissector, rate_index, data, now); + 
update_flow_rates(dissector, rate_index, data); } // Store the most recent sequence and ack numbers, and detect retransmissions. @@ -249,8 +243,7 @@ static __always_inline void process_tcp( struct dissector_t *dissector, u_int8_t direction, u_int8_t rate_index, - u_int8_t other_rate_index, - u_int64_t now + u_int8_t other_rate_index ) { // SYN packet indicating the start of a conversation. We are explicitly ignoring // SYN-ACK packets, we just want to catch the opening of a new connection. @@ -262,7 +255,7 @@ static __always_inline void process_tcp( bpf_debug("[FLOWS] New TCP Connection Detected (%u)", direction); #endif struct flow_key_t key = build_flow_key(dissector, direction); - struct flow_data_t data = new_flow_data(now, dissector); + struct flow_data_t data = new_flow_data(dissector); data.tos = dissector->tos; data.ip_flags = 0; // Obtain these if (bpf_map_update_elem(&flowbee, &key, &data, BPF_ANY) != 0) { @@ -281,7 +274,7 @@ static __always_inline void process_tcp( } // Update the flow data with the current packet's information - update_flow_rates(dissector, rate_index, data, now); + update_flow_rates(dissector, rate_index, data); // Sequence and Acknowledgement numbers detect_retries(dissector, rate_index, data); @@ -294,12 +287,12 @@ static __always_inline void process_tcp( if (tsval != data->tsval[rate_index] && tsecr != data->tsecr[rate_index]) { if (tsecr == data->tsval[other_rate_index]) { - __u64 elapsed = now - data->ts_change_time[other_rate_index]; + __u64 elapsed = dissector->now - data->ts_change_time[other_rate_index]; data->last_rtt[rate_index] = elapsed; //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); } - data->ts_change_time[rate_index] = now; + data->ts_change_time[rate_index] = dissector->now; data->tsval[rate_index] = tsval; data->tsecr[rate_index] = tsecr; } @@ -319,8 +312,6 @@ static __always_inline void track_flows( struct dissector_t *dissector, // The packet dissector from the previous step u_int8_t direction // The direction of the packet (1 = to internet, 2 = to local network) ) { - __u64 now = bpf_ktime_get_boot_ns(); - u_int8_t rate_index; u_int8_t other_rate_index; if (direction == TO_INTERNET) { @@ -334,9 +325,9 @@ static __always_inline void track_flows( // Pass to the appropriate protocol handler switch (dissector->ip_protocol) { - case IPPROTO_TCP: process_tcp(dissector, direction, rate_index, other_rate_index, now); break; - case IPPROTO_UDP: process_udp(dissector, direction, rate_index, other_rate_index, now); break; - case IPPROTO_ICMP: process_icmp(dissector, direction, rate_index, other_rate_index, now); break; + case IPPROTO_TCP: process_tcp(dissector, direction, rate_index, other_rate_index); break; + case IPPROTO_UDP: process_udp(dissector, direction, rate_index, other_rate_index); break; + case IPPROTO_ICMP: process_icmp(dissector, direction, rate_index, other_rate_index); break; default: { #ifdef VERBOSE bpf_debug("[FLOWS] Unsupported protocol: %d", dissector->ip_protocol); diff --git a/src/rust/lqos_sys/src/bpf/common/heimdall.h b/src/rust/lqos_sys/src/bpf/common/heimdall.h index 9d5b8342..17b14ba7 100644 --- a/src/rust/lqos_sys/src/bpf/common/heimdall.h +++ b/src/rust/lqos_sys/src/bpf/common/heimdall.h @@ -137,7 +137,7 @@ static __always_inline void update_heimdall(struct dissector_t *dissector, __u32 struct heimdall_data *counter = (struct heimdall_data *)bpf_map_lookup_elem(&heimdall, &key); if (counter) { - counter->last_seen = bpf_ktime_get_boot_ns(); + counter->last_seen = dissector->now; counter->packets += 1; 
counter->bytes += size; if (dissector->tos != 0) @@ -148,7 +148,7 @@ static __always_inline void update_heimdall(struct dissector_t *dissector, __u32 else { struct heimdall_data counter = {0}; - counter.last_seen = bpf_ktime_get_boot_ns(); + counter.last_seen = dissector->now; counter.bytes = size; counter.packets = 1; counter.tos = dissector->tos; @@ -160,7 +160,7 @@ static __always_inline void update_heimdall(struct dissector_t *dissector, __u32 } } else if (mode == 2) { struct heimdall_event event = {0}; - event.timetamp = bpf_ktime_get_boot_ns(); + event.timetamp = dissector->now; event.src = dissector->src_ip; event.dst = dissector->dst_ip; event.src_port = dissector->src_port; diff --git a/src/rust/lqos_sys/src/bpf/common/throughput.h b/src/rust/lqos_sys/src/bpf/common/throughput.h index 89466409..93fb4e47 100644 --- a/src/rust/lqos_sys/src/bpf/common/throughput.h +++ b/src/rust/lqos_sys/src/bpf/common/throughput.h @@ -33,13 +33,14 @@ static __always_inline void track_traffic( int direction, struct in6_addr * key, __u32 size, - __u32 tc_handle + __u32 tc_handle, + __u64 now ) { // Count the bits. It's per-CPU, so we can't be interrupted - no sync required struct host_counter * counter = (struct host_counter *)bpf_map_lookup_elem(&map_traffic, key); if (counter) { - counter->last_seen = bpf_ktime_get_boot_ns(); + counter->last_seen = now; counter->tc_handle = tc_handle; if (direction == 1) { // Download @@ -53,7 +54,7 @@ static __always_inline void track_traffic( } else { struct host_counter new_host = {0}; new_host.tc_handle = tc_handle; - new_host.last_seen = bpf_ktime_get_boot_ns(); + new_host.last_seen = now; if (direction == 1) { new_host.download_packets = 1; new_host.download_bytes = size; diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 79307cdd..9ad2a20c 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -147,7 +147,8 @@ int xdp_prog(struct xdp_md *ctx) effective_direction, &lookup_key.address, ctx->data_end - ctx->data, // end - data = length - tc_handle + tc_handle, + dissector.now ); // Send on its way @@ -303,16 +304,6 @@ int tc_iphash_to_cpu(struct __sk_buff *skb) bpf_debug("(TC) effective direction: %d", effective_direction); #endif -/* - // Call pping to obtain RTT times - struct parsing_context context = {0}; - context.now = bpf_ktime_get_ns(); - context.tcp = NULL; - context.dissector = &dissector; - context.active_host = &lookup_key.address; - tc_pping_start(&context); -*/ - if (ip_info && ip_info->tc_handle != 0) { // We found a matching mapped TC flow #ifdef VERBOSE From eb281b3edd3ac59f94876a434d79b49522c87fe7 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 11:20:27 -0500 Subject: [PATCH 048/103] Revert from a dashmap to a regular mutex for the flow container. Performance improved, and flow removal is less troublesome. 
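
A minimal sketch of the pattern this change adopts, using hypothetical FlowKey/FlowInfo stand-ins rather than the real Flowbee types: the table lives behind one process-wide Mutex, each caller locks it for the duration of its update, and expired entries are removed with a plain HashMap::remove while the lock is held, avoiding the re-entrancy pitfalls of deleting from a sharded concurrent map mid-iteration.

    use once_cell::sync::Lazy;
    use std::collections::HashMap;
    use std::sync::Mutex;

    // Hypothetical stand-ins for the real flow key/value types.
    type FlowKey = (String, u16);
    type FlowInfo = u64;

    // One process-wide table behind a plain Mutex instead of a DashMap.
    static FLOWS: Lazy<Mutex<HashMap<FlowKey, FlowInfo>>> =
        Lazy::new(|| Mutex::new(HashMap::new()));

    fn update_flow(key: FlowKey, bytes: u64) {
        // Lock, mutate, and release when `lock` drops at the end of scope.
        let mut lock = FLOWS.lock().unwrap();
        *lock.entry(key).or_insert(0) += bytes;
    }

    fn expire_flows(dead: &[FlowKey]) {
        // Removal is an ordinary HashMap operation under the same lock, so
        // there is no iterator invalidation or shard contention to worry about.
        let mut lock = FLOWS.lock().unwrap();
        for key in dead {
            lock.remove(key);
        }
    }

The trade-off is coarser locking than DashMap's sharding, which the message above reports as a net performance win for this workload.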
--- .../flow_data/flow_analysis/finished_flows.rs | 7 +- .../flow_data/flow_tracker.rs | 8 +- .../src/throughput_tracker/flow_data/mod.rs | 2 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 650 +++++++++--------- .../src/throughput_tracker/tracking_data.rs | 12 +- 5 files changed, 348 insertions(+), 331 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 27004079..3bfa123a 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -1,13 +1,8 @@ use super::{get_asn_name_and_country, FlowAnalysis}; use crate::throughput_tracker::flow_data::FlowbeeRecipient; -use dashmap::DashMap; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -use std::{ - collections::HashMap, - fmt::Debug, - sync::{Arc, Mutex}, -}; +use std::sync::{Arc, Mutex}; struct TimeBuffer { buffer: Mutex>, diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs index 5b8a3778..ad057f36 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_tracker.rs @@ -1,13 +1,13 @@ //! Provides a globally accessible vector of all flows. This is used to store //! all flows for the purpose of tracking and data-services. -use dashmap::DashMap; +use super::flow_analysis::FlowAnalysis; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -use super::flow_analysis::FlowAnalysis; +use std::{collections::HashMap, sync::Mutex}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct AsnId(pub u32); -pub static ALL_FLOWS: Lazy> = Lazy::new(|| DashMap::new()); - +pub static ALL_FLOWS: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index f4225318..608110dd 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -13,7 +13,7 @@ use std::sync::{ mpsc::{channel, Sender}, Arc, }; -pub(crate) use flow_analysis::{setup_flow_analysis, lookup_asn_id, get_asn_name_and_country, FlowAnalysis}; +pub(crate) use flow_analysis::{setup_flow_analysis, get_asn_name_and_country, FlowAnalysis}; trait FlowbeeRecipient { diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 590b2bac..8457f4c4 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -1,24 +1,25 @@ +pub mod flow_data; mod heimdall_data; mod throughput_entry; mod tracking_data; -pub mod flow_data; use crate::{ - shaped_devices_tracker::{NETWORK_JSON, STATS_NEEDS_NEW_SHAPED_DEVICES, SHAPED_DEVICES}, stats::TIME_TO_POLL_HOSTS, - throughput_tracker::tracking_data::ThroughputTracker, long_term_stats::get_network_tree, + long_term_stats::get_network_tree, + shaped_devices_tracker::{NETWORK_JSON, SHAPED_DEVICES, STATS_NEEDS_NEW_SHAPED_DEVICES}, + stats::TIME_TO_POLL_HOSTS, + throughput_tracker::tracking_data::ThroughputTracker, }; pub use heimdall_data::get_flow_stats; use log::{info, warn}; use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, TopFlowType, XdpPpingResult}; use 
lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; -use lts_client::collector::{StatsUpdateMessage, ThroughputSummary, HostSummary}; +use lts_client::collector::{HostSummary, StatsUpdateMessage, ThroughputSummary}; use once_cell::sync::Lazy; use tokio::{ sync::mpsc::Sender, time::{Duration, Instant}, }; - -use self::flow_data::{get_asn_name_and_country, AsnId, FlowAnalysis, ALL_FLOWS}; +use self::flow_data::{get_asn_name_and_country, FlowAnalysis, ALL_FLOWS}; const RETIRE_AFTER_SECONDS: u64 = 30; @@ -32,43 +33,46 @@ pub static THROUGHPUT_TRACKER: Lazy = Lazy::new(ThroughputTra /// * `long_term_stats_tx` - an optional MPSC sender to notify the /// collection thread that there is fresh data. pub async fn spawn_throughput_monitor( - long_term_stats_tx: Sender, - netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, + long_term_stats_tx: Sender, + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { info!("Starting the bandwidth monitor thread."); let interval_ms = 1000; // 1 second info!("Bandwidth check period set to {interval_ms} ms."); - tokio::spawn(throughput_task(interval_ms, long_term_stats_tx, netflow_sender)); + tokio::spawn(throughput_task( + interval_ms, + long_term_stats_tx, + netflow_sender, + )); } async fn throughput_task( - interval_ms: u64, - long_term_stats_tx: Sender, - netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> + interval_ms: u64, + long_term_stats_tx: Sender, + netflow_sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { // Obtain the flow timeout from the config, default to 30 seconds let timeout_seconds = if let Ok(config) = lqos_config::load_config() { - if let Some(flow_config) = config.flows { - flow_config.flow_timeout_seconds - } else { - 30 - } + if let Some(flow_config) = config.flows { + flow_config.flow_timeout_seconds + } else { + 30 + } } else { - 30 + 30 }; // Obtain the netflow_enabled from the config, default to false let netflow_enabled = if let Ok(config) = lqos_config::load_config() { - if let Some(flow_config) = config.flows { - flow_config.netflow_enabled - } else { - false - } + if let Some(flow_config) = config.flows { + flow_config.netflow_enabled + } else { + false + } } else { - false + false }; - loop { let start = Instant::now(); @@ -76,34 +80,34 @@ async fn throughput_task( // the tokio runtime is not blocked. 
let my_netflow_sender = netflow_sender.clone(); if let Err(e) = tokio::task::spawn_blocking(move || { - - { - let net_json = NETWORK_JSON.read().unwrap(); - net_json.zero_throughput_and_rtt(); - } // Scope to end the lock - THROUGHPUT_TRACKER.copy_previous_and_reset_rtt(); - THROUGHPUT_TRACKER.apply_new_throughput_counters(); - THROUGHPUT_TRACKER.apply_flow_data( - timeout_seconds, - netflow_enabled, - my_netflow_sender.clone(), - ); - THROUGHPUT_TRACKER.update_totals(); - THROUGHPUT_TRACKER.next_cycle(); - let duration_ms = start.elapsed().as_micros(); - TIME_TO_POLL_HOSTS.store(duration_ms as u64, std::sync::atomic::Ordering::Relaxed); - - }).await { + { + let net_json = NETWORK_JSON.read().unwrap(); + net_json.zero_throughput_and_rtt(); + } // Scope to end the lock + THROUGHPUT_TRACKER.copy_previous_and_reset_rtt(); + THROUGHPUT_TRACKER.apply_new_throughput_counters(); + THROUGHPUT_TRACKER.apply_flow_data( + timeout_seconds, + netflow_enabled, + my_netflow_sender.clone(), + ); + THROUGHPUT_TRACKER.update_totals(); + THROUGHPUT_TRACKER.next_cycle(); + let duration_ms = start.elapsed().as_micros(); + TIME_TO_POLL_HOSTS.store(duration_ms as u64, std::sync::atomic::Ordering::Relaxed); + }) + .await + { log::error!("Error polling network. {e:?}"); } tokio::spawn(submit_throughput_stats(long_term_stats_tx.clone())); let elapsed = start.elapsed(); if elapsed.as_secs_f32() < 1.0 { - let sleep_duration = Duration::from_millis(interval_ms) - start.elapsed(); - tokio::time::sleep(sleep_duration).await; + let sleep_duration = Duration::from_millis(interval_ms) - start.elapsed(); + tokio::time::sleep(sleep_duration).await; } else { - log::error!("Throughput monitor thread is running behind. It took {elapsed} to poll the network.", elapsed=elapsed.as_secs_f32()); + log::error!("Throughput monitor thread is running behind. 
It took {elapsed} to poll the network.", elapsed=elapsed.as_secs_f32()); } } } @@ -149,12 +153,15 @@ async fn submit_throughput_stats(long_term_stats_tx: Sender) }) .collect(); - let summary = Box::new((ThroughputSummary{ - bits_per_second, - shaped_bits_per_second, - packets_per_second, - hosts, - }, get_network_tree())); + let summary = Box::new(( + ThroughputSummary { + bits_per_second, + shaped_bits_per_second, + packets_per_second, + hosts, + }, + get_network_tree(), + )); // Send the stats let result = long_term_stats_tx @@ -199,141 +206,150 @@ type TopList = (XdpIpAddress, (u64, u64), (u64, u64), f32, TcHandle, String); pub fn top_n(start: u32, end: u32) -> BusResponse { let mut full_list: Vec = { - let tp_cycle = THROUGHPUT_TRACKER.cycle.load(std::sync::atomic::Ordering::Relaxed); - THROUGHPUT_TRACKER.raw_data - .iter() - .filter(|v| !v.key().as_ip().is_loopback()) - .filter(|d| retire_check(tp_cycle, d.most_recent_cycle)) - .map(|te| { - ( - *te.key(), - te.bytes_per_second, - te.packets_per_second, - te.median_latency().unwrap_or(0.0), - te.tc_handle, - te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), - ) - }) - .collect() + let tp_cycle = THROUGHPUT_TRACKER + .cycle + .load(std::sync::atomic::Ordering::Relaxed); + THROUGHPUT_TRACKER + .raw_data + .iter() + .filter(|v| !v.key().as_ip().is_loopback()) + .filter(|d| retire_check(tp_cycle, d.most_recent_cycle)) + .map(|te| { + ( + *te.key(), + te.bytes_per_second, + te.packets_per_second, + te.median_latency().unwrap_or(0.0), + te.tc_handle, + te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), + ) + }) + .collect() }; full_list.sort_by(|a, b| b.1 .0.cmp(&a.1 .0)); let result = full_list - .iter() - .skip(start as usize) - .take((end as usize) - (start as usize)) - .map( - |( - ip, - (bytes_dn, bytes_up), - (packets_dn, packets_up), - median_rtt, - tc_handle, - circuit_id, - )| IpStats { - ip_address: ip.as_ip().to_string(), - circuit_id: circuit_id.clone(), - bits_per_second: (bytes_dn * 8, bytes_up * 8), - packets_per_second: (*packets_dn, *packets_up), - median_tcp_rtt: *median_rtt, - tc_handle: *tc_handle, - }, - ) - .collect(); - BusResponse::TopDownloaders(result) - } - - pub fn worst_n(start: u32, end: u32) -> BusResponse { - let mut full_list: Vec = { - let tp_cycle = THROUGHPUT_TRACKER.cycle.load(std::sync::atomic::Ordering::Relaxed); - THROUGHPUT_TRACKER.raw_data .iter() - .filter(|v| !v.key().as_ip().is_loopback()) - .filter(|d| retire_check(tp_cycle, d.most_recent_cycle)) - .filter(|te| te.median_latency().is_some()) - .map(|te| { - ( - *te.key(), - te.bytes_per_second, - te.packets_per_second, - te.median_latency().unwrap_or(0.0), - te.tc_handle, - te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), - ) - }) - .collect() + .skip(start as usize) + .take((end as usize) - (start as usize)) + .map( + |( + ip, + (bytes_dn, bytes_up), + (packets_dn, packets_up), + median_rtt, + tc_handle, + circuit_id, + )| IpStats { + ip_address: ip.as_ip().to_string(), + circuit_id: circuit_id.clone(), + bits_per_second: (bytes_dn * 8, bytes_up * 8), + packets_per_second: (*packets_dn, *packets_up), + median_tcp_rtt: *median_rtt, + tc_handle: *tc_handle, + }, + ) + .collect(); + BusResponse::TopDownloaders(result) +} + +pub fn worst_n(start: u32, end: u32) -> BusResponse { + let mut full_list: Vec = { + let tp_cycle = THROUGHPUT_TRACKER + .cycle + .load(std::sync::atomic::Ordering::Relaxed); + THROUGHPUT_TRACKER + .raw_data + .iter() + .filter(|v| !v.key().as_ip().is_loopback()) + .filter(|d| retire_check(tp_cycle, 
d.most_recent_cycle)) + .filter(|te| te.median_latency().is_some()) + .map(|te| { + ( + *te.key(), + te.bytes_per_second, + te.packets_per_second, + te.median_latency().unwrap_or(0.0), + te.tc_handle, + te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), + ) + }) + .collect() }; full_list.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap()); let result = full_list - .iter() - .skip(start as usize) - .take((end as usize) - (start as usize)) - .map( - |( - ip, - (bytes_dn, bytes_up), - (packets_dn, packets_up), - median_rtt, - tc_handle, - circuit_id, - )| IpStats { - ip_address: ip.as_ip().to_string(), - circuit_id: circuit_id.clone(), - bits_per_second: (bytes_dn * 8, bytes_up * 8), - packets_per_second: (*packets_dn, *packets_up), - median_tcp_rtt: *median_rtt, - tc_handle: *tc_handle, - }, - ) - .collect(); - BusResponse::WorstRtt(result) - } - - pub fn best_n(start: u32, end: u32) -> BusResponse { - let mut full_list: Vec = { - let tp_cycle = THROUGHPUT_TRACKER.cycle.load(std::sync::atomic::Ordering::Relaxed); - THROUGHPUT_TRACKER.raw_data .iter() - .filter(|v| !v.key().as_ip().is_loopback()) - .filter(|d| retire_check(tp_cycle, d.most_recent_cycle)) - .filter(|te| te.median_latency().is_some()) - .map(|te| { - ( - *te.key(), - te.bytes_per_second, - te.packets_per_second, - te.median_latency().unwrap_or(0.0), - te.tc_handle, - te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), - ) - }) - .collect() + .skip(start as usize) + .take((end as usize) - (start as usize)) + .map( + |( + ip, + (bytes_dn, bytes_up), + (packets_dn, packets_up), + median_rtt, + tc_handle, + circuit_id, + )| IpStats { + ip_address: ip.as_ip().to_string(), + circuit_id: circuit_id.clone(), + bits_per_second: (bytes_dn * 8, bytes_up * 8), + packets_per_second: (*packets_dn, *packets_up), + median_tcp_rtt: *median_rtt, + tc_handle: *tc_handle, + }, + ) + .collect(); + BusResponse::WorstRtt(result) +} + +pub fn best_n(start: u32, end: u32) -> BusResponse { + let mut full_list: Vec = { + let tp_cycle = THROUGHPUT_TRACKER + .cycle + .load(std::sync::atomic::Ordering::Relaxed); + THROUGHPUT_TRACKER + .raw_data + .iter() + .filter(|v| !v.key().as_ip().is_loopback()) + .filter(|d| retire_check(tp_cycle, d.most_recent_cycle)) + .filter(|te| te.median_latency().is_some()) + .map(|te| { + ( + *te.key(), + te.bytes_per_second, + te.packets_per_second, + te.median_latency().unwrap_or(0.0), + te.tc_handle, + te.circuit_id.as_ref().unwrap_or(&String::new()).clone(), + ) + }) + .collect() }; full_list.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap()); full_list.reverse(); let result = full_list - .iter() - .skip(start as usize) - .take((end as usize) - (start as usize)) - .map( - |( - ip, - (bytes_dn, bytes_up), - (packets_dn, packets_up), - median_rtt, - tc_handle, - circuit_id, - )| IpStats { - ip_address: ip.as_ip().to_string(), - circuit_id: circuit_id.clone(), - bits_per_second: (bytes_dn * 8, bytes_up * 8), - packets_per_second: (*packets_dn, *packets_up), - median_tcp_rtt: *median_rtt, - tc_handle: *tc_handle, - }, - ) - .collect(); + .iter() + .skip(start as usize) + .take((end as usize) - (start as usize)) + .map( + |( + ip, + (bytes_dn, bytes_up), + (packets_dn, packets_up), + median_rtt, + tc_handle, + circuit_id, + )| IpStats { + ip_address: ip.as_ip().to_string(), + circuit_id: circuit_id.clone(), + bits_per_second: (bytes_dn * 8, bytes_up * 8), + packets_per_second: (*packets_dn, *packets_up), + median_tcp_rtt: *median_rtt, + tc_handle: *tc_handle, + }, + ) + .collect(); BusResponse::BestRtt(result) - } +} pub fn 
xdp_pping_compat() -> BusResponse { let raw_cycle = THROUGHPUT_TRACKER @@ -431,166 +447,170 @@ type FullList = (XdpIpAddress, (u64, u64), (u64, u64), f32, TcHandle, u64); pub fn all_unknown_ips() -> BusResponse { let boot_time = time_since_boot(); if boot_time.is_err() { - warn!("The Linux system clock isn't available to provide time since boot, yet."); - warn!("This only happens immediately after a reboot."); - return BusResponse::NotReadyYet; + warn!("The Linux system clock isn't available to provide time since boot, yet."); + warn!("This only happens immediately after a reboot."); + return BusResponse::NotReadyYet; } let boot_time = boot_time.unwrap(); let time_since_boot = Duration::from(boot_time); - let five_minutes_ago = - time_since_boot.saturating_sub(Duration::from_secs(300)); + let five_minutes_ago = time_since_boot.saturating_sub(Duration::from_secs(300)); let five_minutes_ago_nanoseconds = five_minutes_ago.as_nanos(); - + let mut full_list: Vec = { - THROUGHPUT_TRACKER.raw_data - .iter() - .filter(|v| !v.key().as_ip().is_loopback()) - .filter(|d| d.tc_handle.as_u32() == 0) - .filter(|d| d.last_seen as u128 > five_minutes_ago_nanoseconds) - .map(|te| { - ( - *te.key(), - te.bytes, - te.packets, - te.median_latency().unwrap_or(0.0), - te.tc_handle, - te.most_recent_cycle, - ) - }) - .collect() + THROUGHPUT_TRACKER + .raw_data + .iter() + .filter(|v| !v.key().as_ip().is_loopback()) + .filter(|d| d.tc_handle.as_u32() == 0) + .filter(|d| d.last_seen as u128 > five_minutes_ago_nanoseconds) + .map(|te| { + ( + *te.key(), + te.bytes, + te.packets, + te.median_latency().unwrap_or(0.0), + te.tc_handle, + te.most_recent_cycle, + ) + }) + .collect() }; full_list.sort_by(|a, b| b.5.partial_cmp(&a.5).unwrap()); let result = full_list - .iter() - .map( - |( - ip, - (bytes_dn, bytes_up), - (packets_dn, packets_up), - median_rtt, - tc_handle, - _last_seen, - )| IpStats { - ip_address: ip.as_ip().to_string(), - circuit_id: String::new(), - bits_per_second: (bytes_dn * 8, bytes_up * 8), - packets_per_second: (*packets_dn, *packets_up), - median_tcp_rtt: *median_rtt, - tc_handle: *tc_handle, - }, - ) - .collect(); + .iter() + .map( + |( + ip, + (bytes_dn, bytes_up), + (packets_dn, packets_up), + median_rtt, + tc_handle, + _last_seen, + )| IpStats { + ip_address: ip.as_ip().to_string(), + circuit_id: String::new(), + bits_per_second: (bytes_dn * 8, bytes_up * 8), + packets_per_second: (*packets_dn, *packets_up), + median_tcp_rtt: *median_rtt, + tc_handle: *tc_handle, + }, + ) + .collect(); BusResponse::AllUnknownIps(result) - } +} - /// For debugging: dump all active flows! - pub fn dump_active_flows() -> BusResponse { - let result: Vec = ALL_FLOWS.iter().map(|row| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.value().1.asn_id.0); +/// For debugging: dump all active flows! 
+pub fn dump_active_flows() -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + let result: Vec = lock + .iter() + .map(|(key, row)| { + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.1.asn_id.0); - lqos_bus::FlowbeeData { - remote_ip: row.key().remote_ip.as_ip().to_string(), - local_ip: row.key().local_ip.as_ip().to_string(), - src_port: row.key().src_port, - dst_port: row.key().dst_port, - ip_protocol: FlowbeeProtocol::from(row.key().ip_protocol), - bytes_sent: row.value().0.bytes_sent, - packets_sent: row.value().0.packets_sent, - rate_estimate_bps: row.value().0.rate_estimate_bps, - retries: row.value().0.retries, - last_rtt: row.value().0.last_rtt, - end_status: row.value().0.end_status, - tos: row.value().0.tos, - flags: row.value().0.flags, - remote_asn: row.value().1.asn_id.0, - remote_asn_name, - remote_asn_country, - analysis: row.value().1.protocol_analysis.to_string(), - } - }).collect(); + lqos_bus::FlowbeeData { + remote_ip: key.remote_ip.as_ip().to_string(), + local_ip: key.local_ip.as_ip().to_string(), + src_port: key.src_port, + dst_port: key.dst_port, + ip_protocol: FlowbeeProtocol::from(key.ip_protocol), + bytes_sent: row.0.bytes_sent, + packets_sent: row.0.packets_sent, + rate_estimate_bps: row.0.rate_estimate_bps, + retries: row.0.retries, + last_rtt: row.0.last_rtt, + end_status: row.0.end_status, + tos: row.0.tos, + flags: row.0.flags, + remote_asn: row.1.asn_id.0, + remote_asn_name, + remote_asn_country, + analysis: row.1.protocol_analysis.to_string(), + } + }) + .collect(); BusResponse::AllActiveFlows(result) - } +} - /// Count active flows - pub fn count_active_flows() -> BusResponse { - BusResponse::CountActiveFlows(ALL_FLOWS.len() as u64) - } +/// Count active flows +pub fn count_active_flows() -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + BusResponse::CountActiveFlows(lock.len() as u64) +} - /// Top Flows Report - pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { - let mut table: Vec<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> = ALL_FLOWS - .iter() - .map(|row| ( - row.key().clone(), - row.value().clone(), - )) - .collect(); +/// Top Flows Report +pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { + let lock = ALL_FLOWS.lock().unwrap(); + let mut table: Vec<(FlowbeeKey, (FlowbeeData, FlowAnalysis))> = lock + .iter() + .map(|(key, value)| (key.clone(), value.clone())) + .collect(); + std::mem::drop(lock); // Early lock release match flow_type { - TopFlowType::RateEstimate => { - table.sort_by(|a, b| { - let a_total = a.1.0.rate_estimate_bps[0] + a.1.0.rate_estimate_bps[1]; - let b_total = b.1.0.rate_estimate_bps[0] + b.1.0.rate_estimate_bps[1]; - b_total.cmp(&a_total) - }); - } - TopFlowType::Bytes => { - table.sort_by(|a, b| { - let a_total = a.1.0.bytes_sent[0] + a.1.0.bytes_sent[1]; - let b_total = b.1.0.bytes_sent[0] + b.1.0.bytes_sent[1]; - b_total.cmp(&a_total) - }); - } - TopFlowType::Packets => { - table.sort_by(|a, b| { - let a_total = a.1.0.packets_sent[0] + a.1.0.packets_sent[1]; - let b_total = b.1.0.packets_sent[0] + b.1.0.packets_sent[1]; - b_total.cmp(&a_total) - }); - } - TopFlowType::Drops => { - table.sort_by(|a, b| { - let a_total = a.1.0.retries[0] + a.1.0.retries[1]; - let b_total = b.1.0.retries[0] + b.1.0.retries[1]; - b_total.cmp(&a_total) - }); - } - TopFlowType::RoundTripTime => { - table.sort_by(|a, b| { - let a_total = a.1.0.last_rtt[0] + a.1.0.last_rtt[1]; - let b_total = b.1.0.last_rtt[0] + b.1.0.last_rtt[1]; - b_total.cmp(&a_total) - }); - } + 
TopFlowType::RateEstimate => { + table.sort_by(|a, b| { + let a_total = a.1 .0.rate_estimate_bps[0] + a.1 .0.rate_estimate_bps[1]; + let b_total = b.1 .0.rate_estimate_bps[0] + b.1 .0.rate_estimate_bps[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Bytes => { + table.sort_by(|a, b| { + let a_total = a.1 .0.bytes_sent[0] + a.1 .0.bytes_sent[1]; + let b_total = b.1 .0.bytes_sent[0] + b.1 .0.bytes_sent[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Packets => { + table.sort_by(|a, b| { + let a_total = a.1 .0.packets_sent[0] + a.1 .0.packets_sent[1]; + let b_total = b.1 .0.packets_sent[0] + b.1 .0.packets_sent[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::Drops => { + table.sort_by(|a, b| { + let a_total = a.1 .0.retries[0] + a.1 .0.retries[1]; + let b_total = b.1 .0.retries[0] + b.1 .0.retries[1]; + b_total.cmp(&a_total) + }); + } + TopFlowType::RoundTripTime => { + table.sort_by(|a, b| { + let a_total = a.1 .0.last_rtt[0] + a.1 .0.last_rtt[1]; + let b_total = b.1 .0.last_rtt[0] + b.1 .0.last_rtt[1]; + b_total.cmp(&a_total) + }); + } } let result = table - .iter() - .take(n as usize) - .map(|(ip, flow)| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.asn_id.0); - lqos_bus::FlowbeeData { - remote_ip: ip.remote_ip.as_ip().to_string(), - local_ip: ip.local_ip.as_ip().to_string(), - src_port: ip.src_port, - dst_port: ip.dst_port, - ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), - bytes_sent: flow.0.bytes_sent, - packets_sent: flow.0.packets_sent, - rate_estimate_bps: flow.0.rate_estimate_bps, - retries: flow.0.retries, - last_rtt: flow.0.last_rtt, - end_status: flow.0.end_status, - tos: flow.0.tos, - flags: flow.0.flags, - remote_asn: flow.1.asn_id.0, - remote_asn_name, - remote_asn_country, - analysis: flow.1.protocol_analysis.to_string(), - } - }) - .collect(); + .iter() + .take(n as usize) + .map(|(ip, flow)| { + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.asn_id.0); + lqos_bus::FlowbeeData { + remote_ip: ip.remote_ip.as_ip().to_string(), + local_ip: ip.local_ip.as_ip().to_string(), + src_port: ip.src_port, + dst_port: ip.dst_port, + ip_protocol: FlowbeeProtocol::from(ip.ip_protocol), + bytes_sent: flow.0.bytes_sent, + packets_sent: flow.0.packets_sent, + rate_estimate_bps: flow.0.rate_estimate_bps, + retries: flow.0.retries, + last_rtt: flow.0.last_rtt, + end_status: flow.0.end_status, + tos: flow.0.tos, + flags: flow.0.flags, + remote_asn: flow.1.asn_id.0, + remote_asn_name, + remote_asn_country, + analysis: flow.1.protocol_analysis.to_string(), + } + }) + .collect(); BusResponse::TopFlows(result) - } \ No newline at end of file +} diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 0de2c1e3..fd8874ba 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -1,6 +1,6 @@ use std::{sync::atomic::AtomicU64, time::Duration}; use crate::{shaped_devices_tracker::{SHAPED_DEVICES, NETWORK_JSON}, stats::{HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP}}; -use super::{flow_data::{lookup_asn_id, AsnId, FlowAnalysis, ALL_FLOWS}, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; +use super::{flow_data::{FlowAnalysis, ALL_FLOWS}, throughput_entry::ThroughputEntry, RETIRE_AFTER_SECONDS}; use dashmap::DashMap; use lqos_bus::TcHandle; use lqos_sys::{flowbee_data::{FlowbeeData, FlowbeeKey}, iterate_flows, throughput_for_each}; @@ -195,8 +195,9 @@ impl ThroughputTracker { 
// This flow has expired. Add it to the list to be cleaned expired_keys.push(key.clone()); } else { + let mut lock = ALL_FLOWS.lock().unwrap(); // We have a valid flow, so it needs to be tracked - if let Some(mut this_flow) = ALL_FLOWS.get_mut(&key) { + if let Some(this_flow) = lock.get_mut(&key) { this_flow.0.last_seen = data.last_seen; this_flow.0.bytes_sent = data.bytes_sent; this_flow.0.packets_sent = data.packets_sent; @@ -210,7 +211,7 @@ impl ThroughputTracker { // Insert it into the map let flow_analysis = FlowAnalysis::new(&key); - ALL_FLOWS.insert(key.clone(), (data.clone(), flow_analysis)); + lock.insert(key.clone(), (data.clone(), flow_analysis)); } // TCP - we have RTT data? 6 is TCP @@ -241,15 +242,16 @@ impl ThroughputTracker { if let Err(e) = ret { log::warn!("Failed to end flows: {:?}", e); } + let mut lock = ALL_FLOWS.lock().unwrap(); for key in expired_keys { // Send it off to netperf for analysis if we are supporting doing so. if netflow_enabled { - if let Some(d) = ALL_FLOWS.get(&key) { + if let Some(d) = lock.get(&key) { let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); } } - //ALL_FLOWS.remove(&key); + lock.remove(&key); } } } From 91a48bc2759ecc23245d66e446437004c2fbc8dd Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 12:24:18 -0500 Subject: [PATCH 049/103] Use the new flows system rather than Heimdall to display the circuits flows tab. Not complete yet - the backend is all there still. Also needs some tweaking on the hyperfocus for packet capture mode. --- src/rust/lqos_bus/src/bus/request.rs | 4 ++ src/rust/lqos_bus/src/bus/response.rs | 4 ++ src/rust/lqos_node_manager/src/queue_info.rs | 17 +++++++- .../static/circuit_queue.html | 32 ++++++++++++--- src/rust/lqos_node_manager/static/main.html | 4 +- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- src/rust/lqosd/src/main.rs | 1 + src/rust/lqosd/src/throughput_tracker/mod.rs | 40 +++++++++++++++++++ 8 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index 97199222..e796fa51 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -134,6 +134,7 @@ pub enum BusRequest { GetLqosStats, /// Tell me flow stats for a given IP address + #[deprecated(note = "Heimdall flows are being migrated to flows 2")] GetFlowStats(String), /// Tell Heimdall to hyper-focus on an IP address for a bit @@ -167,6 +168,9 @@ pub enum BusRequest { /// The number of flows to return n: u32 }, + + /// Flows by IP Address + FlowsByIp(String), } /// Defines the type of "top" flow being requested diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index 80c26bc1..48d716e8 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -92,6 +92,7 @@ pub enum BusResponse { }, /// Flow Data + #[deprecated(note = "Being replaced by FlowbeeData")] FlowData(Vec<(FlowTransport, Option)>), /// The index of the new packet collection session @@ -125,4 +126,7 @@ pub enum BusResponse { /// Top Flopws TopFlows(Vec), + + /// Flows by IP + FlowsByIp(Vec), } diff --git a/src/rust/lqos_node_manager/src/queue_info.rs b/src/rust/lqos_node_manager/src/queue_info.rs index 3a0a34ab..d34a7668 100644 --- a/src/rust/lqos_node_manager/src/queue_info.rs +++ b/src/rust/lqos_node_manager/src/queue_info.rs @@ -1,7 +1,7 @@ use crate::auth_guard::AuthGuard; use crate::cache_control::NoCache; use crate::tracker::{SHAPED_DEVICES, lookup_dns}; -use 
lqos_bus::{bus_request, BusRequest, BusResponse, FlowTransport, PacketHeader, QueueStoreTransit}; +use lqos_bus::{bus_request, BusRequest, BusResponse, FlowTransport, FlowbeeData, PacketHeader, QueueStoreTransit}; use rocket::fs::NamedFile; use rocket::http::Status; use rocket::response::content::RawJson; @@ -107,6 +107,19 @@ pub async fn raw_queue_by_circuit( } #[get("/api/flows/")] +pub async fn flow_stats(ip_list: String, _auth: AuthGuard) -> NoCache>> { + let mut result = Vec::new(); + let request: Vec = ip_list.split(',').map(|ip| BusRequest::FlowsByIp(ip.to_string())).collect(); + let responses = bus_request(request).await.unwrap(); + for r in responses.iter() { + if let BusResponse::FlowsByIp(flow) = r { + result.extend_from_slice(flow); + } + } + NoCache::new(Json(result)) +} + +/*#[get("/api/flows/")] pub async fn flow_stats(ip_list: String, _auth: AuthGuard) -> NoCache)>>> { let mut result = Vec::new(); let request: Vec = ip_list.split(',').map(|ip| BusRequest::GetFlowStats(ip.to_string())).collect(); @@ -117,7 +130,7 @@ pub async fn flow_stats(ip_list: String, _auth: AuthGuard) -> NoCache
Flows (Last 30 Seconds)
-
@@ -779,10 +775,35 @@ } ip_list = ip_list.substring(0, ip_list.length - 1); if (ip_list == "") return; - msgPackGet("/api/flows/" + ip_list, (data) => { + $.get("/api/flows/" + ip_list, (data) => { + //msgPackGet("/api/flows/" + ip_list, (data) => { //console.log(data); let html = ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + for (var i=0; i"; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + } + html += ""; + + /*html += ""; html += ""; html += ""; html += ""; @@ -837,6 +858,7 @@ html += ""; } html += "
Connection | Bytes | Packets | TCP Retries | TCP RTT | ASN | ASN Country
" + scaleNumber(data[i].bytes_sent[0]) + " / " + scaleNumber(data[i].bytes_sent[1]) + "" + scaleNumber(data[i].packets_sent[0]) + " / " + scaleNumber(data[i].packets_sent[1]) + "" + data[i].retries[0] + " / " + data[i].retries[1] + "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + " / " + (data[i].last_rtt[1] / 1000000).toFixed(2) + "(" + data[i].remote_asn + ") " + data[i].remote_asn_name + "" + data[i].remote_asn_country + "
Protocol | Src | Src Port
"; + */ $("#flowList").html(html); }) } diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index ebde62f2..1c14c9a9 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -333,8 +333,8 @@ html += "" + data[i].local_ip + ""; html += "" + data[i].remote_ip + ""; // TODO: Check scaling - html += "" + scaleNumber(data[i].rate_estimate_bps[0] * 8) + ""; - html += "" + scaleNumber(data[i].rate_estimate_bps[1] * 8) + ""; + html += "" + scaleNumber(data[i].rate_estimate_bps[0]) + ""; + html += "" + scaleNumber(data[i].rate_estimate_bps[1]) + ""; html += "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + ""; html += "" + (data[i].last_rtt[1] / 1000000).toFixed(2) + ""; html += "" + data[i].retries[0] + "/" + data[i].retries[1] + ""; diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 9c2cd02a..719c4db1 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -158,7 +158,7 @@ static __always_inline void update_flow_rates( if (dissector->now > data->next_count_time[rate_index]) { // Calculate the rate estimate __u64 bits = (data->bytes_sent[rate_index] - data->next_count_bytes[rate_index])*8; - __u64 time = (dissector->now - data->last_count_time[rate_index]) / 10000000; // 1 Second + __u64 time = (dissector->now - data->last_count_time[rate_index]) / SECOND_IN_NANOS; // 1 Second data->rate_estimate_bps[rate_index] = (bits/time); // bits per second data->next_count_time[rate_index] = dissector->now + SECOND_IN_NANOS; data->next_count_bytes[rate_index] = data->bytes_sent[rate_index]; diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 48f11c24..b18cb6d2 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -232,6 +232,7 @@ fn handle_bus_requests( throughput_tracker::count_active_flows() } BusRequest::TopFlows { n, flow_type } => throughput_tracker::top_flows(*n, *flow_type), + BusRequest::FlowsByIp(ip) => throughput_tracker::flows_by_ip(ip), }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 8457f4c4..8c8f16c0 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -2,6 +2,8 @@ pub mod flow_data; mod heimdall_data; mod throughput_entry; mod tracking_data; +use std::net::IpAddr; + use crate::{ long_term_stats::get_network_tree, shaped_devices_tracker::{NETWORK_JSON, SHAPED_DEVICES, STATS_NEEDS_NEW_SHAPED_DEVICES}, @@ -614,3 +616,41 @@ pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { BusResponse::TopFlows(result) } + +/// Flows by IP +pub fn flows_by_ip(ip: &str) -> BusResponse { + if let Ok(ip) = ip.parse::() { + let ip = XdpIpAddress::from_ip(ip); + let lock = ALL_FLOWS.lock().unwrap(); + let matching_flows: Vec<_> = lock + .iter() + .filter(|(key, _)| key.local_ip == ip) + .map(|(key, row)| { + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.1.asn_id.0); + + lqos_bus::FlowbeeData { + remote_ip: key.remote_ip.as_ip().to_string(), + local_ip: key.local_ip.as_ip().to_string(), + src_port: key.src_port, + dst_port: key.dst_port, + ip_protocol: FlowbeeProtocol::from(key.ip_protocol), + bytes_sent: row.0.bytes_sent, + packets_sent: row.0.packets_sent, + rate_estimate_bps: row.0.rate_estimate_bps, + retries: row.0.retries, + last_rtt: row.0.last_rtt, + end_status: row.0.end_status, + tos: row.0.tos, 
+ flags: row.0.flags, + remote_asn: row.1.asn_id.0, + remote_asn_name, + remote_asn_country, + analysis: row.1.protocol_analysis.to_string(), + } + }) + .collect(); + + return BusResponse::FlowsByIp(matching_flows); + } + BusResponse::Ack +} \ No newline at end of file From 445cdcda811c202ffb31df6ad96ff466758a1221 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 13:27:42 -0500 Subject: [PATCH 050/103] Remove most of the Heimdall mode 1 path, cleaning up the execution path now that we have global flow tracking. --- src/rust/lqos_bus/src/bus/request.rs | 4 - src/rust/lqos_bus/src/bus/response.rs | 6 +- src/rust/lqos_bus/src/ip_stats.rs | 35 ---- src/rust/lqos_bus/src/lib.rs | 2 +- src/rust/lqos_heimdall/src/flows.rs | 165 ------------------ src/rust/lqos_heimdall/src/lib.rs | 10 +- src/rust/lqos_node_manager/src/queue_info.rs | 2 +- .../lqos_node_manager/static/ip_dump.html | 1 + src/rust/lqos_sys/src/bpf/common/heimdall.h | 65 +------ src/rust/lqos_sys/src/bpf/lqos_kern.c | 28 --- src/rust/lqos_sys/src/bpf_iterator.rs | 39 +---- src/rust/lqos_sys/src/heimdall_data.rs | 33 ---- src/rust/lqos_sys/src/lib.rs | 3 +- src/rust/lqosd/src/main.rs | 2 - .../src/throughput_tracker/heimdall_data.rs | 14 -- src/rust/lqosd/src/throughput_tracker/mod.rs | 2 - 16 files changed, 10 insertions(+), 401 deletions(-) delete mode 100644 src/rust/lqos_heimdall/src/flows.rs delete mode 100644 src/rust/lqos_sys/src/heimdall_data.rs delete mode 100644 src/rust/lqosd/src/throughput_tracker/heimdall_data.rs diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index e796fa51..fe58b1cf 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -133,10 +133,6 @@ pub enum BusRequest { /// Obtain the lqosd statistics GetLqosStats, - /// Tell me flow stats for a given IP address - #[deprecated(note = "Heimdall flows are being migrated to flows 2")] - GetFlowStats(String), - /// Tell Heimdall to hyper-focus on an IP address for a bit GatherPacketData(String), diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index 48d716e8..f57784fa 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -1,6 +1,6 @@ use super::QueueStoreTransit; use crate::{ - ip_stats::{FlowbeeData, PacketHeader}, FlowTransport, IpMapping, IpStats, XdpPpingResult, + ip_stats::{FlowbeeData, PacketHeader}, IpMapping, IpStats, XdpPpingResult, }; use lts_client::transport_data::{StatsTotals, StatsHost, StatsTreeNode}; use serde::{Deserialize, Serialize}; @@ -91,10 +91,6 @@ pub enum BusResponse { tracked_flows: u64, }, - /// Flow Data - #[deprecated(note = "Being replaced by FlowbeeData")] - FlowData(Vec<(FlowTransport, Option)>), - /// The index of the new packet collection session PacketCollectionSession { /// The identifier of the capture session diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 63730b42..41517a79 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -67,41 +67,6 @@ pub struct XdpPpingResult { pub samples: u32, } -/// Defines an IP protocol for display in the flow -/// tracking (Heimdall) system. -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] -pub enum FlowProto { - /// A TCP flow - TCP, - /// A UDP flow - UDP, - /// An ICMP flow - ICMP -} - -/// Defines the display data for a flow in Heimdall. 
-#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] -pub struct FlowTransport { - /// The Source IP address - pub src: String, - /// The Destination IP address - pub dst: String, - /// The flow protocol (see `FlowProto`) - pub proto: FlowProto, - /// The source port, which is overridden to ICMP code on ICMP flows. - pub src_port: u16, - /// The destination port, which isn't useful at all on ICMP flows. - pub dst_port: u16, - /// The number of bytes since we started tracking this flow. - pub bytes: u64, - /// The number of packets since we started tracking this flow. - pub packets: u64, - /// Detected DSCP code if any - pub dscp: u8, - /// Detected ECN bit status (0-3) - pub ecn: u8, -} - /// Extract the 6-bit DSCP and 2-bit ECN code from a TOS field /// in an IP header. pub fn tos_parser(tos: u8) -> (u8, u8) { diff --git a/src/rust/lqos_bus/src/lib.rs b/src/rust/lqos_bus/src/lib.rs index 3cbcf416..9a60fd75 100644 --- a/src/rust/lqos_bus/src/lib.rs +++ b/src/rust/lqos_bus/src/lib.rs @@ -13,7 +13,7 @@ mod bus; mod ip_stats; pub use ip_stats::{ - tos_parser, FlowProto, FlowTransport, IpMapping, IpStats, PacketHeader, + tos_parser, IpMapping, IpStats, PacketHeader, XdpPpingResult, FlowbeeData, FlowbeeProtocol }; mod tc_handle; diff --git a/src/rust/lqos_heimdall/src/flows.rs b/src/rust/lqos_heimdall/src/flows.rs deleted file mode 100644 index 215e093d..00000000 --- a/src/rust/lqos_heimdall/src/flows.rs +++ /dev/null @@ -1,165 +0,0 @@ -use crate::{timeline::expire_timeline, FLOW_EXPIRE_SECS}; -use dashmap::DashMap; -use lqos_bus::{tos_parser, BusResponse, FlowTransport}; -use lqos_sys::heimdall_data::{HeimdallKey, HeimdallData}; -use lqos_utils::{unix_time::time_since_boot, XdpIpAddress}; -use once_cell::sync::Lazy; -use std::{collections::HashSet, time::Duration}; - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -struct FlowKey { - src: XdpIpAddress, - dst: XdpIpAddress, - proto: u8, - src_port: u16, - dst_port: u16, -} - -#[derive(Clone, Debug, Default)] -struct FlowData { - last_seen: u64, - bytes: u64, - packets: u64, - tos: u8, -} - -impl From<&HeimdallKey> for FlowKey { - fn from(value: &HeimdallKey) -> Self { - Self { - src: value.src_ip, - dst: value.dst_ip, - proto: value.ip_protocol, - src_port: value.src_port, - dst_port: value.dst_port, - } - } -} - -static FLOW_DATA: Lazy> = Lazy::new(DashMap::new); - -/*pub(crate) fn record_flow(event: &HeimdallEvent) { - let key: FlowKey = event.into(); - if let Some(mut data) = FLOW_DATA.get_mut(&key) { - data.last_seen = event.timestamp; - data.packets += 1; - data.bytes += event.size as u64; - data.tos = event.tos; - } else { - FLOW_DATA.insert( - key, - FlowData { - last_seen: event.timestamp, - bytes: event.size.into(), - packets: 1, - tos: event.tos, - }, - ); - } -}*/ - - -/// Iterates through all throughput entries, and sends them in turn to `callback`. -/// This elides the need to clone or copy data. 
-fn heimdall_for_each( - callback: &mut dyn FnMut(&HeimdallKey, &[HeimdallData]), -) { - /*if let Ok(heimdall) = BpfPerCpuMap::::from_path( - "/sys/fs/bpf/heimdall", - ) { - heimdall.for_each(callback); - }*/ - lqos_sys::iterate_heimdall(callback); -} - - -fn combine_flows(values: &[HeimdallData]) -> FlowData { - let mut result = FlowData::default(); - let mut ls = 0; - values.iter().for_each(|v| { - result.bytes += v.bytes; - result.packets += v.packets; - result.tos += v.tos; - if v.last_seen > ls { - ls = v.last_seen; - } - }); - result.last_seen = ls; - result -} - -pub fn read_flows() { - heimdall_for_each(&mut |key, value| { - let flow_key = key.into(); - let combined = combine_flows(value); - if let Some(mut flow) = FLOW_DATA.get_mut(&flow_key) { - flow.last_seen = combined.last_seen; - flow.bytes = combined.bytes; - flow.packets = combined.packets; - flow.tos = combined.tos; - } else { - FLOW_DATA.insert(flow_key, combined); - } - }); -} - -/// Expire flows that have not been seen in a while. -pub fn expire_heimdall_flows() { - if let Ok(now) = time_since_boot() { - let since_boot = Duration::from(now); - let expire = (since_boot - Duration::from_secs(FLOW_EXPIRE_SECS)).as_nanos() as u64; - FLOW_DATA.retain(|_k, v| v.last_seen > expire); - expire_timeline(); - } -} - -/// Get the flow stats for a given IP address. -pub fn get_flow_stats(ip: XdpIpAddress) -> BusResponse { - let mut result = Vec::new(); - - // Obtain all the flows - let mut all_flows = Vec::new(); - for value in FLOW_DATA.iter() { - let key = value.key(); - if key.src == ip || key.dst == ip { - let (dscp, ecn) = tos_parser(value.tos); - all_flows.push(FlowTransport { - src: key.src.as_ip().to_string(), - dst: key.dst.as_ip().to_string(), - src_port: key.src_port, - dst_port: key.dst_port, - proto: match key.proto { - 6 => lqos_bus::FlowProto::TCP, - 17 => lqos_bus::FlowProto::UDP, - _ => lqos_bus::FlowProto::ICMP, - }, - bytes: value.bytes, - packets: value.packets, - dscp, - ecn, - }); - } - } - - // Turn them into reciprocal pairs - let mut done = HashSet::new(); - for (i, flow) in all_flows.iter().enumerate() { - if !done.contains(&i) { - let flow_a = flow.clone(); - let flow_b = if let Some(flow_b) = all_flows - .iter() - .position(|f| f.src == flow_a.dst && f.src_port == flow_a.dst_port) - { - done.insert(flow_b); - Some(all_flows[flow_b].clone()) - } else { - None - }; - - result.push((flow_a, flow_b)); - } - } - - result.sort_by(|a, b| b.0.bytes.cmp(&a.0.bytes)); - - BusResponse::FlowData(result) -} diff --git a/src/rust/lqos_heimdall/src/lib.rs b/src/rust/lqos_heimdall/src/lib.rs index 5531858b..a770bdfd 100644 --- a/src/rust/lqos_heimdall/src/lib.rs +++ b/src/rust/lqos_heimdall/src/lib.rs @@ -7,8 +7,6 @@ mod config; pub mod perf_interface; pub mod stats; pub use config::{HeimdalConfig, HeimdallMode}; -mod flows; -pub use flows::{expire_heimdall_flows, get_flow_stats}; mod timeline; pub use timeline::{n_second_packet_dump, n_second_pcap, hyperfocus_on_target}; mod pcap; @@ -16,7 +14,7 @@ mod watchlist; use lqos_utils::fdtimer::periodic; pub use watchlist::{heimdall_expire, heimdall_watch_ip, set_heimdall_mode}; -use crate::flows::read_flows; +use crate::timeline::expire_timeline; /// How long should Heimdall keep watching a flow after being requested /// to do so? Setting this to a long period increases CPU load after the @@ -24,9 +22,6 @@ use crate::flows::read_flows; /// collections if the client hasn't maintained the 1s request cadence. 
const EXPIRE_WATCHES_SECS: u64 = 5; -/// How long should Heimdall retain flow summary data? -const FLOW_EXPIRE_SECS: u64 = 10; - /// How long should Heimdall retain packet timeline data? const TIMELINE_EXPIRE_SECS: u64 = 10; @@ -48,9 +43,8 @@ pub async fn start_heimdall() { std::thread::spawn(move || { periodic(interval_ms, "Heimdall Packet Watcher", &mut || { - read_flows(); - expire_heimdall_flows(); heimdall_expire(); + expire_timeline(); }); }); } diff --git a/src/rust/lqos_node_manager/src/queue_info.rs b/src/rust/lqos_node_manager/src/queue_info.rs index d34a7668..c48ed138 100644 --- a/src/rust/lqos_node_manager/src/queue_info.rs +++ b/src/rust/lqos_node_manager/src/queue_info.rs @@ -1,7 +1,7 @@ use crate::auth_guard::AuthGuard; use crate::cache_control::NoCache; use crate::tracker::{SHAPED_DEVICES, lookup_dns}; -use lqos_bus::{bus_request, BusRequest, BusResponse, FlowTransport, FlowbeeData, PacketHeader, QueueStoreTransit}; +use lqos_bus::{bus_request, BusRequest, BusResponse, FlowbeeData, PacketHeader, QueueStoreTransit}; use rocket::fs::NamedFile; use rocket::http::Status; use rocket::response::content::RawJson; diff --git a/src/rust/lqos_node_manager/static/ip_dump.html b/src/rust/lqos_node_manager/static/ip_dump.html index a4060c6b..d5517ebf 100644 --- a/src/rust/lqos_node_manager/static/ip_dump.html +++ b/src/rust/lqos_node_manager/static/ip_dump.html @@ -332,6 +332,7 @@ if (hdr->cwr) flags |= 128; target = params.id; $.get("/api/packet_dump/" + params.id, (data) => { + console.log(data); data.sort((a,b) => a.timestamp - b.timestamp); // Find the minimum timestamp diff --git a/src/rust/lqos_sys/src/bpf/common/heimdall.h b/src/rust/lqos_sys/src/bpf/common/heimdall.h index 17b14ba7..ff320884 100644 --- a/src/rust/lqos_sys/src/bpf/common/heimdall.h +++ b/src/rust/lqos_sys/src/bpf/common/heimdall.h @@ -60,33 +60,6 @@ struct heimdall_event { __u8 dump[PACKET_OCTET_SIZE]; }; -struct heimdall_key -{ - struct in6_addr src; - struct in6_addr dst; - __u8 ip_protocol; - __u16 src_port; - __u16 dst_port; - __u8 pad; -}; - -struct heimdall_data { - __u64 last_seen; - __u64 bytes; - __u64 packets; - __u8 tos; -}; - -// Map for tracking flow information in-kernel for watched IPs -struct -{ - __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); - __type(key, struct heimdall_key); - __type(value, struct heimdall_data); - __uint(max_entries, MAX_FLOWS); - __uint(pinning, LIBBPF_PIN_BY_NAME); -} heimdall SEC(".maps"); - static __always_inline __u8 get_heimdall_mode() { __u32 index = 0; @@ -122,43 +95,7 @@ static __always_inline bool is_heimdall_watching(struct dissector_t *dissector, static __always_inline void update_heimdall(struct dissector_t *dissector, __u32 size, __u8 mode) { - if (mode == 1) { - // Don't report any non-ICMP without ports - if (dissector->ip_protocol != 1 && (dissector->src_port == 0 || dissector->dst_port == 0)) - return; - // Don't report ICMP with invalid numbers - if (dissector->ip_protocol == 1 && dissector->src_port > 18) return; - struct heimdall_key key = {0}; - key.src = dissector->src_ip; - key.dst = dissector->dst_ip; - key.ip_protocol = dissector->ip_protocol; - key.src_port = bpf_ntohs(dissector->src_port); - key.dst_port = bpf_ntohs(dissector->dst_port); - struct heimdall_data *counter = (struct heimdall_data *)bpf_map_lookup_elem(&heimdall, &key); - if (counter) - { - counter->last_seen = dissector->now; - counter->packets += 1; - counter->bytes += size; - if (dissector->tos != 0) - { - counter->tos = dissector->tos; - } - } - else - { - struct heimdall_data counter 
= {0}; - counter.last_seen = dissector->now; - counter.bytes = size; - counter.packets = 1; - counter.tos = dissector->tos; - if (bpf_map_update_elem(&heimdall, &key, &counter, BPF_NOEXIST) != 0) - { - bpf_debug("Failed to insert tracking"); - } - //bpf_debug("Inserted tracking"); - } - } else if (mode == 2) { + if (mode == 2) { struct heimdall_event event = {0}; event.timetamp = dissector->now; event.src = dissector->src_ip; diff --git a/src/rust/lqos_sys/src/bpf/lqos_kern.c b/src/rust/lqos_sys/src/bpf/lqos_kern.c index 9ad2a20c..7d93fe59 100644 --- a/src/rust/lqos_sys/src/bpf/lqos_kern.c +++ b/src/rust/lqos_sys/src/bpf/lqos_kern.c @@ -443,34 +443,6 @@ int throughput_reader(struct bpf_iter__bpf_map_elem *ctx) return 0; } -SEC("iter/bpf_map_elem") -int heimdall_reader(struct bpf_iter__bpf_map_elem *ctx) { - // The sequence file - struct seq_file *seq = ctx->meta->seq; - void *counter = ctx->value; - struct heimdall_key *ip = ctx->key; - __u32 num_cpus = NUM_CPUS; - - if (ctx->meta->seq_num == 0) { - bpf_seq_write(seq, &num_cpus, sizeof(__u32)); - bpf_seq_write(seq, &num_cpus, sizeof(__u32)); // Repeat for padding - } - - // Bail on end - if (counter == NULL || ip == NULL) { - return 0; - } - - bpf_seq_write(seq, ip, sizeof(struct heimdall_key)); - for (__u32 i=0; idownload_bytes, counter->upload_bytes); - return 0; -} - SEC("iter/bpf_map_elem") int flow_reader(struct bpf_iter__bpf_map_elem *ctx) { diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 31cd4036..1980299d 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -1,5 +1,5 @@ use crate::{ - bpf_map::BpfMap, flowbee_data::{FlowbeeData, FlowbeeKey}, heimdall_data::{HeimdallData, HeimdallKey}, kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter + bpf_map::BpfMap, flowbee_data::{FlowbeeData, FlowbeeKey}, kernel_wrapper::BPF_SKELETON, lqos_kernel::bpf, HostCounter }; use lqos_utils::XdpIpAddress; use once_cell::sync::Lazy; @@ -192,10 +192,6 @@ static mut MAP_TRAFFIC: Lazy< Option>, > = Lazy::new(|| None); -static mut HEIMDALL_TRACKER: Lazy< - Option>, -> = Lazy::new(|| None); - static mut FLOWBEE_TRACKER: Lazy< Option>, > = Lazy::new(|| None); @@ -223,32 +219,6 @@ pub unsafe fn iterate_throughput( } } -/// Iterate through the heimdall map and call the callback for each entry. 
-pub fn iterate_heimdall( - callback: &mut dyn FnMut(&HeimdallKey, &[HeimdallData]), -) { - unsafe { - if HEIMDALL_TRACKER.is_none() { - let lock = BPF_SKELETON.lock().unwrap(); - if let Some(skeleton) = lock.as_ref() { - let skeleton = skeleton.get_ptr(); - if let Ok(iter) = { - BpfMapIterator::new( - (*skeleton).progs.heimdall_reader, - (*skeleton).maps.heimdall, - ) - } { - *HEIMDALL_TRACKER = Some(iter); - } - } - } - - if let Some(iter) = HEIMDALL_TRACKER.as_mut() { - let _ = iter.for_each_per_cpu(callback); - } - } -} - /// Iterate through the Flows 2 system tracker, retrieving all flows pub fn iterate_flows( callback: &mut dyn FnMut(&FlowbeeKey, &FlowbeeData) @@ -281,13 +251,8 @@ pub fn iterate_flows( pub fn end_flows(flows: &mut [FlowbeeKey]) -> anyhow::Result<()> { let mut map = BpfMap::::from_path("/sys/fs/bpf/flowbee")?; - let mut dead_flow = FlowbeeData { - end_status: 2, - ..Default::default() - }; - for flow in flows { - map.insert_or_update(flow, &mut dead_flow)?; + map.delete(flow)?; } Ok(()) diff --git a/src/rust/lqos_sys/src/heimdall_data.rs b/src/rust/lqos_sys/src/heimdall_data.rs deleted file mode 100644 index 31be4bca..00000000 --- a/src/rust/lqos_sys/src/heimdall_data.rs +++ /dev/null @@ -1,33 +0,0 @@ -use lqos_utils::XdpIpAddress; -use zerocopy::FromBytes; - -/// Representation of the eBPF `heimdall_key` type. -#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, FromBytes)] -#[repr(C)] -pub struct HeimdallKey { - /// Mapped `XdpIpAddress` source for the flow. - pub src_ip: XdpIpAddress, - /// Mapped `XdpIpAddress` destination for the flow - pub dst_ip: XdpIpAddress, - /// IP protocol (see the Linux kernel!) - pub ip_protocol: u8, - /// Source port number, or ICMP type. - pub src_port: u16, - /// Destination port number. - pub dst_port: u16, - _padding: u8, -} - -/// Mapped representation of the eBPF `heimdall_data` type. -#[derive(Debug, Clone, Default, FromBytes)] -#[repr(C)] -pub struct HeimdallData { - /// Last seen, in nanoseconds (since boot time). - pub last_seen: u64, - /// Number of bytes since the flow started being tracked - pub bytes: u64, - /// Number of packets since the flow started being tracked - pub packets: u64, - /// IP header TOS value - pub tos: u8, -} \ No newline at end of file diff --git a/src/rust/lqos_sys/src/lib.rs b/src/rust/lqos_sys/src/lib.rs index 482b925e..a8654859 100644 --- a/src/rust/lqos_sys/src/lib.rs +++ b/src/rust/lqos_sys/src/lib.rs @@ -20,7 +20,6 @@ mod linux; mod bpf_iterator; /// Data shared between eBPF and Heimdall that needs local access /// for map control. 
-pub mod heimdall_data; pub mod flowbee_data; pub use ip_mapping::{ @@ -30,4 +29,4 @@ pub use kernel_wrapper::LibreQoSKernels; pub use linux::num_possible_cpus; pub use lqos_kernel::max_tracked_ips; pub use throughput::{throughput_for_each, HostCounter}; -pub use bpf_iterator::{iterate_heimdall, iterate_flows, end_flows}; \ No newline at end of file +pub use bpf_iterator::{iterate_flows, end_flows}; \ No newline at end of file diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index b18cb6d2..417972c7 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -29,7 +29,6 @@ use signal_hook::{ iterator::Signals, }; use stats::{BUS_REQUESTS, TIME_TO_POLL_HOSTS, HIGH_WATERMARK_DOWN, HIGH_WATERMARK_UP, FLOWS_TRACKED}; -use throughput_tracker::get_flow_stats; use tokio::join; mod stats; @@ -197,7 +196,6 @@ fn handle_bus_requests( tracked_flows: FLOWS_TRACKED.load(std::sync::atomic::Ordering::Relaxed), } } - BusRequest::GetFlowStats(ip) => get_flow_stats(ip), BusRequest::GetPacketHeaderDump(id) => { BusResponse::PacketDump(n_second_packet_dump(*id)) } diff --git a/src/rust/lqosd/src/throughput_tracker/heimdall_data.rs b/src/rust/lqosd/src/throughput_tracker/heimdall_data.rs deleted file mode 100644 index d209f710..00000000 --- a/src/rust/lqosd/src/throughput_tracker/heimdall_data.rs +++ /dev/null @@ -1,14 +0,0 @@ -use std::net::IpAddr; -use lqos_bus::BusResponse; -use lqos_heimdall::heimdall_watch_ip; -use lqos_utils::XdpIpAddress; - -pub fn get_flow_stats(ip: &str) -> BusResponse { - let ip = ip.parse::(); - if let Ok(ip) = ip { - let ip = XdpIpAddress::from_ip(ip); - heimdall_watch_ip(ip); - return lqos_heimdall::get_flow_stats(ip); - } - BusResponse::Fail("No Stats or bad IP".to_string()) -} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index 8c8f16c0..d34bacf3 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -1,5 +1,4 @@ pub mod flow_data; -mod heimdall_data; mod throughput_entry; mod tracking_data; use std::net::IpAddr; @@ -10,7 +9,6 @@ use crate::{ stats::TIME_TO_POLL_HOSTS, throughput_tracker::tracking_data::ThroughputTracker, }; -pub use heimdall_data::get_flow_stats; use log::{info, warn}; use lqos_bus::{BusResponse, FlowbeeProtocol, IpStats, TcHandle, TopFlowType, XdpPpingResult}; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; From 71fd1d558f109ca5adf7d5e8ee2b235910e91be2 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 14:04:41 -0500 Subject: [PATCH 051/103] Probable fix for building this on earlier kernel versions. --- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 719c4db1..73c53b03 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -97,7 +97,7 @@ static __always_inline struct flow_data_t new_flow_data( // to minimize rounding errors. .next_count_time = { dissector->now + SECOND_IN_NANOS, dissector->now + SECOND_IN_NANOS }, .last_count_time = { dissector->now, dissector->now }, - .next_count_bytes = { dissector->skb_len, dissector->skb_len }, + .next_count_bytes = { 0, 0 }, // Should be packet size, that isn't working? 
.rate_estimate_bps = { 0, 0 }, .last_sequence = { 0, 0 }, .last_ack = { 0, 0 }, From d93726e538b0687cf85a72fbc9f28f03fed5aa3f Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 15:03:47 -0500 Subject: [PATCH 052/103] Only report RTT for flows exceeding 4kbps - to eliminate noise from basically idle connections. --- src/rust/lqos_sys/src/bpf/common/flows.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 73c53b03..17628239 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -280,13 +280,16 @@ static __always_inline void process_tcp( detect_retries(dissector, rate_index, data); // Timestamps to calculate RTT - u_int32_t tsval = dissector->tsval; - u_int32_t tsecr = dissector->tsecr; - if (tsval != 0) { + u_int32_t tsval = dissector->tsval; + u_int32_t tsecr = dissector->tsecr; + if (tsval != 0) { //bpf_debug("[FLOWS][%d] TSVAL: %u, TSECR: %u", direction, tsval, tsecr); if (tsval != data->tsval[rate_index] && tsecr != data->tsecr[rate_index]) { - if (tsecr == data->tsval[other_rate_index]) { + if ( + tsecr == data->tsval[other_rate_index] && + data->rate_estimate_bps[rate_index] > 4096 + ) { __u64 elapsed = dissector->now - data->ts_change_time[other_rate_index]; data->last_rtt[rate_index] = elapsed; //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); From 79247e07f060e5b0d6d9280203f90b7015ecb1d5 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Mon, 11 Mar 2024 15:26:23 -0500 Subject: [PATCH 053/103] Fix a really silly mistake. I did NOT mean to always store the worst RTT we've ever seen. --- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index fd8874ba..d9992c80 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -221,10 +221,7 @@ impl ThroughputTracker { for i in 1..60 { tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; } - tracker.recent_rtt_data[0] = u32::max( - (data.last_rtt[0] / 10000) as u32, - (data.last_rtt[1] / 10000) as u32, - ); + tracker.recent_rtt_data[0] = (data.last_rtt[0] / 10000) as u32; tracker.last_fresh_rtt_data_cycle = self_cycle; if let Some(parents) = &tracker.network_json_parents { let net_json = NETWORK_JSON.write().unwrap(); From 213a27498e2019caa2afde5355d741c9779ecc49 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 08:21:33 -0500 Subject: [PATCH 054/103] Improve the flow cleanup logic. --- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 17628239..ebc2a2f8 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -77,7 +77,7 @@ struct flow_data_t { // This is pinned and not per-CPU, because half the data appears on either side of the bridge. struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? + __uint(type, BPF_MAP_TYPE_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? 
__type(key, struct flow_key_t); __type(value, struct flow_data_t); __uint(max_entries, MAX_FLOWS); diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index d9992c80..011bdbd0 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -187,7 +187,11 @@ impl ThroughputTracker { iterate_flows(&mut |key, data| { if data.end_status == 2 { - // The flow has been handled already and should be ignored + // The flow has been handled already and should be ignored. + // This shouldn't happen in our deletion logic. If it DID happen, + // we'll take this opportunity to clean it up. + expired_keys.push(key.clone()); + ALL_FLOWS.lock().unwrap().remove(&key); return; } @@ -221,7 +225,7 @@ impl ThroughputTracker { for i in 1..60 { tracker.recent_rtt_data[i] = tracker.recent_rtt_data[i - 1]; } - tracker.recent_rtt_data[0] = (data.last_rtt[0] / 10000) as u32; + tracker.recent_rtt_data[0] = (data.last_rtt[1] / 10000) as u32; tracker.last_fresh_rtt_data_cycle = self_cycle; if let Some(parents) = &tracker.network_json_parents { let net_json = NETWORK_JSON.write().unwrap(); @@ -246,6 +250,8 @@ impl ThroughputTracker { if let Some(d) = lock.get(&key) { let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); } + // Remove the flow from circulation + lock.remove(&key); } lock.remove(&key); From 3d9b52e627f6b06416fa3c1fea309c9923ee5d3e Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 08:57:29 -0500 Subject: [PATCH 055/103] More cleanup and logic improvements based on observation. In particular, flow cleanup is more accurate now, and we're a bit more aggressive in what RTT data we accept. --- src/rust/lqos_sys/src/bpf/common/flows.h | 8 ++++++-- .../lqosd/src/throughput_tracker/tracking_data.rs | 13 ++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index ebc2a2f8..83a0ec91 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -9,7 +9,9 @@ #define SECOND_IN_NANOS 1000000000 +#define TWO_SECONDS_IN_NANOS 2000000000 #define MS_IN_NANOS_T10 10000000 +#define ONE_MBPS_IN_BYTES_PER_SECOND 125000 //#define TIMESTAMP_INTERVAL_NANOS 10000000 // Some helpers to make understanding direction easier @@ -288,10 +290,12 @@ static __always_inline void process_tcp( if ( tsecr == data->tsval[other_rate_index] && - data->rate_estimate_bps[rate_index] > 4096 + data->rate_estimate_bps[rate_index] > ONE_MBPS_IN_BYTES_PER_SECOND ) { __u64 elapsed = dissector->now - data->ts_change_time[other_rate_index]; - data->last_rtt[rate_index] = elapsed; + if (elapsed < TWO_SECONDS_IN_NANOS) { + data->last_rtt[rate_index] = elapsed; + } //bpf_debug("[FLOWS][%d] RTT: %llu", direction, elapsed); } diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 011bdbd0..337322a3 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -186,7 +186,7 @@ impl ThroughputTracker { // Track through all the flows iterate_flows(&mut |key, data| { - if data.end_status == 2 { + if data.end_status == 3 { // The flow has been handled already and should be ignored. // This shouldn't happen in our deletion logic. If it DID happen, // we'll take this opportunity to clean it up. 
@@ -219,7 +219,7 @@ impl ThroughputTracker { } // TCP - we have RTT data? 6 is TCP - if key.ip_protocol == 6 && (data.last_rtt[0] != 0 || data.last_rtt[1] != 0) { + if key.ip_protocol == 6 && data.last_rtt[0] != 0 { if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { // Shift left for i in 1..60 { @@ -235,6 +235,11 @@ impl ThroughputTracker { } } } + + if data.end_status != 0 { + // The flow has ended. We need to remove it from the map. + expired_keys.push(key.clone()); + } } }); // End flow iterator @@ -250,10 +255,8 @@ impl ThroughputTracker { if let Some(d) = lock.get(&key) { let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); } - // Remove the flow from circulation - lock.remove(&key); } - + // Remove the flow from circulation lock.remove(&key); } } From e20d6d39b02667ff38099535bdd319bf8ab14bb4 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 10:29:08 -0500 Subject: [PATCH 056/103] Another try at a flow system that cleans up. --- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- src/rust/lqos_sys/src/bpf_iterator.rs | 3 +- .../src/throughput_tracker/tracking_data.rs | 48 +++++++++---------- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 83a0ec91..73b52b48 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -79,7 +79,7 @@ struct flow_data_t { // This is pinned and not per-CPU, because half the data appears on either side of the bridge. struct { - __uint(type, BPF_MAP_TYPE_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? + __uint(type, BPF_MAP_TYPE_LRU_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? __type(key, struct flow_key_t); __type(value, struct flow_data_t); __uint(max_entries, MAX_FLOWS); diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index 1980299d..c1ba3b83 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -252,7 +252,8 @@ pub fn end_flows(flows: &mut [FlowbeeKey]) -> anyhow::Result<()> { let mut map = BpfMap::::from_path("/sys/fs/bpf/flowbee")?; for flow in flows { - map.delete(flow)?; + let mut empty = FlowbeeData::default(); + map.insert_or_update(flow, &mut empty)?; } Ok(()) diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 337322a3..df4f744e 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -171,7 +171,7 @@ impl ThroughputTracker { pub(crate) fn apply_flow_data( &self, timeout_seconds: u64, - netflow_enabled: bool, + _netflow_enabled: bool, sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); @@ -188,16 +188,10 @@ impl ThroughputTracker { if data.end_status == 3 { // The flow has been handled already and should be ignored. - // This shouldn't happen in our deletion logic. If it DID happen, - // we'll take this opportunity to clean it up. + // DO NOT process it again. + } else if data.last_seen < expire { + // This flow has expired but not been handled yet. Add it to the list to be cleaned. expired_keys.push(key.clone()); - ALL_FLOWS.lock().unwrap().remove(&key); - return; - } - - if data.last_seen < expire { - // This flow has expired. 
Add it to the list to be cleaned - expired_keys.push(key.clone()); } else { let mut lock = ALL_FLOWS.lock().unwrap(); // We have a valid flow, so it needs to be tracked @@ -214,12 +208,11 @@ impl ThroughputTracker { } else { // Insert it into the map let flow_analysis = FlowAnalysis::new(&key); - lock.insert(key.clone(), (data.clone(), flow_analysis)); } // TCP - we have RTT data? 6 is TCP - if key.ip_protocol == 6 && data.last_rtt[0] != 0 { + if key.ip_protocol == 6 && data.last_rtt[0] != 0 && data.end_status == 0 { if let Some(mut tracker) = self.raw_data.get_mut(&key.local_ip) { // Shift left for i in 1..60 { @@ -234,31 +227,36 @@ impl ThroughputTracker { } } } - } - if data.end_status != 0 { - // The flow has ended. We need to remove it from the map. - expired_keys.push(key.clone()); + if data.end_status != 0 { + // The flow has ended. We need to remove it from the map. + expired_keys.push(key.clone()); + } } } }); // End flow iterator if !expired_keys.is_empty() { - let ret = lqos_sys::end_flows(&mut expired_keys); - if let Err(e) = ret { - log::warn!("Failed to end flows: {:?}", e); - } let mut lock = ALL_FLOWS.lock().unwrap(); - for key in expired_keys { + for key in expired_keys.iter() { // Send it off to netperf for analysis if we are supporting doing so. - if netflow_enabled { - if let Some(d) = lock.get(&key) { - let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); - } + if let Some(d) = lock.get(&key) { + let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); } // Remove the flow from circulation lock.remove(&key); } + + let ret = lqos_sys::end_flows(&mut expired_keys); + if let Err(e) = ret { + log::warn!("Failed to end flows: {:?}", e); + } + } + + // Cleaning run + { + let mut lock = ALL_FLOWS.lock().unwrap(); + lock.retain(|_k,v| v.0.last_seen >= expire); } } } From 3ca7ca8a0dbc0d8e7a34be1adfea68620e8adbc9 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 10:52:41 -0500 Subject: [PATCH 057/103] Lower threshold for capturing RTT by rate --- src/rust/lqos_sys/src/bpf/common/flows.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 73b52b48..d2f34e55 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -11,7 +11,7 @@ #define SECOND_IN_NANOS 1000000000 #define TWO_SECONDS_IN_NANOS 2000000000 #define MS_IN_NANOS_T10 10000000 -#define ONE_MBPS_IN_BYTES_PER_SECOND 125000 +#define HALF_MBPS_IN_BYTES_PER_SECOND 62500 //#define TIMESTAMP_INTERVAL_NANOS 10000000 // Some helpers to make understanding direction easier @@ -290,7 +290,8 @@ static __always_inline void process_tcp( if ( tsecr == data->tsval[other_rate_index] && - data->rate_estimate_bps[rate_index] > ONE_MBPS_IN_BYTES_PER_SECOND + (data->rate_estimate_bps[rate_index] > HALF_MBPS_IN_BYTES_PER_SECOND || + data->rate_estimate_bps[other_rate_index] > HALF_MBPS_IN_BYTES_PER_SECOND ) ) { __u64 elapsed = dissector->now - data->ts_change_time[other_rate_index]; if (elapsed < TWO_SECONDS_IN_NANOS) { From 07239b3d24d7ae4797ef062796a769a405732352 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 11:30:13 -0500 Subject: [PATCH 058/103] That should avoid some flow duplication that was hitting too hard. 
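Where the RTT capture stands after the last couple of patches, in plain terms: a sample is only stored when the TCP timestamp echo matches the other direction's last TSval, at least one direction is moving faster than 0.5 Mbit/s (500,000 / 8 = 62,500 bytes/sec, hence HALF_MBPS_IN_BYTES_PER_SECOND), and the measured gap is under two seconds. A rough userspace-Rust sketch of that gate follows (the function and parameter names are made up for illustration; the real check is the eBPF C above, which additionally requires that the timestamps actually changed):

    const HALF_MBPS_IN_BYTES_PER_SECOND: u64 = 62_500; // 0.5 Mbit/s expressed in bytes/sec
    const TWO_SECONDS_IN_NANOS: u64 = 2_000_000_000;

    // Would an RTT sample be recorded for this ACK? (sketch only)
    fn records_rtt_sample(
        tsecr: u32,              // timestamp echo on the packet just seen
        peer_tsval: u32,         // last TSval recorded for the other direction
        rate_estimate: [u64; 2], // per-direction rate estimates, in BYTES per second
        elapsed_nanos: u64,      // time since the peer's TSval last changed
    ) -> bool {
        tsecr == peer_tsval
            && (rate_estimate[0] > HALF_MBPS_IN_BYTES_PER_SECOND
                || rate_estimate[1] > HALF_MBPS_IN_BYTES_PER_SECOND)
            && elapsed_nanos < TWO_SECONDS_IN_NANOS
    }

    fn main() {
        // Busy flow with sane timing: accepted.
        assert!(records_rtt_sample(1234, 1234, [80_000, 2_000], 35_000_000));
        // Mostly idle flow: rejected, so keep-alives stop polluting the RTT history.
        assert!(!records_rtt_sample(1234, 1234, [900, 1_100], 35_000_000));
    }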
--- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index df4f744e..d4a653d0 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -189,7 +189,7 @@ impl ThroughputTracker { if data.end_status == 3 { // The flow has been handled already and should be ignored. // DO NOT process it again. - } else if data.last_seen < expire { + } else if data.last_seen < expire && data.end_status == 0 { // This flow has expired but not been handled yet. Add it to the list to be cleaned. expired_keys.push(key.clone()); } else { From 55f24cf71b52de0f0965405dd72fddef958686ae Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 11:38:19 -0500 Subject: [PATCH 059/103] Revert previous bad idea --- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index d4a653d0..28e53e00 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -171,7 +171,7 @@ impl ThroughputTracker { pub(crate) fn apply_flow_data( &self, timeout_seconds: u64, - _netflow_enabled: bool, + netflow_enabled: bool, sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); @@ -189,7 +189,7 @@ impl ThroughputTracker { if data.end_status == 3 { // The flow has been handled already and should be ignored. // DO NOT process it again. - } else if data.last_seen < expire && data.end_status == 0 { + } else if data.last_seen < expire { // This flow has expired but not been handled yet. Add it to the list to be cleaned. expired_keys.push(key.clone()); } else { @@ -241,7 +241,9 @@ impl ThroughputTracker { for key in expired_keys.iter() { // Send it off to netperf for analysis if we are supporting doing so. if let Some(d) = lock.get(&key) { - let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); + if netflow_enabled { + let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); + } } // Remove the flow from circulation lock.remove(&key); From 5a3f90412d6bccd18c8836a4900d0a5401a11119 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 14:02:36 -0500 Subject: [PATCH 060/103] Working geocode system, albeit not as useful as I hoped. 
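The trick worth calling out in the table code below: both the ASN and geo tries are IPv6-only, and IPv4 networks are widened into the ::ffff:0:0/96 mapped space so a single longest-match lookup covers both address families. A minimal sketch of that normalization, using a documentation address as an example value (the real code is GeoTable::load() in the diff; the helper name here is illustrative):

    use std::net::IpAddr;

    // An IPv4 /N is stored as an IPv4-mapped IPv6 /(N + 96).
    fn normalize(network: IpAddr, prefix: u8) -> (std::net::Ipv6Addr, u8) {
        match network {
            IpAddr::V4(ip) => (ip.to_ipv6_mapped(), prefix + 96),
            IpAddr::V6(ip) => (ip, prefix),
        }
    }

    fn main() {
        let (ip, prefix) = normalize("203.0.113.0".parse().unwrap(), 24);
        assert_eq!(prefix, 120);              // a v4 /24 becomes a v6 /120
        assert_eq!(ip.segments()[5], 0xffff); // the ::ffff: marker of a mapped address
    }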
--- src/rust/Cargo.lock | 3 + src/rust/lqosd/Cargo.toml | 3 + .../flow_data/flow_analysis/asn.rs | 141 ++++++++++++++++++ .../flow_data/flow_analysis/finished_flows.rs | 4 +- .../flow_data/flow_analysis/mod.rs | 43 ++---- src/rust/lqosd/src/throughput_tracker/mod.rs | 6 +- 6 files changed, 165 insertions(+), 35 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 3cd9aebe..036631c6 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -1738,10 +1738,13 @@ name = "lqosd" version = "0.1.0" dependencies = [ "anyhow", + "bincode", "csv", "dashmap", "env_logger", "flate2", + "ip_network", + "ip_network_table", "itertools 0.12.1", "jemallocator", "log", diff --git a/src/rust/lqosd/Cargo.toml b/src/rust/lqosd/Cargo.toml index ae1cd6df..c4d948cd 100644 --- a/src/rust/lqosd/Cargo.toml +++ b/src/rust/lqosd/Cargo.toml @@ -33,6 +33,9 @@ itertools = "0.12.1" csv = "1" reqwest = { version = "0.11.24", features = ["blocking"] } flate2 = "1.0" +bincode = "1" +ip_network_table = "0" +ip_network = "0" # Support JemAlloc on supported platforms [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs index d06e8141..68641c13 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs @@ -1,6 +1,146 @@ +//! Obtain ASN and geo mappings from IP addresses for flow +//! analysis. + + use std::{io::Read, net::IpAddr, path::Path}; use serde::Deserialize; +#[derive(Deserialize, Clone, Debug)] +struct AsnEncoded { + network: IpAddr, + prefix: u8, + pub asn: u32, + organization: String, +} + +#[allow(dead_code)] +#[derive(Deserialize, Debug)] +struct GeoIpLocation { + network: IpAddr, + prefix: u8, + latitude: f64, + longitude: f64, + city_and_country: String, + +} + +#[derive(Deserialize)] +struct Geobin { + asn: Vec, + geo: Vec, +} + +pub struct GeoTable { + asn_trie: ip_network_table::IpNetworkTable, + geo_trie: ip_network_table::IpNetworkTable, +} + +impl GeoTable { + const FILENAME: &'static str = "geo.bin"; + + fn file_path() -> std::path::PathBuf { + Path::new(&lqos_config::load_config().unwrap().lqos_directory) + .join(Self::FILENAME) + } + + fn download() -> anyhow::Result<()> { + log::info!("Downloading ASN-IP Table"); + let file_path = Self::file_path(); + let url = "https://bfnightly.bracketproductions.com/geo.bin"; + let response = reqwest::blocking::get(url)?; + let content = response.bytes()?; + let bytes = &content[0..]; + std::fs::write(file_path, bytes)?; + Ok(()) + } + + pub fn load() -> anyhow::Result { + let path = Self::file_path(); + if !path.exists() { + log::info!("geo.bin not found - trying to download it"); + Self::download()?; + } + + // Decompress and deserialize + let file = std::fs::File::open(path)?; + let mut buffer = Vec::new(); + flate2::read::GzDecoder::new(file).read_to_end(&mut buffer)?; + let geobin: Geobin = bincode::deserialize(&buffer)?; + + // Build the ASN trie + log::info!("Building ASN trie"); + let mut asn_trie = ip_network_table::IpNetworkTable::::new(); + for entry in geobin.asn { + let (ip, prefix) = match entry.network { + IpAddr::V4(ip) => (ip.to_ipv6_mapped(), entry.prefix+96 ), + IpAddr::V6(ip) => (ip, entry.prefix), + }; + if let Ok(ip) = ip_network::Ipv6Network::new(ip, prefix) { + asn_trie.insert(ip, entry); + } + } + + // Build the GeoIP trie + log::info!("Building 
GeoIP trie"); + let mut geo_trie = ip_network_table::IpNetworkTable::::new(); + for entry in geobin.geo { + let (ip, prefix) = match entry.network { + IpAddr::V4(ip) => (ip.to_ipv6_mapped(), entry.prefix+96 ), + IpAddr::V6(ip) => (ip, entry.prefix), + }; + if let Ok(ip) = ip_network::Ipv6Network::new(ip, prefix) { + geo_trie.insert(ip, entry); + } + } + + log::info!("GeoTables loaded, {}-{} records.", asn_trie.len().1, geo_trie.len().1); + + Ok(Self { + asn_trie, + geo_trie, + }) + } + + pub fn find_asn(&self, ip: IpAddr) -> Option { + log::debug!("Looking up ASN for IP: {:?}", ip); + let ip = match ip { + IpAddr::V4(ip) => ip.to_ipv6_mapped(), + IpAddr::V6(ip) => ip, + }; + if let Some(matched) = self.asn_trie.longest_match(ip) { + log::debug!("Matched ASN: {:?}", matched.1.asn); + Some(matched.1.asn) + } else { + log::debug!("No ASN found"); + None + } + } + + pub fn find_owners_by_ip(&self, ip: IpAddr) -> (String, String) { + log::debug!("Looking up ASN for IP: {:?}", ip); + let ip = match ip { + IpAddr::V4(ip) => ip.to_ipv6_mapped(), + IpAddr::V6(ip) => ip, + }; + let mut owners = String::new(); + let mut country = String::new(); + + if let Some(matched) = self.asn_trie.longest_match(ip) { + log::debug!("Matched ASN: {:?}", matched.1.asn); + owners = matched.1.organization.clone(); + } + if let Some(matched) = self.geo_trie.longest_match(ip) { + log::debug!("Matched Geo: {:?}", matched.1.city_and_country); + country = matched.1.city_and_country.clone(); + } + + (owners, country) + } +} + +/////////////////////////////////////////////////////////////////////// + +/* /// Structure to represent the on-disk structure for files /// from: https://iptoasn.com/ /// Specifically: https://iptoasn.com/data/ip2asn-combined.tsv.gz @@ -105,3 +245,4 @@ impl AsnTable { self.asn_table.iter().find(|row| row.asn == asn).map(|row| row.clone()) } } +*/ \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 3bfa123a..974ab649 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -39,8 +39,8 @@ impl TimeBuffer { let mut my_buffer = buffer .iter() .map(|v| { - let (_key, data, analysis) = &v.data; - let (_name, country) = get_asn_name_and_country(analysis.asn_id.0); + let (key, data, _analysis) = &v.data; + let (_name, country) = get_asn_name_and_country(key.remote_ip.as_ip()); let rtt = [ (data.last_rtt[0] / 1000000) as f32, (data.last_rtt[1] / 1000000) as f32, diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index 7800fc88..b0cc985b 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -1,7 +1,6 @@ use std::{net::IpAddr, sync::Mutex}; use lqos_sys::flowbee_data::FlowbeeKey; use once_cell::sync::Lazy; -use self::asn::AsnTable; mod asn; mod protocol; pub use protocol::FlowProtocol; @@ -12,7 +11,7 @@ pub use finished_flows::FinishedFlowAnalysis; static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysisSystem::new()); pub struct FlowAnalysisSystem { - asn_table: Mutex>, + asn_table: Mutex>, } impl FlowAnalysisSystem { @@ -20,7 +19,7 @@ impl FlowAnalysisSystem { // Periodically update the ASN table std::thread::spawn(|| { loop { - let 
result = AsnTable::new(); + let result = asn::GeoTable::load(); match result { Ok(table) => { ANALYSIS.asn_table.lock().unwrap().replace(table); @@ -66,35 +65,19 @@ impl FlowAnalysis { pub fn lookup_asn_id(ip: IpAddr) -> Option { - let table_lock = ANALYSIS.asn_table.lock(); - if table_lock.is_err() { - return None; - } - let table = table_lock.unwrap(); - if table.is_none() { - return None; - } - let table = table.as_ref().unwrap(); - if let Some(asn) = table.find_asn(ip) { - Some(asn.asn) - } else { - None + if let Ok(table_lock) = ANALYSIS.asn_table.lock() { + if let Some(table) = table_lock.as_ref() { + return table.find_asn(ip); + } } + None } -pub fn get_asn_name_and_country(asn: u32) -> (String, String) { - let table_lock = ANALYSIS.asn_table.lock(); - if table_lock.is_err() { - return ("".to_string(), "".to_string()); - } - let table = table_lock.unwrap(); - if table.is_none() { - return ("".to_string(), "".to_string()); - } - let table = table.as_ref().unwrap(); - if let Some(row) = table.find_asn_by_id(asn) { - (row.owners.clone(), row.country.clone()) - } else { - ("".to_string(), "".to_string()) +pub fn get_asn_name_and_country(ip: IpAddr) -> (String, String) { + if let Ok(table_lock) = ANALYSIS.asn_table.lock() { + if let Some(table) = table_lock.as_ref() { + return table.find_owners_by_ip(ip); + } } + (String::new(), String::new()) } \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index d34bacf3..d8f64509 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -505,7 +505,7 @@ pub fn dump_active_flows() -> BusResponse { let result: Vec = lock .iter() .map(|(key, row)| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.1.asn_id.0); + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(key.remote_ip.as_ip()); lqos_bus::FlowbeeData { remote_ip: key.remote_ip.as_ip().to_string(), @@ -589,7 +589,7 @@ pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { .iter() .take(n as usize) .map(|(ip, flow)| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(flow.1.asn_id.0); + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(ip.remote_ip.as_ip()); lqos_bus::FlowbeeData { remote_ip: ip.remote_ip.as_ip().to_string(), local_ip: ip.local_ip.as_ip().to_string(), @@ -624,7 +624,7 @@ pub fn flows_by_ip(ip: &str) -> BusResponse { .iter() .filter(|(key, _)| key.local_ip == ip) .map(|(key, row)| { - let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(row.1.asn_id.0); + let (remote_asn_name, remote_asn_country) = get_asn_name_and_country(key.remote_ip.as_ip()); lqos_bus::FlowbeeData { remote_ip: key.remote_ip.as_ip().to_string(), From e46aafe5aebf74bd22c8aac1e82b52738d66ee35 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 14:20:35 -0500 Subject: [PATCH 061/103] Silly performance: making 12k mutex locks when I need one was really dumb. 
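The shape of the fix, in miniature (types simplified, not the literal tracking_data.rs code): take the ALL_FLOWS lock once per polling pass and hold it across the per-flow loop, instead of locking and unlocking for every one of the roughly 12k flows.

    use std::collections::HashMap;
    use std::sync::Mutex;

    fn per_pass_update(all_flows: &Mutex<HashMap<u64, u64>>, kernel_snapshot: &[(u64, u64)]) {
        let mut lock = all_flows.lock().unwrap(); // one lock per pass...
        for (key, bytes) in kernel_snapshot {
            *lock.entry(*key).or_insert(0) = *bytes; // ...and none inside the loop
        }
    } // guard dropped here, after the whole pass

    fn main() {
        let flows = Mutex::new(HashMap::new());
        per_pass_update(&flows, &[(1, 1_500), (2, 64)]);
        assert_eq!(flows.lock().unwrap().len(), 2);
    }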
--- src/rust/lqos_sys/src/bpf/common/flows.h | 2 +- src/rust/lqos_sys/src/bpf_iterator.rs | 3 +-- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 9 +++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index d2f34e55..d97a5dd6 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -79,7 +79,7 @@ struct flow_data_t { // This is pinned and not per-CPU, because half the data appears on either side of the bridge. struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? + __uint(type, BPF_MAP_TYPE_HASH); // TODO: BPF_MAP_TYPE_LRU_PERCPU_HASH? __type(key, struct flow_key_t); __type(value, struct flow_data_t); __uint(max_entries, MAX_FLOWS); diff --git a/src/rust/lqos_sys/src/bpf_iterator.rs b/src/rust/lqos_sys/src/bpf_iterator.rs index c1ba3b83..1980299d 100644 --- a/src/rust/lqos_sys/src/bpf_iterator.rs +++ b/src/rust/lqos_sys/src/bpf_iterator.rs @@ -252,8 +252,7 @@ pub fn end_flows(flows: &mut [FlowbeeKey]) -> anyhow::Result<()> { let mut map = BpfMap::::from_path("/sys/fs/bpf/flowbee")?; for flow in flows { - let mut empty = FlowbeeData::default(); - map.insert_or_update(flow, &mut empty)?; + map.delete(flow)?; } Ok(()) diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 28e53e00..dc2cd56e 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -182,6 +182,8 @@ impl ThroughputTracker { // Track the expired keys let mut expired_keys = Vec::new(); + + let mut lock = ALL_FLOWS.lock().unwrap(); // Track through all the flows iterate_flows(&mut |key, data| { @@ -193,7 +195,6 @@ impl ThroughputTracker { // This flow has expired but not been handled yet. Add it to the list to be cleaned. expired_keys.push(key.clone()); } else { - let mut lock = ALL_FLOWS.lock().unwrap(); // We have a valid flow, so it needs to be tracked if let Some(this_flow) = lock.get_mut(&key) { this_flow.0.last_seen = data.last_seen; @@ -237,7 +238,6 @@ impl ThroughputTracker { }); // End flow iterator if !expired_keys.is_empty() { - let mut lock = ALL_FLOWS.lock().unwrap(); for key in expired_keys.iter() { // Send it off to netperf for analysis if we are supporting doing so. if let Some(d) = lock.get(&key) { @@ -256,10 +256,7 @@ impl ThroughputTracker { } // Cleaning run - { - let mut lock = ALL_FLOWS.lock().unwrap(); - lock.retain(|_k,v| v.0.last_seen >= expire); - } + lock.retain(|_k,v| v.0.last_seen >= expire); } } From 1fb5838ebea5c6501c4f2b3d3a403d5784269570 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 14:46:47 -0500 Subject: [PATCH 062/103] Remove a test call that was spamming the logs. 
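The call going away below was only ever a smoke test: it dumped the recent-flows country rollup to stdout on every finished flow. Conceptually that rollup is a per-country sum over the completed-flow buffer, roughly like this hedged sketch with simplified types (the real TimeBuffer also keeps a time window and per-direction RTT averages, and the next patch exposes the summary over the bus instead of printing it):

    use std::collections::HashMap;

    fn country_summary(flows: &[(String, [u64; 2])]) -> Vec<(String, [u64; 2])> {
        let mut totals: HashMap<String, [u64; 2]> = HashMap::new();
        for (country, bytes) in flows {
            let entry = totals.entry(country.clone()).or_insert([0, 0]);
            entry[0] += bytes[0]; // upload bytes
            entry[1] += bytes[1]; // download bytes
        }
        let mut out: Vec<_> = totals.into_iter().collect();
        out.sort_by_key(|(_, b)| std::cmp::Reverse(b[0] + b[1])); // busiest countries first
        out
    }

    fn main() {
        let flows = vec![
            ("United States".to_string(), [10, 900]),
            ("Germany".to_string(), [5, 100]),
            ("United States".to_string(), [2, 300]),
        ];
        assert_eq!(country_summary(&flows)[0].0, "United States");
    }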
--- .../flow_data/flow_analysis/finished_flows.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 974ab649..a2d8f42c 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -141,7 +141,5 @@ impl FlowbeeRecipient for FinishedFlowAnalysis { .as_secs(), data: (key, data, analysis), }); - - println!("{:?}", RECENT_FLOWS.country_summary()); } } From 66a19c04a4d55c7b101f0c8c6a10bc100c0b57ff Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 15:19:07 -0500 Subject: [PATCH 063/103] First example of an actual summary report - we can group flow endpoints in a 5-minute report. --- src/rust/lqos_bus/src/bus/request.rs | 3 ++ src/rust/lqos_bus/src/bus/response.rs | 3 ++ .../lqos_node_manager/src/flow_monitor.rs | 12 +++++ src/rust/lqos_node_manager/src/main.rs | 1 + src/rust/lqos_node_manager/static/main.html | 51 ++++++++++++++++++- src/rust/lqosd/src/main.rs | 1 + .../flow_data/flow_analysis/finished_flows.rs | 6 +-- .../flow_data/flow_analysis/mod.rs | 3 +- .../src/throughput_tracker/flow_data/mod.rs | 2 +- src/rust/lqosd/src/throughput_tracker/mod.rs | 6 +++ .../src/throughput_tracker/tracking_data.rs | 6 +-- 11 files changed, 83 insertions(+), 11 deletions(-) diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index fe58b1cf..f5920f47 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -167,6 +167,9 @@ pub enum BusRequest { /// Flows by IP Address FlowsByIp(String), + + /// Current Endpoints by Country + CurrentEndpointsByCountry, } /// Defines the type of "top" flow being requested diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index f57784fa..09f84aa5 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -125,4 +125,7 @@ pub enum BusResponse { /// Flows by IP FlowsByIp(Vec), + + /// Current endpoints by country + CurrentEndpointsByCountry(Vec<(String, [u64; 2], [f32; 2])>), } diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs index 36c3f5a7..56808c61 100644 --- a/src/rust/lqos_node_manager/src/flow_monitor.rs +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -44,5 +44,17 @@ pub async fn top_5_flows(top_n: u32, flow_type: String) -> NoCache Vec::new(), }; + NoCache::new(Json(result)) +} + +#[get("/api/flows/by_country")] +pub async fn flows_by_country() -> NoCache>> { + let responses = + bus_request(vec![BusRequest::CurrentEndpointsByCountry]).await.unwrap(); + let result = match &responses[0] { + BusResponse::CurrentEndpointsByCountry(country_summary) => country_summary.to_owned(), + _ => Vec::new(), + }; + NoCache::new(Json(result)) } \ No newline at end of file diff --git a/src/rust/lqos_node_manager/src/main.rs b/src/rust/lqos_node_manager/src/main.rs index d9d3bb4c..851dd942 100644 --- a/src/rust/lqos_node_manager/src/main.rs +++ b/src/rust/lqos_node_manager/src/main.rs @@ -114,6 +114,7 @@ fn rocket() -> _ { flow_monitor::all_flows_debug_dump, flow_monitor::count_flows, flow_monitor::top_5_flows, + flow_monitor::flows_by_country, ], ); diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index 
1c14c9a9..04b46821 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -147,9 +147,11 @@ Top 10 Downloaders +
+
@@ -347,14 +349,54 @@ }); } + function updateTop10Endpoints() { + $.get("/api/flows/by_country", data => { + //console.log(data); + let html = ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + let i = 0; + while (i < data.length) { + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + html += ""; + i += 1; + } + html += "
CountryUL ⬆️DL ⬇️UL RTTDL RTT
" + data[i][0] + "" + scaleNumber(data[i][1][0]) + "" + scaleNumber(data[i][1][1]) + "" + (data[i][2][0] / 1000000).toFixed(2) + "" + (data[i][2][1] / 1000000).toFixed(2) + "
"; + $("#top10ep").html(html); + }); + } + + let top10view = "circuits"; + function showCircuits() { $("#top10dl").show(); $("#top10flows").hide(); + $("#top10ep").hide(); + top10view = "circuits"; } function showFlows() { $("#top10dl").hide(); $("#top10flows").show(); + $("#top10ep").hide(); + top10view = "flows"; + } + + function showEndpoints() { + $("#top10dl").hide(); + $("#top10flows").hide(); + $("#top10ep").show(); + top10view = "endpoints"; } var rttGraph = new RttHistogram(); @@ -379,8 +421,13 @@ if (tickCount % 5 == 0) { updateHistogram(); updateWorst10(); - updateTop10(); - updateTop10Flows(); + if (top10view == "circuits") { + updateTop10(); + } else if (top10view == "flows") { + updateTop10Flows(); + } else { + updateTop10Endpoints(); + } } if (tickCount % 10 == 0) { diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 417972c7..771bd3f0 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -231,6 +231,7 @@ fn handle_bus_requests( } BusRequest::TopFlows { n, flow_type } => throughput_tracker::top_flows(*n, *flow_type), BusRequest::FlowsByIp(ip) => throughput_tracker::flows_by_ip(ip), + BusRequest::CurrentEndpointsByCountry => throughput_tracker::current_endpoints_by_country(), }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index a2d8f42c..2616f52b 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -4,7 +4,7 @@ use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; use std::sync::{Arc, Mutex}; -struct TimeBuffer { +pub struct TimeBuffer { buffer: Mutex>, } @@ -34,7 +34,7 @@ impl TimeBuffer { buffer.push(entry); } - fn country_summary(&self) -> Vec<(String, [u64; 2], [f32; 2])> { + pub fn country_summary(&self) -> Vec<(String, [u64; 2], [f32; 2])> { let buffer = self.buffer.lock().unwrap(); let mut my_buffer = buffer .iter() @@ -114,7 +114,7 @@ impl TimeBuffer { } } -static RECENT_FLOWS: Lazy = Lazy::new(|| TimeBuffer::new()); +pub static RECENT_FLOWS: Lazy = Lazy::new(|| TimeBuffer::new()); pub struct FinishedFlowAnalysis {} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index b0cc985b..53917621 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -7,6 +7,7 @@ pub use protocol::FlowProtocol; use super::AsnId; mod finished_flows; pub use finished_flows::FinishedFlowAnalysis; +pub use finished_flows::RECENT_FLOWS; static ANALYSIS: Lazy = Lazy::new(|| FlowAnalysisSystem::new()); @@ -80,4 +81,4 @@ pub fn get_asn_name_and_country(ip: IpAddr) -> (String, String) { } } (String::new(), String::new()) -} \ No newline at end of file +} diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs index 608110dd..bedf8cf8 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/mod.rs @@ -13,7 +13,7 @@ use std::sync::{ mpsc::{channel, Sender}, Arc, }; -pub(crate) use flow_analysis::{setup_flow_analysis, get_asn_name_and_country, FlowAnalysis}; +pub(crate) use flow_analysis::{setup_flow_analysis, get_asn_name_and_country, 
FlowAnalysis, RECENT_FLOWS}; trait FlowbeeRecipient { diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index d8f64509..e2315b03 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -651,4 +651,10 @@ pub fn flows_by_ip(ip: &str) -> BusResponse { return BusResponse::FlowsByIp(matching_flows); } BusResponse::Ack +} + +/// Current endpoints by country +pub fn current_endpoints_by_country() -> BusResponse { + let summary = flow_data::RECENT_FLOWS.country_summary(); + BusResponse::CurrentEndpointsByCountry(summary) } \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index dc2cd56e..2d2f3a54 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -171,7 +171,7 @@ impl ThroughputTracker { pub(crate) fn apply_flow_data( &self, timeout_seconds: u64, - netflow_enabled: bool, + _netflow_enabled: bool, sender: std::sync::mpsc::Sender<(FlowbeeKey, (FlowbeeData, FlowAnalysis))>, ) { let self_cycle = self.cycle.load(std::sync::atomic::Ordering::Relaxed); @@ -241,9 +241,7 @@ impl ThroughputTracker { for key in expired_keys.iter() { // Send it off to netperf for analysis if we are supporting doing so. if let Some(d) = lock.get(&key) { - if netflow_enabled { - let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); - } + let _ = sender.send((key.clone(), (d.0.clone(), d.1.clone()))); } // Remove the flow from circulation lock.remove(&key); From c16f06b0ab0a1002130f12469810b6eaf162b66d Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 15:47:29 -0500 Subject: [PATCH 064/103] Sort countries by download bytes not upload --- .../flow_data/flow_analysis/finished_flows.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 2616f52b..6c512322 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -107,8 +107,8 @@ impl TimeBuffer { country_summary.push((last_country, total_bytes, rtt)); - // Sort by bytes descending - country_summary.sort_by(|a, b| b.1[0].cmp(&a.1[0])); + // Sort by bytes downloaded descending + country_summary.sort_by(|a, b| b.1[1].cmp(&a.1[1])); country_summary } From fae96280b918fd6e118344bbfbd9c32a4c3f6e84 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Tue, 12 Mar 2024 16:41:57 -0500 Subject: [PATCH 065/103] Actually limit the top 10 endpoints to being 10 --- src/rust/lqos_node_manager/static/main.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index 04b46821..1aa27f66 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -361,7 +361,7 @@ html += "DL RTT"; html += ""; let i = 0; - while (i < data.length) { + while (i < data.length && i < 10) { html += ""; html += "" + data[i][0] + ""; html += "" + scaleNumber(data[i][1][0]) + ""; From a63ff0a6f1a0d3a92e37236752f48f013ad620c3 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 13 Mar 2024 08:20:36 -0500 Subject: [PATCH 066/103] Rename all 
flowbee "retries" to "tcp_retransmits" in code, and "retransmits" in visible HTML. --- src/rust/lqos_bus/src/ip_stats.rs | 4 ++-- src/rust/lqos_node_manager/static/circuit_queue.html | 4 ++-- src/rust/lqos_node_manager/static/main.html | 4 ++-- src/rust/lqos_sys/src/bpf/common/flows.h | 8 ++++---- src/rust/lqos_sys/src/flowbee_data.rs | 4 ++-- src/rust/lqosd/src/throughput_tracker/mod.rs | 10 +++++----- src/rust/lqosd/src/throughput_tracker/tracking_data.rs | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 41517a79..ea11c7c3 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -150,8 +150,8 @@ pub struct FlowbeeData { pub packets_sent: [u64; 2], /// Rate estimate pub rate_estimate_bps: [u64; 2], - /// Retry Counters - pub retries: [u32; 2], + /// TCP Retransmission count (also counts duplicates) + pub tcp_retransmits: [u32; 2], /// Most recent RTT pub last_rtt: [u64; 2], /// Has the connection ended? diff --git a/src/rust/lqos_node_manager/static/circuit_queue.html b/src/rust/lqos_node_manager/static/circuit_queue.html index 72641e1d..5fe151ef 100644 --- a/src/rust/lqos_node_manager/static/circuit_queue.html +++ b/src/rust/lqos_node_manager/static/circuit_queue.html @@ -784,7 +784,7 @@ html += "Connection"; html += "Bytes"; html += "Packets"; - html += "TCP Retries"; + html += "TCP Retransmits"; html += "TCP RTT"; html += "ASN"; html += "ASN Country"; @@ -795,7 +795,7 @@ html += "" + data[i].analysis + ""; html += "" + scaleNumber(data[i].bytes_sent[0]) + " / " + scaleNumber(data[i].bytes_sent[1]) + ""; html += "" + scaleNumber(data[i].packets_sent[0]) + " / " + scaleNumber(data[i].packets_sent[1]) + ""; - html += "" + data[i].retries[0] + " / " + data[i].retries[1] + ""; + html += "" + data[i].tcp_retransmits[0] + " / " + data[i].tcp_retransmits[1] + ""; html += "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + " / " + (data[i].last_rtt[1] / 1000000).toFixed(2) + ""; html += "(" + data[i].remote_asn + ") " + data[i].remote_asn_name + ""; html += "" + data[i].remote_asn_country + ""; diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index 1aa27f66..534ebc9a 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -324,7 +324,7 @@ html += "DL ⬇️"; html += "UL RTT"; html += "DL RTT"; - html += "TCP Retries"; + html += "TCP Retransmits"; html += "Remote ASN"; html += "Country"; html += ""; @@ -339,7 +339,7 @@ html += "" + scaleNumber(data[i].rate_estimate_bps[1]) + ""; html += "" + (data[i].last_rtt[0] / 1000000).toFixed(2) + ""; html += "" + (data[i].last_rtt[1] / 1000000).toFixed(2) + ""; - html += "" + data[i].retries[0] + "/" + data[i].retries[1] + ""; + html += "" + data[i].tcp_retransmits[0] + "/" + data[i].tcp_retransmits[1] + ""; html += "" + data[i].remote_asn_name + ""; html += "" + data[i].remote_asn_country + ""; html += ""; diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index d97a5dd6..f3de6202 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -55,8 +55,8 @@ struct flow_data_t { __u32 last_sequence[2]; // Acknowledgement number of the last packet __u32 last_ack[2]; - // Retry Counters - __u32 retries[2]; + // Retransmit Counters (Also catches duplicates and out-of-order packets) + __u32 tcp_retransmits[2]; // Timestamp values __u32 tsval[2]; 
__u32 tsecr[2]; @@ -103,7 +103,7 @@ static __always_inline struct flow_data_t new_flow_data( .rate_estimate_bps = { 0, 0 }, .last_sequence = { 0, 0 }, .last_ack = { 0, 0 }, - .retries = { 0, 0 }, + .tcp_retransmits = { 0, 0 }, .tsval = { 0, 0 }, .tsecr = { 0, 0 }, .ts_change_time = { 0, 0 }, @@ -232,7 +232,7 @@ static __always_inline void detect_retries( ) ) { // This is a retransmission - data->retries[rate_index]++; + data->tcp_retransmits[rate_index]++; } // Store the sequence and ack numbers for the next packet diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs index 78461977..00085261 100644 --- a/src/rust/lqos_sys/src/flowbee_data.rs +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -47,8 +47,8 @@ pub struct FlowbeeData { pub last_sequence: [u32; 2], /// Acknowledgement number of the last packet pub last_ack: [u32; 2], - /// Retry Counters - pub retries: [u32; 2], + /// TCP Retransmission count (also counts duplicates) + pub tcp_retransmits: [u32; 2], /// Timestamp values pub tsval: [u32; 2], /// Timestamp echo values diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index e2315b03..ef6d103d 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -516,7 +516,7 @@ pub fn dump_active_flows() -> BusResponse { bytes_sent: row.0.bytes_sent, packets_sent: row.0.packets_sent, rate_estimate_bps: row.0.rate_estimate_bps, - retries: row.0.retries, + tcp_retransmits: row.0.tcp_retransmits, last_rtt: row.0.last_rtt, end_status: row.0.end_status, tos: row.0.tos, @@ -571,8 +571,8 @@ pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { } TopFlowType::Drops => { table.sort_by(|a, b| { - let a_total = a.1 .0.retries[0] + a.1 .0.retries[1]; - let b_total = b.1 .0.retries[0] + b.1 .0.retries[1]; + let a_total = a.1 .0.tcp_retransmits[0] + a.1 .0.tcp_retransmits[1]; + let b_total = b.1 .0.tcp_retransmits[0] + b.1 .0.tcp_retransmits[1]; b_total.cmp(&a_total) }); } @@ -599,7 +599,7 @@ pub fn top_flows(n: u32, flow_type: TopFlowType) -> BusResponse { bytes_sent: flow.0.bytes_sent, packets_sent: flow.0.packets_sent, rate_estimate_bps: flow.0.rate_estimate_bps, - retries: flow.0.retries, + tcp_retransmits: flow.0.tcp_retransmits, last_rtt: flow.0.last_rtt, end_status: flow.0.end_status, tos: flow.0.tos, @@ -635,7 +635,7 @@ pub fn flows_by_ip(ip: &str) -> BusResponse { bytes_sent: row.0.bytes_sent, packets_sent: row.0.packets_sent, rate_estimate_bps: row.0.rate_estimate_bps, - retries: row.0.retries, + tcp_retransmits: row.0.tcp_retransmits, last_rtt: row.0.last_rtt, end_status: row.0.end_status, tos: row.0.tos, diff --git a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs index 2d2f3a54..32849e87 100644 --- a/src/rust/lqosd/src/throughput_tracker/tracking_data.rs +++ b/src/rust/lqosd/src/throughput_tracker/tracking_data.rs @@ -201,7 +201,7 @@ impl ThroughputTracker { this_flow.0.bytes_sent = data.bytes_sent; this_flow.0.packets_sent = data.packets_sent; this_flow.0.rate_estimate_bps = data.rate_estimate_bps; - this_flow.0.retries = data.retries; + this_flow.0.tcp_retransmits = data.tcp_retransmits; this_flow.0.last_rtt = data.last_rtt; this_flow.0.end_status = data.end_status; this_flow.0.tos = data.tos; From fb91e8313ab8b60f8f08934ee85f9c2bf942ab22 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 13 Mar 2024 14:45:53 -0500 Subject: [PATCH 067/103] Very silly commit - adds a 
/showoff page to the node manager firing particles from all endpoints at my ISP in Missouri. Will turn into something useful in the future. --- src/rust/lqos_bus/src/bus/request.rs | 3 +++ src/rust/lqos_bus/src/bus/response.rs | 3 +++ .../lqos_node_manager/src/flow_monitor.rs | 12 +++++++++ src/rust/lqos_node_manager/src/main.rs | 2 ++ .../lqos_node_manager/src/static_pages.rs | 8 ++++++ src/rust/lqosd/src/main.rs | 1 + .../flow_data/flow_analysis/asn.rs | 15 +++++++++++ .../flow_data/flow_analysis/finished_flows.rs | 25 +++++++++++++++++-- .../flow_data/flow_analysis/mod.rs | 9 +++++++ src/rust/lqosd/src/throughput_tracker/mod.rs | 8 +++++- 10 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/rust/lqos_bus/src/bus/request.rs b/src/rust/lqos_bus/src/bus/request.rs index f5920f47..05442cc5 100644 --- a/src/rust/lqos_bus/src/bus/request.rs +++ b/src/rust/lqos_bus/src/bus/request.rs @@ -170,6 +170,9 @@ pub enum BusRequest { /// Current Endpoints by Country CurrentEndpointsByCountry, + + /// Lat/Lon of Endpoints + CurrentEndpointLatLon, } /// Defines the type of "top" flow being requested diff --git a/src/rust/lqos_bus/src/bus/response.rs b/src/rust/lqos_bus/src/bus/response.rs index 09f84aa5..b600237e 100644 --- a/src/rust/lqos_bus/src/bus/response.rs +++ b/src/rust/lqos_bus/src/bus/response.rs @@ -128,4 +128,7 @@ pub enum BusResponse { /// Current endpoints by country CurrentEndpointsByCountry(Vec<(String, [u64; 2], [f32; 2])>), + + /// Current Lat/Lon of endpoints + CurrentLatLon(Vec<(f64, f64)>), } diff --git a/src/rust/lqos_node_manager/src/flow_monitor.rs b/src/rust/lqos_node_manager/src/flow_monitor.rs index 56808c61..b3c422bc 100644 --- a/src/rust/lqos_node_manager/src/flow_monitor.rs +++ b/src/rust/lqos_node_manager/src/flow_monitor.rs @@ -56,5 +56,17 @@ pub async fn flows_by_country() -> NoCache Vec::new(), }; + NoCache::new(Json(result)) +} + +#[get("/api/flows/lat_lon")] +pub async fn flows_lat_lon() -> NoCache>> { + let responses = + bus_request(vec![BusRequest::CurrentEndpointLatLon]).await.unwrap(); + let result = match &responses[0] { + BusResponse::CurrentLatLon(lat_lon) => lat_lon.to_owned(), + _ => Vec::new(), + }; + NoCache::new(Json(result)) } \ No newline at end of file diff --git a/src/rust/lqos_node_manager/src/main.rs b/src/rust/lqos_node_manager/src/main.rs index 851dd942..6f9647cd 100644 --- a/src/rust/lqos_node_manager/src/main.rs +++ b/src/rust/lqos_node_manager/src/main.rs @@ -44,6 +44,7 @@ fn rocket() -> _ { static_pages::shaped_devices_add_page, static_pages::unknown_devices_page, static_pages::circuit_queue, + static_pages::pretty_map_graph, config_control::config_page, network_tree::tree_page, static_pages::ip_dump, @@ -115,6 +116,7 @@ fn rocket() -> _ { flow_monitor::count_flows, flow_monitor::top_5_flows, flow_monitor::flows_by_country, + flow_monitor::flows_lat_lon, ], ); diff --git a/src/rust/lqos_node_manager/src/static_pages.rs b/src/rust/lqos_node_manager/src/static_pages.rs index e3b07436..eae42154 100644 --- a/src/rust/lqos_node_manager/src/static_pages.rs +++ b/src/rust/lqos_node_manager/src/static_pages.rs @@ -75,6 +75,14 @@ pub async fn shaped_devices_add_page<'a>( NoCache::new(NamedFile::open("static/shaped-add.html").await.ok()) } +// Temporary for funsies +#[get("/showoff")] +pub async fn pretty_map_graph<'a>( + _auth: AuthGuard, +) -> NoCache> { + NoCache::new(NamedFile::open("static/showoff.html").await.ok()) +} + #[get("/vendor/bootstrap.min.css")] pub async fn bootsrap_css<'a>() -> LongCache> { 
LongCache::new(NamedFile::open("static/vendor/bootstrap.min.css").await.ok()) diff --git a/src/rust/lqosd/src/main.rs b/src/rust/lqosd/src/main.rs index 771bd3f0..61ec9acb 100644 --- a/src/rust/lqosd/src/main.rs +++ b/src/rust/lqosd/src/main.rs @@ -232,6 +232,7 @@ fn handle_bus_requests( BusRequest::TopFlows { n, flow_type } => throughput_tracker::top_flows(*n, *flow_type), BusRequest::FlowsByIp(ip) => throughput_tracker::flows_by_ip(ip), BusRequest::CurrentEndpointsByCountry => throughput_tracker::current_endpoints_by_country(), + BusRequest::CurrentEndpointLatLon => throughput_tracker::current_lat_lon(), }); } } diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs index 68641c13..fb3b35c1 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/asn.rs @@ -136,6 +136,21 @@ impl GeoTable { (owners, country) } + + pub fn find_lat_lon_by_ip(&self, ip: IpAddr) -> (f64, f64) { + log::debug!("Looking up ASN for IP: {:?}", ip); + let ip = match ip { + IpAddr::V4(ip) => ip.to_ipv6_mapped(), + IpAddr::V6(ip) => ip, + }; + + if let Some(matched) = self.geo_trie.longest_match(ip) { + log::debug!("Matched Geo: {:?}", matched.1.city_and_country); + return (matched.1.latitude, matched.1.longitude); + } + + (0.0, 0.0) + } } /////////////////////////////////////////////////////////////////////// diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 6c512322..16d14f16 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -1,8 +1,8 @@ -use super::{get_asn_name_and_country, FlowAnalysis}; +use super::{get_asn_lat_lon, get_asn_name_and_country, FlowAnalysis}; use crate::throughput_tracker::flow_data::FlowbeeRecipient; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -use std::sync::{Arc, Mutex}; +use std::{alloc::LayoutError, sync::{Arc, Mutex}}; pub struct TimeBuffer { buffer: Mutex>, @@ -34,6 +34,27 @@ impl TimeBuffer { buffer.push(entry); } + pub fn lat_lon_endpoints(&self) -> Vec<(f64, f64)> { + let buffer = self.buffer.lock().unwrap(); + let mut my_buffer = buffer + .iter() + .map(|v| { + let (key, _data, _analysis) = &v.data; + let (lat, lon) = get_asn_lat_lon(key.remote_ip.as_ip()); + (lat, lon) + }) + .filter(|(lat, lon)| *lat != 0.0 && *lon != 0.0) + .collect::>(); + + // Sort by lat/lon + my_buffer.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + + // Depuplicate + my_buffer.dedup(); + + my_buffer + } + pub fn country_summary(&self) -> Vec<(String, [u64; 2], [f32; 2])> { let buffer = self.buffer.lock().unwrap(); let mut my_buffer = buffer diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs index 53917621..4db1f7ee 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/mod.rs @@ -82,3 +82,12 @@ pub fn get_asn_name_and_country(ip: IpAddr) -> (String, String) { } (String::new(), String::new()) } + +pub fn get_asn_lat_lon(ip: IpAddr) -> (f64, f64) { + if let Ok(table_lock) = ANALYSIS.asn_table.lock() { + if let Some(table) = 
table_lock.as_ref() { + return table.find_lat_lon_by_ip(ip); + } + } + (0.0, 0.0) +} \ No newline at end of file diff --git a/src/rust/lqosd/src/throughput_tracker/mod.rs b/src/rust/lqosd/src/throughput_tracker/mod.rs index ef6d103d..afdea101 100644 --- a/src/rust/lqosd/src/throughput_tracker/mod.rs +++ b/src/rust/lqosd/src/throughput_tracker/mod.rs @@ -657,4 +657,10 @@ pub fn flows_by_ip(ip: &str) -> BusResponse { pub fn current_endpoints_by_country() -> BusResponse { let summary = flow_data::RECENT_FLOWS.country_summary(); BusResponse::CurrentEndpointsByCountry(summary) -} \ No newline at end of file +} + +/// Current endpoint lat/lon +pub fn current_lat_lon() -> BusResponse { + let summary = flow_data::RECENT_FLOWS.lat_lon_endpoints(); + BusResponse::CurrentLatLon(summary) +} From eab9df4874e33719dc309e151eb4c35acfa7e930 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Wed, 13 Mar 2024 14:56:59 -0500 Subject: [PATCH 068/103] Missing file from previous commit --- .../lqos_node_manager/static/showoff.html | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/rust/lqos_node_manager/static/showoff.html diff --git a/src/rust/lqos_node_manager/static/showoff.html b/src/rust/lqos_node_manager/static/showoff.html new file mode 100644 index 00000000..e4b7cfec --- /dev/null +++ b/src/rust/lqos_node_manager/static/showoff.html @@ -0,0 +1,114 @@ [The 114 added lines of showoff.html are not recoverable: the HTML/JavaScript markup was stripped during extraction, leaving only bare "+" markers.]
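The lat/lon plumbing added in the two commits above can be exercised from any lqos_bus client, not only the node manager's Rocket route. A minimal sketch, assuming a tokio runtime is available and using the same `bus_request` helper the routes use; the standalone `main` wrapper is illustrative only and not part of the patch:

    use lqos_bus::{bus_request, BusRequest, BusResponse};

    #[tokio::main]
    async fn main() {
        // Ask lqosd for the coordinates of recently seen remote endpoints,
        // mirroring what the node manager's /api/flows/lat_lon route does.
        let responses = bus_request(vec![BusRequest::CurrentEndpointLatLon]).await.unwrap();
        let points: Vec<(f64, f64)> = match &responses[0] {
            BusResponse::CurrentLatLon(lat_lon) => lat_lon.to_owned(),
            _ => Vec::new(),
        };
        for (lat, lon) in &points {
            println!("endpoint at {lat:.4}, {lon:.4}");
        }
    }

Entries with a (0.0, 0.0) location are already filtered out on the lqosd side, so the list only contains endpoints the GeoIP table could place.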
From fbb3960b5009ca9c4473fbbdeca3ca2793a8b07a Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 14 Mar 2024 08:52:36 -0500 Subject: [PATCH 069/103] Warning fix --- .../flow_data/flow_analysis/finished_flows.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs index 16d14f16..c473e941 100644 --- a/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs +++ b/src/rust/lqosd/src/throughput_tracker/flow_data/flow_analysis/finished_flows.rs @@ -2,7 +2,7 @@ use super::{get_asn_lat_lon, get_asn_name_and_country, FlowAnalysis}; use crate::throughput_tracker::flow_data::FlowbeeRecipient; use lqos_sys::flowbee_data::{FlowbeeData, FlowbeeKey}; use once_cell::sync::Lazy; -use std::{alloc::LayoutError, sync::{Arc, Mutex}}; +use std::sync::{Arc, Mutex}; pub struct TimeBuffer { buffer: Mutex>, From 8ec361b0954879e3147ba31b07355edce640b05b Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 14 Mar 2024 10:42:22 -0500 Subject: [PATCH 070/103] Fix a reporting message that was meant to be compile-conditional. --- src/rust/lqos_sys/src/bpf/common/flows.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index f3de6202..d19e3f72 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -271,7 +271,9 @@ static __always_inline void process_tcp( struct flow_data_t *data = bpf_map_lookup_elem(&flowbee, &key); if (data == NULL) { // If it isn't a flow we're tracking, bail out now + #ifdef VERBOSE bpf_debug("Bailing"); + #endif return; } From a937820a87df31f46034e800a4d96bfcc6390d84 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 14 Mar 2024 11:03:43 -0500 Subject: [PATCH 071/103] Move rate estimate from 64-bits to 32-bits, saving some bytes.
--- src/rust/lqos_bus/src/ip_stats.rs | 2 +- src/rust/lqos_sys/src/bpf/common/flows.h | 6 +++--- src/rust/lqos_sys/src/flowbee_data.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index ea11c7c3..4132fedf 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -149,7 +149,7 @@ pub struct FlowbeeData { /// Packets transmitted pub packets_sent: [u64; 2], /// Rate estimate - pub rate_estimate_bps: [u64; 2], + pub rate_estimate_bps: [u32; 2], /// TCP Retransmission count (also counts duplicates) pub tcp_retransmits: [u32; 2], /// Most recent RTT diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index d19e3f72..5b167f00 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -50,7 +50,7 @@ struct flow_data_t { // Bytes at the next rate estimate __u64 next_count_bytes[2]; // Rate estimate - __u64 rate_estimate_bps[2]; + __u32 rate_estimate_bps[2]; // Sequence number of the last packet __u32 last_sequence[2]; // Acknowledgement number of the last packet @@ -292,8 +292,8 @@ static __always_inline void process_tcp( if ( tsecr == data->tsval[other_rate_index] && - (data->rate_estimate_bps[rate_index] > HALF_MBPS_IN_BYTES_PER_SECOND || - data->rate_estimate_bps[other_rate_index] > HALF_MBPS_IN_BYTES_PER_SECOND ) + (data->rate_estimate_bps[rate_index] > 0 || + data->rate_estimate_bps[other_rate_index] > 0 ) ) { __u64 elapsed = dissector->now - data->ts_change_time[other_rate_index]; if (elapsed < TWO_SECONDS_IN_NANOS) { diff --git a/src/rust/lqos_sys/src/flowbee_data.rs b/src/rust/lqos_sys/src/flowbee_data.rs index 00085261..da8afb23 100644 --- a/src/rust/lqos_sys/src/flowbee_data.rs +++ b/src/rust/lqos_sys/src/flowbee_data.rs @@ -42,7 +42,7 @@ pub struct FlowbeeData { /// Bytes at the next rate estimate pub next_count_bytes: [u64; 2], /// Rate estimate - pub rate_estimate_bps: [u64; 2], + pub rate_estimate_bps: [u32; 2], /// Sequence number of the last packet pub last_sequence: [u32; 2], /// Acknowledgement number of the last packet From 69508d2753f2f089f01e404297d494ed73d832d3 Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 14 Mar 2024 12:40:33 -0500 Subject: [PATCH 072/103] Remove two variables and use pointers instead to save stack space. 
--- src/rust/lqos_sys/src/bpf/common/flows.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rust/lqos_sys/src/bpf/common/flows.h b/src/rust/lqos_sys/src/bpf/common/flows.h index 5b167f00..7ad9dc21 100644 --- a/src/rust/lqos_sys/src/bpf/common/flows.h +++ b/src/rust/lqos_sys/src/bpf/common/flows.h @@ -284,14 +284,15 @@ static __always_inline void process_tcp( detect_retries(dissector, rate_index, data); // Timestamps to calculate RTT - u_int32_t tsval = dissector->tsval; - u_int32_t tsecr = dissector->tsecr; - if (tsval != 0) { + // Removed to save stack space + //u_int32_t tsval = dissector->tsval; + //u_int32_t tsecr = dissector->tsecr; + if (dissector->tsval != 0) { //bpf_debug("[FLOWS][%d] TSVAL: %u, TSECR: %u", direction, tsval, tsecr); - if (tsval != data->tsval[rate_index] && tsecr != data->tsecr[rate_index]) { + if (dissector->tsval != data->tsval[rate_index] && dissector->tsecr != data->tsecr[rate_index]) { if ( - tsecr == data->tsval[other_rate_index] && + dissector->tsecr == data->tsval[other_rate_index] && (data->rate_estimate_bps[rate_index] > 0 || data->rate_estimate_bps[other_rate_index] > 0 ) ) { @@ -303,8 +304,8 @@ static __always_inline void process_tcp( } data->ts_change_time[rate_index] = dissector->now; - data->tsval[rate_index] = tsval; - data->tsecr[rate_index] = tsecr; + data->tsval[rate_index] = dissector->tsval; + data->tsecr[rate_index] = dissector->tsecr; } } From 56dd4b67507abb2cab7584680aafafd6ecfcc98f Mon Sep 17 00:00:00 2001 From: Herbert Wolverson Date: Thu, 14 Mar 2024 14:07:06 -0500 Subject: [PATCH 073/103] It's not quite done yet, but I spent the last 2 hours matching observed RTT, checking with Wireshark, and looking to see if the numbers line up. Implements a small buffer for RTT values per flow. Shrinks some stack entries. Will require a map rebuild. --- src/rust/lqos_bus/src/ip_stats.rs | 8 ++-- src/rust/lqos_node_manager/static/main.html | 25 +++++++++--- src/rust/lqos_sys/src/bpf/common/flows.h | 29 ++++++++------ src/rust/lqos_sys/src/flowbee_data.rs | 35 +++++++++++++++-- .../flow_data/flow_analysis/finished_flows.rs | 5 +-- src/rust/lqosd/src/throughput_tracker/mod.rs | 17 ++++++--- .../throughput_tracker/throughput_entry.rs | 2 +- .../src/throughput_tracker/tracking_data.rs | 38 +++++++++++-------- 8 files changed, 110 insertions(+), 49 deletions(-) diff --git a/src/rust/lqos_bus/src/ip_stats.rs b/src/rust/lqos_bus/src/ip_stats.rs index 4132fedf..3c12189b 100644 --- a/src/rust/lqos_bus/src/ip_stats.rs +++ b/src/rust/lqos_bus/src/ip_stats.rs @@ -151,9 +151,11 @@ pub struct FlowbeeData { /// Rate estimate pub rate_estimate_bps: [u32; 2], /// TCP Retransmission count (also counts duplicates) - pub tcp_retransmits: [u32; 2], - /// Most recent RTT - pub last_rtt: [u64; 2], + pub tcp_retransmits: [u16; 2], + /// RTT Ringbuffer index + pub rtt_index: [u8; 2], + /// RTT Ringbuffers + pub rtt_ringbuffer: [[u16; 4]; 2], /// Has the connection ended? /// 0 = Alive, 1 = FIN, 2 = RST pub end_status: u8, diff --git a/src/rust/lqos_node_manager/static/main.html b/src/rust/lqos_node_manager/static/main.html index 534ebc9a..dddb5003 100644 --- a/src/rust/lqos_node_manager/static/main.html +++ b/src/rust/lqos_node_manager/static/main.html @@ -171,7 +171,7 @@
© 2022-2023, LibreQoE LLC
[The remaining static/main.html hunk bodies are not recoverable: the HTML markup was stripped during extraction, leaving only diff "+"/"-" markers and a "Refresh" button label.]
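For reference while reading the rest of this patch: the new `rtt_index`/`rtt_ringbuffer` fields in `FlowbeeData` replace the single `last_rtt` value with a small per-direction history of RTT samples. A minimal sketch of how such a fixed-size ring can be filled and summarised; the struct name, the median summary, and the zero-means-empty convention are assumptions for illustration only (the hunk above defines just the storage layout):

    /// Illustrative only: a per-direction RTT ring shaped like the
    /// `rtt_ringbuffer`/`rtt_index` fields added to `FlowbeeData`.
    #[derive(Default)]
    struct RttRing {
        samples: [[u16; 4]; 2], // one 4-slot ring per direction
        index: [u8; 2],         // next write position per direction
    }

    impl RttRing {
        /// Record a new sample for direction 0 (download) or 1 (upload).
        fn push(&mut self, direction: usize, rtt: u16) {
            let slot = self.index[direction] as usize % 4;
            self.samples[direction][slot] = rtt;
            self.index[direction] = self.index[direction].wrapping_add(1);
        }

        /// Median of the non-zero samples seen so far; None until one exists.
        fn median(&self, direction: usize) -> Option<u16> {
            let mut seen: Vec<u16> = self.samples[direction]
                .iter()
                .copied()
                .filter(|r| *r != 0)
                .collect();
            if seen.is_empty() {
                return None;
            }
            seen.sort_unstable();
            Some(seen[seen.len() / 2])
        }
    }

Keeping a few recent samples rather than the last observation makes the reported RTT much less sensitive to a single delayed ACK, at the cost of a few bytes per flow in the map.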