bpf_pcap() simplifies packet capture for skb and XDP BPF programs by creating a BPF perf event containing information relevant for packet capture (protocol, actual/captured packet size, time of capture, etc) along with the packet payload itself. All of this is stored in a "struct bpf_pcap_hdr". This header information can then be retrieved from the perf event map and used by packet capture frameworks such as libpcap to carry out packet capture. skb and XDP programs currently deal in Ethernet-based traffic exclusively, so should specify BPF_PCAP_TYPE_ETH or BPF_PCAP_TYPE_UNSET. The protocol parameter will be used in a later commit. Note that libpcap assumes times are relative to the epoch while we record nanoseconds since boot; as a result any times need to be normalized with respect to the boot time for libpcap storage; sysinfo(2) can be used to retrieve boot time to normalize values appropriately. Example usage for a tc-bpf program: struct bpf_map_def SEC("maps") pcap_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1024, }; SEC("cap") int cap(struct __sk_buff *skb) { bpf_pcap(skb, 1514, &pcap_map, BPF_PCAP_TYPE_ETH, 0); return TC_ACT_OK; } Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx> --- include/linux/bpf.h | 20 +++++++++++++ include/uapi/linux/bpf.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/bpf/verifier.c | 4 ++- net/core/filter.c | 67 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b9d223..033c9cf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1145,4 +1145,24 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, } #endif /* CONFIG_INET */ + +static inline int bpf_pcap_prepare(int protocol, u32 cap_len, u32 tot_len, + u64 flags, struct bpf_pcap_hdr *pcap) +{ + if (protocol < 0 || pcap == NULL) + return -EINVAL; + + 
pcap->magic = BPF_PCAP_MAGIC; + pcap->protocol = protocol; + pcap->flags = flags; + + if (cap_len == 0 || tot_len < cap_len) + cap_len = tot_len; + pcap->cap_len = cap_len; + pcap->tot_len = tot_len; + pcap->ktime_ns = ktime_get_mono_fast_ns(); + + return 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77c6be9..a27e58e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2750,6 +2750,39 @@ struct bpf_stack_build_id { * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 + * + * int bpf_pcap(void *data, u32 size, struct bpf_map *map, int protocol, + * u64 flags) + * Description + * Write packet data from *data* into a special BPF perf event + * held by *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This + * perf event has the same attributes as perf events generated + * by bpf_perf_event_output. For skb and XDP programs, *data* + * is the relevant context. + * + * Metadata for this event is a **struct bpf_pcap_hdr**; this + * contains the capture length, actual packet length and + * the starting protocol. + * + * The max number of bytes of context to store is specified via + * *size*. + * + * The flags value can be used to specify an id value of up + * to 48 bits; the id can be used to correlate captured packets + * with other trace data, since the passed-in flags value is + * stored in the **struct bpf_pcap_hdr** in the **flags** field. + * + * The *protocol* value specifies the protocol type of the start + * of the packet so that packet capture can carry out + * interpretation. See **pcap-linktype** (7) for details on + * the supported values. + * + * Return + * 0 on success, or a negative error in case of failure. + * -ENOENT will be returned if the associated perf event + * map entry is empty, or the skb is zero-length. + * -EINVAL will be returned if the flags value is invalid. 
+ * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2862,7 +2895,8 @@ struct bpf_stack_build_id { FN(sk_storage_get), \ FN(sk_storage_delete), \ FN(send_signal), \ - FN(tcp_gen_syncookie), + FN(tcp_gen_syncookie), \ + FN(pcap), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2941,6 +2975,9 @@ enum bpf_func_id { /* BPF_FUNC_sk_storage_get flags */ #define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +/* BPF_FUNC_pcap flags */ +#define BPF_F_PCAP_ID_MASK 0xffffffffffff + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -3613,4 +3650,40 @@ struct bpf_sockopt { __s32 retval; }; +/* bpf_pcap_hdr contains information related to a particular packet capture + * flow. It specifies + * + * - a magic number BPF_PCAP_MAGIC which identifies the perf event as + * a pcap-related event. + * - a starting protocol, i.e. the protocol type of the start of the packet. + * - a flags value, copied from the flags value passed into bpf_pcap(). + * IDs can be used to correlate packet capture data and other tracing data. + * + * bpf_pcap_hdr also contains the information relating to the to-be-captured + * packet, and closely corresponds to the struct pcap_pkthdr used by + * pcap_dump (3PCAP). The bpf_pcap helper sets ktime_ns to the capture time + * in nanoseconds since boot; to get sensible pcap times this value should + * be converted to a struct timeval time since epoch in the struct pcap_pkthdr. + * + * When bpf_pcap() is used, a "struct bpf_pcap_hdr" is stored as we + * need both information about the particular packet and the protocol + * we are capturing. 
+ */ + +#define BPF_PCAP_MAGIC 0xb7fca7 + +struct bpf_pcap_hdr { + __u32 magic; + int protocol; + __u64 flags; + __u64 ktime_ns; + __u32 tot_len; + __u32 cap_len; + __u8 data[0]; +}; + +#define BPF_PCAP_TYPE_UNSET -1 +#define BPF_PCAP_TYPE_ETH 1 +#define BPF_PCAP_TYPE_IP 12 + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fb5075..a33ed24 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3440,7 +3440,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_PERF_EVENT_ARRAY: if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && - func_id != BPF_FUNC_perf_event_read_value) + func_id != BPF_FUNC_perf_event_read_value && + func_id != BPF_FUNC_pcap) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -3527,6 +3528,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: + case BPF_FUNC_pcap: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/net/core/filter.c b/net/core/filter.c index ed65636..e0e23ee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4158,6 +4158,35 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +BPF_CALL_5(bpf_xdp_pcap, struct xdp_buff *, xdp, u32, size, + struct bpf_map *, map, int, protocol, u64, flags) +{ + unsigned long len = (unsigned long)(xdp->data_end - xdp->data); + struct bpf_pcap_hdr pcap; + int ret; + + if (unlikely(flags & ~BPF_F_PCAP_ID_MASK)) + return -EINVAL; + + ret = bpf_pcap_prepare(protocol, size, len, flags, &pcap); + if (ret) + return ret; + + return bpf_event_output(map, BPF_F_CURRENT_CPU, &pcap, sizeof(pcap), + xdp->data, pcap.cap_len, bpf_xdp_copy); +} + +static const struct bpf_func_proto bpf_xdp_pcap_proto = { + .func = bpf_xdp_pcap, + .gpl_only = false, + .ret_type = 
RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_MAP_PTR, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? sock_gen_cookie(skb->sk) : 0; @@ -5926,6 +5955,34 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ +BPF_CALL_5(bpf_skb_pcap, struct sk_buff *, skb, u32, size, + struct bpf_map *, map, int, protocol, u64, flags) +{ + struct bpf_pcap_hdr pcap; + int ret; + + if (unlikely(flags & ~BPF_F_PCAP_ID_MASK)) + return -EINVAL; + + ret = bpf_pcap_prepare(protocol, size, skb->len, flags, &pcap); + if (ret) + return ret; + + return bpf_event_output(map, BPF_F_CURRENT_CPU, &pcap, sizeof(pcap), + skb, pcap.cap_len, bpf_skb_copy); +} + +static const struct bpf_func_proto bpf_skb_pcap_proto = { + .func = bpf_skb_pcap, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_MAP_PTR, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -6075,6 +6132,8 @@ bool bpf_helper_changes_pkt_data(void *func) return &bpf_get_socket_uid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; + case BPF_FUNC_pcap: + return &bpf_skb_pcap_proto; default: return bpf_base_func_proto(func_id); } @@ -6216,6 +6275,8 @@ bool bpf_helper_changes_pkt_data(void *func) case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; #endif + case BPF_FUNC_pcap: + return &bpf_skb_pcap_proto; default: return bpf_base_func_proto(func_id); } @@ -6256,6 +6317,8 @@ bool bpf_helper_changes_pkt_data(void *func) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; + case BPF_FUNC_pcap: + return &bpf_xdp_pcap_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6361,6 +6424,8 @@ 
bool bpf_helper_changes_pkt_data(void *func) case BPF_FUNC_skc_lookup_tcp: return &bpf_skc_lookup_tcp_proto; #endif + case BPF_FUNC_pcap: + return &bpf_skb_pcap_proto; default: return bpf_base_func_proto(func_id); } @@ -6399,6 +6464,8 @@ bool bpf_helper_changes_pkt_data(void *func) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; + case BPF_FUNC_pcap: + return &bpf_skb_pcap_proto; default: return bpf_base_func_proto(func_id); } -- 1.8.3.1