Add support to the xdp_fwd sample for queueing packets before forwarding
them. This is meant to serve as an example (for the RFC series) of how one
could add queueing to a forwarding application. It doesn't actually
implement any fancy queueing algorithms; it just uses the queue maps to do
simple FIFO queueing, instantiating one queue map per interface.

Queueing is enabled with the new -Q option. In that mode the xdp_fwd_queue
program redirects each forwarded packet into the PIFO map registered for
the egress ifindex and schedules a dequeue on that interface, while the
xdp_dequeue program (attached with XDP_FLAGS_DEQUEUE_MODE) pulls packets
back out of the same map. On detach, the attached programs are matched by
name, so that only programs carrying this sample's program names are
removed.

Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
---
 samples/bpf/xdp_fwd_kern.c |  66 +++++++++++-
 samples/bpf/xdp_fwd_user.c | 205 +++++++++++++++++++++++++++----------
 2 files changed, 211 insertions(+), 60 deletions(-)

diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index 54c099cbd639..125adb02c658 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -23,6 +23,14 @@
 
 #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
 
+struct pifo_map {
+	__uint(type, BPF_MAP_TYPE_PIFO_XDP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+	__uint(max_entries, 1024);
+	__uint(map_extra, 8192); /* range */
+} pmap SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_DEVMAP);
 	__uint(key_size, sizeof(int));
@@ -30,6 +38,13 @@ struct {
 	__uint(max_entries, 64);
 } xdp_tx_ports SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(key_size, sizeof(__u32));
+	__uint(max_entries, 64);
+	__array(values, struct pifo_map);
+} pifo_maps SEC(".maps");
+
 /* from include/net/ip.h */
 static __always_inline int ip_decrease_ttl(struct iphdr *iph)
 {
@@ -40,7 +55,7 @@ static __always_inline int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
-static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags, bool queue)
 {
 	void *data_end = (void *)(long)ctx->data_end;
 	void *data = (void *)(long)ctx->data;
@@ -137,22 +152,63 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 
 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+		if (queue) {
+			void *ptr;
+			int ret;
+
+			ptr = bpf_map_lookup_elem(&pifo_maps, &fib_params.ifindex);
+			if (!ptr)
+				return XDP_DROP;
+
+			ret = bpf_redirect_map(ptr, 0, 0);
+			if (ret == XDP_REDIRECT)
+				bpf_schedule_iface_dequeue(ctx, fib_params.ifindex, 0);
+			return ret;
+		}
+
 		return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
 	}
 
 	return XDP_PASS;
 }
 
-SEC("xdp_fwd")
+SEC("xdp")
 int xdp_fwd_prog(struct xdp_md *ctx)
 {
-	return xdp_fwd_flags(ctx, 0);
+	return xdp_fwd_flags(ctx, 0, false);
 }
 
-SEC("xdp_fwd_direct")
+SEC("xdp")
 int xdp_fwd_direct_prog(struct xdp_md *ctx)
 {
-	return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+	return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT, false);
+}
+
+SEC("xdp")
+int xdp_fwd_queue(struct xdp_md *ctx)
+{
+	return xdp_fwd_flags(ctx, 0, true);
+}
+
+/* Dequeue hook: hand back the next packet queued for the egress interface */
+SEC("dequeue")
+void *xdp_dequeue(struct dequeue_ctx *ctx)
+{
+	__u32 ifindex = ctx->egress_ifindex;
+	struct xdp_md *pkt;
+	__u64 prio = 0;
+	void *pifo_ptr;
+
+	pifo_ptr = bpf_map_lookup_elem(&pifo_maps, &ifindex);
+	if (!pifo_ptr)
+		return NULL;
+
+	pkt = (void *)bpf_packet_dequeue(ctx, pifo_ptr, 0, &prio);
+	if (!pkt)
+		return NULL;
+
+	return pkt;
 }
 
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 84f57f1209ce..ec3f29d0babe 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -29,66 +29,127 @@
 
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 
-static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+const char *redir_prog_names[] = {
+	"xdp_fwd_prog",
+	"xdp_fwd_direct_", /* name truncated to BPF_OBJ_NAME_LEN */
+	"xdp_fwd_queue",
+};
+
+const char *dequeue_prog_names[] = {
+	"xdp_dequeue"
+};
+
+static int do_attach(int idx, int redir_prog_fd, int dequeue_prog_fd,
+		     int redir_map_fd, int pifos_map_fd, const char *name)
 {
 	int err;
 
-	err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
+	if (pifos_map_fd > -1) {
+		LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_extra = 8192);
+		char map_name[BPF_OBJ_NAME_LEN];
+		int pifo_fd;
+
+		snprintf(map_name, sizeof(map_name), "pifo_%d", idx);
+		map_name[BPF_OBJ_NAME_LEN - 1] = '\0';
+
+		pifo_fd = bpf_map_create(BPF_MAP_TYPE_PIFO_XDP, map_name,
+					 sizeof(__u32), sizeof(__u32), 10240, &map_opts);
+		if (pifo_fd < 0) {
+			err = -errno;
+			printf("ERROR: Couldn't create PIFO map: %s\n", strerror(-err));
+			return err;
+		}
+
+		err = bpf_map_update_elem(pifos_map_fd, &idx, &pifo_fd, 0);
+		/* The outer map keeps its own reference; the fd is not needed anymore */
+		close(pifo_fd);
+		if (err) {
+			printf("ERROR: failed adding PIFO map for device %s\n", name);
+			return err;
+		}
+	}
+
+	if (dequeue_prog_fd > -1) {
+		LIBBPF_OPTS(bpf_xdp_attach_opts, prog_opts, .old_prog_fd = -1);
+
+		err = bpf_xdp_attach(idx, dequeue_prog_fd,
+				     (XDP_FLAGS_DEQUEUE_MODE | XDP_FLAGS_REPLACE),
+				     &prog_opts);
+		if (err < 0) {
+			printf("ERROR: failed to attach dequeue program to %s\n", name);
+			return err;
+		}
+	}
+
+	err = bpf_xdp_attach(idx, redir_prog_fd, xdp_flags, NULL);
 	if (err < 0) {
-		printf("ERROR: failed to attach program to %s\n", name);
+		printf("ERROR: failed to attach redir program to %s\n", name);
 		return err;
 	}
 
 	/* Adding ifindex as a possible egress TX port */
-	err = bpf_map_update_elem(map_fd, &idx, &idx, 0);
+	err = bpf_map_update_elem(redir_map_fd, &idx, &idx, 0);
 	if (err)
 		printf("ERROR: failed using device %s as TX-port\n", name);
 
 	return err;
 }
 
+/* Check whether the program behind prog_fd is named like one of prog_names */
+static bool should_detach(__u32 prog_fd, const char **prog_names, int num_prog_names)
+{
+	struct bpf_prog_info prog_info = {};
+	__u32 info_len = sizeof(prog_info);
+	int err, i;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+	if (err) {
+		printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
+		       strerror(errno));
+		return false;
+	}
+
+	for (i = 0; i < num_prog_names; i++)
+		if (!strcmp(prog_info.name, prog_names[i]))
+			return true;
+
+	return false;
+}
+
 static int do_detach(int ifindex, const char *ifname, const char *app_name)
 {
 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
-	struct bpf_prog_info prog_info = {};
-	char prog_name[BPF_OBJ_NAME_LEN];
-	__u32 info_len, curr_prog_id;
-	int prog_fd;
-	int err = 1;
+	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+	int prog_fd, err = 1;
+	__u32 curr_prog_id;
 
-	if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
-		printf("ERROR: bpf_xdp_query_id failed (%s)\n",
+	if (bpf_xdp_query(ifindex, xdp_flags, &query_opts)) {
+		printf("ERROR: bpf_xdp_query failed (%s)\n",
 		       strerror(errno));
 		return err;
 	}
 
+	curr_prog_id = (xdp_flags & XDP_FLAGS_SKB_MODE) ? query_opts.skb_prog_id
+		: query_opts.drv_prog_id;
 	if (!curr_prog_id) {
 		printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n",
 		       xdp_flags, ifname);
 		return err;
 	}
 
-	info_len = sizeof(prog_info);
 	prog_fd = bpf_prog_get_fd_by_id(curr_prog_id);
 	if (prog_fd < 0) {
 		printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
 		       strerror(errno));
-		return prog_fd;
-	}
-
-	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
-	if (err) {
-		printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
-		       strerror(errno));
-		goto close_out;
+		return err;
 	}
-	snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name);
-	prog_name[BPF_OBJ_NAME_LEN - 1] = '\0';
 
-	if (strcmp(prog_info.name, prog_name)) {
+	if (!should_detach(prog_fd, redir_prog_names, ARRAY_SIZE(redir_prog_names))) {
 		printf("ERROR: %s isn't attached to %s\n", app_name, ifname);
-		err = 1;
-		goto close_out;
+		close(prog_fd);
+		return 1;
 	}
 
 	opts.old_prog_fd = prog_fd;
@@ -96,11 +157,35 @@ static int do_detach(int ifindex, const char *ifname, const char *app_name)
 	if (err < 0)
 		printf("ERROR: failed to detach program from %s (%s)\n",
 		       ifname, strerror(errno));
+
+	close(prog_fd);
+
+	if (query_opts.dequeue_prog_id) {
+		prog_fd = bpf_prog_get_fd_by_id(query_opts.dequeue_prog_id);
+		if (prog_fd < 0) {
+			printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
+			       strerror(errno));
+			return 1;
+		}
+
+		if (!should_detach(prog_fd, dequeue_prog_names, ARRAY_SIZE(dequeue_prog_names))) {
+			close(prog_fd);
+			return err;
+		}
+
+		opts.old_prog_fd = prog_fd;
+		err = bpf_xdp_detach(ifindex,
+				     (XDP_FLAGS_DEQUEUE_MODE | XDP_FLAGS_REPLACE),
+				     &opts);
+		if (err < 0)
+			printf("ERROR: failed to detach dequeue program from %s (%s)\n",
+			       ifname, strerror(errno));
+		close(prog_fd);
+	}
+
 	/* TODO: Remember to cleanup map, when adding use of shared map
 	 * bpf_map_delete_elem((map_fd, &idx);
 	 */
-close_out:
-	close(prog_fd);
 	return err;
 }
 
@@ -112,24 +197,23 @@ static void usage(const char *prog)
 		" -d detach program\n"
 		" -S use skb-mode\n"
 		" -F force loading prog\n"
-		" -D direct table lookups (skip fib rules)\n",
+		" -D direct table lookups (skip fib rules)\n"
+		" -Q queue packets before forwarding (per-interface FIFO)\n",
 		prog);
 }
 
 int main(int argc, char **argv)
 {
-	const char *prog_name = "xdp_fwd";
-	struct bpf_program *prog = NULL;
-	struct bpf_program *pos;
-	const char *sec_name;
-	int prog_fd = -1, map_fd = -1;
+	int redir_prog_fd = -1, dequeue_prog_fd = -1, redir_map_fd = -1, pifos_map_fd = -1;
+	const char *prog_name = "xdp_fwd_prog";
 	char filename[PATH_MAX];
 	struct bpf_object *obj;
 	int opt, i, idx, err;
+	bool queue = false;
 	int attach = 1;
 	int ret = 0;
 
-	while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
+	while ((opt = getopt(argc, argv, ":dDQSF")) != -1) {
 		switch (opt) {
 		case 'd':
 			attach = 0;
@@ -141,7 +225,11 @@ int main(int argc, char **argv)
 			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
 			break;
 		case 'D':
-			prog_name = "xdp_fwd_direct";
+			prog_name = "xdp_fwd_direct_prog";
+			break;
+		case 'Q':
+			prog_name = "xdp_fwd_queue";
+			queue = true;
 			break;
 		default:
 			usage(basename(argv[0]));
@@ -170,9 +258,6 @@ int main(int argc, char **argv)
 		if (libbpf_get_error(obj))
 			return 1;
 
-		prog = bpf_object__next_program(obj, NULL);
-		bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
 		err = bpf_object__load(obj);
 		if (err) {
 			printf("Does kernel support devmap lookup?\n");
@@ -181,25 +266,34 @@ int main(int argc, char **argv)
 			 */
 			return 1;
 		}
-
-		bpf_object__for_each_program(pos, obj) {
-			sec_name = bpf_program__section_name(pos);
-			if (sec_name && !strcmp(sec_name, prog_name)) {
-				prog = pos;
-				break;
-			}
-		}
-		prog_fd = bpf_program__fd(prog);
-		if (prog_fd < 0) {
-			printf("program not found: %s\n", strerror(prog_fd));
+		redir_prog_fd = bpf_program__fd(bpf_object__find_program_by_name(obj,
+										 prog_name));
+		if (redir_prog_fd < 0) {
+			printf("program not found: %s\n", strerror(-redir_prog_fd));
 			return 1;
 		}
-		map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
-							"xdp_tx_ports"));
-		if (map_fd < 0) {
-			printf("map not found: %s\n", strerror(map_fd));
+
+		redir_map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+									"xdp_tx_ports"));
+		if (redir_map_fd < 0) {
+			printf("map not found: %s\n", strerror(-redir_map_fd));
 			return 1;
 		}
+
+		if (queue) {
+			dequeue_prog_fd = bpf_program__fd(bpf_object__find_program_by_name(obj,
+											   "xdp_dequeue"));
+			if (dequeue_prog_fd < 0) {
+				printf("dequeue program not found: %s\n",
+				       strerror(-dequeue_prog_fd));
+				return 1;
+			}
+			pifos_map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, "pifo_maps"));
+			if (pifos_map_fd < 0) {
+				printf("map not found: %s\n", strerror(-pifos_map_fd));
+				return 1;
+			}
+		}
 	}
 
 	for (i = optind; i < argc; ++i) {
@@ -212,11 +306,12 @@ int main(int argc, char **argv)
 			return 1;
 		}
 		if (!attach) {
-			err = do_detach(idx, argv[i], prog_name);
+			err = do_detach(idx, argv[i], argv[0]);
 			if (err)
 				ret = err;
 		} else {
-			err = do_attach(idx, prog_fd, map_fd, argv[i]);
+			err = do_attach(idx, redir_prog_fd, dequeue_prog_fd,
+					redir_map_fd, pifos_map_fd, argv[i]);
 			if (err)
 				ret = err;
 		}
-- 
2.37.0