Björn Töpel <bjorn.topel@xxxxxxxxx> writes: > From: Björn Töpel <bjorn.topel@xxxxxxxxx> > > The XDP_REDIRECT implementations for maps and non-maps are fairly > similar, but obviously need to take different code paths depending on > if the target is using a map or not. Today, the redirect targets for > XDP either uses a map, or is based on ifindex. > > Here, an explicit redirect type is added to bpf_redirect_info, instead > of the actual map. Redirect type, map item/ifindex, and the map_id (if > any) is passed to xdp_do_redirect(). > > In addition to making the code easier to follow, using an explicit > type in bpf_redirect_info has a slight positive performance impact by > avoiding a pointer indirection for the map type lookup, and instead > use the cacheline for bpf_redirect_info. > > Since the actual map is not passed via bpf_redirect_info anymore, the > map lookup is only done in the BPF helper. This means that the > bpf_clear_redirect_map() function can be removed. The actual map item > is RCU protected. > > The bpf_redirect_info flags member is not used by XDP, and not > read/written any more. The map member is only written to when > required/used, and not unconditionally. > > rfc->v1: Use map_id, and remove bpf_clear_redirect_map(). (Toke) > > Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx> > Signed-off-by: Björn Töpel <bjorn.topel@xxxxxxxxx> Very cool! Also a small nit below, but otherwise: Acked-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx> > --- > include/linux/filter.h | 11 ++- > include/trace/events/xdp.h | 66 +++++++++------ > kernel/bpf/cpumap.c | 1 - > kernel/bpf/devmap.c | 1 - > net/core/filter.c | 162 ++++++++++++++++--------------------- > net/xdp/xskmap.c | 1 - > 6 files changed, 121 insertions(+), 121 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 1dedcf66b694..1f3cf2a1e116 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -646,11 +646,20 @@ struct bpf_redirect_info { > u32 flags; > u32 tgt_index; > void *tgt_value; > - struct bpf_map *map; > + u32 map_id; > + u32 tgt_type; > u32 kern_flags; > struct bpf_nh_params nh; > }; > > +enum xdp_redirect_type { > + XDP_REDIR_UNSET, > + XDP_REDIR_DEV_IFINDEX, > + XDP_REDIR_DEV_MAP, > + XDP_REDIR_CPU_MAP, > + XDP_REDIR_XSK_MAP, > +}; > + > DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); > > /* flags for bpf_redirect_info kern_flags */ > diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h > index 76a97176ab81..538321735447 100644 > --- a/include/trace/events/xdp.h > +++ b/include/trace/events/xdp.h > @@ -86,19 +86,15 @@ struct _bpf_dtab_netdev { > }; > #endif /* __DEVMAP_OBJ_TYPE */ > > -#define devmap_ifindex(tgt, map) \ > - (((map->map_type == BPF_MAP_TYPE_DEVMAP || \ > - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \ > - ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0) > - > DECLARE_EVENT_CLASS(xdp_redirect_template, > > TP_PROTO(const struct net_device *dev, > const struct bpf_prog *xdp, > const void *tgt, int err, > - const struct bpf_map *map, u32 index), > + enum xdp_redirect_type type, > + const struct bpf_redirect_info *ri), > > - TP_ARGS(dev, xdp, tgt, err, map, index), > + TP_ARGS(dev, xdp, tgt, err, type, ri), > > TP_STRUCT__entry( > __field(int, prog_id) > @@ -111,14 +107,30 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, > ), > > TP_fast_assign( > + u32 ifindex = 0, map_id = 0, index = ri->tgt_index; > + > + switch (type) { > + case XDP_REDIR_DEV_MAP: > + ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex; > + fallthrough; > + case XDP_REDIR_CPU_MAP: > + case XDP_REDIR_XSK_MAP: > + map_id = ri->map_id; > + break; > + case XDP_REDIR_DEV_IFINDEX: > + ifindex = (u32)(long)tgt; > + break; > + default: > + break; > + } > + > __entry->prog_id = xdp->aux->id; > __entry->act = XDP_REDIRECT; > __entry->ifindex = dev->ifindex; > __entry->err = err; > - __entry->to_ifindex = map ? devmap_ifindex(tgt, map) : > - index; > - __entry->map_id = map ? map->id : 0; > - __entry->map_index = map ? index : 0; > + __entry->to_ifindex = ifindex; > + __entry->map_id = map_id; > + __entry->map_index = index; > ), > > TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" > @@ -133,45 +145,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, > TP_PROTO(const struct net_device *dev, > const struct bpf_prog *xdp, > const void *tgt, int err, > - const struct bpf_map *map, u32 index), > - TP_ARGS(dev, xdp, tgt, err, map, index) > + enum xdp_redirect_type type, > + const struct bpf_redirect_info *ri), > + TP_ARGS(dev, xdp, tgt, err, type, ri) > ); > > DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, > TP_PROTO(const struct net_device *dev, > const struct bpf_prog *xdp, > const void *tgt, int err, > - const struct bpf_map *map, u32 index), > - TP_ARGS(dev, xdp, tgt, err, map, index) > + enum xdp_redirect_type type, > + const struct bpf_redirect_info *ri), > + TP_ARGS(dev, xdp, tgt, err, type, ri) > ); > > #define _trace_xdp_redirect(dev, xdp, to) \ > - trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to) > + trace_xdp_redirect(dev, xdp, NULL, 0, XDP_REDIR_DEV_IFINDEX, NULL) > > #define _trace_xdp_redirect_err(dev, xdp, to, err) \ > - trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to) > + trace_xdp_redirect_err(dev, xdp, NULL, err, XDP_REDIR_DEV_IFINDEX, NULL) > > -#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ > - trace_xdp_redirect(dev, xdp, to, 0, map, index) > +#define _trace_xdp_redirect_map(dev, xdp, to, type, ri) \ > + trace_xdp_redirect(dev, xdp, to, 0, type, ri) > > -#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ > - trace_xdp_redirect_err(dev, xdp, to, err, map, index) > +#define _trace_xdp_redirect_map_err(dev, xdp, to, type, ri, err) \ > + trace_xdp_redirect_err(dev, xdp, to, err, type, ri) > > /* not used anymore, but kept around so as not to break old programs */ > DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, > TP_PROTO(const struct net_device *dev, > const struct bpf_prog *xdp, > const void *tgt, int err, > - const struct bpf_map *map, u32 index), > - TP_ARGS(dev, xdp, tgt, err, map, index) > + enum xdp_redirect_type type, > + const struct bpf_redirect_info *ri), > + TP_ARGS(dev, xdp, tgt, err, type, ri) > ); > > DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, > TP_PROTO(const struct net_device *dev, > const struct bpf_prog *xdp, > const void *tgt, int err, > - const struct bpf_map *map, u32 index), > - TP_ARGS(dev, xdp, tgt, err, map, index) > + enum xdp_redirect_type type, > + const struct bpf_redirect_info *ri), > + TP_ARGS(dev, xdp, tgt, err, type, ri) > ); > > TRACE_EVENT(xdp_cpumap_kthread, > diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c > index a4d2cb93cd69..b7f4d22f5c8d 100644 > --- a/kernel/bpf/cpumap.c > +++ b/kernel/bpf/cpumap.c > @@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map) > * complete. > */ > > - bpf_clear_redirect_map(map); > synchronize_rcu(); > > /* For cpu_map the remote CPUs can still be using the entries > diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c > index 37ac4cde9713..b5681a98020d 100644 > --- a/kernel/bpf/devmap.c > +++ b/kernel/bpf/devmap.c > @@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map) > list_del_rcu(&dtab->list); > spin_unlock(&dev_map_lock); > > - bpf_clear_redirect_map(map); > synchronize_rcu(); > > /* Make sure prior __dev_map_entry_free() have completed. */ > diff --git a/net/core/filter.c b/net/core/filter.c > index fd64d768e16a..56074b88d7e2 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3919,23 +3919,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { > .arg2_type = ARG_ANYTHING, > }; > > -static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, > - struct bpf_map *map, struct xdp_buff *xdp) > -{ > - switch (map->map_type) { > - case BPF_MAP_TYPE_DEVMAP: > - case BPF_MAP_TYPE_DEVMAP_HASH: > - return dev_map_enqueue(fwd, xdp, dev_rx); > - case BPF_MAP_TYPE_CPUMAP: > - return cpu_map_enqueue(fwd, xdp, dev_rx); > - case BPF_MAP_TYPE_XSKMAP: > - return __xsk_map_redirect(fwd, xdp); > - default: > - return -EBADRQC; > - } > - return 0; > -} > - > void xdp_do_flush(void) > { > __dev_flush(); > @@ -3944,55 +3927,45 @@ void xdp_do_flush(void) > } > EXPORT_SYMBOL_GPL(xdp_do_flush); > > -void bpf_clear_redirect_map(struct bpf_map *map) > -{ > - struct bpf_redirect_info *ri; > - int cpu; > - > - for_each_possible_cpu(cpu) { > - ri = per_cpu_ptr(&bpf_redirect_info, cpu); > - /* Avoid polluting remote cacheline due to writes if > - * not needed. Once we pass this test, we need the > - * cmpxchg() to make sure it hasn't been changed in > - * the meantime by remote CPU. > - */ > - if (unlikely(READ_ONCE(ri->map) == map)) > - cmpxchg(&ri->map, map, NULL); > - } > -} > - > int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, > struct bpf_prog *xdp_prog) > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > - struct bpf_map *map = READ_ONCE(ri->map); > - u32 index = ri->tgt_index; > + enum xdp_redirect_type type = ri->tgt_type; > void *fwd = ri->tgt_value; > int err; > > - ri->tgt_index = 0; > - ri->tgt_value = NULL; > - WRITE_ONCE(ri->map, NULL); > + ri->tgt_type = XDP_REDIR_UNSET; > > - if (unlikely(!map)) { > - fwd = dev_get_by_index_rcu(dev_net(dev), index); > + switch (type) { > + case XDP_REDIR_DEV_IFINDEX: > + fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd); > if (unlikely(!fwd)) { > err = -EINVAL; > - goto err; > + break; > } > - > err = dev_xdp_enqueue(fwd, xdp, dev); > - } else { > - err = __bpf_tx_xdp_map(dev, fwd, map, xdp); > + break; > + case XDP_REDIR_DEV_MAP: > + err = dev_map_enqueue(fwd, xdp, dev); > + break; > + case XDP_REDIR_CPU_MAP: > + err = cpu_map_enqueue(fwd, xdp, dev); > + break; > + case XDP_REDIR_XSK_MAP: > + err = __xsk_map_redirect(fwd, xdp); > + break; > + default: > + err = -EBADRQC; > } > > if (unlikely(err)) > goto err; > > - _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); > + _trace_xdp_redirect_map(dev, xdp_prog, fwd, type, ri); > return 0; > err: > - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); > + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, type, ri, err); > return err; > } > EXPORT_SYMBOL_GPL(xdp_do_redirect); > @@ -4001,41 +3974,40 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, > struct sk_buff *skb, > struct xdp_buff *xdp, > struct bpf_prog *xdp_prog, > - struct bpf_map *map) > + void *fwd, > + enum xdp_redirect_type type) > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > - u32 index = ri->tgt_index; > - void *fwd = ri->tgt_value; > - int err = 0; > - > - ri->tgt_index = 0; > - ri->tgt_value = NULL; > - WRITE_ONCE(ri->map, NULL); > + int err; > > - if (map->map_type == BPF_MAP_TYPE_DEVMAP || > - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { > + switch (type) { > + case XDP_REDIR_DEV_MAP: { > struct bpf_dtab_netdev *dst = fwd; I thought the braces around the case body looked a bit odd. I guess that's to get a local scope for the dst var (and xs var below), right? This is basically a cast, though, so I wonder if you couldn't just as well use the fwd pointer directly (with a cast) in the function call below? WDYT? (Strictly speaking I don't think the compiler will even complain if you omit the cast as well, but having it in there is nice fore readability I think, and guards against someone forgetting to update the call if the function prototype changes). > err = dev_map_generic_redirect(dst, skb, xdp_prog); > if (unlikely(err)) > goto err; > - } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { > + break; > + } > + case XDP_REDIR_XSK_MAP: { > struct xdp_sock *xs = fwd; > > err = xsk_generic_rcv(xs, xdp); > if (err) > goto err; > consume_skb(skb); > - } else { > + break; > + } > + default: > /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ > err = -EBADRQC; > goto err; > } > > - _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); > + _trace_xdp_redirect_map(dev, xdp_prog, fwd, type, ri); > return 0; > err: > - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); > + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, type, ri, err); > return err; > } > > @@ -4043,29 +4015,31 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, > struct xdp_buff *xdp, struct bpf_prog *xdp_prog) > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > - struct bpf_map *map = READ_ONCE(ri->map); > - u32 index = ri->tgt_index; > - struct net_device *fwd; > + enum xdp_redirect_type type = ri->tgt_type; > + void *fwd = ri->tgt_value; > int err = 0; > > - if (map) > - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, > - map); > - ri->tgt_index = 0; > - fwd = dev_get_by_index_rcu(dev_net(dev), index); > - if (unlikely(!fwd)) { > - err = -EINVAL; > - goto err; > - } > + ri->tgt_type = XDP_REDIR_UNSET; > + ri->tgt_value = NULL; > > - err = xdp_ok_fwd_dev(fwd, skb->len); > - if (unlikely(err)) > - goto err; > + if (type == XDP_REDIR_DEV_IFINDEX) { > + fwd = dev_get_by_index_rcu(dev_net(dev), (u32)(long)fwd); > + if (unlikely(!fwd)) { > + err = -EINVAL; > + goto err; > + } > > - skb->dev = fwd; > - _trace_xdp_redirect(dev, xdp_prog, index); > - generic_xdp_tx(skb, xdp_prog); > - return 0; > + err = xdp_ok_fwd_dev(fwd, skb->len); > + if (unlikely(err)) > + goto err; > + > + skb->dev = fwd; > + _trace_xdp_redirect(dev, xdp_prog, index); > + generic_xdp_tx(skb, xdp_prog); > + return 0; > + } > + > + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, type); > err: > _trace_xdp_redirect_err(dev, xdp_prog, index, err); > return err; > @@ -4078,10 +4052,9 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) > if (unlikely(flags)) > return XDP_ABORTED; > > - ri->flags = flags; > - ri->tgt_index = ifindex; > - ri->tgt_value = NULL; > - WRITE_ONCE(ri->map, NULL); > + ri->tgt_type = XDP_REDIR_DEV_IFINDEX; > + ri->tgt_index = 0; > + ri->tgt_value = (void *)(long)ifindex; > > return XDP_REDIRECT; > } > @@ -4096,7 +4069,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { > > static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags, > void *lookup_elem(struct bpf_map *map, > - u32 key)) > + u32 key), > + enum xdp_redirect_type type) > { > struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); > > @@ -4105,35 +4079,39 @@ static __always_inline s64 __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind > > ri->tgt_value = lookup_elem(map, ifindex); > if (unlikely(!ri->tgt_value)) { > - WRITE_ONCE(ri->map, NULL); > + ri->tgt_type = XDP_REDIR_UNSET; > return flags; > } > > - ri->flags = flags; > ri->tgt_index = ifindex; > - WRITE_ONCE(ri->map, map); > + ri->tgt_type = type; > + ri->map_id = map->id; > > return XDP_REDIRECT; > } > > BPF_CALL_3(bpf_xdp_redirect_devmap, struct bpf_map *, map, u32, ifindex, u64, flags) > { > - return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem); > + return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem, > + XDP_REDIR_DEV_MAP); > } > > BPF_CALL_3(bpf_xdp_redirect_devmap_hash, struct bpf_map *, map, u32, ifindex, u64, flags) > { > - return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem); > + return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem, > + XDP_REDIR_DEV_MAP); > } > > BPF_CALL_3(bpf_xdp_redirect_cpumap, struct bpf_map *, map, u32, ifindex, u64, flags) > { > - return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem); > + return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem, > + XDP_REDIR_CPU_MAP); > } > > BPF_CALL_3(bpf_xdp_redirect_xskmap, struct bpf_map *, map, u32, ifindex, u64, flags) > { > - return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem); > + return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem, > + XDP_REDIR_XSK_MAP); > } > > bpf_func_proto_func get_xdp_redirect_func(enum bpf_map_type map_type) > diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c > index 113fd9017203..c285d3dd04ad 100644 > --- a/net/xdp/xskmap.c > +++ b/net/xdp/xskmap.c > @@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map) > { > struct xsk_map *m = container_of(map, struct xsk_map, map); > > - bpf_clear_redirect_map(map); > synchronize_net(); > bpf_map_area_free(m); > } > -- > 2.27.0