On Wed, Apr 14, 2021 at 08:26:08PM +0800, Hangbin Liu wrote:

[ ... ]

> +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
> +						   u64 flags, u64 flag_mask,
>  						   void *lookup_elem(struct bpf_map *map, u32 key))
>  {
>  	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>
>  	/* Lower bits of the flags are used as return code on lookup failure */
> -	if (unlikely(flags > XDP_TX))
> +	if (unlikely(flags & ~(BPF_F_ACTION_MASK | flag_mask)))
>  		return XDP_ABORTED;
>
>  	ri->tgt_value = lookup_elem(map, ifindex);
> -	if (unlikely(!ri->tgt_value)) {
> +	if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
>  		/* If the lookup fails we want to clear out the state in the
>  		 * redirect_info struct completely, so that if an eBPF program
>  		 * performs multiple lookups, the last one always takes
> @@ -1482,13 +1484,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
>  		 */
>  		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
>  		ri->map_type = BPF_MAP_TYPE_UNSPEC;
> -		return flags;
> +		return flags & BPF_F_ACTION_MASK;
>  	}
>
>  	ri->tgt_index = ifindex;
>  	ri->map_id = map->id;
>  	ri->map_type = map->map_type;
>
> +	if (flags & BPF_F_BROADCAST) {
> +		WRITE_ONCE(ri->map, map);

Why only WRITE_ONCE on ri->map?  Is it needed?
(see the reader-side sketch at the end of this mail)

> +		ri->flags = flags;
> +	} else {
> +		WRITE_ONCE(ri->map, NULL);
> +		ri->flags = 0;
> +	}
> +
>  	return XDP_REDIRECT;
>  }
>

[ ... ]

> +int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> +			  struct bpf_map *map, bool exclude_ingress)
> +{
> +	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> +	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
> +	struct bpf_dtab_netdev *dst, *last_dst = NULL;
> +	struct hlist_head *head;
> +	struct hlist_node *next;
> +	struct xdp_frame *xdpf;
> +	unsigned int i;
> +	int err;
> +
> +	xdpf = xdp_convert_buff_to_frame(xdp);
> +	if (unlikely(!xdpf))
> +		return -EOVERFLOW;
> +
> +	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> +		for (i = 0; i < map->max_entries; i++) {
> +			dst = READ_ONCE(dtab->netdev_map[i]);
> +			if (!is_valid_dst(dst, xdp, exclude_ifindex))
> +				continue;
> +
> +			/* we only need n-1 clones; last_dst enqueued below */
> +			if (!last_dst) {
> +				last_dst = dst;
> +				continue;
> +			}
> +
> +			err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
> +			if (err)
> +				return err;
> +
> +			last_dst = dst;
> +		}
> +	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
> +		for (i = 0; i < dtab->n_buckets; i++) {
> +			head = dev_map_index_hash(dtab, i);
> +			hlist_for_each_entry_safe(dst, next, head, index_hlist) {

hmm.... should it be hlist_for_each_entry_rcu() instead?
(sketch at the end of this mail)

> +				if (!is_valid_dst(dst, xdp, exclude_ifindex))
> +					continue;
> +
> +				/* we only need n-1 clones; last_dst enqueued below */
> +				if (!last_dst) {
> +					last_dst = dst;
> +					continue;
> +				}
> +
> +				err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
> +				if (err)
> +					return err;
> +
> +				last_dst = dst;
> +			}
> +		}
> +	}
> +
> +	/* consume the last copy of the frame */
> +	if (last_dst)
> +		bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
> +	else
> +		xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
> +
> +	return 0;
> +}
> +
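To expand on the WRITE_ONCE question above: WRITE_ONCE()/READ_ONCE() only
buy anything as a pair, i.e. if another context can load ri->map while
this store happens.  bpf_redirect_info is per-CPU and afaict only touched
from the local softirq, so it is not obvious such a reader exists.  Rough
sketch of what a paired consumer would have to look like -- illustrative
only, not the actual redirect path, and BPF_F_EXCLUDE_INGRESS is taken
from elsewhere in this series rather than the hunks quoted above:

/* Hypothetical consumer of ri->map, for illustration only.  The point
 * is that the WRITE_ONCE() in __bpf_xdp_redirect_map() is only
 * meaningful if some context does a paired READ_ONCE() that can race
 * with that store.
 */
static int xdp_do_redirect_sketch(struct net_device *dev, struct xdp_buff *xdp)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
	struct bpf_map *map = READ_ONCE(ri->map);	/* pairs with the WRITE_ONCE() */

	if (map && (ri->flags & BPF_F_BROADCAST))
		/* broadcast: hand the frame to every valid dev in the map */
		return dev_map_enqueue_multi(xdp, dev, map,
					     ri->flags & BPF_F_EXCLUDE_INGRESS);

	return -EINVAL;	/* placeholder for the normal single-target path */
}

If no reader like this can race with the store, plain assignments would
do and the WRITE_ONCE() mostly documents intent.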
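And to make the hlist question concrete: hlist_for_each_entry_safe() only
protects against this walker deleting the entry it is standing on, which
the loop never does; it does not help against concurrent updaters.  The
devmap buckets are RCU-protected and the XDP fast path runs under
rcu_read_lock(), so the _rcu iterator looks like the right fit.  Untested
sketch of the hash branch with just the iterator swapped (the 'next'
local and its hlist_node declaration would then be unused):

	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
		for (i = 0; i < dtab->n_buckets; i++) {
			head = dev_map_index_hash(dtab, i);
			/* safe against concurrent deletion as long as the
			 * caller is in an RCU read-side critical section,
			 * which the XDP fast path is
			 */
			hlist_for_each_entry_rcu(dst, head, index_hlist) {
				if (!is_valid_dst(dst, xdp, exclude_ifindex))
					continue;

				/* we only need n-1 clones; last_dst enqueued below */
				if (!last_dst) {
					last_dst = dst;
					continue;
				}

				err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
				if (err)
					return err;

				last_dst = dst;
			}
		}
	}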