> From: Florian Westphal <fw@xxxxxxxxx> > > This adds a small internal mapping table so that a new bpf (xdp) kfunc > can perform lookups in a flowtable. > > As-is, xdp program has access to the device pointer, but no way to do a > lookup in a flowtable -- there is no way to obtain the needed struct > without questionable stunts. > > This allows to obtain an nf_flowtable pointer given a net_device > structure. > > In order to keep backward compatibility, the infrastructure allows the > user to add a given device to multiple flowtables, but it will always > return the first added mapping performing the lookup since it assumes > the right configuration is 1:1 mapping between flowtables and net_devices. Hi Pablo, do you have any feedback about the nft part? Thanks. Regards, Lorenzo > > Signed-off-by: Florian Westphal <fw@xxxxxxxxx> > Co-developed-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> > --- > include/net/netfilter/nf_flow_table.h | 8 ++ > net/netfilter/Makefile | 2 +- > net/netfilter/nf_flow_table_offload.c | 6 +- > net/netfilter/nf_flow_table_xdp.c | 163 ++++++++++++++++++++++++++ > 4 files changed, 176 insertions(+), 3 deletions(-) > create mode 100644 net/netfilter/nf_flow_table_xdp.c > > diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h > index 9abb7ee40d72f..688e02b287cc4 100644 > --- a/include/net/netfilter/nf_flow_table.h > +++ b/include/net/netfilter/nf_flow_table.h > @@ -305,6 +305,14 @@ struct flow_ports { > __be16 source, dest; > }; > > +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev); > +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, > + struct net_device *dev, > + enum flow_block_command cmd); > +void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable, > + struct net_device *dev, > + enum flow_block_command cmd); > + > unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, > const struct nf_hook_state 
*state); > unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, > diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile > index 614815a3ed738..18046872a38aa 100644 > --- a/net/netfilter/Makefile > +++ b/net/netfilter/Makefile > @@ -142,7 +142,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o > # flow table infrastructure > obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o > nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ > - nf_flow_table_offload.o > + nf_flow_table_offload.o nf_flow_table_xdp.o > nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o > > obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o > diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c > index a010b25076ca0..d9b019c98694b 100644 > --- a/net/netfilter/nf_flow_table_offload.c > +++ b/net/netfilter/nf_flow_table_offload.c > @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, > int err; > > if (!nf_flowtable_hw_offload(flowtable)) > - return 0; > + return nf_flow_offload_xdp_setup(flowtable, dev, cmd); > > if (dev->netdev_ops->ndo_setup_tc) > err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, > @@ -1200,8 +1200,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, > else > err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, > &extack); > - if (err < 0) > + if (err < 0) { > + nf_flow_offload_xdp_cancel(flowtable, dev, cmd); > return err; > + } > > return nf_flow_table_block_setup(flowtable, &bo, cmd); > } > diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c > new file mode 100644 > index 0000000000000..b9bdf27ba9bd3 > --- /dev/null > +++ b/net/netfilter/nf_flow_table_xdp.c > @@ -0,0 +1,163 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/netfilter.h> > +#include <linux/rhashtable.h> > +#include <linux/netdevice.h> > 
+#include <net/flow_offload.h> > +#include <net/netfilter/nf_flow_table.h> > + > +struct flow_offload_xdp_ft { > + struct list_head head; > + struct nf_flowtable *ft; > + struct rcu_head rcuhead; > +}; > + > +struct flow_offload_xdp { > + struct hlist_node hnode; > + unsigned long net_device_addr; > + struct list_head head; > +}; > + > +#define NF_XDP_HT_BITS 4 > +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS); > +static DEFINE_MUTEX(nf_xdp_hashtable_lock); > + > +/* caller must hold rcu read lock */ > +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev) > +{ > + unsigned long key = (unsigned long)dev; > + struct flow_offload_xdp *iter; > + > + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) { > + if (key == iter->net_device_addr) { > + struct flow_offload_xdp_ft *ft_elem; > + > + /* The user is supposed to insert a given net_device > + * just into a single nf_flowtable so we always return > + * the first element here. > + */ > + ft_elem = list_first_or_null_rcu(&iter->head, > + struct flow_offload_xdp_ft, > + head); > + return ft_elem ? 
ft_elem->ft : NULL; > + } > + } > + > + return NULL; > +} > + > +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft, > + const struct net_device *dev) > +{ > + struct flow_offload_xdp *iter, *elem = NULL; > + unsigned long key = (unsigned long)dev; > + struct flow_offload_xdp_ft *ft_elem; > + > + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT); > + if (!ft_elem) > + return -ENOMEM; > + > + ft_elem->ft = ft; > + > + mutex_lock(&nf_xdp_hashtable_lock); > + > + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { > + if (key == iter->net_device_addr) { > + elem = iter; > + break; > + } > + } > + > + if (!elem) { > + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT); > + if (!elem) > + goto err_unlock; > + > + elem->net_device_addr = key; > + INIT_LIST_HEAD(&elem->head); > + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key); > + } > + list_add_tail_rcu(&ft_elem->head, &elem->head); > + > + mutex_unlock(&nf_xdp_hashtable_lock); > + > + return 0; > + > +err_unlock: > + mutex_unlock(&nf_xdp_hashtable_lock); > + kfree(ft_elem); > + > + return -ENOMEM; > +} > + > +static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft, > + const struct net_device *dev) > +{ > + struct flow_offload_xdp *iter, *elem = NULL; > + unsigned long key = (unsigned long)dev; > + > + mutex_lock(&nf_xdp_hashtable_lock); > + > + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { > + if (key == iter->net_device_addr) { > + elem = iter; > + break; > + } > + } > + > + if (elem) { > + struct flow_offload_xdp_ft *ft_elem, *ft_next; > + > + list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) { > + if (ft_elem->ft == ft) { > + list_del_rcu(&ft_elem->head); > + kfree_rcu(ft_elem, rcuhead); > + } > + } > + > + if (list_empty(&elem->head)) > + hash_del_rcu(&elem->hnode); > + else > + elem = NULL; > + } > + > + mutex_unlock(&nf_xdp_hashtable_lock); > + > + if (elem) { > + synchronize_rcu(); > + kfree(elem); > + } > +} > + > +int 
nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, > + struct net_device *dev, > + enum flow_block_command cmd) > +{ > + switch (cmd) { > + case FLOW_BLOCK_BIND: > + return nf_flowtable_by_dev_insert(flowtable, dev); > + case FLOW_BLOCK_UNBIND: > + nf_flowtable_by_dev_remove(flowtable, dev); > + return 0; > + } > + > + WARN_ON_ONCE(1); > + return 0; > +} > + > +void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable, > + struct net_device *dev, > + enum flow_block_command cmd) > +{ > + switch (cmd) { > + case FLOW_BLOCK_BIND: > + nf_flowtable_by_dev_remove(flowtable, dev); > + return; > + case FLOW_BLOCK_UNBIND: > + /* We do not re-bind in case hw offload would report error > + * on *unregister*. > + */ > + break; > + } > +} > -- > 2.45.1 > >
Attachment:
signature.asc
Description: PGP signature