This adds a small internal mapping table so that a new BPF (XDP) kfunc can perform lookups in a flowtable. As-is, an XDP program has access to the device pointer, but no way to do a lookup in a flowtable -- there is no way to obtain the needed struct without questionable stunts. This allows obtaining an nf_flowtable pointer given a net_device structure. A device cannot be added to multiple flowtables; the mapping needs to be unique. This is enforced when a flowtable with the NF_FLOWTABLE_XDP_OFFLOAD flag is added. Exposure of this NF_FLOWTABLE_XDP_OFFLOAD flag in UAPI could be avoided, iff the 'net_device maps to 0 or 1 flowtable' paradigm is enforced regardless of the offload-or-not flag. HOWEVER, that does break existing behaviour. An alternative would be to repurpose the hw offload flag by allowing XDP fallback when hw offload cannot be done due to lack of ndo callbacks. Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- include/net/netfilter/nf_flow_table.h | 7 ++ net/netfilter/nf_flow_table_offload.c | 131 +++++++++++++++++++++++++- net/netfilter/nf_tables_api.c | 3 +- 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 11985d9b8370..b8b7fcb98732 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -93,6 +93,11 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD; } +static inline bool nf_flowtable_xdp_offload(struct nf_flowtable *flowtable) +{ + return flowtable->flags & NF_FLOWTABLE_XDP_OFFLOAD; +} + enum flow_offload_tuple_dir { FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, @@ -299,6 +304,8 @@ struct flow_ports { __be16 source, dest; }; +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev); + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); 
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a010b25076ca..9ec7aa4ad2e5 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -17,6 +17,92 @@ static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; +struct flow_offload_xdp { + struct hlist_node hnode; + + unsigned long net_device_addr; + struct nf_flowtable *ft; + + struct rcu_head rcuhead; +}; + +#define NF_XDP_HT_BITS 4 +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS); +static DEFINE_MUTEX(nf_xdp_hashtable_lock); + +/* caller must hold rcu read lock */ +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + const struct flow_offload_xdp *cur; + + hash_for_each_possible_rcu(nf_xdp_hashtable, cur, hnode, key) { + if (key == cur->net_device_addr) + return cur->ft; + } + + return NULL; +} + +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft, + const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp *cur; + int err = 0; + + mutex_lock(&nf_xdp_hashtable_lock); + hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) { + if (key != cur->net_device_addr) + continue; + err = -EEXIST; + break; + } + + if (err == 0) { + struct flow_offload_xdp *new; + + new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); + if (new) { + new->net_device_addr = key; + new->ft = ft; + + hash_add_rcu(nf_xdp_hashtable, &new->hnode, key); + } else { + err = -ENOMEM; + } + } + + mutex_unlock(&nf_xdp_hashtable_lock); + + DEBUG_NET_WARN_ON_ONCE(err == 0 && nf_flowtable_by_dev(dev) != ft); + + return err; +} + +static void nf_flowtable_by_dev_remove(const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + struct 
flow_offload_xdp *cur; + bool found = false; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) { + if (key != cur->net_device_addr) + continue; + + hash_del_rcu(&cur->hnode); + kfree_rcu(cur, rcuhead); + found = true; + break; + } + + mutex_unlock(&nf_xdp_hashtable_lock); + + WARN_ON_ONCE(!found); +} + struct flow_offload_work { struct list_head list; enum flow_cls_command cmd; @@ -1183,6 +1269,44 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, return 0; } +static int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd) +{ + if (!nf_flowtable_xdp_offload(flowtable)) + return 0; + + switch (cmd) { + case FLOW_BLOCK_BIND: + return nf_flowtable_by_dev_insert(flowtable, dev); + case FLOW_BLOCK_UNBIND: + nf_flowtable_by_dev_remove(dev); + return 0; + } + + WARN_ON_ONCE(1); + return 0; +} + +static void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd) +{ + if (!nf_flowtable_xdp_offload(flowtable)) + return; + + switch (cmd) { + case FLOW_BLOCK_BIND: + nf_flowtable_by_dev_remove(dev); + return; + case FLOW_BLOCK_UNBIND: + /* We do not re-bind in case hw offload would report error + * on *unregister*. 
+ */ + break; + } +} + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd) @@ -1191,6 +1315,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct flow_block_offload bo; int err; + if (nf_flow_offload_xdp_setup(flowtable, dev, cmd)) + return -EBUSY; + if (!nf_flowtable_hw_offload(flowtable)) return 0; @@ -1200,8 +1327,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, else err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, &extack); - if (err < 0) + if (err < 0) { + nf_flow_offload_xdp_cancel(flowtable, dev, cmd); return err; + } return nf_flow_table_block_setup(flowtable, &bo, cmd); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4e21311ec768..223ca4d0e2a5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8198,7 +8198,8 @@ static bool nft_flowtable_offload_clash(struct net *net, const struct nft_table *table; /* No offload requested, no need to validate */ - if (!nf_flowtable_hw_offload(flowtable->ft)) + if (!nf_flowtable_hw_offload(flowtable->ft) && + !nf_flowtable_xdp_offload(flowtable->ft)) return false; nft_net = nft_pernet(net); -- 2.41.0