Add a persistent NAPI storage area for NAPI configuration to the core. Drivers opt in to setting the storage for a NAPI by passing an index when calling netif_napi_add_storage. napi_storage is allocated in alloc_netdev_mqs, freed in free_netdev (after the NAPIs are deleted), and set to 0 when napi_enable is called. Signed-off-by: Joe Damato <jdamato@xxxxxxxxxx> --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 34 +++++++++++++++++++ net/core/dev.c | 18 +++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 22b07c814f4a..a82751c88d18 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -106,6 +106,7 @@ rx_handler_func_t* rx_handler read_mostly void* rx_handler_data read_mostly - struct_netdev_queue* ingress_queue read_mostly - struct_bpf_mprog_entry tcx_ingress - read_mostly sch_handle_ingress +struct_napi_storage* napi_storage - read_mostly napi_complete_done struct_nf_hook_entries* nf_hooks_ingress unsigned_char broadcast[32] struct_cpu_rmap* rx_cpu_rmap diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..54da1c800e65 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -342,6 +342,14 @@ struct gro_list { */ #define GRO_HASH_BUCKETS 8 +/* + * Structure for per-NAPI storage + */ +struct napi_storage { + u64 gro_flush_timeout; + u32 defer_hard_irqs; +}; + /* * Structure for NAPI scheduling similar to tasklet but with weighting */ @@ -377,6 +385,8 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + int index; + struct napi_storage *napi_storage; }; enum { @@ -2009,6 +2019,9 @@ enum netdev_reg_state { * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. 
* + * @napi_storage: An array of napi_storage structures containing per-NAPI + * settings. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2087,6 +2100,7 @@ struct net_device { #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_ingress; #endif + struct napi_storage *napi_storage; __cacheline_group_end(net_device_read_rx); char name[IFNAMSIZ]; @@ -2648,6 +2662,24 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +/** + * netif_napi_add_storage() - initialize a NAPI context and set storage area + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @weight: the poll weight of this NAPI + * @index: index of this NAPI's entry in @dev->napi_storage + */ +static inline void +netif_napi_add_storage(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight, + int index) +{ + napi->index = index; + napi->napi_storage = &dev->napi_storage[index]; + netif_napi_add_weight(dev, napi, poll, weight); +} + /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device @@ -2683,6 +2715,8 @@ void __netif_napi_del(struct napi_struct *napi); */ static inline void netif_napi_del(struct napi_struct *napi) { + napi->napi_storage = NULL; + napi->index = -1; __netif_napi_del(napi); synchronize_net(); } diff --git a/net/core/dev.c b/net/core/dev.c index 22c3f14d9287..ca90e8cab121 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6719,6 +6719,9 @@ void napi_enable(struct napi_struct *n) if (n->dev->threaded && n->thread) new |= NAPIF_STATE_THREADED; } while (!try_cmpxchg(&n->state, &val, new)); + + if (n->napi_storage) + memset(n->napi_storage, 0, sizeof(*n->napi_storage)); } EXPORT_SYMBOL(napi_enable); @@ -11054,6 +11057,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; + size_t napi_storage_sz; + unsigned int maxqs; 
BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11067,6 +11072,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } + WARN_ON_ONCE(txqs != rxqs); + maxqs = max(txqs, rxqs); + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!dev) @@ -11141,6 +11149,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + napi_storage_sz = array_size(maxqs, sizeof(*dev->napi_storage)); + dev->napi_storage = kvzalloc(napi_storage_sz, GFP_KERNEL_ACCOUNT); + if (!dev->napi_storage) + goto free_all; + strscpy(dev->name, name); dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; @@ -11202,6 +11215,8 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); + kvfree(dev->napi_storage); + ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); @@ -11979,7 +11994,8 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_storage); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 112); } /* -- 2.25.1