On 09/12, Joe Damato wrote: > Add a persistent NAPI config area for NAPI configuration to the core. > Drivers opt-in to setting the storage for a NAPI by passing an index > when calling netif_napi_add_storage. > > napi_config is allocated in alloc_netdev_mqs, freed in free_netdev > (after the NAPIs are deleted), and set to 0 when napi_enable is called. > > Drivers which implement call netif_napi_add_storage will have persistent > NAPI IDs. > > Signed-off-by: Joe Damato <jdamato@xxxxxxxxxx> > --- > .../networking/net_cachelines/net_device.rst | 1 + > include/linux/netdevice.h | 34 +++++++++ > net/core/dev.c | 74 +++++++++++++++++-- > net/core/dev.h | 12 +++ > 4 files changed, 113 insertions(+), 8 deletions(-) > > diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst > index 3d02ae79c850..11d659051f5e 100644 > --- a/Documentation/networking/net_cachelines/net_device.rst > +++ b/Documentation/networking/net_cachelines/net_device.rst > @@ -183,3 +183,4 @@ struct hlist_head page_pools > struct dim_irq_moder* irq_moder > unsigned_long gro_flush_timeout > u32 napi_defer_hard_irqs > +struct napi_config* napi_config > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 3e07ab8e0295..08afc96179f9 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -342,6 +342,15 @@ struct gro_list { > */ > #define GRO_HASH_BUCKETS 8 > > +/* > + * Structure for per-NAPI storage > + */ > +struct napi_config { > + u64 gro_flush_timeout; > + u32 defer_hard_irqs; > + unsigned int napi_id; > +}; > + > /* > * Structure for NAPI scheduling similar to tasklet but with weighting > */ > @@ -379,6 +388,8 @@ struct napi_struct { > int irq; > unsigned long gro_flush_timeout; > u32 defer_hard_irqs; > + int index; > + struct napi_config *config; > }; > > enum { > @@ -2011,6 +2022,9 @@ enum netdev_reg_state { > * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, > * where 
the clock is recovered. > * > + * @napi_config: An array of napi_config structures containing per-NAPI > + * settings. > + * > * FIXME: cleanup struct net_device such that network protocol info > * moves out. > */ > @@ -2400,6 +2414,7 @@ struct net_device { > struct dim_irq_moder *irq_moder; > unsigned long gro_flush_timeout; > u32 napi_defer_hard_irqs; > + struct napi_config *napi_config; > > u8 priv[] ____cacheline_aligned > __counted_by(priv_len); > @@ -2650,6 +2665,23 @@ netif_napi_add_tx_weight(struct net_device *dev, > netif_napi_add_weight(dev, napi, poll, weight); > } > > +/** > + * netif_napi_add_storage - initialize a NAPI context and set storage area > + * @dev: network device > + * @napi: NAPI context > + * @poll: polling function > + * @weight: the poll weight of this NAPI > + * @index: the NAPI index > + */ > +static inline void > +netif_napi_add_storage(struct net_device *dev, struct napi_struct *napi, > + int (*poll)(struct napi_struct *, int), int index) > +{ > + napi->index = index; > + napi->config = &dev->napi_config[index]; > + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); > +} > + > /** > * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only > * @dev: network device > @@ -2685,6 +2717,8 @@ void __netif_napi_del(struct napi_struct *napi); > */ > static inline void netif_napi_del(struct napi_struct *napi) > { > + napi->config = NULL; > + napi->index = -1; > __netif_napi_del(napi); > synchronize_net(); > } > diff --git a/net/core/dev.c b/net/core/dev.c > index f2fd503516de..ca2227d0b8ed 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -6493,6 +6493,18 @@ EXPORT_SYMBOL(napi_busy_loop); > > #endif /* CONFIG_NET_RX_BUSY_POLL */ > > +static void napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) > +{ > + spin_lock(&napi_hash_lock); > + > + napi->napi_id = napi_id; > + > + hlist_add_head_rcu(&napi->napi_hash_node, > + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); > + > + 
spin_unlock(&napi_hash_lock);
> +}
> +
> static void napi_hash_add(struct napi_struct *napi)
> {
> if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
> @@ -6505,12 +6517,13 @@ static void napi_hash_add(struct napi_struct *napi)
> if (unlikely(++napi_gen_id < MIN_NAPI_ID))
> napi_gen_id = MIN_NAPI_ID;
> } while (napi_by_id(napi_gen_id));

[..]

> - napi->napi_id = napi_gen_id;
> -
> - hlist_add_head_rcu(&napi->napi_hash_node,
> - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
>
> spin_unlock(&napi_hash_lock);
> +
> + napi_hash_add_with_id(napi, napi_gen_id);

nit: it is very unlikely that napi_gen_id will wrap around after the
spin_unlock above, but as written this drops napi_hash_lock and then
re-acquires it inside napi_hash_add_with_id(), so the ID-uniqueness
check (the napi_by_id() loop) and the hash insertion are no longer a
single critical section — in principle another caller could claim the
same ID in between. Maybe it's safer to have the following?

static void __napi_hash_add_with_id(struct napi_struct *napi,
				    unsigned int napi_id)
{
	napi->napi_id = napi_id;
	hlist_add_head_rcu(&napi->napi_hash_node,
			   &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
}

static void napi_hash_add_with_id(struct napi_struct *napi,
				  unsigned int napi_id)
{
	spin_lock(&napi_hash_lock);
	__napi_hash_add_with_id(napi, napi_id);
	spin_unlock(&napi_hash_lock);
}

And use __napi_hash_add_with_id() here, before the spin_unlock, so the
ID allocation and the insertion stay under the same lock acquisition?