On Tue, Aug 20, 2024 at 03:22:27AM -0700, Souradeep Chakrabarti wrote: > Currently napi_disable() gets called during rxq and txq cleanup, > even before napi is enabled and hrtimer is initialized. It causes > kernel panic. > > ? page_fault_oops+0x136/0x2b0 > ? page_counter_cancel+0x2e/0x80 > ? do_user_addr_fault+0x2f2/0x640 > ? refill_obj_stock+0xc4/0x110 > ? exc_page_fault+0x71/0x160 > ? asm_exc_page_fault+0x27/0x30 > ? __mmdrop+0x10/0x180 > ? __mmdrop+0xec/0x180 > ? hrtimer_active+0xd/0x50 > hrtimer_try_to_cancel+0x2c/0xf0 > hrtimer_cancel+0x15/0x30 > napi_disable+0x65/0x90 > mana_destroy_rxq+0x4c/0x2f0 > mana_create_rxq.isra.0+0x56c/0x6d0 > ? mana_uncfg_vport+0x50/0x50 > mana_alloc_queues+0x21b/0x320 > ? skb_dequeue+0x5f/0x80 > > Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") > > Signed-off-by: Souradeep Chakrabarti <schakrabarti@xxxxxxxxxxxxxxxxxxx> > --- > drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------ > 1 file changed, 28 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c > index 39f56973746d..882b05e087b9 100644 > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c > @@ -1862,7 +1862,7 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) > mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); > } > > -static void mana_destroy_txq(struct mana_port_context *apc) > +static void mana_cleanup_napi_txq(struct mana_port_context *apc) > { > struct napi_struct *napi; > int i; > @@ -1875,7 +1875,17 @@ static void mana_destroy_txq(struct mana_port_context *apc) > napi_synchronize(napi); > napi_disable(napi); > netif_napi_del(napi); > + } > +} > + > +static void mana_destroy_txq(struct mana_port_context *apc) > +{ > + int i; > + > + if (!apc->tx_qp) > + return; > > + for (i = 0; i < apc->num_queues; i++) { > mana_destroy_wq_obj(apc, GDMA_SQ, 
apc->tx_qp[i].tx_object); > > mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); > @@ -2007,6 +2017,21 @@ static int mana_create_txq(struct mana_port_context *apc, > return err; > } I think the mana_cleanup_napi_txq() call should also be added in the out path of mana_create_txq(). Consider this: NAPI enable succeeds for the first few TX queues, but if queue creation for a later SQ fails, we don't clean up the NAPIs of the previously successful ones. > > +static void mana_cleanup_napi_rxq(struct mana_port_context *apc, > + struct mana_rxq *rxq, bool validate_state) > +{ > + struct napi_struct *napi; > + > + if (!rxq) > + return; > + > + napi = &rxq->rx_cq.napi; > + if (validate_state) > + napi_synchronize(napi); > + napi_disable(napi); > + netif_napi_del(napi); > +} > + > static void mana_destroy_rxq(struct mana_port_context *apc, > struct mana_rxq *rxq, bool validate_state) > > @@ -2014,24 +2039,14 @@ static void mana_destroy_rxq(struct mana_port_context *apc, > struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; > struct mana_recv_buf_oob *rx_oob; > struct device *dev = gc->dev; > - struct napi_struct *napi; > struct page *page; > int i; > > if (!rxq) > return; > > - napi = &rxq->rx_cq.napi; > - > - if (validate_state) > - napi_synchronize(napi); > - > - napi_disable(napi); > - > xdp_rxq_info_unreg(&rxq->xdp_rxq); > > - netif_napi_del(napi); > - > mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); > > mana_deinit_cq(apc, &rxq->rx_cq); > @@ -2336,11 +2351,11 @@ static void mana_destroy_vport(struct mana_port_context *apc) > rxq = apc->rxqs[rxq_idx]; > if (!rxq) > continue; > - > + mana_cleanup_napi_rxq(apc, rxq, true); > mana_destroy_rxq(apc, rxq, true); > apc->rxqs[rxq_idx] = NULL; > } > - > + mana_cleanup_napi_txq(apc); > mana_destroy_txq(apc); > mana_uncfg_vport(apc); > > -- > 2.34.1 > >