On Wed, Mar 05, 2025 at 05:21:25PM +0100, Alexander Lobakin wrote:
> From: Michal Kubiak <michal.kubiak@xxxxxxxxx>
>
> SW marker descriptors on completion queues are used only when a queue
> is about to be destroyed. It's far from hotpath and handling it in the
> hotpath NAPI poll makes no sense.
> Instead, run a simple poller after a virtchnl message for destroying
> the queue is sent and wait for the replies. If replies for all of the
> queues are received, this means the synchronization is done correctly
> and we can go forth with stopping the link.
>
> Signed-off-by: Michal Kubiak <michal.kubiak@xxxxxxxxx>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@xxxxxxxxx>
> ---
>  drivers/net/ethernet/intel/idpf/idpf.h        |   7 +-
>  drivers/net/ethernet/intel/idpf/idpf_txrx.h   |   4 +-
>  drivers/net/ethernet/intel/idpf/idpf_lib.c    |   2 -
>  drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 108 +++++++++++-------
>  .../net/ethernet/intel/idpf/idpf_virtchnl.c   |  34 ++----
>  5 files changed, 80 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
> index 66544faab710..6b51a5dcc1e0 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf.h
> +++ b/drivers/net/ethernet/intel/idpf/idpf.h
> @@ -36,6 +36,7 @@ struct idpf_vport_max_q;
>  #define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz)	\
>  	((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
>
> +#define IDPF_WAIT_FOR_MARKER_TIMEO	500
>  #define IDPF_MAX_WAIT			500
>
>  /* available message levels */
> @@ -224,13 +225,10 @@ enum idpf_vport_reset_cause {
>  /**
>   * enum idpf_vport_flags - Vport flags
>   * @IDPF_VPORT_DEL_QUEUES: To send delete queues message
> - * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets
> - *			  processing is done
>   * @IDPF_VPORT_FLAGS_NBITS: Must be last
>   */
>  enum idpf_vport_flags {
>  	IDPF_VPORT_DEL_QUEUES,
> -	IDPF_VPORT_SW_MARKER,
>  	IDPF_VPORT_FLAGS_NBITS,
>  };
>
> @@ -289,7 +287,6 @@ struct idpf_port_stats {
>   * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation
>   * @port_stats: per port csum, header split, and other offload stats
>   * @link_up: True if link is up
> - * @sw_marker_wq: workqueue for marker packets
>   */
>  struct idpf_vport {
>  	u16 num_txq;
> @@ -332,8 +329,6 @@ struct idpf_vport {
>  	struct idpf_port_stats port_stats;
>
>  	bool link_up;
> -
> -	wait_queue_head_t sw_marker_wq;
>  };
>
>  /**
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> index 9f938301b2c5..dd6cc3b5cdab 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> @@ -286,7 +286,6 @@ struct idpf_ptype_state {
>   *		      bit and Q_RFL_GEN is the SW bit.
>   * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
>   * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
> - * @__IDPF_Q_POLL_MODE: Enable poll mode
>   * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
>   * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
>   * @__IDPF_Q_FLAGS_NBITS: Must be last
> @@ -296,7 +295,6 @@ enum idpf_queue_flags_t {
>  	__IDPF_Q_RFL_GEN_CHK,
>  	__IDPF_Q_FLOW_SCH_EN,
>  	__IDPF_Q_SW_MARKER,
> -	__IDPF_Q_POLL_MODE,
>  	__IDPF_Q_CRC_EN,
>  	__IDPF_Q_HSPLIT_EN,
>
> @@ -1044,6 +1042,8 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
>  				      u16 cleaned_count);
>  int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
>
> +void idpf_wait_for_sw_marker_completion(struct idpf_tx_queue *txq);
> +
>  static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
>  					     u32 needed)
>  {
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> index f3aea7bcdaa3..e17582d15e27 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> @@ -1501,8 +1501,6 @@ void idpf_init_task(struct work_struct *work)
>  	index = vport->idx;
>  	vport_config = adapter->vport_config[index];
>
> -	init_waitqueue_head(&vport->sw_marker_wq);
> -
>  	spin_lock_init(&vport_config->mac_filter_list_lock);
>
>  	INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> index a240ed115e3e..4e3de6031422 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> @@ -1626,32 +1626,6 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport)
>  	return err;
>  }
>
> -/**
> - * idpf_tx_handle_sw_marker - Handle queue marker packet
> - * @tx_q: tx queue to handle software marker
> - */
> -static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q)
> -{
> -	struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev);
> -	struct idpf_vport *vport = priv->vport;
> -	int i;
> -
> -	idpf_queue_clear(SW_MARKER, tx_q);
> -	/* Hardware must write marker packets to all queues associated with
> -	 * completion queues. So check if all queues received marker packets
> -	 */
> -	for (i = 0; i < vport->num_txq; i++)
> -		/* If we're still waiting on any other TXQ marker completions,
> -		 * just return now since we cannot wake up the marker_wq yet.
> -		 */
> -		if (idpf_queue_has(SW_MARKER, vport->txqs[i]))
> -			return;
> -
> -	/* Drain complete */
> -	set_bit(IDPF_VPORT_SW_MARKER, vport->flags);
> -	wake_up(&vport->sw_marker_wq);
> -}
> -
>  /**
>   * idpf_tx_clean_stashed_bufs - clean bufs that were stored for
>   * out of order completions
> @@ -2008,6 +1982,19 @@ idpf_tx_handle_rs_cmpl_fb(struct idpf_tx_queue *txq,
>  	idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget);
>  }
>
> +/**
> + * idpf_tx_update_complq_indexes - update completion queue indexes
> + * @complq: completion queue being updated
> + * @ntc: current "next to clean" index value
> + * @gen_flag: current "generation" flag value
> + */
> +static void idpf_tx_update_complq_indexes(struct idpf_compl_queue *complq,
> +					  int ntc, bool gen_flag)
> +{
> +	complq->next_to_clean = ntc + complq->desc_count;
> +	idpf_queue_assign(GEN_CHK, complq, gen_flag);
> +}
> +
>  /**
>   * idpf_tx_finalize_complq - Finalize completion queue cleaning
>   * @complq: completion queue to finalize
> @@ -2059,8 +2046,7 @@ static void idpf_tx_finalize_complq(struct idpf_compl_queue *complq, int ntc,
>  		tx_q->cleaned_pkts = 0;
>  	}
>
> -	complq->next_to_clean = ntc + complq->desc_count;
> -	idpf_queue_assign(GEN_CHK, complq, gen_flag);
> +	idpf_tx_update_complq_indexes(complq, ntc, gen_flag);
>  }
>
>  /**
> @@ -2115,9 +2101,6 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
>  						   &cleaned_stats,
>  						   budget);
>  			break;
> -		case IDPF_TXD_COMPLT_SW_MARKER:
> -			idpf_tx_handle_sw_marker(tx_q);
> -			break;
>  		case -ENODATA:
>  			goto exit_clean_complq;
>  		case -EINVAL:
> @@ -2159,6 +2142,59 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
>  	return !!complq_budget;
>  }
>
> +/**
> + * idpf_wait_for_sw_marker_completion - wait for SW marker of disabled Tx queue
> + * @txq: disabled Tx queue
> + */
> +void idpf_wait_for_sw_marker_completion(struct idpf_tx_queue *txq)
> +{
> +	struct idpf_compl_queue *complq = txq->txq_grp->complq;
> +	struct idpf_splitq_4b_tx_compl_desc *tx_desc;
> +	s16 ntc = complq->next_to_clean;
> +	unsigned long timeout;
> +	bool flow, gen_flag;
> +	u32 pos = ntc;
> +
> +	if (!idpf_queue_has(SW_MARKER, txq))
> +		return;
> +
> +	flow = idpf_queue_has(FLOW_SCH_EN, complq);
> +	gen_flag = idpf_queue_has(GEN_CHK, complq);
> +
> +	timeout = jiffies + msecs_to_jiffies(IDPF_WAIT_FOR_MARKER_TIMEO);
> +	tx_desc = flow ? &complq->comp[pos].common : &complq->comp_4b[pos];
> +	ntc -= complq->desc_count;

Could we drop this negative-index logic? It was introduced back in the
day because the comparison against 0 in the wrap case was faster, but
here, as you said yourself, we're nowhere near the hot path (a plain
0-based index is used in the sketch below).

> +
> +	do {
> +		struct idpf_tx_queue *tx_q;
> +		int ctype;
> +
> +		ctype = idpf_parse_compl_desc(tx_desc, complq, &tx_q,
> +					      gen_flag);
> +		if (ctype == IDPF_TXD_COMPLT_SW_MARKER) {
> +			idpf_queue_clear(SW_MARKER, tx_q);
> +			if (txq == tx_q)
> +				break;
> +		} else if (ctype == -ENODATA) {
> +			usleep_range(500, 1000);
> +			continue;
> +		}
> +
> +		pos++;
> +		ntc++;
> +		if (unlikely(!ntc)) {
> +			ntc -= complq->desc_count;
> +			pos = 0;
> +			gen_flag = !gen_flag;
> +		}
> +
> +		tx_desc = flow ? &complq->comp[pos].common :
> +				 &complq->comp_4b[pos];
> +		prefetch(tx_desc);
> +	} while (time_before(jiffies, timeout));

What if the timeout expires and you still haven't found the marker
descriptor? And why do you need a timer at all? Couldn't you just scan
the whole ring once instead?
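Untested sketch of what I mean, reusing the helpers and fields from
this patch. The one thing I made up is reusing IDPF_WAIT_FOR_MARKER_TIMEO
as a retry cap for the -ENODATA case, so the scan stays bounded even if
HW never writes the marker:

	u32 pos = complq->next_to_clean;
	u32 scanned = 0, retries = 0;

	while (scanned < complq->desc_count) {
		struct idpf_tx_queue *tx_q;
		int ctype;

		tx_desc = flow ? &complq->comp[pos].common :
				 &complq->comp_4b[pos];

		ctype = idpf_parse_compl_desc(tx_desc, complq, &tx_q,
					      gen_flag);
		if (ctype == IDPF_TXD_COMPLT_SW_MARKER) {
			idpf_queue_clear(SW_MARKER, tx_q);
			if (tx_q == txq)
				break;
		} else if (ctype == -ENODATA) {
			/* HW hasn't written this slot yet: sleep and
			 * retry the same slot, giving up after a bounded
			 * number of attempts instead of arming a timer.
			 */
			if (++retries > IDPF_WAIT_FOR_MARKER_TIMEO)
				break;
			usleep_range(500, 1000);
			continue;
		}

		/* Plain 0-based wrap, no negative-index trick needed */
		scanned++;
		if (unlikely(++pos == complq->desc_count)) {
			pos = 0;
			gen_flag = !gen_flag;
		}
	}

	complq->next_to_clean = pos;
	idpf_queue_assign(GEN_CHK, complq, gen_flag);

That way one full pass over the ring is the natural upper bound, and
next_to_clean/GEN_CHK still get written back on the failure path, so
the caller can detect the missing marker via the SW_MARKER flag as
idpf_wait_for_marker_event() already does below.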
> +
> +	idpf_tx_update_complq_indexes(complq, ntc, gen_flag);
> +}
>  /**
>   * idpf_tx_splitq_build_ctb - populate command tag and size for queue
>   * based scheduling descriptors
> @@ -4130,15 +4166,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
>  	else
>  		idpf_vport_intr_set_wb_on_itr(q_vector);
>
> -	/* Switch to poll mode in the tear-down path after sending disable
> -	 * queues virtchnl message, as the interrupts will be disabled after
> -	 * that
> -	 */
> -	if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
> -							 q_vector->tx[0])))
> -		return budget;
> -	else
> -		return work_done;
> +	return work_done;
>  }
>
>  /**
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> index 135af3cc243f..24495e4d6c78 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> @@ -752,21 +752,17 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter)
>   **/
>  static int idpf_wait_for_marker_event(struct idpf_vport *vport)
>  {
> -	int event;
> -	int i;
> -
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_set(SW_MARKER, vport->txqs[i]);
> +	bool markers_rcvd = true;
>
> -	event = wait_event_timeout(vport->sw_marker_wq,
> -				   test_and_clear_bit(IDPF_VPORT_SW_MARKER,
> -						      vport->flags),
> -				   msecs_to_jiffies(500));
> +	for (u32 i = 0; i < vport->num_txq; i++) {
> +		struct idpf_tx_queue *txq = vport->txqs[i];
>
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_clear(POLL_MODE, vport->txqs[i]);
> +		idpf_queue_set(SW_MARKER, txq);
> +		idpf_wait_for_sw_marker_completion(txq);
> +		markers_rcvd &= !idpf_queue_has(SW_MARKER, txq);
> +	}
>
> -	if (event)
> +	if (markers_rcvd)
>  		return 0;
>
>  	dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n");
> @@ -1993,24 +1989,12 @@ int idpf_send_enable_queues_msg(struct idpf_vport *vport)
>   */
>  int idpf_send_disable_queues_msg(struct idpf_vport *vport)
>  {
> -	int err, i;
> +	int err;
>
>  	err = idpf_send_ena_dis_queues_msg(vport, false);
>  	if (err)
>  		return err;
>
> -	/* switch to poll mode as interrupts will be disabled after disable
> -	 * queues virtchnl message is sent
> -	 */
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_set(POLL_MODE, vport->txqs[i]);
> -
> -	/* schedule the napi to receive all the marker packets */
> -	local_bh_disable();
> -	for (i = 0; i < vport->num_q_vectors; i++)
> -		napi_schedule(&vport->q_vectors[i].napi);
> -	local_bh_enable();
> -
>  	return idpf_wait_for_marker_event(vport);
>  }
>
> --
> 2.48.1
>