On 5/18/23 03:21, Daisuke Matsuda wrote: > Both responder and completer need to sleep to execute page-fault when used > with ODP. It can happen when they are going to access user MRs, so tasks > must be executed in process context for such cases. > > Additionally, current implementation seldom defers tasks to workqueue, but > instead defers to a softirq context running do_task(). It is called from > rxe_resp_queue_pkt() and rxe_comp_queue_pkt() in SOFTIRQ_NET_RX context and > can last until maximum RXE_MAX_ITERATIONS (=1024) loops are executed. The > problem is the that task execuion appears to be anonymous loads in the > system and that the loop can throttle other softirqs on the same CPU. > > This patch makes responder and completer codes run in process context for > ODP and the problem described above. > > Signed-off-by: Daisuke Matsuda <matsuda-daisuke@xxxxxxxxxxx> > --- > drivers/infiniband/sw/rxe/rxe_comp.c | 12 +----------- > drivers/infiniband/sw/rxe/rxe_hw_counters.c | 1 - > drivers/infiniband/sw/rxe/rxe_hw_counters.h | 1 - > drivers/infiniband/sw/rxe/rxe_resp.c | 13 +------------ > 4 files changed, 2 insertions(+), 25 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c > index db18ace74d2b..671fdb645030 100644 > --- a/drivers/infiniband/sw/rxe/rxe_comp.c > +++ b/drivers/infiniband/sw/rxe/rxe_comp.c > @@ -128,18 +128,8 @@ void retransmit_timer(struct timer_list *t) > > void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) > { > - int must_sched; > - > skb_queue_tail(&qp->resp_pkts, skb); > - > - must_sched = skb_queue_len(&qp->resp_pkts) > 1; > - if (must_sched != 0) > - rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); > - > - if (must_sched) > - rxe_sched_task(&qp->comp.task); > - else > - rxe_run_task(&qp->comp.task); > + rxe_sched_task(&qp->comp.task); > } > > static inline enum comp_state get_wqe(struct rxe_qp *qp, > diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c > index a012522b577a..dc23cf3a6967 100644 > --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c > +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c > @@ -14,7 +14,6 @@ static const struct rdma_stat_desc rxe_counter_descs[] = { > [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", > [RXE_CNT_SND_RNR].name = "send_rnr_err", > [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", > - [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", > [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", > [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", > [RXE_CNT_COMP_RETRY].name = "completer_retry_err", > diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h > index 71f4d4fa9dc8..303da0e3134a 100644 > --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h > +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h > @@ -18,7 +18,6 @@ enum rxe_counters { > RXE_CNT_RCV_RNR, > RXE_CNT_SND_RNR, > RXE_CNT_RCV_SEQ_ERR, > - RXE_CNT_COMPLETER_SCHED, > RXE_CNT_RETRY_EXCEEDED, > RXE_CNT_RNR_RETRY_EXCEEDED, > RXE_CNT_COMP_RETRY, > diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c > index 68f6cd188d8e..ba0222bfce9e 100644 > --- a/drivers/infiniband/sw/rxe/rxe_resp.c > +++ b/drivers/infiniband/sw/rxe/rxe_resp.c > @@ -46,21 +46,10 @@ static char *resp_state_name[] = { > [RESPST_EXIT] = "EXIT", > }; > > -/* rxe_recv calls here to add a request packet to the input queue */ > void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) > { > - int must_sched; > - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); > - > skb_queue_tail(&qp->req_pkts, skb); > - > - must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || > - (skb_queue_len(&qp->req_pkts) > 1); > - > - if (must_sched) > - rxe_sched_task(&qp->resp.task); > - else > - rxe_run_task(&qp->resp.task); > + rxe_sched_task(&qp->resp.task); > } > > static inline enum resp_states get_req(struct rxe_qp *qp, Looks good. Reviewed-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>