A QP must use the same NIC TX queue to maintain packet order. The TX queue is usually selected based on the core from which the transmit was originated. Assign QPs to cores to better spread traffic across TX queues. This requires scheduling the tasklets in the cpu assigned to the QP. The transmit cpu is selected based on the comp_vector for the QP's scq. Signed-off-by: Vijay Immanuel <vijayi@xxxxxxxxxxxxxxxxx> --- drivers/infiniband/sw/rxe/rxe_comp.c | 8 +------ drivers/infiniband/sw/rxe/rxe_cq.c | 1 + drivers/infiniband/sw/rxe/rxe_qp.c | 6 ++--- drivers/infiniband/sw/rxe/rxe_resp.c | 8 +------ drivers/infiniband/sw/rxe/rxe_task.c | 45 ++++++++++++++++++++++++++++++++--- drivers/infiniband/sw/rxe/rxe_task.h | 7 +++++- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +-- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + 8 files changed, 56 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 6cdc40e..189f80e 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -149,14 +149,8 @@ void retransmit_timer(struct timer_list *t) void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - skb_queue_tail(&qp->resp_pkts, skb); - - must_sched = skb_queue_len(&qp->resp_pkts) > 1; - if (must_sched != 0) - rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED); - rxe_run_task(&qp->comp.task, must_sched); + rxe_run_task(&qp->comp.task, 1); } static inline enum comp_state get_wqe(struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 2ee4b08..ac7c5d0 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -106,6 +106,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->is_user = 1; cq->is_dying = false; + cq->comp_vector = comp_vector; tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9f7aa1..58204c8 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -260,9 +260,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->sq.sq_lock); skb_queue_head_init(&qp->req_pkts); - rxe_init_task(rxe, &qp->req.task, qp, + rxe_init_task(rxe, &qp->req.task, qp->scq->comp_vector, qp, rxe_requester, "req"); - rxe_init_task(rxe, &qp->comp.task, qp, + rxe_init_task(rxe, &qp->comp.task, qp->scq->comp_vector, qp, rxe_completer, "comp"); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ @@ -311,7 +311,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(rxe, &qp->resp.task, qp, + rxe_init_task(rxe, &qp->resp.task, qp->scq->comp_vector, qp, rxe_responder, "resp"); qp->resp.opcode = OPCODE_NONE; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 955ff3b..853ab49 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -107,15 +107,9 @@ static char *resp_state_name[] = { void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - skb_queue_tail(&qp->req_pkts, skb); - must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || - (skb_queue_len(&qp->req_pkts) > 1); - - rxe_run_task(&qp->resp.task, must_sched); + rxe_run_task(&qp->resp.task, 1); } static inline enum resp_states get_req(struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 08f05ac..8b7cb97 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -37,6 +37,8 @@ #include "rxe_task.h" +static void rxe_run_task_local(void *data); + int __rxe_do_task(struct rxe_task *task) { @@ -63,6 +65,7 @@ void rxe_do_task(unsigned long data) struct rxe_task *task = (struct rxe_task *)data; spin_lock_irqsave(&task->state_lock, flags); + task->scheduled = false; switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; @@ -108,20 +111,27 @@ void rxe_do_task(unsigned long data) pr_warn("%s failed with bad state %d\n", __func__, task->state); } + + if (!cont && task->scheduled) + tasklet_schedule(&task->tasklet); spin_unlock_irqrestore(&task->state_lock, flags); } while (cont); task->ret = ret; } -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(void *obj, struct rxe_task *task, int cpu, void *arg, int (*func)(void *), char *name) { + task->cpu = cpu; task->obj = obj; task->arg = arg; task->func = func; + task->csd.func = rxe_run_task_local; + task->csd.info = task; snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; + task->scheduled = false; tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); @@ -151,15 +161,44 @@ void rxe_cleanup_task(struct rxe_task *task) tasklet_kill(&task->tasklet); } +static void rxe_run_task_local(void *data) +{ + struct rxe_task *task = (struct rxe_task *)data; + + if (task->destroyed) + return; + + tasklet_schedule(&task->tasklet); +} + void rxe_run_task(struct rxe_task *task, int sched) { + int cpu; + unsigned long flags; + if (task->destroyed) return; - if (sched) + if (!sched) { + rxe_do_task((unsigned long)task); + return; + } + + spin_lock_irqsave(&task->state_lock, flags); + if (task->scheduled || task->state != TASK_STATE_START) { + task->scheduled = true; + spin_unlock_irqrestore(&task->state_lock, flags); + return; + } + task->scheduled = true; + spin_unlock_irqrestore(&task->state_lock, flags); + + cpu = get_cpu(); + if (task->cpu == cpu || !cpu_online(task->cpu)) tasklet_schedule(&task->tasklet); else - rxe_do_task((unsigned long)task); + smp_call_function_single_async(task->cpu, &task->csd); + put_cpu(); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 08ff42d..1470dee 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -34,6 +34,8 @@ #ifndef RXE_TASK_H #define RXE_TASK_H +#include <linux/smp.h> + enum { TASK_STATE_START = 0, TASK_STATE_BUSY = 1, @@ -48,13 +50,16 @@ enum { struct rxe_task { void *obj; struct tasklet_struct tasklet; + int cpu; int state; spinlock_t state_lock; /* spinlock for task state */ void *arg; int (*func)(void *arg); + call_single_data_t csd; int ret; char name[16]; bool destroyed; + bool scheduled; }; /* @@ -62,7 +67,7 @@ struct rxe_task { * arg => parameter to pass to fcn * fcn => function to call until it returns != 0 */ -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(void *obj, struct rxe_task *task, int cpu, void *arg, int (*func)(void *), char *name); /* cleanup task */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 73a00a1..addccc1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -813,8 +813,7 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } if (qp->is_user) { - /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task, 1); return 0; } else return rxe_post_send_kernel(qp, wr, bad_wr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index af1470d..6c6e8db 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -92,6 +92,7 @@ struct rxe_cq { u8 notify; bool is_dying; int is_user; + int comp_vector; struct tasklet_struct comp_task; }; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html