tgt uses the elevator code to send SCSI commands to the user-space daemon
(q->request_fn sends netlink packets containing the commands). This patch
replaces the elevator code with a simple list.

This is mainly because tgt also needs to send TMF requests to the user-space
daemon (the daemon handles all of the SCSI state machine work). tgt must send
SCSI commands and TMF requests in exact order, so it is preferable to use a
single queue (per host) for both. To use the elevator code for TMF requests,
tgt would have to allocate request structures for them. That is wasteful,
because a request structure is useless for a TMF request, which performs no
I/O.

Besides, we already have a netdev queue of events to send to userspace, so by
using both the request_queue and the netdev queue we are double queueing,
wasting resources, and hurting performance.

In the future we would like to use shared memory between kernel and user
space instead of netlink, and these queues would then go away. (Stand-alone
user-space sketches of the list and hash patterns used here follow the diff.)

Signed-off-by: FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>
Signed-off-by: Mike Christie <michaelc@xxxxxxxxxxx>

---

 drivers/scsi/scsi_tgt_lib.c  |  180 ++++++++++++++++++++++++++++++------------
 drivers/scsi/scsi_tgt_priv.h |    4 -
 2 files changed, 129 insertions(+), 55 deletions(-)

c4bb05742e1834d664a7d8e721ecea71d42fd7f7
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 274d929..2cbc749 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -20,7 +20,7 @@
  * 02110-1301 USA
  */
 #include <linux/blkdev.h>
-#include <linux/elevator.h>
+#include <linux/hash.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <scsi/scsi.h>
@@ -46,6 +46,24 @@ struct scsi_tgt_cmd {
 	struct bio_list xfer_done_list;
 	struct bio_list xfer_list;
 	struct scsi_lun *lun;
+
+	struct list_head hash_list;
+	struct request *rq;
+};
+
+#define TGT_HASH_ORDER	4
+#define cmd_hashfn(cid)	hash_long((cid), TGT_HASH_ORDER)
+
+struct scsi_tgt_queuedata {
+	struct Scsi_Host *shost;
+	struct list_head cmd_hash[1 << TGT_HASH_ORDER];
+	spinlock_t cmd_hash_lock;
+
+	struct work_struct uspace_send_work;
+
+	spinlock_t cmd_req_lock;
+	struct mutex cmd_req_mutex;
+	struct list_head cmd_req;
 };
 
 static void scsi_unmap_user_pages(struct scsi_tgt_cmd *tcmd)
@@ -68,9 +86,16 @@ static void scsi_tgt_cmd_destroy(void *d
 {
 	struct scsi_cmnd *cmd = data;
 	struct scsi_tgt_cmd *tcmd = cmd->request->end_io_data;
+	struct scsi_tgt_queuedata *qdata = cmd->request->q->queuedata;
+	unsigned long flags;
 
 	dprintk("cmd %p %d %lu\n", cmd, cmd->sc_data_direction,
 		rq_data_dir(cmd->request));
+
+	spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
+	list_del(&tcmd->hash_list);
+	spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
+
 	/*
 	 * We must set rq->flags here because bio_map_user and
 	 * blk_rq_bio_prep ruined ti.
@@ -88,55 +113,84 @@ static void scsi_tgt_cmd_destroy(void *d
 
 static void init_scsi_tgt_cmd(struct request *rq, struct scsi_tgt_cmd *tcmd)
 {
+	struct scsi_tgt_queuedata *qdata = rq->q->queuedata;
+	unsigned long flags;
+	struct list_head *head;
+	static u32 tag = 0;
+
 	tcmd->lun = rq->end_io_data;
 	bio_list_init(&tcmd->xfer_list);
 	bio_list_init(&tcmd->xfer_done_list);
-}
-
-static int scsi_uspace_prep_fn(struct request_queue *q, struct request *rq)
-{
-	struct scsi_tgt_cmd *tcmd;
 
-	tcmd = kmem_cache_alloc(scsi_tgt_cmd_cache, GFP_ATOMIC);
-	if (!tcmd)
-		return BLKPREP_DEFER;
+	spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
+	rq->tag = tag++;
+	head = &qdata->cmd_hash[cmd_hashfn(rq->tag)];
+	list_add(&tcmd->hash_list, head);
+	spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
 
-	init_scsi_tgt_cmd(rq, tcmd);
+	tcmd->rq = rq;
 	rq->end_io_data = tcmd;
 	rq->flags |= REQ_DONTPREP;
-	return BLKPREP_OK;
 }
 
-static void scsi_uspace_request_fn(struct request_queue *q)
+static void scsi_tgt_uspace_send_fn(void *data)
 {
+	struct request_queue *q = data;
+	struct scsi_tgt_queuedata *qdata = q->queuedata;
 	struct request *rq;
 	struct scsi_cmnd *cmd;
 	struct scsi_tgt_cmd *tcmd;
+	unsigned long flags;
+	int err;
 
-	/*
-	 * TODO: just send everthing in the queue to userspace in
-	 * one vector instead of multiple calls
-	 */
-	while ((rq = elv_next_request(q)) != NULL) {
-		cmd = rq->special;
-		tcmd = rq->end_io_data;
+retry:
+	err = 0;
+	if (list_empty(&qdata->cmd_req))
+		return;
 
-		/* the completion code kicks us in case we hit this */
-		if (blk_queue_start_tag(q, rq))
-			break;
+	tcmd = kmem_cache_alloc(scsi_tgt_cmd_cache, GFP_ATOMIC);
+	if (!tcmd) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	mutex_lock(&qdata->cmd_req_mutex);
 
-		spin_unlock_irq(q->queue_lock);
-		if (scsi_tgt_uspace_send(cmd, tcmd->lun, GFP_ATOMIC) < 0)
-			goto requeue;
-		spin_lock_irq(q->queue_lock);
+	spin_lock_irqsave(&qdata->cmd_req_lock, flags);
+	if (list_empty(&qdata->cmd_req)) {
+		spin_unlock_irqrestore(&qdata->cmd_req_lock, flags);
+		mutex_unlock(&qdata->cmd_req_mutex);
+		kmem_cache_free(scsi_tgt_cmd_cache, tcmd);
+		goto out;
 	}
+	rq = list_entry_rq(qdata->cmd_req.next);
+	list_del_init(&rq->queuelist);
+	spin_unlock_irqrestore(&qdata->cmd_req_lock, flags);
 
-	return;
-requeue:
-	spin_lock_irq(q->queue_lock);
-	/* need to track cnts and plug */
-	blk_requeue_request(q, rq);
-	spin_unlock_irq(q->queue_lock);
+	if ((rq->flags & REQ_DONTPREP)) {
+		kmem_cache_free(scsi_tgt_cmd_cache, tcmd);
+		tcmd = rq->end_io_data;
+	} else
+		init_scsi_tgt_cmd(rq, tcmd);
+
+	cmd = rq->special;
+	err = scsi_tgt_uspace_send(cmd, tcmd->lun, GFP_ATOMIC);
+	if (err < 0) {
+		eprintk("failed to send: %p %d\n", cmd, err);
+
+		spin_lock_irqsave(&qdata->cmd_req_lock, flags);
+		list_add(&rq->queuelist, &qdata->cmd_req);
+		spin_unlock_irqrestore(&qdata->cmd_req_lock, flags);
+	}
+
+	mutex_unlock(&qdata->cmd_req_mutex);
+out:
+	/* TODO: proper error handling */
+	if (err < 0)
+		queue_delayed_work(scsi_tgtd, &qdata->uspace_send_work,
+				   HZ / 10);
+	else
+		goto retry;
 }
 
 /**
@@ -150,13 +204,13 @@ int scsi_tgt_alloc_queue(struct Scsi_Hos
 {
 	struct scsi_tgt_queuedata *queuedata;
 	struct request_queue *q;
-	int err;
+	int err, i;
 
 	/*
 	 * Do we need to send a netlink event or should uspace
 	 * just respond to the hotplug event?
 	 */
-	q = __scsi_alloc_queue(shost, scsi_uspace_request_fn);
+	q = __scsi_alloc_queue(shost, NULL);
 	if (!q)
 		return -ENOMEM;
 
@@ -168,19 +222,12 @@ int scsi_tgt_alloc_queue(struct Scsi_Hos
 	queuedata->shost = shost;
 	q->queuedata = queuedata;
 
-	elevator_exit(q->elevator);
-	err = elevator_init(q, "noop");
-	if (err)
-		goto free_data;
-
-	blk_queue_prep_rq(q, scsi_uspace_prep_fn);
 	/*
 	 * this is a silly hack. We should probably just queue as many
 	 * command as is recvd to userspace. uspace can then make
 	 * sure we do not overload the HBA
 	 */
 	q->nr_requests = shost->hostt->can_queue;
-	blk_queue_init_tags(q, shost->hostt->can_queue, NULL);
 	/*
 	 * We currently only support software LLDs so this does
 	 * not matter for now. Do we need this for the cards we support?
@@ -189,10 +236,17 @@ int scsi_tgt_alloc_queue(struct Scsi_Hos
 	blk_queue_dma_alignment(q, 0);
 	shost->uspace_req_q = q;
 
+	for (i = 0; i < ARRAY_SIZE(queuedata->cmd_hash); i++)
+		INIT_LIST_HEAD(&queuedata->cmd_hash[i]);
+	spin_lock_init(&queuedata->cmd_hash_lock);
+
+	INIT_LIST_HEAD(&queuedata->cmd_req);
+	spin_lock_init(&queuedata->cmd_req_lock);
+	INIT_WORK(&queuedata->uspace_send_work, scsi_tgt_uspace_send_fn, q);
+	mutex_init(&queuedata->cmd_req_mutex);
+
 	return 0;
 
-free_data:
-	kfree(queuedata);
 cleanup_queue:
 	blk_cleanup_queue(q);
 	return err;
@@ -215,14 +269,17 @@ EXPORT_SYMBOL_GPL(scsi_tgt_cmd_to_host);
 void scsi_tgt_queue_command(struct scsi_cmnd *cmd, struct scsi_lun *scsilun,
 			    int noblock)
 {
-	/*
-	 * For now this just calls the request_fn from this context.
-	 * For HW llds though we do not want to execute from here so
-	 * the elevator code needs something like a REQ_TGT_CMD or
-	 * REQ_MSG_DONT_UNPLUG_IMMED_BECUASE_WE_WILL_HANDLE_IT
-	 */
+	struct request_queue *q = cmd->request->q;
+	struct scsi_tgt_queuedata *qdata = q->queuedata;
+	unsigned long flags;
+
 	cmd->request->end_io_data = scsilun;
-	elv_add_request(cmd->request->q, cmd->request, ELEVATOR_INSERT_BACK, 1);
+
+	spin_lock_irqsave(&qdata->cmd_req_lock, flags);
+	list_add_tail(&cmd->request->queuelist, &qdata->cmd_req);
+	spin_unlock_irqrestore(&qdata->cmd_req_lock, flags);
+
+	queue_work(scsi_tgtd, &qdata->uspace_send_work);
 }
 EXPORT_SYMBOL_GPL(scsi_tgt_queue_command);
 
@@ -438,6 +495,27 @@ static int scsi_tgt_copy_sense(struct sc
 	return 0;
 }
 
+static struct request *tgt_cmd_hash_lookup(struct request_queue *q, u32 cid)
+{
+	struct scsi_tgt_queuedata *qdata = q->queuedata;
+	struct request *rq = NULL;
+	struct list_head *head;
+	struct scsi_tgt_cmd *tcmd;
+	unsigned long flags;
+
+	head = &qdata->cmd_hash[cmd_hashfn(cid)];
+	spin_lock_irqsave(&qdata->cmd_hash_lock, flags);
+	list_for_each_entry(tcmd, head, hash_list) {
+		if (tcmd->rq->tag == cid) {
+			rq = tcmd->rq;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
+
+	return rq;
+}
+
 int scsi_tgt_kspace_exec(int host_no, u32 cid, int result, u32 len,
 			 unsigned long uaddr, u8 rw)
 {
@@ -456,7 +534,7 @@ int scsi_tgt_kspace_exec(int host_no, u3
 		return -EINVAL;
 	}
 
-	rq = blk_queue_find_tag(shost->uspace_req_q, cid);
+	rq = tgt_cmd_hash_lookup(shost->uspace_req_q, cid);
 	if (!rq) {
 		printk(KERN_ERR "Could not find cid %u\n", cid);
 		err = -EINVAL;
diff --git a/drivers/scsi/scsi_tgt_priv.h b/drivers/scsi/scsi_tgt_priv.h
index fcf2ec6..6fedcec 100644
--- a/drivers/scsi/scsi_tgt_priv.h
+++ b/drivers/scsi/scsi_tgt_priv.h
@@ -11,10 +11,6 @@ do {						\
 
 #define eprintk dprintk
 
-struct scsi_tgt_queuedata {
-	struct Scsi_Host *shost;
-};
-
 extern void scsi_tgt_if_exit(void);
 extern int scsi_tgt_if_init(void);
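For anyone who wants to play with the two patterns outside the kernel, here
are stand-alone user-space sketches. They are illustrations only: pthreads
stand in for the spinlock/workqueue, and every name in them is made up for
the example rather than being a kernel API. First, the single FIFO plus
worker thread that replaces the elevator queue; the one list per host is
what keeps SCSI commands and TMF requests in exact arrival order:

/*
 * User-space analogue of the cmd_req list + uspace_send_work pattern:
 * queue_command() plays scsi_tgt_queue_command() (append and kick the
 * worker) and send_worker() plays scsi_tgt_uspace_send_fn() (drain in
 * arrival order).
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct req {
	int id;
	struct req *next;
};

static struct req *head;
static struct req **tail = &head;	/* FIFO tail, like list_add_tail() */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;

static void queue_command(struct req *r)
{
	pthread_mutex_lock(&lock);
	r->next = NULL;
	*tail = r;
	tail = &r->next;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&kick);	/* like queue_work(scsi_tgtd, ...) */
}

static void *send_worker(void *unused)
{
	(void)unused;
	for (;;) {
		struct req *r;

		pthread_mutex_lock(&lock);
		while (!head)
			pthread_cond_wait(&kick, &lock);
		r = head;		/* pop the oldest request */
		head = r->next;
		if (!head)
			tail = &head;
		pthread_mutex_unlock(&lock);

		/* stands in for scsi_tgt_uspace_send() */
		printf("sent command %d to the daemon\n", r->id);
		free(r);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;
	int i;

	pthread_create(&tid, NULL, send_worker, NULL);
	for (i = 0; i < 3; i++) {
		struct req *r = malloc(sizeof(*r));

		r->id = i;
		queue_command(r);
	}
	sleep(1);	/* let the worker drain the list */
	return 0;
}

The kernel version also has to requeue and retry when the allocation or the
netlink send fails, which is what the retry/out labels in
scsi_tgt_uspace_send_fn() deal with; the sketch leaves that out.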
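Second, the fixed-size hash that replaces blk_queue_find_tag() for finding
an in-flight command by cid. The multiplier below is the 32-bit golden-ratio
constant, the same idea hash_long() in linux/hash.h uses; again this is a
sketch, not the kernel implementation:

/*
 * cid -> command lookup: hash the tag into one of 16 buckets and walk
 * that bucket's chain, as tgt_cmd_hash_lookup() does above.
 */
#include <stdio.h>
#include <stdint.h>

#define TGT_HASH_ORDER	4
#define NR_BUCKETS	(1 << TGT_HASH_ORDER)

struct tcmd {
	uint32_t tag;		/* cid handed out at init time */
	struct tcmd *next;	/* bucket chain, like the hash_list member */
};

static struct tcmd *cmd_hash[NR_BUCKETS];

/* 32-bit golden-ratio hash folded down to TGT_HASH_ORDER bits */
static unsigned int cmd_hashfn(uint32_t cid)
{
	return (uint32_t)(cid * 0x9e370001u) >> (32 - TGT_HASH_ORDER);
}

static void cmd_hash_add(struct tcmd *t)
{
	unsigned int b = cmd_hashfn(t->tag);

	t->next = cmd_hash[b];
	cmd_hash[b] = t;
}

static struct tcmd *cmd_hash_lookup(uint32_t cid)
{
	struct tcmd *t;

	for (t = cmd_hash[cmd_hashfn(cid)]; t; t = t->next)
		if (t->tag == cid)
			return t;
	return NULL;		/* unknown cid, like the -EINVAL path */
}

int main(void)
{
	struct tcmd a = { 42, NULL }, b = { 1042, NULL };

	cmd_hash_add(&a);
	cmd_hash_add(&b);
	printf("cid 42 -> %s\n", cmd_hash_lookup(42) == &a ? "found" : "lost");
	printf("cid 7  -> %s\n", cmd_hash_lookup(7) ? "found" : "not found");
	return 0;
}

Sixteen buckets should be plenty for the number of commands a host keeps
outstanding to the daemon at once; bump TGT_HASH_ORDER if that ever changes.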
"unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html