A slightly revised sg-chaining patch to accommodate the cleanup of the sg-pool allocations. From Jens: This is what enables large commands. If we need to allocate an sgtable that doesn't fit in a single page, allocate several SCSI_MAX_SG_SEGMENTS-sized tables and chain them together. SCSI defaults to large chained sg tables if the arch supports it. Was-Signed-by: Jens Axboe <jens.axboe@xxxxxxxxxx> Signed-off-by: Boaz Harrosh <bharrosh@xxxxxxxxxxx> --- drivers/scsi/scsi_lib.c | 136 +++++++++++++++++++++++++++++++++++++++++++--- include/scsi/scsi_cmnd.h | 1 + 2 files changed, 129 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 71532f9..7ee5591 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -54,7 +54,11 @@ struct scsi_host_sg_pool { }; static struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR]; - +/* + * IO limit For archs that have sg chaining. This limit is totally arbitrary, + * a setting of 2048 will get you at least 8mb ios. 
+ */ +#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 static void scsi_run_queue(struct request_queue *q); @@ -712,21 +716,123 @@ static unsigned scsi_sgtable_index(unsigned nents) struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) { - unsigned int pool = scsi_sgtable_index(cmd->use_sg); - struct scatterlist *sgl; + struct scsi_host_sg_pool *sgp; + struct scatterlist *sgl, *prev, *ret; + unsigned int index; + int this, left; + + BUG_ON(!cmd->use_sg); + + left = cmd->use_sg; + ret = prev = NULL; + do { + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); - sgl = mempool_alloc(scsi_sg_pools[pool].pool, gfp_mask); - if (unlikely(!sgl)) - return NULL; + left -= this; + + sgp = scsi_sg_pools + index; + + sgl = mempool_alloc(sgp->pool, gfp_mask); + if (unlikely(!sgl)) + goto enomem; + + memset(sgl, 0, sizeof(*sgl) * sgp->size); + + /* + * first loop through, set initial index and return value + */ + if (!ret) { + cmd->sg_pool = index; + ret = sgl; + } + + /* + * chain previous sglist, if any. we know the previous + * sglist must be the biggest one, or we would not have + * ended up doing another loop. + */ + if (prev) + sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); + + /* + * don't allow subsequent mempool allocs to sleep, it would + * violate the mempool principle. + */ + gfp_mask &= ~__GFP_WAIT; + gfp_mask |= __GFP_HIGH; + prev = sgl; + } while (left); + + /* + * ->use_sg may get modified after dma mapping has potentially + * shrunk the number of segments, so keep a copy of it for free. + */ + cmd->__use_sg = cmd->use_sg; + return ret; +enomem: + if (ret) { + /* + * Free entries chained off ret. Since we were trying to + * allocate another sglist, we know that all entries are of + * the max size. 
+ */ + sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; + prev = ret; + ret = &ret[SCSI_MAX_SG_SEGMENTS - 1]; + + while ((sgl = sg_chain_ptr(ret)) != NULL) { + ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1]; + mempool_free(sgl, sgp->pool); + } - cmd->sg_pool = pool; - return sgl; + mempool_free(prev, sgp->pool); + } + return NULL; } EXPORT_SYMBOL(scsi_alloc_sgtable); void scsi_free_sgtable(struct scsi_cmnd *cmd) { + struct scatterlist *sgl = cmd->request_buffer; + struct scsi_host_sg_pool *sgp; + + /* + * if this is the biggest size sglist, check if we have + * chained parts we need to free + */ + if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) { + unsigned short this, left; + struct scatterlist *next; + unsigned int index; + + left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1); + next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]); + while (left && next) { + sgl = next; + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); + + left -= this; + + sgp = scsi_sg_pools + index; + + if (left) + next = sg_chain_ptr(&sgl[sgp->size - 1]); + + mempool_free(sgl, sgp->pool); + } + } + mempool_free(cmd->request_buffer, scsi_sg_pools[cmd->sg_pool].pool); } @@ -1550,8 +1656,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, if (!q) return NULL; + /* + * this limit is imposed by hardware restrictions + */ blk_queue_max_hw_segments(q, shost->sg_tablesize); + + /* + * In the future, sg chaining support will be mandatory and this + * ifdef can then go away. Right now we don't have all archs + * converted, so better keep it safe. 
+ */ +#ifdef ARCH_HAS_SG_CHAIN + blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); +#else blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#endif + blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 279a4df..7d0b2de 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -72,6 +72,7 @@ struct scsi_cmnd { /* These elements define the operation we ultimately want to perform */ unsigned short use_sg; /* Number of pieces of scatter-gather */ unsigned short sg_pool; /* pool index of allocated sg array */ + unsigned short __use_sg; unsigned underflow; /* Return error if less than this amount is transferred */ -- 1.5.2.2.249.g45fd - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html