Based on Jens' code for sg-chaining but over scsi_sgtable implementation - Previous scsi_{alloc,free}_sgtable() renamed to scsi_{alloc,free}_sgtable_page() - scsi_{alloc,free}_sgtable() using the above now supports sg-chaining with multiple sgtable allocations. - Report arbitrary default of 2048 to block layer. from Jens: This is what enables large commands. If we need to allocate an sgtable that doesn't fit in a single page, allocate several SCSI_MAX_SG_SEGMENTS sized tables and chain them together. SCSI defaults to large chained sg tables, if the arch supports it. Signed-off-by: Boaz Harrosh <bharrosh@xxxxxxxxxxx> --- drivers/scsi/scsi_lib.c | 89 +++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 262128c..13870b5 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -59,6 +59,12 @@ static inline unsigned scsi_pool_size(int pool) return scsi_sg_pools[pool].size; } +/* + * IO limit for archs that have sg chaining. This limit is totally arbitrary, + * a setting of 2048 will get you at least 8MB IOs. 
+ */ +#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 + static void scsi_run_queue(struct request_queue *q); /* @@ -713,7 +719,7 @@ static unsigned scsi_sgtable_index(unsigned nents) return -1; } -struct scsi_sgtable *scsi_alloc_sgtable(int sg_count, gfp_t gfp_mask) +static struct scsi_sgtable *scsi_alloc_sgtable_page(int sg_count, gfp_t gfp_mask) { unsigned int pool = scsi_sgtable_index(sg_count); struct scsi_sgtable *sgt; @@ -727,12 +733,77 @@ struct scsi_sgtable *scsi_alloc_sgtable(int sg_count, gfp_t gfp_mask) sgt->sg_pool = pool; return sgt; } + +struct scsi_sgtable *scsi_alloc_sgtable(int sg_count, gfp_t gfp_mask) +{ + struct scsi_sgtable *sgt, *prev, *ret; + + if (sg_count <= SCSI_MAX_SG_SEGMENTS) + return scsi_alloc_sgtable_page(sg_count, gfp_mask); + + ret = prev = NULL; + do { + int this; + + if (sg_count > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; /* room for chain */ + } else { + this = sg_count; + } + + sgt = scsi_alloc_sgtable_page(this, gfp_mask); + /* + * FIXME: since second and subsequent allocations are done + * ~__GFP_WAIT we can fail more easily, but nothing + * prevents us from trying smaller pools and chaining + * more arrays. The last patch in the series does just + * that. + */ + if (unlikely(!sgt)) + goto enomem; + + /* first loop through, set return value */ + if (!ret) + ret = sgt; + + /* chain previous sglist, if any */ + if (prev) + sg_chain(prev->sglist, scsi_pool_size(prev->sg_pool), + sgt->sglist); + + /* + * don't allow subsequent mempool allocs to sleep, it would + * violate the mempool principle. 
+ */ + gfp_mask &= ~__GFP_WAIT; + gfp_mask |= __GFP_HIGH; + sg_count -= this; + prev = sgt; + } while (sg_count); + + return ret; +enomem: + if (ret) + scsi_free_sgtable(ret); + return NULL; +} EXPORT_SYMBOL(scsi_alloc_sgtable); -void scsi_free_sgtable(struct scsi_sgtable *sgt) +static void scsi_free_sgtable_page(struct scsi_sgtable *sgt) { mempool_free(sgt, scsi_sg_pools[sgt->sg_pool].pool); } + +static void scsi_free_sgtable(struct scsi_sgtable *sgt) +{ + do { + struct scatterlist *next, *here_last; + here_last = &sgt->sglist[scsi_pool_size(sgt->sg_pool) - 1]; + next = sg_is_chain(here_last) ? sg_chain_ptr(here_last) : NULL; + scsi_free_sgtable_page(sgt); + sgt = next ? ((struct scsi_sgtable*)next) - 1 : NULL; + } while(sgt); +} EXPORT_SYMBOL(scsi_free_sgtable); /* @@ -1550,8 +1621,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, if (!q) return NULL; + /* + * this limit is imposed by hardware restrictions + */ blk_queue_max_hw_segments(q, shost->sg_tablesize); + + /* + * In the future, sg chaining support will be mandatory and this + * ifdef can then go away. Right now we don't have all archs + * converted, so better keep it safe. + */ +#ifdef ARCH_HAS_SG_CHAIN + blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); +#else blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#endif + blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); -- 1.5.2.2.249.g45fd - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html