On Thu, May 21, 2020 at 8:42 PM Tomas Henzl <thenzl@xxxxxxxxxx> wrote: > > On 4/23/20 9:23 AM, Suganath Prabu wrote: > > For INVADER_SERIES each set of 8 reply queues (0 - 7, 8 - 15,..) and > > VENTURA_SERIES each set of 16 reply queues (0 - 15, 16 - 31,..) should > > be within 4 GB boundary. Driver uses limitation of VENTURA_SERIES to > > manage INVADER_SERIES as well. So here driver is allocating the DMA able > > memory for RDPQ's accordingly. > > > > 1) At driver load, set DMA Mask to 64 and allocate memory for RDPQ's. > > 2) Check if allocated resources for RDPQ are in the same 4GB range. > > 3) If #2 is true, continue with 64 bit DMA and go to #6 > > 4) If #2 is false, then free all the resources from #1. > > 5) Set DMA mask to 32 and allocate RDPQ's. > > 6) Proceed with driver loading and other allocations > > --- > > v1 Change log: > > 1) Use one dma pool for RDPQ's, thus removes the logic of using second > > dma pool with align. > > v2 Change log: > > Added flag use_32bit_dma. If this flag is true, 32 bit coharent > > dma is used. > > > > Signed-off-by: Suganath Prabu <suganath-prabu.subramani@xxxxxxxxxxxx> > > Reviewed-by: Christoph Hellwig <hch@xxxxxx> > > --- > > drivers/scsi/mpt3sas/mpt3sas_base.c | 152 +++++++++++++++++++++++++----------- > > drivers/scsi/mpt3sas/mpt3sas_base.h | 3 + > > 2 files changed, 109 insertions(+), 46 deletions(-) > > > > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c > > index 0588941..af30e74 100644 > > --- a/drivers/scsi/mpt3sas/mpt3sas_base.c > > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c > > @@ -2810,7 +2810,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) > > int dma_mask; > > > > if (ioc->is_mcpu_endpoint || > > - sizeof(dma_addr_t) == 4 || > > + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || > > dma_get_required_mask(&pdev->dev) <= 32) > > dma_mask = 32; > > /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ > > @@ -4807,8 +4807,8 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > { > > int i = 0; > > int j = 0; > > + int dma_alloc_count = 0; > > struct chain_tracker *ct; > > - struct reply_post_struct *rps; > > > > dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > > > > @@ -4850,29 +4850,34 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > } > > > > if (ioc->reply_post) { > > - do { > > - rps = &ioc->reply_post[i]; > > - if (rps->reply_post_free) { > > - dma_pool_free( > > - ioc->reply_post_free_dma_pool, > > - rps->reply_post_free, > > - rps->reply_post_free_dma); > > - dexitprintk(ioc, > > - ioc_info(ioc, "reply_post_free_pool(0x%p): free\n", > > - rps->reply_post_free)); > > - rps->reply_post_free = NULL; > > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > > + for (i = 0; i < ioc->reply_queue_count; i++) { > > + if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 > > + && dma_alloc_count) { > > + if (ioc->reply_post[i].reply_post_free) { > > + dma_pool_free( > > + ioc->reply_post_free_dma_pool, > > + ioc->reply_post[i].reply_post_free, > > + ioc->reply_post[i].reply_post_free_dma); > > + dexitprintk(ioc, ioc_info(ioc, > > + "reply_post_free_pool(0x%p): free\n", > > + ioc->reply_post[i].reply_post_free)); > > + ioc->reply_post[i].reply_post_free = > > + NULL; > > + } > > + --dma_alloc_count; > > } > > - } while (ioc->rdpq_array_enable && > > - (++i < ioc->reply_queue_count)); > > + } > > + dma_pool_destroy(ioc->reply_post_free_dma_pool); > > if (ioc->reply_post_free_array && > > ioc->rdpq_array_enable) { > > dma_pool_free(ioc->reply_post_free_array_dma_pool, > > - ioc->reply_post_free_array, > > - ioc->reply_post_free_array_dma); > > + ioc->reply_post_free_array, > > + ioc->reply_post_free_array_dma); > > ioc->reply_post_free_array = NULL; > > } > > dma_pool_destroy(ioc->reply_post_free_array_dma_pool); > > - dma_pool_destroy(ioc->reply_post_free_dma_pool); > > kfree(ioc->reply_post); > > } > > > > @@ -4948,36 +4953,75 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) > > static int > > base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) > > { > > - int i; > > + int i = 0; > > + u32 dma_alloc_count = 0; > > + int reply_post_free_sz = ioc->reply_post_queue_depth * > > + sizeof(Mpi2DefaultReplyDescriptor_t); > > int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; > > > > ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), > > GFP_KERNEL); > > if (!ioc->reply_post) > > return -ENOMEM; > > + /* > > + * For INVADER_SERIES each set of 8 reply queues(0-7, 8-15, ..) and > > + * VENTURA_SERIES each set of 16 reply queues(0-15, 16-31, ..) should > > + * be within 4GB boundary i.e reply queues in a set must have same > > + * upper 32-bits in their memory address. so here driver is allocating > > + * the DMA'able memory for reply queues according. > > + * Driver uses limitation of > > + * VENTURA_SERIES to manage INVADER_SERIES as well. > > + */ > > + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK); > > ioc->reply_post_free_dma_pool = > > - dma_pool_create("reply_post_free pool", > > - &ioc->pdev->dev, sz, 16, 0); > > + dma_pool_create("reply_post_free pool", > > + &ioc->pdev->dev, sz, 16, 0); > > if (!ioc->reply_post_free_dma_pool) > > return -ENOMEM; > > - i = 0; > > - do { > > - ioc->reply_post[i].reply_post_free = > > - dma_pool_zalloc(ioc->reply_post_free_dma_pool, > > - GFP_KERNEL, > > - &ioc->reply_post[i].reply_post_free_dma); > > - if (!ioc->reply_post[i].reply_post_free) > > - return -ENOMEM; > > - dinitprintk(ioc, > > - ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," > > - "element_size(%d), pool_size(%d kB)\n", > > - ioc->reply_post[i].reply_post_free, > > - ioc->reply_post_queue_depth, 8, sz / 1024)); > > - dinitprintk(ioc, > > - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", > > - (u64)ioc->reply_post[i].reply_post_free_dma)); > > + for (i = 0; i < ioc->reply_queue_count; i++) { > > + if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { > > + ioc->reply_post[i].reply_post_free = > ioc->reply_post is allocated with 'count' size, which may be smaller > than ioc->reply_queue_count > Probably this causes some older systems to not boot. > The problem should be fixed before it comes to mainline. > > Cheers, > tomash Tomash, thanks for identifying this bug. We will post the fix patch today. Thanks, Sreekanth > > > > + dma_pool_alloc(ioc->reply_post_free_dma_pool, > > + GFP_KERNEL, > > + &ioc->reply_post[i].reply_post_free_dma); > > + if (!ioc->reply_post[i].reply_post_free) > > + return -ENOMEM; > > + /* > > + * Each set of RDPQ pool must satisfy 4gb boundary > > + * restriction. > > + * 1) Check if allocated resources for RDPQ pool are in > > + * the same 4GB range. > > + * 2) If #1 is true, continue with 64 bit DMA. > > + * 3) If #1 is false, return 1. which means free all the > > + * resources and set DMA mask to 32 and allocate. > > + */ > > + if (!mpt3sas_check_same_4gb_region( > > + (long)ioc->reply_post[i].reply_post_free, sz)) { > > + dinitprintk(ioc, > > + ioc_err(ioc, "bad Replypost free pool(0x%p)" > > + "reply_post_free_dma = (0x%llx)\n", > > + ioc->reply_post[i].reply_post_free, > > + (unsigned long long) > > + ioc->reply_post[i].reply_post_free_dma)); > > + return -EAGAIN; > > + } > > + memset(ioc->reply_post[i].reply_post_free, 0, > > + RDPQ_MAX_INDEX_IN_ONE_CHUNK * > > + reply_post_free_sz); > > + dma_alloc_count--; > > > > - } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); > > + } else { > > + ioc->reply_post[i].reply_post_free = > > + (Mpi2ReplyDescriptorsUnion_t *) > > + ((long)ioc->reply_post[i-1].reply_post_free > > + + reply_post_free_sz); > > + ioc->reply_post[i].reply_post_free_dma = > > + (dma_addr_t) > > + (ioc->reply_post[i-1].reply_post_free_dma + > > + reply_post_free_sz); > > + } > > + } > > return 0; > > } > > > > @@ -4995,10 +5039,12 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > u16 chains_needed_per_io; > > u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz; > > u32 retry_sz; > > + u32 rdpq_sz = 0; > > u16 max_request_credit, nvme_blocks_needed; > > unsigned short sg_tablesize; > > u16 sge_size; > > int i, j; > > + int ret = 0; > > struct chain_tracker *ct; > > > > dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); > > @@ -5152,14 +5198,28 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > /* reply post queue, 16 byte align */ > > reply_post_free_sz = ioc->reply_post_queue_depth * > > sizeof(Mpi2DefaultReplyDescriptor_t); > > - > > - sz = reply_post_free_sz; > > + rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; > > if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) > > - sz *= ioc->reply_queue_count; > > - if (base_alloc_rdpq_dma_pool(ioc, sz)) > > - goto out; > > - total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); > > - > > + rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; > > + ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); > > + if (ret == -EAGAIN) { > > + /* > > + * Free allocated bad RDPQ memory pools. > > + * Change dma coherent mask to 32 bit and reallocate RDPQ > > + */ > > + _base_release_memory_pools(ioc); > > + ioc->use_32bit_dma = true; > > + if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) { > > + ioc_err(ioc, > > + "32 DMA mask failed %s\n", pci_name(ioc->pdev)); > > + return -ENODEV; > > + } > > + if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz)) > > + return -ENOMEM; > > + } else if (ret == -ENOMEM) > > + return -ENOMEM; > > + total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 : > > + DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK)); > > ioc->scsiio_depth = ioc->hba_queue_depth - > > ioc->hi_priority_depth - ioc->internal_depth; > > > > @@ -5171,7 +5231,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) > > ioc_info(ioc, "scsi host: can_queue depth (%d)\n", > > ioc->shost->can_queue)); > > > > - > > /* contiguous pool for request and chains, 16 byte align, one extra " > > * "frame for smid=0 > > */ > > @@ -7141,6 +7200,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) > > ioc->smp_affinity_enable = smp_affinity_enable; > > > > ioc->rdpq_array_enable_assigned = 0; > > + ioc->use_32bit_dma = 0; > > if (ioc->is_aero_ioc) > > ioc->base_readl = &_base_readl_aero; > > else > > diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h > > index caae040..5a83971 100644 > > --- a/drivers/scsi/mpt3sas/mpt3sas_base.h > > +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h > > @@ -367,6 +367,7 @@ struct mpt3sas_nvme_cmd { > > #define MPT3SAS_HIGH_IOPS_REPLY_QUEUES 8 > > #define MPT3SAS_HIGH_IOPS_BATCH_COUNT 16 > > #define MPT3SAS_GEN35_MAX_MSIX_QUEUES 128 > > +#define RDPQ_MAX_INDEX_IN_ONE_CHUNK 16 > > > > /* OEM Specific Flags will come from OEM specific header files */ > > struct Mpi2ManufacturingPage10_t { > > @@ -1063,6 +1064,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); > > * @thresh_hold: Max number of reply descriptors processed > > * before updating Host Index > > * @drv_support_bitmap: driver's supported feature bit map > > + * @use_32bit_dma: Flag to use 32 bit consistent dma mask > > * @scsi_io_cb_idx: shost generated commands > > * @tm_cb_idx: task management commands > > * @scsih_cb_idx: scsih internal commands > > @@ -1252,6 +1254,7 @@ struct MPT3SAS_ADAPTER { > > u8 high_iops_queues; > > u32 drv_support_bitmap; > > bool enable_sdev_max_qd; > > + bool use_32bit_dma; > > > > /* internal commands, callback index */ > > u8 scsi_io_cb_idx; > > >