> -----Original Message-----
> From: Ming Lei [mailto:ming.lei@xxxxxxxxxx]
> Sent: Thursday, March 08, 2018 9:32 PM
> To: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx>; Jens Axboe
> <axboe@xxxxxx>; Martin K. Petersen <martin.petersen@xxxxxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>; linux-scsi@xxxxxxxxxxxxxxx;
> linux-block@xxxxxxxxxxxxxxx; Meelis Roos <mroos@xxxxxxxx>; Don Brace
> <don.brace@xxxxxxxxxxxxx>; Kashyap Desai <kashyap.desai@xxxxxxxxxxxx>;
> Laurence Oberman <loberman@xxxxxxxxxx>; Mike Snitzer
> <snitzer@xxxxxxxxxx>; Ming Lei <ming.lei@xxxxxxxxxx>; Hannes Reinecke
> <hare@xxxxxxx>; James Bottomley
> <james.bottomley@xxxxxxxxxxxxxxxxxxxxx>; Artem Bityutskiy
> <artem.bityutskiy@xxxxxxxxx>
> Subject: [PATCH V4 1/4] scsi: hpsa: fix selection of reply queue
>
> EXTERNAL EMAIL
>
> Since commit 84676c1f21 ("genirq/affinity: assign vectors to all
> possible CPUs"), an msix vector can be created without any online CPU
> mapped to it, and a command whose completion is routed to such a
> vector may never be notified.
>
> This patch sets up a mapping between each CPU and a reply queue
> according to the irq affinity info retrieved by pci_irq_get_affinity(),
> and uses this mapping table to choose the reply queue when queuing a
> command.
>
> The chosen reply queue is then guaranteed to be active, which fixes
> the I/O hang caused by queuing to an inactive reply queue that has no
> online CPU mapped.
>
> Cc: Hannes Reinecke <hare@xxxxxxx>
> Cc: "Martin K. Petersen" <martin.petersen@xxxxxxxxxx>
> Cc: James Bottomley <james.bottomley@xxxxxxxxxxxxxxxxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Don Brace <don.brace@xxxxxxxxxxxxx>
> Cc: Kashyap Desai <kashyap.desai@xxxxxxxxxxxx>
> Cc: Laurence Oberman <loberman@xxxxxxxxxx>
> Cc: Meelis Roos <mroos@xxxxxxxx>
> Cc: Artem Bityutskiy <artem.bityutskiy@xxxxxxxxx>
> Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
> Tested-by: Laurence Oberman <loberman@xxxxxxxxxx>
> Tested-by: Don Brace <don.brace@xxxxxxxxxxxxx>
> Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs")
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---

Acked-by: Don Brace <don.brace@xxxxxxxxxxxxx>
Tested-by: Don Brace <don.brace@xxxxxxxxxxxxx>

* Rebuilt test rig: applied the following patches to Linus's tree 4.16.0-rc4+:
  [PATCH V4 1_4] scsi: hpsa: fix selection of reply queue - Ming Lei <ming.lei@xxxxxxxxxx> - 2018-03-08 2132.eml
  [PATCH V4 3_4] scsi: introduce force_blk_mq - Ming Lei <ming.lei@xxxxxxxxxx> - 2018-03-08 2132.eml
* fio tests on 6 LVs on a P441 controller (fw 6.59) for 5 days.
* fio tests on 10 HBA disks on a P431 controller (fw 4.54) for 3 days
  (concurrent with the P441 tests).
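The gist of the fix, for anyone skimming the diff below: every command is
steered to the reply queue that the submitting CPU maps to, so the completion
interrupt always lands on a vector with at least one online CPU. A minimal
sketch of that selection step (demo_pick_reply_queue is a hypothetical name,
not a function in the patch; the real lookup happens inline in
__enqueue_cmd_and_start_io()):

static unsigned int demo_pick_reply_queue(struct ctlr_info *h)
{
	/*
	 * raw_smp_processor_id() is fine here even if the task can
	 * migrate afterwards: every entry in reply_map points at a
	 * queue whose vector has an online CPU, so any answer is safe.
	 */
	return h->reply_map[raw_smp_processor_id()];
}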
> drivers/scsi/hpsa.c | 73 +++++++++++++++++++++++++++++++++++++++--------------
> drivers/scsi/hpsa.h |  1 +
> 2 files changed, 55 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> index 5293e6827ce5..3a9eca163db8 100644
> --- a/drivers/scsi/hpsa.c
> +++ b/drivers/scsi/hpsa.c
> @@ -1045,11 +1045,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
>  		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
>  		if (unlikely(!h->msix_vectors))
>  			return;
> -		if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -			c->Header.ReplyQueue =
> -				raw_smp_processor_id() % h->nreply_queues;
> -		else
> -			c->Header.ReplyQueue = reply_queue % h->nreply_queues;
> +		c->Header.ReplyQueue = reply_queue;
>  	}
>  }
>
> @@ -1063,10 +1059,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
>  	 * Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->ReplyQueue = reply_queue % h->nreply_queues;
> +	cp->ReplyQueue = reply_queue;
>  	/*
>  	 * Set the bits in the address sent down to include:
>  	 *  - performant mode bit (bit 0)
> @@ -1087,10 +1080,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
>  	/* Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->reply_queue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->reply_queue = reply_queue % h->nreply_queues;
> +	cp->reply_queue = reply_queue;
>  	/* Set the bits in the address sent down to include:
>  	 *  - performant mode bit not used in ioaccel mode 2
>  	 *  - pull count (bits 0-3)
> @@ -1109,10 +1099,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h,
>  	 * Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->reply_queue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->reply_queue = reply_queue % h->nreply_queues;
> +	cp->reply_queue = reply_queue;
>  	/*
>  	 * Set the bits in the address sent down to include:
>  	 *  - performant mode bit not used in ioaccel mode 2
> @@ -1157,6 +1144,8 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
>  {
>  	dial_down_lockup_detection_during_fw_flash(h, c);
>  	atomic_inc(&h->commands_outstanding);
> +
> +	reply_queue = h->reply_map[raw_smp_processor_id()];
>  	switch (c->cmd_type) {
>  	case CMD_IOACCEL1:
>  		set_ioaccel1_performant_mode(h, c, reply_queue);
> @@ -7376,6 +7365,26 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h)
>  	h->msix_vectors = 0;
>  }
>
> +static void hpsa_setup_reply_map(struct ctlr_info *h)
> +{
> +	const struct cpumask *mask;
> +	unsigned int queue, cpu;
> +
> +	for (queue = 0; queue < h->msix_vectors; queue++) {
> +		mask = pci_irq_get_affinity(h->pdev, queue);
> +		if (!mask)
> +			goto fallback;
> +
> +		for_each_cpu(cpu, mask)
> +			h->reply_map[cpu] = queue;
> +	}
> +	return;
> +
> +fallback:
> +	for_each_possible_cpu(cpu)
> +		h->reply_map[cpu] = 0;
> +}
> +
>  /* If MSI/MSI-X is supported by the kernel we will try to enable it on
>   * controllers that are capable.  If not, we use legacy INTx mode.
>   */
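One note on hpsa_setup_reply_map() above: if pci_irq_get_affinity() returns
no mask for any vector, the whole map falls back to queue 0, which is correct
but effectively single-queue. To eyeball how the map actually came out on a
given box, a throwaway debug helper along these lines works
(demo_dump_reply_map is hypothetical, not part of the patch):

static void demo_dump_reply_map(struct ctlr_info *h)
{
	unsigned int cpu;

	/* reply_map has nr_cpu_ids entries, one per possible CPU. */
	for_each_possible_cpu(cpu)
		pr_info("hpsa: cpu %u -> reply queue %u\n",
			cpu, h->reply_map[cpu]);
}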
> @@ -7771,6 +7780,10 @@ static int hpsa_pci_init(struct ctlr_info *h)
>  	err = hpsa_interrupt_mode(h);
>  	if (err)
>  		goto clean1;
> +
> +	/* setup mapping between CPU and reply queue */
> +	hpsa_setup_reply_map(h);
> +
>  	err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr);
>  	if (err)
>  		goto clean2;	/* intmode+region, pci */
> @@ -8480,6 +8493,28 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h,
>  	return wq;
>  }
>
> +static void hpda_free_ctlr_info(struct ctlr_info *h)
> +{
> +	kfree(h->reply_map);
> +	kfree(h);
> +}
> +
> +static struct ctlr_info *hpda_alloc_ctlr_info(void)
> +{
> +	struct ctlr_info *h;
> +
> +	h = kzalloc(sizeof(*h), GFP_KERNEL);
> +	if (!h)
> +		return NULL;
> +
> +	h->reply_map = kzalloc(sizeof(*h->reply_map) * nr_cpu_ids, GFP_KERNEL);
> +	if (!h->reply_map) {
> +		kfree(h);
> +		return NULL;
> +	}
> +	return h;
> +}
> +
>  static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	int dac, rc;
> @@ -8517,7 +8552,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	 * the driver.  See comments in hpsa.h for more info.
>  	 */
>  	BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT);
> -	h = kzalloc(sizeof(*h), GFP_KERNEL);
> +	h = hpda_alloc_ctlr_info();
>  	if (!h) {
>  		dev_err(&pdev->dev, "Failed to allocate controller head\n");
>  		return -ENOMEM;
> @@ -8916,7 +8951,7 @@ static void hpsa_remove_one(struct pci_dev *pdev)
>  	h->lockup_detected = NULL;			/* init_one 2 */
>  	/* (void) pci_disable_pcie_error_reporting(pdev); */	/* init_one 1 */
>
> -	kfree(h);					/* init_one 1 */
> +	hpda_free_ctlr_info(h);				/* init_one 1 */
>  }
>
>  static int hpsa_suspend(__attribute__((unused)) struct pci_dev *pdev,
> diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
> index 018f980a701c..fb9f5e7f8209 100644
> --- a/drivers/scsi/hpsa.h
> +++ b/drivers/scsi/hpsa.h
> @@ -158,6 +158,7 @@ struct bmic_controller_parameters {
>  #pragma pack()
>
>  struct ctlr_info {
> +	unsigned int *reply_map;
>  	int ctlr;
>  	char devname[8];
>  	char *product_name;
> --
> 2.9.5
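One more note on the allocation half of the change: the map is sized by
nr_cpu_ids rather than the online CPU count, because raw_smp_processor_id()
can return any possible CPU id. A sketch of the same allocation using
kcalloc(), the overflow-safe equivalent of the
kzalloc(sizeof(*h->reply_map) * nr_cpu_ids, ...) call in the patch
(demo_alloc_reply_map is a hypothetical helper, not part of the patch):

static int demo_alloc_reply_map(struct ctlr_info *h)
{
	/* One entry per possible CPU, zero-initialised so the default
	 * mapping is reply queue 0. */
	h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map),
			       GFP_KERNEL);
	return h->reply_map ? 0 : -ENOMEM;
}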