Re: [PATCH RFC v6 08/10] megaraid_sas: switch fusion adapters to MQ

Hi Kashyap,


> I have to add additional changes on top of your below patch -
> active_queues also should be managed differently for the shared tag map
> case. I added an extra flag in the queue_flags interface, which is per
> request queue.

ok, so for the shared sbitmap it's not proper to count active hctxs per request queue as "users", but rather the active request queues per host (which is effectively what we have for nr_hw_queues = 1). Just a small comment on your change, at the bottom.

So I already experimented with reducing shost.can_queue significantly on hisi_sas (by a factor of 8, from 4096 -> 512, with 12x SAS SSDs), and saw:

RFC + shost.can_queue reduced: ~1.2M IOPS
RFC + shost.can_queue unmodified: ~2M IOPS
Without RFC + shost.can_queue unmodified: ~2M IOPS

And with the change below, I still get around 1.2M IOPS. But there may be some sweet spot/zone where this makes a difference which I'm not hitting.
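
FWIW, if I'm reading hctx_may_queue() correctly, the fair-share allowance it enforces per active queue is roughly max(sb.depth / users, 4), with the divide rounding up. A toy standalone calculation for my 12x SSD setup (a sketch only, not kernel code):

/* Toy model of hctx_may_queue()'s fair-share depth; not kernel code */
#include <stdio.h>

int main(void)
{
	unsigned int depths[] = { 4096, 512 };	/* can_queue: unmodified vs reduced */
	unsigned int i, users;

	for (i = 0; i < 2; i++)
		for (users = 1; users <= 12; users++) {
			unsigned int d = (depths[i] + users - 1) / users;

			if (d < 4)
				d = 4;	/* always allow some tags */
			printf("depth=%u users=%2u -> per-queue allowance=%u\n",
			       depths[i], users, d);
		}
	return 0;
}

With all 12 SSDs active that works out at ~342 tags each for depth 4096, but only ~43 each for 512, which may explain part of the drop I see.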


> Combination of your patch and below resolves fairness issues. I see some
> better results using "--cpus_allowed_policy=split", but
> --cpus_allowed_policy=shared

I did not try changing cpus_allowed_policy, so I would be using the default, which I believe is shared.

> is still having some issue and most likely it
> is to do with fairness. If fairness is not managed properly, there is high
> contention in wait/wakeup handling of sbitmap.

ok, I can investigate.
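
BTW, for reference, my fio jobs look something like the below, one per SSD (the command line is illustrative, not my exact script - iodepth/numjobs/cpu mask are made up here), so trying split vs shared would just mean flipping the policy flag:

fio --filename=/dev/sdX --ioengine=libaio --direct=1 --rw=randread \
    --bs=4k --iodepth=128 --numjobs=4 --cpus_allowed=0-63 \
    --cpus_allowed_policy=shared --runtime=30 --time_based \
    --group_reporting --name=sdX-job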



> ===== Additional patch =====
>
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 3719e1a..95bcb47 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -23,10 +23,20 @@
>    */
>   bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>   {
> -       if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
> -           !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> -               atomic_inc(&hctx->tags->active_queues);
> +       struct blk_mq_tags *tags = hctx->tags;
> +       struct request_queue *q = hctx->queue;
> +       struct sbitmap_queue *bt;
>
> +       if (blk_mq_is_sbitmap_shared(q->tag_set)) {
> +               bt = tags->bitmap_tags;
> +               if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
> +                   !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> +                       atomic_inc(&bt->active_queues_per_host);
> +       } else {
> +               if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
> +                   !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> +                       atomic_inc(&hctx->tags->active_queues);
> +       }
>          return true;
>   }
>
> @@ -47,12 +57,20 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
>   void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>   {
>          struct blk_mq_tags *tags = hctx->tags;
> +       struct sbitmap_queue *bt;
> +       struct request_queue *q = hctx->queue;
>
> -       if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> -               return;
> -
> -       atomic_dec(&tags->active_queues);
> +       if (blk_mq_is_sbitmap_shared(q->tag_set)) {
> +               bt = tags->bitmap_tags;
> +               if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> +                       return;
> +               atomic_dec(&bt->active_queues_per_host);
> +       } else {
> +               if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> +                       return;
>
> +               atomic_dec(&tags->active_queues);
> +       }
>          blk_mq_tag_wakeup_all(tags, false);
>   }
>
> @@ -65,12 +83,13 @@ static inline bool hctx_may_queue(struct blk_mq_alloc_data *data,
>   {
>          struct blk_mq_hw_ctx *hctx = data->hctx;
>          struct request_queue *q = data->q;
> +       struct blk_mq_tags *tags = hctx->tags;
>          unsigned int depth, users;
>
>          if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
>                  return true;
> -       if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> -               return true;
> +       //if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> +       //      return true;
>
>          /*
>           * Don't try dividing an ant
> @@ -78,7 +97,11 @@ static inline bool hctx_may_queue(struct blk_mq_alloc_data *data,
>          if (bt->sb.depth == 1)
>                  return true;
>
> -       users = atomic_read(&hctx->tags->active_queues);
> +       if (blk_mq_is_sbitmap_shared(q->tag_set)) {
> +               bt = tags->bitmap_tags;
> +               users = atomic_read(&bt->active_queues_per_host);
> +       } else
> +               users = atomic_read(&hctx->tags->active_queues);
>          if (!users)
>                  return true;
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 2bc9998..7049800 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -613,6 +613,7 @@ struct request_queue {
>   #define QUEUE_FLAG_PCI_P2PDMA  25      /* device supports PCI p2p requests */
>   #define QUEUE_FLAG_ZONE_RESETALL 26    /* supports Zone Reset All */
>   #define QUEUE_FLAG_RQ_ALLOC_TIME 27    /* record rq->alloc_time_ns */
> +#define QUEUE_FLAG_HCTX_ACTIVE 28      /* at least one hctx is active */
>
>   #define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |            \
>                                   (1 << QUEUE_FLAG_SAME_COMP))
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index e40d019..fb44925 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -139,6 +139,8 @@ struct sbitmap_queue {
>           * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
>           */
>          unsigned int min_shallow_depth;
> +
> +       atomic_t active_queues_per_host;

It's prob better to put this in the blk_mq_tag_set structure.

>   };
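
i.e. an untested sketch of what I mean (placement invented for illustration, keeping your field name):

/* untested sketch: hold the hostwide "users" count in the tag set */
struct blk_mq_tag_set {
	/* ... existing members ... */
	atomic_t active_queues_per_host;	/* moved from sbitmap_queue */
};

and then hctx_may_queue() would read:

	users = atomic_read(&q->tag_set->active_queues_per_host);

That keeps sbitmap_queue generic and avoids growing it for everyone who doesn't share tags.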


Thanks very much,
John



