Re: [RFC PATCH v4 3/4] md: raid10 add nowait support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 12/13/21 5:32 PM, Song Liu wrote:
On Wed, Nov 10, 2021 at 10:15 AM Vishal Verma <vverma@xxxxxxxxxxxxxxxx> wrote:
This adds nowait support to the RAID10 driver. Very similar to
raid1 driver changes. It makes RAID10 driver return with EAGAIN
for situations where it could wait for eg:

   - Waiting for the barrier,
   - Too many pending I/Os to be queued,
   - Reshape operation,
   - Discard operation.

wait_barrier() fn is modified to return bool to support error for
wait barriers. It returns true in case of wait or if wait is not
required and returns false if wait was required but not performed
to support nowait.

Signed-off-by: Vishal Verma <vverma@xxxxxxxxxxxxxxxx>
---
  drivers/md/raid10.c | 87 +++++++++++++++++++++++++++++++--------------
  1 file changed, 60 insertions(+), 27 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dde98f65bd04..03983146d31a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,8 +952,9 @@ static void lower_barrier(struct r10conf *conf)
         wake_up(&conf->wait_barrier);
  }

-static void wait_barrier(struct r10conf *conf)
+static bool wait_barrier(struct r10conf *conf, bool nowait)
  {
+       bool ret = true;
         spin_lock_irq(&conf->resync_lock);
         if (conf->barrier) {
                 struct bio_list *bio_list = current->bio_list;
@@ -968,26 +969,33 @@ static void wait_barrier(struct r10conf *conf)
                  * count down.
                  */
                 raid10_log(conf->mddev, "wait barrier");
-               wait_event_lock_irq(conf->wait_barrier,
-                                   !conf->barrier ||
-                                   (atomic_read(&conf->nr_pending) &&
-                                    bio_list &&
-                                    (!bio_list_empty(&bio_list[0]) ||
-                                     !bio_list_empty(&bio_list[1]))) ||
-                                    /* move on if recovery thread is
-                                     * blocked by us
-                                     */
-                                    (conf->mddev->thread->tsk == current &&
-                                     test_bit(MD_RECOVERY_RUNNING,
-                                              &conf->mddev->recovery) &&
-                                     conf->nr_queued > 0),
-                                   conf->resync_lock);
+               /* Return false when nowait flag is set */
+               if (nowait)
+                       ret = false;
+               else
+                       wait_event_lock_irq(conf->wait_barrier,
+                                           !conf->barrier ||
+                                           (atomic_read(&conf->nr_pending) &&
+                                            bio_list &&
+                                            (!bio_list_empty(&bio_list[0]) ||
+                                             !bio_list_empty(&bio_list[1]))) ||
+                                            /* move on if recovery thread is
+                                             * blocked by us
+                                             */
+                                            (conf->mddev->thread->tsk == current &&
+                                             test_bit(MD_RECOVERY_RUNNING,
+                                                      &conf->mddev->recovery) &&
+                                             conf->nr_queued > 0),
+                                           conf->resync_lock);
                 conf->nr_waiting--;
                 if (!conf->nr_waiting)
                         wake_up(&conf->wait_barrier);
         }
-       atomic_inc(&conf->nr_pending);
+       /* Only increment nr_pending when we wait */
+       if (ret)
+               atomic_inc(&conf->nr_pending);
         spin_unlock_irq(&conf->resync_lock);
+       return ret;
  }
I guess something like this would simplify the code:

static bool wait_barrier(struct r10conf *conf, bool nowait)
{
         bool ret = true;
         spin_lock_irq(&conf->resync_lock);
         if (conf->barrier) {
                 struct bio_list *bio_list = current->bio_list;

                 if (nowait) {
                         spin_unlock_irq(&conf->resync_lock);
                         return false;
                 }
Ack, makes sense to me.
  static void allow_barrier(struct r10conf *conf)
@@ -1101,17 +1109,25 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
  static void regular_request_wait(struct mddev *mddev, struct r10conf *conf,
                                  struct bio *bio, sector_t sectors)
  {
-       wait_barrier(conf);
+       /* Bail out if REQ_NOWAIT is set for the bio */
+       if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
+               bio_wouldblock_error(bio);
+               return;
+       }
         while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             bio->bi_iter.bi_sector < conf->reshape_progress &&
             bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
                 raid10_log(conf->mddev, "wait reshape");
+               if (bio->bi_opf & REQ_NOWAIT) {
+                       bio_wouldblock_error(bio);
+                       return;
+               }
                 allow_barrier(conf);
                 wait_event(conf->wait_barrier,
                            conf->reshape_progress <= bio->bi_iter.bi_sector ||
                            conf->reshape_progress >= bio->bi_iter.bi_sector +
                            sectors);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
         }
  }

@@ -1179,7 +1195,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                 bio_chain(split, bio);
                 allow_barrier(conf);
                 submit_bio_noacct(bio);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
                 bio = split;
                 r10_bio->master_bio = bio;
                 r10_bio->sectors = max_sectors;
@@ -1338,7 +1354,7 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
                 raid10_log(conf->mddev, "%s wait rdev %d blocked",
                                 __func__, blocked_rdev->raid_disk);
                 md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
                 goto retry_wait;
         }
  }
@@ -1357,6 +1373,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                                             bio_end_sector(bio)))) {
                 DEFINE_WAIT(w);
                 for (;;) {
+                       /* Bail out if REQ_NOWAIT is set for the bio */
+                       if (bio->bi_opf & REQ_NOWAIT) {
+                               bio_wouldblock_error(bio);
+                               return;
+                       }
                         prepare_to_wait(&conf->wait_barrier,
                                         &w, TASK_IDLE);
                         if (!md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1381,6 +1402,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                               BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
                 md_wakeup_thread(mddev->thread);
                 raid10_log(conf->mddev, "wait reshape metadata");
+               if (bio->bi_opf & REQ_NOWAIT) {
+                       bio_wouldblock_error(bio);
+                       return;
+               }
                 wait_event(mddev->sb_wait,
                            !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));

@@ -1390,6 +1415,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid10_log(mddev, "wait queued");
+               if (bio->bi_opf & REQ_NOWAIT) {
+                       bio_wouldblock_error(bio);
+                       return;
+               }
                 wait_event(conf->wait_barrier,
                            conf->pending_count < max_queued_requests);
         }
@@ -1482,7 +1511,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                 bio_chain(split, bio);
                 allow_barrier(conf);
                 submit_bio_noacct(bio);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
                 bio = split;
                 r10_bio->master_bio = bio;
         }
@@ -1607,7 +1636,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 return -EAGAIN;

-       wait_barrier(conf);
+       if (bio->bi_opf & REQ_NOWAIT) {
+               bio_wouldblock_error(bio);
+               return 0;
+       }
Does this mean we always bail out on discard?
Yeah, I wanted your feedback specifically for this case. I see just after this is when wait_barrier() happens
+       wait_barrier(conf, false);

         /*
          * Check reshape again to avoid reshape happens after checking
@@ -1649,7 +1682,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
                 allow_barrier(conf);
                 /* Resend the fist split part */
                 submit_bio_noacct(split);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
         }
         div_u64_rem(bio_end, stripe_size, &remainder);
         if (remainder) {
@@ -1660,7 +1693,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
                 /* Resend the second split part */
                 submit_bio_noacct(bio);
                 bio = split;
-               wait_barrier(conf);
+               wait_barrier(conf, false);
         }

         bio_start = bio->bi_iter.bi_sector;
@@ -1816,7 +1849,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
                 end_disk_offset += geo->stride;
                 atomic_inc(&first_r10bio->remaining);
                 raid_end_discard_bio(r10_bio);
-               wait_barrier(conf);
+               wait_barrier(conf, false);
                 goto retry_discard;
         }

@@ -2011,7 +2044,7 @@ static void print_conf(struct r10conf *conf)

  static void close_sync(struct r10conf *conf)
  {
-       wait_barrier(conf);
+       wait_barrier(conf, false);
         allow_barrier(conf);

         mempool_exit(&conf->r10buf_pool);
@@ -4819,7 +4852,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
         if (need_flush ||
             time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                 /* Need to update reshape_position in metadata */
-               wait_barrier(conf);
+               wait_barrier(conf, false);
                 mddev->reshape_position = conf->reshape_progress;
                 if (mddev->reshape_backwards)
                         mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
--
2.17.1




[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux