Re: [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2022/09/14 9:49, Yu Kuai 写道:
From: Yu Kuai <yukuai3@xxxxxxxxxx>

Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
and io can't be dispatched until 'barrier' is dropped.

Since the barrier is rarely raised, convert 'resync_lock' to a seqlock so
that taking the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
  drivers/md/raid10.c | 85 +++++++++++++++++++++++++++++----------------
  drivers/md/raid10.h |  2 +-
  2 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 377d4641bb54..6c2396fe75a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
  #include "raid1-10.c"
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
  /*
   * for resync bio, r10bio pointer can be retrieved from the per-bio
   * 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
  {
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
  	BUG_ON(force && !conf->barrier);
/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
- spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
  }
static void lower_barrier(struct r10conf *conf)
  {
  	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
  	wake_up_barrier(conf);
  }
@@ -992,11 +1006,29 @@ static bool stop_waiting_barrier(struct r10conf *conf)
  	return false;
  }
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	atomic_dec(&conf->nr_pending);

During stress testing, I found that this is problematic: raise_barrier()
waits for nr_pending to become zero, and the temporary increment and
decrement here can leave raise_barrier() hanging, because nothing wakes it
up when nr_pending drops back to 0 here.

I'll send a new version to fix this.

Thanks,
Kuai
+	return false;
+}
+
  static bool wait_barrier(struct r10conf *conf, bool nowait)
  {
  	bool ret = true;
- spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
  	if (conf->barrier) {
  		/* Return false when nowait flag is set */
  		if (nowait) {
@@ -1004,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
  		} else {
  			conf->nr_waiting++;
  			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    stop_waiting_barrier(conf),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
  			conf->nr_waiting--;
  		}
  		if (!conf->nr_waiting)
@@ -1015,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
  	/* Only increment nr_pending when we wait */
  	if (ret)
  		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
  	return ret;
  }
@@ -1040,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
  	 * must match the number of pending IOs (nr_pending) before
  	 * we continue.
  	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
  	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
  	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
  	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
  }
static void unfreeze_array(struct r10conf *conf)
  {
  	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
  	conf->nr_waiting--;
  	wake_up_barrier(conf);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
  }
static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4046,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
  	INIT_LIST_HEAD(&conf->retry_list);
  	INIT_LIST_HEAD(&conf->bio_end_io_list);
- spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
  	init_waitqueue_head(&conf->wait_barrier);
  	atomic_set(&conf->nr_pending, 0);
@@ -4365,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
  				rdev->new_raid_disk = rdev->raid_disk * 2;
  				rdev->sectors = size;
  			}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
  	}
return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
  	/* queue pending writes and submit them on unplug */
  	struct bio_list		pending_bio_list;
- spinlock_t resync_lock;
+	seqlock_t		resync_lock;
  	atomic_t		nr_pending;
  	int			nr_waiting;
  	int			nr_queued;





[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux