On 2022-09-02 02:14, Yu Kuai wrote: > Can you try the following patch? I'm running mdadm tests myself and I > didn't see any problems yet. Yes, that patch seems to fix the issue. However, may I suggest we do this without trying to introduce new helpers into wait.h? I suspect that could result in a fair amount of bike shedding and delay. wait_event_cmd() is often used in situations where a specific lock type doesn't have a helper. My stab at it is in a diff below which also fixes the bug. I'd also recommend somebody clean up that nasty condition in wait_barrier(). Put it into an appropriately named function with some comments. As is, it is pretty much unreadable. Logan -- diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0e3229ee1ebc..ae297bc870bd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -934,22 +934,26 @@ static void flush_pending_writes(struct r10conf *conf) * lower_barrier when the particular background IO completes. */ +#define wait_event_barrier_cmd(conf, cond, cmd) \ + wait_event_cmd((conf)->wait_barrier, cond, \ + write_sequnlock_irq(&(conf)->resync_lock); cmd, \ + write_seqlock_irq(&(conf)->resync_lock)) +#define wait_event_barrier(conf, cond) wait_event_barrier_cmd(conf, cond, ) + static void raise_barrier(struct r10conf *conf, int force) { write_seqlock_irq(&conf->resync_lock); BUG_ON(force && !conf->barrier); /* Wait until no block IO is waiting (unless 'force') */ - wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, - conf->resync_lock.lock); + wait_event_barrier(conf, force || !conf->nr_waiting); /* block any new IO from starting */ WRITE_ONCE(conf->barrier, conf->barrier + 1); /* Now wait for all pending IO to complete */ - wait_event_lock_irq(conf->wait_barrier, - !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH, - conf->resync_lock.lock); + wait_event_barrier(conf, !atomic_read(&conf->nr_pending) && + conf->barrier < RESYNC_DEPTH); write_sequnlock_irq(&conf->resync_lock); } @@ -1007,20 +1011,19 @@ static bool wait_barrier(struct r10conf *conf, bool nowait) ret = false; } else { raid10_log(conf->mddev, "wait barrier"); - wait_event_lock_irq(conf->wait_barrier, - !conf->barrier || - (atomic_read(&conf->nr_pending) && - bio_list && - (!bio_list_empty(&bio_list[0]) || - !bio_list_empty(&bio_list[1]))) || + wait_event_barrier(conf, + !conf->barrier || + (atomic_read(&conf->nr_pending) && + bio_list && + (!bio_list_empty(&bio_list[0]) || + !bio_list_empty(&bio_list[1]))) || /* move on if recovery thread is * blocked by us */ - (conf->mddev->thread->tsk == current && - test_bit(MD_RECOVERY_RUNNING, - &conf->mddev->recovery) && - conf->nr_queued > 0), - conf->resync_lock.lock); + (conf->mddev->thread->tsk == current && + test_bit(MD_RECOVERY_RUNNING, + &conf->mddev->recovery) && + conf->nr_queued > 0)); } conf->nr_waiting--; if (!conf->nr_waiting) @@ -1058,10 +1061,9 @@ static void freeze_array(struct r10conf *conf, int extra) conf->array_freeze_pending++; WRITE_ONCE(conf->barrier, conf->barrier + 1); conf->nr_waiting++; - wait_event_lock_irq_cmd(conf->wait_barrier, - atomic_read(&conf->nr_pending) == conf->nr_queued+extra, - conf->resync_lock.lock, - flush_pending_writes(conf)); + wait_event_barrier_cmd(conf, + atomic_read(&conf->nr_pending) == conf->nr_queued+extra, + flush_pending_writes(conf)); conf->array_freeze_pending--; write_sequnlock_irq(&conf->resync_lock);