Hi,
在 2023/11/02 7:02, Junxiao Bi 写道:
Looks like there is a race between md_write_start() and raid5d() which
Is this a real issue or just based on code review?
can cause deadlock. Run into this issue while raid_check is running.
md_write_start: raid5d:
if (mddev->safemode == 1)
mddev->safemode = 0;
/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
if (mddev->in_sync || mddev->sync_checkers) {
spin_lock(&mddev->lock);
if (mddev->in_sync) {
mddev->in_sync = 0;
set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
>>> running before md_write_start wake up it
if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) {
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
/*
* Waiting on MD_SB_CHANGE_PENDING below may deadlock
* seeing md_check_recovery() is needed to clear
* the flag when using mdmon.
*/
continue;
}
wait_event_lock_irq(mddev->sb_wait, >>>>>>>>>>> hung
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
conf->device_lock);
md_wakeup_thread(mddev->thread);
did_change = 1;
}
spin_unlock(&mddev->lock);
}
...
wait_event(mddev->sb_wait, >>>>>>>>>> hung
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
mddev->suspended);
This is not correct, if daemon thread is running, md_wakeup_thread()
will make sure that daemon thread will run again, see details how
THREAD_WAKEUP worked in md_thread().
Thanks,
Kuai
commit 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d")
introduced this issue, since it want to a reschedule for flushing blk_plug,
let do it explicitly to address that issue.
Fixes: 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d")
Signed-off-by: Junxiao Bi <junxiao.bi@xxxxxxxxxx>
---
block/blk-core.c | 1 +
drivers/md/raid5.c | 9 +++++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 9d51e9894ece..bc8757a78477 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1149,6 +1149,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
}
+EXPORT_SYMBOL(__blk_flush_plug);
/**
* blk_finish_plug - mark the end of a batch of submitted I/O
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 284cd71bcc68..25ec82f2813f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6850,11 +6850,12 @@ static void raid5d(struct md_thread *thread)
* the flag when using mdmon.
*/
continue;
+ } else {
+ spin_unlock_irq(&conf->device_lock);
+ blk_flush_plug(current);
+ cond_resched();
+ spin_lock_irq(&conf->device_lock);
}
-
- wait_event_lock_irq(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
- conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);