Commit 1ed850f356a0a4220 removed those checks from handle_failed_stripe(). But I found that this can cause a kernel warning. When writing data to a failed raid5 array, the kernel prints these messages:

[ 381.350494] sector=2e180 i=1 (null) (null) (null) (null) 1
[ 381.350495] ------------[ cut here ]------------
[ 381.350497] WARNING: at drivers/md/raid5.c:356 get_active_stripe+0x55c/0x5b0 [raid456]()
[ 381.350498] Hardware name: To Be Filled By O.E.M.
[ 381.350499] Modules linked in: raid456 async_pq async_xor xor async_memcpy async_raid6_recov raid6_pq async_tx [last unloaded:raid456]
[ 381.350504] Pid: 6971, comm: flush-9:0 Tainted: G W O 3.8.0-rc1+ #1
[ 381.350505] Call Trace:
[ 381.350507] [<ffffffff8103a44a>] warn_slowpath_common+0x7a/0xb0
[ 381.350509] [<ffffffff8103a495>] warn_slowpath_null+0x15/0x20
[ 381.350512] [<ffffffffa00454fc>] get_active_stripe+0x55c/0x5b0 [raid456]
[ 381.350514] [<ffffffffa004511d>] ? get_active_stripe+0x17d/0x5b0 [raid456]
[ 381.350517] [<ffffffff8105d12b>] ? prepare_to_wait+0x5b/0x90
[ 381.350519] [<ffffffffa004c3d0>] make_request+0x1a0/0xa80 [raid456]
[ 381.350521] [<ffffffff8105d280>] ? add_wait_queue+0x60/0x60
[ 381.350523] [<ffffffff814c6c66>] md_make_request+0xc6/0x200
[ 381.350526] [<ffffffff810e4a89>] ? mempool_alloc+0x59/0x160
[ 381.350528] [<ffffffff812928b2>] generic_make_request+0xc2/0x100
[ 381.350530] [<ffffffff81292955>] submit_bio+0x65/0x130
[ 381.350531] [<ffffffff8115da70>] submit_bh+0x110/0x1e0
[ 381.350533] [<ffffffff81161108>] __block_write_full_page+0x1c8/0x350
[ 381.350536] [<ffffffff8115f3b0>] ? end_buffer_async_read+0x130/0x130
[ 381.350538] [<ffffffff81163d00>] ? I_BDEV+0x10/0x10
[ 381.350540] [<ffffffff81163d00>] ? I_BDEV+0x10/0x10
[ 381.350542] [<ffffffff81161355>] block_write_full_page_endio+0xc5/0x100
[ 381.350544] [<ffffffff811613a0>] block_write_full_page+0x10/0x20
[ 381.350546] [<ffffffff811645c3>] blkdev_writepage+0x13/0x20
[ 381.350548] [<ffffffff810eb7d2>] __writepage+0x12/0x40
[ 381.350550] [<ffffffff810ebc65>] write_cache_pages+0x215/0x490
[ 381.350552] [<ffffffff810eb7c0>] ? set_page_dirty_lock+0x50/0x50
[ 381.350554] [<ffffffff810ebf25>] generic_writepages+0x45/0x70
[ 381.350556] [<ffffffff810ed6eb>] do_writepages+0x1b/0x30
[ 381.350558] [<ffffffff81154b76>] __writeback_single_inode+0x36/0x170
[ 381.350560] [<ffffffff8105ce76>] ? bit_waitqueue+0x16/0xc0
[ 381.350563] [<ffffffff81156c9b>] writeback_sb_inodes+0x19b/0x3c0
[ 381.350565] [<ffffffff81156f56>] __writeback_inodes_wb+0x96/0xc0
[ 381.350567] [<ffffffff8115717b>] wb_writeback+0x1fb/0x320
[ 381.350569] [<ffffffff810eb605>] ? global_dirtyable_memory+0x15/0x40
[ 381.350572] [<ffffffff81158808>] wb_do_writeback+0x128/0x1d0
[ 381.350574] [<ffffffff81158943>] bdi_writeback_thread+0x93/0x260
[ 381.350576] [<ffffffff811588b0>] ? wb_do_writeback+0x1d0/0x1d0
[ 381.350578] [<ffffffff8105c9bb>] kthread+0xbb/0xc0
[ 381.350580] [<ffffffff8105c900>] ? flush_kthread_work+0x120/0x120
[ 381.350582] [<ffffffff816ee0ac>] ret_from_fork+0x7c/0xb0
[ 381.350584] [<ffffffff8105c900>] ? flush_kthread_work+0x120/0x120

Because to_write was not decreased, to_write stays larger than zero even after all write bios have been failed, so handle_stripe() still executes:

>	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
>		handle_stripe_dirtying(conf, sh, &s, disks);

handle_stripe_dirtying() then calls schedule_reconstruction(), which sets the R5_LOCKED flag. So when the stripe is reused, init_stripe() prints the message above and calls WARN_ON(1).
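To make the stale-counter path concrete, here is a minimal user-space C sketch. It is an illustration only, not kernel code: the struct fields and the helpers analyse()/fail_writes() merely mirror what stripe_head_state, analyse_stripe() and handle_failed_stripe() do in drivers/md/raid5.c.

/* Standalone sketch: a write counter that is not decremented when bios
 * are dropped leaves the stripe looking permanently dirty. */
#include <stdio.h>

struct dev_sketch {
	void *towrite;		/* pending write bio, NULL if none */
};

struct state_sketch {
	int to_write;		/* number of devices with a pending write */
};

/* mirrors analyse_stripe(): count devices that still hold a write bio */
static void analyse(struct state_sketch *s, struct dev_sketch *dev, int disks)
{
	int i;

	s->to_write = 0;
	for (i = 0; i < disks; i++)
		if (dev[i].towrite)
			s->to_write++;
}

/* mirrors handle_failed_stripe() after commit 1ed850f356a0a4220: the
 * bios are dropped, but to_write is left untouched */
static void fail_writes(struct state_sketch *s, struct dev_sketch *dev,
			int disks)
{
	int i;

	for (i = 0; i < disks; i++)
		dev[i].towrite = NULL;	/* failed write is dropped */
	/* missing: s->to_write-- for every dropped bio (the fix below) */
}

int main(void)
{
	struct dev_sketch dev[4] = { { "bio" }, { NULL }, { NULL }, { NULL } };
	struct state_sketch s;

	analyse(&s, dev, 4);
	fail_writes(&s, dev, 4);
	/* same condition handle_stripe() tests before dirtying the stripe */
	if (s.to_write)
		printf("stale to_write=%d -> handle_stripe_dirtying()\n",
		       s.to_write);
	return 0;
}

Running this prints a non-zero to_write even though no device holds a write bio any more; in the kernel, that is exactly the state in which schedule_reconstruction() sets R5_LOCKED on devices whose writes will never complete.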
We discussed this problem before; the details are at http://www.spinics.net/lists/raid/msg40519.html. So I am resending my previous patch.

Reported-by: zhouqi <qi.g.zhou@xxxxxxxxx>
Tested-by: zhouqi <qi.g.zhou@xxxxxxxxx>
Signed-off-by: Jianpeng Ma <majianpeng@xxxxxxxxx>
---
 drivers/md/raid5.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 19d77a0..52ad751 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2520,8 +2520,10 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		bi = sh->dev[i].towrite;
 		sh->dev[i].towrite = NULL;
 		spin_unlock_irq(&sh->stripe_lock);
-		if (bi)
+		if (bi) {
+			s->to_write--;
 			bitmap_end = 1;
+		}

 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
@@ -2569,6 +2571,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		spin_unlock_irq(&sh->stripe_lock);
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
+		if (bi)
+			s->to_read--;
 		while (bi && bi->bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi =
@@ -3571,7 +3575,7 @@ static void handle_stripe(struct stripe_head *sh)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (s.to_read || s.non_overwrite
+	if (s.to_read > 0 || s.non_overwrite
 	    || (conf->level == 6 && s.to_write && s.failed)
 	    || (s.syncing && (s.uptodate + s.compute < disks))
 	    || s.replacing
@@ -3584,7 +3588,7 @@ static void handle_stripe(struct stripe_head *sh)
 	 * 2/ A 'check' operation is in flight, as it may clobber the parity
 	 * block.
 	 */
-	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
+	if (s.to_write > 0 && !sh->reconstruct_state && !sh->check_state)
 		handle_stripe_dirtying(conf, sh, &s, disks);

 	/* maybe we need to check and possibly fix the parity for this stripe
--
1.7.9.5