The patch titled md: improve locking around error handling has been added to the -mm tree. Its filename is md-improve-locking-around-error-handling.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: md: improve locking around error handling From: NeilBrown <neilb@xxxxxxx> The error handling routines don't use proper locking, and so two concurrent errors could trigger a problem. So: - use test-and-set and test-and-clear to synchonise the In_sync bits with the ->degraded count - use the spinlock to protect updates to the degraded count (could use an atomic_t but that would be a bigger change in code, and isn't really justified) - remove un-necessary locking in raid5 Signed-off-by: Neil Brown <neilb@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- drivers/md/raid1.c | 16 +++++++++++----- drivers/md/raid10.c | 12 ++++++++---- drivers/md/raid5.c | 20 ++++++++++++-------- 3 files changed, 31 insertions(+), 17 deletions(-) diff -puN drivers/md/raid10.c~md-improve-locking-around-error-handling drivers/md/raid10.c --- a/drivers/md/raid10.c~md-improve-locking-around-error-handling +++ a/drivers/md/raid10.c @@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rd * really dead" tests... */ return; - if (test_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ set_bit(MD_RECOVERY_ERR, &mddev->recovery); } - clear_bit(In_sync, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" @@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t * tmp = conf->mirrors + i; if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) - && !test_bit(In_sync, &tmp->rdev->flags)) { + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; - set_bit(In_sync, &tmp->rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); } } diff -puN drivers/md/raid1.c~md-improve-locking-around-error-handling drivers/md/raid1.c --- a/drivers/md/raid1.c~md-improve-locking-around-error-handling +++ a/drivers/md/raid1.c @@ -956,14 +956,16 @@ static void error(mddev_t *mddev, mdk_rd * normal single drive */ return; - if (test_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ set_bit(MD_RECOVERY_ERR, &mddev->recovery); } - clear_bit(In_sync, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" @@ -1017,9 +1019,11 @@ static int raid1_spare_active(mddev_t *m tmp = conf->mirrors + i; if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) - && !test_bit(In_sync, &tmp->rdev->flags)) { + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; - set_bit(In_sync, &tmp->rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); } } @@ -2034,7 +2038,7 @@ static int raid1_reshape(mddev_t *mddev) mirror_info_t *newmirrors; conf_t *conf = mddev_to_conf(mddev); int cnt, raid_disks; - + unsigned long flags; int d, d2; /* Cannot change chunk_size, layout, or level */ @@ -2093,7 +2097,9 @@ static int raid1_reshape(mddev_t *mddev) kfree(conf->poolinfo); conf->poolinfo = newpoolinfo; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded += (raid_disks - conf->raid_disks); + spin_unlock_irqrestore(&conf->device_lock, flags); conf->raid_disks = mddev->raid_disks = raid_disks; mddev->delta_disks = 0; diff -puN drivers/md/raid5.c~md-improve-locking-around-error-handling drivers/md/raid5.c --- a/drivers/md/raid5.c~md-improve-locking-around-error-handling +++ a/drivers/md/raid5.c @@ -636,7 +636,6 @@ static int raid5_end_write_request (stru struct stripe_head *sh = bi->bi_private; raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; - unsigned long flags; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); if (bi->bi_size) @@ -654,7 +653,6 @@ static int raid5_end_write_request (stru return 0; } - spin_lock_irqsave(&conf->device_lock, flags); if (!uptodate) md_error(conf->mddev, conf->disks[i].rdev); @@ -662,8 +660,7 @@ static int raid5_end_write_request (stru clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); - __release_stripe(conf, sh); - spin_unlock_irqrestore(&conf->device_lock, flags); + release_stripe(sh); return 0; } @@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rd if (!test_bit(Faulty, &rdev->flags)) { set_bit(MD_CHANGE_DEVS, &mddev->flags); - if (test_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; - clear_bit(In_sync, &rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery was running, make sure it aborts. */ @@ -3419,9 +3418,11 @@ static int raid5_spare_active(mddev_t *m tmp = conf->disks + i; if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) - && !test_bit(In_sync, &tmp->rdev->flags)) { + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; - set_bit(In_sync, &tmp->rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); } } print_raid5_conf(conf); @@ -3557,6 +3558,7 @@ static int raid5_start_reshape(mddev_t * struct list_head *rtmp; int spares = 0; int added_devices = 0; + unsigned long flags; if (mddev->degraded || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) @@ -3598,7 +3600,9 @@ static int raid5_start_reshape(mddev_t * break; } + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; + spin_unlock_irqrestore(&conf->device_lock, flags); mddev->raid_disks = conf->raid_disks; mddev->reshape_position = 0; set_bit(MD_CHANGE_DEVS, &mddev->flags); _ Patches currently in -mm which might be from neilb@xxxxxxx are ext3-avoid-triggering-ext3_error-on-bad-nfs-file-handle.patch knfsd-fix-stale-file-handle-problem-with-subtree_checking.patch knfsd-knfsd-add-some-missing-newlines-in-printks.patch knfsd-knfsd-remove-an-unused-variable-from-e_show.patch knfsd-knfsd-remove-an-unused-variable-from-auth_unix_lookup.patch knfsd-add-a-callback-for-when-last-rpc-thread-finishes.patch knfsd-add-a-callback-for-when-last-rpc-thread-finishes-tidy.patch knfsd-be-more-selective-in-which-sockets-lockd-listens-on.patch knfsd-remove-nfsd_versbits-as-intermediate-storage-for-desired-versions.patch knfsd-separate-out-some-parts-of-nfsd_svc-which-start-nfs-servers.patch knfsd-separate-out-some-parts-of-nfsd_svc-which-start-nfs-servers-tweaks.patch knfsd-define-new-nfsdfs-file-portlist-contains-list-of-ports.patch knfsd-define-new-nfsdfs-file-portlist-contains-list-of-ports-tidy.patch knfsd-define-new-nfsdfs-file-portlist-contains-list-of-ports-fix.patch knfsd-allow-sockets-to-be-passed-to-nfsd-via-portlist.patch knfsd-use-seq_start_token-instead-of-hardcoded-magic-void1.patch knfsd-have-ext2-reject-file-handles-with-bad-inode-numbers-early.patch knfsd-have-ext2-reject-file-handles-with-bad-inode-numbers-early-tidy.patch knfsd-make-ext3-reject-filehandles-referring-to-invalid-inode-numbers.patch knfsd-make-ext3-reject-filehandles-referring-to-invalid-inode-numbers-tidy.patch knfsd-drop-serv-option-to-svc_recv-and-svc_process.patch knfsd-drop-serv-option-to-svc_recv-and-svc_process-nfs-callback-fix-nfs-callback-fix.patch knfsd-check-return-value-of-lockd_up-in-write_ports.patch knfsd-move-makesock-failed-warning-into-make_socks.patch knfsd-correctly-handle-error-condition-from-lockd_up.patch knfsd-move-tempsock-aging-to-a-timer.patch knfsd-move-tempsock-aging-to-a-timer-tidy.patch knfsd-convert-sk_inuse-to-atomic_t.patch knfsd-use-new-lock-for-svc_sock-deferred-list.patch knfsd-convert-sk_reserved-to-atomic_t.patch knfsd-test-and-set-sk_busy-atomically.patch knfsd-split-svc_serv-into-pools.patch knfsd-add-svc_get.patch knfsd-add-svc_set_num_threads.patch knfsd-use-svc_set_num_threads-to-manage-threads-in-knfsd.patch knfsd-make-rpc-threads-pools-numa-aware.patch knfsd-allow-admin-to-set-nthreads-per-node.patch md-the-scheduled-removal-of-the-start_array-ioctl-for-md.patch md-fix-a-comment-that-is-wrong-in-raid5h.patch md-factor-out-part-of-raid1d-into-a-separate-function.patch md-factor-out-part-of-raid10d-into-a-separate-function.patch md-replace-magic-numbers-in-sb_dirty-with-well-defined-bit-flags.patch md-remove-the-working_disks-and-failed_disks-from-raid5-state-data.patch md-remove-working_disks-from-raid10-state.patch md-remove-working_disks-from-raid1-state-data.patch md-improve-locking-around-error-handling.patch md-dm-reduce-stack-usage-with-stacked-block-devices.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html