The patch titled md: Don't write dirty/clean update to spares - leave them alone has been added to the -mm tree. Its filename is md-dont-write-dirty-clean-update-to-spares-leave-them-alone.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: md: Don't write dirty/clean update to spares - leave them alone From: NeilBrown <neilb@xxxxxxx> - record the 'event' count on each individual device (they might sometimes be slightly different now) - add a new value for 'sb_dirty': '3' means that the superblock only needs to be updated to record a clean<->dirty transition. - Prefer odd event numbers for dirty states and even numbers for clean states - Using all the above, don't update the superblock on a spare device if the update is just doing a clean-dirty transition. To accommodate this, a transition from dirty back to clean might now decrement the events counter if nothing else has changed. The net effect of this is that spare drives will not see any IO requests during normal running of the array, so they can go to sleep if that is what they want to do. 
Signed-off-by: Neil Brown <neilb@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- drivers/md/md.c | 65 +++++++++++++++++++++++++++++++----- include/linux/raid/md_k.h | 1 2 files changed, 58 insertions(+), 8 deletions(-) diff -puN drivers/md/md.c~md-dont-write-dirty-clean-update-to-spares-leave-them-alone drivers/md/md.c --- devel/drivers/md/md.c~md-dont-write-dirty-clean-update-to-spares-leave-them-alone 2006-05-31 22:28:15.000000000 -0700 +++ devel-akpm/drivers/md/md.c 2006-05-31 22:28:15.000000000 -0700 @@ -1558,15 +1558,30 @@ static void md_print_devices(void) } -static void sync_sbs(mddev_t * mddev) +static void sync_sbs(mddev_t * mddev, int nospares) { + /* Update each superblock (in-memory image), but + * if we are allowed to, skip spares which already + * have the right event counter, or have one earlier + * (which would mean they aren't being marked as dirty + * with the rest of the array) + */ mdk_rdev_t *rdev; struct list_head *tmp; ITERATE_RDEV(mddev,rdev,tmp) { - super_types[mddev->major_version]. - sync_super(mddev, rdev); - rdev->sb_loaded = 1; + if (rdev->sb_events == mddev->events || + (nospares && + rdev->raid_disk < 0 && + (rdev->sb_events&1)==0 && + rdev->sb_events+1 == mddev->events)) { + /* Don't update this superblock */ + rdev->sb_loaded = 2; + } else { + super_types[mddev->major_version]. 
+ sync_super(mddev, rdev); + rdev->sb_loaded = 1; + } } } @@ -1576,12 +1591,42 @@ void md_update_sb(mddev_t * mddev) struct list_head *tmp; mdk_rdev_t *rdev; int sync_req; + int nospares = 0; repeat: spin_lock_irq(&mddev->write_lock); sync_req = mddev->in_sync; mddev->utime = get_seconds(); - mddev->events ++; + if (mddev->sb_dirty == 3) + /* just a clean<-> dirty transition, possibly leave spares alone, + * though if events isn't the right even/odd, we will have to do + * spares after all + */ + nospares = 1; + + /* If this is just a dirty<->clean transition, and the array is clean + * and 'events' is odd, we can roll back to the previous clean state */ + if (mddev->sb_dirty == 3 + && (mddev->in_sync && mddev->recovery_cp == MaxSector) + && (mddev->events & 1)) + mddev->events--; + else { + /* otherwise we have to go forward and ... */ + mddev->events ++; + if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ + /* .. if the array isn't clean, insist on an odd 'events' */ + if ((mddev->events&1)==0) { + mddev->events++; + nospares = 0; + } + } else { + /* otherwise insist on an even 'events' (for clean states) */ + if ((mddev->events&1)) { + mddev->events++; + nospares = 0; + } + } + } if (!mddev->events) { /* @@ -1593,7 +1638,7 @@ repeat: mddev->events --; } mddev->sb_dirty = 2; - sync_sbs(mddev); + sync_sbs(mddev, nospares); /* * do not write anything to disk if using @@ -1615,6 +1660,8 @@ repeat: ITERATE_RDEV(mddev,rdev,tmp) { char b[BDEVNAME_SIZE]; dprintk(KERN_INFO "md: "); + if (rdev->sb_loaded != 1) + continue; /* no noise on spare devices */ if (test_bit(Faulty, &rdev->flags)) dprintk("(skipping faulty "); @@ -1626,6 +1673,7 @@ repeat: dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", bdevname(rdev->bdev,b), (unsigned long long)rdev->sb_offset); + rdev->sb_events = mddev->events; } else dprintk(")\n"); @@ -1895,6 +1943,7 @@ static mdk_rdev_t *md_import_device(dev_ rdev->desc_nr = -1; rdev->flags = 0; rdev->data_offset = 0; + 
rdev->sb_events = 0; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); atomic_set(&rdev->corrected_errors, 0); @@ -4708,7 +4757,7 @@ void md_write_start(mddev_t *mddev, stru spin_lock_irq(&mddev->write_lock); if (mddev->in_sync) { mddev->in_sync = 0; - mddev->sb_dirty = 1; + mddev->sb_dirty = 3; md_wakeup_thread(mddev->thread); } spin_unlock_irq(&mddev->write_lock); @@ -5055,7 +5104,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->safemode && !atomic_read(&mddev->writes_pending) && !mddev->in_sync && mddev->recovery_cp == MaxSector) { mddev->in_sync = 1; - mddev->sb_dirty = 1; + mddev->sb_dirty = 3; } if (mddev->safemode == 1) mddev->safemode = 0; diff -puN include/linux/raid/md_k.h~md-dont-write-dirty-clean-update-to-spares-leave-them-alone include/linux/raid/md_k.h --- devel/include/linux/raid/md_k.h~md-dont-write-dirty-clean-update-to-spares-leave-them-alone 2006-05-31 22:28:15.000000000 -0700 +++ devel-akpm/include/linux/raid/md_k.h 2006-05-31 22:28:15.000000000 -0700 @@ -58,6 +58,7 @@ struct mdk_rdev_s struct page *sb_page; int sb_loaded; + __u64 sb_events; sector_t data_offset; /* start of data in array */ sector_t sb_offset; int sb_size; /* bytes in the superblock */ _ Patches currently in -mm which might be from neilb@xxxxxxx are origin.patch fix-dcache-race-during-umount.patch prune_one_dentry-tweaks.patch remove-softlockup-from-invalidate_mapping_pages.patch prepare-for-__copy_from_user_inatomic-to-not-zero-missed-bytes.patch make-copy_from_user_inatomic-not-zero-the-tail-on-i386.patch kconfig-select-things-at-the-closest-tristate-instead-of-bool.patch dm-mirror-sector-offset-fix.patch md-reformat-code-in-raid1_end_write_request-to-avoid-goto.patch md-remove-arbitrary-limit-on-chunk-size.patch md-remove-useless-ioctl-warning.patch md-increase-the-delay-before-marking-metadata-clean-and-make-it-configurable.patch md-merge-raid5-and-raid6-code.patch md-remove-nuisance-message-at-shutdown.patch 
md-allow-checkpoint-of-recovery-with-version-1-superblock.patch md-allow-checkpoint-of-recovery-with-version-1-superblock-fix.patch md-allow-a-linear-array-to-have-drives-added-while-active.patch md-support-stripe-offset-mode-in-raid10.patch md-make-md_print_devices-static.patch md-split-reshape-portion-of-raid5-sync_request-into-a-separate-function.patch md-bitmap-fix-online-removal-of-file-backed-bitmaps.patch md-bitmap-remove-bitmap-writeback-daemon.patch md-bitmap-cleaner-separation-of-page-attribute-handlers-in-md-bitmap.patch md-bitmap-use-set_bit-etc-for-bitmap-page-attributes.patch md-bitmap-remove-unnecessary-page-reference-manipulations-from-md-bitmap-code.patch md-bitmap-remove-dead-code-from-md-bitmap.patch md-bitmap-tidy-up-i_writecount-handling-in-md-bitmap.patch md-bitmap-change-md-bitmap-file-handling-to-use-bmap-to-file-blocks.patch md-change-md-bitmap-file-handling-to-use-bmap-to-file-blocks-fix.patch md-calculate-correct-array-size-for-raid10-in-new-offset-mode.patch md-md-kconfig-speeling-feex.patch md-fix-kconfig-error.patch md-fix-bug-that-stops-raid5-resync-from-happening.patch md-allow-re-add-to-work-on-array-without-bitmaps.patch md-dont-write-dirty-clean-update-to-spares-leave-them-alone.patch md-set-get-state-of-array-via-sysfs.patch md-allow-rdev-state-to-be-set-via-sysfs.patch md-allow-raid-layout-to-be-read-and-set-via-sysfs.patch md-allow-resync_start-to-be-set-and-queried-via-sysfs.patch md-allow-the-write_mostly-flag-to-be-set-via-sysfs.patch md-dm-reduce-stack-usage-with-stacked-block-devices.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html