NeilBrown <neilb@xxxxxxx> writes: > sh->lock is now mainly used to ensure that two threads aren't running > in the locked part of handle_stripe[56] at the same time. > > That can more neatly be achieved with an 'active' flag which we set > while running handle_stripe. If we find the flag is set, we simply > requeue the stripe for later by setting STRIPE_HANDLE. > > For safety we take ->device_lock while examining the state of the > stripe and creating a summary in 'stripe_head_state / r6_state'. > This possibly isn't needed but as shared fields like ->toread, > ->towrite are checked it is safer for now at least. > > We leave the label after the old 'unlock' called "unlock" because it > will disappear in a few patches, so renaming seems pointless. > > This leaves the stripe 'locked' for longer as we clear STRIPE_ACTIVE > later, but that is not a problem. > > Signed-off-by: NeilBrown <neilb@xxxxxxx> Reviewed-by: Namhyung Kim <namhyung@xxxxxxxxx> But I have a question, please see below. > --- > > drivers/md/raid5.c | 26 +++++++++++++------------- > drivers/md/raid5.h | 2 +- > 2 files changed, 14 insertions(+), 14 deletions(-) > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 9985138..f8275b5 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -1020,14 +1020,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) > if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { > struct bio *wbi; > > - spin_lock(&sh->lock); > spin_lock_irq(&sh->raid_conf->device_lock); > chosen = dev->towrite; > dev->towrite = NULL; > BUG_ON(dev->written); > wbi = dev->written = chosen; > spin_unlock_irq(&sh->raid_conf->device_lock); > - spin_unlock(&sh->lock); > > while (wbi && wbi->bi_sector < > dev->sector + STRIPE_SECTORS) { > @@ -1322,7 +1320,6 @@ static int grow_one_stripe(raid5_conf_t *conf) > return 0; > > sh->raid_conf = conf; > - spin_lock_init(&sh->lock); > #ifdef CONFIG_MULTICORE_RAID456 > init_waitqueue_head(&sh->ops.wait_for_ops); > #endif > @@ -1442,7 +1439,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) > break; > > nsh->raid_conf = conf; > - spin_lock_init(&nsh->lock); > #ifdef CONFIG_MULTICORE_RAID456 > init_waitqueue_head(&nsh->ops.wait_for_ops); > #endif > @@ -2148,7 +2144,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in > (unsigned long long)sh->sector); > > > - spin_lock(&sh->lock); > spin_lock_irq(&conf->device_lock); > if (forwrite) { > bip = &sh->dev[dd_idx].towrite; > @@ -2184,7 +2179,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in > set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); > } > spin_unlock_irq(&conf->device_lock); > - spin_unlock(&sh->lock); > > pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", > (unsigned long long)(*bip)->bi_sector, > @@ -2201,7 +2195,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in > overlap: > set_bit(R5_Overlap, &sh->dev[dd_idx].flags); > spin_unlock_irq(&conf->device_lock); > - spin_unlock(&sh->lock); > return 0; > } > > @@ -3023,12 +3016,10 @@ static void handle_stripe5(struct stripe_head *sh) > atomic_read(&sh->count), sh->pd_idx, sh->check_state, > sh->reconstruct_state); > > - spin_lock(&sh->lock); > if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { > set_bit(STRIPE_SYNCING, &sh->state); > clear_bit(STRIPE_INSYNC, &sh->state); > } > - clear_bit(STRIPE_HANDLE, &sh->state); > clear_bit(STRIPE_DELAYED, &sh->state); > > s.syncing = test_bit(STRIPE_SYNCING, &sh->state); > @@ -3037,6 +3028,7 @@ static void handle_stripe5(struct stripe_head *sh) > > /* Now to look around and see what can be done */ > rcu_read_lock(); > + spin_lock_irq(&conf->device_lock); Do we still need rcu_read_lock()? AFAIK rcu_read_lock() only prevents task from preemption but spin_lock does same thing as well. I know it's been already there under sh->lock before this patch, and it doesn't hurt anything, but I'm not sure it is really needed. > for (i=disks; i--; ) { > mdk_rdev_t *rdev; > > @@ -3099,6 +3091,7 @@ static void handle_stripe5(struct stripe_head *sh) > s.failed_num = i; > } > } > + spin_unlock_irq(&conf->device_lock); > rcu_read_unlock(); > > if (unlikely(blocked_rdev)) { > @@ -3275,7 +3268,6 @@ static void handle_stripe5(struct stripe_head *sh) > handle_stripe_expansion(conf, sh, NULL); > > unlock: > - spin_unlock(&sh->lock); > > /* wait for this device to become unblocked */ > if (unlikely(blocked_rdev)) > @@ -3318,12 +3310,10 @@ static void handle_stripe6(struct stripe_head *sh) > sh->check_state, sh->reconstruct_state); > memset(&s, 0, sizeof(s)); > > - spin_lock(&sh->lock); > if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { > set_bit(STRIPE_SYNCING, &sh->state); > clear_bit(STRIPE_INSYNC, &sh->state); > } > - clear_bit(STRIPE_HANDLE, &sh->state); > clear_bit(STRIPE_DELAYED, &sh->state); > > s.syncing = test_bit(STRIPE_SYNCING, &sh->state); > @@ -3332,6 +3322,7 @@ static void handle_stripe6(struct stripe_head *sh) > /* Now to look around and see what can be done */ > > rcu_read_lock(); > + spin_lock_irq(&conf->device_lock); Same here. > for (i=disks; i--; ) { > mdk_rdev_t *rdev; > dev = &sh->dev[i]; > @@ -3395,6 +3386,7 @@ static void handle_stripe6(struct stripe_head *sh) > s.failed++; > } > } > + spin_unlock_irq(&conf->device_lock); > rcu_read_unlock(); > > if (unlikely(blocked_rdev)) { > @@ -3580,7 +3572,6 @@ static void handle_stripe6(struct stripe_head *sh) > handle_stripe_expansion(conf, sh, &r6s); > > unlock: > - spin_unlock(&sh->lock); > > /* wait for this device to become unblocked */ > if (unlikely(blocked_rdev)) > @@ -3608,10 +3599,19 @@ static void handle_stripe6(struct stripe_head *sh) > > static void handle_stripe(struct stripe_head *sh) > { > + clear_bit(STRIPE_HANDLE, &sh->state); > + if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) { > + /* already being handled, ensure it gets handled > + * again when current action finishes */ > + set_bit(STRIPE_HANDLE, &sh->state); > + return; > + } > + > if (sh->raid_conf->level == 6) > handle_stripe6(sh); > else > handle_stripe5(sh); > + clear_bit(STRIPE_ACTIVE, &sh->state); > } > > static void raid5_activate_delayed(raid5_conf_t *conf) > diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h > index a330011..217a9d4 100644 > --- a/drivers/md/raid5.h > +++ b/drivers/md/raid5.h > @@ -209,7 +209,6 @@ struct stripe_head { > short ddf_layout;/* use DDF ordering to calculate Q */ > unsigned long state; /* state flags */ > atomic_t count; /* nr of active thread/requests */ > - spinlock_t lock; > int bm_seq; /* sequence number for bitmap flushes */ > int disks; /* disks in stripe */ > enum check_states check_state; > @@ -290,6 +289,7 @@ struct r6_state { > * Stripe state > */ > enum { > + STRIPE_ACTIVE, > STRIPE_HANDLE, > STRIPE_SYNC_REQUESTED, > STRIPE_SYNCING, > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-raid" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html