ops_complete_biofill tried to avoid calling handle_stripe since all the state necessary to return read completions is available. However the process of determining whether more read requests are pending requires locking the stripe (to block add_stripe_bio from updating dev->toread). ops_complete_biofill can run in tasklet context, so rather than upgrading all the stripe locks from spin_lock to spin_lock_bh this patch just moves read completion handling back into handle_stripe. Found-by: Yuri Tikhonov <yur@xxxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/md/raid5.c | 90 +++++++++++++++++++++++++++------------------------- 1 files changed, 46 insertions(+), 44 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4d63773..38c8893 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -512,54 +512,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, static void ops_complete_biofill(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; - struct bio *return_bi = NULL; - raid5_conf_t *conf = sh->raid_conf; - int i, more_to_read = 0; pr_debug("%s: stripe %llu\n", __FUNCTION__, (unsigned long long)sh->sector); - /* clear completed biofills */ - for (i = sh->disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - /* check if this stripe has new incoming reads */ - if (dev->toread) - more_to_read++; - - /* acknowledge completion of a biofill operation */ - /* and check if we need to reply to a read request - */ - if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) { - struct bio *rbi, *rbi2; - clear_bit(R5_Wantfill, &dev->flags); - - /* The access to dev->read is outside of the - * spin_lock_irq(&conf->device_lock), but is protected - * by the STRIPE_OP_BIOFILL pending bit - */ - BUG_ON(!dev->read); - rbi = dev->read; - dev->read = NULL; - while (rbi && rbi->bi_sector < - dev->sector + STRIPE_SECTORS) { - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if 
(--rbi->bi_phys_segments == 0) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } - } - clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); - clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); - - return_io(return_bi); - - if (more_to_read) - set_bit(STRIPE_HANDLE, &sh->state); + set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete); + set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); } @@ -2112,6 +2070,42 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, } +/* handle_completed_read_requests - return completion for reads and allow + * new read operations to be submitted to the stripe. + */ +static void handle_completed_read_requests(raid5_conf_t *conf, + struct stripe_head *sh, + struct bio **return_bi) +{ + int i; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + /* check if we need to reply to a read request */ + for (i = sh->disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + + if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { + struct bio *rbi, *rbi2; + + rbi = dev->read; + dev->read = NULL; + while (rbi && rbi->bi_sector < + dev->sector + STRIPE_SECTORS) { + rbi2 = r5_next_bio(rbi, dev->sector); + spin_lock_irq(&conf->device_lock); + if (--rbi->bi_phys_segments == 0) { + rbi->bi_next = *return_bi; + *return_bi = rbi; + } + spin_unlock_irq(&conf->device_lock); + rbi = rbi2; + } + } + } +} + /* handle_completed_write_requests * any written block on an uptodate or failed drive can be returned. 
* Note that if we 'wrote' to a failed drive, it will be UPTODATE, but @@ -2633,6 +2627,14 @@ static void handle_stripe5(struct stripe_head *sh) s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); /* Now to look around and see what can be done */ + if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) { + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete); + + handle_completed_read_requests(conf, sh, &return_bi); + } + rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; - To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html