On Mon, Jun 19, 2023 at 8:49 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > Currently, 'active_io' is grabed before make_reqeust() is called, and > it's dropped immediately make_reqeust() returns. Hence 'active_io' > actually means io is dispatching, not io is inflight. Hi Kuai There are three typo errors in the commit message: s/grabed/grabbed/g > > For raid0 and raid456 that io accounting is enabled, 'active_io' will > also be grabed when bio is cloned for io accounting, and this 'active_io' > is dropped until io is done. > > Always clone new bio so that 'active_io' will mean that io is inflight, > raid1 and raid10 will switch to use this method in later patches. Once > these are implemented, it can be cleaned up that 'active_io' is grabed > twice for one io. > > Now that bio will be cloned even if io accounting is disabled, also > rename related structure from '*_acct_*' to '*_clone_*'. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > --- > drivers/md/md.c | 61 ++++++++++++++++++++++++---------------------- > drivers/md/md.h | 4 +-- > drivers/md/raid5.c | 18 +++++++------- > 3 files changed, 43 insertions(+), 40 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 42347289195a..5ad8e7f3aebd 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -2314,7 +2314,7 @@ int md_integrity_register(struct mddev *mddev) > pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); > if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || > (mddev->level != 1 && mddev->level != 10 && > - bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) { > + bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) { > /* > * No need to handle the failure of bioset_integrity_create, > * because the function is called by md_run() -> pers->run(), > @@ -5886,9 +5886,9 @@ int md_run(struct mddev *mddev) > goto exit_bio_set; > } > > - if (!bioset_initialized(&mddev->io_acct_set)) { > - err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, > - offsetof(struct md_io_acct, bio_clone), 0); > + if (!bioset_initialized(&mddev->io_clone_set)) { > + err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE, > + offsetof(struct md_io_clone, bio_clone), 0); > if (err) > goto exit_sync_set; > } > @@ -6070,7 +6070,7 @@ int md_run(struct mddev *mddev) > module_put(pers->owner); > md_bitmap_destroy(mddev); > abort: > - bioset_exit(&mddev->io_acct_set); > + bioset_exit(&mddev->io_clone_set); > exit_sync_set: > bioset_exit(&mddev->sync_set); > exit_bio_set: > @@ -6295,7 +6295,7 @@ static void __md_stop(struct mddev *mddev) > percpu_ref_exit(&mddev->active_io); > bioset_exit(&mddev->bio_set); > bioset_exit(&mddev->sync_set); > - bioset_exit(&mddev->io_acct_set); > + bioset_exit(&mddev->io_clone_set); > } > > void md_stop(struct mddev *mddev) > @@ -8661,45 +8661,48 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, > } > EXPORT_SYMBOL_GPL(md_submit_discard_bio); > > -static void md_end_io_acct(struct bio *bio) > +static void md_end_clone_io(struct bio *bio) > { > - struct md_io_acct *md_io_acct = bio->bi_private; > - struct bio *orig_bio = md_io_acct->orig_bio; > - struct mddev *mddev = md_io_acct->mddev; > + struct md_io_clone *md_io_clone = bio->bi_private; > + struct bio *orig_bio = md_io_clone->orig_bio; > + struct mddev *mddev = md_io_clone->mddev; > > orig_bio->bi_status = bio->bi_status; > > - bio_end_io_acct(orig_bio, md_io_acct->start_time); > + if (md_io_clone->start_time) > + bio_end_io_acct(orig_bio, md_io_clone->start_time); > + > bio_put(bio); > bio_endio(orig_bio); > - > percpu_ref_put(&mddev->active_io); > } > > +static void md_clone_bio(struct mddev *mddev, struct bio **bio) > +{ > + struct block_device *bdev = (*bio)->bi_bdev; > + struct md_io_clone *md_io_clone; > + struct bio *clone = > + bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set); > + > + md_io_clone = container_of(clone, struct md_io_clone, bio_clone); > + md_io_clone->orig_bio = *bio; > + md_io_clone->mddev = mddev; > + if (blk_queue_io_stat(bdev->bd_disk->queue)) > + md_io_clone->start_time = bio_start_io_acct(*bio); > + > + clone->bi_end_io = md_end_clone_io; > + clone->bi_private = md_io_clone; > + *bio = clone; > +} > + > /* > * Used by personalities that don't already clone the bio and thus can't > * easily add the timestamp to their extended bio structure. > */ > void md_account_bio(struct mddev *mddev, struct bio **bio) > { > - struct block_device *bdev = (*bio)->bi_bdev; > - struct md_io_acct *md_io_acct; > - struct bio *clone; > - > - if (!blk_queue_io_stat(bdev->bd_disk->queue)) > - return; > - > percpu_ref_get(&mddev->active_io); > - > - clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set); > - md_io_acct = container_of(clone, struct md_io_acct, bio_clone); > - md_io_acct->orig_bio = *bio; > - md_io_acct->start_time = bio_start_io_acct(*bio); > - md_io_acct->mddev = mddev; > - > - clone->bi_end_io = md_end_io_acct; > - clone->bi_private = md_io_acct; > - *bio = clone; > + md_clone_bio(mddev, bio); > } > EXPORT_SYMBOL_GPL(md_account_bio); > > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 11299d94b239..892a598a5029 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -510,7 +510,7 @@ struct mddev { > struct bio_set sync_set; /* for sync operations like > * metadata and bitmap writes > */ > - struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */ > + struct bio_set io_clone_set; > > /* Generic flush handling. > * The last to finish preflush schedules a worker to submit > @@ -738,7 +738,7 @@ struct md_thread { > void *private; > }; > > -struct md_io_acct { > +struct md_io_clone { > struct mddev *mddev; > struct bio *orig_bio; > unsigned long start_time; > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 29cf5455d7a5..cef0b400b2ee 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -5468,13 +5468,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf, > */ > static void raid5_align_endio(struct bio *bi) > { > - struct md_io_acct *md_io_acct = bi->bi_private; > - struct bio *raid_bi = md_io_acct->orig_bio; > + struct md_io_clone *md_io_clone = bi->bi_private; > + struct bio *raid_bi = md_io_clone->orig_bio; > struct mddev *mddev; > struct r5conf *conf; > struct md_rdev *rdev; > blk_status_t error = bi->bi_status; > - unsigned long start_time = md_io_acct->start_time; > + unsigned long start_time = md_io_clone->start_time; > > bio_put(bi); > > @@ -5506,7 +5506,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) > struct md_rdev *rdev; > sector_t sector, end_sector, first_bad; > int bad_sectors, dd_idx; > - struct md_io_acct *md_io_acct; > + struct md_io_clone *md_io_clone; > bool did_inc; > > if (!in_chunk_boundary(mddev, raid_bio)) { > @@ -5544,15 +5544,15 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) > } > > align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, > - &mddev->io_acct_set); > - md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone); > + &mddev->io_clone_set); > + md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone); > raid_bio->bi_next = (void *)rdev; > if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue)) > - md_io_acct->start_time = bio_start_io_acct(raid_bio); > - md_io_acct->orig_bio = raid_bio; > + md_io_clone->start_time = bio_start_io_acct(raid_bio); > + md_io_clone->orig_bio = raid_bio; > > align_bio->bi_end_io = raid5_align_endio; > - align_bio->bi_private = md_io_acct; > + align_bio->bi_private = md_io_clone; > align_bio->bi_iter.bi_sector = sector; > > /* No reshape active, so we can trust rdev->data_offset */ > -- > 2.39.2 > Reviewed-by: Xiao Ni <xni@xxxxxxxxxx>