There is very little of the I/O context that is actually needed for issuing a bio. Add the few needed fields to struct btrfs_bio instead. The stripes array is still allocated on demand when more than a single I/O is needed, but for single leg I/O (e.g. all reads) there is no additional memory allocation now. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/btrfs/volumes.c | 147 ++++++++++++++++++++++++++++----------------- fs/btrfs/volumes.h | 20 ++++-- 2 files changed, 107 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc9e2565e4b64..cec3f6b9f5c21 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -253,10 +253,9 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static int __btrfs_map_block(struct btrfs_fs_info *fs_info, - enum btrfs_map_op op, - u64 logical, u64 *length, - struct btrfs_io_context **bioc_ret, - int mirror_num, int need_raid_map); + enum btrfs_map_op op, u64 logical, u64 *length, + struct btrfs_io_context **bioc_ret, struct btrfs_bio *bbio, + int mirror_num, int need_raid_map); /* * Device locking @@ -5926,7 +5925,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ sizeof(u64) * (total_stripes), GFP_NOFS|__GFP_NOFAIL); - atomic_set(&bioc->error, 0); refcount_set(&bioc->refs, 1); bioc->fs_info = fs_info; @@ -6128,7 +6126,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, int ret = 0; ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &length, &bioc, 0, 0); + logical, &length, &bioc, NULL, 0, 0); if (ret) { ASSERT(bioc == NULL); return ret; @@ -6397,10 +6395,9 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em, } static int __btrfs_map_block(struct btrfs_fs_info *fs_info, - enum btrfs_map_op op, - u64 logical, u64 *length, - struct btrfs_io_context **bioc_ret, - int mirror_num, int need_raid_map) + enum btrfs_map_op op, u64 logical, u64 *length, + struct btrfs_io_context **bioc_ret, struct btrfs_bio *bbio, + int mirror_num, int need_raid_map) { struct extent_map *em; struct map_lookup *map; @@ -6566,6 +6563,48 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, tgtdev_indexes = num_stripes; } + if (need_full_stripe(op)) + max_errors = btrfs_chunk_max_errors(map); + + if (bbio && !need_raid_map) { + int replacement_idx = num_stripes; + + if (num_alloc_stripes > 1) { + bbio->stripes = kmalloc_array(num_alloc_stripes, + sizeof(*bbio->stripes), + GFP_NOFS | __GFP_NOFAIL); + } else { + bbio->stripes = &bbio->__stripe; + } + + atomic_set(&bbio->stripes_pending, num_stripes); + for (i = 0; i < num_stripes; i++) { + struct btrfs_bio_stripe *s = &bbio->stripes[i]; + + s->physical = map->stripes[stripe_index].physical + + stripe_offset + stripe_nr * map->stripe_len; + s->dev = map->stripes[stripe_index].dev; + stripe_index++; + + if (op == BTRFS_MAP_WRITE && dev_replace_is_ongoing && + dev_replace->tgtdev && + !is_block_group_to_copy(fs_info, logical) && + s->dev->devid == dev_replace->srcdev->devid) { + struct btrfs_bio_stripe *r = + &bbio->stripes[replacement_idx++]; + + r->physical = s->physical; + r->dev = dev_replace->tgtdev; + max_errors++; + atomic_inc(&bbio->stripes_pending); + } + } + + bbio->max_errors = max_errors; + bbio->mirror_num = mirror_num; + goto out; + } + bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes); if (!bioc) { ret = -ENOMEM; @@ -6601,9 +6640,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, sort_parity_stripes(bioc, num_stripes); } - if (need_full_stripe(op)) - max_errors = btrfs_chunk_max_errors(map); - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && need_full_stripe(op)) { handle_ops_on_dev_replace(op, &bioc, dev_replace, logical, @@ -6646,7 +6682,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, length, bioc_ret); return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, - mirror_num, 0); + NULL, mirror_num, 0); } /* For Scrub/replace */ @@ -6654,14 +6690,15 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_io_context **bioc_ret) { - return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1); + return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, NULL, + 0, 1); } -static struct btrfs_workqueue *btrfs_end_io_wq(struct btrfs_io_context *bioc) +static struct btrfs_workqueue *btrfs_end_io_wq(struct btrfs_bio *bbio) { - struct btrfs_fs_info *fs_info = bioc->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(bbio->inode->i_sb); - switch (btrfs_bio(bioc->orig_bio)->end_io_type) { + switch (bbio->end_io_type) { case BTRFS_ENDIO_WQ_DATA_READ: return fs_info->endio_workers; case BTRFS_ENDIO_WQ_DATA_WRITE: @@ -6682,21 +6719,22 @@ static void btrfs_end_bio_work(struct btrfs_work *work) bio_endio(&bbio->bio); } -static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async) +static void btrfs_end_bbio(struct btrfs_bio *bbio, bool async) { - struct btrfs_workqueue *wq = async ? btrfs_end_io_wq(bioc) : NULL; - struct bio *bio = bioc->orig_bio; - struct btrfs_bio *bbio = btrfs_bio(bio); + struct btrfs_workqueue *wq = async ? btrfs_end_io_wq(bbio) : NULL; + struct bio *bio = &bbio->bio; - bbio->mirror_num = bioc->mirror_num; - bio->bi_private = bioc->private; - bio->bi_end_io = bioc->end_io; + bio->bi_private = bbio->private; + bio->bi_end_io = bbio->end_io; + + if (bbio->stripes != &bbio->__stripe) + kfree(bbio->stripes); /* * Only send an error to the higher layers if it is beyond the tolerance * threshold. */ - if (atomic_read(&bioc->error) > bioc->max_errors) + if (atomic_read(&bbio->error) > bbio->max_errors) bio->bi_status = BLK_STS_IOERR; else bio->bi_status = BLK_STS_OK; @@ -6707,16 +6745,14 @@ static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async) } else { bio_endio(bio); } - - btrfs_put_bioc(bioc); } static void btrfs_end_bio(struct bio *bio) { - struct btrfs_io_context *bioc = bio->bi_private; + struct btrfs_bio *bbio = bio->bi_private; if (bio->bi_status) { - atomic_inc(&bioc->error); + atomic_inc(&bbio->error); if (bio->bi_status == BLK_STS_IOERR || bio->bi_status == BLK_STS_TARGET) { struct btrfs_device *dev = btrfs_bio(bio)->device; @@ -6734,40 +6770,39 @@ static void btrfs_end_bio(struct bio *bio) } } - if (bio != bioc->orig_bio) + if (bio != &bbio->bio) bio_put(bio); - btrfs_bio_counter_dec(bioc->fs_info); - if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc, true); + btrfs_bio_counter_dec(btrfs_sb(bbio->inode->i_sb)); + if (atomic_dec_and_test(&bbio->stripes_pending)) + btrfs_end_bbio(bbio, true); } -static void submit_stripe_bio(struct btrfs_io_context *bioc, - struct bio *orig_bio, int dev_nr, bool clone) +static void submit_stripe_bio(struct btrfs_bio *bbio, int dev_nr, bool clone) { - struct btrfs_fs_info *fs_info = bioc->fs_info; - struct btrfs_device *dev = bioc->stripes[dev_nr].dev; - u64 physical = bioc->stripes[dev_nr].physical; + struct btrfs_fs_info *fs_info = btrfs_sb(bbio->inode->i_sb); + struct btrfs_device *dev = bbio->stripes[dev_nr].dev; + u64 physical = bbio->stripes[dev_nr].physical; struct bio *bio; if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || - (btrfs_op(orig_bio) == BTRFS_MAP_WRITE && + (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { - atomic_inc(&bioc->error); - if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc, false); + atomic_inc(&bbio->error); + if (atomic_dec_and_test(&bbio->stripes_pending)) + btrfs_end_bbio(bbio, false); return; } if (clone) { - bio = btrfs_bio_clone(dev->bdev, orig_bio); + bio = btrfs_bio_clone(dev->bdev, &bbio->bio); } else { - bio = orig_bio; + bio = &bbio->bio; bio_set_dev(bio, dev->bdev); } - bio->bi_private = bioc; + bio->bi_private = bbio; btrfs_bio(bio)->device = dev; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; @@ -6800,6 +6835,7 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num) { + struct btrfs_bio *bbio = btrfs_bio(bio); u64 logical = bio->bi_iter.bi_sector << 9; u64 length = bio->bi_iter.bi_size; u64 map_length = length; @@ -6809,18 +6845,17 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, struct btrfs_io_context *bioc = NULL; btrfs_bio_counter_inc_blocked(fs_info); - ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, - &map_length, &bioc, mirror_num, 1); + ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, + &bioc, bbio, mirror_num, 1); if (ret) goto out_dec; - total_devs = bioc->num_stripes; - bioc->orig_bio = bio; - bioc->private = bio->bi_private; - bioc->end_io = bio->bi_end_io; - atomic_set(&bioc->stripes_pending, bioc->num_stripes); + bbio->private = bio->bi_private; + bbio->end_io = bio->bi_end_io; + + if (bioc) { + ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); - if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { /* * In this case, map_length has been set to the length of a * single stripe; not the whole write. @@ -6834,6 +6869,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, mirror_num, 1); goto out_dec; } + ASSERT(0); } if (map_length < length) { @@ -6843,8 +6879,9 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, BUG(); } + total_devs = atomic_read(&bbio->stripes_pending); for (dev_nr = 0; dev_nr < total_devs; dev_nr++) - submit_stripe_bio(bioc, bio, dev_nr, dev_nr < total_devs - 1); + submit_stripe_bio(bbio, dev_nr, dev_nr < total_devs - 1); out_dec: btrfs_bio_counter_dec(fs_info); return errno_to_blk_status(ret); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 51a27180004eb..cd71cd33a9df2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -323,6 +323,11 @@ enum btrfs_endio_type { BTRFS_ENDIO_WQ_FREE_SPACE_READ, }; +struct btrfs_bio_stripe { + struct btrfs_device *dev; + u64 physical; +}; + /* * Additional info to pass along bio. * @@ -333,6 +338,16 @@ struct btrfs_bio { unsigned int mirror_num; + atomic_t stripes_pending; + atomic_t error; + int max_errors; + + struct btrfs_bio_stripe *stripes; + struct btrfs_bio_stripe __stripe; + + bio_end_io_t *end_io; + void *private; + enum btrfs_endio_type end_io_type; struct btrfs_work work; @@ -389,13 +404,8 @@ struct btrfs_io_stripe { */ struct btrfs_io_context { refcount_t refs; - atomic_t stripes_pending; struct btrfs_fs_info *fs_info; u64 map_type; /* get from map_lookup->type */ - bio_end_io_t *end_io; - struct bio *orig_bio; - void *private; - atomic_t error; int max_errors; int num_stripes; int mirror_num; -- 2.30.2