On Tue, Feb 28, 2017 at 11:41:41PM +0800, Ming Lei wrote: > Now we allocate one page array for managing resync pages, instead > of using bio's vec table to do that, and the old way is very hacky > and won't work any more if multipage bvec is enabled. > > The introduced cost is that we need to allocate (128 + 16) * copies > bytes per r10_bio, and it is fine because the inflight r10_bio for > resync shouldn't be much, as pointed by Shaohua. > > Also bio_reset() in raid10_sync_request() and reshape_request() > are removed because all bios are freshly new now in these functions > and not necessary to reset any more. > > This patch can be thought as cleanup too. > > Suggested-by: Shaohua Li <shli@xxxxxxxxxx> > Signed-off-by: Ming Lei <tom.leiming@xxxxxxxxx> > --- > drivers/md/raid10.c | 125 ++++++++++++++++++++++++++++++---------------------- > 1 file changed, 73 insertions(+), 52 deletions(-) > > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index a9ddd4f14008..f887b21332e7 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -110,6 +110,16 @@ static void end_reshape(struct r10conf *conf); > #define raid10_log(md, fmt, args...) \ > do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) > > +static inline struct resync_pages *get_resync_pages(struct bio *bio) > +{ > + return bio->bi_private; > +} > + > +static inline struct r10bio *get_resync_r10bio(struct bio *bio) > +{ > + return get_resync_pages(bio)->raid_bio; > +} Same applies to raid10 too: embed the resync_pages into r10bio, possibly as a pointer. Merge patches 11, 12, and 13 into a single patch. 
Thanks, Shaohua > + > static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) > { > struct r10conf *conf = data; > @@ -140,11 +150,11 @@ static void r10bio_pool_free(void *r10_bio, void *data) > static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) > { > struct r10conf *conf = data; > - struct page *page; > struct r10bio *r10_bio; > struct bio *bio; > - int i, j; > - int nalloc; > + int j; > + int nalloc, nalloc_rp; > + struct resync_pages *rps; > > r10_bio = r10bio_pool_alloc(gfp_flags, conf); > if (!r10_bio) > @@ -156,6 +166,15 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) > else > nalloc = 2; /* recovery */ > > + /* allocate once for all bios */ > + if (!conf->have_replacement) > + nalloc_rp = nalloc; > + else > + nalloc_rp = nalloc * 2; > + rps = kmalloc(sizeof(struct resync_pages) * nalloc_rp, gfp_flags); > + if (!rps) > + goto out_free_r10bio; > + > /* > * Allocate bios. > */ > @@ -175,36 +194,40 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) > * Allocate RESYNC_PAGES data pages and attach them > * where needed. 
> */ > - for (j = 0 ; j < nalloc; j++) { > + for (j = 0; j < nalloc; j++) { > struct bio *rbio = r10_bio->devs[j].repl_bio; > + struct resync_pages *rp, *rp_repl; > + > + rp = &rps[j]; > + if (rbio) > + rp_repl = &rps[nalloc + j]; > + > bio = r10_bio->devs[j].bio; > - for (i = 0; i < RESYNC_PAGES; i++) { > - if (j > 0 && !test_bit(MD_RECOVERY_SYNC, > - &conf->mddev->recovery)) { > - /* we can share bv_page's during recovery > - * and reshape */ > - struct bio *rbio = r10_bio->devs[0].bio; > - page = rbio->bi_io_vec[i].bv_page; > - get_page(page); > - } else > - page = alloc_page(gfp_flags); > - if (unlikely(!page)) > + > + if (!j || test_bit(MD_RECOVERY_SYNC, > + &conf->mddev->recovery)) { > + if (resync_alloc_pages(rp, gfp_flags)) > goto out_free_pages; > + } else { > + memcpy(rp, &rps[0], sizeof(*rp)); > + resync_get_all_pages(rp); > + } > > - bio->bi_io_vec[i].bv_page = page; > - if (rbio) > - rbio->bi_io_vec[i].bv_page = page; > + rp->idx = 0; > + rp->raid_bio = r10_bio; > + bio->bi_private = rp; > + if (rbio) { > + memcpy(rp_repl, rp, sizeof(*rp)); > + rbio->bi_private = rp_repl; > } > } > > return r10_bio; > > out_free_pages: > - for ( ; i > 0 ; i--) > - safe_put_page(bio->bi_io_vec[i-1].bv_page); > - while (j--) > - for (i = 0; i < RESYNC_PAGES ; i++) > - safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); > + while (--j >= 0) > + resync_free_pages(&rps[j * 2]); > + > j = 0; > out_free_bio: > for ( ; j < nalloc; j++) { > @@ -213,30 +236,34 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) > if (r10_bio->devs[j].repl_bio) > bio_put(r10_bio->devs[j].repl_bio); > } > + kfree(rps); > +out_free_r10bio: > r10bio_pool_free(r10_bio, conf); > return NULL; > } > > static void r10buf_pool_free(void *__r10_bio, void *data) > { > - int i; > struct r10conf *conf = data; > struct r10bio *r10bio = __r10_bio; > int j; > + struct resync_pages *rp = NULL; > > - for (j=0; j < conf->copies; j++) { > + for (j = conf->copies; j--; ) { > struct bio *bio = 
r10bio->devs[j].bio; > - if (bio) { > - for (i = 0; i < RESYNC_PAGES; i++) { > - safe_put_page(bio->bi_io_vec[i].bv_page); > - bio->bi_io_vec[i].bv_page = NULL; > - } > - bio_put(bio); > - } > + > + rp = get_resync_pages(bio); > + resync_free_pages(rp); > + bio_put(bio); > + > bio = r10bio->devs[j].repl_bio; > if (bio) > bio_put(bio); > } > + > + /* resync pages array stored in the 1st bio's .bi_private */ > + kfree(rp); > + > r10bio_pool_free(r10bio, conf); > } > > @@ -1935,7 +1962,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) > > static void end_sync_read(struct bio *bio) > { > - struct r10bio *r10_bio = bio->bi_private; > + struct r10bio *r10_bio = get_resync_r10bio(bio); > struct r10conf *conf = r10_bio->mddev->private; > int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); > > @@ -1944,6 +1971,7 @@ static void end_sync_read(struct bio *bio) > > static void end_reshape_read(struct bio *bio) > { > + /* reshape read bio isn't allocated from r10buf_pool */ > struct r10bio *r10_bio = bio->bi_private; > > __end_sync_read(r10_bio, bio, r10_bio->read_slot); > @@ -1978,7 +2006,7 @@ static void end_sync_request(struct r10bio *r10_bio) > > static void end_sync_write(struct bio *bio) > { > - struct r10bio *r10_bio = bio->bi_private; > + struct r10bio *r10_bio = get_resync_r10bio(bio); > struct mddev *mddev = r10_bio->mddev; > struct r10conf *conf = mddev->private; > int d; > @@ -2058,6 +2086,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) > for (i=0 ; i < conf->copies ; i++) { > int j, d; > struct md_rdev *rdev; > + struct resync_pages *rp; > > tbio = r10_bio->devs[i].bio; > > @@ -2099,11 +2128,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) > * First we need to fixup bv_offset, bv_len and > * bi_vecs, as the read request might have corrupted these > */ > + rp = get_resync_pages(tbio); > bio_reset(tbio); > > tbio->bi_vcnt = vcnt; > tbio->bi_iter.bi_size = 
fbio->bi_iter.bi_size; > - tbio->bi_private = r10_bio; > + rp->raid_bio = r10_bio; > + tbio->bi_private = rp; > tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; > tbio->bi_end_io = end_sync_write; > bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); > @@ -3171,10 +3202,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > } > } > bio = r10_bio->devs[0].bio; > - bio_reset(bio); > bio->bi_next = biolist; > biolist = bio; > - bio->bi_private = r10_bio; > bio->bi_end_io = end_sync_read; > bio_set_op_attrs(bio, REQ_OP_READ, 0); > if (test_bit(FailFast, &rdev->flags)) > @@ -3198,10 +3227,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > > if (!test_bit(In_sync, &mrdev->flags)) { > bio = r10_bio->devs[1].bio; > - bio_reset(bio); > bio->bi_next = biolist; > biolist = bio; > - bio->bi_private = r10_bio; > bio->bi_end_io = end_sync_write; > bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > bio->bi_iter.bi_sector = to_addr > @@ -3226,10 +3253,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > if (mreplace == NULL || bio == NULL || > test_bit(Faulty, &mreplace->flags)) > break; > - bio_reset(bio); > bio->bi_next = biolist; > biolist = bio; > - bio->bi_private = r10_bio; > bio->bi_end_io = end_sync_write; > bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > bio->bi_iter.bi_sector = to_addr + > @@ -3351,7 +3376,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > r10_bio->devs[i].repl_bio->bi_end_io = NULL; > > bio = r10_bio->devs[i].bio; > - bio_reset(bio); > bio->bi_error = -EIO; > rcu_read_lock(); > rdev = rcu_dereference(conf->mirrors[d].rdev); > @@ -3376,7 +3400,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > atomic_inc(&r10_bio->remaining); > bio->bi_next = biolist; > biolist = bio; > - bio->bi_private = r10_bio; > bio->bi_end_io = end_sync_read; > bio_set_op_attrs(bio, REQ_OP_READ, 0); > if (test_bit(FailFast, 
&conf->mirrors[d].rdev->flags)) > @@ -3395,13 +3418,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > > /* Need to set up for writing to the replacement */ > bio = r10_bio->devs[i].repl_bio; > - bio_reset(bio); > bio->bi_error = -EIO; > > sector = r10_bio->devs[i].addr; > bio->bi_next = biolist; > biolist = bio; > - bio->bi_private = r10_bio; > bio->bi_end_io = end_sync_write; > bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) > @@ -3440,7 +3461,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > if (len == 0) > break; > for (bio= biolist ; bio ; bio=bio->bi_next) { > - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; > + page = resync_fetch_page(get_resync_pages(bio)); > /* > * won't fail because the vec table is big enough > * to hold all these pages > @@ -3449,7 +3470,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > } > nr_sectors += len>>9; > sector_nr += len>>9; > - } while (biolist->bi_vcnt < RESYNC_PAGES); > + } while (resync_page_available(get_resync_pages(biolist))); > r10_bio->sectors = nr_sectors; > > while (biolist) { > @@ -3457,7 +3478,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, > biolist = biolist->bi_next; > > bio->bi_next = NULL; > - r10_bio = bio->bi_private; > + r10_bio = get_resync_r10bio(bio); > r10_bio->sectors = nr_sectors; > > if (bio->bi_end_io == end_sync_read) { > @@ -4352,6 +4373,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, > struct bio *blist; > struct bio *bio, *read_bio; > int sectors_done = 0; > + struct page **pages; > > if (sector_nr == 0) { > /* If restarting in the middle, skip the initial sectors */ > @@ -4502,11 +4524,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, > if (!rdev2 || test_bit(Faulty, &rdev2->flags)) > continue; > > - bio_reset(b); > b->bi_bdev = rdev2->bdev; > 
b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + > rdev2->new_data_offset; > - b->bi_private = r10_bio; > b->bi_end_io = end_reshape_write; > bio_set_op_attrs(b, REQ_OP_WRITE, 0); > b->bi_next = blist; > @@ -4516,8 +4536,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, > /* Now add as many pages as possible to all of these bios. */ > > nr_sectors = 0; > + pages = get_resync_pages(r10_bio->devs[0].bio)->pages; > for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) { > - struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page; > + struct page *page = pages[s / (PAGE_SIZE >> 9)]; > int len = (max_sectors - s) << 9; > if (len > PAGE_SIZE) > len = PAGE_SIZE; > @@ -4701,7 +4722,7 @@ static int handle_reshape_read_error(struct mddev *mddev, > > static void end_reshape_write(struct bio *bio) > { > - struct r10bio *r10_bio = bio->bi_private; > + struct r10bio *r10_bio = get_resync_r10bio(bio); > struct mddev *mddev = r10_bio->mddev; > struct r10conf *conf = mddev->private; > int d; > -- > 2.7.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html