On Tue, Oct 09, 2018 at 10:59:42AM +0800, Guoqing Jiang wrote: > To support adding a disk in grow mode, we need to resize > all the bitmaps of each node before reshape, so that we > can ensure all nodes have the same view of the bitmap of > the clustered raid. > > So after the master node resizes the bitmap, it broadcasts > a message to the other slave nodes, and it checks whether the sizes of > the bitmaps are the same by comparing pages. We can only > continue the reshaping after all nodes update the bitmap > to the same size (by checking the pages), otherwise we revert > the bitmap size to the previous value. > > The resize_bitmaps interface and BITMAP_RESIZE message are > introduced in md-cluster.c for this purpose. > > Reviewed-by: NeilBrown <neilb@xxxxxxxx> > Signed-off-by: Guoqing Jiang <gqjiang@xxxxxxxx> Which tree are these patches against? There are several compile errors. > --- > drivers/md/md-cluster.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ > drivers/md/md-cluster.h | 1 + > drivers/md/raid10.c | 40 ++++++++++++++++++++++-- > 3 files changed, 119 insertions(+), 3 deletions(-) > > diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c > index 0b2af6e74fc3..8d691cc5f1b5 100644 > --- a/drivers/md/md-cluster.c > +++ b/drivers/md/md-cluster.c > @@ -105,6 +105,7 @@ enum msg_type { > RE_ADD, > BITMAP_NEEDS_SYNC, > CHANGE_CAPACITY, > + BITMAP_RESIZE, > }; > > struct cluster_msg { > @@ -612,6 +613,11 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) > case BITMAP_NEEDS_SYNC: > __recover_slot(mddev, le32_to_cpu(msg->slot)); > break; > + case BITMAP_RESIZE: > + if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0)) > + ret = bitmap_resize(mddev->bitmap, > + le64_to_cpu(msg->high), 0, 0); > + break; > default: > ret = -1; > pr_warn("%s:%d Received unknown message from %d\n", > @@ -1102,6 +1108,80 @@ static void metadata_update_cancel(struct mddev *mddev) > unlock_comm(cinfo); > } > > +static int update_bitmap_size(struct mddev *mddev, 
sector_t size) > +{ > + struct md_cluster_info *cinfo = mddev->cluster_info; > + struct cluster_msg cmsg = {0}; > + int ret; > + > + cmsg.type = cpu_to_le32(BITMAP_RESIZE); > + cmsg.high = cpu_to_le64(size); > + ret = sendmsg(cinfo, &cmsg, 0); > + if (ret) > + pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n", > + __func__, __LINE__, ret); > + return ret; > +} > + > +static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize) > +{ > + struct bitmap_counts *counts; > + char str[64]; > + struct dlm_lock_resource *bm_lockres; > + struct bitmap *bitmap = mddev->bitmap; > + unsigned long my_pages = bitmap->counts.pages; > + int i, rv; > + > + /* > + * We need to ensure all the nodes can grow to a larger > + * bitmap size before make the reshaping. > + */ > + rv = update_bitmap_size(mddev, newsize); > + if (rv) > + return rv; > + > + for (i = 0; i < mddev->bitmap_info.nodes; i++) { > + if (i == md_cluster_ops->slot_number(mddev)) > + continue; > + > + bitmap = get_bitmap_from_slot(mddev, i); > + if (IS_ERR(bitmap)) { > + pr_err("can't get bitmap from slot %d\n", i); > + goto out; > + } > + counts = &bitmap->counts; > + > + /* > + * If we can hold the bitmap lock of one node then > + * the slot is not occupied, update the pages. 
> + */ > + snprintf(str, 64, "bitmap%04d", i); > + bm_lockres = lockres_init(mddev, str, NULL, 1); > + if (!bm_lockres) { > + pr_err("Cannot initialize %s lock\n", str); > + goto out; > + } > + bm_lockres->flags |= DLM_LKF_NOQUEUE; > + rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); > + if (!rv) > + counts->pages = my_pages; > + lockres_free(bm_lockres); > + > + if (my_pages != counts->pages) > + /* > + * Let's revert the bitmap size if one node > + * can't resize bitmap > + */ > + goto out; > + } > + > + return 0; > +out: > + bitmap_free(bitmap); > + update_bitmap_size(mddev, oldsize); > + return -1; > +} > + > /* > * return 0 if all the bitmaps have the same sync_size > */ > @@ -1492,6 +1572,7 @@ static struct md_cluster_operations cluster_ops = { > .remove_disk = remove_disk, > .load_bitmaps = load_bitmaps, > .gather_bitmaps = gather_bitmaps, > + .resize_bitmaps = resize_bitmaps, > .lock_all_bitmaps = lock_all_bitmaps, > .unlock_all_bitmaps = unlock_all_bitmaps, > .update_size = update_size, > diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h > index c0240708f443..9bd753a6a94e 100644 > --- a/drivers/md/md-cluster.h > +++ b/drivers/md/md-cluster.h > @@ -26,6 +26,7 @@ struct md_cluster_operations { > int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); > void (*load_bitmaps)(struct mddev *mddev, int total_slots); > int (*gather_bitmaps)(struct md_rdev *rdev); > + int (*resize_bitmaps)(struct mddev *mddev, sector_t newsize, sector_t oldsize); > int (*lock_all_bitmaps)(struct mddev *mddev); > void (*unlock_all_bitmaps)(struct mddev *mddev); > void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors); > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index 749848b2c477..69791ac32154 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -4287,12 +4287,46 @@ static int raid10_start_reshape(struct mddev *mddev) > spin_unlock_irq(&conf->device_lock); > > if (mddev->delta_disks && mddev->bitmap) { > - ret = 
md_bitmap_resize(mddev->bitmap, > - raid10_size(mddev, 0, conf->geo.raid_disks), > - 0, 0); > + struct mdp_superblock_1 *sb = NULL; > + sector_t oldsize, newsize; > + > + oldsize = raid10_size(mddev, 0, 0); > + newsize = raid10_size(mddev, 0, conf->geo.raid_disks); > + > + if (!mddev_is_clustered(mddev)) { > + ret = bitmap_resize(mddev->bitmap, newsize, 0, 0); > + if (ret) > + goto abort; > + else > + goto out; > + } > + > + rdev_for_each(rdev, mddev) { > + if (rdev->raid_disk > -1 && > + !test_bit(Faulty, &rdev->flags)) > + sb = page_address(rdev->sb_page); > + } > + > + /* > + * some node is already performing reshape, and no need to > + * call bitmap_resize again since it should be called when > + * receiving BITMAP_RESIZE msg > + */ > + if ((sb && (le32_to_cpu(sb->feature_map) & > + MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize)) > + goto out; > + > + ret = bitmap_resize(mddev->bitmap, newsize, 0, 0); > if (ret) > goto abort; > + > + ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); > + if (ret) { > + bitmap_resize(mddev->bitmap, oldsize, 0, 0); > + goto abort; > + } > } > +out: > if (mddev->delta_disks > 0) { > rdev_for_each(rdev, mddev) > if (rdev->raid_disk < 0 && > -- > 2.12.3 >