On Tue, 14 Apr 2015 10:45:42 -0500 Goldwyn Rodrigues <rgoldwyn@xxxxxxx> wrote: > When "re-add" is written to /sys/block/mdXX/md/dev-YYY/state, > the clustered md: > > 1. Sends RE_ADD message with the desc_nr. Nodes receiving the message > clear the Faulty bit in their respective rdev->flags. > 2. The node initiating re-add gathers the bitmaps of all nodes > and copies them into the local bitmap. It does not clear the bitmap > from which it is copying. > 3. Initiating node schedules an md recovery to sync the devices. > > Signed-off-by: Guoqing Jiang <gqjiang@xxxxxxxx> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx> > --- > drivers/md/bitmap.c | 20 +++++++++++--------- > drivers/md/bitmap.h | 2 +- > drivers/md/md-cluster.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- > drivers/md/md-cluster.h | 1 + > drivers/md/md.c | 12 ++++++++++++ > 5 files changed, 72 insertions(+), 11 deletions(-) > > diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c > index 5ff67c3..956cfb9 100644 > --- a/drivers/md/bitmap.c > +++ b/drivers/md/bitmap.c > @@ -1852,7 +1852,7 @@ EXPORT_SYMBOL_GPL(bitmap_load); > * to our bitmap > */ > int bitmap_copy_from_slot(struct mddev *mddev, int slot, > - sector_t *low, sector_t *high) > + sector_t *low, sector_t *high, bool clear_bits) > { > int rv = 0, i, j; > sector_t block, lo = 0, hi = 0; > @@ -1879,14 +1879,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, > } > } > > - bitmap_update_sb(bitmap); > - /* Setting this for the ev_page should be enough. > - * And we do not require both write_all and PAGE_DIRT either > - */ > - for (i = 0; i < bitmap->storage.file_pages; i++) > - set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); > - bitmap_write_all(bitmap); > - bitmap_unplug(bitmap); > + if (clear_bits) { > + bitmap_update_sb(bitmap); > + /* Setting this for the ev_page should be enough. 
> + * And we do not require both write_all and PAGE_DIRT either > + */ > + for (i = 0; i < bitmap->storage.file_pages; i++) > + set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); > + bitmap_write_all(bitmap); > + bitmap_unplug(bitmap); > + } > *low = lo; > *high = hi; > err: > diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h > index 4aabc74..f1f4dd0 100644 > --- a/drivers/md/bitmap.h > +++ b/drivers/md/bitmap.h > @@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev); > int bitmap_resize(struct bitmap *bitmap, sector_t blocks, > int chunksize, int init); > int bitmap_copy_from_slot(struct mddev *mddev, int slot, > - sector_t *lo, sector_t *hi); > + sector_t *lo, sector_t *hi, bool clear_bits); > #endif > > #endif > diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c > index 82f1b7b..ad2b5b7 100644 > --- a/drivers/md/md-cluster.c > +++ b/drivers/md/md-cluster.c > @@ -73,6 +73,7 @@ enum msg_type { > RESYNCING, > NEWDISK, > REMOVE, > + RE_ADD, > }; > > struct cluster_msg { > @@ -253,7 +254,7 @@ void recover_bitmaps(struct md_thread *thread) > str, ret); > goto clear_bit; > } > - ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); > + ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); > if (ret) { > pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); > goto dlm_unlock; > @@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) > pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot); > } > > +static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg) > +{ > + struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot); > + > + if (rdev && test_bit(Faulty, &rdev->flags)) > + clear_bit(Faulty, &rdev->flags); > + else > + pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot); > +} > + > static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) > { > switch (msg->type) { > @@ 
-436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) > __func__, __LINE__, msg->slot); > process_remove_disk(mddev, msg); > break; > + case RE_ADD: > + pr_info("%s: %d Received RE_ADD from %d\n", > + __func__, __LINE__, msg->slot); > + process_readd_disk(mddev, msg); > + break; > default: > pr_warn("%s:%d Received unknown message from %d\n", > __func__, __LINE__, msg->slot); > @@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev) > return __sendmsg(cinfo, &cmsg); > } > > +static int gather_bitmaps(struct md_rdev *rdev) > +{ > + int sn, err; > + sector_t lo, hi; > + struct cluster_msg cmsg; > + struct mddev *mddev = rdev->mddev; > + struct md_cluster_info *cinfo = mddev->cluster_info; > + > + cmsg.type = RE_ADD; > + cmsg.raid_slot = rdev->desc_nr; > + err = sendmsg(cinfo, &cmsg); > + if (err) > + goto out; > + > + for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) { > + if (sn == (cinfo->slot_number - 1)) > + continue; > + err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false); > + if (err) { > + pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); > + goto out; > + } > + if ((hi > 0) && (lo < mddev->recovery_cp)) > + mddev->recovery_cp = lo; > + } > +out: > + return err; > +} > + > static struct md_cluster_operations cluster_ops = { > .join = join, > .leave = leave, > @@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = { > .add_new_disk_finish = add_new_disk_finish, > .new_disk_ack = new_disk_ack, > .remove_disk = remove_disk, > + .gather_bitmaps = gather_bitmaps, > }; > > static int __init cluster_init(void) > diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h > index 71e5143..6817ee0 100644 > --- a/drivers/md/md-cluster.h > +++ b/drivers/md/md-cluster.h > @@ -23,6 +23,7 @@ struct md_cluster_operations { > int (*add_new_disk_finish)(struct mddev *mddev); > int (*new_disk_ack)(struct mddev *mddev, bool ack); > int (*remove_disk)(struct mddev *mddev, 
struct md_rdev *rdev); > + int (*gather_bitmaps)(struct md_rdev *rdev); > }; > > #endif /* _MD_CLUSTER_H */ > diff --git a/drivers/md/md.c b/drivers/md/md.c > index ba01605..8c37bbf 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -2599,11 +2599,23 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) > err = 0; > } > } else if (cmd_match(buf, "re-add") && (test_bit(Faulty, &rdev->flags) || (rdev->raid_disk == -1))) { > + /* clear_bit is performed _after_ all the devices > + * have their local Faulty bit cleared. If any writes > + * happen in the meantime in the local node, they > + * will land in the local bitmap, which will be synced > + * by this node eventually > + */ > + if (mddev_is_clustered(rdev->mddev)) { > + err = md_cluster_ops->gather_bitmaps(rdev); > + if (err) > + goto out; > + } > clear_bit(Faulty, &rdev->flags); > err = add_bound_rdev(rdev); > } > if (!err) > sysfs_notify_dirent_safe(rdev->sysfs_state); > +out: > return err ? err : len; > } > static struct rdev_sysfs_entry rdev_state = I changed this to: if (!mddev_is_clustered(rdev->mddev) || (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) { clear_bit(Faulty, &rdev->flags); err = add_bound_rdev(rdev); } because I think it makes the code a bit clearer. Otherwise, applied. Thanks, NeilBrown
Attachment:
pgpzNLpNsDyKz.pgp
Description: OpenPGP digital signature