Re: [PATCH 4/4] md-cluster: re-add

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 8 Apr 2015 14:24:14 -0500 Goldwyn Rodrigues <rgoldwyn@xxxxxxx> wrote:

> This extends the capabilities of re-adding a failed device
> to the clustering environment.
> 
> A new function gather_bitmaps gathers set bits from bitmaps of
> all nodes, sends a message to all nodes to re-add the disk
> and then initiates the recovery process.
> 
> Question: Do you see a race in sending a READD and then performing
> the bitmap resync/recovery? Should the initiating node perform the
> recovery before sending the READD message? The recovery will send a
> METADATA_UPDATE anyways.

The RE-ADD has to happen *before* the bitmaps are gathered.
After the RE-ADD, all writes will go to the new device.
Any write before that RE-ADD will be recorded in the bitmap.
To ensure that the recovery handles all regions affected by writes, it needs
to know about all writes that didn't go to the new device.  So it needs to
collect bitmaps only once new writes have started going to the new device.

Is that clear?  If not, I'll try again.


NeilBrown

> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> ---
>  drivers/md/bitmap.c     | 20 +++++++++++---------
>  drivers/md/bitmap.h     |  2 +-
>  drivers/md/md-cluster.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/md/md-cluster.h |  1 +
>  drivers/md/md.c         |  2 ++
>  5 files changed, 64 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index eccfa27..3e69583 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -1869,7 +1869,7 @@ EXPORT_SYMBOL_GPL(bitmap_load);
>   * to our bitmap
>   */
>  int bitmap_copy_from_slot(struct mddev *mddev, int slot,
> -		sector_t *low, sector_t *high)
> +		sector_t *low, sector_t *high, bool clear_bits)
>  {
>  	int rv = 0, i, j;
>  	sector_t block, lo = 0, hi = 0;
> @@ -1896,14 +1896,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
>  		}
>  	}
>  
> -	bitmap_update_sb(bitmap);
> -	/* Setting this for the ev_page should be enough.
> -	 * And we do not require both write_all and PAGE_DIRT either
> -	 */
> -	for (i = 0; i < bitmap->storage.file_pages; i++)
> -		set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
> -	bitmap_write_all(bitmap);
> -	bitmap_unplug(bitmap);
> +	if (clear_bits) {
> +		bitmap_update_sb(bitmap);
> +		/* Setting this for the ev_page should be enough.
> +		 * And we do not require both write_all and PAGE_DIRT either
> +		 */
> +		for (i = 0; i < bitmap->storage.file_pages; i++)
> +			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
> +		bitmap_write_all(bitmap);
> +		bitmap_unplug(bitmap);
> +	}
>  	*low = lo;
>  	*high = hi;
>  err:
> diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
> index e838ea7..74bffc7 100644
> --- a/drivers/md/bitmap.h
> +++ b/drivers/md/bitmap.h
> @@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev);
>  int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
>  		  int chunksize, int init);
>  int bitmap_copy_from_slot(struct mddev *mddev, int slot,
> -				sector_t *lo, sector_t *hi);
> +				sector_t *lo, sector_t *hi, bool clear_bits);
>  #endif
>  
>  #endif
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index d036c83..afffbee 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -50,6 +50,7 @@ struct md_cluster_info {
>  	/* dlm lock space and resources for clustered raid. */
>  	dlm_lockspace_t *lockspace;
>  	int slot_number;
> +	int total_slots;
>  	struct completion completion;
>  	struct dlm_lock_resource *sb_lock;
>  	struct mutex sb_mutex;
> @@ -73,6 +74,7 @@ enum msg_type {
>  	RESYNCING,
>  	NEWDISK,
>  	REMOVE,
> +	READD,
>  };
>  
>  struct cluster_msg {
> @@ -267,7 +269,7 @@ void recover_bitmaps(struct md_thread *thread)
>  					str, ret);
>  			goto clear_bit;
>  		}
> -		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
> +		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
>  		if (ret) {
>  			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
>  			goto dlm_unlock;
> @@ -427,6 +429,17 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
>  		pr_warn("%s: %d Could not find disk with uuid: %s", __func__, __LINE__, pretty_uuid(uuid, msg->uuid));
>  }
>  
> +static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
> +{
> +	struct md_rdev *rdev = find_rdev_uuid(mddev, msg->uuid);
> +	char uuid[32];
> +
> +	if (rdev)
> +		clear_bit(Faulty, &rdev->flags);
> +	else
> +		pr_warn("%s: %d Could not find disk with uuid: %s", __func__, __LINE__, pretty_uuid(uuid, msg->uuid));
> +}
> +
>  static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
>  {
>  	switch (msg->type) {
> @@ -451,6 +464,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
>  			__func__, __LINE__, msg->slot);
>  		process_remove_disk(mddev, msg);
>  		break;
> +	case READD:
> +		pr_info("%s: %d Received READD from %d\n",
> +			__func__, __LINE__, msg->slot);
> +		process_readd_disk(mddev, msg);
> +		break;
>  	default:
>  		pr_warn("%s:%d Received unknown message from %d\n",
>  			__func__, __LINE__, msg->slot);
> @@ -653,6 +671,7 @@ static int join(struct mddev *mddev, int nodes)
>  		ret = -ERANGE;
>  		goto err;
>  	}
> +	cinfo->total_slots = nodes;
>  	cinfo->sb_lock = lockres_init(mddev, "cmd-super",
>  					NULL, 0);
>  	if (!cinfo->sb_lock) {
> @@ -900,6 +919,34 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
>  	return __sendmsg(cinfo, &cmsg);
>  }
>  
> +static int gather_bitmaps(struct md_rdev *rdev)
> +{
> +	int sn, err;
> +	sector_t lo, hi;
> +	struct cluster_msg cmsg;
> +	struct mddev *mddev = rdev->mddev;
> +	struct md_cluster_info *cinfo = mddev->cluster_info;
> +	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
> +	char *uuid = sb->device_uuid;
> +
> +	for (sn = 0; sn < cinfo->total_slots; sn++) {
> +		if (sn == (cinfo->slot_number - 1))
> +			continue;
> +		err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
> +		if (err) {
> +			pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
> +			goto out;
> +		}
> +		if ((hi > 0) && (lo < mddev->recovery_cp))
> +			mddev->recovery_cp = lo;
> +	}
> +	cmsg.type = READD;
> +	memcpy(cmsg.uuid, uuid, 16);
> +	err = sendmsg(cinfo, &cmsg);
> +out:
> +	return err;
> +}
> +
>  static struct md_cluster_operations cluster_ops = {
>  	.join   = join,
>  	.leave  = leave,
> @@ -915,6 +962,7 @@ static struct md_cluster_operations cluster_ops = {
>  	.add_new_disk_finish = add_new_disk_finish,
>  	.new_disk_ack = new_disk_ack,
>  	.remove_disk = remove_disk,
> +	.gather_bitmaps = gather_bitmaps,
>  };
>  
>  static int __init cluster_init(void)
> diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
> index 71e5143..6817ee0 100644
> --- a/drivers/md/md-cluster.h
> +++ b/drivers/md/md-cluster.h
> @@ -23,6 +23,7 @@ struct md_cluster_operations {
>  	int (*add_new_disk_finish)(struct mddev *mddev);
>  	int (*new_disk_ack)(struct mddev *mddev, bool ack);
>  	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
> +	int (*gather_bitmaps)(struct md_rdev *rdev);
>  };
>  
>  #endif /* _MD_CLUSTER_H */
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 83a8e91..a233c09 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2847,6 +2847,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
>  			err = 0;
>  		}
>  	} else if (cmd_match(buf, "re-add")) {
> +		if (mddev_is_clustered(rdev->mddev))
> +			md_cluster_ops->gather_bitmaps(rdev);
>  		clear_bit(Faulty, &rdev->flags);
>  		err = add_bound_rdev(rdev);
>  	}

Attachment: pgpK6LfbUxnWV.pgp
Description: OpenPGP digital signature


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux