Re: [PATCH 05/10] Add a new clustered disk

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 24 Apr 2015 15:30:36 +0800 gqjiang@xxxxxxxx wrote:

> From: Guoqing Jiang <gqjiang@xxxxxxxx>
> 
> A clustered disk is added by the traditional --add sequence.
> However, other nodes need to acknowledge that they can "see"
> the device. This is done by --cluster-confirm:
> 
> --cluster-confirm Y:/dev/whatever (if disk is found)
> or
> --cluster-confirm Y:missing (if disk is not found)
> 
> The node initiating the --add has the disk state tagged with
> MD_DISK_CLUSTER_ADD, and the node confirming tags the disk with
> MD_DISK_CANDIDATE.

You haven't explained 'Y' here.  It looks like it means 'Yes', but it doesn't.


> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> Signed-off-by: Guoqing Jiang <gqjiang@xxxxxxxx>
> ---
>  Manage.c   | 33 +++++++++++++++++++++++++++++----
>  ReadMe.c   |  1 +
>  md_p.h     |  7 +++++++
>  md_u.h     |  1 +
>  mdadm.8.in |  9 +++++++++
>  mdadm.c    |  4 ++++
>  mdadm.h    |  2 ++
>  util.c     | 11 +++++++++++
>  8 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/Manage.c b/Manage.c
> index d3cfb55..4c3d451 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -690,7 +690,8 @@ skip_re_add:
>  int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	       struct supertype *tst, mdu_array_info_t *array,
>  	       int force, int verbose, char *devname,
> -	       char *update, unsigned long rdev, unsigned long long array_size)
> +	       char *update, unsigned long rdev, unsigned long long array_size,
> +	       int raid_slot)
>  {
>  	unsigned long long ldsize;
>  	struct supertype *dev_st = NULL;
> @@ -879,7 +880,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	}
>  	disc.major = major(rdev);
>  	disc.minor = minor(rdev);
> -	disc.number =j;
> +	if (raid_slot < 0)
> +		disc.number = j;
> +	else
> +		disc.number = raid_slot;
>  	disc.state = 0;
>  	if (array->not_persistent==0) {
>  		int dfd;
> @@ -920,6 +924,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			}
>  		free(used);
>  	}
> +
> +	if (array->state & (1 << MD_SB_CLUSTERED)) {
> +		if (dv->disposition == 'c')
> +			disc.state |= (1 << MD_DISK_CANDIDATE);
> +		else
> +			disc.state |= (1 << MD_DISK_CLUSTER_ADD);
> +	}
> +
>  	if (dv->writemostly == 1)
>  		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
>  	if (tst->ss->external) {
> @@ -1239,6 +1251,7 @@ int Manage_subdevs(char *devname, int fd,
>  	 *        variant on 'A'
>  	 *  'F' - Another variant of 'A', where the device was faulty
>  	 *        so must be removed from the array first.
> +	 *  'c' - confirm the device as found (for clustered environments)
>  	 *
>  	 * For 'f' and 'r', the device can also be a kernel-internal
>  	 * name such as 'sdb'.
> @@ -1254,6 +1267,7 @@ int Manage_subdevs(char *devname, int fd,
>  	struct mdinfo info;
>  	int frozen = 0;
>  	int busy = 0;
> +	int raid_slot = -1;
>  
>  	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
>  		pr_err("Cannot get array info for %s\n",
> @@ -1282,6 +1296,11 @@ int Manage_subdevs(char *devname, int fd,
>  		int rv;
>  		int mj,mn;
>  
> +		raid_slot = -1;
> +		if (dv->disposition == 'c')
> +			parse_cluster_confirm_arg(dv->devname, &dv->devname,
> +					&raid_slot);
> +
>  		if (strcmp(dv->devname, "failed") == 0 ||
>  		    strcmp(dv->devname, "faulty") == 0) {
>  			if (dv->disposition != 'A'
> @@ -1307,6 +1326,11 @@ int Manage_subdevs(char *devname, int fd,
>  		if (strcmp(dv->devname, "missing") == 0) {
>  			struct mddev_dev *add_devlist = NULL;
>  			struct mddev_dev **dp;
> +			if (dv->disposition == 'c') {
> +				rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
> +				break;
> +			}
> +
>  			if (dv->disposition != 'A') {
>  				pr_err("'missing' only meaningful with --re-add\n");
>  				goto abort;
> @@ -1399,7 +1423,7 @@ int Manage_subdevs(char *devname, int fd,
>  			else {
>  				int open_err = errno;
>  				if (stat(dv->devname, &stb) != 0) {
> -					pr_err("Cannot find %s: %s\n",
> +					pr_err("%s: %d Cannot find %s: %s\n", __func__, __LINE__,
>  					       dv->devname, strerror(errno));
>  					goto abort;
>  				}
> @@ -1437,6 +1461,7 @@ int Manage_subdevs(char *devname, int fd,
>  		case 'A':
>  		case 'M': /* --re-add missing */
>  		case 'F': /* --re-add faulty  */
> +		case 'c': /* --cluster-confirm */
>  			/* add the device */
>  			if (subarray) {
>  				pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
> @@ -1470,7 +1495,7 @@ int Manage_subdevs(char *devname, int fd,
>  			}
>  			rv = Manage_add(fd, tfd, dv, tst, &array,
>  					force, verbose, devname, update,
> -					rdev, array_size);
> +					rdev, array_size, raid_slot);
>  			close(tfd);
>  			tfd = -1;
>  			if (rv < 0)
> diff --git a/ReadMe.c b/ReadMe.c
> index c6286ae..c854cd5 100644
> --- a/ReadMe.c
> +++ b/ReadMe.c
> @@ -169,6 +169,7 @@ struct option long_options[] = {
>      {"wait",	  0, 0,  WaitOpt},
>      {"wait-clean", 0, 0, Waitclean },
>      {"action",    1, 0, Action },
> +    {"cluster-confirm", 0, 0, ClusterConfirm},
>  
>      /* For Detail/Examine */
>      {"brief",	  0, 0, Brief},
> diff --git a/md_p.h b/md_p.h
> index c4846ba..e59504f 100644
> --- a/md_p.h
> +++ b/md_p.h
> @@ -78,6 +78,12 @@
>  #define MD_DISK_ACTIVE		1 /* disk is running but may not be in sync */
>  #define MD_DISK_SYNC		2 /* disk is in sync with the raid set */
>  #define MD_DISK_REMOVED		3 /* disk is in sync with the raid set */
> +#define MD_DISK_CLUSTER_ADD     4 /* Initiate a disk add across the cluster
> +				   * For clustered environments only.
> +				   */
> +#define MD_DISK_CANDIDATE	5 /* disk is added as spare (local) until confirmed
> +				   * For clustered environments only.
> +				   */
>  
>  #define	MD_DISK_WRITEMOSTLY	9 /* disk is "write-mostly" is RAID1 config.
>  				   * read requests will only be sent here in
> @@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s {
>  #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
>  #define MD_SB_BLOCK_VOLUME	4 /* block activation of array, other arrays
>  				   * in container can be activated */
> +#define MD_SB_CLUSTERED		5 /* MD is clustered  */
>  #define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
>  
>  typedef struct mdp_superblock_s {
> diff --git a/md_u.h b/md_u.h
> index be9868a..76068d6 100644
> --- a/md_u.h
> +++ b/md_u.h
> @@ -44,6 +44,7 @@
>  #define STOP_ARRAY		_IO (MD_MAJOR, 0x32)
>  #define STOP_ARRAY_RO		_IO (MD_MAJOR, 0x33)
>  #define RESTART_ARRAY_RW	_IO (MD_MAJOR, 0x34)
> +#define CLUSTERED_DISK_NACK	_IO (MD_MAJOR, 0x35)
>  
>  typedef struct mdu_version_s {
>  	int major;
> diff --git a/mdadm.8.in b/mdadm.8.in
> index c015cbf..6873cc7 100644
> --- a/mdadm.8.in
> +++ b/mdadm.8.in
> @@ -1405,6 +1405,15 @@ will avoid reading from these devices if possible.
>  .BR \-\-readwrite
>  Subsequent devices that are added or re\-added will have the 'write-mostly'
>  flag cleared.
> +.TP
> +.BR \-\-cluster\-confirm
> +Confirm the existence of the device. This is issued in response to an \-\-add
> +request by a node in a cluster. When a node adds a device it sends a message
> +to all nodes in the cluster to look for a device with a UUID. This translates
> +to a udev notification with the UUID of the device to be added and the slot
> +number. The receiving node must acknowledge this message
> +with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
> +the device is found or <slot>:missing in case the device is not found.
>  
>  .P
>  Each of these options requires that the first device listed is the array
> diff --git a/mdadm.c b/mdadm.c
> index 6963a09..5b4b3ef 100644
> --- a/mdadm.c
> +++ b/mdadm.c
> @@ -196,6 +196,7 @@ int main(int argc, char *argv[])
>  		case 'f':
>  		case Fail:
>  		case ReAdd: /* re-add */
> +		case ClusterConfirm:
>  			if (!mode) {
>  				newmode = MANAGE;
>  				shortopt = short_bitmap_options;
> @@ -933,6 +934,9 @@ int main(int argc, char *argv[])
>  					   * remove the device */
>  			devmode = 'f';
>  			continue;
> +		case O(MANAGE, ClusterConfirm):
> +			devmode = 'c';
> +			continue;
>  		case O(MANAGE,Replace):
>  			/* Mark these devices for replacement */
>  			devmode = 'R';
> diff --git a/mdadm.h b/mdadm.h
> index f56d9d6..00c726e 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -346,6 +346,7 @@ enum special_options {
>  	Action,
>  	Nodes,
>  	ClusterName,
> +	ClusterConfirm,
>  };
>  
>  enum prefix_standard {
> @@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]);
>  extern int parse_layout_10(char *layout);
>  extern int parse_layout_faulty(char *layout);
>  extern long parse_num(char *num);
> +extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
>  extern int check_ext2(int fd, char *name);
>  extern int check_reiser(int fd, char *name);
>  extern int check_raid(int fd, char *name);
> diff --git a/util.c b/util.c
> index ed9a745..1d82fc7 100644
> --- a/util.c
> +++ b/util.c
> @@ -273,6 +273,17 @@ long parse_num(char *num)
>  }
>  #endif
>  
> +int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
> +{
> +	char *dev;
> +	*slot = strtoul(input, &dev, 10);
> +	if (dev[0] == ':')
> +		*devname = dev+1;
> +	else
> +		return -1;
> +	return 0;
> +}

The logic here hurts my brain :-(

 *slot = strtoul(input, &dev, 10);
 if (dev == input || dev[0] != ':')
     return -1;
 *devname = dev+1;
 return 0;

> +
>  void remove_partitions(int fd)
>  {
>  	/* remove partitions from this block devices.

Thanks,
NeilBrown

Attachment: pgpdPTEFaxnDi.pgp
Description: OpenPGP digital signature


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux