Re: [PATCH v3] Fix race of "mdadm --add" and "mdadm --incremental"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



ping

On 2023/9/7 19:37, Li Xiao Keng wrote:
> There is a raid1 with sda and sdb. And we add sdc to this raid,
> it may return -EBUSY.
> 
> The main process of --add:
> 1. dev_open(sdc) in Manage_add
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(fd) in store_super1
> 4. close(di->fd) in write_init_super1
> 5. ioctl(ADD_NEW_DISK)
> 
> Step 2 and 3 will add sdc to metadata of raid1. There will be
> udev(change of sdc) event after step4. Then "/usr/sbin/mdadm
> --incremental --export $devnode --offroot $env{DEVLINKS}"
> will be run, and the sdc will be added to the raid1. Then
> step 5 will return -EBUSY because it checks if device isn't
> claimed in md_import_device()->lock_rdev()->blkdev_get_by_dev()
> ->blkdev_get().
> 
> It will be confusing for users because sdc is added first time.
> The "incremental" will get map_lock before add sdc to raid1.
> So we add map_lock before write_init_super in "mdadm --add"
> to fix the race of "add" and "incremental".
> 
> Signed-off-by: Li Xiao Keng <lixiaokeng@xxxxxxxxxx>
> Signed-off-by: Guanqin Miao <miaoguanqin@xxxxxxxxxx>
> ---
>  Manage.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/Manage.c b/Manage.c
> index f997b163..075dd720 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -704,6 +704,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	struct supertype *dev_st;
>  	int j;
>  	mdu_disk_info_t disc;
> +	struct map_ent *map = NULL;
> 
>  	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>  		if (dv->disposition == 'M')
> @@ -907,6 +908,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  		disc.raid_disk = 0;
>  	}
> 
> +	if (map_lock(&map))
> +		pr_err("failed to get exclusive lock on mapfile when add disk\n");
> +
>  	if (array->not_persistent==0) {
>  		int dfd;
>  		if (dv->disposition == 'j')
> @@ -918,9 +922,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>  		if (tst->ss->add_to_super(tst, &disc, dfd,
>  					  dv->devname, INVALID_SECTORS))
> -			return -1;
> +			goto unlock;
>  		if (tst->ss->write_init_super(tst))
> -			return -1;
> +			goto unlock;
>  	} else if (dv->disposition == 'A') {
>  		/*  this had better be raid1.
>  		 * As we are "--re-add"ing we must find a spare slot
> @@ -978,14 +982,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			pr_err("add failed for %s: could not get exclusive access to container\n",
>  			       dv->devname);
>  			tst->ss->free_super(tst);
> -			return -1;
> +			goto unlock;
>  		}
> 
>  		/* Check if metadata handler is able to accept the drive */
>  		if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>  		    0, 0, dv->devname, NULL, 0, 1)) {
>  			close(container_fd);
> -			return -1;
> +			goto unlock;
>  		}
> 
>  		Kill(dv->devname, NULL, 0, -1, 0);
> @@ -994,7 +998,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  					  dv->devname, INVALID_SECTORS)) {
>  			close(dfd);
>  			close(container_fd);
> -			return -1;
> +			goto unlock;
>  		}
>  		if (!mdmon_running(tst->container_devnm))
>  			tst->ss->sync_metadata(tst);
> @@ -1005,7 +1009,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			       dv->devname);
>  			close(container_fd);
>  			tst->ss->free_super(tst);
> -			return -1;
> +			goto unlock;
>  		}
>  		sra->array.level = LEVEL_CONTAINER;
>  		/* Need to set data_offset and component_size */
> @@ -1020,7 +1024,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			pr_err("add new device to external metadata failed for %s\n", dv->devname);
>  			close(container_fd);
>  			sysfs_free(sra);
> -			return -1;
> +			goto unlock;
>  		}
>  		ping_monitor(devnm);
>  		sysfs_free(sra);
> @@ -1034,7 +1038,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			else
>  				pr_err("add new device failed for %s as %d: %s\n",
>  				       dv->devname, j, strerror(errno));
> -			return -1;
> +			goto unlock;
>  		}
>  		if (dv->disposition == 'j') {
>  			pr_err("Journal added successfully, making %s read-write\n", devname);
> @@ -1045,7 +1049,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	}
>  	if (verbose >= 0)
>  		pr_err("added %s\n", dv->devname);
> +	map_unlock(&map);
>  	return 1;
> +unlock:
> +	map_unlock(&map);
> +	return -1;
>  }
> 
>  int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
> 



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux