Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello Li Xiao Keng,

On Mon, 2023-04-17 at 22:01 +0800, Li Xiao Keng wrote:
> When we add a new disk to a raid, it may return -EBUSY.
> 
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
> 
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
> 
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
> 
> Signed-off-by: Li Xiao Keng <lixiaokeng@xxxxxxxxxx>
> Signed-off-by: Guanqin Miao <miaoguanqin@xxxxxxxxxx>

I don't feel familiar enough with the mdadm code to write an
authoritative review. In particular, I don't fully understand the 
intended semantics of map_lock(); thus while I believe the way you 
are using this lock is correct, I can't assert this with certainty.
Jes, Coly, or someone else should double-check that.

Anyway, I'll try to get the communication on this issue going again.

I think you should expand some more on your commit description. It
makes sense for people who followed the previous discussion, but it
should be self-explanatory also for people reading the commit 5y from
now.

Other than that, I only have one nitpick (see below).

Regards
Martin

> ---
>  Assemble.c |  5 ++++-
>  Manage.c   | 25 +++++++++++++++++--------
>  2 files changed, 21 insertions(+), 9 deletions(-)
> 
> diff --git a/Assemble.c b/Assemble.c
> index 49804941..086890ed 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1479,8 +1479,11 @@ try_again:
>          * to our list.  We flag them so that we don't try to re-add,
>          * but can remove if they turn out to not be wanted.
>          */
> -       if (map_lock(&map))
> +       if (map_lock(&map)) {
>                 pr_err("failed to get exclusive lock on mapfile -
> continue anyway...\n");

As you added a "return 1" here, the "continue anyway" message is wrong.
You need to change it.

> +               return 1;
> +       }
> +
>         if (c->update == UOPT_UUID)
>                 mp = NULL;
>         else
> diff --git a/Manage.c b/Manage.c
> index f54de7c6..6a101bae 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>         struct supertype *dev_st;
>         int j;
>         mdu_disk_info_t disc;
> +       struct map_ent *map = NULL;
>  
>         if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>                 if (dv->disposition == 'M')
> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                 disc.raid_disk = 0;
>         }
>  
> +       if (map_lock(&map)) {
> +               pr_err("failed to get exclusive lock on mapfile when
> add disk\n");
> +               return -1;
> +       }
>         if (array->not_persistent==0) {
>                 int dfd;
>                 if (dv->disposition == 'j')
> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                 dfd = dev_open(dv->devname, O_RDWR |
> O_EXCL|O_DIRECT);
>                 if (tst->ss->add_to_super(tst, &disc, dfd,
>                                           dv->devname,
> INVALID_SECTORS))
> -                       return -1;
> +                       goto unlock;
>                 if (tst->ss->write_init_super(tst))
> -                       return -1;
> +                       goto unlock;
>         } else if (dv->disposition == 'A') {
>                 /*  this had better be raid1.
>                  * As we are "--re-add"ing we must find a spare slot
> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         pr_err("add failed for %s: could not get
> exclusive access to container\n",
>                                dv->devname);
>                         tst->ss->free_super(tst);
> -                       return -1;
> +                       goto unlock;
>                 }
>  
>                 /* Check if metadata handler is able to accept the
> drive */
>                 if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER,
> 0, 1, NULL,
>                     0, 0, dv->devname, NULL, 0, 1)) {
>                         close(container_fd);
> -                       return -1;
> +                       goto unlock;
>                 }
>  
>                 Kill(dv->devname, NULL, 0, -1, 0);
> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                                           dv->devname,
> INVALID_SECTORS)) {
>                         close(dfd);
>                         close(container_fd);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 if (!mdmon_running(tst->container_devnm))
>                         tst->ss->sync_metadata(tst);
> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                                dv->devname);
>                         close(container_fd);
>                         tst->ss->free_super(tst);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 sra->array.level = LEVEL_CONTAINER;
>                 /* Need to set data_offset and component_size */
> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         pr_err("add new device to external metadata
> failed for %s\n", dv->devname);
>                         close(container_fd);
>                         sysfs_free(sra);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 ping_monitor(devnm);
>                 sysfs_free(sra);
> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         else
>                                 pr_err("add new device failed for %s
> as %d: %s\n",
>                                        dv->devname, j,
> strerror(errno));
> -                       return -1;
> +                       goto unlock;
>                 }
>                 if (dv->disposition == 'j') {
>                         pr_err("Journal added successfully, making %s
> read-write\n", devname);
> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>         }
>         if (verbose >= 0)
>                 pr_err("added %s\n", dv->devname);
> +       map_unlock(&map);
>         return 1;
> +unlock:
> +       map_unlock(&map);
> +       return -1;
>  }
>  
>  int Manage_remove(struct supertype *tst, int fd, struct mddev_dev
> *dv,





[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux