Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@xxxxxxxxxx> wrote:
>
> Currently the source's export is mounted and unmounted on every
> inter-server copy operation. This patch is an enhancement to delay
> the unmount of the source export for a certain period of time to
> eliminate the mount and unmount overhead on subsequent copy operations.
>
> After a copy operation completes, a delayed task is scheduled to
> unmount the export after a configurable idle time. Each time the
> export is being used again, its expire time is extended to allow
> the export to remain mounted.
>
> The unmount task and the mount operation of the copy request are
> synced to make sure the export is not unmounted while it's being
> used.

Can you tell me what this should apply on top of? It doesn't apply to
5.13-rc2. I know Chuck posted a lot of nfsd patches which I don't
have, is your patch on top of that?

>
> Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
> ---
>  fs/nfsd/netns.h         |   5 ++
>  fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/nfsd/nfs4state.c     |   8 ++
>  fs/nfsd/nfsd.h          |   6 ++
>  fs/nfsd/nfssvc.c        |   3 +
>  include/linux/nfs_ssc.h |  16 ++++
>  6 files changed, 250 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index c330f5bd0cf3..6018e5050cb4 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -21,6 +21,7 @@
>
>  struct cld_net;
>  struct nfsd4_client_tracking_ops;
> +struct nfsd4_ssc_umount;
>
>  enum {
>         /* cache misses due only to checksum comparison failures */
> @@ -176,6 +177,10 @@ struct nfsd_net {
>         unsigned int             longest_chain_cachesize;
>
>         struct shrinker         nfsd_reply_cache_shrinker;
> +
> +       spinlock_t              nfsd_ssc_lock;
> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
> +
>         /* utsname taken from the process that starts the server */
>         char                    nfsd_name[UNX_MAXNODENAME+1];
>  };
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index dd9f38d072dd..892ad72d87ae 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
>  MODULE_PARM_DESC(inter_copy_offload_enable,
>                  "Enable inter server to server copy offload. Default: false");
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
> +               "idle msecs before unmount export from source server");
> +
> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
> +{
> +       bool do_wakeup = false;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (time_after(jiffies, ni->nsui_expire)) {
> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
> +                               continue;
> +
> +                       /* mark being unmount */
> +                       ni->nsui_busy = true;
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       mntput(ni->nsui_vfsmount);
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +
> +                       /* waiters need to start from begin of list */
> +                       list_del(&ni->nsui_list);
> +                       kfree(ni);
> +
> +                       /* wakeup ssc_connect waiters */
> +                       do_wakeup = true;
> +                       continue;
> +               }
> +               break;
> +       }
> +       if (!list_empty(&nu->nsu_list)) {
> +               ni = list_first_entry(&nu->nsu_list,
> +                       struct nfsd4_ssc_umount_item, nsui_list);
> +               nu->nsu_expire = ni->nsui_expire;
> +       } else
> +               nu->nsu_expire = 0;
> +
> +       if (do_wakeup)
> +               wake_up_all(&nu->nsu_waitq);
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +}
> +
> +/*
> + * This is called when nfsd is being shutdown, after all inter_ssc
> + * cleanup were done, to destroy the ssc delayed unmount list.
> + */
> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
> +{
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       nn->nfsd_ssc_umount = 0;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               list_del(&ni->nsui_list);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ni->nsui_vfsmount);
> +               kfree(ni);
> +               spin_lock(&nn->nfsd_ssc_lock);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +       kfree(nu);
> +}
> +
> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> +{
> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> +                                       GFP_KERNEL);
> +       if (!nn->nfsd_ssc_umount)
> +               return;
> +       spin_lock_init(&nn->nfsd_ssc_lock);
> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
> +}
> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
> +#endif
> +
>  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
>  #include <linux/security.h>
>
> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>         char *ipaddr, *dev_name, *raw_data;
>         int len, raw_len;
>         __be32 status = nfserr_inval;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *work = NULL;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +       struct nfsd4_ssc_umount *nu;
> +       DEFINE_WAIT(wait);
>
>         naddr = &nss->u.nl4_addr;
>         tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>                 goto out_free_rawdata;
>         snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>
> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
> +try_again:
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               work = NULL;
> +               goto skip_dul;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
> +                       continue;
> +               /* found a match */
> +               if (ni->nsui_busy) {
> +                       /*  wait - and try again */
> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
> +                               TASK_INTERRUPTIBLE);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +
> +                       /* allow 20secs for mount/unmount for now - revisit */
> +                       if (signal_pending(current) ||
> +                                       (schedule_timeout(20*HZ) == 0)) {
> +                               status = nfserr_eagain;
> +                               kfree(work);
> +                               goto out_free_devname;
> +                       }
> +                       finish_wait(&nu->nsu_waitq, &wait);
> +                       goto try_again;
> +               }
> +               ss_mnt = ni->nsui_vfsmount;
> +               if (refcount_read(&ni->nsui_refcnt) == 0)
> +                       refcount_set(&ni->nsui_refcnt, 1);
> +               else
> +                       refcount_inc(&ni->nsui_refcnt);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               goto out_done;
> +       }
> +       /* create new entry, set busy, insert list, clear busy after mount */
> +       if (work) {
> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
> +               refcount_set(&work->nsui_refcnt, 1);
> +               work->nsui_busy = true;
> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +skip_dul:
> +
>         /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
>         ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
>         module_put(type->owner);
> -       if (IS_ERR(ss_mnt))
> +       if (IS_ERR(ss_mnt)) {
> +               if (work) {
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +                       list_del(&work->nsui_list);
> +                       wake_up_all(&nu->nsu_waitq);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       kfree(work);
> +               }
>                 goto out_free_devname;
> -
> +       }
> +       if (work) {
> +               spin_lock(&nn->nfsd_ssc_lock);
> +               work->nsui_vfsmount = ss_mnt;
> +               work->nsui_busy = false;
> +               wake_up_all(&nu->nsu_waitq);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +       }
> +out_done:
>         status = 0;
>         *mount = ss_mnt;
>
> @@ -1301,10 +1464,55 @@ static void
>  nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
>                         struct nfsd_file *dst)
>  {
> +       bool found = false;
> +       bool resched = false;
> +       long timeout;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount *nu;
> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
> +
>         nfs42_ssc_close(src->nf_file);
> -       fput(src->nf_file);
>         nfsd_file_put(dst);
> -       mntput(ss_mnt);
> +       fput(src->nf_file);
> +
> +       if (!nn) {
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               /* delayed unmount list not setup */
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
> +                       list_del(&ni->nsui_list);
> +                       /*
> +                        * vfsmount can be shared by multiple exports,
> +                        * decrement refcnt and schedule delayed task
> +                        * if it drops to 0.
> +                        */
> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
> +                               resched = true;
> +                       ni->nsui_expire = jiffies + timeout;
> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
> +                       found = true;
> +                       break;
> +               }
> +       }
> +       if (!found) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       if (resched && !nu->nsu_expire)
> +               nu->nsu_expire = ni->nsui_expire;
> +       spin_unlock(&nn->nfsd_ssc_lock);
>  }
>
>  #else /* CONFIG_NFSD_V4_2_INTER_SSC */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 97447a64bad0..0cdc898f06c9 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
>                 list_del_init(&nbl->nbl_lru);
>                 free_blocked_lock(nbl);
>         }
> +
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       /* service the inter-copy delayed unmount list */
> +       nfsd4_ssc_expire_umount(nn);
> +#endif
>  out:
>         new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>         return new_timeo;
> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
>
>         nfsd4_client_tracking_exit(net);
>         nfs4_state_destroy_net(net);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_shutdown_umount(nn);
> +#endif
>         mntput(nn->nfsd_mnt);
>  }
>
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8bdc37aa2c2e..cf86d9010974 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
>  extern int nfsd4_is_junction(struct dentry *dentry);
>  extern int register_cld_notifier(void);
>  extern void unregister_cld_notifier(void);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
> +#endif
> +
>  #else /* CONFIG_NFSD_V4 */
>  static inline int nfsd4_is_junction(struct dentry *dentry)
>  {
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6de406322106..ce89a8fe07ff 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
>         if (ret)
>                 goto out_filecache;
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_init_umount_work(nn);
> +#endif
>         nn->nfsd_net_up = true;
>         return 0;
>
> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
> index f5ba0fbff72f..18afe62988b5 100644
> --- a/include/linux/nfs_ssc.h
> +++ b/include/linux/nfs_ssc.h
> @@ -8,6 +8,7 @@
>   */
>
>  #include <linux/nfs_fs.h>
> +#include <linux/sunrpc/svc.h>
>
>  extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
>
> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
>         if (nfs_ssc_client_tbl.ssc_nfs4_ops)
>                 (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
>  }
> +
> +struct nfsd4_ssc_umount_item {
> +       struct list_head nsui_list;
> +       bool nsui_busy;
> +       refcount_t nsui_refcnt;
> +       unsigned long nsui_expire;
> +       struct vfsmount *nsui_vfsmount;
> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
> +};
> +
> +struct nfsd4_ssc_umount {
> +       struct list_head nsu_list;
> +       unsigned long nsu_expire;
> +       wait_queue_head_t nsu_waitq;
> +};
>  #endif
>
>  /*
> --
> 2.9.5
>



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux