Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
---
fs/nfsd/netns.h | 5 ++
fs/nfsd/nfs4proc.c | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/nfs4state.c | 8 ++
fs/nfsd/nfsd.h | 6 ++
fs/nfsd/nfssvc.c | 3 +
include/linux/nfs_ssc.h | 16 ++++
6 files changed, 250 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index c330f5bd0cf3..6018e5050cb4 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -21,6 +21,7 @@
struct cld_net;
struct nfsd4_client_tracking_ops;
+struct nfsd4_ssc_umount;
enum {
/* cache misses due only to checksum comparison failures */
@@ -176,6 +177,10 @@ struct nfsd_net {
unsigned int longest_chain_cachesize;
struct shrinker nfsd_reply_cache_shrinker;
+
+ spinlock_t nfsd_ssc_lock;
+ struct nfsd4_ssc_umount *nfsd_ssc_umount;
+
/* utsname taken from the process that starts the server */
char nfsd_name[UNX_MAXNODENAME+1];
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dd9f38d072dd..892ad72d87ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
MODULE_PARM_DESC(inter_copy_offload_enable,
"Enable inter server to server copy offload. Default: false");
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static int nfsd4_ssc_umount_timeout = 900000; /* idle time in msecs; default is 15 minutes */
+module_param(nfsd4_ssc_umount_timeout, int, 0644);
+MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
+ "idle msecs before unmount export from source server");
+
+/*
+ * Unmount entries on the delayed unmount list whose expiry time has
+ * passed and that are no longer referenced.  Called from
+ * nfs4_laundromat().
+ *
+ * The scan stops at the first entry that has not expired yet.  Expired
+ * entries with a non-zero refcount are skipped.  mntput() is called
+ * with nfsd_ssc_lock dropped; the entry is flagged busy for that window
+ * so nfsd4_interssc_connect() waits instead of reusing it.
+ */
+void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+{
+	bool do_wakeup = false;
+	struct nfsd4_ssc_umount_item *ni;
+	struct nfsd4_ssc_umount *nu;
+
+	spin_lock(&nn->nfsd_ssc_lock);
+	nu = nn->nfsd_ssc_umount;
+	if (!nu) {
+		/* delayed unmount list not set up */
+		spin_unlock(&nn->nfsd_ssc_lock);
+		return;
+	}
+rescan:
+	list_for_each_entry(ni, &nu->nsu_list, nsui_list) {
+		if (!time_after(jiffies, ni->nsui_expire))
+			break;
+		if (refcount_read(&ni->nsui_refcnt) > 0)
+			continue;
+
+		/* mark as being unmounted */
+		ni->nsui_busy = true;
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ni->nsui_vfsmount);
+		spin_lock(&nn->nfsd_ssc_lock);
+
+		list_del(&ni->nsui_list);
+		kfree(ni);
+
+		/* wake up ssc_connect waiters once the scan is finished */
+		do_wakeup = true;
+
+		/*
+		 * Other entries may have been removed or re-queued while
+		 * the lock was dropped, so a cached next pointer cannot
+		 * be trusted.  Start again from the head of the list.
+		 */
+		goto rescan;
+	}
+
+	/* record the expiry of the entry now at the head, or 0 if none */
+	if (list_empty(&nu->nsu_list)) {
+		nu->nsu_expire = 0;
+	} else {
+		ni = list_first_entry(&nu->nsu_list,
+				      struct nfsd4_ssc_umount_item, nsui_list);
+		nu->nsu_expire = ni->nsui_expire;
+	}
+
+	if (do_wakeup)
+		wake_up_all(&nu->nsu_waitq);
+	spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+/*
+ * This is called when nfsd is being shut down, after all inter_ssc
+ * cleanup has been done, to destroy the ssc delayed unmount list.
+ *
+ * Every remaining entry is unmounted and freed, then the list head
+ * itself is freed.  nn->nfsd_ssc_umount is cleared under the lock
+ * before the list is drained.
+ */
+void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
+{
+	struct nfsd4_ssc_umount_item *ni;
+	struct nfsd4_ssc_umount *nu;
+
+	spin_lock(&nn->nfsd_ssc_lock);
+	nu = nn->nfsd_ssc_umount;
+	if (!nu) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		return;
+	}
+	/* detach the list so later lookups see it as not set up */
+	nn->nfsd_ssc_umount = NULL;
+
+	/*
+	 * Always take the current head: the lock is dropped around
+	 * mntput(), so a next pointer cached before the unlock could be
+	 * stale by the time the lock is re-taken.
+	 */
+	while (!list_empty(&nu->nsu_list)) {
+		ni = list_first_entry(&nu->nsu_list,
+				      struct nfsd4_ssc_umount_item, nsui_list);
+		list_del(&ni->nsui_list);
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ni->nsui_vfsmount);
+		kfree(ni);
+		spin_lock(&nn->nfsd_ssc_lock);
+	}
+	spin_unlock(&nn->nfsd_ssc_lock);
+	kfree(nu);
+}
+
+/*
+ * Set up the per-net delayed unmount list used by inter server to
+ * server copy.
+ *
+ * If the allocation fails, nn->nfsd_ssc_umount is left NULL; the other
+ * users of the list check for that under nfsd_ssc_lock and fall back to
+ * not using it.
+ */
+void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
+{
+	struct nfsd4_ssc_umount *nu;
+
+	/*
+	 * The lock is taken by every user before it checks whether the
+	 * list exists, so it must be initialised even when the
+	 * allocation below fails.
+	 */
+	spin_lock_init(&nn->nfsd_ssc_lock);
+
+	nu = kzalloc(sizeof(*nu), GFP_KERNEL);
+	if (nu) {
+		INIT_LIST_HEAD(&nu->nsu_list);
+		init_waitqueue_head(&nu->nsu_waitq);
+	}
+	/* publish only after the list and wait queue are initialised */
+	nn->nfsd_ssc_umount = nu;
+}
+EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
+#endif
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
@@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
char *ipaddr, *dev_name, *raw_data;
int len, raw_len;
__be32 status = nfserr_inval;
+ struct nfsd4_ssc_umount_item *ni = 0;
+ struct nfsd4_ssc_umount_item *work = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd4_ssc_umount *nu;
+ DEFINE_WAIT(wait);
naddr = &nss->u.nl4_addr;
tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
@@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
goto out_free_rawdata;
snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+try_again:
+ spin_lock(&nn->nfsd_ssc_lock);
+ if (!nn->nfsd_ssc_umount) {
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(work);
+ work = NULL;
+ goto skip_dul;
+ }
+ nu = nn->nfsd_ssc_umount;
+ list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
+ if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
+ continue;
+ /* found a match */
+ if (ni->nsui_busy) {
+ /* wait - and try again */
+ prepare_to_wait(&nu->nsu_waitq, &wait,
+ TASK_INTERRUPTIBLE);
+ spin_unlock(&nn->nfsd_ssc_lock);
+
+ /* allow 20secs for mount/unmount for now - revisit */
+ if (signal_pending(current) ||
+ (schedule_timeout(20*HZ) == 0)) {
+ status = nfserr_eagain;
+ kfree(work);
+ goto out_free_devname;
+ }
+ finish_wait(&nu->nsu_waitq, &wait);
+ goto try_again;
+ }
+ ss_mnt = ni->nsui_vfsmount;
+ if (refcount_read(&ni->nsui_refcnt) == 0)
+ refcount_set(&ni->nsui_refcnt, 1);
+ else
+ refcount_inc(&ni->nsui_refcnt);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(work);
+ goto out_done;
+ }
+ /* create new entry, set busy, insert list, clear busy after mount */
+ if (work) {
+ strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
+ refcount_set(&work->nsui_refcnt, 1);
+ work->nsui_busy = true;
+ list_add_tail(&work->nsui_list, &nu->nsu_list);
+ }
+ spin_unlock(&nn->nfsd_ssc_lock);
+skip_dul:
+
/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
module_put(type->owner);
- if (IS_ERR(ss_mnt))
+ if (IS_ERR(ss_mnt)) {
+ if (work) {
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_del(&work->nsui_list);
+ wake_up_all(&nu->nsu_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(work);
+ }
goto out_free_devname;
-
+ }
+ if (work) {
+ spin_lock(&nn->nfsd_ssc_lock);
+ work->nsui_vfsmount = ss_mnt;
+ work->nsui_busy = false;
+ wake_up_all(&nu->nsu_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ }
+out_done:
status = 0;
*mount = ss_mnt;
@@ -1301,10 +1464,55 @@ static void
nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
struct nfsd_file *dst)
{
+	/*
+	 * Release the files used by an inter-server copy.  Instead of
+	 * unmounting the source server right away, drop the reference
+	 * held on its delayed unmount entry, give the entry a fresh
+	 * expiry time and move it to the tail of the list.  If the list
+	 * is not set up, or no entry matches ss_mnt's superblock, ss_mnt
+	 * is unmounted immediately.
+	 */
+	bool found = false;
+	bool resched = false;
+	long timeout;
+	struct nfsd4_ssc_umount_item *ni = NULL;
+	struct nfsd4_ssc_umount *nu;
+	struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
+
nfs42_ssc_close(src->nf_file);
- fput(src->nf_file);
nfsd_file_put(dst);
- mntput(ss_mnt);
+	fput(src->nf_file);
+
+	if (!nn) {
+		mntput(ss_mnt);
+		return;
+	}
+	spin_lock(&nn->nfsd_ssc_lock);
+	nu = nn->nfsd_ssc_umount;
+	if (!nu) {
+		/* delayed unmount list not set up */
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ss_mnt);
+		return;
+	}
+	timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+	list_for_each_entry(ni, &nu->nsu_list, nsui_list) {
+		/*
+		 * A busy entry is either still being mounted, in which
+		 * case nsui_vfsmount has not been set yet, or is being
+		 * unmounted by nfsd4_ssc_expire_umount().  Its vfsmount
+		 * must not be dereferenced in either case.
+		 */
+		if (ni->nsui_busy)
+			continue;
+		if (ni->nsui_vfsmount->mnt_sb != ss_mnt->mnt_sb)
+			continue;
+
+		/*
+		 * vfsmount can be shared by multiple exports,
+		 * decrement refcnt and schedule delayed task
+		 * if it drops to 0.
+		 */
+		if (refcount_dec_and_test(&ni->nsui_refcnt))
+			resched = true;
+		ni->nsui_expire = jiffies + timeout;
+		/* re-queue at the tail; the loop exits right after */
+		list_move_tail(&ni->nsui_list, &nu->nsu_list);
+		found = true;
+		break;
+	}
+	if (!found) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ss_mnt);
+		return;
+	}
+	if (resched && !nu->nsu_expire)
+		nu->nsu_expire = ni->nsui_expire;
+	spin_unlock(&nn->nfsd_ssc_lock);
}
#else /* CONFIG_NFSD_V4_2_INTER_SSC */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 97447a64bad0..0cdc898f06c9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
list_del_init(&nbl->nbl_lru);
free_blocked_lock(nbl);
}
+
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ /* service the inter-copy delayed unmount list */
+ nfsd4_ssc_expire_umount(nn);
+#endif
out:
new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
@@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_shutdown_umount(nn);
+#endif
mntput(nn->nfsd_mnt);
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8bdc37aa2c2e..cf86d9010974 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
extern int nfsd4_is_junction(struct dentry *dentry);
extern int register_cld_notifier(void);
extern void unregister_cld_notifier(void);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
+extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
+#endif
+
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
{
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6de406322106..ce89a8fe07ff 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
if (ret)
goto out_filecache;
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_init_umount_work(nn);
+#endif
nn->nfsd_net_up = true;
return 0;
diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
index f5ba0fbff72f..18afe62988b5 100644
--- a/include/linux/nfs_ssc.h
+++ b/include/linux/nfs_ssc.h
@@ -8,6 +8,7 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/sunrpc/svc.h>
extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
@@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
if (nfs_ssc_client_tbl.ssc_nfs4_ops)
(*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
}
+
+struct nfsd4_ssc_umount_item { /* one entry on the inter-copy delayed unmount list */
+ struct list_head nsui_list; /* link in nfsd4_ssc_umount.nsu_list */
+ bool nsui_busy; /* true while the entry is being mounted or unmounted */
+ refcount_t nsui_refcnt; /* taken in nfsd4_interssc_connect(), dropped in nfsd4_cleanup_inter_ssc() */
+ unsigned long nsui_expire; /* jiffies value after which an unreferenced entry is unmounted */
+ struct vfsmount *nsui_vfsmount; /* set once vfs_kern_mount() has succeeded */
+ char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; /* source server address string; lookup key */
+};
+
+struct nfsd4_ssc_umount { /* per-net delayed unmount state, guarded by nfsd_net.nfsd_ssc_lock */
+ struct list_head nsu_list; /* list of struct nfsd4_ssc_umount_item */
+ unsigned long nsu_expire; /* nsui_expire of the first list entry, or 0 when the list is empty */
+ wait_queue_head_t nsu_waitq; /* nfsd4_interssc_connect() waits here for a busy entry */
+};
#endif
/*
--
2.9.5