[patch/rfc] allow exported (and *not* exported) filesystems to be unmounted.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Bruce,
 this is a little issue that seems to keep coming up so I thought it might be
 time to fix it.

 As you know, a filesystem that is exported cannot be unmounted as the export
 cache holds a reference to it.  Though if it hasn't been accessed for a
 while then it can.

 What I hadn't realised before is that sometimes *non*-exported filesystems
 can be pinned too.  A negative entry in the cache can pin a filesystem just
 as easily as a positive entry.
 An amusing, if somewhat contrived, example is that if you export '/' with
 crossmnt and:

    mount localhost:/ /mnt
    ls -l /
    umount /mnt

 the umount might fail.  This is because the "ls -l" tried to export every
 filesystem found mounted in '/'.  The export of "/mnt" failed of course
 because you cannot re-export an NFS filesystem.  But it is still in the
 cache.
 An 'exportfs -f' fixes this, but shouldn't be necessary.

 So this RFC patch makes it possible to register a notifier which gets
 called on unmount, and links the export table into the notifier chain.

 The "atomic" flavour is used so that notifiers can be registered under a
 spin_lock.  This is needed for "expkey_update" as ->update is called under a
 lock.

 As notifier callees cannot unregister themselves, the unregister needs to
 happen in a workqueue item, and the unmount will wait for that.

 It seems to work for me (once I figured out all the locking issues and found
 a way to make it work without deadlocking).

 If you are OK with it in general I'll make it into a proper patch series and
 include Al Viro for the VFS bits.

Thanks,
NeilBrown

diff --git a/fs/mount.h b/fs/mount.h
index cd50079..544ea17 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -44,6 +44,9 @@ struct mount {
 	struct hlist_head mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
 #endif
+	/* Notifier chain to call when trying to unmount */
+	struct atomic_notifier_head mnt_holders;
+
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
diff --git a/fs/namespace.c b/fs/namespace.c
index 341d3f5..123fcba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -160,6 +160,37 @@ unsigned int mnt_get_count(struct mount *mnt)
 #endif
 }
 
+/* Each mount has a notifier call chain which is called on unmount
+ * so that in-kernel users can let go.  This is particularly used
+ * by nfsd.
+ * As a notify callee cannot unregister the notify block directly
+ * due to recursive locking, and as it must be unregistered before the
+ * unmount can be allowed to complete (as unregistering afterwards is
+ * impossible), notify callees should arrange for the
+ * umount_notifier_unregister() to happen via a scheduled worker.
+ * umount_notifier_call will wait for scheduled workers to finish.
+ * All callees should return NOTIFY_OK so that umount_notifier_call
+ * knows that at least one was called, and so to run flush_scheduled_work().
+ */
+static void umount_notifier_call(struct mount *mnt)
+{
+	if (atomic_notifier_call_chain(&mnt->mnt_holders, 0, NULL))
+		flush_scheduled_work();
+}
+int umount_notifier_register(struct vfsmount *v, struct notifier_block *nb)
+{
+	struct mount *mnt = real_mount(v);
+	return atomic_notifier_chain_register(&mnt->mnt_holders, nb);
+}
+EXPORT_SYMBOL_GPL(umount_notifier_register);
+
+int umount_notifier_unregister(struct vfsmount *v, struct notifier_block *nb)
+{
+	struct mount *mnt = real_mount(v);
+	return atomic_notifier_chain_unregister(&mnt->mnt_holders, nb);
+}
+EXPORT_SYMBOL_GPL(umount_notifier_unregister);
+
 static struct mount *alloc_vfsmnt(const char *name)
 {
 	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -198,6 +229,8 @@ static struct mount *alloc_vfsmnt(const char *name)
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
+		ATOMIC_INIT_NOTIFIER_HEAD(&mnt->mnt_holders);
+
 	}
 	return mnt;
 
@@ -1201,6 +1234,11 @@ static int do_umount(struct mount *mnt, int flags)
 		sb->s_op->umount_begin(sb);
 	}
 
+	/* Some in-kernel users (nfsd) might need to be asked to release
+	 * the filesystem
+	 */
+	umount_notifier_call(mnt);
+
 	/*
 	 * No sense to grab the lock for this test, but test itself looks
 	 * somewhat bogus. Suggestions for better replacement?
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5f38ea3..e4dbd5b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -46,8 +46,11 @@ static void expkey_put(struct kref *ref)
 	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
 
 	if (test_bit(CACHE_VALID, &key->h.flags) &&
-	    !test_bit(CACHE_NEGATIVE, &key->h.flags))
+	    !test_bit(CACHE_NEGATIVE, &key->h.flags)) {
+		umount_notifier_unregister(key->ek_path.mnt,
+					   &key->ek_umount);
 		path_put(&key->ek_path);
+	}
 	auth_domain_put(key->ek_client);
 	kfree(key);
 }
@@ -71,6 +74,16 @@ static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_
 					    struct svc_expkey *old);
 static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *);
 
+static int purge_expkey(struct notifier_block *nb,
+			unsigned long mode, void *unused)
+{
+	struct svc_expkey *ek = container_of(nb, struct svc_expkey, ek_umount);
+	ek->h.expiry_time = 1;
+	ek->cd->nextcheck = 1;
+	queue_sunrpc_cache_flush();
+	return NOTIFY_OK;
+}
+
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client fsidtype fsid [path] */
@@ -123,8 +136,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	if (key.h.expiry_time == 0)
 		goto out;
 
-	key.ek_client = dom;	
+	key.ek_client = dom;
 	key.ek_fsidtype = fsidtype;
+	key.cd = cd;
 	memcpy(key.ek_fsid, buf, len);
 
 	ek = svc_expkey_lookup(cd, &key);
@@ -212,6 +226,7 @@ static inline void expkey_init(struct cache_head *cnew,
 	kref_get(&item->ek_client->ref);
 	new->ek_client = item->ek_client;
 	new->ek_fsidtype = item->ek_fsidtype;
+	new->cd = item->cd;
 
 	memcpy(new->ek_fsid, item->ek_fsid, sizeof(new->ek_fsid));
 }
@@ -223,7 +238,10 @@ static inline void expkey_update(struct cache_head *cnew,
 	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
 
 	new->ek_path = item->ek_path;
-	path_get(&item->ek_path);
+	path_get(&new->ek_path);
+	new->ek_umount.notifier_call = purge_expkey;
+	umount_notifier_register(new->ek_path.mnt,
+				 &new->ek_umount);
 }
 
 static struct cache_head *expkey_alloc(void)
@@ -307,6 +325,7 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 static void svc_export_put(struct kref *ref)
 {
 	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+	umount_notifier_unregister(exp->ex_path.mnt, &exp->ex_umount);
 	path_put(&exp->ex_path);
 	auth_domain_put(exp->ex_client);
 	nfsd4_fslocs_free(&exp->ex_fslocs);
@@ -653,6 +672,16 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b)
 		orig->ex_path.mnt == new->ex_path.mnt;
 }
 
+static int purge_export(struct notifier_block *nb,
+			unsigned long mode, void *unused)
+{
+	struct svc_export *exp = container_of(nb, struct svc_export, ex_umount);
+	exp->h.expiry_time = 1;
+	exp->cd->nextcheck = 1;
+	queue_sunrpc_cache_flush();
+	return NOTIFY_OK;
+}
+
 static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 {
 	struct svc_export *new = container_of(cnew, struct svc_export, h);
@@ -662,6 +691,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_client = item->ex_client;
 	new->ex_path.dentry = dget(item->ex_path.dentry);
 	new->ex_path.mnt = mntget(item->ex_path.mnt);
+	new->ex_umount.notifier_call = purge_export;
+	umount_notifier_register(new->ex_path.mnt, &new->ex_umount);
 	new->ex_fslocs.locations = NULL;
 	new->ex_fslocs.locations_count = 0;
 	new->ex_fslocs.migrated = 0;
@@ -766,6 +797,7 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
 
 	key.ek_client = clp;
 	key.ek_fsidtype = fsid_type;
+	key.cd = cd;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
 
 	ek = svc_expkey_lookup(cd, &key);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 73005f9..1e18926 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -76,6 +76,10 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
+struct notifier_block;
+extern int umount_notifier_register(struct vfsmount *v, struct notifier_block *nb);
+extern int umount_notifier_unregister(struct vfsmount *v, struct notifier_block *nb);
+
 extern dev_t name_to_dev_t(char *name);
 
 #endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 7898c99..696cf62 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -49,6 +49,7 @@ struct svc_export {
 	struct auth_domain *	ex_client;
 	int			ex_flags;
 	struct path		ex_path;
+	struct notifier_block	ex_umount;
 	kuid_t			ex_anon_uid;
 	kgid_t			ex_anon_gid;
 	int			ex_fsid;
@@ -71,6 +72,8 @@ struct svc_expkey {
 	u32			ek_fsid[6];
 
 	struct path		ek_path;
+	struct notifier_block	ek_umount;
+	struct cache_detail	*cd;
 };
 
 #define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 303399b..ed23c31 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -195,6 +195,7 @@ static inline int cache_valid(struct cache_head *h)
 extern int cache_check(struct cache_detail *detail,
 		       struct cache_head *h, struct cache_req *rqstp);
 extern void cache_flush(void);
+extern void queue_sunrpc_cache_flush(void);
 extern void cache_purge(struct cache_detail *detail);
 #define NEVER (0x7FFFFFFF)
 extern void __init cache_initialize(void);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 25d58e76..bf7d351 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -471,10 +471,15 @@ static int cache_clean(void)
 /*
  * We want to regularly clean the cache, so we need to schedule some work ...
  */
+static int cache_flush_required = 0;
 static void do_cache_clean(struct work_struct *work)
 {
 	int delay = 5;
-	if (cache_clean() == -1)
+
+	if (cache_flush_required) {
+		cache_flush_required = 0;
+		cache_flush();
+	} else if (cache_clean() == -1)
 		delay = round_jiffies_relative(30*HZ);
 
 	if (list_empty(&cache_list))
@@ -508,6 +513,13 @@ void cache_purge(struct cache_detail *detail)
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
+void queue_sunrpc_cache_flush(void)
+{
+	cache_flush_required = 1;
+	cancel_delayed_work(&cache_cleaner);
+	schedule_delayed_work(&cache_cleaner, 0);
+}
+EXPORT_SYMBOL_GPL(queue_sunrpc_cache_flush);
 
 /*
  * Deferral and Revisiting of Requests.

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux