Re: [PATCH 03/34] teach move_mount(2) to work with OPEN_TREE_CLONE [ver #12]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Okay, I put in a tracepoint (patch attached) and got a trace from the life of
the offending mount object.  I've cropped non-useful information out of the
lines, inserted a blank line every time the usage count goes down to 2 and
dropped most of the lines generated by fsnotify.

Most of the lines are irrelevant.  You can see system calls being issued and
commands being run that make no difference on balance.

What matters are the first four lines, the two mounts and the umount.  You can
see it go splat on the last line when the usage count has become poisoned.

            bash-3597  M=93785f8a u=1 ALC sp=clone_mnt+0x31/0x30a
            bash-3597  M=93785f8a u=2 GET sp=do_dentry_open+0x24/0x2d3
            bash-3597  M=93785f8a u=1 PUT sp=__se_sys_open_tree+0x195/0x1da

^--- These three lines look like the open_tree() syscall done by test-fsmount.

            bash-3597  M=93785f8a u=2 GET sp=set_fs_pwd+0x37/0xdb

^--- And this the fchdir() syscall from test-fsmount.  At this point u=2 would
     seem correct as the subtree isn't actually mounted anywhere (1 for pwd, 1
     for fd).

v--- test-fsmount then called execl() on bash, which did some stat'ing to find
     the executable...

            bash-3597  M=93785f8a u=3 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=2 PUT sp=vfs_statx+0x95/0xcc

            bash-3597  M=93785f8a u=3 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=2 PUT sp=vfs_statx+0x95/0xcc

v--- and then exec'd it.

            bash-3597  M=93785f8a u=3 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=4 GET sp=do_dentry_open+0x24/0x2d3
            bash-3597  M=93785f8a u=3 PUT sp=terminate_walk+0x20/0xda
            bash-3597  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
            bash-3597  M=93785f8a u=2 PUT sp=__fput+0x180/0x1c1

v--- bash then did stuff:

            bash-3597  M=93785f8a u=3 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=2 PUT sp=vfs_statx+0x95/0xcc

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
     grepconf.sh-3598  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3598  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3598  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3598  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3598  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3598  M=93785f8a u=5 GET sp=do_dentry_open+0x24/0x2d3
     grepconf.sh-3598  M=93785f8a u=4 PUT sp=terminate_walk+0x20/0xda
     grepconf.sh-3598  M=93785f8a u=5 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3598  M=93785f8a u=4 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3598  M=93785f8a u=3 PUT sp=__fput+0x180/0x1c1
     grepconf.sh-3598  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3598  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3598  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
            grep-3599  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
     grepconf.sh-3598  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
            bash-3600  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
             tty-3601  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
            bash-3600  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
            tput-3602  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
            bash-3600  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
            bash-3603  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
       dircolors-3604  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
            bash-3603  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
            grep-3605  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
     grepconf.sh-3606  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3606  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3606  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3606  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3606  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3606  M=93785f8a u=5 GET sp=do_dentry_open+0x24/0x2d3
     grepconf.sh-3606  M=93785f8a u=4 PUT sp=terminate_walk+0x20/0xda
     grepconf.sh-3606  M=93785f8a u=5 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3606  M=93785f8a u=4 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3606  M=93785f8a u=3 PUT sp=__fput+0x180/0x1c1
     grepconf.sh-3606  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3606  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3606  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
            grep-3607  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
     grepconf.sh-3606  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
     grepconf.sh-3608  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3608  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3608  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3608  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3608  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3608  M=93785f8a u=5 GET sp=do_dentry_open+0x24/0x2d3
     grepconf.sh-3608  M=93785f8a u=4 PUT sp=terminate_walk+0x20/0xda
     grepconf.sh-3608  M=93785f8a u=5 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3608  M=93785f8a u=4 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3608  M=93785f8a u=3 PUT sp=__fput+0x180/0x1c1
     grepconf.sh-3608  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
     grepconf.sh-3608  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
     grepconf.sh-3608  M=93785f8a u=4 GET sp=copy_fs_struct+0xcc/0xde
            grep-3609  M=93785f8a u=3 PUT sp=free_fs_struct+0x1e/0x2e
     grepconf.sh-3608  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

            bash-3597  M=93785f8a u=3 GET sp=legitimize_path.isra.7+0x16/0x50
            bash-3597  M=93785f8a u=2 PUT sp=vfs_statx+0x95/0xcc

I ran "mount --move . /mnt":

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
           mount-3610  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3610  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
           mount-3610  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3610  M=93785f8a u=5 GET sp=do_dentry_open+0x24/0x2d3
           mount-3610  M=93785f8a u=4 PUT sp=terminate_walk+0x20/0xda
           mount-3610  M=93785f8a u=5 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3610  M=93785f8a u=4 PUT sp=vfs_statx+0x95/0xcc
           mount-3610  M=93785f8a u=3 PUT sp=__fput+0x180/0x1c1
           mount-3610  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3610  M=93785f8a u=3 PUT sp=do_mount+0x715/0x929
           mount-3610  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

which worked.  Herein lieth the problem.  The usage count should be 3 now (1
for pwd, 1 for fd, 1 for mount) - but how does VFS know that this mount object
isn't mounted yet?

I ran "mount --move /mnt /mnt":

            bash-3597  M=93785f8a u=3 GET sp=copy_fs_struct+0xcc/0xde
           mount-3611  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3611  M=93785f8a u=3 PUT sp=vfs_statx+0x95/0xcc
           mount-3611  M=93785f8a u=4 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3611  M=93785f8a u=5 GET sp=legitimize_path.isra.7+0x16/0x50
           mount-3611  M=93785f8a u=4 PUT sp=do_mount+0x715/0x929
           mount-3611  M=93785f8a u=3 PUT sp=do_mount+0x1cf/0x929
           mount-3611  M=93785f8a u=2 PUT sp=free_fs_struct+0x1e/0x2e

which failed with ELOOP.

I ran "cd":

            bash-3597  M=93785f8a u=1 PUT sp=set_fs_pwd+0xb8/0xdb

I ran "umount /mnt":

          umount-3612  M=93785f8a u=2 GET sp=legitimize_path.isra.7+0x16/0x50
          umount-3612  M=93785f8a u=1 PUT sp=vfs_statx+0x95/0xcc
          umount-3612  M=93785f8a u=2 GET sp=legitimize_path.isra.7+0x16/0x50
          umount-3612  M=93785f8a u=1 PUT sp=vfs_statx+0x95/0xcc
          umount-3612  M=93785f8a u=2 GET sp=legitimize_path.isra.7+0x16/0x50
          umount-3612  M=93785f8a u=1 PUT sp=user_statfs+0x61/0x98
          umount-3612  M=93785f8a u=2 GET sp=legitimize_mnt+0x12/0x108
          umount-3612  M=93785f8a u=1 PUT sp=pin_kill+0x11c/0x325
          umount-3612  M=93785f8a u=0 PUT sp=ksys_umount+0x3e8/0x40e
          umount-3612  M=93785f8a u=0 FRE sp=cleanup_mnt+0x4d/0x5e

And finally, I exited the shell, which then tried to release the fd inherited
from open_tree():

            bash-3597  M=93785f8a u=1802201963 NFY sp=__fput+0xac/0x1c1

Note that the subtree attached to the fd has not at this point been directly
mounted by move_mount(), but implicitly mounted by fchdir() into it and then
using mount(MS_MOVE) of "." to "/mnt".

Note also that if I run the commands all as one go rather than pasting them
individually, a crash occurs in pin_kill() instead.

So, I'm not sure how to proceed from here.  There's no easy way to remove the
FMODE_NEED_UNMOUNT flag left by open_tree().

David
---
commit e7c8e6590aa0dd3bf2e10450b8992d496c44be8b
Author: David Howells <dhowells@xxxxxxxxxx>
Date:   Fri Oct 19 10:38:35 2018 +0100

    mnt_count trace

diff --git a/fs/mount.h b/fs/mount.h
index f39bc9da4d73..124a6dd73936 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -20,7 +20,7 @@ struct mnt_namespace {
 } __randomize_layout;
 
 struct mnt_pcp {
-	int mnt_count;
+	int mnt_countxxx;
 	int mnt_writers;
 };
 
@@ -46,6 +46,7 @@ struct mount {
 	int mnt_count;
 	int mnt_writers;
 #endif
+	atomic_t mnt_count;
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
 	struct list_head mnt_instance;	/* mount instance on sb->s_mounts */
diff --git a/fs/namei.c b/fs/namei.c
index fb913148d4d1..da1489f6925c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -460,32 +460,6 @@ int inode_permission(struct inode *inode, int mask)
 }
 EXPORT_SYMBOL(inode_permission);
 
-/**
- * path_get - get a reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get(const struct path *path)
-{
-	mntget(path->mnt);
-	dget(path->dentry);
-}
-EXPORT_SYMBOL(path_get);
-
-/**
- * path_put - put a reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put(const struct path *path)
-{
-	dput(path->dentry);
-	mntput(path->mnt);
-}
-EXPORT_SYMBOL(path_put);
-
 #define EMBEDDED_LEVELS 2
 struct nameidata {
 	struct path	path;
diff --git a/fs/namespace.c b/fs/namespace.c
index 6370bfabec99..389e806e1a65 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -29,6 +29,8 @@
 #include <linux/sched/task.h>
 #include <uapi/linux/mount.h>
 #include <linux/fs_context.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mnt.h>
 
 #include "pnode.h"
 #include "internal.h"
@@ -109,8 +111,10 @@ static int mnt_alloc_id(struct mount *mnt)
 	return 0;
 }
 
-static void mnt_free_id(struct mount *mnt)
+static noinline void mnt_free_id(struct mount *mnt)
 {
+	trace_mnt_count(mnt, mnt->mnt_id, atomic_read(&mnt->mnt_count), 99,
+			__builtin_return_address(0));
 	ida_free(&mnt_id_ida, mnt->mnt_id);
 }
 
@@ -141,6 +145,9 @@ void mnt_release_group_id(struct mount *mnt)
  */
 static inline void mnt_add_count(struct mount *mnt, int n)
 {
+	int u;
+
+#if 0
 #ifdef CONFIG_SMP
 	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
 #else
@@ -148,6 +155,9 @@ static inline void mnt_add_count(struct mount *mnt, int n)
 	mnt->mnt_count += n;
 	preempt_enable();
 #endif
+#endif
+	u = atomic_add_return(n, &mnt->mnt_count);
+	trace_mnt_count(mnt, mnt->mnt_id, u, n, __builtin_return_address(0));
 }
 
 /*
@@ -155,6 +165,7 @@ static inline void mnt_add_count(struct mount *mnt, int n)
  */
 unsigned int mnt_get_count(struct mount *mnt)
 {
+#if 0
 #ifdef CONFIG_SMP
 	unsigned int count = 0;
 	int cpu;
@@ -167,6 +178,8 @@ unsigned int mnt_get_count(struct mount *mnt)
 #else
 	return mnt->mnt_count;
 #endif
+#endif
+	return atomic_read(&mnt->mnt_count);
 }
 
 static void drop_mountpoint(struct fs_pin *p)
@@ -198,11 +211,15 @@ static struct mount *alloc_vfsmnt(const char *name)
 		if (!mnt->mnt_pcp)
 			goto out_free_devname;
 
+#if 0
 		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
+#endif
 #else
 		mnt->mnt_count = 1;
 		mnt->mnt_writers = 0;
 #endif
+		atomic_set(&mnt->mnt_count, 1);
+		trace_mnt_count(mnt, mnt->mnt_id, 1, 0, __builtin_return_address(0));
 
 		INIT_HLIST_NODE(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -1128,7 +1145,7 @@ static void mntput_no_expire(struct mount *mnt)
 	cleanup_mnt(mnt);
 }
 
-void mntput(struct vfsmount *mnt)
+inline void mntput(struct vfsmount *mnt)
 {
 	if (mnt) {
 		struct mount *m = real_mount(mnt);
@@ -1140,7 +1157,7 @@ void mntput(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(mntput);
 
-struct vfsmount *mntget(struct vfsmount *mnt)
+inline struct vfsmount *mntget(struct vfsmount *mnt)
 {
 	if (mnt)
 		mnt_add_count(real_mount(mnt), 1);
@@ -3970,3 +3987,29 @@ const struct proc_ns_operations mntns_operations = {
 	.install	= mntns_install,
 	.owner		= mntns_owner,
 };
+
+/**
+ * path_get - get a reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get(const struct path *path)
+{
+	mntget(path->mnt);
+	dget(path->dentry);
+}
+EXPORT_SYMBOL(path_get);
+
+/**
+ * path_put - put a reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put(const struct path *path)
+{
+	dput(path->dentry);
+	mntput(path->mnt);
+}
+EXPORT_SYMBOL(path_put);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ababdbfab537..aaef44d6204c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/srcu.h>
+#include <trace/events/mnt.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
@@ -324,10 +325,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	if (data_is == FSNOTIFY_EVENT_PATH)
+	if (data_is == FSNOTIFY_EVENT_PATH) {
 		mnt = real_mount(((const struct path *)data)->mnt);
-	else
+		trace_mnt_count(mnt, mnt->mnt_id, atomic_read(&mnt->mnt_count), 88,
+				__builtin_return_address(0));
+	} else {
 		mnt = NULL;
+	}
 
 	/*
 	 * Optimization: srcu_read_lock() has a memory barrier which can
diff --git a/include/trace/events/mnt.h b/include/trace/events/mnt.h
new file mode 100644
index 000000000000..da1a981f1a61
--- /dev/null
+++ b/include/trace/events/mnt.h
@@ -0,0 +1,57 @@
+/* Mount tracepoints
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mnt
+
+#if !defined(_TRACE_MNT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MNT_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mnt_count,
+	    TP_PROTO(const void *mnt, int mnt_id, int mnt_count,
+		     int delta, const void *where),
+
+	    TP_ARGS(mnt, mnt_id, mnt_count, delta, where),
+
+	    TP_STRUCT__entry(
+		    __field(int,			mnt_id		)
+		    __field(int,			mnt_count	)
+		    __field(int,			delta		)
+		    __field(const void *,		mnt		)
+		    __field(const void *,		where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->mnt_id = mnt_id;
+		    __entry->mnt_count = mnt_count;
+		    __entry->delta = delta;
+		    __entry->mnt = mnt;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("M=%p m=%08x u=%d %s sp=%pSR",
+		      __entry->mnt,
+		      __entry->mnt_id,
+		      __entry->mnt_count,
+		      __print_symbolic(__entry->delta,
+				       { 0, "ALC" },
+				       { 1, "GET" },
+				       { -1, "PUT" },
+				       { 88, "NFY" },
+				       { 99, "FRE" }),
+		      __entry->where)
+	    );
+
+#endif /* _TRACE_MNT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux