Re: [PATCH 00/13] VFS: Filesystem information [ver #19]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 1, 2020 at 10:37 AM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
>
> On Wed, Apr 1, 2020 at 10:27 AM David Howells <dhowells@xxxxxxxxxx> wrote:
> >
> > Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
> >
> > > According to dhowell's measurements processing 100k mounts would take
> > > about a few seconds of system time (that's the time spent by the
> > > kernel to retrieve the data,
> >
> > But the inefficiency of mountfs - at least as currently implemented - scales
> > up with the number of individual values you want to retrieve, both in terms of
> > memory usage and time taken.
>
> I've taken that into account when guesstimating a "few seconds per
> 100k entries".  My guess is that there's probably an order of
> magnitude difference between the performance of a fs based interface
> and a binary syscall based interface.  That could be reduced somewhat
> with a readfile(2) type API.

And to show that I'm not completely off base, I've attached a patch that
adds a limited readfile(2) syscall and uses it in the p2 (mountfs) method.

Results are promising:

./test-fsinfo-perf /tmp/a 30000
--- make mounts ---
--- test fsinfo by path ---
sum(mnt_id) = 930000
--- test fsinfo by mnt_id ---
sum(mnt_id) = 930000
--- test /proc/fdinfo ---
sum(mnt_id) = 930000
--- test mountfs ---
sum(mnt_id) = 930000
For   30000 mounts, f=    146400us f2=    136766us p=   1406569us p2=    221669us; p=9.6*f p=10.3*f2 p=6.3*p2
--- umount ---

This is about a 2-fold increase in speed compared to open + read + close.

Is someone still worried about performance, or can we move on to more
interesting parts of the design?

Thanks,
Miklos
Index: linux/fs/mountfs/super.c
===================================================================
--- linux.orig/fs/mountfs/super.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/fs/mountfs/super.c	2020-04-01 14:21:42.426151545 +0200
@@ -51,10 +51,11 @@ static bool mountfs_entry_visible(struct
 
 	return visible;
 }
+
 static int mountfs_attr_show(struct seq_file *sf, void *v)
 {
 	const char *name = sf->file->f_path.dentry->d_name.name;
-	struct mountfs_entry *entry = sf->private;
+	struct mountfs_entry *entry = file_inode(sf->file)->i_private;
 	struct mount *mnt;
 	struct vfsmount *m;
 	struct super_block *sb;
@@ -140,12 +141,53 @@ static int mountfs_attr_show(struct seq_
 	return err;
 }
 
+/*
+ * ->readfile for mountfs attribute files.
+ *
+ * Renders the attribute with mountfs_attr_show() into a temporary kernel
+ * buffer and copies at most @size bytes of the output to @buf.  The buffer
+ * starts at PAGE_SIZE and is doubled whenever the output fills it, since a
+ * full buffer may mean the output was truncated.
+ *
+ * Returns the number of bytes copied, -ENOMEM if the buffer cannot be
+ * allocated, -EFAULT if copying to @buf fails, or the non-zero value
+ * returned by mountfs_attr_show().
+ */
+static ssize_t mountfs_attr_readfile(struct file *file, char __user *buf,
+				     size_t size)
+{
+	struct seq_file m = { .size = PAGE_SIZE, .file = file };
+	ssize_t ret;
+
+retry:
+	m.buf = kvmalloc(m.size, GFP_KERNEL);
+	if (!m.buf)
+		return -ENOMEM;
+
+	ret = mountfs_attr_show(&m, NULL);
+	if (!ret) {
+		if (m.count == m.size) {
+			kvfree(m.buf);
+			m.size <<= 1;
+			m.count = 0;
+			goto retry;
+		}
+		ret = min(m.count, size);
+		if (copy_to_user(buf, m.buf, ret))
+			ret = -EFAULT;
+	}
+
+	kvfree(m.buf);
+	return ret;
+}
+
 static int mountfs_attr_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, mountfs_attr_show, inode->i_private);
+	return single_open(file, mountfs_attr_show, NULL);
 }
 
 static const struct file_operations mountfs_attr_fops = {
+	.readfile	= mountfs_attr_readfile,
 	.open		= mountfs_attr_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
Index: linux/samples/vfs/test-fsinfo-perf.c
===================================================================
--- linux.orig/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:42.426151545 +0200
@@ -172,6 +172,17 @@ static void get_id_by_proc(int ix, const
 	//printf("[%u] %u\n", ix, x);
 }
 
+/*
+ * Invoke the readfile(2) syscall directly through syscall(2): read the file
+ * @name (looked up relative to @dfd) into @buffer, at most @size bytes.
+ * Returns what syscall(2) returns.
+ */
+static long readfile(int dfd, const char *name, char *buffer, size_t size,
+		     int flags)
+{
+	return syscall(__NR_readfile, dfd, name, buffer, size, flags);
+}
+
 static void get_id_by_fsinfo_2(void)
 {
 	struct fsinfo_mount_topology t;
@@ -300,11 +311,9 @@ static void get_id_by_mountfs(void)
 		}
 
 		sprintf(procfile, "%u/parent", mnt_id);
-		fd = openat(mntfd, procfile, O_RDONLY);
-		ERR(fd, procfile);
-		len = read(fd, buffer, sizeof(buffer) - 1);
-		ERR(len, "read/parent");
-		close(fd);
+		/* Leave one byte free for the NUL terminator stored below. */
+		len = readfile(mntfd, procfile, buffer, sizeof(buffer) - 1, 0);
+		ERR(len, "readfile/parent");
 		if (len > 0 && buffer[len - 1] == '\n')
 			len--;
 		buffer[len] = 0;
@@ -319,11 +328,9 @@ static void get_id_by_mountfs(void)
 		sum_check += x;
 
 		sprintf(procfile, "%u/counter", mnt_id);
-		fd = openat(mntfd, procfile, O_RDONLY);
-		ERR(fd, procfile);
-		len = read(fd, buffer, sizeof(buffer) - 1);
-		ERR(len, "read/counter");
-		close(fd);
+		/* Leave one byte free for the NUL terminator stored below. */
+		len = readfile(mntfd, procfile, buffer, sizeof(buffer) - 1, 0);
+		ERR(len, "readfile/counter");
 		if (len > 0 && buffer[len - 1] == '\n')
 			len--;
 		buffer[len] = 0;
Index: linux/arch/x86/entry/syscalls/syscall_64.tbl
===================================================================
--- linux.orig/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:37.284094840 +0200
+++ linux/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:42.412151390 +0200
@@ -362,6 +362,7 @@
 439	common	watch_mount		__x64_sys_watch_mount
 440	common	watch_sb		__x64_sys_watch_sb
 441	common	fsinfo			__x64_sys_fsinfo
+442	common	readfile		__x64_sys_readfile
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
Index: linux/fs/open.c
===================================================================
--- linux.orig/fs/open.c	2020-04-01 14:21:37.284094840 +0200
+++ linux/fs/open.c	2020-04-01 14:21:42.424151523 +0200
@@ -1340,3 +1340,45 @@ int stream_open(struct inode *inode, str
 }
 
 EXPORT_SYMBOL(stream_open);
+
+/*
+ * readfile(2): read the file at @filename (looked up relative to @dfd) into
+ * @buffer in one call, without a separate open/read/close sequence.
+ *
+ * Only files whose file_operations implement ->readfile are supported; all
+ * others fail with -EOPNOTSUPP.  @flags is reserved and must be zero.
+ *
+ * Returns whatever ->readfile returns, or a negative errno.
+ */
+SYSCALL_DEFINE5(readfile, int, dfd, const char __user *, filename,
+		char __user *, buffer, size_t, bufsize, int, flags)
+{
+	/*
+	 * No file is actually opened: ->readfile is handed a zeroed on-stack
+	 * struct file with only f_path, f_inode and f_op filled in.
+	 */
+	struct file file = {};
+	ssize_t ret;
+
+	if (flags)
+		return -EINVAL;
+
+	ret = user_path_at(dfd, filename, 0, &file.f_path);
+	if (ret)
+		return ret;
+
+	file.f_inode = file.f_path.dentry->d_inode;
+	file.f_op = file.f_inode->i_fop;
+
+	/* Nothing was opened, so check read permission explicitly. */
+	ret = inode_permission(file.f_inode, MAY_READ);
+	if (ret)
+		goto out;
+
+	ret = -EOPNOTSUPP;
+	if (file.f_op->readfile)
+		ret = file.f_op->readfile(&file, buffer, bufsize);
+out:
+	path_put(&file.f_path);
+	return ret;
+}
Index: linux/include/linux/syscalls.h
===================================================================
--- linux.orig/include/linux/syscalls.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/linux/syscalls.h	2020-04-01 14:21:42.413151401 +0200
@@ -1011,6 +1011,8 @@ asmlinkage long sys_watch_sb(int dfd, co
 asmlinkage long sys_fsinfo(int dfd, const char __user *pathname,
 			   struct fsinfo_params __user *params, size_t params_size,
 			   void __user *result_buffer, size_t result_buf_size);
+asmlinkage long sys_readfile(int dfd, const char __user *filename,
+			     char __user *buffer, size_t bufsize, int flags);
 
 /*
  * Architecture-specific system calls
Index: linux/include/uapi/asm-generic/unistd.h
===================================================================
--- linux.orig/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:42.413151401 +0200
@@ -861,9 +861,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mo
 __SYSCALL(__NR_watch_sb, sys_watch_sb)
 #define __NR_fsinfo 441
 __SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_readfile 442
+__SYSCALL(__NR_readfile, sys_readfile)
 
 #undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
 
 /*
  * 32 bit systems traditionally used different
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2020-04-01 14:21:19.144894804 +0200
+++ linux/include/linux/fs.h	2020-04-01 14:21:42.425151534 +0200
@@ -1868,6 +1868,7 @@ struct file_operations {
 				   struct file *file_out, loff_t pos_out,
 				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+	ssize_t (*readfile)(struct file *, char __user *, size_t);
 } __randomize_layout;
 
 struct inode_operations {

[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux