On Wed, Apr 1, 2020 at 10:37 AM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
>
> On Wed, Apr 1, 2020 at 10:27 AM David Howells <dhowells@xxxxxxxxxx> wrote:
> >
> > Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
> >
> > > According to dhowell's measurements processing 100k mounts would take
> > > about a few seconds of system time (that's the time spent by the
> > > kernel to retrieve the data,
> >
> > But the inefficiency of mountfs - at least as currently implemented - scales
> > up with the number of individual values you want to retrieve, both in terms of
> > memory usage and time taken.
>
> I've taken that into account when guesstimating a "few seconds per
> 100k entries".  My guess is that there's probably an order of
> magnitude difference between the performance of a fs based interface
> and a binary syscall based interface.  That could be reduced somewhat
> with a readfile(2) type API.

And to show that I'm not completely off base, attached is a patch that
adds a limited readfile(2) syscall and uses it in the p2 method.  The
results are promising:

./test-fsinfo-perf /tmp/a 30000
--- make mounts ---
--- test fsinfo by path ---
sum(mnt_id) = 930000
--- test fsinfo by mnt_id ---
sum(mnt_id) = 930000
--- test /proc/fdinfo ---
sum(mnt_id) = 930000
--- test mountfs ---
sum(mnt_id) = 930000
For 30000 mounts, f= 146400us f2= 136766us p= 1406569us p2= 221669us; p=9.6*f p=10.3*f2 p=6.3*p2
--- umount ---

That is roughly a two-fold increase in speed compared to open + read + close.

Is anyone still worried about performance, or can we move on to more
interesting parts of the design?

Thanks,
Miklos
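
[For illustration only, not part of the patch: this is roughly how the p2
method ends up calling the new syscall from userspace.  The syscall number
matches the one allocated in the patch below; the mountfs mount point and
the mount ID are made-up placeholders.]

/* Sketch only: readfile(2) exists only with the patch below applied;
 * __NR_readfile (442), "/mountfs" and mount ID 21 are placeholders. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_readfile
#define __NR_readfile 442
#endif

static long readfile(int dfd, const char *name, char *buf, size_t size,
		     int flags)
{
	return syscall(__NR_readfile, dfd, name, buf, size, flags);
}

int main(void)
{
	char buf[4096];
	long len;
	int mntfd = open("/mountfs", O_RDONLY | O_DIRECTORY);

	if (mntfd == -1)
		return 1;

	/* one call instead of openat() + read() + close() */
	len = readfile(mntfd, "21/parent", buf, sizeof(buf) - 1, 0);
	if (len < 0)
		return 1;
	if (len > 0 && buf[len - 1] == '\n')
		len--;
	buf[len] = 0;
	printf("parent = %s\n", buf);
	close(mntfd);
	return 0;
}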
Index: linux/fs/mountfs/super.c
===================================================================
--- linux.orig/fs/mountfs/super.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/fs/mountfs/super.c	2020-04-01 14:21:42.426151545 +0200
@@ -51,10 +51,11 @@ static bool mountfs_entry_visible(struct
 	return visible;
 }
 
+
 static int mountfs_attr_show(struct seq_file *sf, void *v)
 {
 	const char *name = sf->file->f_path.dentry->d_name.name;
-	struct mountfs_entry *entry = sf->private;
+	struct mountfs_entry *entry = file_inode(sf->file)->i_private;
 	struct mount *mnt;
 	struct vfsmount *m;
 	struct super_block *sb;
@@ -140,12 +141,40 @@ static int mountfs_attr_show(struct seq_
 	return err;
 }
 
+ssize_t mountfs_attr_readfile(struct file *file, char __user *buf, size_t size)
+{
+	struct seq_file m = { .size = PAGE_SIZE, .file = file };
+	ssize_t ret;
+
+retry:
+	m.buf = kvmalloc(m.size, GFP_KERNEL);
+	if (!m.buf)
+		return -ENOMEM;
+
+	ret = mountfs_attr_show(&m, NULL);
+	if (!ret) {
+		if (m.count == m.size) {
+			kvfree(m.buf);
+			m.size <<= 1;
+			m.count = 0;
+			goto retry;
+		}
+		ret = min(m.count, size);
+		if (copy_to_user(buf, m.buf, ret))
+			ret = -EFAULT;
+	}
+
+	kvfree(m.buf);
+	return ret;
+}
+
 static int mountfs_attr_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, mountfs_attr_show, inode->i_private);
+	return single_open(file, mountfs_attr_show, NULL);
 }
 
 static const struct file_operations mountfs_attr_fops = {
+	.readfile	= mountfs_attr_readfile,
 	.open		= mountfs_attr_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
Index: linux/samples/vfs/test-fsinfo-perf.c
===================================================================
--- linux.orig/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:24.609955072 +0200
+++ linux/samples/vfs/test-fsinfo-perf.c	2020-04-01 14:21:42.426151545 +0200
@@ -172,6 +172,12 @@ static void get_id_by_proc(int ix, const
 	//printf("[%u] %u\n", ix, x);
 }
 
+static long readfile(int dfd, const char *name, char *buffer, size_t size,
+		     int flags)
+{
+	return syscall(__NR_readfile, dfd, name, buffer, size, flags);
+}
+
 static void get_id_by_fsinfo_2(void)
 {
 	struct fsinfo_mount_topology t;
@@ -300,11 +306,8 @@ static void get_id_by_mountfs(void)
 	}
 
 	sprintf(procfile, "%u/parent", mnt_id);
-	fd = openat(mntfd, procfile, O_RDONLY);
-	ERR(fd, procfile);
-	len = read(fd, buffer, sizeof(buffer) - 1);
-	ERR(len, "read/parent");
-	close(fd);
+	len = readfile(mntfd, procfile, buffer, sizeof(buffer), 0);
+	ERR(len, "readfile/parent");
 	if (len > 0 && buffer[len - 1] == '\n')
 		len--;
 	buffer[len] = 0;
@@ -319,11 +322,8 @@ static void get_id_by_mountfs(void)
 	sum_check += x;
 
 	sprintf(procfile, "%u/counter", mnt_id);
-	fd = openat(mntfd, procfile, O_RDONLY);
-	ERR(fd, procfile);
-	len = read(fd, buffer, sizeof(buffer) - 1);
-	ERR(len, "read/counter");
-	close(fd);
+	len = readfile(mntfd, procfile, buffer, sizeof(buffer) - 1, 0);
+	ERR(len, "readfile/counter");
 	if (len > 0 && buffer[len - 1] == '\n')
 		len--;
 	buffer[len] = 0;
Index: linux/arch/x86/entry/syscalls/syscall_64.tbl
===================================================================
--- linux.orig/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:37.284094840 +0200
+++ linux/arch/x86/entry/syscalls/syscall_64.tbl	2020-04-01 14:21:42.412151390 +0200
@@ -362,6 +362,7 @@
 439	common	watch_mount		__x64_sys_watch_mount
 440	common	watch_sb		__x64_sys_watch_sb
 441	common	fsinfo			__x64_sys_fsinfo
+442	common	readfile		__x64_sys_readfile
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
Index: linux/fs/open.c
===================================================================
--- linux.orig/fs/open.c	2020-04-01 14:21:37.284094840 +0200
+++ linux/fs/open.c	2020-04-01 14:21:42.424151523 +0200
@@ -1340,3 +1340,25 @@ int stream_open(struct inode *inode, str
 }
 
 EXPORT_SYMBOL(stream_open);
+
+SYSCALL_DEFINE5(readfile, int, dfd, const char __user *, filename,
+		char __user *, buffer, size_t, bufsize, int, flags)
+{
+	ssize_t ret;
+	struct file file = {};
+
+	if (flags)
+		return -EINVAL;
+
+	ret = user_path_at(dfd, filename, 0, &file.f_path);
+	if (!ret) {
+		file.f_inode = file.f_path.dentry->d_inode;
+		file.f_op = file.f_inode->i_fop;
+		ret = -EOPNOTSUPP;
+		if (file.f_op->readfile)
+			ret = file.f_op->readfile(&file, buffer, bufsize);
+		path_put(&file.f_path);
+	}
+
+	return ret;
+}
Index: linux/include/linux/syscalls.h
===================================================================
--- linux.orig/include/linux/syscalls.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/linux/syscalls.h	2020-04-01 14:21:42.413151401 +0200
@@ -1011,6 +1011,8 @@ asmlinkage long sys_watch_sb(int dfd, co
 asmlinkage long sys_fsinfo(int dfd, const char __user *pathname,
 			   struct fsinfo_params __user *params, size_t params_size,
 			   void __user *result_buffer, size_t result_buf_size);
+asmlinkage long sys_readfile(int dfd, const char __user *filename,
+			     char __user *buffer, size_t bufsize, int flags);
 
 /*
  * Architecture-specific system calls
Index: linux/include/uapi/asm-generic/unistd.h
===================================================================
--- linux.orig/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:37.284094840 +0200
+++ linux/include/uapi/asm-generic/unistd.h	2020-04-01 14:21:42.413151401 +0200
@@ -861,9 +861,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mo
 __SYSCALL(__NR_watch_sb, sys_watch_sb)
 #define __NR_fsinfo 441
 __SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_readfile 442
+__SYSCALL(__NR_readfile, sys_readfile)
 
 #undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
 
 /*
  * 32 bit systems traditionally used different
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2020-04-01 14:21:19.144894804 +0200
+++ linux/include/linux/fs.h	2020-04-01 14:21:42.425151534 +0200
@@ -1868,6 +1868,7 @@ struct file_operations {
 			struct file *file_out, loff_t pos_out,
 			loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+	ssize_t (*readfile)(struct file *, char __user *, size_t);
 } __randomize_layout;
 
 struct inode_operations {