Re: [PATCH] fuse: enable larger read buffers for readdir.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 26, 2023 at 08:25:48PM +0200, Jaco Kroon wrote:

> glibc uses a 128KiB buffer for getdents64, so I'm not sure >128KiB here
> makes sense.  Or if these two buffers are even directly related.

Definitely related.  See how the dir_emit() works: if the entry doesn't fit in
the userspace buffer, then ->readdir() returns.  If entries remain in the fuse
buffer at that time, they are simply discarded.  Simply put, making the
buffer too large is going to be a waste of resources and at some point will be
slower than the single page buffer.

Note: discarding received entries is the only possible action if caching is
disabled.  With caching enabled it would make sense to pre-fill the cache with
the overflowed entries and use them in the next iteration.  However the caching
code is not prepared for using partial caches at the moment.

The best strategy would be to find the optimal buffer size based on the size of
the userspace buffer.  Putting that info into struct dir_context doesn't sound
too complicated...

Here's a patch.  It doesn't touch readdir.  Simply setting the fuse buffer size
to the userspace buffer size should work, the record sizes are similar (fuse's
is slightly larger than libc's, so no overflow should ever happen).

Thanks,
Miklos


---
 fs/exportfs/expfs.c    |    1 +
 fs/overlayfs/readdir.c |   12 +++++++++++-
 fs/readdir.c           |   29 ++++++++++++++---------------
 include/linux/fs.h     |    7 +++++++
 4 files changed, 33 insertions(+), 16 deletions(-)

--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -286,6 +286,7 @@ static int get_name(const struct path *p
 	};
 	struct getdents_callback buffer = {
 		.ctx.actor = filldir_one,
+		.ctx.count = INT_MAX,
 		.name = name,
 	};
 
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -347,6 +347,7 @@ static int ovl_dir_read_merged(struct de
 	struct path realpath;
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_fill_merge,
+		.ctx.count = INT_MAX,
 		.dentry = dentry,
 		.list = list,
 		.root = root,
@@ -557,6 +558,7 @@ static int ovl_dir_read_impure(const str
 	struct ovl_cache_entry *p, *n;
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_fill_plain,
+		.ctx.count = INT_MAX,
 		.list = list,
 		.root = root,
 	};
@@ -658,6 +660,7 @@ static bool ovl_fill_real(struct dir_con
 	struct ovl_readdir_translate *rdt =
 		container_of(ctx, struct ovl_readdir_translate, ctx);
 	struct dir_context *orig_ctx = rdt->orig_ctx;
+	bool res;
 
 	if (rdt->parent_ino && strcmp(name, "..") == 0) {
 		ino = rdt->parent_ino;
@@ -672,7 +675,10 @@ static bool ovl_fill_real(struct dir_con
 					  name, namelen, rdt->xinowarn);
 	}
 
-	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+	res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+	ctx->count = orig_ctx->count;
+
+	return res;
 }
 
 static bool ovl_is_impure_dir(struct file *file)
@@ -699,6 +705,7 @@ static int ovl_iterate_real(struct file
 	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
 	struct ovl_readdir_translate rdt = {
 		.ctx.actor = ovl_fill_real,
+		.ctx.count = ctx->count,
 		.orig_ctx = ctx,
 		.xinobits = ovl_xino_bits(ofs),
 		.xinowarn = ovl_xino_warn(ofs),
@@ -1058,6 +1065,7 @@ int ovl_check_d_type_supported(const str
 	int err;
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_check_d_type,
+		.ctx.count = INT_MAX,
 		.d_type_supported = false,
 	};
 
@@ -1079,6 +1087,7 @@ static int ovl_workdir_cleanup_recurse(s
 	struct ovl_cache_entry *p;
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_fill_plain,
+		.ctx.count = INT_MAX,
 		.list = &list,
 	};
 	bool incompat = false;
@@ -1163,6 +1172,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *
 	struct ovl_cache_entry *p;
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_fill_plain,
+		.ctx.count = INT_MAX,
 		.list = &list,
 	};
 
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -184,6 +184,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned in
 	struct fd f = fdget_pos(fd);
 	struct readdir_callback buf = {
 		.ctx.actor = fillonedir,
+		.ctx.count = 1, /* Hint to fs: just one entry. */
 		.dirent = dirent
 	};
 
@@ -215,7 +216,6 @@ struct getdents_callback {
 	struct dir_context ctx;
 	struct linux_dirent __user * current_dir;
 	int prev_reclen;
-	int count;
 	int error;
 };
 
@@ -234,7 +234,7 @@ static bool filldir(struct dir_context *
 	if (unlikely(buf->error))
 		return false;
 	buf->error = -EINVAL;	/* only used if we fail.. */
-	if (reclen > buf->count)
+	if (reclen > ctx->count)
 		return false;
 	d_ino = ino;
 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -259,7 +259,7 @@ static bool filldir(struct dir_context *
 
 	buf->current_dir = (void __user *)dirent + reclen;
 	buf->prev_reclen = reclen;
-	buf->count -= reclen;
+	ctx->count -= reclen;
 	return true;
 efault_end:
 	user_write_access_end();
@@ -274,7 +274,7 @@ SYSCALL_DEFINE3(getdents, unsigned int,
 	struct fd f;
 	struct getdents_callback buf = {
 		.ctx.actor = filldir,
-		.count = count,
+		.ctx.count = count,
 		.current_dir = dirent
 	};
 	int error;
@@ -293,7 +293,7 @@ SYSCALL_DEFINE3(getdents, unsigned int,
 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
 			error = -EFAULT;
 		else
-			error = count - buf.count;
+			error = count - buf.ctx.count;
 	}
 	fdput_pos(f);
 	return error;
@@ -303,7 +303,6 @@ struct getdents_callback64 {
 	struct dir_context ctx;
 	struct linux_dirent64 __user * current_dir;
 	int prev_reclen;
-	int count;
 	int error;
 };
 
@@ -321,7 +320,7 @@ static bool filldir64(struct dir_context
 	if (unlikely(buf->error))
 		return false;
 	buf->error = -EINVAL;	/* only used if we fail.. */
-	if (reclen > buf->count)
+	if (reclen > ctx->count)
 		return false;
 	prev_reclen = buf->prev_reclen;
 	if (prev_reclen && signal_pending(current))
@@ -341,7 +340,7 @@ static bool filldir64(struct dir_context
 
 	buf->prev_reclen = reclen;
 	buf->current_dir = (void __user *)dirent + reclen;
-	buf->count -= reclen;
+	ctx->count -= reclen;
 	return true;
 
 efault_end:
@@ -357,7 +356,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int
 	struct fd f;
 	struct getdents_callback64 buf = {
 		.ctx.actor = filldir64,
-		.count = count,
+		.ctx.count = count,
 		.current_dir = dirent
 	};
 	int error;
@@ -377,7 +376,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int
 		if (put_user(d_off, &lastdirent->d_off))
 			error = -EFAULT;
 		else
-			error = count - buf.count;
+			error = count - buf.ctx.count;
 	}
 	fdput_pos(f);
 	return error;
@@ -442,6 +441,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsi
 	struct fd f = fdget_pos(fd);
 	struct compat_readdir_callback buf = {
 		.ctx.actor = compat_fillonedir,
+		.ctx.count = 1, /* Hint to fs: just one entry. */
 		.dirent = dirent
 	};
 
@@ -467,7 +467,6 @@ struct compat_getdents_callback {
 	struct dir_context ctx;
 	struct compat_linux_dirent __user *current_dir;
 	int prev_reclen;
-	int count;
 	int error;
 };
 
@@ -486,7 +485,7 @@ static bool compat_filldir(struct dir_co
 	if (unlikely(buf->error))
 		return false;
 	buf->error = -EINVAL;	/* only used if we fail.. */
-	if (reclen > buf->count)
+	if (reclen > ctx->count)
 		return false;
 	d_ino = ino;
 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
@@ -510,7 +509,7 @@ static bool compat_filldir(struct dir_co
 
 	buf->prev_reclen = reclen;
 	buf->current_dir = (void __user *)dirent + reclen;
-	buf->count -= reclen;
+	ctx->count -= reclen;
 	return true;
 efault_end:
 	user_write_access_end();
@@ -525,8 +524,8 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigne
 	struct fd f;
 	struct compat_getdents_callback buf = {
 		.ctx.actor = compat_filldir,
+		.ctx.count = count,
 		.current_dir = dirent,
-		.count = count
 	};
 	int error;
 
@@ -544,7 +543,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigne
 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
 			error = -EFAULT;
 		else
-			error = count - buf.count;
+			error = count - buf.ctx.count;
 	}
 	fdput_pos(f);
 	return error;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1719,6 +1719,13 @@ typedef bool (*filldir_t)(struct dir_con
 struct dir_context {
 	filldir_t actor;
 	loff_t pos;
+	/*
+	 * Filesystems MUST NOT MODIFY count, but may use as a hint:
+	 * 0	    unknown
+	 * > 0      space in buffer (assume at least one entry)
+	 * INT_MAX  unlimited
+	 */
+	int count;
 };
 
 /*



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux