Re: [PATCH v6 4/5] libfs: Replace simple_offset end-of-directory detection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



* cel@xxxxxxxxxx <cel@xxxxxxxxxx> [241220 10:33]:
> From: Chuck Lever <chuck.lever@xxxxxxxxxx>
> 
> According to getdents(3), the d_off field in each returned directory
> entry points to the next entry in the directory. The d_off field in
> the last returned entry in the readdir buffer must contain a valid
> offset value, but if it points to an actual directory entry, then
> readdir/getdents can loop.
> 
> This patch introduces a specific fixed offset value that is placed
> in the d_off field of the last entry in a directory. Some user space
> applications assume that the EOD offset value is larger than the
> offsets of real directory entries, so the largest possible offset
> value is reserved for this purpose. This new value is never
> allocated by simple_offset_add().
> 
> When ->iterate_dir() returns, getdents{64} inserts the ctx->pos
> value into the d_off field of the last valid entry in the readdir
> buffer. When it hits EOD, offset_readdir() sets ctx->pos to the EOD
> offset value so the last entry is updated to point to the EOD marker.
> 
> When trying to read the entry at the EOD offset, offset_readdir()
> terminates immediately.
> 
> It is worth noting that using a Maple tree for directory offset
> value allocation does not guarantee a 63-bit range of values --
> on platforms where "long" is a 32-bit type, the directory offset
> value range is still 0..(2^31 - 1).

I have a standing request to have 32-bit archs return 64-bit values.  Is
this another 'nice to have' 64 bit values on 32 bit archs?

> 
> Fixes: 796432efab1e ("libfs: getdents() should return 0 after reaching EOD")
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
>  fs/libfs.c | 38 ++++++++++++++++++++++----------------
>  1 file changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 8c9364a0174c..5c56783c03a5 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -245,9 +245,16 @@ const struct inode_operations simple_dir_inode_operations = {
>  };
>  EXPORT_SYMBOL(simple_dir_inode_operations);
>  
> -/* 0 is '.', 1 is '..', so always start with offset 2 or more */
> +/* simple_offset_add() allocation range */
>  enum {
> -	DIR_OFFSET_MIN	= 2,
> +	DIR_OFFSET_MIN		= 2,
> +	DIR_OFFSET_MAX		= LONG_MAX - 1,
> +};
> +
> +/* simple_offset_add() never assigns these to a dentry */
> +enum {
> +	DIR_OFFSET_EOD		= LONG_MAX,	/* Marks EOD */
> +
>  };
>  
>  static void offset_set(struct dentry *dentry, long offset)
> @@ -291,7 +298,8 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
>  		return -EBUSY;
>  
>  	ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
> -				 LONG_MAX, &octx->next_offset, GFP_KERNEL);
> +				 DIR_OFFSET_MAX, &octx->next_offset,
> +				 GFP_KERNEL);
>  	if (unlikely(ret < 0))
>  		return ret == -EBUSY ? -ENOSPC : ret;
>  
> @@ -447,8 +455,6 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
>  		return -EINVAL;
>  	}
>  
> -	/* In this case, ->private_data is protected by f_pos_lock */
> -	file->private_data = NULL;
>  	return vfs_setpos(file, offset, LONG_MAX);
>  }
>  
> @@ -458,7 +464,7 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
>  	struct dentry *child, *found = NULL;
>  
>  	rcu_read_lock();
> -	child = mas_find(&mas, LONG_MAX);
> +	child = mas_find(&mas, DIR_OFFSET_MAX);
>  	if (!child)
>  		goto out;
>  	spin_lock(&child->d_lock);
> @@ -479,7 +485,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>  			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
>  }
>  
> -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  {
>  	struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
>  	struct dentry *dentry;
> @@ -487,7 +493,7 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  	while (true) {
>  		dentry = offset_find_next(octx, ctx->pos);
>  		if (!dentry)
> -			return ERR_PTR(-ENOENT);
> +			goto out_eod;
>  
>  		if (!offset_dir_emit(ctx, dentry)) {
>  			dput(dentry);
> @@ -497,7 +503,10 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  		ctx->pos = dentry2offset(dentry) + 1;
>  		dput(dentry);
>  	}
> -	return NULL;
> +	return;
> +
> +out_eod:
> +	ctx->pos = DIR_OFFSET_EOD;
>  }
>  
>  /**
> @@ -517,6 +526,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>   *
>   * On return, @ctx->pos contains an offset that will read the next entry
>   * in this directory when offset_readdir() is called again with @ctx.
> + * Caller places this value in the d_off field of the last entry in the
> + * user's buffer.
>   *
>   * Return values:
>   *   %0 - Complete
> @@ -529,13 +540,8 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>  
>  	if (!dir_emit_dots(file, ctx))
>  		return 0;
> -
> -	/* In this case, ->private_data is protected by f_pos_lock */
> -	if (ctx->pos == DIR_OFFSET_MIN)
> -		file->private_data = NULL;
> -	else if (file->private_data == ERR_PTR(-ENOENT))
> -		return 0;
> -	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
> +	if (ctx->pos != DIR_OFFSET_EOD)
> +		offset_iterate_dir(d_inode(dir), ctx);
>  	return 0;
>  }
>  
> -- 
> 2.47.0
> 
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux