Re: [PATCH 2/2 v2] ceph: use a xarray to record all the opened files for each inode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2022-11-14 at 13:19 +0800, xiubli@xxxxxxxxxx wrote:
> From: Xiubo Li <xiubli@xxxxxxxxxx>
> 
> When releasing the file locks the fl->fl_file memory could be
> already released by another thread in filp_close(), so we couldn't
> depend on fl->fl_file to get the inode. Just use a xarray to record
> the opened files for each inode.
> 
> Cc: stable@xxxxxxxxxxxxxxx
> URL: https://tracker.ceph.com/issues/57986
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  fs/ceph/file.c  |  9 +++++++++
>  fs/ceph/inode.c |  4 ++++
>  fs/ceph/locks.c | 17 ++++++++++++++++-
>  fs/ceph/super.h |  4 ++++
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 85afcbbb5648..cb4a9c52df27 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>  			fi->flags |= CEPH_F_SYNC;
>  
>  		file->private_data = fi;
> +
> +		ret = xa_insert(&ci->i_opened_files, (unsigned long)file,
> +				CEPH_FILP_AVAILABLE, GFP_KERNEL);
> +		if (ret) {
> +			kmem_cache_free(ceph_file_cachep, fi);
> +			return ret;
> +		}
>  	}
>  
>  	ceph_get_fmode(ci, fmode, 1);
> @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file)
>  		dout("release inode %p regular file %p\n", inode, file);
>  		WARN_ON(!list_empty(&fi->rw_contexts));
>  
> +		xa_erase(&ci->i_opened_files, (unsigned long)file);
> +
>  		ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE);
>  		ceph_put_fmode(ci, fi->fmode, 1);
>  
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 77b0cd9af370..554450838e44 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>  	INIT_LIST_HEAD(&ci->i_unsafe_iops);
>  	spin_lock_init(&ci->i_unsafe_lock);
>  
> +	xa_init(&ci->i_opened_files);
> +
>  	ci->i_snap_realm = NULL;
>  	INIT_LIST_HEAD(&ci->i_snap_realm_item);
>  	INIT_LIST_HEAD(&ci->i_snap_flush_item);
> @@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  
> +	xa_destroy(&ci->i_opened_files);
> +
>  	kfree(ci->i_symlink);
>  #ifdef CONFIG_FS_ENCRYPTION
>  	kfree(ci->fscrypt_auth);
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index d8385dd0076e..a176a30badd0 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>  
>  static void ceph_fl_release_lock(struct file_lock *fl)
>  {
> -	struct ceph_file_info *fi = fl->fl_file->private_data;
>  	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>  	struct ceph_inode_info *ci;
> +	struct ceph_file_info *fi;
> +	void *val;
>  
>  	/*
>  	 * If inode is NULL it should be a request file_lock,
> @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl)
>  		return;
>  
>  	ci = ceph_inode(inode);
> +
> +	/*
> +	 * For Posix-style locks, it may race between filp_close()s,
> +	 * and it's possible that the 'file' memory pointed by
> +	 * 'fl->fl_file' has been released. If so just skip it.
> +	 */
> +	rcu_read_lock();
> +	val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
> +	if (val == CEPH_FILP_AVAILABLE) {
> +		fi = fl->fl_file->private_data;
> +		atomic_dec(&fi->num_locks);

Don't you need to remove the old atomic_dec from this function if you
move it here?

> +	}
> +	rcu_read_unlock();
> +
>  	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>  		/* clear error when all locks are released */
>  		spin_lock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 7b75a84ba48d..b3e89192cbec 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info {
>  	u64 version, index_version;
>  };
>  
> +#define CEPH_FILP_AVAILABLE         xa_mk_value(1)
> +
>  /*
>   * Ceph inode.
>   */
> @@ -434,6 +436,8 @@ struct ceph_inode_info {
>  	struct list_head i_unsafe_iops;   /* uncommitted mds inode ops */
>  	spinlock_t i_unsafe_lock;
>  
> +	struct xarray		i_opened_files;
> +
>  	union {
>  		struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
>  		struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */

This looks like it'll work, but it's a lot of extra work, having to
track this extra xarray just on the off chance that one of these fd's
might have file locks. The num_locks field is only checked in one place
in ceph_get_caps.

Here's what I'd recommend instead:

Have ceph_get_caps look at the lists in inode->i_flctx and see whether
any of its locks have an fl_file that matches the @filp argument in that
function. Most inodes never get any file locks, so in most cases  this
will turn out to just be a NULL pointer check for i_flctx anyway.

Then you can just remove the num_locks field and call the new helper
from ceph_get_caps instead. I'll send along a proposed patch for the
helper in a bit.
-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux