On Mon, 2022-11-14 at 13:19 +0800, xiubli@xxxxxxxxxx wrote: > From: Xiubo Li <xiubli@xxxxxxxxxx> > > When releasing the file locks the fl->fl_file memory could be > already released by another thread in filp_close(), so we couldn't > depend on fl->fl_file to get the inode. Just use a xarray to record > the opened files for each inode. > > Cc: stable@xxxxxxxxxxxxxxx > URL: https://tracker.ceph.com/issues/57986 > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > --- > fs/ceph/file.c | 9 +++++++++ > fs/ceph/inode.c | 4 ++++ > fs/ceph/locks.c | 17 ++++++++++++++++- > fs/ceph/super.h | 4 ++++ > 4 files changed, 33 insertions(+), 1 deletion(-) > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 85afcbbb5648..cb4a9c52df27 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, > fi->flags |= CEPH_F_SYNC; > > file->private_data = fi; > + > + ret = xa_insert(&ci->i_opened_files, (unsigned long)file, > + CEPH_FILP_AVAILABLE, GFP_KERNEL); > + if (ret) { > + kmem_cache_free(ceph_file_cachep, fi); > + return ret; > + } > } > > ceph_get_fmode(ci, fmode, 1); > @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file) > dout("release inode %p regular file %p\n", inode, file); > WARN_ON(!list_empty(&fi->rw_contexts)); > > + xa_erase(&ci->i_opened_files, (unsigned long)file); > + > ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); > ceph_put_fmode(ci, fi->fmode, 1); > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 77b0cd9af370..554450838e44 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) > INIT_LIST_HEAD(&ci->i_unsafe_iops); > spin_lock_init(&ci->i_unsafe_lock); > > + xa_init(&ci->i_opened_files); > + > ci->i_snap_realm = NULL; > INIT_LIST_HEAD(&ci->i_snap_realm_item); > INIT_LIST_HEAD(&ci->i_snap_flush_item); > @@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode) > { > struct ceph_inode_info *ci = ceph_inode(inode); > > + xa_destroy(&ci->i_opened_files); > + > kfree(ci->i_symlink); > #ifdef CONFIG_FS_ENCRYPTION > kfree(ci->fscrypt_auth); > diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c > index d8385dd0076e..a176a30badd0 100644 > --- a/fs/ceph/locks.c > +++ b/fs/ceph/locks.c > @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) > > static void ceph_fl_release_lock(struct file_lock *fl) > { > - struct ceph_file_info *fi = fl->fl_file->private_data; > struct inode *inode = fl->fl_u.ceph_fl.fl_inode; > struct ceph_inode_info *ci; > + struct ceph_file_info *fi; > + void *val; > > /* > * If inode is NULL it should be a request file_lock, > @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl) > return; > > ci = ceph_inode(inode); > + > + /* > + * For Posix-style locks, it may race between filp_close()s, > + * and it's possible that the 'file' memory pointed by > + * 'fl->fl_file' has been released. If so just skip it. > + */ > + rcu_read_lock(); > + val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file); > + if (val == CEPH_FILP_AVAILABLE) { > + fi = fl->fl_file->private_data; > + atomic_dec(&fi->num_locks); Don't you need to remove the old atomic_dec from this function if you move it here? > + } > + rcu_read_unlock(); > + > if (atomic_dec_and_test(&ci->i_filelock_ref)) { > /* clear error when all locks are released */ > spin_lock(&ci->i_ceph_lock); > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 7b75a84ba48d..b3e89192cbec 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info { > u64 version, index_version; > }; > > +#define CEPH_FILP_AVAILABLE xa_mk_value(1) > + > /* > * Ceph inode. > */ > @@ -434,6 +436,8 @@ struct ceph_inode_info { > struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ > spinlock_t i_unsafe_lock; > > + struct xarray i_opened_files; > + > union { > struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ > struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */ This looks like it'll work, but it's a lot of extra work, having to track this extra xarray just on the off chance that one of these fd's might have file locks. The num_locks field is only checked in one place in ceph_get_caps. Here's what I'd recommend instead: Have ceph_get_caps look at the lists in inode->i_flctx and see whether any of its locks have an fl_file that matches the @filp argument in that function. Most inodes never get any file locks, so in most cases this will turn out to just be a NULL pointer check for i_flctx anyway. Then you can just remove the num_locks field and call the new helper from ceph_get_caps instead. I'll send along a proposed patch for the helper in a bit. -- Jeff Layton <jlayton@xxxxxxxxxx>