On Mon, 2022-12-12 at 18:56 +0100, Ilya Dryomov wrote:
> On Fri, Nov 18, 2022 at 3:07 AM <xiubli@xxxxxxxxxx> wrote:
> >
> > From: Xiubo Li <xiubli@xxxxxxxxxx>
> >
> > When ceph releases a file_lock it tries to get the inode pointer from
> > fl->fl_file, but that memory may already have been released by another
> > thread in filp_close(), because the VFS layer does not take a reference
> > on the file for fl->fl_file.
> >
> > Switch to using a ceph-dedicated lock info struct to track the inode.
> >
> > In ceph_fl_release_lock() we should skip all the operations if
> > fl->fl_u.ceph_fl.fl_inode is not set, which means this is a request
> > file_lock. We set fl->fl_u.ceph_fl.fl_inode when inserting the lock
> > into the inode's lock list, i.e. when copying the lock.
> >
> > Cc: stable@xxxxxxxxxxxxxxx
> > Cc: Jeff Layton <jlayton@xxxxxxxxxx>
> > URL: https://tracker.ceph.com/issues/57986
> > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> > ---
> >  fs/ceph/locks.c                 | 20 ++++++++++++++++++--
> >  include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
> >  include/linux/fs.h              |  2 ++
> >  3 files changed, 37 insertions(+), 2 deletions(-)
> >  create mode 100644 include/linux/ceph/ceph_fs_fl.h
> >
> > diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> > index b191426bf880..621f38f10a88 100644
> > --- a/fs/ceph/locks.c
> > +++ b/fs/ceph/locks.c
> > @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> >  {
> >         struct inode *inode = file_inode(dst->fl_file);
> >         atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > +       dst->fl_u.ceph_fl.fl_inode = igrab(inode);
> >  }
> >
> > +/*
> > + * Do not use the 'fl->fl_file' in release function, which
> > + * is possibly already released by another thread.
> > + */
> >  static void ceph_fl_release_lock(struct file_lock *fl)
> >  {
> > -       struct inode *inode = file_inode(fl->fl_file);
> > -       struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> > +       struct ceph_inode_info *ci;
> > +
> > +       /*
> > +        * If inode is NULL it should be a request file_lock,
> > +        * nothing we can do.
> > +        */
> > +       if (!inode)
> > +               return;
> > +
> > +       ci = ceph_inode(inode);
> >         if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> >                 /* clear error when all locks are released */
> >                 spin_lock(&ci->i_ceph_lock);
> >                 ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> >                 spin_unlock(&ci->i_ceph_lock);
> >         }
> > +       fl->fl_u.ceph_fl.fl_inode = NULL;
> > +       iput(inode);
> >  }
> >
> >  static const struct file_lock_operations ceph_fl_lock_ops = {
> > diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> > new file mode 100644
> > index 000000000000..ad1cf96329f9
> > --- /dev/null
> > +++ b/include/linux/ceph/ceph_fs_fl.h
> > @@ -0,0 +1,17 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * ceph_fs_fl.h - Ceph lock info
> > + *
> > + * LGPL2
> > + */
> > +
> > +#ifndef CEPH_FS_FL_H
> > +#define CEPH_FS_FL_H
> > +
> > +#include <linux/fs.h>
> > +
> > +struct ceph_lock_info {
> > +       struct inode *fl_inode;
> > +};
> > +
> > +#endif
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index d6cb42b7e91c..2b03d5e375d7 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
> >
> >  /* that will die - we need it for nfs_lock_info */
> >  #include <linux/nfs_fs_i.h>
> > +#include <linux/ceph/ceph_fs_fl.h>
> >
> >  /*
> >   * struct file_lock represents a generic "file lock".  It's used to represent
> > @@ -1119,6 +1120,7 @@ struct file_lock {
> >                         int state;              /* state of grant or error if -ve */
> >                         unsigned int    debug_id;
> >                 } afs;
> > +               struct ceph_lock_info   ceph_fl;
>
> Hi Xiubo and Jeff,
>
> Xiubo, instead of defining struct ceph_lock_info and including
> a CephFS-specific header file in linux/fs.h, I think we should repeat
> what was done for AFS -- particularly given that ceph_lock_info ends up
> being a dummy type that isn't mentioned anywhere else.
>
> Jeff, could you please ack this with your file locking hat on?
>

ACK. I think that would be cleaner.

Thanks,
-- 
Jeff Layton <jlayton@xxxxxxxxxx>
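For reference, the AFS-style approach Ilya is suggesting would amount to dropping the new ceph_fs_fl.h header and instead adding an anonymous struct directly to the fl_u union in include/linux/fs.h, next to the existing afs member. Below is a rough, untested sketch of what that could look like; the ceph member and its field name are illustrative rather than taken from this patch, and the surrounding union members are shown as they appear in mainline around this time:

        /* include/linux/fs.h: the fl_u union inside struct file_lock */
        union {
                struct nfs_lock_info    nfs_fl;
                struct nfs4_lock_info   nfs4_fl;
                struct {
                        struct list_head link;  /* link in AFS vnode's pending_locks list */
                        int state;              /* state of grant or error if -ve */
                        unsigned int    debug_id;
                } afs;
                struct {
                        struct inode *inode;    /* pinned via igrab() in ceph_fl_copy_lock(),
                                                   dropped with iput() in ceph_fl_release_lock() */
                } ceph;
        } fl_u;

With that layout, ceph_fl_copy_lock() and ceph_fl_release_lock() would reference fl->fl_u.ceph.inode instead of fl->fl_u.ceph_fl.fl_inode, and linux/fs.h would not need to pull in a CephFS-specific header.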