On Thu, 2022-04-21 at 15:03 +0800, Xiubo Li wrote: > If any 'x' caps are issued we can just choose the auth MDS instead > of a random replica MDS, because only when the Locker is in the > LOCK_EXEC state can the loner client get the 'x' caps. If > we send the getattr request to a replica MDS, it must auth pin > and try to rdlock from the auth MDS, and then the auth MDS needs > to do the Locker state transition to LOCK_SYNC. After that the > lock state will change back. > > This Locker state transition is costly and usually > requires revoking caps from clients. > > URL: https://tracker.ceph.com/issues/55240 > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > --- > fs/ceph/addr.c | 4 +++- > fs/ceph/inode.c | 26 +++++++++++++++++++++++++- > fs/ceph/super.h | 1 + > 3 files changed, 29 insertions(+), 2 deletions(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index 02722ac86d73..261bc8bb2ab8 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -256,6 +256,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) > struct iov_iter iter; > ssize_t err = 0; > size_t len; > + int mode; > > __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); > __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); > @@ -264,7 +265,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) > goto out; > > /* We need to fetch the inline data. 
*/ > - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); > + mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA); > + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); > if (IS_ERR(req)) { > err = PTR_ERR(req); > goto out; > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index b45f321910af..d05b91391f17 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -2260,6 +2260,30 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, > return err; > } > > +int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) > +{ > + int issued = ceph_caps_issued(ceph_inode(inode)); > + > + /* > + * If any 'x' caps is issued we can just choose the auth MDS > + * instead of the random replica MDSes. Because only when the > + * Locker is in LOCK_EXEC state will the loner client could > + * get the 'x' caps. And if we send the getattr requests to > + * any replica MDS it must auth pin and tries to rdlock from > + * the auth MDS, and then the auth MDS need to do the Locker > + * state transition to LOCK_SYNC. And after that the lock state > + * will change back. > + * > + * This cost much when doing the Locker state transition and > + * usually will need to revoke caps from clients. > + */ > + if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL)) > + || (mask & CEPH_STAT_RSTAT)) > + return USE_AUTH_MDS; > + else > + return USE_ANY_MDS; > +} > + > /* > * Verify that we have a lease on the given mask. If not, > * do a getattr against an mds. > @@ -2283,7 +2307,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, > if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) > return 0; > > - mode = (mask & CEPH_STAT_RSTAT) ? 
USE_AUTH_MDS : USE_ANY_MDS; > + mode = ceph_try_to_choose_auth_mds(inode, mask); > req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); > if (IS_ERR(req)) > return PTR_ERR(req); > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 73db7f6021f3..669036ebef1e 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1024,6 +1024,7 @@ static inline void ceph_queue_flush_snaps(struct inode *inode) > ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS); > } > > +extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask); > extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, > int mask, bool force); > static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) Nice optimization. Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>