On Fri, 2020-02-21 at 21:16 +0800, Yan, Zheng wrote: > original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags. > The flag indicates that mds closed session and caps were dropped. This > patch is preparation for not requesting caps for idle open files. > > CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also > remove it. > > Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx> > --- > fs/ceph/caps.c | 36 +++++++++++++++--------------------- > fs/ceph/mds_client.c | 5 ----- > fs/ceph/super.h | 11 +++++------ > 3 files changed, 20 insertions(+), 32 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index d05717397c2a..293920d013ff 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2659,6 +2659,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, > } > } else { > int session_readonly = false; > + int mds_wanted; > if (ci->i_auth_cap && > (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) { > struct ceph_mds_session *s = ci->i_auth_cap->session; > @@ -2667,32 +2668,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, > spin_unlock(&s->s_cap_lock); > } > if (session_readonly) { > - dout("get_cap_refs %p needed %s but mds%d readonly\n", > + dout("get_cap_refs %p need %s but mds%d readonly\n", > inode, ceph_cap_string(need), ci->i_auth_cap->mds); > ret = -EROFS; > goto out_unlock; > } > > - if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) { > - int mds_wanted; > - if (READ_ONCE(mdsc->fsc->mount_state) == > - CEPH_MOUNT_SHUTDOWN) { > - dout("get_cap_refs %p forced umount\n", inode); > - ret = -EIO; > - goto out_unlock; > - } > - mds_wanted = __ceph_caps_mds_wanted(ci, false); > - if (need & ~(mds_wanted & need)) { > - dout("get_cap_refs %p caps were dropped" > - " (session killed?)\n", inode); > - ret = -ESTALE; > - goto out_unlock; > - } > - if (!(file_wanted & ~mds_wanted)) > - ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED; > + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { > + dout("get_cap_refs %p forced umount\n", inode); > + ret = -EIO; > + goto out_unlock; > + } > + mds_wanted = __ceph_caps_mds_wanted(ci, false); > + if (need & ~mds_wanted) { > + dout("get_cap_refs %p need %s > mds_wanted %s\n", > + inode, ceph_cap_string(need), > + ceph_cap_string(mds_wanted)); > + ret = -ESTALE; > + goto out_unlock; > } > I was able to reproduce softlockups with xfstests reliably for a little while, but it doesn't always happen. I bisected it down to this patch though. I suspect the problem is in the hunk above. It looks like it's getting into a situation where this is continually returning ESTALE. I cranked up debug logging in this function and I see this: [ 259.284839] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.284848] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.284855] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.284863] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284868] ceph: get_cap_refs 000000003d7d65fa need Fr > mds_wanted - [ 259.284877] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284885] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284890] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284899] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.284908] ceph: get_cap_refs 000000003d7d65fa need Fr > mds_wanted - [ 259.284918] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284926] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284945] ceph: get_cap_refs 000000003d7d65fa need Fr > mds_wanted - [ 259.284950] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284961] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284969] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc [ 259.284975] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.284984] ceph: get_cap_refs 000000003d7d65fa need Fr > mds_wanted - [ 259.284994] ceph: get_cap_refs 000000003d7d65fa ret -116 got - [ 259.285003] ceph: get_cap_refs 000000003d7d65fa need Fr want Fc ...not sure I understand the logical flow in this function well enough to suggest a fix yet though. > - dout("get_cap_refs %p have %s needed %s\n", inode, > + dout("get_cap_refs %p have %s need %s\n", inode, > ceph_cap_string(have), ceph_cap_string(need)); > } > out_unlock: > @@ -3646,8 +3642,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, > goto out_unlock; > > if (target < 0) { > - if (cap->mds_wanted | cap->issued) > - ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; > __ceph_remove_cap(cap, false); > goto out_unlock; > } > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index fab9d6461a65..98d746b3bb53 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -1411,8 +1411,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, > dout("removing cap %p, ci is %p, inode is %p\n", > cap, ci, &ci->vfs_inode); > spin_lock(&ci->i_ceph_lock); > - if (cap->mds_wanted | cap->issued) > - ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; > __ceph_remove_cap(cap, false); > if (!ci->i_auth_cap) { > struct ceph_cap_flush *cf; > @@ -1578,9 +1576,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, > /* mds did not re-issue stale cap */ > spin_lock(&ci->i_ceph_lock); > cap->issued = cap->implemented = CEPH_CAP_PIN; > - /* make sure mds knows what we want */ > - if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted) > - ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; > spin_unlock(&ci->i_ceph_lock); > } > } else if (ev == FORCE_RO) { > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 37dc1ac8f6c3..48e84d7f48a0 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -517,12 +517,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, > #define CEPH_I_POOL_RD (1 << 4) /* can read from pool */ > #define CEPH_I_POOL_WR (1 << 5) /* can write to pool */ > #define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ > -#define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ > -#define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ > -#define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snapss */ > -#define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ > -#define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ > -#define CEPH_I_ODIRECT (1 << 12) /* inode in direct I/O mode */ > +#define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */ > +#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ > +#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ > +#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ > +#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ > > /* > * Masks of ceph inode work. -- Jeff Layton <jlayton@xxxxxxxxxx>