Call __ceph_touch_fmode() for dir operations. __ceph_caps_file_wanted() calculate dir's wanted caps according to last dir read/modification. If there is recent dir read, dir inode wants CEPH_CAP_ANY_SHARED caps, if there is recent dir modification, also wants CEPH_CAP_FILE_EXCL. Readdir is a special, dir inode wants CEPH_CAP_FILE_EXCL after readdir. Because if dir indoe has CEPH_CAP_FILE_EXCL, later dir modifications do not need to release CEPH_CAP_FILE_SHARED, invalidate all dentry leases issued by readdir. Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx> --- fs/ceph/caps.c | 30 ++++++++++++++++++++++-------- fs/ceph/dir.c | 21 +++++++++++++++------ fs/ceph/mds_client.c | 11 +++++++++-- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 622568cd6d8a..4ff832edd9dd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -978,12 +978,29 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) ceph_inode_to_client(&ci->vfs_inode)->mount_options; unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ; unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ; - int bits = 0; if (S_ISDIR(ci->vfs_inode.i_mode)) { - if (ci->i_nr_by_mode[PIN_SHIFT] > 0) - bits |= 1 << PIN_SHIFT; + int want = 0; + + /* use used_cutoff here, to keep dir's wanted caps longer */ + if (ci->i_nr_by_mode[RD_SHIFT] > 0 || + time_after(ci->i_last_rd, used_cutoff)) + want |= CEPH_CAP_ANY_SHARED; + + if (ci->i_nr_by_mode[WR_SHIFT] > 0 || + time_after(ci->i_last_wr, used_cutoff)) { + want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; + if (opt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) + want |= CEPH_CAP_ANY_DIR_OPS; + } + + if (want || ci->i_nr_by_mode[PIN_SHIFT] > 0) + want |= CEPH_CAP_PIN; + + return want; } else { + int bits = 0; + if (ci->i_nr_by_mode[RD_SHIFT] > 0) { if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS || time_after(ci->i_last_rd, used_cutoff)) @@ -1005,9 +1022,8 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) ci->i_nr_by_mode[LAZY_SHIFT] > 0) bits |= 1 << LAZY_SHIFT; + return bits ? ceph_caps_for_mode(bits >> 1) : 0; } - - return bits ? ceph_caps_for_mode(bits >> 1) : 0; } /* @@ -1888,9 +1904,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, if (IS_RDONLY(inode)) { want = CEPH_CAP_ANY_SHARED; } else { - want = CEPH_CAP_ANY_SHARED | - CEPH_CAP_FILE_EXCL | - CEPH_CAP_ANY_DIR_OPS; + want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; } retain |= want; } else { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ee6b319e5481..d594c2627430 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -335,8 +335,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ctx->pos = 2; } - /* can we use the dcache? */ spin_lock(&ci->i_ceph_lock); + /* request Fx cap. if have Fx, we don't need to release Fs cap + * for later create/unlink. */ + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR); + /* can we use the dcache? */ if (ceph_test_mount_opt(fsc, DCACHE) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && @@ -760,6 +763,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, ceph_test_mount_opt(fsc, DCACHE) && __ceph_dir_is_complete(ci) && (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); spin_unlock(&ci->i_ceph_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); @@ -1621,7 +1625,8 @@ static int __dir_lease_try_check(const struct dentry *dentry) /* * Check if directory-wide content lease/cap is valid. */ -static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) +static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, + struct ceph_mds_client *mdsc) { struct ceph_inode_info *ci = ceph_inode(dir); int valid; @@ -1629,7 +1634,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) spin_lock(&ci->i_ceph_lock); valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); - shared_gen = atomic_read(&ci->i_shared_gen); + if (valid) { + __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); + shared_gen = atomic_read(&ci->i_shared_gen); + } spin_unlock(&ci->i_ceph_lock); if (valid) { struct ceph_dentry_info *di; @@ -1655,6 +1663,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) int valid = 0; struct dentry *parent; struct inode *dir, *inode; + struct ceph_mds_client *mdsc; if (flags & LOOKUP_RCU) { parent = READ_ONCE(dentry->d_parent); @@ -1671,6 +1680,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry, dentry, inode, ceph_dentry(dentry)->offset); + mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; + /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, @@ -1682,7 +1693,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) valid = dentry_lease_is_valid(dentry, flags); if (valid == -ECHILD) return valid; - if (valid || dir_lease_is_valid(dir, dentry)) { + if (valid || dir_lease_is_valid(dir, dentry, mdsc)) { if (inode) valid = ceph_is_any_caps(inode); else @@ -1691,8 +1702,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) } if (!valid) { - struct ceph_mds_client *mdsc = - ceph_sb_to_client(dir->i_sb)->mdsc; struct ceph_mds_request *req; int op, err; u32 mask; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 68b8afded466..486f91f9685b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2486,7 +2486,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_inode_drop) releases += ceph_encode_inode_release(&p, req->r_inode ? req->r_inode : d_inode(req->r_dentry), - mds, req->r_inode_drop, req->r_inode_unless, 0); + mds, req->r_inode_drop, req->r_inode_unless, + req->r_op == CEPH_MDS_OP_READDIR); if (req->r_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_dentry, req->r_parent, mds, req->r_dentry_drop, @@ -2833,7 +2834,13 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (req->r_inode) ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); if (req->r_parent) { - ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); + struct ceph_inode_info *ci = ceph_inode(req->r_parent); + int fmode = (req->r_op & CEPH_MDS_OP_WRITE) ? + CEPH_FILE_MODE_WR : CEPH_FILE_MODE_RD; + spin_lock(&ci->i_ceph_lock); + ceph_take_cap_refs(ci, CEPH_CAP_PIN, false); + __ceph_touch_fmode(ci, mdsc, fmode); + spin_unlock(&ci->i_ceph_lock); ihold(req->r_parent); } if (req->r_old_dentry_dir) -- 2.21.1