From: Xiubo Li <xiubli@xxxxxxxxxx> This will fulfill the caps hit/miss metric for each session. When checking the "need" mask and if one cap has the subset of the "need" mask it means hit, or missed. item total miss hit ------------------------------------------------- d_lease 295 0 993 session caps miss hit ------------------------------------------------- 0 295 107 4119 1 1 107 9 URL: https://tracker.ceph.com/issues/43215 Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> --- fs/ceph/acl.c | 2 ++ fs/ceph/addr.c | 2 ++ fs/ceph/caps.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/debugfs.c | 20 ++++++++++++ fs/ceph/dir.c | 9 ++++-- fs/ceph/file.c | 3 ++ fs/ceph/mds_client.c | 16 +++++++++- fs/ceph/mds_client.h | 3 ++ fs/ceph/quota.c | 9 ++++-- fs/ceph/super.h | 11 +++++++ fs/ceph/xattr.c | 17 ++++++++-- 11 files changed, 158 insertions(+), 8 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 26be6520d3fb..58e119e3519f 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -22,6 +22,8 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); spin_lock(&ci->i_ceph_lock); + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)) set_cached_acl(inode, type, acl); else diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7ab616601141..29d4513eff8c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1706,6 +1706,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) err = -ENOMEM; goto out; } + + ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA); err = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, true); if (err < 0) { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7fc87b693ba4..af2e9e826f8c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -783,6 +783,75 @@ static int __cap_is_valid(struct ceph_cap *cap) return 1; } +/* + * Counts the cap metric. + */ +void __ceph_caps_metric(struct ceph_inode_info *ci, int mask) +{ + int have = ci->i_snap_caps; + struct ceph_mds_session *s; + struct ceph_cap *cap; + struct rb_node *p; + bool skip_auth = false; + + lockdep_assert_held(&ci->i_ceph_lock); + + if (mask <= 0) + return; + + /* Counts the snap caps metric in the auth cap */ + if (ci->i_auth_cap) { + cap = ci->i_auth_cap; + if (have) { + have |= cap->issued; + + dout("%s %p cap %p issued %s, mask %s\n", __func__, + &ci->vfs_inode, cap, ceph_cap_string(cap->issued), + ceph_cap_string(mask)); + + s = ceph_get_mds_session(cap->session); + if (s) { + if (mask & have) + percpu_counter_inc(&s->i_caps_hit); + else + percpu_counter_inc(&s->i_caps_mis); + ceph_put_mds_session(s); + } + skip_auth = true; + } + } + + if ((mask & have) == mask) + return; + + /* Checks others */ + for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + cap = rb_entry(p, struct ceph_cap, ci_node); + if (!__cap_is_valid(cap)) + continue; + + if (skip_auth && cap == ci->i_auth_cap) + continue; + + dout("%s %p cap %p issued %s, mask %s\n", __func__, + &ci->vfs_inode, cap, ceph_cap_string(cap->issued), + ceph_cap_string(mask)); + + s = ceph_get_mds_session(cap->session); + if (s) { + if (mask & cap->issued) + percpu_counter_inc(&s->i_caps_hit); + else + percpu_counter_inc(&s->i_caps_mis); + ceph_put_mds_session(s); + } + + have |= cap->issued; + if ((mask & have) == mask) + return; + } +} + /* * Return set of valid cap bits issued to us. Note that caps time * out, and may be invalidated in bulk if the client session times out @@ -2746,6 +2815,7 @@ static void check_max_size(struct inode *inode, loff_t endoff) int ceph_try_get_caps(struct inode *inode, int need, int want, bool nonblock, int *got) { + struct ceph_inode_info *ci = ceph_inode(inode); int ret; BUG_ON(need & ~CEPH_CAP_FILE_RD); @@ -2758,6 +2828,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want, BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO | CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | CEPH_CAP_ANY_DIR_OPS)); + ceph_caps_metric(ci, need | want); ret = try_get_cap_refs(inode, need, want, 0, nonblock, got); return ret == -EAGAIN ? 0 : ret; } @@ -2784,6 +2855,8 @@ int ceph_get_caps(struct file *filp, int need, int want, fi->filp_gen != READ_ONCE(fsc->filp_gen)) return -EBADF; + ceph_caps_metric(ci, need | want); + while (true) { if (endoff > 0) check_max_size(inode, endoff); @@ -2871,6 +2944,7 @@ int ceph_get_caps(struct file *filp, int need, int want, * getattr request will bring inline data into * page cache */ + ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA); ret = __ceph_do_getattr(inode, NULL, CEPH_STAT_CAP_INLINE_DATA, true); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 40a22da0214a..c132fdb40d53 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -128,6 +128,7 @@ static int metric_show(struct seq_file *s, void *p) { struct ceph_fs_client *fsc = s->private; struct ceph_mds_client *mdsc = fsc->mdsc; + int i; seq_printf(s, "item total miss hit\n"); seq_printf(s, "-------------------------------------------------\n"); @@ -137,6 +138,25 @@ static int metric_show(struct seq_file *s, void *p) percpu_counter_sum(&mdsc->metric.d_lease_mis), percpu_counter_sum(&mdsc->metric.d_lease_hit)); + seq_printf(s, "\n"); + seq_printf(s, "session caps miss hit\n"); + seq_printf(s, "-------------------------------------------------\n"); + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + struct ceph_mds_session *session; + + session = __ceph_lookup_mds_session(mdsc, i); + if (!session) + continue; + seq_printf(s, "%-14d%-16d%-16lld%lld\n", i, + session->s_nr_caps, + percpu_counter_sum(&session->i_caps_mis), + percpu_counter_sum(&session->i_caps_hit)); + ceph_put_mds_session(session); + } + mutex_unlock(&mdsc->mutex); + return 0; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 658c55b323cc..33eb239e09e2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -313,7 +313,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; int i; - int err; + int err, ret = -1; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; @@ -346,13 +346,16 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && - __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { + (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { int shared_gen = atomic_read(&ci->i_shared_gen); + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); spin_unlock(&ci->i_ceph_lock); err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) return err; } else { + if (ret != -1) + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); spin_unlock(&ci->i_ceph_lock); } @@ -757,6 +760,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&ci->i_ceph_lock); + __ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED); + dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); if (strncmp(dentry->d_name.name, fsc->mount_options->snapdir_name, diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 1e6cdf2dfe90..c78dfbbb7b91 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -384,6 +384,8 @@ int ceph_open(struct inode *inode, struct file *file) * asynchronously. */ spin_lock(&ci->i_ceph_lock); + __ceph_caps_metric(ci, wanted); + if (__ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci, true); @@ -1340,6 +1342,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) return -ENOMEM; } + ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA); statret = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, !!page); if (statret < 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a24fd00676b8..141c1c03636c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -558,6 +558,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s) if (refcount_dec_and_test(&s->s_ref)) { if (s->s_auth.authorizer) ceph_auth_destroy_authorizer(s->s_auth.authorizer); + percpu_counter_destroy(&s->i_caps_hit); + percpu_counter_destroy(&s->i_caps_mis); kfree(s); } } @@ -598,6 +600,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int mds) { struct ceph_mds_session *s; + int err; if (mds >= mdsc->mdsmap->possible_max_rank) return ERR_PTR(-EINVAL); @@ -612,8 +615,10 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, dout("%s: realloc to %d\n", __func__, newmax); sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); - if (!sa) + if (!sa) { + err = -ENOMEM; goto fail_realloc; + } if (mdsc->sessions) { memcpy(sa, mdsc->sessions, mdsc->max_sessions * sizeof(void *)); @@ -653,6 +658,13 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_cap_flushing); + err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS); + if (err) + goto fail_realloc; + err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS); + if (err) + goto fail_init; + mdsc->sessions[mds] = s; atomic_inc(&mdsc->num_sessions); refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ @@ -662,6 +674,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, return s; +fail_init: + percpu_counter_destroy(&s->i_caps_hit); fail_realloc: kfree(s); return ERR_PTR(-ENOMEM); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index dd1f417b90eb..ba74ff74c59c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -201,6 +201,9 @@ struct ceph_mds_session { struct list_head s_waiting; /* waiting requests */ struct list_head s_unsafe; /* unsafe requests */ + + struct percpu_counter i_caps_hit; + struct percpu_counter i_caps_mis; }; /* diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index de56dee60540..4ce2f658e63d 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -147,9 +147,14 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, return NULL; } if (qri->inode) { + struct ceph_inode_info *ci = ceph_inode(qri->inode); + int ret; + + ceph_caps_metric(ci, CEPH_STAT_CAP_INODE); + /* get caps */ - int ret = __ceph_do_getattr(qri->inode, NULL, - CEPH_STAT_CAP_INODE, true); + ret = __ceph_do_getattr(qri->inode, NULL, + CEPH_STAT_CAP_INODE, true); if (ret >= 0) in = qri->inode; else diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7af91628636c..3f4829222528 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -641,6 +641,14 @@ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) return !RB_EMPTY_ROOT(&ci->i_caps); } +extern void __ceph_caps_metric(struct ceph_inode_info *ci, int mask); +static inline void ceph_caps_metric(struct ceph_inode_info *ci, int mask) +{ + spin_lock(&ci->i_ceph_lock); + __ceph_caps_metric(ci, mask); + spin_unlock(&ci->i_ceph_lock); +} + extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented); extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t); extern int __ceph_caps_issued_other(struct ceph_inode_info *ci, @@ -927,6 +935,9 @@ extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force); static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) { + struct ceph_inode_info *ci = ceph_inode(inode); + + ceph_caps_metric(ci, mask); return __ceph_do_getattr(inode, NULL, mask, force); } extern int ceph_permission(struct inode *inode, int mask); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index d58fa14c1f01..ebd522edb0a8 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -829,6 +829,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, struct ceph_vxattr *vxattr = NULL; int req_mask; ssize_t err; + int ret = -1; /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); @@ -856,7 +857,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, if (ci->i_xattrs.version == 0 || !((req_mask & CEPH_CAP_XATTR_SHARED) || - __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) { + (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)))) { + if (ret != -1) + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); spin_unlock(&ci->i_ceph_lock); /* security module gets xattr while filling trace */ @@ -871,6 +874,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, if (err) return err; spin_lock(&ci->i_ceph_lock); + } else { + if (ret != -1) + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); } err = __build_xattrs(inode); @@ -907,19 +913,24 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) struct ceph_inode_info *ci = ceph_inode(inode); bool len_only = (size == 0); u32 namelen; - int err; + int err, ret = -1; spin_lock(&ci->i_ceph_lock); dout("listxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || - !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { + !(ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) { + if (ret != -1) + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); spin_unlock(&ci->i_ceph_lock); err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); if (err) return err; spin_lock(&ci->i_ceph_lock); + } else { + if (ret != -1) + __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED); } err = __build_xattrs(inode); -- 2.21.0