From: Xuehan Xu <xuxuehan@xxxxxx> Signed-off-by: Xuehan Xu <xuxuehan@xxxxxx> --- fs/ceph/mds_client.c | 34 +++++++++++++++++++++++++- fs/ceph/mds_client.h | 9 +++++++ fs/ceph/super.c | 5 +++- include/linux/ceph/osd_client.h | 11 ++++++++- include/linux/ceph/types.h | 1 - net/ceph/ceph_common.c | 2 +- net/ceph/ceph_fs.c | 1 - net/ceph/osd_client.c | 42 +++++++++++++++++++++++++++++++++ 8 files changed, 99 insertions(+), 6 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9049c2a3e972..4ba6b4de0f64 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -20,6 +20,10 @@ #include <linux/ceph/auth.h> #include <linux/ceph/debugfs.h> +#ifdef CONFIG_CGROUP_CEPH_FS +#include <linux/cgroup_cephfs.h> +#endif + #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) /* @@ -689,6 +693,9 @@ void ceph_mdsc_release_request(struct kref *kref) struct ceph_mds_request *req = container_of(kref, struct ceph_mds_request, r_kref); +#ifdef CONFIG_CGROUP_CEPH_FS + kfree(req->qitem.tokens_requested); +#endif destroy_reply_info(&req->r_reply_info); if (req->r_request) ceph_msg_put(req->r_request); @@ -2035,6 +2042,12 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); struct timespec64 ts; +#ifdef CONFIG_CGROUP_CEPH_FS + struct task_struct* tsk = current; + struct cgroup_subsys_state* css = tsk->cgroups->subsys[cephfs_cgrp_subsys.id]; + struct cephfscg* cephfscg_p = + container_of(css, struct cephfscg, css); +#endif if (!req) return ERR_PTR(-ENOMEM); @@ -2058,6 +2071,10 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) req->r_op = op; req->r_direct_mode = mode; + +#ifdef CONFIG_CGROUP_CEPH_FS + queue_item_init(&req->qitem, &cephfscg_p->meta_ops_throttle, META_OPS_TB_NUM); +#endif return req; } @@ -2689,9 +2706,22 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { int err; +#ifdef CONFIG_CGROUP_CEPH_FS + struct 
task_struct* ts = current; + struct cgroup_subsys_state* css = ts->cgroups->subsys[cephfs_cgrp_subsys.id]; + struct cephfscg* cephfscg_p = + container_of(css, struct cephfscg, css); +#endif dout("do_request on %p\n", req); +#ifdef CONFIG_CGROUP_CEPH_FS + req->qitem.tokens_requested[META_OPS_IOPS_IDX] = 1; + err = get_token_bucket_throttle(&cephfscg_p->meta_ops_throttle, &req->qitem); + if (err) + goto nolock_err; +#endif + /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ if (req->r_inode) ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); @@ -2755,6 +2785,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, out: mutex_unlock(&mdsc->mutex); +nolock_err: dout("do_request %p done, result %d\n", req, err); return err; } @@ -4166,7 +4197,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) strscpy(mdsc->nodename, utsname()->nodename, sizeof(mdsc->nodename)); - return 0; + + return 0; } /* diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 50385a481fdb..814f0ecca523 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -16,6 +16,10 @@ #include <linux/ceph/mdsmap.h> #include <linux/ceph/auth.h> +#ifdef CONFIG_CGROUP_CEPH_FS +#include <linux/cgroup_cephfs.h> +#endif + /* The first 8 bits are reserved for old ceph releases */ #define CEPHFS_FEATURE_MIMIC 8 #define CEPHFS_FEATURE_REPLY_ENCODING 9 @@ -284,6 +288,11 @@ struct ceph_mds_request { /* unsafe requests that modify the target inode */ struct list_head r_unsafe_target_item; +#ifdef CONFIG_CGROUP_CEPH_FS + /* requests that blocked by the token bucket throttle*/ + struct queue_item qitem; +#endif + struct ceph_mds_session *r_session; int r_attempts; /* resend attempts */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6d5bb2f74612..19d035dfe414 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1161,7 +1161,10 @@ MODULE_ALIAS_FS("ceph"); static int __init init_ceph(void) { - int ret = init_caches(); + int ret = 0; + pr_info("init_ceph\n"); + + ret = init_caches(); 
if (ret) goto out; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2294f963dab7..31011a1e9573 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -16,6 +16,10 @@ #include <linux/ceph/auth.h> #include <linux/ceph/pagelist.h> +#ifdef CONFIG_CGROUP_CEPH_FS +#include <linux/cgroup_cephfs.h> +#endif + struct ceph_msg; struct ceph_snap_context; struct ceph_osd_request; @@ -193,7 +197,12 @@ struct ceph_osd_request { int r_result; - struct ceph_osd_client *r_osdc; +#ifdef CONFIG_CGROUP_CEPH_FS + /* token bucket throttle item*/ + struct queue_item qitem; +#endif + + struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; struct completion r_completion; /* private to osd_client.c */ diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h index bd3d532902d7..3b404bdbb28f 100644 --- a/include/linux/ceph/types.h +++ b/include/linux/ceph/types.h @@ -27,5 +27,4 @@ struct ceph_cap_reservation { int used; }; - #endif diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79eac465ec65..a0087532516b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -503,7 +503,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); break; - case Opt_share: + case Opt_share: opt->flags &= ~CEPH_OPT_NOSHARE; break; case Opt_noshare: diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 756a2dc10d27..744a6e5c0cba 100644 --- a/net/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -4,7 +4,6 @@ */ #include <linux/module.h> #include <linux/ceph/types.h> - /* * return true if @layout appears to be valid */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index fa9530dd876e..27d9fdf88af6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -22,6 +22,10 @@ #include <linux/ceph/pagelist.h> #include <linux/ceph/striper.h> +#ifdef CONFIG_CGROUP_CEPH_FS +#include <linux/cgroup_cephfs.h> +#endif + #define 
OSD_OPREPLY_FRONT_LEN 512 static struct kmem_cache *ceph_osd_request_cache; @@ -492,6 +496,10 @@ static void ceph_osdc_release_request(struct kref *kref) req->r_request, req->r_reply); request_release_checks(req); +#ifdef CONFIG_CGROUP_CEPH_FS + kfree(req->qitem.tokens_requested); +#endif + if (req->r_request) ceph_msg_put(req->r_request); if (req->r_reply) @@ -587,6 +595,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, gfp_t gfp_flags) { struct ceph_osd_request *req; +#ifdef CONFIG_CGROUP_CEPH_FS + struct task_struct *ts = current; + struct cgroup_subsys_state *css = ts->cgroups->subsys[cephfs_cgrp_subsys.id]; + struct cephfscg *cephfscg_p = + container_of(css, struct cephfscg, css); + int r = 0; +#endif if (use_mempool) { BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); @@ -606,6 +621,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req->r_num_ops = num_ops; req->r_snapid = CEPH_NOSNAP; req->r_snapc = ceph_get_snap_context(snapc); +#ifdef CONFIG_CGROUP_CEPH_FS + r = queue_item_init(&req->qitem, &cephfscg_p->data_ops_throttle, DATA_OPS_TB_NUM); + if (unlikely(r)) { + ceph_osdc_put_request(req); + return NULL; + } +#endif dout("%s req %p\n", __func__, req); return req; @@ -4463,6 +4485,28 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, bool nofail) { +#ifdef CONFIG_CGROUP_CEPH_FS + struct task_struct *ts = current; + struct cgroup_subsys_state *css = ts->cgroups->subsys[cephfs_cgrp_subsys.id]; + struct cephfscg *cephfscg_p = + container_of(css, struct cephfscg, css); + int err = 0; + int i = 0; + u64 len = 0; + + dout("%s: req: %p, tid: %llu, tokens_requested: %p, tb_item_num: %d\n", __func__, req, req->r_tid, req->qitem.tokens_requested, req->qitem.tb_item_num); + req->qitem.tokens_requested[DATA_OPS_IOPS_IDX] = 1; + for (i = 0; i < req->r_num_ops; i++) { + if (req->r_ops[i].op == CEPH_OSD_OP_READ + || req->r_ops[i].op == CEPH_OSD_OP_WRITE) + len += req->r_ops[i].extent.length; + } + dout("%s: req: 
%llu, ops: %u, len: %llu\n", __func__, req->r_tid, req->r_num_ops, len); + req->qitem.tokens_requested[DATA_OPS_BAND_IDX] = len; + err = get_token_bucket_throttle(&cephfscg_p->data_ops_throttle, &req->qitem); + if (err) + return err; +#endif down_read(&osdc->lock); submit_request(req, false); up_read(&osdc->lock); -- 2.20.1