Hi Vivek,

This patch exports a cgroup-based per-group request limits interface and
removes the global one. Different groups can now be given different
request allocation limits through this interface.

Signed-off-by: Gui Jianfeng <guijianfeng@xxxxxxxxxxxxxx>
---
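A quick usage sketch (not part of the patch; the mount point, group name
and value are illustrative, and the file name assumes the controller is
registered as "io"):

	# mount -t cgroup -o io none /cgroup
	# mkdir /cgroup/test1
	# echo 128 > /cgroup/test1/io.nr_group_requests
	# cat /cgroup/test1/io.nr_group_requests
	128

Writes below BLKDEV_MIN_RQ are rounded up to BLKDEV_MIN_RQ, and a freshly
created group starts at BLKDEV_MAX_GROUP_RQ.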
 block/blk-core.c     |   23 ++++++++++--
 block/blk-settings.c |    1 -
 block/blk-sysfs.c    |   43 -----------------------
 block/elevator-fq.c  |   94 ++++++++++++++++++++++++++++++++++++++++++++++---
 block/elevator-fq.h  |    4 ++
 5 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 79fe6a9..7010b76 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -722,13 +722,20 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
 static void __freed_request(struct request_queue *q, int sync,
					struct request_list *rl)
 {
+	struct io_group *iog;
+	unsigned long nr_group_requests;
+
 	if (q->rq_data.count[sync] < queue_congestion_off_threshold(q))
 		blk_clear_queue_congested(q, sync);
 
 	if (q->rq_data.count[sync] + 1 <= q->nr_requests)
 		blk_clear_queue_full(q, sync);
 
-	if (rl->count[sync] + 1 <= q->nr_group_requests) {
+	iog = rl_iog(rl);
+
+	nr_group_requests = get_group_requests(q, iog);
+
+	if (nr_group_requests && rl->count[sync] + 1 <= nr_group_requests) {
 		if (waitqueue_active(&rl->wait[sync]))
 			wake_up(&rl->wait[sync]);
 	}
@@ -828,6 +835,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue, priv;
 	int sleep_on_global = 0;
+	struct io_group *iog;
+	unsigned long nr_group_requests;
 
 	may_queue = elv_may_queue(q, rw_flags);
 	if (may_queue == ELV_MQUEUE_NO)
@@ -843,7 +852,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	if (q->rq_data.count[is_sync]+1 >= q->nr_requests)
 		blk_set_queue_full(q, is_sync);
 
-	if (rl->count[is_sync]+1 >= q->nr_group_requests) {
+	iog = rl_iog(rl);
+
+	nr_group_requests = get_group_requests(q, iog);
+
+	if (nr_group_requests &&
+	    rl->count[is_sync]+1 >= nr_group_requests) {
 		ioc = current_io_context(GFP_ATOMIC, q->node);
 		/*
 		 * The queue request descriptor group will fill after this
@@ -852,7 +866,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 		 * This process will be allowed to complete a batch of
 		 * requests, others will be blocked.
 		 */
-		if (rl->count[is_sync] <= q->nr_group_requests)
+		if (rl->count[is_sync] <= nr_group_requests)
 			ioc_set_batching(q, ioc);
 		else {
 			if (may_queue != ELV_MQUEUE_MUST
@@ -898,7 +912,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	 * from per group request list
 	 */
 
-	if (rl->count[is_sync] >= (3 * q->nr_group_requests / 2))
+	if (nr_group_requests &&
+	    rl->count[is_sync] >= (3 * nr_group_requests / 2))
 		goto out;
 
 	rl->starved[is_sync] = 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 78b8aec..bd582a7 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -148,7 +148,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	 * set defaults
 	 */
 	q->nr_requests = BLKDEV_MAX_RQ;
-	q->nr_group_requests = BLKDEV_MAX_GROUP_RQ;
 	q->make_request_fn = mfn;
 
 	blk_queue_dma_alignment(q, 511);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 92b9f25..706d852 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -78,40 +78,8 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
 #ifdef CONFIG_GROUP_IOSCHED
-static ssize_t queue_group_requests_show(struct request_queue *q, char *page)
-{
-	return queue_var_show(q->nr_group_requests, (page));
-}
-
 extern void elv_io_group_congestion_threshold(struct request_queue *q,
						struct io_group *iog);
-
-static ssize_t
-queue_group_requests_store(struct request_queue *q, const char *page,
-				size_t count)
-{
-	struct hlist_node *n;
-	struct io_group *iog;
-	struct elv_fq_data *efqd;
-	unsigned long nr;
-	int ret = queue_var_store(&nr, page, count);
-
-	if (nr < BLKDEV_MIN_RQ)
-		nr = BLKDEV_MIN_RQ;
-
-	spin_lock_irq(q->queue_lock);
-
-	q->nr_group_requests = nr;
-
-	efqd = &q->elevator->efqd;
-
-	hlist_for_each_entry(iog, n, &efqd->group_list, elv_data_node) {
-		elv_io_group_congestion_threshold(q, iog);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-	return ret;
-}
 #endif
 
 static ssize_t queue_ra_show(struct request_queue *q, char *page)
@@ -278,14 +246,6 @@ static struct queue_sysfs_entry queue_requests_entry = {
 	.store = queue_requests_store,
 };
 
-#ifdef CONFIG_GROUP_IOSCHED
-static struct queue_sysfs_entry queue_group_requests_entry = {
-	.attr = {.name = "nr_group_requests", .mode = S_IRUGO | S_IWUSR },
-	.show = queue_group_requests_show,
-	.store = queue_group_requests_store,
-};
-#endif
-
 static struct queue_sysfs_entry queue_ra_entry = {
 	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_ra_show,
@@ -360,9 +320,6 @@ static struct queue_sysfs_entry queue_iostats_entry = {
 
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
-#ifdef CONFIG_GROUP_IOSCHED
-	&queue_group_requests_entry.attr,
-#endif
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
 	&queue_max_sectors_entry.attr,
diff --git a/block/elevator-fq.c b/block/elevator-fq.c
index 29392e7..bfb0210 100644
--- a/block/elevator-fq.c
+++ b/block/elevator-fq.c
@@ -59,6 +59,35 @@ elv_release_ioq(struct elevator_queue *eq, struct io_queue **ioq_ptr);
 #define for_each_entity_safe(entity, parent) \
 	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
 
+unsigned long get_group_requests(struct request_queue *q,
+				 struct io_group *iog)
+{
+	struct cgroup_subsys_state *css;
+	struct io_cgroup *iocg;
+	unsigned long nr_group_requests;
+
+	if (!iog)
+		return q->nr_requests;
+
+	rcu_read_lock();
+
+	if (!iog->iocg_id) {
+		nr_group_requests = 0;
+		goto out;
+	}
+
+	css = css_lookup(&io_subsys, iog->iocg_id);
+	if (!css) {
+		nr_group_requests = 0;
+		goto out;
+	}
+
+	iocg = container_of(css, struct io_cgroup, css);
+	nr_group_requests = iocg->nr_group_requests;
+out:
+	rcu_read_unlock();
+	return nr_group_requests;
+}
+
 static struct io_entity *bfq_lookup_next_entity(struct io_sched_data *sd,
						int extract);
@@ -1257,14 +1286,17 @@ void elv_io_group_congestion_threshold(struct request_queue *q,
					struct io_group *iog)
 {
 	int nr;
+	unsigned long nr_group_requests;
 
-	nr = q->nr_group_requests - (q->nr_group_requests / 8) + 1;
-	if (nr > q->nr_group_requests)
-		nr = q->nr_group_requests;
+	nr_group_requests = get_group_requests(q, iog);
+
+	nr = nr_group_requests - (nr_group_requests / 8) + 1;
+	if (nr > nr_group_requests)
+		nr = nr_group_requests;
 	iog->nr_congestion_on = nr;
 
-	nr = q->nr_group_requests - (q->nr_group_requests / 8)
-		- (q->nr_group_requests / 16) - 1;
+	nr = nr_group_requests - (nr_group_requests / 8)
+		- (nr_group_requests / 16) - 1;
 	if (nr < 1)
 		nr = 1;
 	iog->nr_congestion_off = nr;
@@ -1283,6 +1315,7 @@ int elv_io_group_congested(struct request_queue *q, struct page *page, int sync)
 {
 	struct io_group *iog;
 	int ret = 0;
+	unsigned long nr_group_requests;
 
 	rcu_read_lock();
 
@@ -1300,10 +1333,11 @@ int elv_io_group_congested(struct request_queue *q, struct page *page, int sync)
 	}
 
 	ret = elv_is_iog_congested(q, iog, sync);
+	nr_group_requests = get_group_requests(q, iog);
 	if (ret)
 		elv_log_iog(&q->elevator->efqd, iog, "iog congested=%d sync=%d"
			" rl.count[sync]=%d nr_group_requests=%d",
-			ret, sync, iog->rl.count[sync], q->nr_group_requests);
+			ret, sync, iog->rl.count[sync], nr_group_requests);
 	rcu_read_unlock();
 	return ret;
 }
@@ -1549,6 +1583,48 @@ free_buf:
 	return ret;
 }
 
+static u64 io_cgroup_nr_requests_read(struct cgroup *cgroup,
+				      struct cftype *cftype)
+{
+	struct io_cgroup *iocg;
+	u64 ret;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+
+	iocg = cgroup_to_io_cgroup(cgroup);
+	spin_lock_irq(&iocg->lock);
+	ret = iocg->nr_group_requests;
+	spin_unlock_irq(&iocg->lock);
+
+	cgroup_unlock();
+
+	return ret;
+}
+
+static int io_cgroup_nr_requests_write(struct cgroup *cgroup,
+				       struct cftype *cftype,
+				       u64 val)
+{
+	struct io_cgroup *iocg;
+
+	if (val < BLKDEV_MIN_RQ)
+		val = BLKDEV_MIN_RQ;
+
+	if (!cgroup_lock_live_group(cgroup))
+		return -ENODEV;
+
+	iocg = cgroup_to_io_cgroup(cgroup);
+
+	spin_lock_irq(&iocg->lock);
+	iocg->nr_group_requests = (unsigned long)val;
+	spin_unlock_irq(&iocg->lock);
+
+	cgroup_unlock();
+
+	return 0;
+}
+
 #define SHOW_FUNCTION(__VAR)						\
 static u64 io_cgroup_##__VAR##_read(struct cgroup *cgroup,		\
				    struct cftype *cftype)		\
@@ -1735,6 +1811,11 @@ static int io_cgroup_disk_dequeue_read(struct cgroup *cgroup,
 
 struct cftype bfqio_files[] = {
 	{
+		.name = "nr_group_requests",
+		.read_u64 = io_cgroup_nr_requests_read,
+		.write_u64 = io_cgroup_nr_requests_write,
+	},
+	{
 		.name = "policy",
 		.read_seq_string = io_cgroup_policy_read,
 		.write_string = io_cgroup_policy_write,
@@ -1790,6 +1871,7 @@ static struct cgroup_subsys_state *iocg_create(struct cgroup_subsys *subsys,
 
 	spin_lock_init(&iocg->lock);
 	INIT_HLIST_HEAD(&iocg->group_data);
+	iocg->nr_group_requests = BLKDEV_MAX_GROUP_RQ;
 	iocg->weight = IO_DEFAULT_GRP_WEIGHT;
 	iocg->ioprio_class = IO_DEFAULT_GRP_CLASS;
 	INIT_LIST_HEAD(&iocg->policy_list);
diff --git a/block/elevator-fq.h b/block/elevator-fq.h
index f089a55..df077d0 100644
--- a/block/elevator-fq.h
+++ b/block/elevator-fq.h
@@ -308,6 +308,7 @@ struct io_cgroup {
 
 	unsigned int weight;
 	unsigned short ioprio_class;
+	unsigned long nr_group_requests;
 
 	/* list of io_policy_node */
 	struct list_head policy_list;
@@ -386,6 +387,9 @@ struct elv_fq_data {
 	unsigned int fairness;
 };
 
+extern unsigned long get_group_requests(struct request_queue *q,
+					struct io_group *iog);
+
 /* Logging facilities. */
 #ifdef CONFIG_DEBUG_GROUP_IOSCHED
 #define elv_log_ioq(efqd, ioq, fmt, args...) \
-- 
1.5.4.rc3

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel