Hi Jens,

Could you please pick this patch?

Thanks,
Joseph

On 17/10/13 01:13, Shaohua Li wrote:
> On Thu, Oct 12, 2017 at 05:15:46PM +0800, Joseph Qi wrote:
>> From: Joseph Qi <qijiang.qj@xxxxxxxxxxxxxxx>
>>
>> In a mixed read/write workload on SSD, write latency is much lower
>> than read latency. But currently we only track and record read
>> latency and then use it as the threshold base for both read and write
>> io latency accounting. As a result, write io latency will always be
>> considered good and bad_bio_cnt will be much smaller than 20% of
>> bio_cnt. That is to say, the tg to be checked will be treated as idle
>> most of the time and will still let others dispatch more ios, even if
>> it is truly running under its low limit and wants its low limit to be
>> guaranteed, which is not what we expect. So track read and write
>> requests individually, which brings more precise latency control for
>> low limit idle detection.
>>
>> Signed-off-by: Joseph Qi <qijiang.qj@xxxxxxxxxxxxxxx>
>
> Reviewed-by: Shaohua Li <shli@xxxxxx>
>> ---
>>  block/blk-throttle.c | 134 ++++++++++++++++++++++++++++++---------------------
>>  1 file changed, 79 insertions(+), 55 deletions(-)
>>
>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>> index 17816a0..0897378 100644
>> --- a/block/blk-throttle.c
>> +++ b/block/blk-throttle.c
>> @@ -215,9 +215,9 @@ struct throtl_data
>>
>>  	unsigned int scale;
>>
>> -	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
>> -	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
>> -	struct latency_bucket __percpu *latency_buckets;
>> +	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
>> +	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
>> +	struct latency_bucket __percpu *latency_buckets[2];
>>  	unsigned long last_calculate_time;
>>  	unsigned long filtered_latency;
>>
>> @@ -2040,10 +2040,10 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
>>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>>  static void throtl_update_latency_buckets(struct throtl_data *td)
>>  {
>> -	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
>> -	int i, cpu;
>> -	unsigned long last_latency = 0;
>> -	unsigned long latency;
>> +	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
>> +	int i, cpu, rw;
>> +	unsigned long last_latency[2] = { 0 };
>> +	unsigned long latency[2];
>>
>>  	if (!blk_queue_nonrot(td->queue))
>>  		return;
>> @@ -2052,56 +2052,67 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
>>  	td->last_calculate_time = jiffies;
>>
>>  	memset(avg_latency, 0, sizeof(avg_latency));
>> -	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
>> -		struct latency_bucket *tmp = &td->tmp_buckets[i];
>> -
>> -		for_each_possible_cpu(cpu) {
>> -			struct latency_bucket *bucket;
>> -
>> -			/* this isn't race free, but ok in practice */
>> -			bucket = per_cpu_ptr(td->latency_buckets, cpu);
>> -			tmp->total_latency += bucket[i].total_latency;
>> -			tmp->samples += bucket[i].samples;
>> -			bucket[i].total_latency = 0;
>> -			bucket[i].samples = 0;
>> -		}
>> +	for (rw = READ; rw <= WRITE; rw++) {
>> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
>> +			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
>> +
>> +			for_each_possible_cpu(cpu) {
>> +				struct latency_bucket *bucket;
>> +
>> +				/* this isn't race free, but ok in practice */
>> +				bucket = per_cpu_ptr(td->latency_buckets[rw],
>> +					cpu);
>> +				tmp->total_latency += bucket[i].total_latency;
>> +				tmp->samples += bucket[i].samples;
>> +				bucket[i].total_latency = 0;
>> +				bucket[i].samples = 0;
>> +			}
>>
>> -		if (tmp->samples >= 32) {
>> -			int samples = tmp->samples;
>> +			if (tmp->samples >= 32) {
>> +				int samples = tmp->samples;
>>
>> -			latency = tmp->total_latency;
>> +				latency[rw] = tmp->total_latency;
>>
>> -			tmp->total_latency = 0;
>> -			tmp->samples = 0;
>> -			latency /= samples;
>> -			if (latency == 0)
>> -				continue;
>> -			avg_latency[i].latency = latency;
>> +				tmp->total_latency = 0;
>> +				tmp->samples = 0;
>> +				latency[rw] /= samples;
>> +				if (latency[rw] == 0)
>> +					continue;
>> +				avg_latency[rw][i].latency = latency[rw];
>> +			}
>>  		}
>>  	}
>>
>> -	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
>> -		if (!avg_latency[i].latency) {
>> -			if (td->avg_buckets[i].latency < last_latency)
>> -				td->avg_buckets[i].latency = last_latency;
>> -			continue;
>> -		}
>> +	for (rw = READ; rw <= WRITE; rw++) {
>> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
>> +			if (!avg_latency[rw][i].latency) {
>> +				if (td->avg_buckets[rw][i].latency < last_latency[rw])
>> +					td->avg_buckets[rw][i].latency =
>> +						last_latency[rw];
>> +				continue;
>> +			}
>>
>> -		if (!td->avg_buckets[i].valid)
>> -			latency = avg_latency[i].latency;
>> -		else
>> -			latency = (td->avg_buckets[i].latency * 7 +
>> -				avg_latency[i].latency) >> 3;
>> +			if (!td->avg_buckets[rw][i].valid)
>> +				latency[rw] = avg_latency[rw][i].latency;
>> +			else
>> +				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
>> +					avg_latency[rw][i].latency) >> 3;
>>
>> -		td->avg_buckets[i].latency = max(latency, last_latency);
>> -		td->avg_buckets[i].valid = true;
>> -		last_latency = td->avg_buckets[i].latency;
>> +			td->avg_buckets[rw][i].latency = max(latency[rw],
>> +				last_latency[rw]);
>> +			td->avg_buckets[rw][i].valid = true;
>> +			last_latency[rw] = td->avg_buckets[rw][i].latency;
>> +		}
>>  	}
>>
>>  	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
>>  		throtl_log(&td->service_queue,
>> -			"Latency bucket %d: latency=%ld, valid=%d", i,
>> -			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
>> +			"Latency bucket %d: read latency=%ld, read valid=%d, "
>> +			"write latency=%ld, write valid=%d", i,
>> +			td->avg_buckets[READ][i].latency,
>> +			td->avg_buckets[READ][i].valid,
>> +			td->avg_buckets[WRITE][i].latency,
>> +			td->avg_buckets[WRITE][i].valid);
>>  }
>>  #else
>>  static inline void throtl_update_latency_buckets(struct throtl_data *td)
>> @@ -2244,16 +2255,17 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size,
>>  	struct latency_bucket *latency;
>>  	int index;
>>
>> -	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
>> +	if (!td || td->limit_index != LIMIT_LOW ||
>> +	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
>>  	    !blk_queue_nonrot(td->queue))
>>  		return;
>>
>>  	index = request_bucket_index(size);
>>
>> -	latency = get_cpu_ptr(td->latency_buckets);
>> +	latency = get_cpu_ptr(td->latency_buckets[op]);
>>  	latency[index].total_latency += time;
>>  	latency[index].samples++;
>> -	put_cpu_ptr(td->latency_buckets);
>> +	put_cpu_ptr(td->latency_buckets[op]);
>>  }
>>
>>  void blk_throtl_stat_add(struct request *rq, u64 time_ns)
>> @@ -2272,6 +2284,7 @@ void blk_throtl_bio_endio(struct bio *bio)
>>  	unsigned long finish_time;
>>  	unsigned long start_time;
>>  	unsigned long lat;
>> +	int rw = bio_data_dir(bio);
>>
>>  	tg = bio->bi_cg_private;
>>  	if (!tg)
>> @@ -2298,7 +2311,7 @@ void blk_throtl_bio_endio(struct bio *bio)
>>
>>  		bucket = request_bucket_index(
>>  			blk_stat_size(&bio->bi_issue_stat));
>> -		threshold = tg->td->avg_buckets[bucket].latency +
>> +		threshold = tg->td->avg_buckets[rw][bucket].latency +
>>  			tg->latency_target;
>>  		if (lat > threshold)
>>  			tg->bad_bio_cnt++;
>> @@ -2389,9 +2402,16 @@ int blk_throtl_init(struct request_queue *q)
>>  	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
>>  	if (!td)
>>  		return -ENOMEM;
>> -	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
>> +	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
>> +		LATENCY_BUCKET_SIZE, __alignof__(u64));
>> +	if (!td->latency_buckets[READ]) {
>> +		kfree(td);
>> +		return -ENOMEM;
>> +	}
>> +	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
>>  		LATENCY_BUCKET_SIZE, __alignof__(u64));
>> -	if (!td->latency_buckets) {
>> +	if (!td->latency_buckets[WRITE]) {
>> +		free_percpu(td->latency_buckets[READ]);
>>  		kfree(td);
>>  		return -ENOMEM;
>>  	}
>> @@ -2410,7 +2430,8 @@ int blk_throtl_init(struct request_queue *q)
>>  	/* activate policy */
>>  	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
>>  	if (ret) {
>> -		free_percpu(td->latency_buckets);
>> +		free_percpu(td->latency_buckets[READ]);
>> +		free_percpu(td->latency_buckets[WRITE]);
>>  		kfree(td);
>>  	}
>>  	return ret;
>> @@ -2421,7 +2442,8 @@ void blk_throtl_exit(struct request_queue *q)
>>  	BUG_ON(!q->td);
>>  	throtl_shutdown_wq(q);
>>  	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
>> -	free_percpu(q->td->latency_buckets);
>> +	free_percpu(q->td->latency_buckets[READ]);
>> +	free_percpu(q->td->latency_buckets[WRITE]);
>>  	kfree(q->td);
>>  }
>>
>> @@ -2439,8 +2461,10 @@ void blk_throtl_register_queue(struct request_queue *q)
>>  	} else {
>>  		td->throtl_slice = DFL_THROTL_SLICE_HD;
>>  		td->filtered_latency = LATENCY_FILTERED_HD;
>> -		for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
>> -			td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY;
>> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
>> +			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
>> +			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
>> +		}
>>  	}
>>  #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
>>  	/* if no low limit, use previous default */
>> --
>> 1.9.4
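
For anyone skimming the thread, below is a minimal user-space sketch of the
idea the changelog describes, not kernel code: latency is averaged separately
per direction, each completed io is compared against its own direction's
threshold, and the group only counts as idle when fewer than 20% of ios
exceeded their threshold. All names, numbers and the helper functions here
are made up for illustration only.

/*
 * Stand-alone illustration of per-direction latency thresholds for
 * low-limit idle detection.  Hypothetical names; build with e.g.
 * "gcc -o sketch sketch.c".
 */
#include <stdbool.h>
#include <stdio.h>

enum { READ = 0, WRITE = 1 };

struct dir_stats {
	unsigned long avg_latency;	/* smoothed average latency, usec */
	unsigned long bio_cnt;		/* completed ios in this direction */
	unsigned long bad_bio_cnt;	/* ios slower than the threshold */
};

static struct dir_stats stats[2];

/* account one completed io against its own direction's threshold */
static void account_io(int rw, unsigned long lat, unsigned long latency_target)
{
	unsigned long threshold = stats[rw].avg_latency + latency_target;

	stats[rw].bio_cnt++;
	if (lat > threshold)
		stats[rw].bad_bio_cnt++;
}

/* idle if fewer than 20% of ios exceeded their threshold */
static bool group_is_idle(void)
{
	unsigned long bios = stats[READ].bio_cnt + stats[WRITE].bio_cnt;
	unsigned long bad = stats[READ].bad_bio_cnt + stats[WRITE].bad_bio_cnt;

	return bios == 0 || bad * 5 < bios;
}

int main(void)
{
	stats[READ].avg_latency = 120;	/* e.g. typical SSD read */
	stats[WRITE].avg_latency = 30;	/* writes are much faster */

	account_io(READ, 200, 50);	/* slow read: counted as bad */
	account_io(WRITE, 100, 50);	/* slow write: now also counted as bad */
	account_io(WRITE, 40, 50);	/* fast write: good */

	printf("group idle: %s\n", group_is_idle() ? "yes" : "no");
	return 0;
}

With a single read-based threshold, the slow write above would still have
looked good (100 < 120 + 50), so bad_bio_cnt would stay low and the group
would be treated as idle; tracking the two directions separately is what lets
bad_bio_cnt reflect slow writes as well.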