From: Yamin Friedman <yaminf@xxxxxxxxxxxx> blk_dim implements a different algorithm from net_dim, one that is optimized for nvmf storage applications. The algorithm optimizes for the number of completions and the ratio between completions and events. It also has a feature for quickly reducing the moderation level, to avoid long latencies, when the traffic changes in such a way that high moderation is no longer required. blk_dim(), declared in blk_dim.h, will be called from the ib_core module. Signed-off-by: Yamin Friedman <yaminf@xxxxxxxxxxxx> Signed-off-by: Tal Gilboa <talgi@xxxxxxxxxxxx> --- MAINTAINERS | 1 + include/linux/blk_dim.h | 56 ++++++++++++++++++++ lib/dim/Makefile | 7 ++- lib/dim/blk_dim.c | 114 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 include/linux/blk_dim.h create mode 100644 lib/dim/blk_dim.c diff --git a/MAINTAINERS b/MAINTAINERS index 6ae949be8b83..2860a3316be5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5337,6 +5337,7 @@ DYNAMIC INTERRUPT MODERATION M: Tal Gilboa <talgi@xxxxxxxxxxxx> S: Maintained F: include/linux/net_dim.h +F: include/linux/blk_dim.h F: include/linux/dim.h F: lib/dim/ diff --git a/include/linux/blk_dim.h b/include/linux/blk_dim.h new file mode 100644 index 000000000000..a044f62ec8fe --- /dev/null +++ b/include/linux/blk_dim.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef BLK_DIM_H
+#define BLK_DIM_H
+
+#include <linux/module.h>
+#include <linux/dim.h>
+
+#define BLK_DIM_PARAMS_NUM_PROFILES 8
+#define BLK_DIM_START_PROFILE 0
+
+static const struct dim_cq_moder
+blk_dim_prof[BLK_DIM_PARAMS_NUM_PROFILES] = {
+	{1, 0, 1, 0},
+	{2, 0, 2, 0},
+	{4, 0, 4, 0},
+	{16, 0, 4, 0},
+	{32, 0, 4, 0},
+	{32, 0, 16, 0},
+	{256, 0, 16, 0},
+	{256, 0, 32, 0},
+};
+
+void blk_dim(struct dim *dim, struct dim_sample end_sample);
+
+#endif /* BLK_DIM_H */
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
index 160afe288df0..2b3c57318dbb 100644
--- a/lib/dim/Makefile
+++ b/lib/dim/Makefile
@@ -2,8 +2,13 @@
 # DIM Dynamic Interrupt Moderation library
 #
 
-obj-$(CONFIG_DIMLIB) = net_dim.o
+obj-$(CONFIG_DIMLIB) += net_dim.o
+obj-$(CONFIG_DIMLIB) += blk_dim.o
 
 net_dim-y = \
 	dim.o \
 	net_dim.o
+
+blk_dim-y = \
+	dim.o \
+	blk_dim.o
diff --git a/lib/dim/blk_dim.c b/lib/dim/blk_dim.c
new file mode 100644
index 000000000000..49107c169b56
--- /dev/null
+++ b/lib/dim/blk_dim.c
@@ -0,0 +1,169 @@
+#include <linux/blk_dim.h>
+
+/*
+ * Move dim->profile_ix one profile in the current tuning direction.
+ *
+ * Returns DIM_ON_EDGE, leaving the index untouched, when it already sits on
+ * the last profile (going right) or on profile 0 (going left); otherwise
+ * returns DIM_STEPPED. The parking states move nothing and also return
+ * DIM_STEPPED.
+ */
+static int blk_dim_step(struct dim *dim)
+{
+	switch (dim->tune_state) {
+	case DIM_PARKING_ON_TOP:
+	case DIM_PARKING_TIRED:
+		break;
+	case DIM_GOING_RIGHT:
+		if (dim->profile_ix == (BLK_DIM_PARAMS_NUM_PROFILES - 1))
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+		break;
+	case DIM_GOING_LEFT:
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+		break;
+	}
+
+	return DIM_STEPPED;
+}
+
+/*
+ * Rank the current stats against the previous ones.
+ *
+ * cpms is compared first; cpe_ratio is consulted only when cpms shows no
+ * significant difference. Returns DIM_STATS_BETTER, DIM_STATS_WORSE or
+ * DIM_STATS_SAME.
+ */
+static int blk_dim_stats_compare(struct dim_stats *curr,
+				 struct dim_stats *prev)
+{
+	/* first stat: no previous cpms to compare against */
+	if (!prev->cpms)
+		return DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms))
+		return (curr->cpms > prev->cpms) ? DIM_STATS_BETTER :
+						   DIM_STATS_WORSE;
+
+	/*
+	 * NOTE(review): IS_SIGNIFICANT_DIFF() is defined outside this file and
+	 * is assumed to divide by its second argument - confirm. Skip the
+	 * comparison when the previous ratio is zero so that a zero baseline
+	 * is never used as a divisor.
+	 */
+	if (prev->cpe_ratio &&
+	    IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio))
+		return (curr->cpe_ratio > prev->cpe_ratio) ? DIM_STATS_BETTER :
+							     DIM_STATS_WORSE;
+
+	return DIM_STATS_SAME;
+}
+
+/*
+ * Choose the next profile index from freshly computed stats.
+ *
+ * Nothing is changed while parked. While going left or right:
+ *  - stats unchanged: jump to profile 0 if cpe_ratio is at or below
+ *    50 * the profile index on entry, otherwise keep the profile;
+ *  - stats worse: call dim_turn(), then step;
+ *  - stats better (or any other result): step.
+ * A step that reports DIM_ON_EDGE is followed by dim_turn().
+ *
+ * curr_stats is always saved in dim->prev_stats. Returns true when
+ * dim->profile_ix differs from its value on entry.
+ */
+static bool blk_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+	int prev_ix = dim->profile_ix;
+	int stats_res;
+	int step_res;
+
+	switch (dim->tune_state) {
+	case DIM_PARKING_ON_TOP:
+		break;
+	case DIM_PARKING_TIRED:
+		break;
+
+	case DIM_GOING_RIGHT:
+	case DIM_GOING_LEFT:
+		stats_res = blk_dim_stats_compare(curr_stats, &dim->prev_stats);
+
+		switch (stats_res) {
+		case DIM_STATS_SAME:
+			if (curr_stats->cpe_ratio <= 50 * prev_ix)
+				dim->profile_ix = 0;
+			break;
+		case DIM_STATS_WORSE:
+			dim_turn(dim);
+			/* fall through */
+		default:
+		case DIM_STATS_BETTER:
+			step_res = blk_dim_step(dim);
+			if (step_res == DIM_ON_EDGE)
+				dim_turn(dim);
+			break;
+		}
+		break;
+	}
+
+	dim->prev_stats = *curr_stats;
+
+	return dim->profile_ix != prev_ix;
+}
+
+/**
+ * blk_dim - feed one sample to the blk_dim state machine
+ * @dim: DIM context
+ * @end_sample: counters supplied by the caller for this invocation
+ *
+ * In DIM_MEASURE_IN_PROGRESS, fewer than DIM_NEVENTS events since
+ * dim->start_sample only refresh dim->measuring_sample. Otherwise stats are
+ * computed between dim->start_sample and @end_sample; if the decision step
+ * changed the profile index, the state becomes DIM_APPLY_NEW_PROFILE and
+ * dim->work is scheduled. If the profile is unchanged, or the state is
+ * DIM_START_MEASURE, both stored samples are re-created from @end_sample and
+ * the state is set to DIM_MEASURE_IN_PROGRESS. Nothing is done in
+ * DIM_APPLY_NEW_PROFILE.
+ */
+void blk_dim(struct dim *dim, struct dim_sample end_sample)
+{
+	struct dim_stats curr_stats;
+	u16 nevents;
+
+	switch (dim->state) {
+	case DIM_MEASURE_IN_PROGRESS:
+		nevents = end_sample.event_ctr - dim->start_sample.event_ctr;
+		if (nevents < DIM_NEVENTS) {
+			dim_create_sample(end_sample.event_ctr,
+					  end_sample.pkt_ctr,
+					  end_sample.byte_ctr,
+					  end_sample.comp_ctr,
+					  &dim->measuring_sample);
+			break;
+		}
+		dim_calc_stats(&dim->start_sample, &end_sample,
+			       &curr_stats);
+		if (blk_dim_decision(&curr_stats, dim)) {
+			dim->state = DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		/* fall through */
+	case DIM_START_MEASURE:
+		dim->state = DIM_MEASURE_IN_PROGRESS;
+		dim_create_sample(end_sample.event_ctr, end_sample.pkt_ctr,
+				  end_sample.byte_ctr, end_sample.comp_ctr,
+				  &dim->start_sample);
+		dim_create_sample(end_sample.event_ctr, end_sample.pkt_ctr,
+				  end_sample.byte_ctr, end_sample.comp_ctr,
+				  &dim->measuring_sample);
+		break;
+	case DIM_APPLY_NEW_PROFILE:
+		break;
+	}
+}
+EXPORT_SYMBOL(blk_dim);
-- 
2.19.1