From: Tal Gilboa <talgi@xxxxxxxxxxxx> Moved all logic from dim.h and net_dim.h to dim.c and net_dim.c. This is both more structurally appealing and would allow to only expose externally used functions. Signed-off-by: Tal Gilboa <talgi@xxxxxxxxxxxx> Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx> --- MAINTAINERS | 2 +- drivers/net/ethernet/broadcom/Kconfig | 1 + drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- .../net/ethernet/broadcom/bnxt/bnxt_debugfs.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 2 +- .../net/ethernet/broadcom/genet/bcmgenet.h | 2 +- .../net/ethernet/mellanox/mlx5/core/Kconfig | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_dim.c | 2 +- include/linux/dim.h | 319 ++++++++++++++---- include/linux/net_dim.h | 273 --------------- lib/Kconfig | 8 + lib/Makefile | 1 + lib/dim/Makefile | 9 + lib/dim/dim.c | 74 ++++ lib/dim/net_dim.c | 190 +++++++++++ 17 files changed, 547 insertions(+), 345 deletions(-) delete mode 100644 include/linux/net_dim.h create mode 100644 lib/dim/Makefile create mode 100644 lib/dim/dim.c create mode 100644 lib/dim/net_dim.c diff --git a/MAINTAINERS b/MAINTAINERS index 5d4b852d9d39..f78dd16195e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5588,8 +5588,8 @@ F: include/linux/dynamic_debug.h DYNAMIC INTERRUPT MODERATION M: Tal Gilboa <talgi@xxxxxxxxxxxx> S: Maintained -F: include/linux/net_dim.h F: include/linux/dim.h +F: lib/dim/ DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" <macro@xxxxxxxxxxxxxx> diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index b123509d385f..2e4a8c7237ef 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC + select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index cbe6d559d964..f6677a02d811 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -14,7 +14,7 @@ #include <linux/bitmap.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* Receive/transmit descriptor format */ #define DESC_ADDR_HI_STATUS_LEN 0x00 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a552c5539cc9..54c01705f3bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -23,7 +23,7 @@ #include <net/devlink.h> #include <net/dst_metadata.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> struct tx_bd { __le32 tx_bd_len_flags_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 3d1d53fbb135..61393f351a77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "bnxt_hsi.h" -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt.h" #include "bnxt_debugfs.h" diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 11605f9fa61e..6f6576dc417a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -7,7 +7,7 @@ * the Free Software Foundation. */ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt_hsi.h" #include "bnxt.h" diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 6e418d9c3706..b2f05e47dc65 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -16,7 +16,7 @@ #include <linux/mii.h> #include <linux/if_vlan.h> #include <linux/phy.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* total number of Buffer Descriptors, same for Rx/Tx */ #define TOTAL_DESC 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..7845aa5bf6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 11efd6e4bdc3..abf42d3aabe9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index ba3c1be9f2d3..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void diff --git a/include/linux/dim.h b/include/linux/dim.h index 60e5074a7cc0..f48ede3e0322 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -6,20 +6,49 @@ #include <linux/module.h> +/** + * Number of events between DIM iterations. + * Causes a moderation of the algorithm run. + */ #define DIM_NEVENTS 64 -/* more than 10% difference */ +/** + * Is a difference between values justifies taking an action. + * We consider 10% difference as significant. + */ #define IS_SIGNIFICANT_DIFF(val, ref) \ (((100UL * abs((val) - (ref))) / (ref)) > 10) + +/** + * Calculate the gap between two values. + * Take wrap-around and variable size into consideration. + */ #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ -& (BIT_ULL(bits) - 1)) + & (BIT_ULL(bits) - 1)) +/** + * Structure for CQ moderation values. + * Used for communications between DIM and its consumer. + * + * @usec: CQ timer suggestion (by DIM) + * @pkts: CQ packet counter suggestion (by DIM) + * @cq_period_mode: CQ priod count mode (from CQE/EQE) + */ struct dim_cq_moder { u16 usec; u16 pkts; u8 cq_period_mode; }; +/** + * Structure for DIM sample data. + * Used for communications between DIM and its consumer. + * + * @time: Sample timestamp + * @pkt_ctr: Number of packets + * @byte_ctr: Number of bytes + * @event_ctr: Number of events + */ struct dim_sample { ktime_t time; u32 pkt_ctr; @@ -27,13 +56,36 @@ struct dim_sample { u16 event_ctr; }; +/** + * Structure for DIM stats. + * Used for holding current measured rates. + * + * @ppms: Packets per msec + * @bpms: Bytes per msec + * @epms: Events per msec + */ struct dim_stats { - int ppms; /* packets per msec */ - int bpms; /* bytes per msec */ - int epms; /* events per msec */ + int ppms; + int bpms; + int epms; }; -struct dim { /* Dynamic Interrupt Moderation */ +/** + * Main structure for dynamic interrupt moderation (DIM). + * Used for holding all information about a specific DIM instance. + * + * @state: Algorithm state (see below) + * @prev_stats: Measured rates from previous iteration (for comparison) + * @start_sample: Sampled data at start of current iteration + * @work: Work to perform on action required + * @profile_ix: Current moderation profile + * @mode: CQ period count mode + * @tune_state: Algorithm tuning state (see below) + * @steps_right: Number of steps taken towards higher moderation + * @steps_left: Number of steps taken towards lower moderation + * @tired: Parking depth counter + */ +struct dim { u8 state; struct dim_stats prev_stats; struct dim_sample start_sample; @@ -46,18 +98,49 @@ struct dim { /* Dynamic Interrupt Moderation */ u8 tired; }; +/** + * enum dim_cq_period_mode + * + * These are the modes for CQ period count. + * + * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE + * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) + * @DIM_CQ_PERIOD_NUM_MODES: Number of modes + */ enum { DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_NUM_MODES }; +/** + * enum dim_state + * + * These are the DIM algorithm states. + * These will determine if the algorithm is in a valid state to start an iteration. + * + * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) + * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if + * need to perform an action + * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure + */ enum { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS, DIM_APPLY_NEW_PROFILE, }; +/** + * enum dim_tune_state + * + * These are the DIM algorithm tune states. + * These will determine which action the algorithm should perform. + * + * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference + * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0 + * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels + * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels + */ enum { DIM_PARKING_ON_TOP, DIM_PARKING_TIRED, @@ -65,63 +148,95 @@ enum { DIM_GOING_LEFT, }; +/** + * enum dim_stats_state + * + * These are the DIM algorithm statistics states. + * These will determine the verdict of current iteration. + * + * @DIM_STATS_WORSE: Current iteration shows worse performance than before + * @DIM_STATS_WORSE: Current iteration shows same performance than before + * @DIM_STATS_WORSE: Current iteration shows better performance than before + */ enum { DIM_STATS_WORSE, DIM_STATS_SAME, DIM_STATS_BETTER, }; +/** + * enum dim_step_result + * + * These are the DIM algorithm step results. + * These describe the result of a step. + * + * @DIM_STEPPED: Performed a regular step + * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to + * tired parking + * @DIM_ON_EDGE: Stepped to the most left/right profile + */ enum { DIM_STEPPED, DIM_TOO_TIRED, DIM_ON_EDGE, }; -static inline bool dim_on_top(struct dim *dim) -{ - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - return true; - case DIM_GOING_RIGHT: - return (dim->steps_left > 1) && (dim->steps_right == 1); - default: /* DIM_GOING_LEFT */ - return (dim->steps_right > 1) && (dim->steps_left == 1); - } -} +/** + * dim_on_top - check if current state is a good place to stop (top location) + * @dim: DIM context + * + * Check if current profile is a good place to park at. + * This will result in reducing the DIM checks frequency as we assume we + * shouldn't probably change profiles, unless traffic pattern wasn't changed. + */ +bool dim_on_top(struct dim *dim); -static inline void dim_turn(struct dim *dim) -{ - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - break; - case DIM_GOING_RIGHT: - dim->tune_state = DIM_GOING_LEFT; - dim->steps_left = 0; - break; - case DIM_GOING_LEFT: - dim->tune_state = DIM_GOING_RIGHT; - dim->steps_right = 0; - break; - } -} +/** + * dim_turn - change profile alterning direction + * @dim: DIM context + * + * Go left if we were going right and vice-versa. + * Do nothing if currently parking. + */ +void dim_turn(struct dim *dim); -static inline void dim_park_on_top(struct dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tired = 0; - dim->tune_state = DIM_PARKING_ON_TOP; -} +/** + * dim_park_on_top - enter a parking state on a top location + * @dim: DIM context + * + * Enter parking state. + * Clear all movement history. + */ +void dim_park_on_top(struct dim *dim); -static inline void dim_park_tired(struct dim *dim) -{ - dim->steps_right = 0; - dim->steps_left = 0; - dim->tune_state = DIM_PARKING_TIRED; -} +/** + * dim_park_tired - enter a tired parking state + * @dim: DIM context + * + * Enter parking state. + * Clear all movement history and cause DIM checks frequency to reduce. + */ +void dim_park_tired(struct dim *dim); + +/** + * dim_calc_stats - calculate the difference between two samples + * @start: start sample + * @end: end sample + * @curr_stats: delta between samples + * + * Calculate the delta between two samples (in data rates). + * Takes into consideration counter wrap-around. + */ +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats); +/** + * dim_update_sample - set a sample's fields with give values + * @event_ctr: number of events to set + * @packets: number of packets to set + * @bytes: number of bytes to set + * @s: DIM sample + */ static inline void dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) { @@ -131,23 +246,99 @@ dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) s->event_ctr = event_ctr; } -static inline void -dim_calc_stats(struct dim_sample *start, struct dim_sample *end, - struct dim_stats *curr_stats) -{ - /* u32 holds up to 71 minutes, should be enough */ - u32 delta_us = ktime_us_delta(end->time, start->time); - u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); - u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, - start->byte_ctr); - - if (!delta_us) - return; - - curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); - curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); - curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, - delta_us); +/* Net DIM */ + +/* + * Net DIM profiles: + * There are different set of profiles for each CQ period mode. + * There are different set of profiles for RX/TX CQs. + * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES + */ +#define NET_DIM_PARAMS_NUM_PROFILES 5 +#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEF_PROFILE_CQE 1 +#define NET_DIM_DEF_PROFILE_EQE 1 + +#define NET_DIM_RX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ } +#define NET_DIM_RX_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +#define NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + +static const struct dim_cq_moder +rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, +}; + +static const struct dim_cq_moder +tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, +}; + +/** + * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile + * @cq_period_mode: CQ period mode + * @ix: Profile index + */ +struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix); + +/** + * net_dim_get_def_rx_moderation - provide the default RX moderation + * @cq_period_mode: CQ period mode + */ +struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode); + +/** + * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile + * @cq_period_mode: CQ period mode + * @ix: Profile index + */ +struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix); + +/** + * net_dim_get_def_tx_moderation - provide the default TX moderation + * @cq_period_mode: CQ period mode + */ +struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); + +/** + * net_dim - main DIM algorithm entry point + * @dim: DIM instance information + * @end_sample: Current data measurement + * + * Called by the consumer. + * This is the main logic of the algorithm, where data is processed in order to decide on next + * required action. + */ +void net_dim(struct dim *dim, struct dim_sample end_sample); + #endif /* DIM_H */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h deleted file mode 100644 index 4e009ec193ef..000000000000 --- a/include/linux/net_dim.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef NET_DIM_H -#define NET_DIM_H - -#include <linux/module.h> -#include <linux/dim.h> - -#define NET_DIM_PARAMS_NUM_PROFILES 5 -/* Netdev dynamic interrupt moderation profiles */ -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 -#define NET_DIM_DEF_PROFILE_CQE 1 -#define NET_DIM_DEF_PROFILE_EQE 1 - -/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ -#define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ -} - -#define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ -} - -#define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ -} - -#define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ -} - -static const struct dim_cq_moder -rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_RX_EQE_PROFILES, - NET_DIM_RX_CQE_PROFILES, -}; - -static const struct dim_cq_moder -tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_TX_EQE_PROFILES, - NET_DIM_TX_CQE_PROFILES, -}; - -static inline struct dim_cq_moder -net_dim_get_rx_moderation(u8 cq_period_mode, int ix) -{ - struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; - - cq_moder.cq_period_mode = cq_period_mode; - return cq_moder; -} - -static inline struct dim_cq_moder -net_dim_get_def_rx_moderation(u8 cq_period_mode) -{ - u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - - return net_dim_get_rx_moderation(cq_period_mode, profile_ix); -} - -static inline struct dim_cq_moder -net_dim_get_tx_moderation(u8 cq_period_mode, int ix) -{ - struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; - - cq_moder.cq_period_mode = cq_period_mode; - return cq_moder; -} - -static inline struct dim_cq_moder -net_dim_get_def_tx_moderation(u8 cq_period_mode) -{ - u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - - return net_dim_get_tx_moderation(cq_period_mode, profile_ix); -} - -static inline int net_dim_step(struct dim *dim) -{ - if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) - return DIM_TOO_TIRED; - - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - case DIM_PARKING_TIRED: - break; - case DIM_GOING_RIGHT: - if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) - return DIM_ON_EDGE; - dim->profile_ix++; - dim->steps_right++; - break; - case DIM_GOING_LEFT: - if (dim->profile_ix == 0) - return DIM_ON_EDGE; - dim->profile_ix--; - dim->steps_left++; - break; - } - - dim->tired++; - return DIM_STEPPED; -} - -static inline void net_dim_exit_parking(struct dim *dim) -{ - dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : - DIM_GOING_RIGHT; - net_dim_step(dim); -} - -static inline int net_dim_stats_compare(struct dim_stats *curr, - struct dim_stats *prev) -{ - if (!prev->bpms) - return curr->bpms ? DIM_STATS_BETTER : - DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) - return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - if (!prev->ppms) - return curr->ppms ? DIM_STATS_BETTER : - DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) - return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - if (!prev->epms) - return DIM_STATS_SAME; - - if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) - return (curr->epms < prev->epms) ? DIM_STATS_BETTER : - DIM_STATS_WORSE; - - return DIM_STATS_SAME; -} - -static inline bool net_dim_decision(struct dim_stats *curr_stats, - struct dim *dim) -{ - int prev_state = dim->tune_state; - int prev_ix = dim->profile_ix; - int stats_res; - int step_res; - - switch (dim->tune_state) { - case DIM_PARKING_ON_TOP: - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != DIM_STATS_SAME) - net_dim_exit_parking(dim); - break; - - case DIM_PARKING_TIRED: - dim->tired--; - if (!dim->tired) - net_dim_exit_parking(dim); - break; - - case DIM_GOING_RIGHT: - case DIM_GOING_LEFT: - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); - if (stats_res != DIM_STATS_BETTER) - dim_turn(dim); - - if (dim_on_top(dim)) { - dim_park_on_top(dim); - break; - } - - step_res = net_dim_step(dim); - switch (step_res) { - case DIM_ON_EDGE: - dim_park_on_top(dim); - break; - case DIM_TOO_TIRED: - dim_park_tired(dim); - break; - } - - break; - } - - if (prev_state != DIM_PARKING_ON_TOP || - dim->tune_state != DIM_PARKING_ON_TOP) - dim->prev_stats = *curr_stats; - - return dim->profile_ix != prev_ix; -} - -static inline void net_dim(struct dim *dim, - struct dim_sample end_sample) -{ - struct dim_stats curr_stats; - u16 nevents; - - switch (dim->state) { - case DIM_MEASURE_IN_PROGRESS: - nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, - dim->start_sample.event_ctr); - if (nevents < DIM_NEVENTS) - break; - dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); - if (net_dim_decision(&curr_stats, dim)) { - dim->state = DIM_APPLY_NEW_PROFILE; - schedule_work(&dim->work); - break; - } - /* fall through */ - case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); - dim->state = DIM_MEASURE_IN_PROGRESS; - break; - case DIM_APPLY_NEW_PROFILE: - break; - } -} - -#endif /* NET_DIM_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 90623a0e1942..78ddb9526b62 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -562,6 +562,14 @@ config SIGNATURE Digital signature verification. Currently only RSA is supported. Implementation is done using GnuPG MPI library +config DIMLIB + bool "DIM library" + default y + help + Dynamic Interrupt Moderation library. + Implements an algorithm for dynamically change CQ modertion values + according to run time performance. + # # libfdt files, only selected if needed. # diff --git a/lib/Makefile b/lib/Makefile index fb7697031a79..dcb558c7554d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -202,6 +202,7 @@ obj-$(CONFIG_GLOB) += glob.o obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_MPILIB) += mpi/ +obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o diff --git a/lib/dim/Makefile b/lib/dim/Makefile new file mode 100644 index 000000000000..160afe288df0 --- /dev/null +++ b/lib/dim/Makefile @@ -0,0 +1,9 @@ +# +# DIM Dynamic Interrupt Moderation library +# + +obj-$(CONFIG_DIMLIB) = net_dim.o + +net_dim-y = \ + dim.o \ + net_dim.o diff --git a/lib/dim/dim.c b/lib/dim/dim.c new file mode 100644 index 000000000000..17d5236759bd --- /dev/null +++ b/lib/dim/dim.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/dim.h> + +bool dim_on_top(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + return true; + case DIM_GOING_RIGHT: + return (dim->steps_left > 1) && (dim->steps_right == 1); + default: /* DIM_GOING_LEFT */ + return (dim->steps_right > 1) && (dim->steps_left == 1); + } +} +EXPORT_SYMBOL(dim_on_top); + +void dim_turn(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + dim->tune_state = DIM_GOING_LEFT; + dim->steps_left = 0; + break; + case DIM_GOING_LEFT: + dim->tune_state = DIM_GOING_RIGHT; + dim->steps_right = 0; + break; + } +} +EXPORT_SYMBOL(dim_turn); + +void dim_park_on_top(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tired = 0; + dim->tune_state = DIM_PARKING_ON_TOP; +} +EXPORT_SYMBOL(dim_park_on_top); + +void dim_park_tired(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tune_state = DIM_PARKING_TIRED; +} +EXPORT_SYMBOL(dim_park_tired); + +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); + + if (!delta_us) + return; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, + delta_us); +} +EXPORT_SYMBOL(dim_calc_stats); diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c new file mode 100644 index 000000000000..5bcc902c5388 --- /dev/null +++ b/lib/dim/net_dim.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/dim.h> + +struct dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_rx_moderation); + +struct dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_rx_moderation); + +struct dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_tx_moderation); + +struct dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_tx_moderation); + +static int net_dim_step(struct dim *dim) +{ + if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) + return DIM_TOO_TIRED; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) + return DIM_ON_EDGE; + dim->profile_ix++; + dim->steps_right++; + break; + case DIM_GOING_LEFT: + if (dim->profile_ix == 0) + return DIM_ON_EDGE; + dim->profile_ix--; + dim->steps_left++; + break; + } + + dim->tired++; + return DIM_STEPPED; +} + +static void net_dim_exit_parking(struct dim *dim) +{ + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; + net_dim_step(dim); +} + +static int net_dim_stats_compare(struct dim_stats *curr, + struct dim_stats *prev) +{ + if (!prev->bpms) + return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->ppms) + return curr->ppms ? DIM_STATS_BETTER : + DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->epms) + return DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + return DIM_STATS_SAME; +} + +static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) +{ + int prev_state = dim->tune_state; + int prev_ix = dim->profile_ix; + int stats_res; + int step_res; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_SAME) + net_dim_exit_parking(dim); + break; + + case DIM_PARKING_TIRED: + dim->tired--; + if (!dim->tired) + net_dim_exit_parking(dim); + break; + + case DIM_GOING_RIGHT: + case DIM_GOING_LEFT: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_BETTER) + dim_turn(dim); + + if (dim_on_top(dim)) { + dim_park_on_top(dim); + break; + } + + step_res = net_dim_step(dim); + switch (step_res) { + case DIM_ON_EDGE: + dim_park_on_top(dim); + break; + case DIM_TOO_TIRED: + dim_park_tired(dim); + break; + } + + break; + } + + if (prev_state != DIM_PARKING_ON_TOP || + dim->tune_state != DIM_PARKING_ON_TOP) + dim->prev_stats = *curr_stats; + + return dim->profile_ix != prev_ix; +} + +void net_dim(struct dim *dim, struct dim_sample end_sample) +{ + struct dim_stats curr_stats; + u16 nevents; + + switch (dim->state) { + case DIM_MEASURE_IN_PROGRESS: + nevents = BIT_GAP(BITS_PER_TYPE(u16), + end_sample.event_ctr, + dim->start_sample.event_ctr); + if (nevents < DIM_NEVENTS) + break; + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); + if (net_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); + break; + } + /* fall through */ + case DIM_START_MEASURE: + dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); + dim->state = DIM_MEASURE_IN_PROGRESS; + break; + case DIM_APPLY_NEW_PROFILE: + break; + } +} +EXPORT_SYMBOL(net_dim); -- 2.21.0