[PATCH rdma-next 1/2] net/mlx5: Packet pacing enhancement

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Bodong Wang <bodong@xxxxxxxxxxxx>

Add two new parameters, max_burst_sz and typical_pkt_sz (both in
bytes), to the rate limit configuration.

max_burst_sz: For an SQ attached to this rate limit, the device will
schedule packet bursts no larger than this value. A value of 0x0
means packet bursts are limited to the device default. Use this
field when packet bursts must be strictly kept under a certain size.

typical_pkt_sz: When the rate limit is intended for a stream of
similar packets, stating the typical packet size can improve the
accuracy of the rate limiter. The expected packet size is the same
for all SQs associated with the same rate limit index.

The Ethernet driver is updated to the new interface, but it keeps
these two parameters at 0 because there is currently no proper way
to obtain them from user space; doing so would require changing the
ndo_set_tx_maxrate interface.

Signed-off-by: Bodong Wang <bodong@xxxxxxxxxxxx>
Reviewed-by: Daniel Jurgens <danielj@xxxxxxxxxxxx>
Reviewed-by: Yishai Hadas <yishaih@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h              |  2 +-
 drivers/infiniband/hw/mlx5/qp.c                   | 41 ++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 +++++--
 drivers/net/ethernet/mellanox/mlx5/core/rl.c      | 67 +++++++++++++++--------
 include/linux/mlx5/driver.h                       | 15 ++++-
 include/linux/mlx5/mlx5_ifc.h                     | 12 +++-
 6 files changed, 103 insertions(+), 53 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e0bad28e0f09..8285de76278c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -406,7 +406,7 @@ struct mlx5_ib_qp {
 	struct list_head	qps_list;
 	struct list_head	cq_recv_list;
 	struct list_head	cq_send_list;
-	u32			rate_limit;
+	struct mlx5_rate_limit	rl;
 	u32                     underlay_qpn;
 	bool			tunnel_offload_en;
 	/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 44a9b84573d8..edad8ac74a70 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param {
 	u16 operation;
 
 	u32 set_mask; /* raw_qp_set_mask_map */
-	u32 rate_limit;
+
+	struct mlx5_rate_limit rl;
+
 	u8 rq_q_ctr_id;
 };
 
@@ -2774,8 +2776,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
 				   const struct mlx5_modify_raw_qp_param *raw_qp_param)
 {
 	struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
-	u32 old_rate = ibqp->rate_limit;
-	u32 new_rate = old_rate;
+	struct mlx5_rate_limit old_rl = ibqp->rl;
+	struct mlx5_rate_limit new_rl = old_rl;
+	bool new_rate_added = false;
 	u16 rl_index = 0;
 	void *in;
 	void *sqc;
@@ -2797,39 +2800,41 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
 			pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
 				__func__);
 		else
-			new_rate = raw_qp_param->rate_limit;
+			new_rl = raw_qp_param->rl;
 	}
 
-	if (old_rate != new_rate) {
-		if (new_rate) {
-			err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+	if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
+		if (new_rl.rate) {
+			err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
 			if (err) {
-				pr_err("Failed configuring rate %u: %d\n",
-				       new_rate, err);
+				pr_err("Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+				       err, new_rl.rate, new_rl.max_burst_sz,
+				       new_rl.typical_pkt_sz);
+
 				goto out;
 			}
+			new_rate_added = true;
 		}
 
 		MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+		/* index 0 means no limit */
 		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
 	}
 
 	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
 	if (err) {
 		/* Remove new rate from table if failed */
-		if (new_rate &&
-		    old_rate != new_rate)
-			mlx5_rl_remove_rate(dev, new_rate);
+		if (new_rate_added)
+			mlx5_rl_remove_rate(dev, &new_rl);
 		goto out;
 	}
 
 	/* Only remove the old rate after new rate was set */
-	if ((old_rate &&
-	    (old_rate != new_rate)) ||
-	    (new_state != MLX5_SQC_STATE_RDY))
-		mlx5_rl_remove_rate(dev, old_rate);
+	if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+	    new_state != MLX5_SQC_STATE_RDY)
+		mlx5_rl_remove_rate(dev, &old_rl);
 
-	ibqp->rate_limit = new_rate;
+	ibqp->rl = new_rl;
 	sq->state = new_state;
 
 out:
@@ -3144,7 +3149,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		}
 
 		if (attr_mask & IB_QP_RATE_LIMIT) {
-			raw_qp_param.rate_limit = attr->rate_limit;
+			raw_qp_param.rl.rate = attr->rate_limit;
 			raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
 		}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 47bab842c5ee..2ee4ffbddd5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1195,10 +1195,13 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5_core_dev *mdev = c->mdev;
+	struct mlx5_rate_limit rl = {0};
 
 	mlx5e_destroy_sq(mdev, sq->sqn);
-	if (sq->rate_limit)
-		mlx5_rl_remove_rate(mdev, sq->rate_limit);
+	if (sq->rate_limit) {
+		rl.rate = sq->rate_limit;
+		mlx5_rl_remove_rate(mdev, &rl);
+	}
 	mlx5e_free_txqsq_descs(sq);
 	mlx5e_free_txqsq(sq);
 }
@@ -1528,6 +1531,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_modify_sq_param msp = {0};
+	struct mlx5_rate_limit rl = {0};
 	u16 rl_index = 0;
 	int err;
 
@@ -1535,14 +1539,17 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
 		/* nothing to do */
 		return 0;
 
-	if (sq->rate_limit)
+	if (sq->rate_limit) {
+		rl.rate = sq->rate_limit;
 		/* remove current rl index to free space to next ones */
-		mlx5_rl_remove_rate(mdev, sq->rate_limit);
+		mlx5_rl_remove_rate(mdev, &rl);
+	}
 
 	sq->rate_limit = 0;
 
 	if (rate) {
-		err = mlx5_rl_add_rate(mdev, rate, &rl_index);
+		rl.rate = rate;
+		err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
 		if (err) {
 			netdev_err(dev, "Failed configuring rate %u: %d\n",
 				   rate, err);
@@ -1560,7 +1567,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
 			   rate, err);
 		/* remove the rate from the table */
 		if (rate)
-			mlx5_rl_remove_rate(mdev, rate);
+			mlx5_rl_remove_rate(mdev, &rl);
 		return err;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index d3c33e9eea72..02299b9ecae4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -107,16 +107,16 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
  * If the table is full, return NULL
  */
 static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
-					   u32 rate)
+					   struct mlx5_rate_limit *rl)
 {
 	struct mlx5_rl_entry *ret_entry = NULL;
 	bool empty_found = false;
 	int i;
 
 	for (i = 0; i < table->max_size; i++) {
-		if (table->rl_entry[i].rate == rate)
+		if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl))
 			return &table->rl_entry[i];
-		if (!empty_found && !table->rl_entry[i].rate) {
+		if (!empty_found && !table->rl_entry[i].rl.rate) {
 			empty_found = true;
 			ret_entry = &table->rl_entry[i];
 		}
@@ -126,7 +126,8 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 }
 
 static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
-				   u32 rate, u16 index)
+				      u16 index,
+				      struct mlx5_rate_limit *rl)
 {
 	u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
@@ -134,7 +135,9 @@ static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(set_pp_rate_limit_in, in, opcode,
 		 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
 	MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
-	MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
+	MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate);
+	MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz);
+	MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -146,7 +149,17 @@ bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
 }
 EXPORT_SYMBOL(mlx5_rl_is_in_range);
 
-int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+		       struct mlx5_rate_limit *rl_1)
+{
+	return ((rl_0->rate == rl_1->rate) &&
+		(rl_0->max_burst_sz == rl_1->max_burst_sz) &&
+		(rl_0->typical_pkt_sz == rl_1->typical_pkt_sz));
+}
+EXPORT_SYMBOL(mlx5_rl_are_equal);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+		     struct mlx5_rate_limit *rl)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
 	struct mlx5_rl_entry *entry;
@@ -154,14 +167,15 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
 
 	mutex_lock(&table->rl_lock);
 
-	if (!rate || !mlx5_rl_is_in_range(dev, rate)) {
-		mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
-			      rate, table->min_rate, table->max_rate);
+	if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) {
+		mlx5_core_err(dev,
+			      "Invalid rate: %u, should be %u to %u\n",
+			      rl->rate, table->min_rate, table->max_rate);
 		err = -EINVAL;
 		goto out;
 	}
 
-	entry = find_rl_entry(table, rate);
+	entry = find_rl_entry(table, rl);
 	if (!entry) {
 		mlx5_core_err(dev, "Max number of %u rates reached\n",
 			      table->max_size);
@@ -173,13 +187,15 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
 		entry->refcount++;
 	} else {
 		/* new rate limit */
-		err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
+		err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
 		if (err) {
-			mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
-				      rate, err);
+			mlx5_core_err(dev,
+				      "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+				      err, rl->rate, rl->max_burst_sz,
+				      rl->typical_pkt_sz);
 			goto out;
 		}
-		entry->rate = rate;
+		entry->rl = *rl;
 		entry->refcount = 1;
 	}
 	*index = entry->index;
@@ -190,27 +206,30 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
 }
 EXPORT_SYMBOL(mlx5_rl_add_rate);
 
-void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
 	struct mlx5_rl_entry *entry = NULL;
+	struct mlx5_rate_limit reset_rl = {0};
 
 	/* 0 is a reserved value for unlimited rate */
-	if (rate == 0)
+	if (rl->rate == 0)
 		return;
 
 	mutex_lock(&table->rl_lock);
-	entry = find_rl_entry(table, rate);
+	entry = find_rl_entry(table, rl);
 	if (!entry || !entry->refcount) {
-		mlx5_core_warn(dev, "Rate %u is not configured\n", rate);
+		mlx5_core_warn(dev,
+			       "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n",
+			       rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
 		goto out;
 	}
 
 	entry->refcount--;
 	if (!entry->refcount) {
 		/* need to remove rate */
-		mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
-		entry->rate = 0;
+		mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl);
+		entry->rl = reset_rl;
 	}
 
 out:
@@ -257,13 +276,15 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
+	struct mlx5_rate_limit rl = {0};
 	int i;
 
 	/* Clear all configured rates */
 	for (i = 0; i < table->max_size; i++)
-		if (table->rl_entry[i].rate)
-			mlx5_set_pp_rate_limit_cmd(dev, 0,
-						   table->rl_entry[i].index);
+		if (table->rl_entry[i].rl.rate)
+			mlx5_set_pp_rate_limit_cmd(dev,
+						   table->rl_entry[i].index,
+						   &rl);
 
 	kfree(dev->priv.rl_table.rl_entry);
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cded85ab6fe4..767d193c269a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -591,8 +591,14 @@ struct mlx5_eswitch;
 struct mlx5_lag;
 struct mlx5_pagefault;
 
+struct mlx5_rate_limit {
+	u32			rate;
+	u32			max_burst_sz;
+	u16			typical_pkt_sz;
+};
+
 struct mlx5_rl_entry {
-	u32                     rate;
+	struct mlx5_rate_limit	rl;
 	u16                     index;
 	u16                     refcount;
 };
@@ -1107,9 +1113,12 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
 
 int mlx5_init_rl_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
-int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
-void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+		     struct mlx5_rate_limit *rl);
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl);
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+		       struct mlx5_rate_limit *rl_1);
 int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
 		     bool map_wc, bool fast_path);
 void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 14ad84afe8ba..c63bbdc35503 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -571,7 +571,10 @@ struct mlx5_ifc_qos_cap_bits {
 	u8         esw_scheduling[0x1];
 	u8         esw_bw_share[0x1];
 	u8         esw_rate_limit[0x1];
-	u8         reserved_at_4[0x1c];
+	u8         reserved_at_4[0x1];
+	u8         packet_pacing_burst_bound[0x1];
+	u8         packet_pacing_typical_size[0x1];
+	u8         reserved_at_7[0x19];
 
 	u8         reserved_at_20[0x20];
 
@@ -7313,7 +7316,12 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits {
 
 	u8         rate_limit[0x20];
 
-	u8         reserved_at_a0[0x160];
+	u8	   burst_upper_bound[0x20];
+
+	u8         reserved_at_c0[0x10];
+	u8	   typical_packet_size[0x10];
+
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_access_register_out_bits {
-- 
2.14.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux