On 2024/7/15 5:40, Shradha Gupta wrote:
Currently, the WQ sizes for the RX and TX queues of MANA devices
are hardcoded to default values.
Allow these values to be configured for MANA devices as ringparam
configuration (get/set) through ethtool_ops.
Signed-off-by: Shradha Gupta <shradhagupta@xxxxxxxxxxxxxxxxxxx>
Reviewed-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
Reviewed-by: Long Li <longli@xxxxxxxxxxxxx>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 20 +++--
.../ethernet/microsoft/mana/mana_ethtool.c | 79 +++++++++++++++++++
include/net/mana/mana.h | 21 ++++-
3 files changed, 109 insertions(+), 11 deletions(-)
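
For reference, with this patch applied the sizes should become queryable
and tunable through the standard ethtool interface, for example
(illustrative only, with eth0 as a placeholder interface name):

  ethtool -g eth0                  # show current and maximum ring sizes
  ethtool -G eth0 rx 1024 tx 512   # request new sizes (powers of 2)
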
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 91f10910ea44..31355a95e891 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -618,7 +618,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
dev = mpc->ac->gdma_dev->gdma_context->dev;
- num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;
+ num_rxb = mpc->num_queues * mpc->rx_queue_size;
WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
@@ -1899,14 +1899,15 @@ static int mana_create_txq(struct mana_port_context *apc,
return -ENOMEM;
/* The minimum size of the WQE is 32 bytes, hence
- * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
+ * apc->tx_queue_size represents the maximum number of WQEs
* the SQ can store. This value is then used to size other queues
* to prevent overflow.
+ * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED,
+ * as tx_queue_size is always a power of 2.
*/
- txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
- BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size));
+ txq_size = apc->tx_queue_size * 32;
- cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
+ cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE;
cq_size = MANA_PAGE_ALIGN(cq_size);
gc = gd->gdma_context;
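
A quick sanity check of the alignment claim above, assuming MANA_PAGE_SIZE
is 4096: with the default tx_queue_size of 256, txq_size = 256 * 32 = 8192,
i.e. two MANA pages. Since the minimum size of 128 already gives
128 * 32 = 4096, exactly one page, every power-of-2 size >= 128 yields a
page multiple, so dropping the compile-time BUILD_BUG_ON looks safe.
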
@@ -2145,10 +2146,11 @@ static int mana_push_wqe(struct mana_rxq *rxq)
static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
{
+ struct mana_port_context *mpc = netdev_priv(rxq->ndev);
struct page_pool_params pprm = {};
int ret;
- pprm.pool_size = RX_BUFFERS_PER_QUEUE;
+ pprm.pool_size = mpc->rx_queue_size;
pprm.nid = gc->numa_node;
pprm.napi = &rxq->rx_cq.napi;
pprm.netdev = rxq->ndev;
@@ -2180,13 +2182,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
gc = gd->gdma_context;
- rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
+ rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size),
GFP_KERNEL);
if (!rxq)
return NULL;
rxq->ndev = ndev;
- rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
+ rxq->num_rx_buf = apc->rx_queue_size;
rxq->rxq_idx = rxq_idx;
rxq->rxobj = INVALID_MANA_HANDLE;
@@ -2734,6 +2736,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->ndev = ndev;
apc->max_queues = gc->max_num_queues;
apc->num_queues = gc->max_num_queues;
+ apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE;
+ apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE;
apc->port_handle = INVALID_MANA_HANDLE;
apc->pf_filter_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 146d5db1792f..7a4752dda7b8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -369,6 +369,83 @@ static int mana_set_channels(struct net_device *ndev,
return err;
}
+static void mana_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ ring->rx_pending = apc->rx_queue_size;
+ ring->tx_pending = apc->tx_queue_size;
+ ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE;
+ ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE;
+}
+
+static int mana_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ u32 new_tx, new_rx;
+ u32 old_tx, old_rx;
+ int err1, err2;
+
+ if (ring->rx_jumbo_pending) {
+ netdev_err(ndev, "%s: rx_jumbo_pending not supported\n", __func__);
I noticed the "rx_jumbo_pending not supported" message.
In include/uapi/asm-generic/errno.h:
#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
So can we use the error -EOPNOTSUPP here instead of -EINVAL?
+ return -EINVAL;
+ }
+ if (ring->rx_mini_pending) {
+ netdev_err(ndev, "%s: rx_mini_pending not supported\n", __func__);
Ditto for the "rx_mini_pending not supported" case.

Thanks,
Zhu Yanjun
+ return -EINVAL;
+ }
+
+ if (!apc)
+ return -EINVAL;
+
+ old_tx = apc->tx_queue_size;
+ old_rx = apc->rx_queue_size;
+ new_tx = clamp_t(u32, ring->tx_pending, MIN_TX_BUFFERS_PER_QUEUE, MAX_TX_BUFFERS_PER_QUEUE);
+ new_rx = clamp_t(u32, ring->rx_pending, MIN_RX_BUFFERS_PER_QUEUE, MAX_RX_BUFFERS_PER_QUEUE);
+
+ if (new_tx == old_tx && new_rx == old_rx)
+ return 0;
+
+ if (!is_power_of_2(new_tx)) {
+ netdev_err(ndev, "%s:Tx:%d not supported\n", __func__, new_tx);
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(new_rx)) {
+ netdev_err(ndev, "%s:Rx:%d not supported\n", __func__, new_rx);
+ return -EINVAL;
+ }
+
+ err1 = mana_detach(ndev, false);
+ if (err1) {
+ netdev_err(ndev, "mana_detach failed: %d\n", err1);
+ return err1;
+ }
+
+ apc->tx_queue_size = new_tx;
+ apc->rx_queue_size = new_rx;
+ err1 = mana_attach(ndev);
+ if (!err1)
+ return 0;
+
+ netdev_err(ndev, "mana_attach failed: %d\n", err1);
+
+ /* Try rolling back to the older values */
+ apc->tx_queue_size = old_tx;
+ apc->rx_queue_size = old_rx;
+ err2 = mana_attach(ndev);
+ if (err2)
+ netdev_err(ndev, "mana_reattach failed: %d\n", err2);
+
+ return err1;
+}
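
A worked example of the validation above: a request of "tx 100" is clamped
up to MIN_TX_BUFFERS_PER_QUEUE (128) and accepted, while a request of
"tx 3000" survives the clamp unchanged (it lies within [128, 16384]), then
fails the is_power_of_2() check and returns -EINVAL. So out-of-range values
are silently rounded, but in-range non-power-of-2 values are hard errors.
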
+
const struct ethtool_ops mana_ethtool_ops = {
.get_ethtool_stats = mana_get_ethtool_stats,
.get_sset_count = mana_get_sset_count,
@@ -380,4 +457,6 @@ const struct ethtool_ops mana_ethtool_ops = {
.set_rxfh = mana_set_rxfh,
.get_channels = mana_get_channels,
.set_channels = mana_set_channels,
+ .get_ringparam = mana_get_ringparam,
+ .set_ringparam = mana_set_ringparam,
};
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index e39b8676fe54..3e27ca5155aa 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -38,9 +38,21 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
-#define RX_BUFFERS_PER_QUEUE 512
+/* This max value for RX buffers is derived from __alloc_pages()'s max page
+ * allocation calculation, which allows at most 2^(MAX_ORDER - 1) pages. An RX
+ * buffer size beyond this value gets rejected by the __alloc_pages() call.
+ */
+#define MAX_RX_BUFFERS_PER_QUEUE 8192
+#define DEF_RX_BUFFERS_PER_QUEUE 512
+#define MIN_RX_BUFFERS_PER_QUEUE 128
-#define MAX_SEND_BUFFERS_PER_QUEUE 256
+/* This max value for TX buffers is derived, through testing, as the maximum
+ * number of allocatable pages supported on the host per guest. A TX buffer
+ * size beyond this value is rejected by the hardware.
+ */
+#define MAX_TX_BUFFERS_PER_QUEUE 16384
+#define DEF_TX_BUFFERS_PER_QUEUE 256
+#define MIN_TX_BUFFERS_PER_QUEUE 128
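
At the extremes these limits imply, per queue, a TX WQ of 16384 * 32 bytes
= 512 KiB and a TX CQ of 16384 * COMP_ENTRY_SIZE (64) = 1 MiB, following
the sizing logic in mana_create_txq() quoted above; assuming the RX side
scales the same way, the maximum RX CQ would be 8192 * 64 = 512 KiB.
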
#define EQ_SIZE (8 * MANA_PAGE_SIZE)
@@ -285,7 +297,7 @@ struct mana_recv_buf_oob {
void *buf_va;
bool from_pool; /* allocated from a page pool */
- /* SGL of the buffer going to be sent has part of the work request. */
+ /* SGL of the buffer going to be sent as part of the work request. */
u32 num_sge;
struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES];
@@ -437,6 +449,9 @@ struct mana_port_context {
unsigned int max_queues;
unsigned int num_queues;
+ unsigned int rx_queue_size;
+ unsigned int tx_queue_size;
+
mana_handle_t port_handle;
mana_handle_t pf_filter_handle;