On Mon, Jul 15, 2024 at 06:42:16AM +0200, Zhu Yanjun wrote:
> On 2024/7/15 5:40, Shradha Gupta wrote:
> >Currently the values of WQs for RX and TX queues for MANA devices
> >are hardcoded to default sizes.
> >Allow configuring these values for MANA devices as ringparam
> >configuration (get/set) through ethtool_ops.
> >
> >Signed-off-by: Shradha Gupta <shradhagupta@xxxxxxxxxxxxxxxxxxx>
> >Reviewed-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
> >Reviewed-by: Long Li <longli@xxxxxxxxxxxxx>
> >---
> > drivers/net/ethernet/microsoft/mana/mana_en.c |  20 +++--
> > .../ethernet/microsoft/mana/mana_ethtool.c    |  79 +++++++++++++++++++
> > include/net/mana/mana.h                       |  21 ++++-
> > 3 files changed, 109 insertions(+), 11 deletions(-)
> >
> >diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >index 91f10910ea44..31355a95e891 100644
> >--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> >+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >@@ -618,7 +618,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
> > 	dev = mpc->ac->gdma_dev->gdma_context->dev;
> >-	num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;
> >+	num_rxb = mpc->num_queues * mpc->rx_queue_size;
> > 	WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
> > 	mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
> >@@ -1899,14 +1899,15 @@ static int mana_create_txq(struct mana_port_context *apc,
> > 		return -ENOMEM;
> > 	/* The minimum size of the WQE is 32 bytes, hence
> >-	 * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
> >+	 * apc->tx_queue_size represents the maximum number of WQEs
> > 	 * the SQ can store. This value is then used to size other queues
> > 	 * to prevent overflow.
> >+	 * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED,
> >+	 * as tx_queue_size is always a power of 2.
> > 	 */
> >-	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
> >-	BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size));
> >+	txq_size = apc->tx_queue_size * 32;
> >-	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
> >+	cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE;
> > 	cq_size = MANA_PAGE_ALIGN(cq_size);
> > 	gc = gd->gdma_context;
> >@@ -2145,10 +2146,11 @@ static int mana_push_wqe(struct mana_rxq *rxq)
> > static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
> > {
> >+	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
> > 	struct page_pool_params pprm = {};
> > 	int ret;
> >-	pprm.pool_size = RX_BUFFERS_PER_QUEUE;
> >+	pprm.pool_size = mpc->rx_queue_size;
> > 	pprm.nid = gc->numa_node;
> > 	pprm.napi = &rxq->rx_cq.napi;
> > 	pprm.netdev = rxq->ndev;
> >@@ -2180,13 +2182,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
> > 	gc = gd->gdma_context;
> >-	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
> >+	rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size),
> > 		      GFP_KERNEL);
> > 	if (!rxq)
> > 		return NULL;
> > 	rxq->ndev = ndev;
> >-	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
> >+	rxq->num_rx_buf = apc->rx_queue_size;
> > 	rxq->rxq_idx = rxq_idx;
> > 	rxq->rxobj = INVALID_MANA_HANDLE;
> >@@ -2734,6 +2736,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
> > 	apc->ndev = ndev;
> > 	apc->max_queues = gc->max_num_queues;
> > 	apc->num_queues = gc->max_num_queues;
> >+	apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE;
> >+	apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE;
> > 	apc->port_handle = INVALID_MANA_HANDLE;
> > 	apc->pf_filter_handle = INVALID_MANA_HANDLE;
> > 	apc->port_idx = port_idx;
> >diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> >index 146d5db1792f..7a4752dda7b8 100644
> >--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> >+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> >@@ -369,6 +369,83 @@ static int mana_set_channels(struct net_device *ndev,
> > 	return err;
> > }
> >+static void mana_get_ringparam(struct net_device *ndev,
> >+			       struct ethtool_ringparam *ring,
> >+			       struct kernel_ethtool_ringparam *kernel_ring,
> >+			       struct netlink_ext_ack *extack)
> >+{
> >+	struct mana_port_context *apc = netdev_priv(ndev);
> >+
> >+	ring->rx_pending = apc->rx_queue_size;
> >+	ring->tx_pending = apc->tx_queue_size;
> >+	ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE;
> >+	ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE;
> >+}
> >+
> >+static int mana_set_ringparam(struct net_device *ndev,
> >+			      struct ethtool_ringparam *ring,
> >+			      struct kernel_ethtool_ringparam *kernel_ring,
> >+			      struct netlink_ext_ack *extack)
> >+{
> >+	struct mana_port_context *apc = netdev_priv(ndev);
> >+	u32 new_tx, new_rx;
> >+	u32 old_tx, old_rx;
> >+	int err1, err2;
> >+
> >+	if (ring->rx_jumbo_pending) {
> >+		netdev_err(ndev, "%s: rx_jumbo_pending not supported\n", __func__);
>
> I noticed that "rx_jumbo_pending not supported".
>
> #define ENOTSUP 134 /* Not supported */
>
> So can we use the error -ENOTSUP?
>
> >+		return -EINVAL;
> >+	}
> >+	if (ring->rx_mini_pending) {
> >+		netdev_err(ndev, "%s: rx_mini_pending not supported\n", __func__);
>
> ditto. "rx_mini_pending not supported"
>
> Thanks,
> Zhu Yanjun

Thanks for the pointers, Zhu Yanjun. I'll take care of these in the next version.
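For the record, the checks could end up looking something like the sketch below in the next version (untested). I'm assuming -EOPNOTSUPP here, since the kernel itself does not define ENOTSUP and ethtool handlers typically return -EOPNOTSUPP for unsupported fields:

	/* Untested sketch for a possible next version: reject the
	 * unsupported ring fields with -EOPNOTSUPP instead of -EINVAL.
	 */
	if (ring->rx_jumbo_pending) {
		netdev_err(ndev, "%s: rx_jumbo_pending not supported\n", __func__);
		return -EOPNOTSUPP;
	}

	if (ring->rx_mini_pending) {
		netdev_err(ndev, "%s: rx_mini_pending not supported\n", __func__);
		return -EOPNOTSUPP;
	}
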
> >+		return -EINVAL;
> >+	}
> >+
> >+	if (!apc)
> >+		return -EINVAL;
> >+
> >+	old_tx = apc->tx_queue_size;
> >+	old_rx = apc->rx_queue_size;
> >+	new_tx = clamp_t(u32, ring->tx_pending, MIN_TX_BUFFERS_PER_QUEUE, MAX_TX_BUFFERS_PER_QUEUE);
> >+	new_rx = clamp_t(u32, ring->rx_pending, MIN_RX_BUFFERS_PER_QUEUE, MAX_RX_BUFFERS_PER_QUEUE);
> >+
> >+	if (new_tx == old_tx && new_rx == old_rx)
> >+		return 0;
> >+
> >+	if (!is_power_of_2(new_tx)) {
> >+		netdev_err(ndev, "%s:Tx:%d not supported\n", __func__, new_tx);
> >+		return -EINVAL;
> >+	}
> >+
> >+	if (!is_power_of_2(new_rx)) {
> >+		netdev_err(ndev, "%s:Rx:%d not supported\n", __func__, new_rx);
> >+		return -EINVAL;
> >+	}
> >+
> >+	err1 = mana_detach(ndev, false);
> >+	if (err1) {
> >+		netdev_err(ndev, "mana_detach failed: %d\n", err1);
> >+		return err1;
> >+	}
> >+
> >+	apc->tx_queue_size = new_tx;
> >+	apc->rx_queue_size = new_rx;
> >+	err1 = mana_attach(ndev);
> >+	if (!err1)
> >+		return 0;
> >+
> >+	netdev_err(ndev, "mana_attach failed: %d\n", err1);
> >+
> >+	/* Try rolling back to the older values */
> >+	apc->tx_queue_size = old_tx;
> >+	apc->rx_queue_size = old_rx;
> >+	err2 = mana_attach(ndev);
> >+	if (err2)
> >+		netdev_err(ndev, "mana_reattach failed: %d\n", err2);
> >+
> >+	return err1;
> >+}
> >+
> > const struct ethtool_ops mana_ethtool_ops = {
> > 	.get_ethtool_stats = mana_get_ethtool_stats,
> > 	.get_sset_count = mana_get_sset_count,
> >@@ -380,4 +457,6 @@ const struct ethtool_ops mana_ethtool_ops = {
> > 	.set_rxfh = mana_set_rxfh,
> > 	.get_channels = mana_get_channels,
> > 	.set_channels = mana_set_channels,
> >+	.get_ringparam = mana_get_ringparam,
> >+	.set_ringparam = mana_set_ringparam,
> > };
> >diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> >index e39b8676fe54..3e27ca5155aa 100644
> >--- a/include/net/mana/mana.h
> >+++ b/include/net/mana/mana.h
> >@@ -38,9 +38,21 @@ enum TRI_STATE {
> > #define COMP_ENTRY_SIZE 64
> >-#define RX_BUFFERS_PER_QUEUE 512
> >+/* This max value for RX buffers is derived from __alloc_page()'s max page
> >+ * allocation calculation. It allows a maximum of 2^(MAX_ORDER - 1) pages. RX
> >+ * buffer sizes beyond this value get rejected by the __alloc_page() call.
> >+ */
> >+#define MAX_RX_BUFFERS_PER_QUEUE 8192
> >+#define DEF_RX_BUFFERS_PER_QUEUE 512
> >+#define MIN_RX_BUFFERS_PER_QUEUE 128
> >-#define MAX_SEND_BUFFERS_PER_QUEUE 256
> >+/* This max value for TX buffers is derived as the maximum allocatable
> >+ * pages supported on host per guest through testing. TX buffer sizes beyond
> >+ * this value are rejected by the hardware.
> >+ */
> >+#define MAX_TX_BUFFERS_PER_QUEUE 16384
> >+#define DEF_TX_BUFFERS_PER_QUEUE 256
> >+#define MIN_TX_BUFFERS_PER_QUEUE 128
> > #define EQ_SIZE (8 * MANA_PAGE_SIZE)
> >@@ -285,7 +297,7 @@ struct mana_recv_buf_oob {
> > 	void *buf_va;
> > 	bool from_pool; /* allocated from a page pool */
> >-	/* SGL of the buffer going to be sent has part of the work request. */
> >+	/* SGL of the buffer going to be sent as part of the work request. */
> > 	u32 num_sge;
> > 	struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES];
> >@@ -437,6 +449,9 @@ struct mana_port_context {
> > 	unsigned int max_queues;
> > 	unsigned int num_queues;
> >+	unsigned int rx_queue_size;
> >+	unsigned int tx_queue_size;
> >+
> > 	mana_handle_t port_handle;
> > 	mana_handle_t pf_filter_handle;
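As a side note, here is a rough userspace sketch (untested; "eth0" is just a placeholder interface name) of how the new hooks get exercised over the ioctl path, roughly equivalent to "ethtool -g eth0" followed by "ethtool -G eth0 rx 1024":

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	int main(void)
	{
		struct ethtool_ringparam ring = { .cmd = ETHTOOL_GRINGPARAM };
		struct ifreq ifr;
		int fd;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&ring;

		/* Read the current and maximum ring sizes (mana_get_ringparam). */
		fd = socket(AF_INET, SOCK_DGRAM, 0);
		if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
			perror("ETHTOOL_GRINGPARAM");
			return 1;
		}
		printf("rx %u (max %u), tx %u (max %u)\n",
		       ring.rx_pending, ring.rx_max_pending,
		       ring.tx_pending, ring.tx_max_pending);

		/* Request a new RX ring size (mana_set_ringparam); the driver
		 * expects a power of 2 within the MIN/MAX bounds above.
		 */
		ring.cmd = ETHTOOL_SRINGPARAM;
		ring.rx_pending = 1024;
		if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
			perror("ETHTOOL_SRINGPARAM");

		close(fd);
		return 0;
	}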