Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting

On 2021/8/19 2:06 PM, Eli Cohen wrote:
On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
On 2021/8/17 2:02 PM, Eli Cohen wrote:
Add support to handle control virtqueue configurations per virtio
specification. The control virtqueue is implemented in software and no
hardware offloading is involved.

Control VQ configuration needs task context, therefore all configurations
are handled in a workqueue created for the purpose.

I think all the current callers are already in the task context (the
caller of virtnet_send_command()).

Any reason for using workqueue here?

I am running code that might sleep, and the call is made, IIRC, with
irqs disabled. The kernel complained about this.


I see.



I'm not sure if it can work well on UP where the workqueue might not have a
chance to be scheduled (we are doing busy waiting here):

         /* Spin for a response, the kick causes an ioport write, trapping
          * into the hypervisor, so the request should be handled immediately.
          */
         while (!virtqueue_get_buf(vi->cvq, &tmp) &&
                !virtqueue_is_broken(vi->cvq))
                 cpu_relax();

I think we have two different cases here:
1. When the net device is running in a VM. In this case we do not have
any issue since the loop runs in the guest kernel while the work is
done in the host kernel, so it would end in finite time.

2. When we're using virtio_vdpa and the device is on the host, we have
an issue if we're running on a UP processor. Maybe we should avoid
supporting CVQ in this case?


Maybe we can switch to using interrupt and sleep here; will have a look.
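
Not a concrete patch, just a rough sketch of what an interrupt-plus-sleep
variant of virtnet_send_command() could look like, assuming the cvq
interrupt callback signals a completion (virtnet_cvq_done below is a
hypothetical name, wired up when the vqs are found):

	#include <linux/completion.h>

	/* hypothetical: signalled from the cvq interrupt callback */
	static DECLARE_COMPLETION(cvq_done);

	static void virtnet_cvq_done(struct virtqueue *cvq)
	{
		complete(&cvq_done);
	}

	/* in virtnet_send_command(), replacing the cpu_relax() loop */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq))
		wait_for_completion(&cvq_done);

complete() records the wakeup even if it fires before we sleep, so the
check-then-wait above shouldn't lose an interrupt.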




Modifications are made to the memory registration code to allow for
saving a copy of the iotlb to be used by the control VQ to access the vring.

The max number of data virtqueues supported by the driver has been
updated to 2 since multiqueue is not supported at this stage and we need
to ensure consistency of VQ indices mapping to either data or control
VQ.

Signed-off-by: Eli Cohen <elic@xxxxxxxxxx>
---
   drivers/vdpa/mlx5/core/mlx5_vdpa.h |  23 +++
   drivers/vdpa/mlx5/core/mr.c        |  81 +++++++---
   drivers/vdpa/mlx5/core/resources.c |  31 ++++
   drivers/vdpa/mlx5/net/mlx5_vnet.c  | 231 +++++++++++++++++++++++++++--
   4 files changed, 334 insertions(+), 32 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 41b20855ed31..6c43476a69cb 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -5,6 +5,7 @@
   #define __MLX5_VDPA_H__
   #include <linux/etherdevice.h>
+#include <linux/vringh.h>
   #include <linux/vdpa.h>
   #include <linux/mlx5/driver.h>
@@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
   	bool valid;
   };
+struct mlx5_control_vq {
+	struct vhost_iotlb *iotlb;
+	/* spinlock to synchronize iommu table */
+	spinlock_t iommu_lock;
+	struct vringh vring;
+	bool ready;
+	u64 desc_addr;
+	u64 device_addr;
+	u64 driver_addr;
+	struct vdpa_callback event_cb;
+	struct vringh_kiov riov;
+	struct vringh_kiov wiov;
+	unsigned short head;
+};
+
+struct mlx5_ctrl_wq_ent {
+	struct work_struct work;
+	struct mlx5_vdpa_dev *mvdev;
+};
+
   struct mlx5_vdpa_dev {
   	struct vdpa_device vdev;
   	struct mlx5_core_dev *mdev;
@@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
   	u32 generation;
   	struct mlx5_vdpa_mr mr;
+	struct mlx5_control_vq cvq;
+	struct workqueue_struct *wq;
   };
   int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index e59135fa867e..da013b8082bc 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -1,6 +1,7 @@
   // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   /* Copyright (c) 2020 Mellanox Technologies Ltd. */
+#include <linux/vhost_types.h>
   #include <linux/vdpa.h>
   #include <linux/gcd.h>
   #include <linux/string.h>
@@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
   	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
   }
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
   {
-	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	struct vhost_iotlb_map *map;
+	u64 start = 0ULL, last = 0ULL - 1;
   	int err;
-	if (mr->initialized)
-		return 0;
-
-	if (iotlb)
-		err = create_user_mr(mvdev, iotlb);
-	else
-		err = create_dma_mr(mvdev, mr);
-
-	if (!err)
-		mr->initialized = true;
+	if (!src) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
+		return err;
+	}
-	return err;
+	for (map = vhost_iotlb_itree_first(src, start, last); map;
+		map = vhost_iotlb_itree_next(map, start, last)) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
+					    map->addr, map->perm);
+		if (err)
+			return err;
+	}
+	return 0;
   }
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
   {
-	int err;
-
-	mutex_lock(&mvdev->mr.mkey_mtx);
-	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
-	mutex_unlock(&mvdev->mr.mkey_mtx);
-	return err;
+	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);

It's better to use ULLONG_MAX.
Will change.
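
For reference, the agreed change would presumably be just:

	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, ULLONG_MAX);

(ULLONG_MAX from linux/limits.h.)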


   }
   static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
@@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
   	if (!mr->initialized)
   		goto out;
+	prune_iotlb(mvdev);
   	if (mr->user_mr)
   		destroy_user_mr(mvdev, mr);
   	else
@@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
   	mutex_unlock(&mr->mkey_mtx);
   }
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	int err;
+
+	if (mr->initialized)
+		return 0;
+
+	if (iotlb)
+		err = create_user_mr(mvdev, iotlb);
+	else
+		err = create_dma_mr(mvdev, mr);
+
+	if (err)
+		return err;
+
+	err = dup_iotlb(mvdev, iotlb);
+	if (err)
+		goto out_err;
+
+	mr->initialized = true;
+	return 0;
+
+out_err:
+	if (iotlb)
+		destroy_user_mr(mvdev, mr);
+	else
+		destroy_dma_mr(mvdev, mr);
+
+	return err;
+}
+
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	int err;
+
+	mutex_lock(&mvdev->mr.mkey_mtx);
+	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+	mutex_unlock(&mvdev->mr.mkey_mtx);
+	return err;
+}
+
   int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
   			     bool *change_map)
   {
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index d4606213f88a..d24ae1a85159 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -1,6 +1,7 @@
   // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   /* Copyright (c) 2020 Mellanox Technologies Ltd. */
+#include <linux/iova.h>
   #include <linux/mlx5/driver.h>
   #include "mlx5_vdpa.h"
@@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
   	return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
   }
+static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+	int err;
+
+	mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
+	if (!mvdev->cvq.iotlb)
+		return -ENOMEM;
+
+	vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
+	err = iova_cache_get();

Any reason for using iova cache here?
Isn't it required? Aren't we allocating buffers for the CVQ from
iommu_iova kmem cache?


I may be missing something here, but which buffer are you referring to?




+	if (err)
+		vhost_iotlb_free(mvdev->cvq.iotlb);
+
+	return err;
+}
+
+static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+	iova_cache_put();
+	vhost_iotlb_free(mvdev->cvq.iotlb);
+}
+
   int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
   {
   	u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
@@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
   		err = -ENOMEM;
   		goto err_key;
   	}
+
+	err = init_ctrl_vq(mvdev);
+	if (err)
+		goto err_ctrl;
+
   	res->valid = true;
   	return 0;
+err_ctrl:
+	iounmap(res->kick_addr);
   err_key:
   	dealloc_pd(mvdev, res->pdn, res->uid);
   err_pd:
@@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
   	if (!res->valid)
   		return;
+	cleanup_ctrl_vq(mvdev);
   	iounmap(res->kick_addr);
   	res->kick_addr = NULL;
   	dealloc_pd(mvdev, res->pdn, res->uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 0fe7cd370e4b..e18665781135 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
   /* We will remove this limitation once mlx5_vdpa_alloc_resources()
    * provides for driver space allocation
    */
-#define MLX5_MAX_SUPPORTED_VQS 16
+#define MLX5_MAX_SUPPORTED_VQS 2
   static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
   {
@@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
   	struct mlx5_flow_handle *rx_rule;
   	bool setup;
   	u16 mtu;
+	u32 cur_num_vqs;
   };
   static void free_resources(struct mlx5_vdpa_net *ndev);
@@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
   static bool mlx5_vdpa_debug;
+#define MLX5_CVQ_MAX_ENT 16
+
   #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
   	do {                                                                                       \
   		if (features & BIT_ULL(_feature))                                                  \
@@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
   	return max_vqs / 2;
   }
+static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
+{
+	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
+}
+
+static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
+{
+	return idx == ctrl_vq_idx(mvdev);
+}
+
   static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
   {
   	if (status & ~VALID_STATUS_MASK)
@@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
   	ndev->rx_rule = NULL;
   }
+virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct mlx5_core_dev *pfmdev;
+	size_t read;
+	u8 mac[ETH_ALEN];
+
+	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
+	switch (cmd) {
+	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
+		if (read != ETH_ALEN)
+			break;
+
+		if (!memcmp(ndev->config.mac, mac, 6)) {
+			status = VIRTIO_NET_OK;
+			break;
+		}
+
+		if (!is_zero_ether_addr(ndev->config.mac)) {
+			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
+				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
+					       ndev->config.mac);
+				break;
+			}
+		}
+
+		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
+			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
+				       mac);
+			break;
+		}
+
+		memcpy(ndev->config.mac, mac, ETH_ALEN);
+		status = VIRTIO_NET_OK;
+		break;
+
+	default:
+		break;
+	}
+
+	return status;
+}
+
+static void mlx5_cvq_kick_handler(struct work_struct *work)
+{
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct virtio_net_ctrl_hdr ctrl;
+	struct mlx5_ctrl_wq_ent *wqent;
+	struct mlx5_vdpa_dev *mvdev;
+	struct mlx5_control_vq *cvq;
+	struct mlx5_vdpa_net *ndev;
+	size_t read, write;
+	int err;
+
+	wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
+	mvdev = wqent->mvdev;
+	ndev = to_mlx5_vdpa_ndev(mvdev);
+	cvq = &mvdev->cvq;
+	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+		goto out;
+
+	if (!cvq->ready)
+		goto out;
+
+	while (true) {
+		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
+					   GFP_ATOMIC);
+		if (err <= 0)
+			break;
+
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
+		if (read != sizeof(ctrl))
+			break;
+
+		switch (ctrl.class) {
+		case VIRTIO_NET_CTRL_MAC:
+			status = handle_ctrl_mac(mvdev, ctrl.cmd);
+			break;
+
+		default:
+			break;
+		}
+
+		/* Make sure data is written before advancing index */
+		smp_wmb();
+
+		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
+		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
+		vringh_kiov_cleanup(&cvq->riov);
+		vringh_kiov_cleanup(&cvq->wiov);
+
+		if (vringh_need_notify_iotlb(&cvq->vring))
+			vringh_notify(&cvq->vring);
+	}
+out:
+	kfree(wqent);
+}
+
   static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
+	struct mlx5_ctrl_wq_ent *wqent;
   	if (!is_index_valid(mvdev, idx))
   		return;
+	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
+		if (!mvdev->cvq.ready)
+			return;
+
+		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+		if (!wqent)
+			return;
+
+		wqent->mvdev = mvdev;
+		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
+		queue_work(mvdev->wq, &wqent->work);
+		return;
+	}
+
+	mvq = &ndev->vqs[idx];
   	if (unlikely(!mvq->ready))
   		return;
@@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
   	if (!is_index_valid(mvdev, idx))
   		return -EINVAL;
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		mvdev->cvq.desc_addr = desc_area;
+		mvdev->cvq.device_addr = device_area;
+		mvdev->cvq.driver_addr = driver_area;
+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
   	mvq->desc_addr = desc_area;
   	mvq->device_addr = device_area;
   	mvq->driver_addr = driver_area;
@@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   	struct mlx5_vdpa_virtqueue *mvq;
-	if (!is_index_valid(mvdev, idx))
+	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
   		return;
   	mvq = &ndev->vqs[idx];
@@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
   	ndev->event_cbs[idx] = *cb;
   }
+static void mlx5_cvq_notify(struct vringh *vring)
+{
+	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
+
+	if (!cvq->event_cb.callback)
+		return;
+
+	cvq->event_cb.callback(cvq->event_cb.private);
+}
+
+static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
+{
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+
+	cvq->ready = ready;
+	if (!ready)
+		return;
+
+	cvq->vring.notify = mlx5_cvq_notify;
+}
+
   static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
   	if (!is_index_valid(mvdev, idx))
   		return;
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		set_cvq_ready(mvdev, ready);
+		return;
+	}
+
+	mvq = &ndev->vqs[idx];
   	if (!ready)
   		suspend_vq(ndev, mvq);
@@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
   	if (!is_index_valid(mvdev, idx))
   		return false;
-	return mvq->ready;
+	if (is_ctrl_vq_idx(mvdev, idx))
+		return mvdev->cvq.ready;
+
+	return ndev->vqs[idx].ready;
   }
   static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
@@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
   	if (!is_index_valid(mvdev, idx))
   		return -EINVAL;
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;

Question, is packed virtqueue supported by current mlx5e?

If no, this is fine.
We don't. The hardware might support it, but the device driver does not
advertise packed virtqueue support.


Good to know this. So we're fine.

Thanks



If yes, we should disable packed and re-enable it after vringh supports
packed virtqueue.
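
If we ever did need to disable it, masking the feature bit in
mlx5_vdpa_get_features() would presumably be enough; a sketch only,
not part of this patch:

	/* hide packed ring support until vringh handles packed queues */
	ndev->mvdev.mlx_features &= ~BIT_ULL(VIRTIO_F_RING_PACKED);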

Otherwise looks good.

Thanks


+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
   	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
   		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
   		return -EINVAL;
@@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
   {
   	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
   	struct mlx5_virtq_attr attr;
   	int err;
   	if (!is_index_valid(mvdev, idx))
   		return -EINVAL;
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
   	/* If the virtq object was destroyed, use the value saved at
   	 * the last minute of suspend_vq. This caters for userspace
   	 * that cares about emulating the index after vq is stopped.
@@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
   	u16 dev_features;
   	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
-	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
+	ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
   	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
   		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
   	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
+
   	print_features(mvdev, ndev->mvdev.mlx_features, false);
   	return ndev->mvdev.mlx_features;
   }
@@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
   static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
   {
   	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
   	int err;
   	int i;
@@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
   			goto err_vq;
   	}
+	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+					MLX5_CVQ_MAX_ENT, false,
+					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
+					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
+					(struct vring_used *)(uintptr_t)cvq->device_addr);
+		if (err)
+			goto err_vq;
+	}
+
   	return 0;
   err_vq:
@@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
   	struct mlx5_vdpa_net *ndev;
   	phys_addr_t addr;
-	if (!is_index_valid(mvdev, idx))
+	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
   		return ret;
   	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
@@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
   		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
   		if (err)
   			goto err_mtu;
+
+		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
   	}
+	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
   	mvdev->vdev.dma_dev = &mdev->pdev->dev;
   	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
   	if (err)
@@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
   	if (err)
   		goto err_mr;
+	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
+	if (!mvdev->wq) {
+		err = -ENOMEM;
+		goto err_res2;
+	}
+
+	ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
   	mvdev->vdev.mdev = &mgtdev->mgtdev;
-	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
+	err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
   	if (err)
   		goto err_reg;
@@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
   	return 0;
   err_reg:
+	destroy_workqueue(mvdev->wq);
+err_res2:
   	free_resources(ndev);
   err_mr:
   	mlx5_vdpa_destroy_mr(mvdev);
@@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
   static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
   {
   	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
+	destroy_workqueue(mvdev->wq);
   	_vdpa_unregister_device(dev);
   	mgtdev->ndev = NULL;
   }

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization



