On Thu, Aug 19, 2021 at 3:22 PM Eli Cohen <elic@xxxxxxxxxx> wrote:
>
> On Thu, Aug 19, 2021 at 03:12:14PM +0800, Jason Wang wrote:
> >
> > On 2021/8/19 2:06 PM, Eli Cohen wrote:
> > > On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
> > > > On 2021/8/17 2:02 PM, Eli Cohen wrote:
> > > > > Add support to handle control virtqueue configurations per the virtio
> > > > > specification. The control virtqueue is implemented in software and no
> > > > > hardware offloading is involved.
> > > > >
> > > > > Control VQ configuration needs task context, therefore all configurations
> > > > > are handled in a workqueue created for the purpose.
> > > >
> > > > I think all the current callers are already in task context (the
> > > > caller of virtnet_send_command()).
> > > >
> > > > Any reason for using a workqueue here?
> > >
> > > I am running code that might sleep and the call has, IIRC, irqs disabled. The
> > > kernel complained about this.
> >
> > I see.
> >
> > > > I'm not sure it can work well on UP, where the workqueue might not have a
> > > > chance to be scheduled (we are doing busy waiting here):
> > > >
> > > >         /* Spin for a response, the kick causes an ioport write, trapping
> > > >          * into the hypervisor, so the request should be handled
> > > >          * immediately.
> > > >          */
> > > >         while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > >                !virtqueue_is_broken(vi->cvq))
> > > >                 cpu_relax();
> > >
> > > I think we have two different cases here:
> > > 1. When the net device is running in a VM. In this case we do not have
> > >    any issue, since the loop runs in the guest kernel while the work is
> > >    done in the host kernel and would end in finite time.
> > >
> > > 2. When we're using virtio_vdpa and the device is on the host, we have an
> > >    issue if we're using a UP processor. Maybe we should avoid supporting CVQ
> > >    in this case?
> >
> > Maybe we can switch to using interrupt and sleep here, will have a look.
>
> Won't it hurt latency?

I'm not sure, but usually we don't care about the performance of the control vq.

Thanks

> > > > > Modifications are made to the memory registration code to allow for
> > > > > saving a copy of the iotlb to be used by the control VQ to access the vring.
> > > > >
> > > > > The max number of data virtqueues supported by the driver has been
> > > > > updated to 2 since multiqueue is not supported at this stage and we need
> > > > > to ensure consistency of VQ indices mapping to either data or control
> > > > > VQ.
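For illustration of the busy-wait exchange above: instead of spinning with cpu_relax(), the caller could sleep until the device returns the buffer. The sketch below is illustrative only and is not part of the posted patch; it assumes the control virtqueue is given an interrupt callback, and cvq_intr()/send_command_and_wait() are hypothetical names (struct virtnet_info and vi->cvq are the virtio_net driver's private state in drivers/net/virtio_net.c).

#include <linux/completion.h>
#include <linux/virtio.h>

static DECLARE_COMPLETION(cvq_done);

/* Hypothetical callback attached to the control virtqueue */
static void cvq_intr(struct virtqueue *cvq)
{
	complete(&cvq_done);
}

static void send_command_and_wait(struct virtnet_info *vi)
{
	unsigned int tmp;

	virtqueue_kick(vi->cvq);
	/* Sleep instead of spinning; woken from cvq_intr() */
	wait_for_completion(&cvq_done);
	virtqueue_get_buf(vi->cvq, &tmp);
}

Whether this is acceptable depends on the latency question raised above; for the control vq the extra wakeup cost is usually not a concern.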
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@xxxxxxxxxx>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  23 +++
> > > > >  drivers/vdpa/mlx5/core/mr.c        |  81 +++++++---
> > > > >  drivers/vdpa/mlx5/core/resources.c |  31 ++++
> > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 231 +++++++++++++++++++++++++++--
> > > > >  4 files changed, 334 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > index 41b20855ed31..6c43476a69cb 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > @@ -5,6 +5,7 @@
> > > > >  #define __MLX5_VDPA_H__
> > > > >  #include <linux/etherdevice.h>
> > > > > +#include <linux/vringh.h>
> > > > >  #include <linux/vdpa.h>
> > > > >  #include <linux/mlx5/driver.h>
> > > > > @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
> > > > >  	bool valid;
> > > > >  };
> > > > > +struct mlx5_control_vq {
> > > > > +	struct vhost_iotlb *iotlb;
> > > > > +	/* spinlock to synchronize iommu table */
> > > > > +	spinlock_t iommu_lock;
> > > > > +	struct vringh vring;
> > > > > +	bool ready;
> > > > > +	u64 desc_addr;
> > > > > +	u64 device_addr;
> > > > > +	u64 driver_addr;
> > > > > +	struct vdpa_callback event_cb;
> > > > > +	struct vringh_kiov riov;
> > > > > +	struct vringh_kiov wiov;
> > > > > +	unsigned short head;
> > > > > +};
> > > > > +
> > > > > +struct mlx5_ctrl_wq_ent {
> > > > > +	struct work_struct work;
> > > > > +	struct mlx5_vdpa_dev *mvdev;
> > > > > +};
> > > > > +
> > > > >  struct mlx5_vdpa_dev {
> > > > >  	struct vdpa_device vdev;
> > > > >  	struct mlx5_core_dev *mdev;
> > > > > @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
> > > > >  	u32 generation;
> > > > >  	struct mlx5_vdpa_mr mr;
> > > > > +	struct mlx5_control_vq cvq;
> > > > > +	struct workqueue_struct *wq;
> > > > >  };
> > > > >  int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > > > index e59135fa867e..da013b8082bc 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > @@ -1,6 +1,7 @@
> > > > >  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> > > > >  /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> > > > > +#include <linux/vhost_types.h>
> > > > >  #include <linux/vdpa.h>
> > > > >  #include <linux/gcd.h>
> > > > >  #include <linux/string.h>
> > > > > @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> > > > >  	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
> > > > >  }
> > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
> > > > >  {
> > > > > -	struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > +	struct vhost_iotlb_map *map;
> > > > > +	u64 start = 0ULL, last = 0ULL - 1;
> > > > >  	int err;
> > > > > -	if (mr->initialized)
> > > > > -		return 0;
> > > > > -
> > > > > -	if (iotlb)
> > > > > -		err = create_user_mr(mvdev, iotlb);
> > > > > -	else
> > > > > -		err = create_dma_mr(mvdev, mr);
> > > > > -
> > > > > -	if (!err)
> > > > > -		mr->initialized = true;
> > > > > +	if (!src) {
> > > > > +		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
> > > > > +		return err;
> > > > > +	}
> > > > > -	return err;
> > > > > +	for (map = vhost_iotlb_itree_first(src, start, last); map;
> > > > > +	     map = vhost_iotlb_itree_next(map, start, last)) {
> > > > > +		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
> > > > > +					    map->addr, map->perm);
> > > > > +		if (err)
> > > > > +			return err;
> > > > > +	}
> > > > > +	return 0;
> > > > >  }
> > > > > -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
> > > > >  {
> > > > > -	int err;
> > > > > -
> > > > > -	mutex_lock(&mvdev->mr.mkey_mtx);
> > > > > -	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> > > > > -	mutex_unlock(&mvdev->mr.mkey_mtx);
> > > > > -	return err;
> > > > > +	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);
> > > >
> > > > It's better to use ULLONG_MAX.
> > >
> > > Will change.
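For reference, with the ULLONG_MAX suggestion applied, the helper would read as follows (illustrative only, not the posted hunk; ULLONG_MAX is provided by linux/limits.h):

static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
{
	/* Drop every mapping held by the control VQ copy of the iotlb */
	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, ULLONG_MAX);
}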
> > > > > > > > > > > > } > > > > > static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) > > > > > @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) > > > > > if (!mr->initialized) > > > > > goto out; > > > > > + prune_iotlb(mvdev); > > > > > if (mr->user_mr) > > > > > destroy_user_mr(mvdev, mr); > > > > > else > > > > > @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) > > > > > mutex_unlock(&mr->mkey_mtx); > > > > > } > > > > > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) > > > > > +{ > > > > > + struct mlx5_vdpa_mr *mr = &mvdev->mr; > > > > > + int err; > > > > > + > > > > > + if (mr->initialized) > > > > > + return 0; > > > > > + > > > > > + if (iotlb) > > > > > + err = create_user_mr(mvdev, iotlb); > > > > > + else > > > > > + err = create_dma_mr(mvdev, mr); > > > > > + > > > > > + if (err) > > > > > + return err; > > > > > + > > > > > + err = dup_iotlb(mvdev, iotlb); > > > > > + if (err) > > > > > + goto out_err; > > > > > + > > > > > + mr->initialized = true; > > > > > + return 0; > > > > > + > > > > > +out_err: > > > > > + if (iotlb) > > > > > + destroy_user_mr(mvdev, mr); > > > > > + else > > > > > + destroy_dma_mr(mvdev, mr); > > > > > + > > > > > + return err; > > > > > +} > > > > > + > > > > > +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) > > > > > +{ > > > > > + int err; > > > > > + > > > > > + mutex_lock(&mvdev->mr.mkey_mtx); > > > > > + err = _mlx5_vdpa_create_mr(mvdev, iotlb); > > > > > + mutex_unlock(&mvdev->mr.mkey_mtx); > > > > > + return err; > > > > > +} > > > > > + > > > > > int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, > > > > > bool *change_map) > > > > > { > > > > > diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c > > > > > index d4606213f88a..d24ae1a85159 100644 > > > > > --- a/drivers/vdpa/mlx5/core/resources.c > > > > > +++ b/drivers/vdpa/mlx5/core/resources.c > > > > > @@ -1,6 +1,7 @@ > > > > > // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > > > > > /* Copyright (c) 2020 Mellanox Technologies Ltd. */ > > > > > +#include <linux/iova.h> > > > > > #include <linux/mlx5/driver.h> > > > > > #include "mlx5_vdpa.h" > > > > > @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m > > > > > return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); > > > > > } > > > > > +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev) > > > > > +{ > > > > > + int err; > > > > > + > > > > > + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0); > > > > > + if (!mvdev->cvq.iotlb) > > > > > + return -ENOMEM; > > > > > + > > > > > + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock); > > > > > + err = iova_cache_get(); > > > > > > > > Any reason for using iova cache here? > > > Isn't it required? Aren't we allocating buffers for the CVQ from > > > iommu_iova kmem cache? > > > > > > I may miss something here but which buffer did you refer here? > > > > Aren't the data buffers for the control VQ allocated from this cache? 
> > > > > +	if (err)
> > > > > +		vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +
> > > > > +	return err;
> > > > > +}
> > > > > +
> > > > > +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +	iova_cache_put();
> > > > > +	vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +}
> > > > > +
> > > > >  int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >  {
> > > > >  	u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
> > > > > @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >  		err = -ENOMEM;
> > > > >  		goto err_key;
> > > > >  	}
> > > > > +
> > > > > +	err = init_ctrl_vq(mvdev);
> > > > > +	if (err)
> > > > > +		goto err_ctrl;
> > > > > +
> > > > >  	res->valid = true;
> > > > >  	return 0;
> > > > > +err_ctrl:
> > > > > +	iounmap(res->kick_addr);
> > > > >  err_key:
> > > > >  	dealloc_pd(mvdev, res->pdn, res->uid);
> > > > >  err_pd:
> > > > > @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >  	if (!res->valid)
> > > > >  		return;
> > > > > +	cleanup_ctrl_vq(mvdev);
> > > > >  	iounmap(res->kick_addr);
> > > > >  	res->kick_addr = NULL;
> > > > >  	dealloc_pd(mvdev, res->pdn, res->uid);
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index 0fe7cd370e4b..e18665781135 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> > > > >  /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > >   * provides for driver space allocation
> > > > >   */
> > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > +#define MLX5_MAX_SUPPORTED_VQS 2
> > > > >  static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > >  {
> > > > > @@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
> > > > >  	struct mlx5_flow_handle *rx_rule;
> > > > >  	bool setup;
> > > > >  	u16 mtu;
> > > > > +	u32 cur_num_vqs;
> > > > >  };
> > > > >  static void free_resources(struct mlx5_vdpa_net *ndev);
> > > > > @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
> > > > >  static bool mlx5_vdpa_debug;
> > > > > +#define MLX5_CVQ_MAX_ENT 16
> > > > > +
> > > > >  #define MLX5_LOG_VIO_FLAG(_feature)                                  \
> > > > >  	do {                                                         \
> > > > >  		if (features & BIT_ULL(_feature))                    \
> > > > > @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> > > > >  	return max_vqs / 2;
> > > > >  }
> > > > > +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> > > > > +}
> > > > > +
> > > > > +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > +{
> > > > > +	return idx == ctrl_vq_idx(mvdev);
> > > > > +}
> > > > > +
> > > > >  static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
> > > > >  {
> > > > >  	if (status & ~VALID_STATUS_MASK)
> > > > > @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
> > > > >  	ndev->rx_rule = NULL;
> > > > >  }
> > > > > +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> > > > > +{
> > > > > +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +	struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > +	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > +	struct mlx5_core_dev *pfmdev;
> > > > > +	size_t read;
> > > > > +	u8 mac[ETH_ALEN];
> > > > > +
> > > > > +	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
> > > > > +	switch (cmd) {
> > > > > +	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > > > > +		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
> > > > > +		if (read != ETH_ALEN)
> > > > > +			break;
> > > > > +
> > > > > +		if (!memcmp(ndev->config.mac, mac, 6)) {
> > > > > +			status = VIRTIO_NET_OK;
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		if (!is_zero_ether_addr(ndev->config.mac)) {
> > > > > +			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
> > > > > +				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
> > > > > +					       ndev->config.mac);
> > > > > +				break;
> > > > > +			}
> > > > > +		}
> > > > > +
> > > > > +		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
> > > > > +			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
> > > > > +				       mac);
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		memcpy(ndev->config.mac, mac, ETH_ALEN);
> > > > > +		status = VIRTIO_NET_OK;
> > > > > +		break;
> > > > > +
> > > > > +	default:
> > > > > +		break;
> > > > > +	}
> > > > > +
> > > > > +	return status;
> > > > > +}
> > > > > +
> > > > > +static void mlx5_cvq_kick_handler(struct work_struct *work)
> > > > > +{
> > > > > +	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > +	struct virtio_net_ctrl_hdr ctrl;
> > > > > +	struct mlx5_ctrl_wq_ent *wqent;
> > > > > +	struct mlx5_vdpa_dev *mvdev;
> > > > > +	struct mlx5_control_vq *cvq;
> > > > > +	struct mlx5_vdpa_net *ndev;
> > > > > +	size_t read, write;
> > > > > +	int err;
> > > > > +
> > > > > +	wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
> > > > > +	mvdev = wqent->mvdev;
> > > > > +	ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +	cvq = &mvdev->cvq;
> > > > > +	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > +		goto out;
> > > > > +
> > > > > +	if (!cvq->ready)
> > > > > +		goto out;
> > > > > +
> > > > > +	while (true) {
> > > > > +		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
> > > > > +					   GFP_ATOMIC);
> > > > > +		if (err <= 0)
> > > > > +			break;
> > > > > +
> > > > > +		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
> > > > > +		if (read != sizeof(ctrl))
> > > > > +			break;
> > > > > +
> > > > > +		switch (ctrl.class) {
> > > > > +		case VIRTIO_NET_CTRL_MAC:
> > > > > +			status = handle_ctrl_mac(mvdev, ctrl.cmd);
> > > > > +			break;
> > > > > +
> > > > > +		default:
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		/* Make sure data is written before advancing index */
> > > > > +		smp_wmb();
> > > > > +
> > > > > +		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
> > > > > +		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
> > > > > +		vringh_kiov_cleanup(&cvq->riov);
> > > > > +		vringh_kiov_cleanup(&cvq->wiov);
> > > > > +
> > > > > +		if (vringh_need_notify_iotlb(&cvq->vring))
> > > > > +			vringh_notify(&cvq->vring);
> > > > > +	}
> > > > > +out:
> > > > > +	kfree(wqent);
> > > > > +}
> > > > > +
> > > > >  static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +	struct mlx5_vdpa_virtqueue *mvq;
> > > > > +	struct mlx5_ctrl_wq_ent *wqent;
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return;
> > > > > +	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
> > > > > +		if (!mvdev->cvq.ready)
> > > > > +			return;
> > > > > +
> > > > > +		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
> > > > > +		if (!wqent)
> > > > > +			return;
> > > > > +
> > > > > +		wqent->mvdev = mvdev;
> > > > > +		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
> > > > > +		queue_work(mvdev->wq, &wqent->work);
> > > > > +		return;
> > > > > +	}
> > > > > +
> > > > > +	mvq = &ndev->vqs[idx];
> > > > >  	if (unlikely(!mvq->ready))
> > > > >  		return;
> > > > > @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +	struct mlx5_vdpa_virtqueue *mvq;
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return -EINVAL;
> > > > > +	if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +		mvdev->cvq.desc_addr = desc_area;
> > > > > +		mvdev->cvq.device_addr = device_area;
> > > > > +		mvdev->cvq.driver_addr = driver_area;
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	mvq = &ndev->vqs[idx];
> > > > >  	mvq->desc_addr = desc_area;
> > > > >  	mvq->device_addr = device_area;
> > > > >  	mvq->driver_addr = driver_area;
> > > > > @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > >  	struct mlx5_vdpa_virtqueue *mvq;
> > > > > -	if (!is_index_valid(mvdev, idx))
> > > > > +	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > >  		return;
> > > > >  	mvq = &ndev->vqs[idx];
> > > > > @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
> > > > >  	ndev->event_cbs[idx] = *cb;
> > > > >  }
> > > > > +static void mlx5_cvq_notify(struct vringh *vring)
> > > > > +{
> > > > > +	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
> > > > > +
> > > > > +	if (!cvq->event_cb.callback)
> > > > > +		return;
> > > > > +
> > > > > +	cvq->event_cb.callback(cvq->event_cb.private);
> > > > > +}
> > > > > +
> > > > > +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
> > > > > +{
> > > > > +	struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > +
> > > > > +	cvq->ready = ready;
> > > > > +	if (!ready)
> > > > > +		return;
> > > > > +
> > > > > +	cvq->vring.notify = mlx5_cvq_notify;
> > > > > +}
> > > > > +
> > > > >  static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +	struct mlx5_vdpa_virtqueue *mvq;
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return;
> > > > > +	if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +		set_cvq_ready(mvdev, ready);
> > > > > +		return;
> > > > > +	}
> > > > > +
> > > > > +	mvq = &ndev->vqs[idx];
> > > > >  	if (!ready)
> > > > >  		suspend_vq(ndev, mvq);
> > > > > @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return false;
> > > > > -	return mvq->ready;
> > > > > +	if (is_ctrl_vq_idx(mvdev, idx))
> > > > > +		return mvdev->cvq.ready;
> > > > > +
> > > > > +	return ndev->vqs[idx].ready;
> > > > >  }
> > > > >  static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > > @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +	struct mlx5_vdpa_virtqueue *mvq;
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return -EINVAL;
> > > > > +	if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
> > > >
> > > > Question, is packed virtqueue supported by current mlx5e?
> > > >
> > > > If no, this is fine.
> > >
> > > We don't. The hardware might support it, but the device driver does not
> > > advertise packed virtqueue support.
> >
> > Good to know this. So we're fine.
> >
> > Thanks
> >
> > > > If yes, we should disable packed and re-enable it after vringh supports
> > > > packed virtqueue.
> > > >
> > > > Otherwise looks good.
> > > >
> > > > Thanks
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	mvq = &ndev->vqs[idx];
> > > > >  	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
> > > > >  		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
> > > > >  		return -EINVAL;
> > > > > @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
> > > > >  {
> > > > >  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +	struct mlx5_vdpa_virtqueue *mvq;
> > > > >  	struct mlx5_virtq_attr attr;
> > > > >  	int err;
> > > > >  	if (!is_index_valid(mvdev, idx))
> > > > >  		return -EINVAL;
> > > > > +	if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	mvq = &ndev->vqs[idx];
> > > > >  	/* If the virtq object was destroyed, use the value saved at
> > > > >  	 * the last minute of suspend_vq. This caters for userspace
> > > > >  	 * that cares about emulating the index after vq is stopped.
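On the packed ring question above: since the driver does not advertise packed virtqueues today, nothing extra is needed. If the device capability were ever exposed, one option (illustrative only, not in the posted patch) would be to mask the feature bit until vringh gains packed ring support, e.g. in mlx5_vdpa_get_features(), whose hunk follows:

	/* Sketch: keep packed rings hidden until vringh can handle them */
	ndev->mvdev.mlx_features &= ~BIT_ULL(VIRTIO_F_RING_PACKED);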
> > > > > @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> > > > >  	u16 dev_features;
> > > > >  	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
> > > > > -	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
> > > > > +	ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
> > > > >  	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
> > > > >  		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> > > > >  	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> > > > > +	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> > > > > +	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> > > > > +
> > > > >  	print_features(mvdev, ndev->mvdev.mlx_features, false);
> > > > >  	return ndev->mvdev.mlx_features;
> > > > >  }
> > > > > @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
> > > > >  static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > >  {
> > > > >  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +	struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > >  	int err;
> > > > >  	int i;
> > > > > @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > >  			goto err_vq;
> > > > >  	}
> > > > > +	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> > > > > +		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> > > > > +					MLX5_CVQ_MAX_ENT, false,
> > > > > +					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
> > > > > +					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
> > > > > +					(struct vring_used *)(uintptr_t)cvq->device_addr);
> > > > > +		if (err)
> > > > > +			goto err_vq;
> > > > > +	}
> > > > > +
> > > > >  	return 0;
> > > > >  err_vq:
> > > > > @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> > > > >  	struct mlx5_vdpa_net *ndev;
> > > > >  	phys_addr_t addr;
> > > > > -	if (!is_index_valid(mvdev, idx))
> > > > > +	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > >  		return ret;
> > > > >  	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
> > > > > @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >  		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
> > > > >  		if (err)
> > > > >  			goto err_mtu;
> > > > > +
> > > > > +		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
> > > > >  	}
> > > > > +	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
> > > > >  	mvdev->vdev.dma_dev = &mdev->pdev->dev;
> > > > >  	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
> > > > >  	if (err)
> > > > > @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >  	if (err)
> > > > >  		goto err_mr;
> > > > > +	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
> > > > > +	if (!mvdev->wq) {
> > > > > +		err = -ENOMEM;
> > > > > +		goto err_res2;
> > > > > +	}
> > > > > +
> > > > > +	ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
> > > > >  	mvdev->vdev.mdev = &mgtdev->mgtdev;
> > > > > -	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
> > > > > +	err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
> > > > >  	if (err)
> > > > >  		goto err_reg;
> > > > > @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >  	return 0;
> > > > >  err_reg:
> > > > > +	destroy_workqueue(mvdev->wq);
> > > > > +err_res2:
> > > > >  	free_resources(ndev);
> > > > >  err_mr:
> > > > >  	mlx5_vdpa_destroy_mr(mvdev);
> > > > > @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >  static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
> > > > >  {
> > > > >  	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
> > > > > +	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
> > > > > +	destroy_workqueue(mvdev->wq);
> > > > >  	_vdpa_unregister_device(dev);
> > > > >  	mgtdev->ndev = NULL;
> > > > >  }
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization