Re: [PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting

Jason Wang <jasowang@xxxxxxxxxx> · Thu, 19 Aug 2021 15:37:40 +0800

On Thu, Aug 19, 2021 at 3:22 PM Eli Cohen <elic@xxxxxxxxxx> wrote:
>
> On Thu, Aug 19, 2021 at 03:12:14PM +0800, Jason Wang wrote:
> >
> > 在 2021/8/19 下午2:06, Eli Cohen 写道:
> > > On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
> > > > 在 2021/8/17 下午2:02, Eli Cohen 写道:
> > > > > Add support to handle control virtqueue configurations per virtio
> > > > > specification. The control virtqueue is implemented in software and no
> > > > > hardware offloading is involved.
> > > > >
> > > > > Control VQ configuration needs task context, therefore all configurations
> > > > > are handled in a workqueue created for the purpose.
> > > >
> > > > I think all the current callers are already in the task context (the
> > > > caller of virtnet_send_command()).
> > > >
> > > > Any reason for using workqueue here?
> > > >
> > > I am running code that might sleep and the call has, IIRC, irqs disabled. The
> > > kernel complained about this.
> >
> >
> > I see.
> >
> >
> > >
> > > > I'm not sure if it can work well on UP where the workqueue might not have a
> > > > chance to be scheduled (we are doing busy waiting here):
> > > >
> > > >          /* Spin for a response, the kick causes an ioport write, trapping
> > > >           * into the hypervisor, so the request should be handled
> > > > immediately.
> > > >           */
> > > >          while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> > > >                 !virtqueue_is_broken(vi->cvq))
> > > >                  cpu_relax();
> > > >
> > > I think we have two different cases here:
> > > 1. When the net device is running in a VM. In this case we do not have
> > > any issue since the loop is running at the guest kernel and the work is
> > > done at the host kernel and would end at a finite time.
> > >
> > > 2. When we're using virtio_vdpa and the device is on the host we have an
> > > issue if we're using a UP processor. Maybe we should avoid supporting CVQ
> > > in this case?
> >
> >
> > Maybe we can switch to use interrupt and sleep here, will have a look.
> >
>
> Won't it hurt latency?

I'm not sure, but usually we don't care about the performance of control vq.

Thanks

>
> >
> >
> > >
> > > > > Modifications are made to the memory registration code to allow for
> > > > > saving a copy of iotlb to be used by the control VQ to access the vring.
> > > > >
> > > > > The max number of data virtqueues supported by the driver has been
> > > > > updated to 2 since multiqueue is not supported at this stage and we need
> > > > > to ensure consistency of VQ indices mapping to either data or control
> > > > > VQ.
> > > > >
> > > > > Signed-off-by: Eli Cohen <elic@xxxxxxxxxx>
> > > > > ---
> > > > >    drivers/vdpa/mlx5/core/mlx5_vdpa.h |  23 +++
> > > > >    drivers/vdpa/mlx5/core/mr.c        |  81 +++++++---
> > > > >    drivers/vdpa/mlx5/core/resources.c |  31 ++++
> > > > >    drivers/vdpa/mlx5/net/mlx5_vnet.c  | 231 +++++++++++++++++++++++++++--
> > > > >    4 files changed, 334 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > index 41b20855ed31..6c43476a69cb 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> > > > > @@ -5,6 +5,7 @@
> > > > >    #define __MLX5_VDPA_H__
> > > > >    #include <linux/etherdevice.h>
> > > > > +#include <linux/vringh.h>
> > > > >    #include <linux/vdpa.h>
> > > > >    #include <linux/mlx5/driver.h>
> > > > > @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
> > > > >         bool valid;
> > > > >    };
> > > > > +struct mlx5_control_vq {
> > > > > +       struct vhost_iotlb *iotlb;
> > > > > +       /* spinlock to synchronize iommu table */
> > > > > +       spinlock_t iommu_lock;
> > > > > +       struct vringh vring;
> > > > > +       bool ready;
> > > > > +       u64 desc_addr;
> > > > > +       u64 device_addr;
> > > > > +       u64 driver_addr;
> > > > > +       struct vdpa_callback event_cb;
> > > > > +       struct vringh_kiov riov;
> > > > > +       struct vringh_kiov wiov;
> > > > > +       unsigned short head;
> > > > > +};
> > > > > +
> > > > > +struct mlx5_ctrl_wq_ent {
> > > > > +       struct work_struct work;
> > > > > +       struct mlx5_vdpa_dev *mvdev;
> > > > > +};
> > > > > +
> > > > >    struct mlx5_vdpa_dev {
> > > > >         struct vdpa_device vdev;
> > > > >         struct mlx5_core_dev *mdev;
> > > > > @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
> > > > >         u32 generation;
> > > > >         struct mlx5_vdpa_mr mr;
> > > > > +       struct mlx5_control_vq cvq;
> > > > > +       struct workqueue_struct *wq;
> > > > >    };
> > > > >    int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
> > > > > diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> > > > > index e59135fa867e..da013b8082bc 100644
> > > > > --- a/drivers/vdpa/mlx5/core/mr.c
> > > > > +++ b/drivers/vdpa/mlx5/core/mr.c
> > > > > @@ -1,6 +1,7 @@
> > > > >    // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> > > > >    /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> > > > > +#include <linux/vhost_types.h>
> > > > >    #include <linux/vdpa.h>
> > > > >    #include <linux/gcd.h>
> > > > >    #include <linux/string.h>
> > > > > @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> > > > >         mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
> > > > >    }
> > > > > -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
> > > > >    {
> > > > > -       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > +       struct vhost_iotlb_map *map;
> > > > > +       u64 start = 0ULL, last = 0ULL - 1;
> > > > >         int err;
> > > > > -       if (mr->initialized)
> > > > > -               return 0;
> > > > > -
> > > > > -       if (iotlb)
> > > > > -               err = create_user_mr(mvdev, iotlb);
> > > > > -       else
> > > > > -               err = create_dma_mr(mvdev, mr);
> > > > > -
> > > > > -       if (!err)
> > > > > -               mr->initialized = true;
> > > > > +       if (!src) {
> > > > > +               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
> > > > > +               return err;
> > > > > +       }
> > > > > -       return err;
> > > > > +       for (map = vhost_iotlb_itree_first(src, start, last); map;
> > > > > +               map = vhost_iotlb_itree_next(map, start, last)) {
> > > > > +               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
> > > > > +                                           map->addr, map->perm);
> > > > > +               if (err)
> > > > > +                       return err;
> > > > > +       }
> > > > > +       return 0;
> > > > >    }
> > > > > -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
> > > > >    {
> > > > > -       int err;
> > > > > -
> > > > > -       mutex_lock(&mvdev->mr.mkey_mtx);
> > > > > -       err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> > > > > -       mutex_unlock(&mvdev->mr.mkey_mtx);
> > > > > -       return err;
> > > > > +       vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1);
> > > >
> > > > It's better to use ULLONG_MAX.
> > > Will change.
> > >
> > > >
> > > > >    }
> > > > >    static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
> > > > > @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > >         if (!mr->initialized)
> > > > >                 goto out;
> > > > > +       prune_iotlb(mvdev);
> > > > >         if (mr->user_mr)
> > > > >                 destroy_user_mr(mvdev, mr);
> > > > >         else
> > > > > @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
> > > > >         mutex_unlock(&mr->mkey_mtx);
> > > > >    }
> > > > > +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +{
> > > > > +       struct mlx5_vdpa_mr *mr = &mvdev->mr;
> > > > > +       int err;
> > > > > +
> > > > > +       if (mr->initialized)
> > > > > +               return 0;
> > > > > +
> > > > > +       if (iotlb)
> > > > > +               err = create_user_mr(mvdev, iotlb);
> > > > > +       else
> > > > > +               err = create_dma_mr(mvdev, mr);
> > > > > +
> > > > > +       if (err)
> > > > > +               return err;
> > > > > +
> > > > > +       err = dup_iotlb(mvdev, iotlb);
> > > > > +       if (err)
> > > > > +               goto out_err;
> > > > > +
> > > > > +       mr->initialized = true;
> > > > > +       return 0;
> > > > > +
> > > > > +out_err:
> > > > > +       if (iotlb)
> > > > > +               destroy_user_mr(mvdev, mr);
> > > > > +       else
> > > > > +               destroy_dma_mr(mvdev, mr);
> > > > > +
> > > > > +       return err;
> > > > > +}
> > > > > +
> > > > > +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
> > > > > +{
> > > > > +       int err;
> > > > > +
> > > > > +       mutex_lock(&mvdev->mr.mkey_mtx);
> > > > > +       err = _mlx5_vdpa_create_mr(mvdev, iotlb);
> > > > > +       mutex_unlock(&mvdev->mr.mkey_mtx);
> > > > > +       return err;
> > > > > +}
> > > > > +
> > > > >    int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
> > > > >                              bool *change_map)
> > > > >    {
> > > > > diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
> > > > > index d4606213f88a..d24ae1a85159 100644
> > > > > --- a/drivers/vdpa/mlx5/core/resources.c
> > > > > +++ b/drivers/vdpa/mlx5/core/resources.c
> > > > > @@ -1,6 +1,7 @@
> > > > >    // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> > > > >    /* Copyright (c) 2020 Mellanox Technologies Ltd. */
> > > > > +#include <linux/iova.h>
> > > > >    #include <linux/mlx5/driver.h>
> > > > >    #include "mlx5_vdpa.h"
> > > > > @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
> > > > >         return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
> > > > >    }
> > > > > +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +       int err;
> > > > > +
> > > > > +       mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
> > > > > +       if (!mvdev->cvq.iotlb)
> > > > > +               return -ENOMEM;
> > > > > +
> > > > > +       vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
> > > > > +       err = iova_cache_get();
> > > >
> > > > Any reason for using iova cache here?
> > > Isn't it required? Aren't we allocating buffers for the CVQ from
> > > iommu_iova kmem cache?
> >
> >
> > I may be missing something here, but which buffer are you referring to?
> >
>
> Aren't the data buffers for the control VQ allocated from this cache?
>
> >
> > >
> > > >
> > > > > +       if (err)
> > > > > +               vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +
> > > > > +       return err;
> > > > > +}
> > > > > +
> > > > > +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +       iova_cache_put();
> > > > > +       vhost_iotlb_free(mvdev->cvq.iotlb);
> > > > > +}
> > > > > +
> > > > >    int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >    {
> > > > >         u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
> > > > > @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >                 err = -ENOMEM;
> > > > >                 goto err_key;
> > > > >         }
> > > > > +
> > > > > +       err = init_ctrl_vq(mvdev);
> > > > > +       if (err)
> > > > > +               goto err_ctrl;
> > > > > +
> > > > >         res->valid = true;
> > > > >         return 0;
> > > > > +err_ctrl:
> > > > > +       iounmap(res->kick_addr);
> > > > >    err_key:
> > > > >         dealloc_pd(mvdev, res->pdn, res->uid);
> > > > >    err_pd:
> > > > > @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
> > > > >         if (!res->valid)
> > > > >                 return;
> > > > > +       cleanup_ctrl_vq(mvdev);
> > > > >         iounmap(res->kick_addr);
> > > > >         res->kick_addr = NULL;
> > > > >         dealloc_pd(mvdev, res->pdn, res->uid);
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index 0fe7cd370e4b..e18665781135 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> > > > >    /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> > > > >     * provides for driver space allocation
> > > > >     */
> > > > > -#define MLX5_MAX_SUPPORTED_VQS 16
> > > > > +#define MLX5_MAX_SUPPORTED_VQS 2
> > > > >    static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > >    {
> > > > > @@ -160,6 +160,7 @@ struct mlx5_vdpa_net {
> > > > >         struct mlx5_flow_handle *rx_rule;
> > > > >         bool setup;
> > > > >         u16 mtu;
> > > > > +       u32 cur_num_vqs;
> > > > >    };
> > > > >    static void free_resources(struct mlx5_vdpa_net *ndev);
> > > > > @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev);
> > > > >    static bool mlx5_vdpa_debug;
> > > > > +#define MLX5_CVQ_MAX_ENT 16
> > > > > +
> > > > >    #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
> > > > >         do {                                                                                       \
> > > > >                 if (features & BIT_ULL(_feature))                                                  \
> > > > > @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> > > > >         return max_vqs / 2;
> > > > >    }
> > > > > +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> > > > > +{
> > > > > +       return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> > > > > +}
> > > > > +
> > > > > +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
> > > > > +{
> > > > > +       return idx == ctrl_vq_idx(mvdev);
> > > > > +}
> > > > > +
> > > > >    static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
> > > > >    {
> > > > >         if (status & ~VALID_STATUS_MASK)
> > > > > @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
> > > > >         ndev->rx_rule = NULL;
> > > > >    }
> > > > > +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> > > > > +{
> > > > > +       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > +       virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > +       struct mlx5_core_dev *pfmdev;
> > > > > +       size_t read;
> > > > > +       u8 mac[ETH_ALEN];
> > > > > +
> > > > > +       pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
> > > > > +       switch (cmd) {
> > > > > +       case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > > > > +               read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
> > > > > +               if (read != ETH_ALEN)
> > > > > +                       break;
> > > > > +
> > > > > +               if (!memcmp(ndev->config.mac, mac, 6)) {
> > > > > +                       status = VIRTIO_NET_OK;
> > > > > +                       break;
> > > > > +               }
> > > > > +
> > > > > +               if (!is_zero_ether_addr(ndev->config.mac)) {
> > > > > +                       if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
> > > > > +                               mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
> > > > > +                                              ndev->config.mac);
> > > > > +                               break;
> > > > > +                       }
> > > > > +               }
> > > > > +
> > > > > +               if (mlx5_mpfs_add_mac(pfmdev, mac)) {
> > > > > +                       mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
> > > > > +                                      mac);
> > > > > +                       break;
> > > > > +               }
> > > > > +
> > > > > +               memcpy(ndev->config.mac, mac, ETH_ALEN);
> > > > > +               status = VIRTIO_NET_OK;
> > > > > +               break;
> > > > > +
> > > > > +       default:
> > > > > +               break;
> > > > > +       }
> > > > > +
> > > > > +       return status;
> > > > > +}
> > > > > +
> > > > > +static void mlx5_cvq_kick_handler(struct work_struct *work)
> > > > > +{
> > > > > +       virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > +       struct virtio_net_ctrl_hdr ctrl;
> > > > > +       struct mlx5_ctrl_wq_ent *wqent;
> > > > > +       struct mlx5_vdpa_dev *mvdev;
> > > > > +       struct mlx5_control_vq *cvq;
> > > > > +       struct mlx5_vdpa_net *ndev;
> > > > > +       size_t read, write;
> > > > > +       int err;
> > > > > +
> > > > > +       wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
> > > > > +       mvdev = wqent->mvdev;
> > > > > +       ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       cvq = &mvdev->cvq;
> > > > > +       if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > > +               goto out;
> > > > > +
> > > > > +       if (!cvq->ready)
> > > > > +               goto out;
> > > > > +
> > > > > +       while (true) {
> > > > > +               err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
> > > > > +                                          GFP_ATOMIC);
> > > > > +               if (err <= 0)
> > > > > +                       break;
> > > > > +
> > > > > +               read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
> > > > > +               if (read != sizeof(ctrl))
> > > > > +                       break;
> > > > > +
> > > > > +               switch (ctrl.class) {
> > > > > +               case VIRTIO_NET_CTRL_MAC:
> > > > > +                       status = handle_ctrl_mac(mvdev, ctrl.cmd);
> > > > > +                       break;
> > > > > +
> > > > > +               default:
> > > > > +                       break;
> > > > > +               }
> > > > > +
> > > > > +               /* Make sure data is written before advancing index */
> > > > > +               smp_wmb();
> > > > > +
> > > > > +               write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
> > > > > +               vringh_complete_iotlb(&cvq->vring, cvq->head, write);
> > > > > +               vringh_kiov_cleanup(&cvq->riov);
> > > > > +               vringh_kiov_cleanup(&cvq->wiov);
> > > > > +
> > > > > +               if (vringh_need_notify_iotlb(&cvq->vring))
> > > > > +                       vringh_notify(&cvq->vring);
> > > > > +       }
> > > > > +out:
> > > > > +       kfree(wqent);
> > > > > +}
> > > > > +
> > > > >    static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > > +       struct mlx5_ctrl_wq_ent *wqent;
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return;
> > > > > +       if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
> > > > > +               if (!mvdev->cvq.ready)
> > > > > +                       return;
> > > > > +
> > > > > +               wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
> > > > > +               if (!wqent)
> > > > > +                       return;
> > > > > +
> > > > > +               wqent->mvdev = mvdev;
> > > > > +               INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
> > > > > +               queue_work(mvdev->wq, &wqent->work);
> > > > > +               return;
> > > > > +       }
> > > > > +
> > > > > +       mvq = &ndev->vqs[idx];
> > > > >         if (unlikely(!mvq->ready))
> > > > >                 return;
> > > > > @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return -EINVAL;
> > > > > +       if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +               mvdev->cvq.desc_addr = desc_area;
> > > > > +               mvdev->cvq.device_addr = device_area;
> > > > > +               mvdev->cvq.driver_addr = driver_area;
> > > > > +               return 0;
> > > > > +       }
> > > > > +
> > > > > +       mvq = &ndev->vqs[idx];
> > > > >         mvq->desc_addr = desc_area;
> > > > >         mvq->device_addr = device_area;
> > > > >         mvq->driver_addr = driver_area;
> > > > > @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > >         struct mlx5_vdpa_virtqueue *mvq;
> > > > > -       if (!is_index_valid(mvdev, idx))
> > > > > +       if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > >                 return;
> > > > >         mvq = &ndev->vqs[idx];
> > > > > @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
> > > > >         ndev->event_cbs[idx] = *cb;
> > > > >    }
> > > > > +static void mlx5_cvq_notify(struct vringh *vring)
> > > > > +{
> > > > > +       struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
> > > > > +
> > > > > +       if (!cvq->event_cb.callback)
> > > > > +               return;
> > > > > +
> > > > > +       cvq->event_cb.callback(cvq->event_cb.private);
> > > > > +}
> > > > > +
> > > > > +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
> > > > > +{
> > > > > +       struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > > +
> > > > > +       cvq->ready = ready;
> > > > > +       if (!ready)
> > > > > +               return;
> > > > > +
> > > > > +       cvq->vring.notify = mlx5_cvq_notify;
> > > > > +}
> > > > > +
> > > > >    static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return;
> > > > > +       if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +               set_cvq_ready(mvdev, ready);
> > > > > +               return;
> > > > > +       }
> > > > > +
> > > > > +       mvq = &ndev->vqs[idx];
> > > > >         if (!ready)
> > > > >                 suspend_vq(ndev, mvq);
> > > > > @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return false;
> > > > > -       return mvq->ready;
> > > > > +       if (is_ctrl_vq_idx(mvdev, idx))
> > > > > +               return mvdev->cvq.ready;
> > > > > +
> > > > > +       return ndev->vqs[idx].ready;
> > > > >    }
> > > > >    static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > > @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return -EINVAL;
> > > > > +       if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +               mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
> > > >
> > > > Question, is packed virtqueue supported by current mlx5e?
> > > >
> > > > If no, this is fine.
> > > We don't. The hardware might support it, but the device driver does not
> > > advertise packed virtqueue support.
> >
> >
> > Good to know this. So we're fine.
> >
> > Thanks
> >
> >
> > >
> > > > If yes, we should disable packed and re-enable it after vringh supports
> > > > packed virtqueue.
> > > >
> > > > The rest looks good.
> > > >
> > > > Thanks
> > > >
> > > >
> > > > > +               return 0;
> > > > > +       }
> > > > > +
> > > > > +       mvq = &ndev->vqs[idx];
> > > > >         if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
> > > > >                 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
> > > > >                 return -EINVAL;
> > > > > @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
> > > > >    {
> > > > >         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > -       struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
> > > > > +       struct mlx5_vdpa_virtqueue *mvq;
> > > > >         struct mlx5_virtq_attr attr;
> > > > >         int err;
> > > > >         if (!is_index_valid(mvdev, idx))
> > > > >                 return -EINVAL;
> > > > > +       if (is_ctrl_vq_idx(mvdev, idx)) {
> > > > > +               state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
> > > > > +               return 0;
> > > > > +       }
> > > > > +
> > > > > +       mvq = &ndev->vqs[idx];
> > > > >         /* If the virtq object was destroyed, use the value saved at
> > > > >          * the last minute of suspend_vq. This caters for userspace
> > > > >          * that cares about emulating the index after vq is stopped.
> > > > > @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> > > > >         u16 dev_features;
> > > > >         dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
> > > > > -       ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
> > > > > +       ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
> > > > >         if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
> > > > >                 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
> > > > >         ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> > > > > +       ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> > > > > +       ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> > > > > +
> > > > >         print_features(mvdev, ndev->mvdev.mlx_features, false);
> > > > >         return ndev->mvdev.mlx_features;
> > > > >    }
> > > > > @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
> > > > >    static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > >    {
> > > > >         struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > > +       struct mlx5_control_vq *cvq = &mvdev->cvq;
> > > > >         int err;
> > > > >         int i;
> > > > > @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
> > > > >                         goto err_vq;
> > > > >         }
> > > > > +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> > > > > +               err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> > > > > +                                       MLX5_CVQ_MAX_ENT, false,
> > > > > +                                       (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> > > > > +                                       (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> > > > > +                                       (struct vring_used *)(uintptr_t)cvq->device_addr);
> > > > > +               if (err)
> > > > > +                       goto err_vq;
> > > > > +       }
> > > > > +
> > > > >         return 0;
> > > > >    err_vq:
> > > > > @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> > > > >         struct mlx5_vdpa_net *ndev;
> > > > >         phys_addr_t addr;
> > > > > -       if (!is_index_valid(mvdev, idx))
> > > > > +       if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
> > > > >                 return ret;
> > > > >         /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
> > > > > @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >                 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
> > > > >                 if (err)
> > > > >                         goto err_mtu;
> > > > > +
> > > > > +               ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
> > > > >         }
> > > > > +       config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
> > > > >         mvdev->vdev.dma_dev = &mdev->pdev->dev;
> > > > >         err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
> > > > >         if (err)
> > > > > @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >         if (err)
> > > > >                 goto err_mr;
> > > > > +       mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
> > > > > +       if (!mvdev->wq) {
> > > > > +               err = -ENOMEM;
> > > > > +               goto err_res2;
> > > > > +       }
> > > > > +
> > > > > +       ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
> > > > >         mvdev->vdev.mdev = &mgtdev->mgtdev;
> > > > > -       err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
> > > > > +       err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
> > > > >         if (err)
> > > > >                 goto err_reg;
> > > > > @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >         return 0;
> > > > >    err_reg:
> > > > > +       destroy_workqueue(mvdev->wq);
> > > > > +err_res2:
> > > > >         free_resources(ndev);
> > > > >    err_mr:
> > > > >         mlx5_vdpa_destroy_mr(mvdev);
> > > > > @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
> > > > >    static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
> > > > >    {
> > > > >         struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
> > > > > +       struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
> > > > > +       destroy_workqueue(mvdev->wq);
> > > > >         _vdpa_unregister_device(dev);
> > > > >         mgtdev->ndev = NULL;
> > > > >    }
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization