On Fri, Apr 21, 2023 at 2:54 PM Michael S. Tsirkin <mst@xxxxxxxxxx> wrote: > > On Wed, Apr 19, 2023 at 12:39:11PM +0800, Jason Wang wrote: > > On Mon, Apr 17, 2023 at 4:35 PM Eli Cohen <elic@xxxxxxxxxx> wrote: > > > > > > Add support for generation of interrupts from the device directly to the > > > VM to the VCPU thus avoiding the overhead on the host CPU. > > > > > > When supported, the driver will attempt to allocate vectors for each > > > data virtqueue. If a vector for a virtqueue cannot be provided it will > > > use the QP mode where notifications go through the driver. > > > > > > In addition, we add a shutdown callback to make sure allocated > > > interrupts are released in case of shutdown to allow clean shutdown. > > > > > > Issue: 2211199 > > > Change-Id: I7633ee56ca2861a8d0f4935bb29109c8e80f352c > > > Signed-off-by: Eli Cohen <elic@xxxxxxxxxx> > > > Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxx> > > > --- > > > v1 -> v2: > > > 1. Remove parentheses around return statement > > > 2. Immediately return if vector entry found > > > 3. Use boolean instead of counter on used entry tracking > > > 4. Fix symmetry in teardown_vq() > > > > > > This patch is still under discussion but I wanted to address comments > > > and have this as the basis for further discussion. 
> > > > > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 140 ++++++++++++++++++++++++++++-- > > > drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++ > > > 2 files changed, 145 insertions(+), 9 deletions(-) > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c > > > index e29e32b306ad..2360c643148f 100644 > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c > > > @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info { > > > u64 driver_addr; > > > u16 avail_index; > > > u16 used_index; > > > + struct msi_map map; > > > bool ready; > > > bool restore; > > > }; > > > @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue { > > > u16 avail_idx; > > > u16 used_idx; > > > int fw_state; > > > + struct msi_map map; > > > > > > /* keep last in the struct */ > > > struct mlx5_vq_restore_info ri; > > > @@ -808,6 +810,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) > > > BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); > > > } > > > > > > +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) > > > +{ > > > + return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & > > > + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && > > > + pci_msix_can_alloc_dyn(mvdev->mdev->pdev); > > > +} > > > + > > > static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) > > > { > > > int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); > > > @@ -849,9 +858,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque > > > if (vq_is_tx(mvq->index)) > > > MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); > > > > > > - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); > > > + if (mvq->map.virq) { > > > + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); > > > + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); > > > + } else { > > > + MLX5_SET(virtio_q, vq_ctx, event_mode, 
MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); > > > + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); > > > + } > > > + > > > MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); > > > - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); > > > MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); > > > MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, > > > !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); > > > @@ -1194,6 +1209,34 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir > > > mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); > > > } > > > > > > +static void alloc_vector(struct mlx5_vdpa_net *ndev, > > > + struct mlx5_vdpa_virtqueue *mvq) > > > +{ > > > + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; > > > + int i; > > > + > > > + for (i = 0; i < irqp->num_ent; i++) { > > > + if (!irqp->entries[i].used) { > > > + irqp->entries[i].used = true; > > > + mvq->map = irqp->entries[i].map; > > > + return; > > > + } > > > + } > > > +} > > > + > > > +static void dealloc_vector(struct mlx5_vdpa_net *ndev, > > > + struct mlx5_vdpa_virtqueue *mvq) > > > +{ > > > + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; > > > + int i; > > > + > > > + for (i = 0; i < irqp->num_ent; i++) > > > + if (mvq->map.virq == irqp->entries[i].map.virq) { > > > + irqp->entries[i].used = false; > > > + return; > > > + } > > > +} > > > + > > > static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) > > > { > > > u16 idx = mvq->index; > > > @@ -1223,27 +1266,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) > > > > > > err = counter_set_alloc(ndev, mvq); > > > if (err) > > > - goto err_counter; > > > + goto err_connect; > > > > > > + alloc_vector(ndev, mvq); > > > err = create_virtqueue(ndev, mvq); > > > if (err) > > > - goto err_connect; > > > + goto err_vq; > > > > > > if (mvq->ready) { > > > err = modify_virtqueue(ndev, mvq, 
MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); > > > if (err) { > > > mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", > > > idx, err); > > > - goto err_connect; > > > + goto err_modify; > > > } > > > } > > > > > > mvq->initialized = true; > > > return 0; > > > > > > -err_connect: > > > +err_modify: > > > + destroy_virtqueue(ndev, mvq); > > > +err_vq: > > > + dealloc_vector(ndev, mvq); > > > counter_set_dealloc(ndev, mvq); > > > -err_counter: > > > +err_connect: > > > qp_destroy(ndev, &mvq->vqqp); > > > err_vqqp: > > > qp_destroy(ndev, &mvq->fwqp); > > > @@ -1288,6 +1335,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * > > > > > > suspend_vq(ndev, mvq); > > > destroy_virtqueue(ndev, mvq); > > > + dealloc_vector(ndev, mvq); > > > counter_set_dealloc(ndev, mvq); > > > qp_destroy(ndev, &mvq->vqqp); > > > qp_destroy(ndev, &mvq->fwqp); > > > @@ -2505,6 +2553,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu > > > ri->desc_addr = mvq->desc_addr; > > > ri->device_addr = mvq->device_addr; > > > ri->driver_addr = mvq->driver_addr; > > > + ri->map = mvq->map; > > > ri->restore = true; > > > return 0; > > > } > > > @@ -2549,6 +2598,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) > > > mvq->desc_addr = ri->desc_addr; > > > mvq->device_addr = ri->device_addr; > > > mvq->driver_addr = ri->driver_addr; > > > + mvq->map = ri->map; > > > } > > > } > > > > > > @@ -2833,6 +2883,21 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) > > > return mvdev->vdev.dma_dev; > > > } > > > > > > +static void free_irqs(struct mlx5_vdpa_net *ndev) > > > +{ > > > + struct mlx5_vdpa_irq_pool_entry *ent; > > > + int i; > > > + > > > + if (!msix_mode_supported(&ndev->mvdev)) > > > + return; > > > + > > > + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { > > > + ent = ndev->irqp.entries + i; > > > + mlx5_msix_free(ndev->mvdev.mdev, ent->map); > > > + } > > > + 
kfree(ndev->irqp.entries); > > > +} > > > + > > > static void mlx5_vdpa_free(struct vdpa_device *vdev) > > > { > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); > > > @@ -2848,6 +2913,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) > > > mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); > > > } > > > mlx5_vdpa_free_resources(&ndev->mvdev); > > > + free_irqs(ndev); > > > kfree(ndev->event_cbs); > > > kfree(ndev->vqs); > > > } > > > @@ -2876,9 +2942,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device > > > return ret; > > > } > > > > > > -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) > > > +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) > > > { > > > - return -EOPNOTSUPP; > > > + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); > > > + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); > > > + struct mlx5_vdpa_virtqueue *mvq; > > > + > > > + if (!is_index_valid(mvdev, idx)) > > > + return -EINVAL; > > > + > > > + if (is_ctrl_vq_idx(mvdev, idx)) > > > + return -EOPNOTSUPP; > > > + > > > + mvq = &ndev->vqs[idx]; > > > + if (!mvq->map.virq) > > > + return -EOPNOTSUPP; > > > + > > > + return mvq->map.virq; > > > } > > > > > > static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) > > > @@ -3155,6 +3235,35 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) > > > return err; > > > } > > > > > > +static irqreturn_t int_handler(int irq, void *nh) > > > +{ > > > + return IRQ_HANDLED; > > > +} > > > > I still don't understand when this handler will be called. > > > > E.g IFCVF did: > > > > static irqreturn_t ifcvf_vq_intr_handler(int irq, void *arg) > > { > > struct vring_info *vring = arg; > > > > if (vring->cb.callback) > > return vring->cb.callback(vring->cb.private); > > > > return IRQ_HANDLED; > > } > > > > So when the irq bypass doesn't work it will fall back to a slow path. > > > > Maybe some comments here or changelog would be helpful. 
> > > > Thanks > > Jason I don't understand what you are asking about. > Are you asking why it's ok not to invoke the vring callback? Yes. Thanks > > -- > MST > _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization