Re: [PATCH v2 01/13] vdpa net: move iova tree creation from init to start

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2/13/2023 3:14 AM, Eugenio Perez Martin wrote:
On Mon, Feb 13, 2023 at 7:51 AM Si-Wei Liu <si-wei.liu@xxxxxxxxxx> wrote:


On 2/8/2023 1:42 AM, Eugenio Pérez wrote:
Only create iova_tree if and when it is needed.

The cleanup is still the responsibility of the last VQ, but this change
allows merging both cleanup functions.

Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx>
Acked-by: Jason Wang <jasowang@xxxxxxxxxx>
---
   net/vhost-vdpa.c | 99 ++++++++++++++++++++++++++++++++++--------------
   1 file changed, 71 insertions(+), 28 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index de5ed8ff22..a9e6c8f28e 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -178,13 +178,9 @@ err_init:
   static void vhost_vdpa_cleanup(NetClientState *nc)
   {
       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
-    struct vhost_dev *dev = &s->vhost_net->dev;

       qemu_vfree(s->cvq_cmd_out_buffer);
       qemu_vfree(s->status);
-    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
-        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-    }
       if (s->vhost_net) {
           vhost_net_cleanup(s->vhost_net);
           g_free(s->vhost_net);
@@ -234,10 +230,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
       return size;
   }

+/** From any vdpa net client, get the netclient of first queue pair */
+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
+{
+    NICState *nic = qemu_get_nic(s->nc.peer);
+    NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
+
+    return DO_UPCAST(VhostVDPAState, nc, nc0);
+}
+
+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    if (v->shadow_vqs_enabled) {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
+    }
+}
+
+static int vhost_vdpa_net_data_start(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    if (v->index == 0) {
+        vhost_vdpa_net_data_start_first(s);
+        return 0;
+    }
+
+    if (v->shadow_vqs_enabled) {
+        VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    }
+
+    return 0;
+}
+
+static void vhost_vdpa_net_client_stop(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_dev *dev;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    dev = s->vhost_vdpa.dev;
+    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+    }
+}
+
   static NetClientInfo net_vhost_vdpa_info = {
           .type = NET_CLIENT_DRIVER_VHOST_VDPA,
           .size = sizeof(VhostVDPAState),
           .receive = vhost_vdpa_receive,
+        .start = vhost_vdpa_net_data_start,
+        .stop = vhost_vdpa_net_client_stop,
           .cleanup = vhost_vdpa_cleanup,
           .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
           .has_ufo = vhost_vdpa_has_ufo,
@@ -351,7 +401,7 @@ dma_map_err:

   static int vhost_vdpa_net_cvq_start(NetClientState *nc)
   {
-    VhostVDPAState *s;
+    VhostVDPAState *s, *s0;
       struct vhost_vdpa *v;
       uint64_t backend_features;
       int64_t cvq_group;
@@ -425,6 +475,15 @@ out:
           return 0;
       }

+    s0 = vhost_vdpa_net_first_nc_vdpa(s);
+    if (s0->vhost_vdpa.iova_tree) {
+        /* SVQ is already configured for all virtqueues */
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    } else {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
I wonder how this case could happen: vhost_vdpa_net_data_start_first()
should've allocated an iova tree on the first data vq. Is having zero data
vqs ever possible on net vhost-vdpa?

It's the case of the current qemu master when only CVQ is being
shadowed. It's not that "there are no data vq": If that case were
possible, CVQ vhost-vdpa state would be s0.

The case is that since only CVQ vhost-vdpa is the one being migrated,
only CVQ has an iova tree.
OK, so this corresponds to the case where live migration is not started and CVQ starts in its own address space of VHOST_VDPA_NET_CVQ_ASID. Thanks for explaining it!


With this series applied and with no migration running, the case is
the same as before: only CVQ gets shadowed. When migration starts, all
vqs are migrated, and share the iova tree.
I wonder what the reason is to share the iova tree when migration starts; I think CVQ could still stay in its own VHOST_VDPA_NET_CVQ_ASID?

Actually there's a discrepancy in vhost_vdpa_net_log_global_enable(): I don't see explicit code to switch from VHOST_VDPA_NET_CVQ_ASID to VHOST_VDPA_GUEST_PA_ASID for the CVQ. This is the address space collision I mentioned earlier:

9585@1676093788.259201:vhost_vdpa_dma_map vdpa:0x7ff13088a190 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0x2000 uaddr: 0x55a5a7ff3000 perm: 0x1 type: 2
9585@1676093788.279923:vhost_vdpa_dma_map vdpa:0x7ff13088a190 fd: 16 msg_type: 2 asid: 0 iova: 0x3000 size: 0x1000 uaddr: 0x55a5a7ff6000 perm: 0x3 type: 2
9585@1676093788.290529:vhost_vdpa_set_vring_addr dev: 0x55a5a77cec20 index: 0 flags: 0x0 desc_user_addr: 0x1000 used_user_addr: 0x3000 avail_user_addr: 0x2000 log_guest_addr: 0x0
:
:
9585@1676093788.543567:vhost_vdpa_dma_map vdpa:0x7ff1302b6190 fd: 16 msg_type: 2 asid: 0 iova: 0x16000 size: 0x2000 uaddr: 0x55a5a7959000 perm: 0x1 type: 2
9585@1676093788.576923:vhost_vdpa_dma_map vdpa:0x7ff1302b6190 fd: 16 msg_type: 2 asid: 0 iova: 0x18000 size: 0x1000 uaddr: 0x55a5a795c000 perm: 0x3 type: 2
9585@1676093788.593881:vhost_vdpa_set_vring_addr dev: 0x55a5a7580930 index: 7 flags: 0x0 desc_user_addr: 0x16000 used_user_addr: 0x18000 avail_user_addr: 0x17000 log_guest_addr: 0x0
9585@1676093788.593904:vhost_vdpa_dma_map vdpa:0x7ff13026d190 fd: 16 msg_type: 2 asid: 1 iova: 0x19000 size: 0x1000 uaddr: 0x55a5a77f8000 perm: 0x1 type: 2
9585@1676093788.606448:vhost_vdpa_dma_map vdpa:0x7ff13026d190 fd: 16 msg_type: 2 asid: 1 iova: 0x1a000 size: 0x1000 uaddr: 0x55a5a77fa000 perm: 0x3 type: 2
9585@1676093788.616253:vhost_vdpa_dma_map vdpa:0x7ff13026d190 fd: 16 msg_type: 2 asid: 1 iova: 0x1b000 size: 0x1000 uaddr: 0x55a5a795f000 perm: 0x1 type: 2
9585@1676093788.625956:vhost_vdpa_dma_map vdpa:0x7ff13026d190 fd: 16 msg_type: 2 asid: 1 iova: 0x1c000 size: 0x1000 uaddr: 0x55a5a7f4e000 perm: 0x3 type: 2
9585@1676093788.635655:vhost_vdpa_set_vring_addr dev: 0x55a5a7580ec0 index: 8 flags: 0x0 desc_user_addr: 0x1b000 used_user_addr: 0x1c000 avail_user_addr: 0x1b400 log_guest_addr: 0x0
9585@1676093788.635667:vhost_vdpa_listener_region_add vdpa: 0x7ff13026d190 iova 0x0 llend 0xa0000 vaddr: 0x7fef1fe00000 read-only: 0
9585@1676093788.635670:vhost_vdpa_listener_begin_batch vdpa:0x7ff13026d190 fd: 16 msg_type: 2 type: 5
9585@1676093788.635677:vhost_vdpa_dma_map vdpa:0x7ff13026d190 fd: 16 msg_type: 2 asid: 0 iova: 0x0 size: 0xa0000 uaddr: 0x7fef1fe00000 perm: 0x3 type: 2
2023-02-11T05:36:28.635686Z qemu-system-x86_64: failed to write, fd=16, errno=14 (Bad address)
2023-02-11T05:36:28.635721Z qemu-system-x86_64: vhost vdpa map fail!
2023-02-11T05:36:28.635744Z qemu-system-x86_64: vhost-vdpa: DMA mapping failed, unable to continue


Regards,
-Siwei

Thanks!

Thanks,
-Siwei
+    }
+
       r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                                  vhost_vdpa_net_cvq_cmd_page_len(), false);
       if (unlikely(r < 0)) {
@@ -449,15 +508,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
       if (s->vhost_vdpa.shadow_vqs_enabled) {
           vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
           vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
-        if (!s->always_svq) {
-            /*
-             * If only the CVQ is shadowed we can delete this safely.
-             * If all the VQs are shadows this will be needed by the time the
-             * device is started again to register SVQ vrings and similar.
-             */
-            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-        }
       }
+
+    vhost_vdpa_net_client_stop(nc);
   }

   static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
@@ -667,8 +720,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                          int nvqs,
                                          bool is_datapath,
                                          bool svq,
-                                       struct vhost_vdpa_iova_range iova_range,
-                                       VhostIOVATree *iova_tree)
+                                       struct vhost_vdpa_iova_range iova_range)
   {
       NetClientState *nc = NULL;
       VhostVDPAState *s;
@@ -690,7 +742,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
       s->vhost_vdpa.shadow_vqs_enabled = svq;
       s->vhost_vdpa.iova_range = iova_range;
       s->vhost_vdpa.shadow_data = svq;
-    s->vhost_vdpa.iova_tree = iova_tree;
       if (!is_datapath) {
           s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                               vhost_vdpa_net_cvq_cmd_page_len());
@@ -760,7 +811,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
       uint64_t features;
       int vdpa_device_fd;
       g_autofree NetClientState **ncs = NULL;
-    g_autoptr(VhostIOVATree) iova_tree = NULL;
       struct vhost_vdpa_iova_range iova_range;
       NetClientState *nc;
       int queue_pairs, r, i = 0, has_cvq = 0;
@@ -812,12 +862,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
           goto err;
       }

-    if (opts->x_svq) {
-        if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
-            goto err_svq;
-        }
-
-        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
+    if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
+        goto err;
       }

       ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
@@ -825,7 +871,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
       for (i = 0; i < queue_pairs; i++) {
           ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                        vdpa_device_fd, i, 2, true, opts->x_svq,
-                                     iova_range, iova_tree);
+                                     iova_range);
           if (!ncs[i])
               goto err;
       }
@@ -833,13 +879,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
       if (has_cvq) {
           nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                    vdpa_device_fd, i, 1, false,
-                                 opts->x_svq, iova_range, iova_tree);
+                                 opts->x_svq, iova_range);
           if (!nc)
               goto err;
       }

-    /* iova_tree ownership belongs to last NetClientState */
-    g_steal_pointer(&iova_tree);
       return 0;

   err:
@@ -849,7 +893,6 @@ err:
           }
       }

-err_svq:
       qemu_close(vdpa_device_fd);

       return -1;

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux