On Mon, Aug 1, 2022 at 9:50 AM Eugenio Perez Martin <eperezma@xxxxxxxxxx> wrote: > > On Tue, Jul 26, 2022 at 5:04 AM Jason Wang <jasowang@xxxxxxxxxx> wrote: > > > > > > 在 2022/7/22 21:43, Eugenio Pérez 写道: > > > Isolate control virtqueue in its own group, allowing to intercept control > > > commands but letting dataplane run totally passthrough to the guest. > > > > > > Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx> > > > --- > > > hw/virtio/vhost-vdpa.c | 3 +- > > > net/vhost-vdpa.c | 158 +++++++++++++++++++++++++++++++++++++++-- > > > 2 files changed, 156 insertions(+), 5 deletions(-) > > > > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > > > index 79623badf2..fe1c85b086 100644 > > > --- a/hw/virtio/vhost-vdpa.c > > > +++ b/hw/virtio/vhost-vdpa.c > > > @@ -668,7 +668,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) > > > { > > > uint64_t features; > > > uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | > > > - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; > > > + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | > > > + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; > > > int r; > > > > > > if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > index 6c1c64f9b1..f5075ef487 100644 > > > --- a/net/vhost-vdpa.c > > > +++ b/net/vhost-vdpa.c > > > @@ -37,6 +37,9 @@ typedef struct VhostVDPAState { > > > /* Control commands shadow buffers */ > > > void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; > > > > > > + /* Number of address spaces supported by the device */ > > > + unsigned address_space_num; > > > + > > > /* The device always have SVQ enabled */ > > > bool always_svq; > > > bool started; > > > @@ -100,6 +103,8 @@ static const uint64_t vdpa_svq_device_features = > > > BIT_ULL(VIRTIO_NET_F_RSC_EXT) | > > > BIT_ULL(VIRTIO_NET_F_STANDBY); > > > > > > +#define VHOST_VDPA_NET_CVQ_ASID 1 > > > + > > > VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) > > > { > > > VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); > > > @@ -214,6 +219,109 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, > > > return 0; > > > } > > > > > > +static int vhost_vdpa_get_vring_group(int device_fd, > > > + struct vhost_vring_state *state) > > > +{ > > > + int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, state); > > > + return r < 0 ? -errno : 0; > > > +} > > > > > > It would be more convenient for the caller if we can simply return 0 here. > > > > I don't follow this, how do we know if the call failed then? > > > > > > + > > > +/** > > > + * Check if all the virtqueues of the virtio device are in a different vq than > > > + * the last vq. VQ group of last group passed in cvq_group. > > > + */ > > > +static bool vhost_vdpa_cvq_group_is_independent(struct vhost_vdpa *v, > > > + struct vhost_vring_state cvq_group) > > > +{ > > > + struct vhost_dev *dev = v->dev; > > > + int ret; > > > + > > > + for (int i = 0; i < (dev->vq_index_end - 1); ++i) { > > > + struct vhost_vring_state vq_group = { > > > + .index = i, > > > + }; > > > + > > > + ret = vhost_vdpa_get_vring_group(v->device_fd, &vq_group); > > > + if (unlikely(ret)) { > > > + goto call_err; > > > + } > > > + if (unlikely(vq_group.num == cvq_group.num)) { > > > + error_report("CVQ %u group is the same as VQ %u one (%u)", > > > + cvq_group.index, vq_group.index, cvq_group.num); > > > > > > Any reason we need error_report() here? > > > > We can move it to a migration blocker. > > > Btw, I'd suggest to introduce new field in vhost_vdpa, then we can get > > and store the group_id there during init. > > > > This could be useful for the future e.g PASID virtualization. > > > > Answering below. > > > > > > + return false; > > > + } > > > + } > > > + > > > + return true; > > > + > > > +call_err: > > > + error_report("Can't read vq group, errno=%d (%s)", -ret, g_strerror(-ret)); > > > + return false; > > > +} > > > + > > > +static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, > > > + unsigned vq_group, > > > + unsigned asid_num) > > > +{ > > > + struct vhost_vring_state asid = { > > > + .index = vq_group, > > > + .num = asid_num, > > > + }; > > > + int ret; > > > + > > > + ret = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); > > > + if (unlikely(ret < 0)) { > > > + error_report("Can't set vq group %u asid %u, errno=%d (%s)", > > > + asid.index, asid.num, errno, g_strerror(errno)); > > > + } > > > + return ret; > > > +} > > > + > > > +static void vhost_vdpa_net_prepare(NetClientState *nc) > > > +{ > > > + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); > > > + struct vhost_vdpa *v = &s->vhost_vdpa; > > > + struct vhost_dev *dev = v->dev; > > > + struct vhost_vring_state cvq_group = { > > > + .index = v->dev->vq_index_end - 1, > > > + }; > > > + int r; > > > + > > > + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > + > > > + if (dev->nvqs != 1 || dev->vq_index + dev->nvqs != dev->vq_index_end) { > > > + /* Only interested in CVQ */ > > > + return; > > > + } > > > + > > > + if (s->always_svq) { > > > + /* SVQ is already enabled */ > > > + return; > > > + } > > > + > > > + if (s->address_space_num < 2) { > > > + v->shadow_vqs_enabled = false; > > > + return; > > > + } > > > + > > > + r = vhost_vdpa_get_vring_group(v->device_fd, &cvq_group); > > > + if (unlikely(r)) { > > > + error_report("Can't read cvq group, errno=%d (%s)", r, g_strerror(-r)); > > > + v->shadow_vqs_enabled = false; > > > + return; > > > + } > > > + > > > + if (!vhost_vdpa_cvq_group_is_independent(v, cvq_group)) { > > > + v->shadow_vqs_enabled = false; > > > + return; > > > + } > > > + > > > + r = vhost_vdpa_set_address_space_id(v, cvq_group.num, > > > + VHOST_VDPA_NET_CVQ_ASID); > > > + v->shadow_vqs_enabled = r == 0; > > > + s->vhost_vdpa.address_space_id = r == 0 ? 1 : 0; > > > > > > I'd expect this to be done net_init_vhost_vdpa(). Or any advantage of > > initializing thing here? > > > > We don't know the CVQ index at initialization time, since the guest is > not even running so it has not acked the features. So we cannot know > its VQ group, which is needed to call set_address_space_id. > > In my opinion, we shouldn't even cache "cvq has group id N", since the > device could return a different group id between resets (for example, > because the negotiation of different features). > > > > > > +} > > > + > > > static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) > > > { > > > VhostIOVATree *tree = v->iova_tree; > > > @@ -432,6 +540,7 @@ static NetClientInfo net_vhost_vdpa_info = { > > > .type = NET_CLIENT_DRIVER_VHOST_VDPA, > > > .size = sizeof(VhostVDPAState), > > > .receive = vhost_vdpa_receive, > > > + .prepare = vhost_vdpa_net_prepare, > > > .start = vhost_vdpa_net_start, > > > .cleanup = vhost_vdpa_cleanup, > > > .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, > > > @@ -542,12 +651,40 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > }; > > > > > > +static bool vhost_vdpa_get_as_num(int vdpa_device_fd, unsigned *num_as, > > > + Error **errp) > > > > > > Let's simply return int as the #as here. > > > > If as is uint32_t, should we return int64_t and both leave negative > values for errors and check that #as <= UINT32_MAX then? > > Thanks! > I think a few of the previous comments are on the line of "do not fail the initialization but allow it with default values (num_as = 1, vq_group = 0)". I think that is ok, but in my opinion we need to add enough tracing for the user to know what is failing in the environment. Unless more debugging, the only information they have is that the device does not support migration, but not what step is failing. I'll move all the errors to simple warnings for the next version, let me know if you think we need to omit those warnings too. Thanks!