On Tue, Aug 9, 2022 at 9:36 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
>
> On Sat, Aug 6, 2022 at 12:39 AM Eugenio Pérez <eperezma@xxxxxxxxxx> wrote:
> >
> > Isolate the control virtqueue in its own group, allowing QEMU to
> > intercept control commands but letting the dataplane run totally
> > passthrough to the guest.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx>
> > ---
> > v4:
> > * Squash vhost_vdpa_cvq_group_is_independent.
> > * Rebased on last CVQ start series, which allocated CVQ cmd bufs at load.
> > * Do not check for the cvq index on vhost_vdpa_net_prepare; we only have
> >   one callback registered in that NetClientInfo.
> >
> > v3:
> > * Make asid-related queries print a warning instead of returning an
> >   error and aborting the start of QEMU.
> > ---
> >  hw/virtio/vhost-vdpa.c |   3 +-
> >  net/vhost-vdpa.c       | 124 +++++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 122 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 3eb67b27b7..69f34f4cc5 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -678,7 +678,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
> >  {
> >      uint64_t features;
> >      uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > -        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > +        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > +        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> >      int r;
> >
> >      if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index bf78b1332f..c820a5fd9f 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -37,6 +37,9 @@ typedef struct VhostVDPAState {
> >      /* Control commands shadow buffers */
> >      void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
> >
> > +    /* Number of address spaces supported by the device */
> > +    unsigned address_space_num;
> > +
> >      /* The device always have SVQ enabled */
> >      bool always_svq;
> >      bool started;
> > @@ -100,6 +103,9 @@ static const uint64_t vdpa_svq_device_features =
> >      BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> >      BIT_ULL(VIRTIO_NET_F_STANDBY);
> >
> > +#define VHOST_VDPA_NET_CVQ_PASSTHROUGH 0
>
> We need a better name for the macro since it's not easy to see it's an
> asid.
>

VHOST_VDPA_NET_DATA_ASID?

> > +#define VHOST_VDPA_NET_CVQ_ASID 1
> > +
> >  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> >  {
> >      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > @@ -224,6 +230,37 @@ static NetClientInfo net_vhost_vdpa_info = {
> >      .check_peer_type = vhost_vdpa_check_peer_type,
> >  };
> >
> > +static void vhost_vdpa_get_vring_group(int device_fd,
> > +                                       struct vhost_vring_state *state)
> > +{
> > +    int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, state);
>
> Let's hide vhost_vring_state from the caller and simply accept a vq
> index parameter (as the function below does), then we can return the
> group id.
>
> This hides the low-level ABI and simplifies the caller's code.
>

We need to return an error. The example is a little bit pathological, but
if we get that CVQ is in vq group != 0 and all the data vqs return an
error when reading the vq group, I think we shouldn't assume they belong
to asid 0. Some of them could be in the same vq group as CVQ, making all
of this fail.

Can we assume the vq group is a uint32_t? Would it work to return an
int64_t with the possibility of errors?
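Something like this, for illustration (a sketch of the proposal being
discussed, not the posted patch; it assumes the group id fits in a
uint32_t so a negative int64_t can carry -errno):

    static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
    {
        struct vhost_vring_state state = {
            .index = vq_index,
        };
        int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);

        if (unlikely(r < 0)) {
            /* Propagate the failure instead of silently assuming group 0 */
            return -errno;
        }

        return state.num;
    }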
> > +    if (unlikely(r < 0)) {
> > +        /*
> > +         * Assume all groups are 0, the consequences are the same and we
> > +         * will not abort device creation
> > +         */
> > +        state->num = 0;
> > +    }
> > +}
> > +
> > +static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
> > +                                           unsigned vq_group,
> > +                                           unsigned asid_num)
> > +{
> > +    struct vhost_vring_state asid = {
> > +        .index = vq_group,
> > +        .num = asid_num,
> > +    };
> > +    int ret;
> > +
> > +    ret = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
> > +    if (unlikely(ret < 0)) {
> > +        warn_report("Can't set vq group %u asid %u, errno=%d (%s)",
> > +                    asid.index, asid.num, errno, g_strerror(errno));
> > +    }
> > +    return ret;
> > +}
> > +
> >  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> >  {
> >      VhostIOVATree *tree = v->iova_tree;
> > @@ -298,11 +335,55 @@ dma_map_err:
> >  static int vhost_vdpa_net_cvq_prepare(NetClientState *nc)
> >  {
> >      VhostVDPAState *s;
> > +    struct vhost_vdpa *v;
> > +    struct vhost_vring_state cvq_group = {};
> >      int r;
> >
> >      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> >      s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    v = &s->vhost_vdpa;
> > +    cvq_group.index = v->dev->vq_index_end - 1;
> > +
> > +    /* Default values */
>
> Code can explain itself so this comment is probably useless.
>

I'll delete it.

> > +    v->shadow_vqs_enabled = false;
> > +    s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_PASSTHROUGH;
> > +
> > +    if (s->always_svq) {
> > +        v->shadow_vqs_enabled = true;
>
> The name of the variable is suboptimal.
>
> I think we need to distinguish:
>
> 1) shadow all virtqueues
>
> from
>
> 2) shadow cvq only
>

Note that shadow_vqs_enabled is per vhost_vdpa, so v->shadow_vqs_enabled
means that the vqs of *v* are shadowed. The data vhost_vdpa can have a
different value than the CVQ vhost_vdpa.

Having said that, I'm ok with changing the name of the variable, but I
cannot come up with a better one.

Thanks!

> Thanks
>

> > +        goto out;
> > +    }
> > +
> > +    if (s->address_space_num < 2) {
> > +        return 0;
> > +    }
> > +
> > +    /**
> > +     * Check if all the virtqueues of the virtio device are in a
> > +     * different vq group than the last vq. The VQ group of the last vq
> > +     * is passed in cvq_group.
> > +     */
> > +    vhost_vdpa_get_vring_group(v->device_fd, &cvq_group);
> > +    for (int i = 0; i < (v->dev->vq_index_end - 1); ++i) {
> > +        struct vhost_vring_state vq_group = {
> > +            .index = i,
> > +        };
> > +
> > +        vhost_vdpa_get_vring_group(v->device_fd, &vq_group);
> > +        if (unlikely(vq_group.num == cvq_group.num)) {
> > +            warn_report("CVQ %u group is the same as VQ %u one (%u)",
> > +                        cvq_group.index, vq_group.index, cvq_group.num);
> > +            return 0;
> > +        }
> > +    }
> > +
> > +    r = vhost_vdpa_set_address_space_id(v, cvq_group.num,
> > +                                        VHOST_VDPA_NET_CVQ_ASID);
> > +    if (r == 0) {
> > +        v->shadow_vqs_enabled = true;
> > +        s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> > +    }
> > +
> > +out:
> >      if (!s->vhost_vdpa.shadow_vqs_enabled) {
> >          return 0;
> >      }
> > @@ -523,12 +604,38 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
> >      .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
> >  };
> >
> > +static uint32_t vhost_vdpa_get_as_num(int vdpa_device_fd)
> > +{
> > +    uint64_t features;
> > +    unsigned num_as;
> > +    int r;
> > +
> > +    r = ioctl(vdpa_device_fd, VHOST_GET_BACKEND_FEATURES, &features);
> > +    if (unlikely(r < 0)) {
> > +        warn_report("Cannot get backend features");
> > +        return 1;
> > +    }
> > +
> > +    if (!(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> > +        return 1;
> > +    }
> > +
> > +    r = ioctl(vdpa_device_fd, VHOST_VDPA_GET_AS_NUM, &num_as);
> > +    if (unlikely(r < 0)) {
> > +        warn_report("Cannot retrieve number of supported ASs");
> > +        return 1;
> > +    }
> > +
> > +    return num_as;
> > +}
> > +
> >  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >                                             const char *device,
> >                                             const char *name,
> >                                             int vdpa_device_fd,
> >                                             int queue_pair_index,
> >                                             int nvqs,
> > +                                           unsigned nas,
> >                                             bool is_datapath,
> >                                             bool svq,
> >                                             VhostIOVATree *iova_tree)
> > @@ -547,6 +654,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >      snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA);
> >      s = DO_UPCAST(VhostVDPAState, nc, nc);
> >
> > +    s->address_space_num = nas;
> >      s->vhost_vdpa.device_fd = vdpa_device_fd;
> >      s->vhost_vdpa.index = queue_pair_index;
> >      s->always_svq = svq;
> > @@ -632,6 +740,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> >      g_autoptr(VhostIOVATree) iova_tree = NULL;
> >      NetClientState *nc;
> >      int queue_pairs, r, i = 0, has_cvq = 0;
> > +    unsigned num_as = 1;
> > +    bool svq_cvq;
> >
> >      assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >      opts = &netdev->u.vhost_vdpa;
> > @@ -656,7 +766,13 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> >          goto err;
> >      }
> >
> > -    if (opts->x_svq) {
> > +    svq_cvq = opts->x_svq;
> > +    if (has_cvq && !opts->x_svq) {
> > +        num_as = vhost_vdpa_get_as_num(vdpa_device_fd);
> > +        svq_cvq = num_as > 1;
> > +    }
> > +
> > +    if (opts->x_svq || svq_cvq) {
> >          struct vhost_vdpa_iova_range iova_range;
> >
> >          uint64_t invalid_dev_features =
> > @@ -679,15 +795,15 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> >
> >      for (i = 0; i < queue_pairs; i++) {
> >          ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
> > -                                     vdpa_device_fd, i, 2, true, opts->x_svq,
> > -                                     iova_tree);
> > +                                     vdpa_device_fd, i, 2, num_as, true,
> > +                                     opts->x_svq, iova_tree);
> >          if (!ncs[i])
> >              goto err;
> >      }
> >
> >      if (has_cvq) {
> >          nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
> > -                                 vdpa_device_fd, i, 1, false,
> > +                                 vdpa_device_fd, i, 1, num_as, false,
> >                                   opts->x_svq, iova_tree);
> >          if (!nc)
> >              goto err;
> > --
> > 2.31.1
>
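With the int64_t-returning helper sketched above, the group check could
fail cleanly instead of treating a failed query as group 0. A
hypothetical rework (the function name is borrowed from the v4
changelog; this is not the posted code):

    static int64_t vhost_vdpa_cvq_group_is_independent(struct vhost_vdpa *v)
    {
        unsigned last_vq = v->dev->vq_index_end - 1;
        int64_t cvq_group = vhost_vdpa_get_vring_group(v->device_fd, last_vq);

        if (unlikely(cvq_group < 0)) {
            /* Propagate -errno rather than guessing a group */
            return cvq_group;
        }

        for (unsigned i = 0; i < last_vq; ++i) {
            int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);

            if (unlikely(group < 0)) {
                return group;
            }

            if (group == cvq_group) {
                return 0; /* A data vq shares the CVQ group: not independent */
            }
        }

        return 1;
    }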