On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote: > When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature, > it will merge big packet into rx vq. > > Signed-off-by: Yiwen Jiang <jiangyiwen@xxxxxxxxxx> I feel this approach jumps into making interface changes for optimizations too quickly. For example, what prevents us from taking a big buffer, prepending each chunk with the header and writing it out without host/guest interface changes? This should allow optimizations such as vhost_add_used_n batching. I realize a header in each packet does have a cost, but it also has advantages such as improved robustness, I'd like to see more of an apples to apples comparison of the performance gain from skipping them. > --- > drivers/vhost/vsock.c | 111 ++++++++++++++++++++++++++++++-------- > include/linux/virtio_vsock.h | 1 + > include/uapi/linux/virtio_vsock.h | 5 ++ > 3 files changed, 94 insertions(+), 23 deletions(-) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index 34bc3ab..dc52b0f 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -22,7 +22,8 @@ > #define VHOST_VSOCK_DEFAULT_HOST_CID 2 > > enum { > - VHOST_VSOCK_FEATURES = VHOST_FEATURES, > + VHOST_VSOCK_FEATURES = VHOST_FEATURES | > + (1ULL << VIRTIO_VSOCK_F_MRG_RXBUF), > }; > > /* Used to track all the vhost_vsock instances on the system. */ > @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) > return vsock; > } > > +/* This segment of codes are copied from drivers/vhost/net.c */ > +static int get_rx_bufs(struct vhost_virtqueue *vq, > + struct vring_used_elem *heads, int datalen, > + unsigned *iovcount, unsigned int quota) > +{ > + unsigned int out, in; > + int seg = 0; > + int headcount = 0; > + unsigned d; > + int ret; > + /* > + * len is always initialized before use since we are always called with > + * datalen > 0. > + */ > + u32 uninitialized_var(len); > + > + while (datalen > 0 && headcount < quota) { > + if (unlikely(seg >= UIO_MAXIOV)) { > + ret = -ENOBUFS; > + goto err; > + } > + > + ret = vhost_get_vq_desc(vq, vq->iov + seg, > + ARRAY_SIZE(vq->iov) - seg, &out, > + &in, NULL, NULL); > + if (unlikely(ret < 0)) > + goto err; > + > + d = ret; > + if (d == vq->num) { > + ret = 0; > + goto err; > + } > + > + if (unlikely(out || in <= 0)) { > + vq_err(vq, "unexpected descriptor format for RX: " > + "out %d, in %d\n", out, in); > + ret = -EINVAL; > + goto err; > + } > + > + heads[headcount].id = cpu_to_vhost32(vq, d); > + len = iov_length(vq->iov + seg, in); > + heads[headcount].len = cpu_to_vhost32(vq, len); > + datalen -= len; > + ++headcount; > + seg += in; > + } > + > + heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); > + *iovcount = seg; > + > + /* Detect overrun */ > + if (unlikely(datalen > 0)) { > + ret = UIO_MAXIOV + 1; > + goto err; > + } > + return headcount; > +err: > + vhost_discard_vq_desc(vq, headcount); > + return ret; > +} > + > static void > vhost_transport_do_send_pkt(struct vhost_vsock *vsock, > struct vhost_virtqueue *vq) > @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) > struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; > bool added = false; > bool restart_tx = false; > + int mergeable; > + size_t vsock_hlen; > > mutex_lock(&vq->mutex); > > if (!vq->private_data) > goto out; > > + mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF); > + /* > + * Guest fill page for rx vq in mergeable case, so it will not > + * allocate pkt structure, we should reserve size of pkt in advance. > + */ > + if (likely(mergeable)) > + vsock_hlen = sizeof(struct virtio_vsock_pkt); > + else > + vsock_hlen = sizeof(struct virtio_vsock_hdr); > + > /* Avoid further vmexits, we're already processing the virtqueue */ > vhost_disable_notify(&vsock->dev, vq); > > for (;;) { > struct virtio_vsock_pkt *pkt; > struct iov_iter iov_iter; > - unsigned out, in; > + unsigned out = 0, in = 0; > size_t nbytes; > size_t len; > - int head; > + s16 headcount; > > spin_lock_bh(&vsock->send_pkt_list_lock); > if (list_empty(&vsock->send_pkt_list)) { > @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) > list_del_init(&pkt->list); > spin_unlock_bh(&vsock->send_pkt_list_lock); > > - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > - &out, &in, NULL, NULL); > - if (head < 0) { > - spin_lock_bh(&vsock->send_pkt_list_lock); > - list_add(&pkt->list, &vsock->send_pkt_list); > - spin_unlock_bh(&vsock->send_pkt_list_lock); > - break; > - } > - > - if (head == vq->num) { > + headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len, > + &in, likely(mergeable) ? UIO_MAXIOV : 1); > + if (headcount <= 0) { > spin_lock_bh(&vsock->send_pkt_list_lock); > list_add(&pkt->list, &vsock->send_pkt_list); > spin_unlock_bh(&vsock->send_pkt_list_lock); > @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) > /* We cannot finish yet if more buffers snuck in while > * re-enabling notify. > */ > - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { > + if (!headcount && unlikely(vhost_enable_notify(&vsock->dev, vq))) { > vhost_disable_notify(&vsock->dev, vq); > continue; > } > break; > } > > - if (out) { > - virtio_transport_free_pkt(pkt); > - vq_err(vq, "Expected 0 output buffers, got %u\n", out); > - break; > - } > - > len = iov_length(&vq->iov[out], in); > iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); > > - nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); > - if (nbytes != sizeof(pkt->hdr)) { > + if (likely(mergeable)) > + pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount); > + nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter); > + if (nbytes != vsock_hlen) { > virtio_transport_free_pkt(pkt); > vq_err(vq, "Faulted on copying pkt hdr\n"); > break; > @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) > break; > } > > - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); > + vhost_add_used_n(vq, vq->heads, headcount); > added = true; > > if (pkt->reply) { > diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h > index bf84418..da9e1fe 100644 > --- a/include/linux/virtio_vsock.h > +++ b/include/linux/virtio_vsock.h > @@ -50,6 +50,7 @@ struct virtio_vsock_sock { > > struct virtio_vsock_pkt { > struct virtio_vsock_hdr hdr; > + struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr; > struct work_struct work; > struct list_head list; > /* socket refcnt not held, only use for cancellation */ > diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h > index 1d57ed3..2292f30 100644 > --- a/include/uapi/linux/virtio_vsock.h > +++ b/include/uapi/linux/virtio_vsock.h > @@ -63,6 +63,11 @@ struct virtio_vsock_hdr { > __le32 fwd_cnt; > } __attribute__((packed)); > > +/* It add mergeable rx buffers feature */ > +struct virtio_vsock_mrg_rxbuf_hdr { > + __le16 num_buffers; /* number of mergeable rx buffers */ > +} __attribute__((packed)); > + > enum virtio_vsock_type { > VIRTIO_VSOCK_TYPE_STREAM = 1, > }; > -- > 1.8.3.1 >