On 2018/11/6 12:00, Jason Wang wrote: > > On 2018/11/5 下午3:47, jiangyiwen wrote: >> Guest receive mergeable rx buffer, it can merge >> scatter rx buffer into a big buffer and then copy >> to user space. >> >> Signed-off-by: Yiwen Jiang <jiangyiwen@xxxxxxxxxx> >> --- >> include/linux/virtio_vsock.h | 9 ++++ >> net/vmw_vsock/virtio_transport.c | 75 +++++++++++++++++++++++++++++---- >> net/vmw_vsock/virtio_transport_common.c | 59 ++++++++++++++++++++++---- >> 3 files changed, 127 insertions(+), 16 deletions(-) >> >> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h >> index da9e1fe..6be3cd7 100644 >> --- a/include/linux/virtio_vsock.h >> +++ b/include/linux/virtio_vsock.h >> @@ -13,6 +13,8 @@ >> #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) >> #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL >> #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) >> +/* virtio_vsock_pkt + max_pkt_len(default MAX_PKT_BUF_SIZE) */ >> +#define VIRTIO_VSOCK_MAX_MRG_BUF_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1) >> >> /* Virtio-vsock feature */ >> #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. */ >> @@ -48,6 +50,11 @@ struct virtio_vsock_sock { >> struct list_head rx_queue; >> }; >> >> +struct virtio_vsock_mrg_rxbuf { >> + void *buf; >> + u32 len; >> +}; >> + >> struct virtio_vsock_pkt { >> struct virtio_vsock_hdr hdr; >> struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr; >> @@ -59,6 +66,8 @@ struct virtio_vsock_pkt { >> u32 len; >> u32 off; >> bool reply; >> + bool mergeable; >> + struct virtio_vsock_mrg_rxbuf mrg_rxbuf[VIRTIO_VSOCK_MAX_MRG_BUF_NUM]; >> }; > > > It's better to use iov here I think, and drop buf completely. > > And this is better to be done in an independent patch. > You're right, I can use kvec instead of customized structure, in addition, I don't understand about drop buf completely and an independent patch. Thanks. > >> >> struct virtio_vsock_pkt_info { >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c >> index 2040a9e..3557ad3 100644 >> --- a/net/vmw_vsock/virtio_transport.c >> +++ b/net/vmw_vsock/virtio_transport.c >> @@ -359,11 +359,62 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) >> return val < virtqueue_get_vring_size(vq); >> } >> >> +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq, >> + struct virtio_vsock *vsock, unsigned int *total_len) >> +{ >> + struct virtio_vsock_pkt *pkt; >> + u16 num_buf; >> + void *page; >> + unsigned int len; >> + int i = 0; >> + >> + page = virtqueue_get_buf(vq, &len); >> + if (!page) >> + return NULL; >> + >> + *total_len = len; >> + vsock->rx_buf_nr--; >> + >> + pkt = page; >> + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); >> + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_MRG_BUF_NUM) >> + goto err; >> + >> + pkt->mergeable = true; >> + if (!le32_to_cpu(pkt->hdr.len)) >> + return pkt; >> + >> + len -= sizeof(struct virtio_vsock_pkt); >> + pkt->mrg_rxbuf[i].buf = page + sizeof(struct virtio_vsock_pkt); >> + pkt->mrg_rxbuf[i].len = len; >> + i++; >> + >> + while (--num_buf) { >> + page = virtqueue_get_buf(vq, &len); >> + if (!page) >> + goto err; >> + >> + *total_len += len; >> + vsock->rx_buf_nr--; >> + >> + pkt->mrg_rxbuf[i].buf = page; >> + pkt->mrg_rxbuf[i].len = len; >> + i++; >> + } >> + >> + return pkt; >> +err: >> + virtio_transport_free_pkt(pkt); >> + return NULL; >> +} > > > Similar logic could be found at virtio-net driver. > > Haven't thought this deeply, but it looks to me use virtio-net driver is also possible, e.g for data-path, just register vsock specific callbacks. > > >> + >> static void virtio_transport_rx_work(struct work_struct *work) >> { >> struct virtio_vsock *vsock = >> container_of(work, struct virtio_vsock, rx_work); >> struct virtqueue *vq; >> + size_t vsock_hlen = vsock->mergeable ? sizeof(struct virtio_vsock_pkt) : >> + sizeof(struct virtio_vsock_hdr); >> >> vq = vsock->vqs[VSOCK_VQ_RX]; >> >> @@ -383,21 +434,29 @@ static void virtio_transport_rx_work(struct work_struct *work) >> goto out; >> } >> >> - pkt = virtqueue_get_buf(vq, &len); >> - if (!pkt) { >> - break; >> - } >> + if (likely(vsock->mergeable)) { >> + pkt = receive_mergeable(vq, vsock, &len); >> + if (!pkt) >> + break; >> >> - vsock->rx_buf_nr--; >> + pkt->len = le32_to_cpu(pkt->hdr.len); >> + } else { >> + pkt = virtqueue_get_buf(vq, &len); >> + if (!pkt) { >> + break; >> + } >> + >> + vsock->rx_buf_nr--; >> + } >> >> /* Drop short/long packets */ >> - if (unlikely(len < sizeof(pkt->hdr) || >> - len > sizeof(pkt->hdr) + pkt->len)) { >> + if (unlikely(len < vsock_hlen || >> + len > vsock_hlen + pkt->len)) { >> virtio_transport_free_pkt(pkt); >> continue; >> } >> >> - pkt->len = len - sizeof(pkt->hdr); >> + pkt->len = len - vsock_hlen; >> virtio_transport_deliver_tap_pkt(pkt); >> virtio_transport_recv_pkt(pkt); >> } >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >> index 3ae3a33..7bef1d5 100644 >> --- a/net/vmw_vsock/virtio_transport_common.c >> +++ b/net/vmw_vsock/virtio_transport_common.c >> @@ -272,14 +272,49 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, >> */ >> spin_unlock_bh(&vvs->rx_lock); >> >> - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); >> - if (err) >> - goto out; >> + if (pkt->mergeable) { >> + struct virtio_vsock_mrg_rxbuf *buf = pkt->mrg_rxbuf; >> + size_t mrg_copy_bytes, last_buf_total = 0, rxbuf_off; >> + size_t tmp_bytes = bytes; >> + int i; >> + >> + for (i = 0; i < le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); i++) { >> + if (pkt->off > last_buf_total + buf[i].len) { >> + last_buf_total += buf[i].len; >> + continue; >> + } >> + >> + rxbuf_off = pkt->off - last_buf_total; >> + mrg_copy_bytes = min(buf[i].len - rxbuf_off, tmp_bytes); >> + err = memcpy_to_msg(msg, buf[i].buf + rxbuf_off, mrg_copy_bytes); >> + if (err) >> + goto out; >> + >> + tmp_bytes -= mrg_copy_bytes; >> + pkt->off += mrg_copy_bytes; >> + last_buf_total += buf[i].len; >> + if (!tmp_bytes) >> + break; >> + } > > > After switch to use iov, you can user iov_iter helper to avoid the above open-coding I believe. > > And you can also drop the if (mergeable) condition. > > Thanks > Thanks, I will try to use iov instead. > >> + >> + if (tmp_bytes) { >> + printk(KERN_WARNING "WARNING! bytes = %llu, " >> + "bytes = %llu\n", >> + (unsigned long long)bytes, >> + (unsigned long long)tmp_bytes); >> + } >> + >> + total += (bytes - tmp_bytes); >> + } else { >> + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); >> + if (err) >> + goto out; >> + >> + total += bytes; >> + pkt->off += bytes; >> + } >> >> spin_lock_bh(&vvs->rx_lock); >> - >> - total += bytes; >> - pkt->off += bytes; >> if (pkt->off == pkt->len) { >> virtio_transport_dec_rx_pkt(vvs, pkt); >> list_del(&pkt->list); >> @@ -1050,8 +1085,16 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) >> >> void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) >> { >> - kfree(pkt->buf); >> - kfree(pkt); >> + int i; >> + >> + if (pkt->mergeable) { >> + for (i = 1; i < le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); i++) >> + free_page((unsigned long)pkt->mrg_rxbuf[i].buf); >> + free_page((unsigned long)(void *)pkt); >> + } else { >> + kfree(pkt->buf); >> + kfree(pkt); >> + } >> } >> EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); >> > > . >