On 09.06.2022 11:38, Stefano Garzarella wrote:
> On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>> For packets received from virtio RX queue, use buddy
>> allocator instead of 'kmalloc()' to be able to insert
>> such pages to user provided vma. Single call to
>> 'copy_from_iter()' replaced with per-page loop.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
>> ---
>>  drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
>>  1 file changed, 69 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index e6c9d41db1de..0dc2229f18f7 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -58,6 +58,7 @@ struct vhost_vsock {
>>
>>          u32 guest_cid;
>>          bool seqpacket_allow;
>> +        bool zerocopy_rx_on;
>
> This is per-device, so a single socket can change the behaviour of all
> the sockets of this device.

Sure, my mistake.

> Can we do something better?
>
> Maybe we can allocate the header, copy it, find the socket and check
> if zero-copy is enabled or not for that socket.
>
> Of course we should change or extend virtio_transport_recv_pkt() to
> avoid finding the socket again.

I think yes. There is a rough sketch of this idea at the end of this
mail.

>> };
>>
>> static u32 vhost_transport_get_local_cid(void)
>> @@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>                        unsigned int out, unsigned int in)
>> {
>>          struct virtio_vsock_pkt *pkt;
>> +        struct vhost_vsock *vsock;
>>          struct iov_iter iov_iter;
>>          size_t nbytes;
>>          size_t len;
>> @@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>                  return NULL;
>>          }
>>
>> -        pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> -        if (!pkt->buf) {
>> -                kfree(pkt);
>> -                return NULL;
>> -        }
>> -
>>          pkt->buf_len = pkt->len;
>> +        vsock = container_of(vq->dev, struct vhost_vsock, dev);
>>
>> -        nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> -        if (nbytes != pkt->len) {
>> -                vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> -                       pkt->len, nbytes);
>> -                virtio_transport_free_pkt(pkt);
>> -                return NULL;
>> +        if (!vsock->zerocopy_rx_on) {
>> +                pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> +
>> +                if (!pkt->buf) {
>> +                        kfree(pkt);
>> +                        return NULL;
>> +                }
>> +
>> +                pkt->slab_buf = true;
>> +                nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> +                if (nbytes != pkt->len) {
>> +                        vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> +                               pkt->len, nbytes);
>> +                        virtio_transport_free_pkt(pkt);
>> +                        return NULL;
>> +                }
>> +        } else {
>> +                struct page *buf_page;
>> +                ssize_t pkt_len;
>> +                int page_idx;
>> +
>> +                /* This creates memory overrun, as we allocate
>> +                 * at least one page for each packet.
>> +                 */
>> +                buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
>> +
>> +                if (buf_page == NULL) {
>> +                        kfree(pkt);
>> +                        return NULL;
>> +                }
>> +
>> +                pkt->buf = page_to_virt(buf_page);
>> +
>> +                page_idx = 0;
>> +                pkt_len = pkt->len;
>> +
>> +                /* As allocated pages are not mapped, process
>> +                 * pages one by one.
>> +                 */
>> +                while (pkt_len > 0) {
>> +                        void *mapped;
>> +                        size_t to_copy;
>> +
>> +                        mapped = kmap(buf_page + page_idx);
>> +
>> +                        if (mapped == NULL) {
>> +                                virtio_transport_free_pkt(pkt);
>> +                                return NULL;
>> +                        }
>> +
>> +                        to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
>> +
>> +                        nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
>> +                        if (nbytes != to_copy) {
>> +                                vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
>> +                                       to_copy, nbytes);
>> +                                kunmap(mapped);
>> +                                virtio_transport_free_pkt(pkt);
>> +                                return NULL;
>> +                        }
>> +
>> +                        kunmap(mapped);
>> +
>> +                        pkt_len -= to_copy;
>> +                        page_idx++;
>> +                }
>>          }
>>
>>          return pkt;
>> --
>> 2.25.1
>
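For the per-socket flag discussed above, a rough, untested sketch of
what the lookup could look like. By the time the payload buffer is
allocated, vhost_vsock_alloc_pkt() has already copied pkt->hdr, so we
can look the destination socket up from it. vsock_addr_init(),
vsock_find_connected_socket(), vsock_find_bound_socket() and
vsock_sk() are the existing af_vsock helpers; the 'zerocopy_rx_on'
field of 'struct vsock_sock' is hypothetical:

        /* Untested sketch: decide the allocation strategy from the
         * destination socket instead of from the device.
         */
        struct sockaddr_vm src, dst;
        struct sock *sk;
        bool zerocopy_rx = false;

        vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
                        le32_to_cpu(pkt->hdr.src_port));
        vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
                        le32_to_cpu(pkt->hdr.dst_port));

        /* Same lookup order as virtio_transport_recv_pkt(). */
        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk)
                sk = vsock_find_bound_socket(&dst);

        if (sk) {
                /* 'zerocopy_rx_on' is the hypothetical per-socket flag. */
                zerocopy_rx = vsock_sk(sk)->zerocopy_rx_on;
                sock_put(sk);
        }

The lookup result could then be stashed in the packet, so that
virtio_transport_recv_pkt() does not have to repeat it, as Stefano
suggests.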
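One nit on the copy loop itself: kunmap() takes the struct page, not
the virtual address returned by kmap(), and kmap() cannot return NULL
(it sleeps until a mapping is available), so the NULL check is dead
code. A minimal, untested version of the loop with that fixed,
factored into a helper (the helper name is illustrative):

        /* Untested sketch: copy an iov_iter into a (possibly
         * high-order) page allocation one page at a time, since the
         * pages may be highmem and thus not permanently mapped.
         */
        static int vhost_vsock_copy_to_pages(struct page *buf_page, size_t len,
                                             struct iov_iter *iov_iter)
        {
                int page_idx = 0;

                while (len > 0) {
                        size_t to_copy = min_t(size_t, len, PAGE_SIZE);
                        void *mapped = kmap(buf_page + page_idx);
                        size_t nbytes = copy_from_iter(mapped, to_copy, iov_iter);

                        /* kunmap() wants the page, not the mapping address. */
                        kunmap(buf_page + page_idx);

                        if (nbytes != to_copy)
                                return -EFAULT;

                        len -= to_copy;
                        page_idx++;
                }

                return 0;
        }

The caller would then keep the vq_err() and
virtio_transport_free_pkt() error handling in one place.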