On Wed, Apr 28, 2010 at 01:57:12PM -0700, David L Stevens wrote: > This patch adds mergeable receive buffer support to vhost_net. > > Signed-off-by: David L Stevens <dlstevens@xxxxxxxxxx> I have applied this, thanks very much! I have also applied some tweaks on top, please take a look. Thanks, MSt commit 2809e94f5f26d89dc5232aaec753ffda95c4d95e Author: Michael S. Tsirkin <mst@xxxxxxxxxx> Date: Thu Apr 29 16:18:08 2010 +0300 vhost-net: minor tweaks in mergeable buffer code Applies the following tweaks on top of mergeable buffers patch: 1. vhost_get_desc_n assumes that all desriptors are 'in' only. It's also unlikely to be useful for any vhost frontend besides vhost_net, so just move it to net.c, and rename get_rx_bufs for brevity. 2. for rx, we called iov_length within vhost_get_desc_n (now get_rx_bufs) already, so we don't need an extra call to iov_length to avoid overflow anymore. Accordingly, copy_iovec_hdr can return void now. 3. for rx, do some further code tweaks: do not assign len = err as we check that err == len handle data length in a way similar to how we handle header length: datalen -> sock_len, len -> vhost_len. add sock_hlen as a local variable, for symmetry with vhost_hlen. 4. add some likely/unlikely annotations Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d61d945..85519b4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -74,9 +74,9 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to, } return seg; } -/* Copy iovec entries for len bytes from iovec. Return segments used. */ -static int copy_iovec_hdr(const struct iovec *from, struct iovec *to, - size_t len, int iovcount) +/* Copy iovec entries for len bytes from iovec. */ +static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, + size_t len, int iovcount) { int seg = 0; size_t size; @@ -89,7 +89,6 @@ static int copy_iovec_hdr(const struct iovec *from, struct iovec *to, ++to; ++seg; } - return seg; } /* Caller must have TX VQ lock */ @@ -204,7 +203,7 @@ static void handle_tx(struct vhost_net *net) unuse_mm(net->dev.mm); } -static int vhost_head_len(struct vhost_virtqueue *vq, struct sock *sk) +static int peek_head_len(struct vhost_virtqueue *vq, struct sock *sk) { struct sk_buff *head; int len = 0; @@ -212,17 +211,70 @@ static int vhost_head_len(struct vhost_virtqueue *vq, struct sock *sk) lock_sock(sk); head = skb_peek(&sk->sk_receive_queue); if (head) - len = head->len + vq->sock_hlen; + len = head->len; release_sock(sk); return len; } +/* This is a multi-buffer version of vhost_get_desc, that works if + * vq has read descriptors only. + * @vq - the relevant virtqueue + * @datalen - data length we'll be reading + * @iovcount - returned count of io vectors we fill + * @log - vhost log + * @log_num - log offset + * returns number of buffer heads allocated, negative on error + */ +static int get_rx_bufs(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + int datalen, + unsigned *iovcount, + struct vhost_log *log, + unsigned *log_num) +{ + unsigned int out, in; + int seg = 0; + int headcount = 0; + unsigned d; + int r; + + while (datalen > 0) { + if (unlikely(headcount >= VHOST_NET_MAX_SG)) { + r = -ENOBUFS; + goto err; + } + d = vhost_get_desc(vq->dev, vq, vq->iov + seg, + ARRAY_SIZE(vq->iov) - seg, &out, + &in, log, log_num); + if (d == vq->num) { + r = 0; + goto err; + } + if (unlikely(out || in <= 0)) { + vq_err(vq, "unexpected descriptor format for RX: " + "out %d, in %d\n", out, in); + r = -EINVAL; + goto err; + } + heads[headcount].id = d; + heads[headcount].len = iov_length(vq->iov + seg, in); + datalen -= heads[headcount].len; + ++headcount; + seg += in; + } + *iovcount = seg; + return headcount; +err: + vhost_discard_desc(vq, headcount); + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_rx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; - unsigned in, log, s; + unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, @@ -238,9 +290,10 @@ static void handle_rx(struct vhost_net *net) .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE }; - size_t len, total_len = 0; - int err, headcount, datalen; - size_t vhost_hlen; + size_t total_len = 0; + int err, headcount; + size_t vhost_hlen, sock_hlen; + size_t vhost_len, sock_len; struct socket *sock = rcu_dereference(vq->private_data); if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; @@ -249,14 +302,16 @@ static void handle_rx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(vq); vhost_hlen = vq->vhost_hlen; + sock_hlen = vq->sock_hlen; vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; - while ((datalen = vhost_head_len(vq, sock->sk))) { - headcount = vhost_get_desc_n(vq, vq->heads, - datalen + vhost_hlen, - &in, vq_log, &log); + while ((sock_len = peek_head_len(vq, sock->sk))) { + sock_len += sock_hlen; + vhost_len = sock_len + vhost_hlen; + headcount = get_rx_bufs(vq, vq->heads, vhost_len, + &in, vq_log, &log); if (headcount < 0) break; /* OK, now we need to know about added descriptors. */ @@ -272,34 +327,25 @@ static void handle_rx(struct vhost_net *net) break; } /* We don't need to be notified again. */ - if (vhost_hlen) + if (unlikely((vhost_hlen))) /* Skip header. TODO: support TSO. */ - s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); + move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); else - s = copy_iovec_hdr(vq->iov, vq->hdr, vq->sock_hlen, in); + copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); msg.msg_iovlen = in; - len = iov_length(vq->iov, in); - /* Sanity check */ - if (!len) { - vq_err(vq, "Unexpected header len for RX: " - "%zd expected %zd\n", - iov_length(vq->hdr, s), vhost_hlen); - break; - } err = sock->ops->recvmsg(NULL, sock, &msg, - len, MSG_DONTWAIT | MSG_TRUNC); + sock_len, MSG_DONTWAIT | MSG_TRUNC); /* TODO: Check specific error and bomb out unless EAGAIN? */ if (err < 0) { vhost_discard_desc(vq, headcount); break; } - if (err != datalen) { + if (err != sock_len) { pr_err("Discarded rx packet: " - " len %d, expected %zd\n", err, datalen); + " len %d, expected %zd\n", err, sock_len); vhost_discard_desc(vq, headcount); continue; } - len = err; if (vhost_hlen && memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, vhost_hlen)) { @@ -311,17 +357,16 @@ static void handle_rx(struct vhost_net *net) if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, offsetof(typeof(hdr), num_buffers), - sizeof(hdr.num_buffers))) { + sizeof hdr.num_buffers)) { vq_err(vq, "Failed num_buffers write"); vhost_discard_desc(vq, headcount); break; } - len += vhost_hlen; vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, len); - total_len += len; + vhost_log_write(vq, vq_log, log, vhost_len); + total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); break; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8ef5e3f..4b49991 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -862,53 +862,6 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, return 0; } -/* This is a multi-buffer version of vhost_get_desc - * @vq - the relevant virtqueue - * datalen - data length we'll be reading - * @iovcount - returned count of io vectors we fill - * @log - vhost log - * @log_num - log offset - * returns number of buffer heads allocated, negative on error - */ -int vhost_get_desc_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, - int datalen, unsigned *iovcount, struct vhost_log *log, - unsigned int *log_num) -{ - unsigned int out, in; - int seg = 0; - int headcount = 0; - int r; - - while (datalen > 0) { - if (headcount >= VHOST_NET_MAX_SG) { - r = -ENOBUFS; - goto err; - } - heads[headcount].id = vhost_get_desc(vq->dev, vq, vq->iov + seg, - ARRAY_SIZE(vq->iov) - seg, &out, - &in, log, log_num); - if (heads[headcount].id == vq->num) { - r = 0; - goto err; - } - if (out || in <= 0) { - vq_err(vq, "unexpected descriptor format for RX: " - "out %d, in %d\n", out, in); - r = -EINVAL; - goto err; - } - heads[headcount].len = iov_length(vq->iov + seg, in); - datalen -= heads[headcount].len; - ++headcount; - seg += in; - } - *iovcount = seg; - return headcount; -err: - vhost_discard_desc(vq, headcount); - return r; -} - /* This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 08d740a..4c9809e 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -122,9 +122,6 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); -int vhost_get_desc_n(struct vhost_virtqueue *, struct vring_used_elem *heads, - int datalen, unsigned int *iovcount, struct vhost_log *log, - unsigned int *log_num); unsigned vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *, struct iovec iov[], unsigned int iov_count, unsigned int *out_num, unsigned int *in_num, -- MST -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html