On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote: > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote: > > On 17.08.2022 08:01, Arseniy Krasnov wrote: > > > On 16.08.2022 05:32, Bobby Eshleman wrote: > > > > CC'ing virtio-dev@xxxxxxxxxxxxxxxxxxxx > > > > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote: > > > > > This patch supports dgram in virtio and on the vhost side. > > > Hello, > > > > > > sorry, i don't understand, how this maintains message boundaries? > > > Or it > > > is unnecessary for SOCK_DGRAM? > > > > > > Thanks > > > > > Signed-off-by: Jiang Wang <jiang.wang@xxxxxxxxxxxxx> > > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@xxxxxxxxxxxxx> > > > > > --- > > > > > drivers/vhost/vsock.c | 2 +- > > > > > include/net/af_vsock.h | 2 + > > > > > include/uapi/linux/virtio_vsock.h | 1 + > > > > > net/vmw_vsock/af_vsock.c | 26 +++- > > > > > net/vmw_vsock/virtio_transport.c | 2 +- > > > > > net/vmw_vsock/virtio_transport_common.c | 173 > > > > > ++++++++++++++++++++++-- > > > > > 6 files changed, 186 insertions(+), 20 deletions(-) > > > > > > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > > > > index a5d1bdb786fe..3dc72a5647ca 100644 > > > > > --- a/drivers/vhost/vsock.c > > > > > +++ b/drivers/vhost/vsock.c > > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void) > > > > > int ret; > > > > > > > > > > ret = vsock_core_register(&vhost_transport.transport, > > > > > - VSOCK_TRANSPORT_F_H2G); > > > > > + VSOCK_TRANSPORT_F_H2G | > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > if (ret < 0) > > > > > return ret; > > > > > > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > > > > > index 1c53c4c4d88f..37e55c81e4df 100644 > > > > > --- a/include/net/af_vsock.h > > > > > +++ b/include/net/af_vsock.h > > > > > @@ -78,6 +78,8 @@ struct vsock_sock { > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk); > > > > > s64 vsock_stream_has_space(struct vsock_sock 
*vsk); > > > > > struct sock *vsock_create_connected(struct sock *parent); > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > + struct sockaddr_vm *addr); > > > > > > > > > > /**** TRANSPORT ****/ > > > > > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h > > > > > b/include/uapi/linux/virtio_vsock.h > > > > > index 857df3a3a70d..0975b9c88292 100644 > > > > > --- a/include/uapi/linux/virtio_vsock.h > > > > > +++ b/include/uapi/linux/virtio_vsock.h > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr { > > > > > enum virtio_vsock_type { > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1, > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2, > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3, > > > > > }; > > > > > > > > > > enum virtio_vsock_op { > > > > > diff --git a/net/vmw_vsock/af_vsock.c > > > > > b/net/vmw_vsock/af_vsock.c > > > > > index 1893f8aafa48..87e4ae1866d3 100644 > > > > > --- a/net/vmw_vsock/af_vsock.c > > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > > @@ -675,6 +675,19 @@ static int > > > > > __vsock_bind_connectible(struct vsock_sock *vsk, > > > > > return 0; > > > > > } > > > > > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk, > > > > > + struct sockaddr_vm *addr) > > > > > +{ > > > > > + int retval; > > > > > + > > > > > + spin_lock_bh(&vsock_table_lock); > > > > > + retval = __vsock_bind_connectible(vsk, addr); > > > > > + spin_unlock_bh(&vsock_table_lock); > > > > > + > > > > > + return retval; > > > > > +} > > > > > +EXPORT_SYMBOL(vsock_bind_stream); > > > > > + > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk, > > > > > struct sockaddr_vm *addr) > > > > > { > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct > > > > > vsock_transport *t, int features) > > > > > } > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) { > > > > > - if (t_dgram) { > > > > > - err = -EBUSY; > > > > > - goto err_busy; > > > > > + /* TODO: always chose the G2H variant over > > > > > others, support nesting later */ > > > > > + 
if (features & VSOCK_TRANSPORT_F_G2H) { > > > > > + if (t_dgram) > > > > > + pr_warn("virtio_vsock: t_dgram > > > > > already set\n"); > > > > > + t_dgram = t; > > > > > + } > > > > > + > > > > > + if (!t_dgram) { > > > > > + t_dgram = t; > > > > > } > > > > > - t_dgram = t; > > > > > } > > > > > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) { > > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > > > b/net/vmw_vsock/virtio_transport.c > > > > > index 073314312683..d4526ca462d2 100644 > > > > > --- a/net/vmw_vsock/virtio_transport.c > > > > > +++ b/net/vmw_vsock/virtio_transport.c > > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void) > > > > > return -ENOMEM; > > > > > > > > > > ret = vsock_core_register(&virtio_transport.transport, > > > > > - VSOCK_TRANSPORT_F_G2H); > > > > > + VSOCK_TRANSPORT_F_G2H | > > > > > VSOCK_TRANSPORT_F_DGRAM); > > > > > if (ret) > > > > > goto out_wq; > > > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > > > > > b/net/vmw_vsock/virtio_transport_common.c > > > > > index bdf16fff054f..aedb48728677 100644 > > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c > > > > > @@ -229,7 +229,9 @@ > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > > > > > > > static u16 virtio_transport_get_type(struct sock *sk) > > > > > { > > > > > - if (sk->sk_type == SOCK_STREAM) > > > > > + if (sk->sk_type == SOCK_DGRAM) > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM; > > > > > + else if (sk->sk_type == SOCK_STREAM) > > > > > return VIRTIO_VSOCK_TYPE_STREAM; > > > > > else > > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET; > > > > > @@ -287,22 +289,29 @@ static int > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk, > > > > > vvs = vsk->trans; > > > > > > > > > > /* we can send less than pkt_len bytes */ > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > + if 
(pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > + pkt_len = > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; > > > > > + else > > > > > + return 0; > > > > > + } > > > > > > > > > > - /* virtio_transport_get_credit might return less than > > > > > pkt_len credit */ > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len); > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > + /* virtio_transport_get_credit might return > > > > > less than pkt_len credit */ > > > > > + pkt_len = virtio_transport_get_credit(vvs, > > > > > pkt_len); > > > > > > > > > > - /* Do not send zero length OP_RW pkt */ > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) > > > > > - return pkt_len; > > > > > + /* Do not send zero length OP_RW pkt */ > > > > > + if (pkt_len == 0 && info->op == > > > > > VIRTIO_VSOCK_OP_RW) > > > > > + return pkt_len; > > > > > + } > > > > > > > > > > skb = virtio_transport_alloc_skb(info, pkt_len, > > > > > src_cid, src_port, > > > > > dst_cid, dst_port, > > > > > &err); > > > > > if (!skb) { > > > > > - virtio_transport_put_credit(vvs, pkt_len); > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) > > > > > + virtio_transport_put_credit(vvs, > > > > > pkt_len); > > > > > return err; > > > > > } > > > > > > > > > > @@ -586,6 +595,61 @@ > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); > > > > > > > > > > +static ssize_t > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk, > > > > > + struct msghdr *msg, size_t > > > > > len) > > > > > +{ > > > > > + struct virtio_vsock_sock *vvs = vsk->trans; > > > > > + struct sk_buff *skb; > > > > > + size_t total = 0; > > > > > + u32 free_space; > > > > > + int err = -EFAULT; > > > > > + > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs- > > > > > >rx_queue)) { > > > > > + skb 
= __skb_dequeue(&vvs->rx_queue); > > > > > + > > > > > + total = len; > > > > > + if (total > skb->len - vsock_metadata(skb)- > > > > > >off) > > > > > + total = skb->len - vsock_metadata(skb)- > > > > > >off; > > > > > + else if (total < skb->len - > > > > > vsock_metadata(skb)->off) > > > > > + msg->msg_flags |= MSG_TRUNC; > > > > > + > > > > > + /* sk_lock is held by caller so no one else can > > > > > dequeue. > > > > > + * Unlock rx_lock since memcpy_to_msg() may > > > > > sleep. > > > > > + */ > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > + > > > > > + err = memcpy_to_msg(msg, skb->data + > > > > > vsock_metadata(skb)->off, total); > > > > > + if (err) > > > > > + return err; > > > > > + > > > > > + spin_lock_bh(&vvs->rx_lock); > > > > > + > > > > > + virtio_transport_dec_rx_pkt(vvs, skb); > > > > > + consume_skb(skb); > > > > > + } > > > > > + > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs- > > > > > >last_fwd_cnt); > > > > > + > > > > > + spin_unlock_bh(&vvs->rx_lock); > > > > > + > > > > > + if (total > 0 && msg->msg_name) { > > > > > + /* Provide the address of the sender. 
*/ > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, > > > > > msg->msg_name); > > > > > + > > > > > + vsock_addr_init(vm_addr, > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid), > > > > > + le32_to_cpu(vsock_hdr(skb)- > > > > > >src_port)); > > > > > + msg->msg_namelen = sizeof(*vm_addr); > > > > > + } > > > > > + return total; > > > > > +} > > > > > + > > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock > > > > > *vsk) > > > > > +{ > > > > > + return virtio_transport_stream_has_data(vsk); > > > > > +} > > > > > + > > > > > int > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct > > > > > vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > size_t len, int flags) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + struct sock *sk; > > > > > + size_t err = 0; > > > > > + long timeout; > > > > > + > > > > > + DEFINE_WAIT(wait); > > > > > + > > > > > + sk = &vsk->sk; > > > > > + err = 0; > > > > > + > > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & > > > > > MSG_PEEK) > > > > > + return -EOPNOTSUPP; > > > > > + > > > > > + lock_sock(sk); > > > > > + > > > > > + if (!len) > > > > > + goto out; > > > > > + > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); > > > > > + > > > > > + while (1) { > > > > > + s64 ready; > > > > > + > > > > > + prepare_to_wait(sk_sleep(sk), &wait, > > > > > TASK_INTERRUPTIBLE); > > > > > + ready = virtio_transport_dgram_has_data(vsk); > > > > > + > > > > > + if (ready == 0) { > > > > > + if (timeout == 0) { > > > > > + err = -EAGAIN; > > > > > + finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } > > > > > + > > > > > + release_sock(sk); > > > > > + timeout = schedule_timeout(timeout); > > > > > + lock_sock(sk); > > > > > + > > > > > + if (signal_pending(current)) { > > > > > + err = sock_intr_errno(timeout); > > > > > + 
finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } else if (timeout == 0) { > > > > > + err = -EAGAIN; > > > > > + finish_wait(sk_sleep(sk), > > > > > &wait); > > > > > + break; > > > > > + } > > > > > + } else { > > > > > + finish_wait(sk_sleep(sk), &wait); > > > > > + > > > > > + if (ready < 0) { > > > > > + err = -ENOMEM; > > > > > + goto out; > > > > > + } > > > > > + > > > > > + err = > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len); > > > > > + break; > > > > > + } > > > > > + } > > > > > +out: > > > > > + release_sock(sk); > > > > > + return err; > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); > > ^^^ > > May be, this generic data waiting logic should be in af_vsock.c, as > > for stream/seqpacket? > > In this way, another transport which supports SOCK_DGRAM could > > reuse it. > > I think that is a great idea. I'll test that change for v2. > > Thanks. Also for v2: I tested your patchset a little (writing it here so as not to spread it over several mails): 1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue). 2) I can't rmmod with the following config (after testing): CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS_COMMON=m CONFIG_VHOST=m CONFIG_VHOST_VSOCK=m The guest is shut down, but rmmod fails. 3) virtio_transport_init and virtio_transport_exit seem to need EXPORT_SYMBOL_GPL(), because both are used in another module. 4) I tried to send a 5 KB (or 20 KB, the size doesn't matter) piece of data, but got a kernel panic in the guest and later in the host. 
Thank You > > > > > > > > > > > @@ -819,13 +942,13 @@ > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk, > > > > > struct sockaddr_vm *addr) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + return vsock_bind_stream(vsk, addr); > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); > > > > > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port) > > > > > { > > > > > - return false; > > > > > + return true; > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); > > > > > > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct > > > > > vsock_sock *vsk, > > > > > struct msghdr *msg, > > > > > size_t dgram_len) > > > > > { > > > > > - return -EOPNOTSUPP; > > > > > + struct virtio_vsock_pkt_info info = { > > > > > + .op = VIRTIO_VSOCK_OP_RW, > > > > > + .msg = msg, > > > > > + .pkt_len = dgram_len, > > > > > + .vsk = vsk, > > > > > + .remote_cid = remote_addr->svm_cid, > > > > > + .remote_port = remote_addr->svm_port, > > > > > + }; > > > > > + > > > > > + return virtio_transport_send_pkt_info(vsk, &info); > > > > > } > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); > > > > > > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct > > > > > sock *sk, > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb); > > > > > int err = 0; > > > > > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) == > > > > > VIRTIO_VSOCK_TYPE_DGRAM) { > > > > > + virtio_transport_recv_enqueue(vsk, skb); > > > > > + sk->sk_data_ready(sk); > > > > > + return err; > > > > > + } > > > > > + > > > > > switch (le16_to_cpu(hdr->op)) { > > > > > case VIRTIO_VSOCK_OP_RW: > > > > > virtio_transport_recv_enqueue(vsk, skb); > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct > > > > > sock *sk, struct sk_buff *skb, > > > > > static bool virtio_transport_valid_type(u16 type) > > > > > { > > > > > return (type == 
VIRTIO_VSOCK_TYPE_STREAM) || > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM); > > > > > } > > > > > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost- > > > > > vsock's vq->mutex > > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct > > > > > virtio_transport *t, > > > > > goto free_pkt; > > > > > } > > > > > > > > > > + if (sk->sk_type == SOCK_DGRAM) { > > > > > + virtio_transport_recv_connected(sk, skb); > > > > > + goto out; > > > > > + } > > > > > + > > > > > space_available = virtio_transport_space_update(sk, > > > > > skb); > > > > > > > > > > /* Update CID in case it has changed after a transport > > > > > reset event */ > > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct > > > > > virtio_transport *t, > > > > > break; > > > > > } > > > > > > > > > > +out: > > > > > release_sock(sk); > > > > > > > > > > /* Release refcnt obtained when we fetched this socket > > > > > out of the > > > > > -- > > > > > 2.35.1 > > > > > > > > > > > > > ------------------------------------------------------------- > > > > -------- > > > > To unsubscribe, e-mail: > > > > virtio-dev-unsubscribe@xxxxxxxxxxxxxxxxxxxx > > > > For additional commands, e-mail: > > > > virtio-dev-help@xxxxxxxxxxxxxxxxxxxx > > > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: virtio-dev-unsubscribe@xxxxxxxxxxxxxxxxxxxx > For additional commands, e-mail: virtio-dev-help@xxxxxxxxxxxxxxxxxxxx >