On 17.08.2022 08:01, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
>> CC'ing virtio-dev@xxxxxxxxxxxxxxxxxxxx
>>
>> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>>> This patch supports dgram in virtio and on the vhost side.
> Hello,
>
> sorry, I don't understand how this maintains message boundaries. Or is
> it unnecessary for SOCK_DGRAM?
>
> Thanks
>>>
>>> Signed-off-by: Jiang Wang <jiang.wang@xxxxxxxxxxxxx>
>>> Signed-off-by: Bobby Eshleman <bobby.eshleman@xxxxxxxxxxxxx>
>>> ---
>>>  drivers/vhost/vsock.c                   |   2 +-
>>>  include/net/af_vsock.h                  |   2 +
>>>  include/uapi/linux/virtio_vsock.h       |   1 +
>>>  net/vmw_vsock/af_vsock.c                |  26 +++-
>>>  net/vmw_vsock/virtio_transport.c        |   2 +-
>>>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index a5d1bdb786fe..3dc72a5647ca 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>>  	int ret;
>>>
>>>  	ret = vsock_core_register(&vhost_transport.transport,
>>> -				  VSOCK_TRANSPORT_F_H2G);
>>> +				  VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>>>  	if (ret < 0)
>>>  		return ret;
>>>
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index 1c53c4c4d88f..37e55c81e4df 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>>  struct sock *vsock_create_connected(struct sock *parent);
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +		      struct sockaddr_vm *addr);
>>>
>>>  /**** TRANSPORT ****/
>>>
>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>> index 857df3a3a70d..0975b9c88292 100644
>>> --- a/include/uapi/linux/virtio_vsock.h
>>> +++ b/include/uapi/linux/virtio_vsock.h
>>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>>  enum virtio_vsock_type {
>>>  	VIRTIO_VSOCK_TYPE_STREAM = 1,
>>>  	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>>> +	VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>>  };
>>>
>>>  enum virtio_vsock_op {
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index 1893f8aafa48..87e4ae1866d3 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>>>  	return 0;
>>>  }
>>>
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +		      struct sockaddr_vm *addr)
>>> +{
>>> +	int retval;
>>> +
>>> +	spin_lock_bh(&vsock_table_lock);
>>> +	retval = __vsock_bind_connectible(vsk, addr);
>>> +	spin_unlock_bh(&vsock_table_lock);
>>> +
>>> +	return retval;
>>> +}
>>> +EXPORT_SYMBOL(vsock_bind_stream);
>>> +
>>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>>  			      struct sockaddr_vm *addr)
>>>  {
>>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>>>  	}
>>>
>>>  	if (features & VSOCK_TRANSPORT_F_DGRAM) {
>>> -		if (t_dgram) {
>>> -			err = -EBUSY;
>>> -			goto err_busy;
>>> +		/* TODO: always choose the G2H variant over others, support nesting later */
>>> +		if (features & VSOCK_TRANSPORT_F_G2H) {
>>> +			if (t_dgram)
>>> +				pr_warn("virtio_vsock: t_dgram already set\n");
>>> +			t_dgram = t;
>>> +		}
>>> +
>>> +		if (!t_dgram) {
>>> +			t_dgram = t;
>>>  		}
>>> -		t_dgram = t;
>>>  	}
>>>
>>>  	if (features & VSOCK_TRANSPORT_F_LOCAL) {
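
One note on the vsock_core_register() change above: with the -EBUSY path
gone, a second non-G2H transport that registers VSOCK_TRANSPORT_F_DGRAM is
now silently ignored once t_dgram is set (no error; only the G2H case even
warns). If the intended rule is simply "G2H wins, otherwise first
registrant", the two ifs could collapse into one. An untested sketch that
keeps the existing pr_warn():

	if (features & VSOCK_TRANSPORT_F_DGRAM) {
		/* TODO: prefer the G2H variant until nesting is supported */
		if (features & VSOCK_TRANSPORT_F_G2H) {
			if (t_dgram)
				pr_warn("virtio_vsock: t_dgram already set\n");
			t_dgram = t;
		} else if (!t_dgram) {
			t_dgram = t;
		}
	}
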
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index 073314312683..d4526ca462d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>>  		return -ENOMEM;
>>>
>>>  	ret = vsock_core_register(&virtio_transport.transport,
>>> -				  VSOCK_TRANSPORT_F_G2H);
>>> +				  VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>>>  	if (ret)
>>>  		goto out_wq;
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>> index bdf16fff054f..aedb48728677 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>>
>>>  static u16 virtio_transport_get_type(struct sock *sk)
>>>  {
>>> -	if (sk->sk_type == SOCK_STREAM)
>>> +	if (sk->sk_type == SOCK_DGRAM)
>>> +		return VIRTIO_VSOCK_TYPE_DGRAM;
>>> +	else if (sk->sk_type == SOCK_STREAM)
>>>  		return VIRTIO_VSOCK_TYPE_STREAM;
>>>  	else
>>>  		return VIRTIO_VSOCK_TYPE_SEQPACKET;
>>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>>  	vvs = vsk->trans;
>>>
>>>  	/* we can send less than pkt_len bytes */
>>> -	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>>> -		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +			pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +		else
>>> +			return 0;
>>> +	}
>>>
>>> -	/* virtio_transport_get_credit might return less than pkt_len credit */
>>> -	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>> +	if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +		/* virtio_transport_get_credit might return less than pkt_len credit */
>>> +		pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>>
>>> -	/* Do not send zero length OP_RW pkt */
>>> -	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> -		return pkt_len;
>>> +		/* Do not send zero length OP_RW pkt */
>>> +		if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> +			return pkt_len;
>>> +	}
>>>
>>>  	skb = virtio_transport_alloc_skb(info, pkt_len,
>>>  					 src_cid, src_port,
>>>  					 dst_cid, dst_port,
>>>  					 &err);
>>>  	if (!skb) {
>>> -		virtio_transport_put_credit(vvs, pkt_len);
>>> +		if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +			virtio_transport_put_credit(vvs, pkt_len);
>>>  		return err;
>>>  	}
>>>
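
Returning 0 for an oversized datagram here seems surprising from the
caller's side: sendto() would report "0 bytes sent" while the datagram is
quietly dropped, whereas UDP fails such sends with -EMSGSIZE. Something
like this might be closer to what datagram users expect (untested
suggestion):

	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
		/* Datagrams are never fragmented; fail instead of dropping */
		if (info->type == VIRTIO_VSOCK_TYPE_DGRAM)
			return -EMSGSIZE;
		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
	}
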
>>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>>
>>> +static ssize_t
>>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>>> +				  struct msghdr *msg, size_t len)
>>> +{
>>> +	struct virtio_vsock_sock *vvs = vsk->trans;
>>> +	struct sk_buff *skb;
>>> +	size_t total = 0;
>>> +	u32 free_space;
>>> +	int err = -EFAULT;
>>> +
>>> +	spin_lock_bh(&vvs->rx_lock);
>>> +	if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>>> +		skb = __skb_dequeue(&vvs->rx_queue);
>>> +
>>> +		total = len;
>>> +		if (total > skb->len - vsock_metadata(skb)->off)
>>> +			total = skb->len - vsock_metadata(skb)->off;
>>> +		else if (total < skb->len - vsock_metadata(skb)->off)
>>> +			msg->msg_flags |= MSG_TRUNC;
>>> +
>>> +		/* sk_lock is held by caller so no one else can dequeue.
>>> +		 * Unlock rx_lock since memcpy_to_msg() may sleep.
>>> +		 */
>>> +		spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +		err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
>>> +		if (err)
>>> +			return err;
>>> +
>>> +		spin_lock_bh(&vvs->rx_lock);
>>> +
>>> +		virtio_transport_dec_rx_pkt(vvs, skb);
>>> +		consume_skb(skb);
>>> +	}
>>> +
>>> +	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>>> +
>>> +	spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +	if (total > 0 && msg->msg_name) {
>>> +		/* Provide the address of the sender. */
>>> +		DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>>> +
>>> +		vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>>> +				le32_to_cpu(vsock_hdr(skb)->src_port));
>>> +		msg->msg_namelen = sizeof(*vm_addr);
>>> +	}
>>> +	return total;
>>> +}
>>> +
>>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>>> +{
>>> +	return virtio_transport_stream_has_data(vsk);
>>> +}
>>> +
>>>  int
>>>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>>>  				   struct msghdr *msg,
>>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>>>  			       struct msghdr *msg,
>>>  			       size_t len, int flags)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	struct sock *sk;
>>> +	size_t err = 0;
>>> +	long timeout;
>>> +
>>> +	DEFINE_WAIT(wait);
>>> +
>>> +	sk = &vsk->sk;
>>> +	err = 0;
>>> +
>>> +	if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	lock_sock(sk);
>>> +
>>> +	if (!len)
>>> +		goto out;
>>> +
>>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>> +
>>> +	while (1) {
>>> +		s64 ready;
>>> +
>>> +		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>>> +		ready = virtio_transport_dgram_has_data(vsk);
>>> +
>>> +		if (ready == 0) {
>>> +			if (timeout == 0) {
>>> +				err = -EAGAIN;
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			}
>>> +
>>> +			release_sock(sk);
>>> +			timeout = schedule_timeout(timeout);
>>> +			lock_sock(sk);
>>> +
>>> +			if (signal_pending(current)) {
>>> +				err = sock_intr_errno(timeout);
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			} else if (timeout == 0) {
>>> +				err = -EAGAIN;
>>> +				finish_wait(sk_sleep(sk), &wait);
>>> +				break;
>>> +			}
>>> +		} else {
>>> +			finish_wait(sk_sleep(sk), &wait);
>>> +
>>> +			if (ready < 0) {
>>> +				err = -ENOMEM;
>>> +				goto out;
>>> +			}
>>> +
>>> +			err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>>> +			break;
>>> +		}
>>> +	}
>>> +out:
>>> +	release_sock(sk);
>>> +	return err;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

^^^ Maybe this generic data waiting logic should be in af_vsock.c, as for
stream/seqpacket? That way, another transport which supports SOCK_DGRAM
could reuse it.
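
Agreed, that would mirror how the stream/seqpacket wait loops already live
in af_vsock.c. A rough, untested sketch of what such a helper could look
like; the name vsock_dgram_wait_data() and the dgram_has_data transport op
are made up for illustration:

	/* Wait for pending datagrams or timeout; called with sk locked. */
	static int vsock_dgram_wait_data(struct vsock_sock *vsk, long *timeo)
	{
		struct sock *sk = sk_vsock(vsk);
		DEFINE_WAIT(wait);
		int err = 0;

		for (;;) {
			/* Arm the wait before testing the condition so a
			 * wakeup between test and sleep is not lost.
			 */
			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

			if (vsk->transport->dgram_has_data(vsk)) /* hypothetical op */
				break;

			if (*timeo == 0) {
				err = -EAGAIN;
				break;
			}

			release_sock(sk);
			*timeo = schedule_timeout(*timeo);
			lock_sock(sk);

			if (signal_pending(current)) {
				err = sock_intr_errno(*timeo);
				break;
			}
		}

		finish_wait(sk_sleep(sk), &wait);
		return err;
	}

Two smaller things in the code above while we're here: `err` in
virtio_transport_dgram_dequeue() is a size_t but gets assigned negative
errno values, so it probably wants to be ssize_t; and in
virtio_transport_dgram_do_dequeue() the vsock_addr_init() at the bottom
reads vsock_hdr(skb) after consume_skb() has already freed the skb, so the
source cid/port need to be saved before the skb is consumed (the skb also
leaks if memcpy_to_msg() fails).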
>>>
>>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>>>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>>>  				struct sockaddr_vm *addr)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	return vsock_bind_stream(vsk, addr);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>>
>>>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
>>>  {
>>> -	return false;
>>> +	return true;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>>
>>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>>>  			       struct msghdr *msg,
>>>  			       size_t dgram_len)
>>>  {
>>> -	return -EOPNOTSUPP;
>>> +	struct virtio_vsock_pkt_info info = {
>>> +		.op = VIRTIO_VSOCK_OP_RW,
>>> +		.msg = msg,
>>> +		.pkt_len = dgram_len,
>>> +		.vsk = vsk,
>>> +		.remote_cid = remote_addr->svm_cid,
>>> +		.remote_port = remote_addr->svm_port,
>>> +	};
>>> +
>>> +	return virtio_transport_send_pkt_info(vsk, &info);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>>
>>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>>>  	struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>>>  	int err = 0;
>>>
>>> +	if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +		virtio_transport_recv_enqueue(vsk, skb);
>>> +		sk->sk_data_ready(sk);
>>> +		return err;
>>> +	}
>>> +
>>>  	switch (le16_to_cpu(hdr->op)) {
>>>  	case VIRTIO_VSOCK_OP_RW:
>>>  		virtio_transport_recv_enqueue(vsk, skb);
>>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>>>  static bool virtio_transport_valid_type(u16 type)
>>>  {
>>>  	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>>> -	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>>> +	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>>> +	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
>>>  }
>>>
>>>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>>  		goto free_pkt;
>>>  	}
>>>
>>> +	if (sk->sk_type == SOCK_DGRAM) {
>>> +		virtio_transport_recv_connected(sk, skb);
>>> +		goto out;
>>> +	}
>>> +
>>>  	space_available = virtio_transport_space_update(sk, skb);
>>>
>>>  	/* Update CID in case it has changed after a transport reset event */
>>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>>  		break;
>>>  	}
>>>
>>> +out:
>>>  	release_sock(sk);
>>>
>>>  	/* Release refcnt obtained when we fetched this socket out of the
>>> --
>>> 2.35.1
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: virtio-dev-unsubscribe@xxxxxxxxxxxxxxxxxxxx
>> For additional commands, e-mail: virtio-dev-help@xxxxxxxxxxxxxxxxxxxx
>>
>
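
On the message boundary question at the top of the thread: from the diff it
looks like boundaries are mostly preserved, since each recvfrom() pops
exactly one skb (setting MSG_TRUNC on short reads) and the send side never
fragments. One thing worth double-checking, though, is that the dgram RX
path reuses virtio_transport_recv_enqueue(), which for streams can merge a
small packet into the tail of the previous one; if that branch can still
fire for dgram sockets, two datagrams could be coalesced. From userspace
this is otherwise the usual datagram API; an illustrative snippet (not part
of the patch):

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/vm_sockets.h>

	/* Bind a vsock datagram socket; returns the fd or -1 on error. */
	int vsock_dgram_bind(unsigned int port)
	{
		struct sockaddr_vm addr;
		int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);

		if (fd < 0)
			return -1;

		memset(&addr, 0, sizeof(addr));
		addr.svm_family = AF_VSOCK;
		addr.svm_cid = VMADDR_CID_ANY;	/* accept on any local CID */
		addr.svm_port = port;

		/* Each recvfrom() on this socket should return exactly one
		 * datagram; a too-small buffer truncates and sets MSG_TRUNC.
		 */
		if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}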