Hi Eduardo, (Cc Will who maintains kvmtool) On Wed, Oct 04, 2023 at 05:49:45PM -0300, Eduardo Bart wrote: > I'm experiencing a segmentation fault in lkvm where it may crash after > powering off a guest machine that uses a virtio network device. > The crash is hard to reproduce, because looks like it only happens > when the guest machine is powering off while extra virtio threads is > doing some work, > when it happens lkvm crashes in the function virtio_net_rx_thread > while attempting to read invalid guest physical memory, > because guest physical memory was unmapped. > > I've isolated the problem and looks like when lkvm exits it unmaps the > guest memory while virtio device extra threads may still be executing. > I noticed most virtio devices are not executing pthread_cancel + > pthread_join to synchronize extra threads when exiting, > to make sure this happens I added explicit calls to the virtio device > exit function to all virtio devices, > which should cancel and join all threads before unmapping guest > physical memory, fixing the crash for me. > > Below I'm attaching a patch to fix the issue, feel free to apply or > fix the issue some other way. > > Signed-off-by: Eduardo Bart <edub4rt@xxxxxxxxx> The patch doesn't apply for some reason, there seems to be whitespace issues, tabs replaced by spaces. Looks correct otherwise. vCPUs are stopped first, then virtio exit, and memory is unmapped last. This also fixes runtime virtqueue reset for virtio-balloon. > > --- > include/kvm/virtio-9p.h | 1 + > virtio/9p.c | 14 ++++++++++++++ > virtio/balloon.c | 11 +++++++++++ > virtio/blk.c | 1 + > virtio/console.c | 3 +++ > virtio/net.c | 4 ++++ > virtio/rng.c | 8 ++++++++ > 7 files changed, 42 insertions(+) > > diff --git a/include/kvm/virtio-9p.h b/include/kvm/virtio-9p.h > index 1dffc95..09f7e46 100644 > --- a/include/kvm/virtio-9p.h > +++ b/include/kvm/virtio-9p.h > @@ -70,6 +70,7 @@ int virtio_9p_rootdir_parser(const struct option > *opt, const char *arg, int unse > int virtio_9p_img_name_parser(const struct option *opt, const char > *arg, int unset); > int virtio_9p__register(struct kvm *kvm, const char *root, const char > *tag_name); > int virtio_9p__init(struct kvm *kvm); > +int virtio_9p__exit(struct kvm *kvm); > int virtio_p9_pdu_readf(struct p9_pdu *pdu, const char *fmt, ...); > int virtio_p9_pdu_writef(struct p9_pdu *pdu, const char *fmt, ...); > > diff --git a/virtio/9p.c b/virtio/9p.c > index 513164e..f536d9e 100644 > --- a/virtio/9p.c > +++ b/virtio/9p.c > @@ -1562,6 +1562,20 @@ int virtio_9p__init(struct kvm *kvm) > } > virtio_dev_init(virtio_9p__init); > > +int virtio_9p__exit(struct kvm *kvm) > +{ > + struct p9_dev *p9dev, *tmp; > + > + list_for_each_entry_safe(p9dev, tmp, &devs, list) { > + list_del(&p9dev->list); > + p9dev->vdev.ops->exit(kvm, &p9dev->vdev); Introducing a virtio_exit(kvm, vdev) helper would look neater. It could also check if vdev.ops is set > + free(p9dev); > + } > + > + return 0; > +} > +virtio_dev_exit(virtio_9p__exit); > + > int virtio_9p__register(struct kvm *kvm, const char *root, const char > *tag_name) > { > struct p9_dev *p9dev; > diff --git a/virtio/balloon.c b/virtio/balloon.c > index 01d1982..a36e50e 100644 > --- a/virtio/balloon.c > +++ b/virtio/balloon.c > @@ -221,6 +221,13 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq) > return 0; > } > > +static void exit_vq(struct kvm *kvm, void *dev, u32 vq) > +{ > + struct bln_dev *bdev = dev; > + > + thread_pool__cancel_job(&bdev->jobs[vq]); > +} > + > static int notify_vq(struct kvm *kvm, void *dev, u32 vq) > { > struct bln_dev *bdev = dev; > @@ -258,6 +265,7 @@ struct virtio_ops bln_dev_virtio_ops = { > .get_config_size = get_config_size, > .get_host_features = get_host_features, > .init_vq = init_vq, > + .exit_vq = exit_vq, > .notify_vq = notify_vq, > .get_vq = get_vq, > .get_size_vq = get_size_vq, > @@ -293,6 +301,9 @@ virtio_dev_init(virtio_bln__init); > > int virtio_bln__exit(struct kvm *kvm) > { > + if (bdev.vdev.ops) > + bdev.vdev.ops->exit(kvm, &bdev.vdev); > + > return 0; > } > virtio_dev_exit(virtio_bln__exit); > diff --git a/virtio/blk.c b/virtio/blk.c > index a58c745..e34723a 100644 > --- a/virtio/blk.c > +++ b/virtio/blk.c > @@ -345,6 +345,7 @@ static int virtio_blk__init_one(struct kvm *kvm, > struct disk_image *disk) > static int virtio_blk__exit_one(struct kvm *kvm, struct blk_dev *bdev) > { > list_del(&bdev->list); > + bdev->vdev.ops->exit(kvm, &bdev->vdev); > free(bdev); > > return 0; > diff --git a/virtio/console.c b/virtio/console.c > index ebfbaf0..5a71bbc 100644 > --- a/virtio/console.c > +++ b/virtio/console.c > @@ -243,6 +243,9 @@ virtio_dev_init(virtio_console__init); > > int virtio_console__exit(struct kvm *kvm) > { > + if (cdev.vdev.ops) > + cdev.vdev.ops->exit(kvm, &cdev.vdev); > + > return 0; > } > virtio_dev_exit(virtio_console__exit); > diff --git a/virtio/net.c b/virtio/net.c > index f09dd0a..dc6d89d 100644 > --- a/virtio/net.c > +++ b/virtio/net.c > @@ -969,10 +969,14 @@ int virtio_net__exit(struct kvm *kvm) > if (ndev->mode == NET_MODE_TAP && > strcmp(params->downscript, "none")) > virtio_net_exec_script(params->downscript, ndev->tap_name); > + if (ndev->mode != NET_MODE_TAP) > + uip_exit(&ndev->info); virtio_net_stop() might be nicer here Thanks, Jean > > list_del(&ndev->list); > + ndev->vdev.ops->exit(kvm, &ndev->vdev); > free(ndev); > } > + > return 0; > } > virtio_dev_exit(virtio_net__exit); > diff --git a/virtio/rng.c b/virtio/rng.c > index 6b36655..ebdb455 100644 > --- a/virtio/rng.c > +++ b/virtio/rng.c > @@ -122,6 +122,13 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq) > return 0; > } > > +static void exit_vq(struct kvm *kvm, void *dev, u32 vq) > +{ > + struct rng_dev *rdev = dev; > + > + thread_pool__cancel_job(&rdev->jobs[vq].job_id); > +} > + > static int notify_vq(struct kvm *kvm, void *dev, u32 vq) > { > struct rng_dev *rdev = dev; > @@ -159,6 +166,7 @@ static struct virtio_ops rng_dev_virtio_ops = { > .get_config_size = get_config_size, > .get_host_features = get_host_features, > .init_vq = init_vq, > + .exit_vq = exit_vq, > .notify_vq = notify_vq, > .get_vq = get_vq, > .get_size_vq = get_size_vq, > -- > 2.42.0