[Bug 60505] Heavy network traffic triggers vhost_net lockup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



https://bugzilla.kernel.org/show_bug.cgi?id=60505

--- Comment #4 from Bart Van Assche <bvanassche@xxxxxxx> ---
I have not yet tried disabling zero-copy tx. However, even with the vhost-net
patch applied to kernel v3.9.9, I can still trigger this issue:

Jul  8 10:58:01 asus kernel: BUG: unable to handle kernel NULL pointer
dereference at 000000000000001c
Jul  8 10:58:01 asus kernel: IP: [<ffffffff810f73a9>]
put_compound_page+0x89/0x170
Jul  8 10:58:01 asus kernel: PGD 0 
Jul  8 10:58:01 asus kernel: Oops: 0000 [#1] SMP 
Jul  8 10:58:01 asus kernel: Modules linked in: dm_queue_length dm_multipath
ib_iser iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi vhost_net tun fuse
ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables
x_tables af_packet bridge stp llc rdma_ucm rdma_cm iw_cm ib_addr ib_srp
scsi_transport_srp scsi_tgt ib_ipoib ib_cm ib_uverbs ib_umad mlx4_en mlx4_ib
ib_sa ib_mad ib_core dm_mod hid_generic usbhid hid acpi_cpufreq mperf kvm_intel
i2c_i801 kvm r8169 ehci_pci snd_hda_codec_hdmi qla2xxx snd_hda_codec_realtek
snd_hda_intel snd_hda_codec snd_hwdep ehci_hcd snd_pcm snd_seq mii sr_mod cdrom
sg snd_timer pcspkr snd_seq_device mlx4_core scsi_transport_fc wmi snd
soundcore snd_page_alloc crc32c_intel microcode autofs4 ext4 jbd2 mbcache crc16
raid456 async_raid6_recov async_pq raid6_pq async_xor xor async_memcpy async_tx
raid10 raid0 raid1 sd_mod crc_t10dif i915 drm_kms_helper drm ahci libahci
intel_agp i2c_algo_bit intel_gtt agpgart xhci_hcd i2c_core video usbcore
usb_common button processor thermal_sys hwmon scsi_dh_alua scsi_dh pata_acpi
libata scsi_mod
Jul  8 10:58:01 asus kernel: CPU 3 
Jul  8 10:58:01 asus kernel: Pid: 5485, comm: vhost-5462 Not tainted 3.9.9+ #1
Gigabyte Technology Co., Ltd. Z68X-UD3H-B3/Z68X-UD3H-B3
Jul  8 10:58:01 asus kernel: RIP: 0010:[<ffffffff810f73a9>] 
[<ffffffff810f73a9>] put_compound_page+0x89/0x170
Jul  8 10:58:01 asus kernel: RSP: 0018:ffff8800aab13bd8  EFLAGS: 00010286
Jul  8 10:58:01 asus kernel: RAX: ffff880118b0b600 RBX: ffff880118b0b800 RCX:
ffffea000252801c
Jul  8 10:58:01 asus kernel: RDX: 0000000000000140 RSI: 0000000000000246 RDI:
ffff880118b0b800
Jul  8 10:58:01 asus kernel: RBP: ffff8800aab13bf8 R08: ffff8800aa8f4518 R09:
0000000000000010
Jul  8 10:58:01 asus kernel: R10: 0000000000000000 R11: 00007fa0c0000000 R12:
0000000000000000
Jul  8 10:58:01 asus kernel: R13: ffffffffa078f96c R14: 00000000000091aa R15:
ffff8800b3bb7500
Jul  8 10:58:01 asus kernel: FS:  0000000000000000(0000)
GS:ffff88011fac0000(0000) knlGS:0000000000000000
Jul  8 10:58:01 asus kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Jul  8 10:58:01 asus kernel: CR2: 000000000000001c CR3: 00000000aab9f000 CR4:
00000000000427e0
Jul  8 10:58:01 asus kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
Jul  8 10:58:01 asus kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
0000000000000400
Jul  8 10:58:01 asus kernel: Process vhost-5462 (pid: 5485, threadinfo
ffff8800aab12000, task ffff880107920000)
Jul  8 10:58:01 asus kernel: Stack:
Jul  8 10:58:01 asus kernel: ffffea0000ecae40 0000000000000012 ffff8800b3bb7500
ffffffffa078f96c
Jul  8 10:58:01 asus kernel: ffff8800aab13c08 ffffffff810f77ec ffff8800aab13c28
ffffffff8132045f
Jul  8 10:58:01 asus kernel: ffff8800b3bb7500 ffff8800b3bb7500 ffff8800aab13c48
ffffffff813204fe
Jul  8 10:58:01 asus kernel: Call Trace:
Jul  8 10:58:01 asus kernel: [<ffffffff810f77ec>] put_page+0x2c/0x40
Jul  8 10:58:01 asus kernel: [<ffffffff8132045f>] skb_release_data+0x8f/0x110
Jul  8 10:58:01 asus kernel: [<ffffffff813204fe>] __kfree_skb+0x1e/0xa0
Jul  8 10:58:01 asus kernel: [<ffffffff813205b6>] kfree_skb+0x36/0xa0
Jul  8 10:58:01 asus kernel: [<ffffffffa078f96c>] tun_get_user+0x71c/0x810
[tun]
Jul  8 10:58:01 asus kernel: [<ffffffffa078faba>] tun_sendmsg+0x5a/0x80 [tun]
Jul  8 10:58:01 asus kernel: [<ffffffffa079e607>] handle_tx+0x287/0x680
[vhost_net]
Jul  8 10:58:01 asus kernel: [<ffffffffa079ea35>] handle_tx_kick+0x15/0x20
[vhost_net]
Jul  8 10:58:01 asus kernel: [<ffffffffa079a80a>] vhost_worker+0xaa/0x1a0
[vhost_net]
Jul  8 10:58:01 asus kernel: [<ffffffff8105ef80>] kthread+0xc0/0xd0
Jul  8 10:58:01 asus kernel: [<ffffffff8140395c>] ret_from_fork+0x7c/0xb0
Jul  8 10:58:01 asus kernel: Code: 8b 6d f8 c9 c3 48 8b 07 f6 c4 80 75 0d f0 ff
4b 1c 0f 94 c0 84 c0 74 c9 eb bf 4c 8b 67 30 48 8b 07 f6 c4 80 74 e7 4c 39 e7
74 e2 <41> 8b 54 24 1c 49 8d 4c 24 1c 85 d2 74 d4 8d 72 01 89 d0 f0 0f 
Jul  8 10:58:01 asus kernel: RIP  [<ffffffff810f73a9>]
put_compound_page+0x89/0x170
Jul  8 10:58:01 asus kernel: RSP <ffff8800aab13bd8>
Jul  8 10:58:01 asus kernel: CR2: 000000000000001c
Jul  8 10:58:01 asus kernel: ---[ end trace 481d0b283c089c9a ]---

The patch I ran this test with is as follows:

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index dfff647..98f81e6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -857,7 +857,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
     mutex_unlock(&vq->mutex);

     if (oldubufs) {
-        vhost_ubuf_put_and_wait(oldubufs);
+        vhost_ubuf_put_wait_and_free(oldubufs);
         mutex_lock(&vq->mutex);
         vhost_zerocopy_signal_used(n, vq);
         mutex_unlock(&vq->mutex);
@@ -875,7 +875,7 @@ err_used:
     rcu_assign_pointer(vq->private_data, oldsock);
     vhost_net_enable_vq(n, vq);
     if (ubufs)
-        vhost_ubuf_put_and_wait(ubufs);
+        vhost_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
     fput(sock->file);
 err_vq:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0d96700..348fce4 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1576,5 +1576,10 @@ void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
 {
     kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
     wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+}
+
+void vhost_ubuf_put_wait_and_free(struct vhost_ubuf_ref *ubufs)
+{
+    vhost_ubuf_put_and_wait(ubufs);
     kfree(ubufs);
 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 17261e2..ab2eb0d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -63,6 +63,7 @@ struct vhost_ubuf_ref {
 struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
 void vhost_ubuf_put(struct vhost_ubuf_ref *);
 void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
+void vhost_ubuf_put_wait_and_free(struct vhost_ubuf_ref *ubufs);

 struct ubuf_info;

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux