https://bugzilla.kernel.org/show_bug.cgi?id=197813 --- Comment #6 from Carlos Lopez (clopezbelenguer@xxxxxxxxxxxxxx) --- (In reply to Bandan Das from comment #5) > bugzilla-daemon@xxxxxxxxxxxxxxxxxxx writes: > > > https://bugzilla.kernel.org/show_bug.cgi?id=197813 > > > > --- Comment #4 from Carlos Lopez (clopezbelenguer@xxxxxxxxxxxxxx) --- > > At the moment since we are using PRoxmox and they build the distro around > the > > ubuntu kernel we could wait until the release of ubuntu 18.04 when they > build > > the next versión to try a new kernel. > > but > > Considering we haven't the skills to debug more thoroughly the panic, atm, > > we > > are thinking in Downgrade the platform to an older one and see if the > problem > > persist. > > We have been experiencing worse and worse behaviour at every update of the > > kernel/distro > > This does sound good. If you can run an older version of the kernel and > confirm > if the issue goes away, that would be a good place to start. Please update > the bug > with the older kernel version when you get a chance to run this test. We tried the newer kernel 4.15.3-041503-generic with no success: the machine crashed with this output in less than 24h. 
Any clues about what to do next KVM: entry failed, hardware error 0x50ffd5be0 -name DEVsai-store01.es.amnesty.org -smp 10,sockets=1,cores=10,maxcpus=10 -nodefaults -boot menu=on,strict=on,reboot-timeout=1000,splash=/usr/shaRAX=0000000000000000 RBX=ffffffff81f3c9c0 RCX=0000000000000000 RDX=0000000000000000password -cpu Haswell,+kvm_pv_unhalt,+kvm_pv_eoi,enforce,vendor=GenuineIntel -m 12288 -k es -device pci-bridRSI=0000000000000000 RDI=0000000000000000 RBP=ffff8803313cbe90 RSP=ffff8803313cbe90=2,bus=pci.0,addr=0x1f -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 -device usb-tablet,id=tablet,buR8 =ffff880333390580 R9 =0000000000000000 R10=00000001005d277a R11=0000000000003c00rver,nowait -device isa-serial,chardev=serial0 -iscsi initiator-name=iqn.1993-08.org.debian:01:481b3cd8ee13 R12=0000000000000007 R13=0000000000000000 R14=0000000000000000 R15=ffff8803313c80000,drive=drive-ide2,id=ide2,bootindex=200 -device virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5 -drive file=/RIP=ffffffff81064646 RFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0 -drive fileES =0000 0000000000000000 ffffffff 00000000scsi1,format=raw,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi1,id=scsi1 -drive fiCS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]w,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=3,drive=drive-scsi3,id=scsi3 -drive SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS [-WA]raw,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=4,drive=drive-scsi4,id=scsi4 -deviDS =0000 0000000000000000 ffffffff 00000000,addr=0x7 -drive file=/dev/data2/vm-139-disk-1,if=none,id=drive-sata0,format=raw,cache=none,aio=native,detect-zeroes=on -device ide-drive,bus=ahci0.FS =0000 0000000000000000 ffffffff 00000000-drive 
file=/dev/data2/vm-139-disk-2,if=none,id=drive-sata3,format=raw,cache=none,aio=native,detect-zeroes=on -device ide-drive,bus=ahci0.3,drive=drGS =0000 ffff880333380000 ffffffff 00000000m-139-disk-3,if=none,id=drive-sata5,format=raw,cache=none,aio=native,detect-zeroes=on -device ide-drive,bus=ahci0.5,drive=drive-sata5,id=sata5 -netdLDT=0000 0000000000000000 ffffffff 00000000/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown -device e1000,mac=62:20:40:70:2E:55,netdev=net0,bus=pci.0,addr=0x12,iTR =0040 ffff8803333848c0 00002087 00008b00 DPL=0 TSS64-busy GDT= ffff88033338c000 0000007f IDT= ffffffffff574000 00000fff CR0=80050033 CR2=00007f4b4f09ec80 CR3=00000000b9cf4000 CR4=00160670 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000fffe0ff0 DR7=0000000000000400 EFER=0000000000000d01 Code=89 e5 fb 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 fb f4 <5d> c3 0f 1f 84 00 00 00 00 00 55 48 89 e5 f4 5d c3 66 0f 1f 84 00 00 00 00 00 55 49 89 > > > Since we have another machine with an older processor from the same family > > and > > we are not experiencing any issues i attach cpuinfo of the problematic one > > and > > the goodold one in case is relevant. > > What's the qemu cmdline for the Haswell host ? Can you try -cpu Haswell on > the Broadwell > host as well and see if it makes any difference ? 
This is the cmd in the haswell host: /usr/bin/kvm -id 154 -chardev socket,id=qmp,path=/var/run/qemu-server/154.qmp,server,nowait -mon chardev=qmp,mode=control -pidfile /var/run/qemu-server/154.pid -daemonize -smbios type=1,uuid=04fc63bd-caef-424d-a047-c99d0ffd5be0 -name sai-store01.es.amnesty.org -smp 6,sockets=1,cores=6,maxcpus=6 -nodefaults -boot menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg -vga cirrus -vnc unix:/var/run/qemu-server/154.vnc,x509,password -cpu kvm64,+lahf_lm,+sep,+kvm_pv_unhalt,+kvm_pv_eoi,enforce -m 8000 -k es -device pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e -device pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 -device usb-tablet,id=tablet,bus=uhci.0,port=1 -iscsi initiator-name=iqn.1993-08.org.debian:01:9f87b7a31af -drive if=none,id=drive-ide2,media=cdrom,aio=threads -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 -device virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5 -drive file=/dev/pve/vm-154-disk-1,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0 -drive file=/srv/data/images/154/vm-154-disk-7.raw,if=none,id=drive-scsi1,format=raw,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi1,id=scsi1 -drive file=/srv/data/images/154/vm-154-disk-1.raw,if=none,id=drive-scsi2,cache=writethrough,format=raw,aio=threads,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=2,drive=drive-scsi2,id=scsi2 -drive file=/srv/data/images/154/vm-154-disk-8.raw,if=none,id=drive-scsi3,format=raw,cache=none,aio=native,detect-zeroes=on -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=3,drive=drive-scsi3,id=scsi3 -drive file=/srv/data/images/154/vm-154-disk-9.raw,if=none,id=drive-scsi4,format=raw,cache=none,aio=native,detect-zeroes=on -device 
scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=4,drive=drive-scsi4,id=scsi4 -device ahci,id=ahci0,multifunction=on,bus=pci.0,addr=0x7 -drive file=/dev/pve/vm-154-disk-2,if=none,id=drive-sata0,format=raw,cache=none,aio=native,detect-zeroes=on -device ide-drive,bus=ahci0.0,drive=drive-sata0,id=sata0,bootindex=105 -drive file=/srv/data/images/154/vm-154-disk-4.raw,if=none,id=drive-sata3,format=raw,cache=none,aio=native,detect-zeroes=on -device ide-drive,bus=ahci0.3,drive=drive-sata3,id=sata3 -netdev type=tap,id=net0,ifname=tap154i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown -device e1000,mac=[xxxx],netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300 We also tried the Haswell CPU model (-cpu Haswell) on the Broadwell host, without success. The one thing left to do is to try an older kernel. > > > [KVM WITh panics/frozen vm] > > Kernel > > > > 4.13.13-5-pve #1 SMP PVE 4.13.13-38 x86_64 > > > > processor : 40 > > vendor_id : GenuineIntel > > cpu family : 6 > > model : 79 > > model name : Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz > > stepping : 1 > > microcode : 0xb000021 > > cpu MHz : 2199.991 > > cache size : 30720 KB > > physical id : 0 > > siblings : 24 > > core id : 10 > > cpu cores : 12 > > apicid : 21 > > initial apicid : 21 > > fpu : yes > > fpu_exception : yes > > cpuid level : 20 > > wp : yes > > flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca > > cmov > > pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx > pdpe1gb > > rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology > > nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx > est > > tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe > popcnt > > tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch > > cpuid_fault epb cat_l3 cdp_l3 invpcid_single intel_pt tpr_shadow vnmi > > flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms > > invpcid > > rtm cqm rdt_a rdseed adx 
smap xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total > > cqm_mbm_local dtherm ida arat pln pts > > bugs : cpu_meltdown spectre_v1 spectre_v2 > > bogomips : 4399.98 > > clflush size : 64 > > cache_alignment : 64 > > address sizes : 46 bits physical, 48 bits virtual > > power management: > > > > > > [KVM WITHOUT PROBLEMS] > > Kernel 4.4.95-1-pve #1 SMP PVE 4.4.95-99 > > > > processor : 39 > > vendor_id : GenuineIntel > > cpu family : 6 > > model : 63 > > model name : Intel(R) Xeon(R) CPU E5-2660 v3 @ 2.60GHz > > stepping : 2 > > microcode : 0x31 > > cpu MHz : 2743.203 > > cache size : 25600 KB > > physical id : 1 > > siblings : 20 > > core id : 12 > > cpu cores : 10 > > apicid : 57 > > initial apicid : 57 > > fpu : yes > > fpu_exception : yes > > cpuid level : 15 > > wp : yes > > flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca > > cmov > > pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx > pdpe1gb > > rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology > > nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx > > est > > tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe > popcnt > > tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm epb tpr_shadow > vnmi > > flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid > > cqm > > xsaveopt cqm_llc cqm_occup_llc dtherm ida arat pln pts > > bugs : > > bogomips : 5201.14 > > clflush size : 64 > > cache_alignment : 64 > > address sizes : 46 bits physical, 48 bits virtual > > power management: -- You are receiving this mail because: You are watching the assignee of the bug.