On Tue, Aug 17, 2021 at 10:02:10AM +0200, David Hildenbrand wrote: > On 17.08.21 09:56, Mike Rapoport wrote: > > On Mon, Aug 16, 2021 at 10:13:18PM +0300, Mike Rapoport wrote: > > > On Mon, Aug 16, 2021 at 08:38:43PM +0200, David Hildenbrand wrote: > > > > On 16.08.21 20:12, Jiri Olsa wrote: > > > > > On Mon, Aug 16, 2021 at 07:49:15PM +0200, David Hildenbrand wrote: > > > > > > On 16.08.21 19:34, Jiri Olsa wrote: > > > > > > > hi, > > > > > > > I'm getting fault below when running: > > > > > > > > > > > > > > # cat /proc/kallsyms | grep ksys_read > > > > > > > ffffffff8136d580 T ksys_read > > > > > > > # objdump -d --start-address=0xffffffff8136d580 --stop-address=0xffffffff8136d590 /proc/kcore > > > > > > > > > > > > > > /proc/kcore: file format elf64-x86-64 > > > > > > > > > > > > > > Segmentation fault > > > > > > > > > > > > > > any idea? config is attached > > > > > > > > > > > > Just tried with a different config on 5.14.0-rc6+ > > > > > > > > > > > > [root@localhost ~]# cat /proc/kallsyms | grep ksys_read > > > > > > ffffffff8927a800 T ksys_readahead > > > > > > ffffffff89333660 T ksys_read > > > > > > > > > > > > [root@localhost ~]# objdump -d --start-address=0xffffffff89333660 > > > > > > --stop-address=0xffffffff89333670 > > > > > > > > > > > > a.out: file format elf64-x86-64 > > > > > > > > > > > > > > > > > > > > > > > > The kern_addr_valid(start) seems to fault in your case, which is weird, > > > > > > because it merely walks the page tables. But it seems to complain about a > > > > > > non-canonical address 0xf887ffcbff000 > > > > > > > > > > > > Can you post your QEMU cmdline? Did you test this on other kernel versions? 
> > > > > > > > > > I'm using virt-manager so: > > > > > > > > > > /usr/bin/qemu-system-x86_64 -name guest=fedora33,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-13-fedora33/master-key.aes -machine pc-q35-5.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off,memory-backend=pc.ram -cpu Skylake-Server-IBRS,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,clflushopt=on,umip=on,pku=on,stibp=on,arch-capabilities=on,ssbd=on,xsaves=on,ibpb=on,amd-stibp=on,amd-ssbd=on,skip-l1dfl-vmentry=on,pschange-mc-no=on -m 8192 -object memory-backend-ram,id=pc.ram,size=8589934592 -overcommit mem-lock=off -smp 20,sockets=20,cores=1,threads=1 -uuid 2185d5a9-dbad-4d61-aa4e-97af9fd7ebca -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=36,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -kernel /home/jolsa/qemu/run/vmlinux -initrd /home/jolsa/qemu/run/initrd -append root=/dev/mapper/fedora_fedora-root ro rd.lvm.lv=fedora_fedora/root console=tty0 console=ttyS0,115200 -device pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-root-port,port=0x12,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=0x13,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x3 -device pcie-root-port,port=0x14,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x4 -device pcie-root-port,port=0x15,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x5 -device pcie-root-port,port=0x16,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x6 -device qemu-xhci,p2=15,p3=15,id=usb,bus=pci.2,addr=0x0 -device virtio-serial-pci,id=virtio-serial0,bus=pci.3,addr=0x0 -blockdev 
{"driver":"file","filename":"/var/lib/libvirt/images/fedora33.qcow2","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-2-format","read-only":false,"driver":"qcow2","file":"libvirt-2-storage","backing":null} -device virtio-blk-pci,bus=pci.4,addr=0x0,drive=libvirt-2-format,id=virtio-disk0,bootindex=1 -device ide-cd,bus=ide.0,id=sata0-0-0 -netdev tap,fd=38,id=hostnet0,vhost=on,vhostfd=39 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:f3:c6:e7,bus=pci.1,addr=0x0 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -chardev socket,id=charchannel0,fd=40,server,nowait -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 -chardev spicevmc,id=charchannel1,name=vdagent -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=com.redhat.spice.0 -device usb-tablet,id=input0,bus=usb.0,port=1 -spice port=5900,addr=127.0.0.1,disable-ticketing,image-compression=off,seamless-migration=on -device qxl-vga,id=video0,ram_size=67108864,vram_size=67108864,vram64_size_mb=0,vgamem_mb=16,max_outputs=1,bus=pcie.0,addr=0x1 -device ich9-intel-hda,id=sound0,bus=pcie.0,addr=0x1b -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev spicevmc,id=charredir0,name=usbredir -device usb-redir,chardev=charredir0,id=redir0,bus=usb.0,port=2 -chardev spicevmc,id=charredir1,name=usbredir -device usb-redir,chardev=charredir1,id=redir1,bus=usb.0,port=3 -device virtio-balloon-pci,id=balloon0,bus=pci.5,addr=0x0 -object rng-random,id=objrng0,filename=/dev/urandom -device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.6,addr=0x0 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on > > > > > so far I tested just bpf-next/master: > > > > > git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git > > > > > > > > > > > > > Just tried with upstream Linux (5.14.0-rc6) and your config 
without > > > > triggering it. I'm using "-cpu host", though, on an AMD Ryzen 9 3900X > > > > > > With Jiri's config and '-cpu <very long string>' it triggers for me on > > > v5.14-rc6. > > > > > > I'll also try to take a look tomorrow. > > > > There are some non-zero PMDs that are not present in the high kernel > > mappings. The patch below fixes for me the issue in kern_addr_valid() > > trying to access a not-present PMD. Jiri, can you check if it works for > > you? > > > > diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c > > index ddeaba947eb3..07b56e90db5d 100644 > > --- a/arch/x86/mm/init_64.c > > +++ b/arch/x86/mm/init_64.c > > @@ -1433,18 +1433,18 @@ int kern_addr_valid(unsigned long addr) > > return 0; > > p4d = p4d_offset(pgd, addr); > > - if (p4d_none(*p4d)) > > + if (p4d_none(*p4d) || !p4d_present(*p4d)) > > return 0; > > pud = pud_offset(p4d, addr); > > - if (pud_none(*pud)) > > + if (pud_none(*pud) || !pud_present(*pud)) > > return 0; > > if (pud_large(*pud)) > > return pfn_valid(pud_pfn(*pud)); > > pmd = pmd_offset(pud, addr); > > - if (pmd_none(*pmd)) > > + if (pmd_none(*pmd) || !pmd_present(*pmd)) > > return 0; > > if (pmd_large(*pmd)) > > > > However, wouldn't that mean that that TEXT segment isn't actually accessible > at all? Or is this some weird kind of TEXT protection (not even being able > to read it, weird, no?) It does not look like TEXT is inaccessible. There are unused parts in that virtual range, but for some reason the PMDs there are not zero. > We don't support swapping and all that stuff for kernel memory. So what does > !present even indicate here? (smells like a different BUG, but I might be > wrong, of course) I don't know yet. For now I've only found why kern_addr_valid() crashes. -- Sincerely yours, Mike.