Nouveau dmem NULL Pointer deref (SVM)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Just for your information, and maybe for some help: with 5.1-rc1 and SVM enabled, I see the following backtrace [1] when the nouveau card (reverse prime) goes to sleep. For now I have papered over it with [2], which leaves me with userspace hangs. Any pointers on where to look for the actual culprit?

PS: Card is: nouveau 0000:01:00.0: NVIDIA GP106 (136000a1)

Greetings,

Tobias


[1]:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
#PF error: [normal kernel read fault]
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 3 PID: 435 Comm: kworker/3:4 Not tainted 5.1.0-rc1-desktop-debug+ #80
Hardware name: Acer Aspire VN7-593G/Pluto_KLS, BIOS V1.11 08/01/2018
Workqueue: pm pm_runtime_work
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193 linux/./arch/x86/include/asm/atomic.h:31 linux/./include/asm-generic/atomic-instrumented.h:27 linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38 linux/./include/drm/ttm/ttm_bo_driver.h:721 linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5 7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28 85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
   0:    89 d9                    mov    %ebx,%ecx
   2:    48 c7 c6 50 04 e5 c0     mov    $0xffffffffc0e50450,%rsi
   9:    c4 42 79 f7 c0           shlx   %eax,%r8d,%r8d
   e:    bd f0 ff ff ff           mov    $0xfffffff0,%ebp
  13:    e8 42 d5 7a c6           callq  0xffffffffc67ad55a
  18:    ff 83 00 04 00 00        incl   0x400(%rbx)
  1e:    e9 17 ff ff ff           jmpq   0xffffffffffffff3a
  23:    41 54                    push   %r12
  25:    55                       push   %rbp
  26:    53                       push   %rbx
  27:    48 89 fb                 mov    %rdi,%rbx
  2a:*    8b 47 28                 mov    0x28(%rdi),%eax <-- trapping instruction
  2d:    85 c0                    test   %eax,%eax
  2f:    0f 84 cf 00 00 00        je     0x104
  35:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
  3c:    31 f6                    xor    %esi,%esi
  3e:    4c                       rex.WR
  3f:    8b                       .byte 0x8b

Code starting with the faulting instruction
===========================================
   0:    8b 47 28                 mov    0x28(%rdi),%eax
   3:    85 c0                    test   %eax,%eax
   5:    0f 84 cf 00 00 00        je     0xda
   b:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
  12:    31 f6                    xor    %esi,%esi
  14:    4c                       rex.WR
  15:    8b                       .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS:  0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0
Call Trace:
nouveau_dmem_suspend (linux/drivers/gpu/drm/nouveau/nouveau_dmem.c:482 (discriminator 9)) nouveau
nouveau_do_suspend (linux/drivers/gpu/drm/nouveau/nouveau_drm.c:748) nouveau
nouveau_pmops_runtime_suspend (linux/drivers/gpu/drm/nouveau/nouveau_drm.c:915) nouveau
pci_pm_runtime_suspend (linux/drivers/pci/pci-driver.c:1262)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
__rpm_callback (linux/drivers/base/power/runtime.c:357)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_callback (linux/drivers/base/power/runtime.c:490)
? pci_has_legacy_pm_support (linux/drivers/pci/pci-driver.c:1238)
rpm_suspend (linux/drivers/base/power/runtime.c:629)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
? __switch_to_asm (linux/arch/x86/entry/entry_64.S:312)
pm_runtime_work (linux/drivers/base/power/runtime.c:922)
process_one_work (linux/./arch/x86/include/asm/preempt.h:26 linux/kernel/workqueue.c:2278) worker_thread (linux/./include/linux/compiler.h:193 linux/./include/linux/list.h:237 linux/kernel/workqueue.c:2416)
? process_one_work (linux/kernel/workqueue.c:2358)
kthread (linux/kernel/kthread.c:253)
? kthread_create_worker_on_cpu (linux/kernel/kthread.c:213)
ret_from_fork (linux/arch/x86/entry/entry_64.S:358)
Modules linked in: rfcomm af_packet snd_hda_codec_hdmi bnep uvcvideo videobuf2_vmalloc rtsx_usb_sdmmc videobuf2_memops btusb rtsx_usb_ms videobuf2_v4l2 btrtl mmc_core memstick btbcm videodev btintel videobuf2_common rtsx_usb bluetooth usbhid ecdh_generic snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio nouveau arc4 i915 nls_iso8859_1 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ath10k_pci kvm ath10k_core msr snd_hda_intel mxm_wmi ath snd_hda_codec irqbypass joydev ttm snd_hda_core mac80211 snd_hwdep crct10dif_pclmul drm_kms_helper crc32_pclmul snd_pcm crc32c_intel hid_multitouch drm snd_timer ghash_clmulni_intel hid_generic mei_hdcp iTCO_wdt aesni_intel snd iTCO_vendor_support cfg80211 aes_x86_64 crypto_simd fb_sys_fops cryptd acerfan r8169 syscopyarea glue_helper sysfillrect idma64 sysimgblt realtek acer_wmi i2c_algo_bit soundcore mei_me libphy intel_wmi_thunderbolt sparse_keymap pcspkr intel_pch_thermal wmi_bmof rfkill intel_lpss_pci mei i2c_i801 intel_lpss thermal battery ac tpm_crb tpm_tis tpm_tis_core pinctrl_sunrisepoint pinctrl_intel tpm pcc_cpufreq acpi_pad button xhci_pci serio_raw xhci_hcd usbcore i2c_hid wmi video sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua efivarfs autofs4
CR2: 0000000000000028
---[ end trace abc30d2b924ee9b1 ]---
RIP: 0010:nouveau_bo_unpin (linux/./include/linux/compiler.h:193 linux/./arch/x86/include/asm/atomic.h:31 linux/./include/asm-generic/atomic-instrumented.h:27 linux/./include/linux/refcount.h:43 linux/./include/linux/kref.h:38 linux/./include/drm/ttm/ttm_bo_driver.h:721 linux/drivers/gpu/drm/nouveau/nouveau_bo.c:454) nouveau Code: 89 d9 48 c7 c6 50 04 e5 c0 c4 42 79 f7 c0 bd f0 ff ff ff e8 42 d5 7a c6 ff 83 00 04 00 00 e9 17 ff ff ff 41 54 55 53 48 89 fb <8b> 47 28 85 c0 0f 84 cf 00 00 00 48 8b bb c0 01 00 00 31 f6 4c 8b
All code
========
   0:    89 d9                    mov    %ebx,%ecx
   2:    48 c7 c6 50 04 e5 c0     mov    $0xffffffffc0e50450,%rsi
   9:    c4 42 79 f7 c0           shlx   %eax,%r8d,%r8d
   e:    bd f0 ff ff ff           mov    $0xfffffff0,%ebp
  13:    e8 42 d5 7a c6           callq  0xffffffffc67ad55a
  18:    ff 83 00 04 00 00        incl   0x400(%rbx)
  1e:    e9 17 ff ff ff           jmpq   0xffffffffffffff3a
  23:    41 54                    push   %r12
  25:    55                       push   %rbp
  26:    53                       push   %rbx
  27:    48 89 fb                 mov    %rdi,%rbx
  2a:*    8b 47 28                 mov    0x28(%rdi),%eax <-- trapping instruction
  2d:    85 c0                    test   %eax,%eax
  2f:    0f 84 cf 00 00 00        je     0x104
  35:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
  3c:    31 f6                    xor    %esi,%esi
  3e:    4c                       rex.WR
  3f:    8b                       .byte 0x8b

Code starting with the faulting instruction
===========================================
   0:    8b 47 28                 mov    0x28(%rdi),%eax
   3:    85 c0                    test   %eax,%eax
   5:    0f 84 cf 00 00 00        je     0xda
   b:    48 8b bb c0 01 00 00     mov    0x1c0(%rbx),%rdi
  12:    31 f6                    xor    %esi,%esi
  14:    4c                       rex.WR
  15:    8b                       .byte 0x8b
RSP: 0018:ffffbf0b41237d20 EFLAGS: 00010216
RAX: ffff9dfe0ba2ec00 RBX: 0000000000000000 RCX: ffffffffc0ceb630
RDX: ffff9dfe0ba2ec38 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff9dfe0a07e000 R08: 0000000000000000 R09: ffffffffc0d4a9a0
R10: 8080808080808080 R11: 0000000000001800 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000008
FS:  0000000000000000(0000) GS:ffff9dfe3ecc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000028 CR3: 00000001a500e002 CR4: 00000000003606e0


[2]:

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 34a998012bf6..aee10a120896 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -377,11 +377,21 @@ nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 int
 nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
 {
+    WARN_ON(!virt_addr_valid(nvbo));
+
+    if (!virt_addr_valid(nvbo))
+        return 0;
+
     struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
     struct ttm_buffer_object *bo = &nvbo->bo;
     bool force = false, evict = false;
     int ret;

+    WARN_ON(!virt_addr_valid(bo));
+
+    if (!virt_addr_valid(bo))
+        return 0;
+
     ret = ttm_bo_reserve(bo, false, false, NULL);
     if (ret)
         return ret;
@@ -447,10 +457,20 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
 int
 nouveau_bo_unpin(struct nouveau_bo *nvbo)
 {
+    WARN_ON(!virt_addr_valid(nvbo));
+
+    if (!virt_addr_valid(nvbo))
+        return 0;
+
     struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
     struct ttm_buffer_object *bo = &nvbo->bo;
     int ret, ref;

+    WARN_ON(!virt_addr_valid(bo));
+
+    if (!virt_addr_valid(bo))
+        return 0;
+
     ret = ttm_bo_reserve(bo, false, false, NULL);
     if (ret)
         return ret;

_______________________________________________
Nouveau mailing list
Nouveau@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/nouveau




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux