On Fri, 1 Sep 2023, Mikhail Gavrilov wrote: > On Fri, Sep 1, 2023 at 12:29 PM Hugh Dickins <hughd@xxxxxxxxxx> wrote: > > > > > > Thanks for all the info, which has helped in several ways. The only > > thing I can do is to offer you a debug (and then keep running) patch - > > suitable for the config you showed there, not for anyone else's config. > > > > I've never used stackdepot before, but I've tried this out in good and > > bad cases, and expect it to work for you, shedding light on where it is > > going wrong - machine should boot up fine, and in dmesg you'll find one > > stacktrace between "WARNING: pte_map..." and "End of pte_map..." lines. > > > > To apply on top of a349d72fd9ef ("mm/pgtable: add rcu_read_lock() and > > rcu_read_unlock()s"), the bad end point of your bisection; but if you > > prefer, I can provide a version to go on top of whatever later Linus > > commit suits you. > > > > Patch not for general consumption, just for Mike's debugging: > > please report back the stacktrace shown - thanks! > > > > Thanks for digging into the problem. > With the attached patch I got an FTBFS when building the kernel at commit a349d72fd9ef. > > > LD [M] drivers/gpu/drm/amd/amdgpu/amdgpu.o > MODPOST Module.symvers > ERROR: modpost: "pte_unmap" [arch/x86/kvm/kvm.ko] undefined! > ERROR: modpost: "pte_unmap" [drivers/vfio/vfio_iommu_type1.ko] undefined! > make[2]: *** [scripts/Makefile.modpost:144: Module.symvers] Error 1 > make[1]: *** [/home/mikhail/packaging-work/git/linux/Makefile:1984: > modpost] Error 2 > make: *** [Makefile:234: __sub-make] Error 2 Sorry about that, please try this instead; it adds EXPORT_SYMBOL(pte_unmap). 
--- include/linux/pgtable.h | 5 +---- mm/memory.c | 1 + mm/mremap.c | 1 + mm/pgtable-generic.c | 41 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5134edcec668..131392f1c33e 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -106,10 +106,7 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) { return pte_offset_kernel(pmd, address); } -static inline void pte_unmap(pte_t *pte) -{ - rcu_read_unlock(); -} +void pte_unmap(pte_t *pte); #endif /* Find an entry in the second-level page table.. */ diff --git a/mm/memory.c b/mm/memory.c index 44d11812a88f..b1ee8ab51978 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1033,6 +1033,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, ret = -ENOMEM; goto out; } + pte_unmap(NULL); /* avoid warning when knowingly nested */ src_pte = pte_offset_map_nolock(src_mm, src_pmd, addr, &src_ptl); if (!src_pte) { pte_unmap_unlock(dst_pte, dst_ptl); diff --git a/mm/mremap.c b/mm/mremap.c index 11e06e4ab33b..56d981add487 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -175,6 +175,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, err = -EAGAIN; goto out; } + pte_unmap(NULL); /* avoid warning when knowingly nested */ new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl); if (!new_pte) { pte_unmap_unlock(old_pte, old_ptl); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 400e5a045848..958ee5cf91b1 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -232,11 +232,48 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#include <linux/stacktrace.h> +#include <linux/stackdepot.h> +#include <linux/timekeeping.h> + +static depot_stack_handle_t depot_stack; + +static void pte_map(void) +{ + static bool done = false; + unsigned long entries[16]; + 
unsigned int nr_entries; + + /* rcu_read_lock(); */ + if (raw_smp_processor_id() != 0 || done) + return; + if (depot_stack) { + pr_warn("WARNING: pte_map was not pte_unmapped:\n"); + stack_depot_print(depot_stack); + pr_warn("End of pte_map warning.\n"); + done = true; + return; + } + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); + depot_stack = stack_depot_save(entries, nr_entries, GFP_NOWAIT); + if (ktime_get_seconds() > 1800) /* give up after half an hour */ + done = true; +} + +void pte_unmap(pte_t *pte) +{ + /* rcu_read_unlock(); */ + if (raw_smp_processor_id() != 0) + return; + depot_stack = 0; +} +EXPORT_SYMBOL(pte_unmap); + pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) { pmd_t pmdval; - rcu_read_lock(); + pte_map(); pmdval = pmdp_get_lockless(pmd); if (pmdvalp) *pmdvalp = pmdval; @@ -250,7 +287,7 @@ pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) } return __pte_map(&pmdval, addr); nomap: - rcu_read_unlock(); + pte_unmap(NULL); return NULL; } -- 2.35.3