Re: [PATCH 4/8] mm: Add __dump_folio()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Matthew,

On 27/02/2024 19:23, Matthew Wilcox (Oracle) wrote:
> Turn __dump_page() into a wrapper around __dump_folio().  Snapshot the
> page & folio into a stack variable so we don't hit BUG_ON() if an
> allocation is freed under us and what was a folio pointer becomes a
> pointer to a tail page.

I'm seeing a couple of panics caused by this patch. I already raised the first one at [1], and it looks like there is a bug in Ard's patch (which he now has a proposed fix for) which provokes the bug in this.

[1] https://lore.kernel.org/linux-arm-kernel/fc691e8d-1a50-4be6-a3b2-d60d6f2e2487@xxxxxxx/

The other way to trigger it is to run the mm kselftest:

  gup_test -ct -F 0x1 0 19 0x1000

This calls into the kernel and deliberately calls dump_page() via dump_pages_test() in gup_test.c.

Panic is as follows (root cause identified below):

[   22.994800] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
[   22.995428] Mem abort info:
[   22.995617]   ESR = 0x0000000096000005
[   22.995867]   EC = 0x25: DABT (current EL), IL = 32 bits
[   22.996215]   SET = 0, FnV = 0
[   22.996419]   EA = 0, S1PTW = 0
[   22.996628]   FSC = 0x05: level 1 translation fault
[   22.996951] Data abort info:
[   22.997145]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
[   22.997541]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[   22.998025]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[   22.998438] user pgtable: 4k pages, 39-bit VAs, pgdp=000000019f2d6000
[   22.998937] [0000000000000008] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
[   22.999608] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
[   23.000083] Modules linked in:
[   23.000319] CPU: 6 PID: 1222 Comm: gup_test Not tainted 6.8.0-rc6-00915-g7f43e0f76e47 #2
[   23.000883] Hardware name: linux,dummy-virt (DT)
[   23.001209] pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   23.001621] pc : get_pfnblock_flags_mask+0x3c/0x68
[   23.001929] lr : __dump_page+0x188/0x400
[   23.002168] sp : ffffffc0885ebb40
[   23.002370] x29: ffffffc0885ebb40 x28: 0000000000ffffc0 x27: 0000000000000000
[   23.002839] x26: 0000000000000000 x25: ffffffc0885ebba0 x24: 00000000ffffffff
[   23.003335] x23: ffffffeabbbc1000 x22: 0000000000000030 x21: ffffffeabb9f5e98
[   23.003869] x20: ffffffc0885ebba0 x19: ffffffc0885ebba0 x18: ffffffffffffffff
[   23.004489] x17: 34383833383a6465 x16: 7070616d5f736567 x15: ffffffeabd058782
[   23.004989] x14: 0000000000000000 x13: 3030303730303039 x12: 3138666666666666
[   23.005501] x11: fffffffffffe0000 x10: ffffffeabca9c018 x9 : ffffffeab9f0c798
[   23.005980] x8 : 00000000ffffefff x7 : ffffffeabca9c018 x6 : 0000000000000000
[   23.006448] x5 : 000003fffffffc28 x4 : 0001fffffffe144a x3 : 0000000000000000
[   23.006931] x2 : 0000000000000007 x1 : ffffffff0a257aee x0 : 00000000001fffff
[   23.007436] Call trace:
[   23.007623]  get_pfnblock_flags_mask+0x3c/0x68
[   23.007928]  dump_page+0x2c/0x70
[   23.008156]  gup_test_ioctl+0xb34/0xc40
[   23.008416]  __arm64_sys_ioctl+0xb0/0x100
[   23.008694]  invoke_syscall+0x50/0x128
[   23.008944]  el0_svc_common.constprop.0+0x48/0xf8
[   23.009259]  do_el0_svc+0x28/0x40
[   23.009499]  el0_svc+0x34/0xb8
[   23.009720]  el0t_64_sync_handler+0x13c/0x158
[   23.010029]  el0t_64_sync+0x190/0x198
[   23.010293] Code: d37b1884 f100007f 8b040064 9a831083 (f9400460) 
[   23.010714] ---[ end trace 0000000000000000 ]---


> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  mm/debug.c | 120 +++++++++++++++++++++++++++++------------------------
>  1 file changed, 66 insertions(+), 54 deletions(-)
> 
> diff --git a/mm/debug.c b/mm/debug.c
> index ee533a5ceb79..96555fc78f1a 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -51,84 +51,96 @@ const struct trace_print_flags vmaflag_names[] = {
>  	{0, NULL}
>  };
>  
> -static void __dump_page(struct page *page)
> +static void __dump_folio(struct folio *folio, struct page *page,
> +		unsigned long pfn, unsigned long idx)
>  {
> -	struct folio *folio = page_folio(page);
> -	struct page *head = &folio->page;
> -	struct address_space *mapping;
> -	bool compound = PageCompound(page);
> -	/*
> -	 * Accessing the pageblock without the zone lock. It could change to
> -	 * "isolate" again in the meantime, but since we are just dumping the
> -	 * state for debugging, it should be fine to accept a bit of
> -	 * inaccuracy here due to racing.
> -	 */
> -	bool page_cma = is_migrate_cma_page(page);
> -	int mapcount;
> +	struct address_space *mapping = folio_mapping(folio);
> +	bool page_cma;
> +	int mapcount = 0;
>  	char *type = "";
>  
> -	if (page < head || (page >= head + MAX_ORDER_NR_PAGES)) {
> -		/*
> -		 * Corrupt page, so we cannot call page_mapping. Instead, do a
> -		 * safe subset of the steps that page_mapping() does. Caution:
> -		 * this will be misleading for tail pages, PageSwapCache pages,
> -		 * and potentially other situations. (See the page_mapping()
> -		 * implementation for what's missing here.)
> -		 */
> -		unsigned long tmp = (unsigned long)page->mapping;
> -
> -		if (tmp & PAGE_MAPPING_ANON)
> -			mapping = NULL;
> -		else
> -			mapping = (void *)(tmp & ~PAGE_MAPPING_FLAGS);
> -		head = page;
> -		folio = (struct folio *)page;
> -		compound = false;
> -	} else {
> -		mapping = page_mapping(page);
> -	}
> -
>  	/*
> -	 * Avoid VM_BUG_ON() in page_mapcount().
> -	 * page->_mapcount space in struct page is used by sl[aou]b pages to
> -	 * encode own info.
> +	 * page->_mapcount space in struct page is used by slab pages to
> +	 * encode own info, and we must avoid calling page_folio() again.
>  	 */
> -	mapcount = PageSlab(head) ? 0 : page_mapcount(page);
> -
> -	pr_warn("page:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
> -			page, page_ref_count(head), mapcount, mapping,
> -			page_to_pgoff(page), page_to_pfn(page));
> -	if (compound) {
> -		pr_warn("head:%p order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
> -				head, compound_order(head),
> +	if (!folio_test_slab(folio)) {
> +		mapcount = atomic_read(&page->_mapcount) + 1;
> +		if (folio_test_large(folio))
> +			mapcount += folio_entire_mapcount(folio);
> +	}
> +
> +	pr_warn("page: refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
> +			folio_ref_count(folio), mapcount, mapping,
> +			folio->index + idx, pfn);
> +	if (folio_test_large(folio)) {
> +		pr_warn("head: order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
> +				folio_order(folio),
>  				folio_entire_mapcount(folio),
>  				folio_nr_pages_mapped(folio),
>  				atomic_read(&folio->_pincount));
>  	}
>  
>  #ifdef CONFIG_MEMCG
> -	if (head->memcg_data)
> -		pr_warn("memcg:%lx\n", head->memcg_data);
> +	if (folio->memcg_data)
> +		pr_warn("memcg:%lx\n", folio->memcg_data);
>  #endif
> -	if (PageKsm(page))
> +	if (folio_test_ksm(folio))
>  		type = "ksm ";
> -	else if (PageAnon(page))
> +	else if (folio_test_anon(folio))
>  		type = "anon ";
>  	else if (mapping)
>  		dump_mapping(mapping);
>  	BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
>  
> -	pr_warn("%sflags: %pGp%s\n", type, &head->flags,
> +	/*
> +	 * Accessing the pageblock without the zone lock. It could change to
> +	 * "isolate" again in the meantime, but since we are just dumping the
> +	 * state for debugging, it should be fine to accept a bit of
> +	 * inaccuracy here due to racing.
> +	 */
> +	page_cma = is_migrate_cma_page(page);

Problem is here: is_migrate_cma_page() is a macro that resolves to this:

page_cma = get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) == MIGRATE_CMA;

And since page is on the stack, page_to_pfn() gives a very wrong answer.

I confirmed that the problem goes away for both cases above, when changing the line to:

page_cma = get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK) == MIGRATE_CMA;

Thanks,
Ryan

> +	pr_warn("%sflags: %pGp%s\n", type, &folio->flags,
>  		page_cma ? " CMA" : "");
> -	pr_warn("page_type: %pGt\n", &head->page_type);
> +	pr_warn("page_type: %pGt\n", &folio->page.page_type);
>  
>  	print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
>  			sizeof(unsigned long), page,
>  			sizeof(struct page), false);
> -	if (head != page)
> +	if (folio_test_large(folio))
>  		print_hex_dump(KERN_WARNING, "head: ", DUMP_PREFIX_NONE, 32,
> -			sizeof(unsigned long), head,
> -			sizeof(struct page), false);
> +			sizeof(unsigned long), folio,
> +			2 * sizeof(struct page), false);
> +}
> +
> +static void __dump_page(const struct page *page)
> +{
> +	struct folio *foliop, folio;
> +	struct page precise;
> +	unsigned long pfn = page_to_pfn(page);
> +	unsigned long idx, nr_pages = 1;
> +	int loops = 5;
> +
> +again:
> +	memcpy(&precise, page, sizeof(*page));
> +	foliop = page_folio(&precise);
> +	idx = folio_page_idx(foliop, page);
> +	if (idx != 0) {
> +		if (idx < (1UL << PUD_ORDER)) {
> +			memcpy(&folio, foliop, 2 * sizeof(struct page));
> +			nr_pages = folio_nr_pages(&folio);
> +		}
> +
> +		if (idx > nr_pages) {
> +			if (loops-- > 0)
> +				goto again;
> +			printk("page does not match folio\n");
> +			precise.compound_head &= ~1UL;
> +			foliop = (struct folio *)&precise;
> +			idx = 0;
> +		}
> +	}
> +
> +	__dump_folio(foliop, &precise, pfn, idx);
>  }
>  
>  void dump_page(struct page *page, const char *reason)





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux