On 10/03/2017 23:41, Brijesh Singh wrote:
>> Maybe there's a reason this fires:
>>
>> WARNING: modpost: Found 2 section mismatch(es).
>> To see full details build your kernel with:
>> 'make CONFIG_DEBUG_SECTION_MISMATCH=y'
>>
>> WARNING: vmlinux.o(.text+0x48edc): Section mismatch in reference from
>> the function __change_page_attr() to the function
>> .init.text:memblock_alloc()
>> The function __change_page_attr() references
>> the function __init memblock_alloc().
>> This is often because __change_page_attr lacks a __init
>> annotation or the annotation of memblock_alloc is wrong.
>>
>> WARNING: vmlinux.o(.text+0x491d1): Section mismatch in reference from
>> the function __change_page_attr() to the function
>> .meminit.text:memblock_free()
>> The function __change_page_attr() references
>> the function __meminit memblock_free().
>> This is often because __change_page_attr lacks a __meminit
>> annotation or the annotation of memblock_free is wrong.
>>
>> But maybe Paolo might have an even better idea...
>
> I am sure he will have better idea :)

Not sure if it's better or worse, but an alternative idea is to turn
__change_page_attr and __change_page_attr_set_clr inside out, so that:

1) the alloc_pages/__free_page happens in __change_page_attr_set_clr;

2) __change_page_attr_set_clr overall does not become more complex.

Then you can introduce __early_change_page_attr_set_clr and/or
early_kernel_map_pages_in_pgd, for use in your next patches.  They use
the memblock allocator instead of alloc/free_page.

The attached patch is compile-tested only and almost certainly has some
thinko in it.  But it even skims a few lines from the code, so the idea
might have some merit.

Paolo
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 28d42130243c..953c8e697562 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -490,11 +490,12 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 }
 
 static int
-try_preserve_large_page(pte_t *kpte, unsigned long address,
+try_preserve_large_page(pte_t **p_kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
 	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
-	pte_t new_pte, old_pte, *tmp;
+	pte_t *kpte = *p_kpte;
+	pte_t new_pte, old_pte;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
 	enum pg_level level;
@@ -507,8 +508,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * Check for races, another CPU might have split this page
 	 * up already:
 	 */
-	tmp = _lookup_address_cpa(cpa, address, &level);
-	if (tmp != kpte)
+	*p_kpte = _lookup_address_cpa(cpa, address, &level);
+	if (*p_kpte != kpte)
 		goto out_unlock;
 
 	switch (level) {
@@ -634,17 +635,18 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	unsigned int i, level;
 	pte_t *tmp;
 	pgprot_t ref_prot;
+	int retry = 1;
 
+	if (!debug_pagealloc_enabled())
+		spin_lock(&cpa_lock);
 	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
 	 */
 	tmp = _lookup_address_cpa(cpa, address, &level);
-	if (tmp != kpte) {
-		spin_unlock(&pgd_lock);
-		return 1;
-	}
+	if (tmp != kpte)
+		goto out;
 
 	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 
@@ -671,10 +673,11 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 		break;
 
 	default:
-		spin_unlock(&pgd_lock);
-		return 1;
+		goto out;
 	}
 
+	retry = 0;
+
 	/*
 	 * Set the GLOBAL flags only if the PRESENT flag is set
 	 * otherwise pmd/pte_present will return true even on a non
@@ -718,28 +721,34 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	 * going on.
 	 */
 	__flush_tlb_all();
-	spin_unlock(&pgd_lock);
-
-	return 0;
-}
-
-static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
-			    unsigned long address)
-{
-	struct page *base;
-
+out:
+	spin_unlock(&pgd_lock);
+
+	/*
+	 * Do a global flush tlb after splitting the large page
+	 * and before we do the actual change page attribute in the PTE.
+	 *
+	 * With out this, we violate the TLB application note, that says
+	 * "The TLBs may contain both ordinary and large-page
+	 *  translations for a 4-KByte range of linear addresses. This
+	 *  may occur if software modifies the paging structures so that
+	 *  the page size used for the address range changes. If the two
+	 *  translations differ with respect to page frame or attributes
+	 *  (e.g., permissions), processor behavior is undefined and may
+	 *  be implementation-specific."
+	 *
+	 * We do this global tlb flush inside the cpa_lock, so that we
+	 * don't allow any other cpu, with stale tlb entries change the
+	 * page attribute in parallel, that also falls into the
+	 * just split large page entry.
+	 */
+	if (!retry)
+		flush_tlb_all();
+
 	if (!debug_pagealloc_enabled())
 		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-	if (!debug_pagealloc_enabled())
-		spin_lock(&cpa_lock);
-	if (!base)
-		return -ENOMEM;
-
-	if (__split_large_page(cpa, kpte, address, base))
-		__free_page(base);
-
-	return 0;
+
+	return retry;
 }
 
 static bool try_to_free_pte_page(pte_t *pte)
@@ -1166,30 +1175,26 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 	}
 }
 
-static int __change_page_attr(struct cpa_data *cpa, int primary)
+static int __change_page_attr(struct cpa_data *cpa, pte_t **p_kpte, unsigned long address,
+			      int primary)
 {
-	unsigned long address;
-	int do_split, err;
 	unsigned int level;
 	pte_t *kpte, old_pte;
+	int err = 0;
 
-	if (cpa->flags & CPA_PAGES_ARRAY) {
-		struct page *page = cpa->pages[cpa->curpage];
-		if (unlikely(PageHighMem(page)))
-			return 0;
-		address = (unsigned long)page_address(page);
-	} else if (cpa->flags & CPA_ARRAY)
-		address = cpa->vaddr[cpa->curpage];
-	else
-		address = *cpa->vaddr;
-repeat:
-	kpte = _lookup_address_cpa(cpa, address, &level);
-	if (!kpte)
-		return __cpa_process_fault(cpa, address, primary);
+	if (!debug_pagealloc_enabled())
+		spin_lock(&cpa_lock);
+	*p_kpte = kpte = _lookup_address_cpa(cpa, address, &level);
+	if (!kpte) {
+		err = __cpa_process_fault(cpa, address, primary);
+		goto out;
+	}
 
 	old_pte = *kpte;
-	if (pte_none(old_pte))
-		return __cpa_process_fault(cpa, address, primary);
+	if (pte_none(old_pte)) {
+		err = __cpa_process_fault(cpa, address, primary);
+		goto out;
+	}
 
 	if (level == PG_LEVEL_4K) {
 		pte_t new_pte;
@@ -1228,59 +1233,27 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 			cpa->flags |= CPA_FLUSHTLB;
 		}
 		cpa->numpages = 1;
-		return 0;
+		goto out;
 	}
 
 	/*
 	 * Check, whether we can keep the large page intact
 	 * and just change the pte:
 	 */
-	do_split = try_preserve_large_page(kpte, address, cpa);
-	/*
-	 * When the range fits into the existing large page,
-	 * return. cp->numpages and cpa->tlbflush have been updated in
-	 * try_large_page:
-	 */
-	if (do_split <= 0)
-		return do_split;
-
-	/*
-	 * We have to split the large page:
-	 */
-	err = split_large_page(cpa, kpte, address);
-	if (!err) {
-		/*
-		 * Do a global flush tlb after splitting the large page
-		 * and before we do the actual change page attribute in the PTE.
-		 *
-		 * With out this, we violate the TLB application note, that says
-		 * "The TLBs may contain both ordinary and large-page
-		 *  translations for a 4-KByte range of linear addresses. This
-		 *  may occur if software modifies the paging structures so that
-		 *  the page size used for the address range changes. If the two
-		 *  translations differ with respect to page frame or attributes
-		 *  (e.g., permissions), processor behavior is undefined and may
-		 *  be implementation-specific."
-		 *
-		 * We do this global tlb flush inside the cpa_lock, so that we
-		 * don't allow any other cpu, with stale tlb entries change the
-		 * page attribute in parallel, that also falls into the
-		 * just split large page entry.
-		 */
-		flush_tlb_all();
-		goto repeat;
-	}
+	err = try_preserve_large_page(p_kpte, address, cpa);
+out:
+	if (!debug_pagealloc_enabled())
+		spin_unlock(&cpa_lock);
 
 	return err;
 }
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
 
-static int cpa_process_alias(struct cpa_data *cpa)
+static int cpa_process_alias(struct cpa_data *cpa, unsigned long vaddr)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr;
 	int ret;
 
 	if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
@@ -1290,16 +1263,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	/*
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
	 */
-	if (cpa->flags & CPA_PAGES_ARRAY) {
-		struct page *page = cpa->pages[cpa->curpage];
-		if (unlikely(PageHighMem(page)))
-			return 0;
-		vaddr = (unsigned long)page_address(page);
-	} else if (cpa->flags & CPA_ARRAY)
-		vaddr = cpa->vaddr[cpa->curpage];
-	else
-		vaddr = *cpa->vaddr;
-
 	if (!(within(vaddr, PAGE_OFFSET,
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
@@ -1338,33 +1301,64 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	return 0;
 }
 
+static unsigned long cpa_address(struct cpa_data *cpa, unsigned long numpages)
+{
+	/*
+	 * Store the remaining nr of pages for the large page
+	 * preservation check.
+	 */
+	/* for array changes, we can't use large page */
+	cpa->numpages = 1;
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return -EINVAL;
+		return (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY) {
+		return cpa->vaddr[cpa->curpage];
+	} else {
+		cpa->numpages = numpages;
+		return *cpa->vaddr;
+	}
+}
+
+static void cpa_advance(struct cpa_data *cpa)
+{
+	if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
+		cpa->curpage++;
+	else
+		*cpa->vaddr += cpa->numpages * PAGE_SIZE;
+}
+
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 {
 	unsigned long numpages = cpa->numpages;
+	unsigned long vaddr;
+	struct page *base;
+	pte_t *kpte;
 	int ret;
 
 	while (numpages) {
-		/*
-		 * Store the remaining nr of pages for the large page
-		 * preservation check.
-		 */
-		cpa->numpages = numpages;
-		/* for array changes, we can't use large page */
-		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
-			cpa->numpages = 1;
-
-		if (!debug_pagealloc_enabled())
-			spin_lock(&cpa_lock);
-		ret = __change_page_attr(cpa, checkalias);
-		if (!debug_pagealloc_enabled())
-			spin_unlock(&cpa_lock);
-		if (ret)
-			return ret;
-
-		if (checkalias) {
-			ret = cpa_process_alias(cpa);
-			if (ret)
+		vaddr = cpa_address(cpa, numpages);
+		if (!IS_ERR_VALUE(vaddr)) {
+repeat:
+			ret = __change_page_attr(cpa, &kpte, vaddr, checkalias);
+			if (ret < 0)
 				return ret;
+			if (ret) {
+				base = alloc_page(GFP_KERNEL|__GFP_NOTRACK);
+				if (!base)
+					return -ENOMEM;
+				if (__split_large_page(cpa, kpte, vaddr, base))
+					__free_page(base);
+				goto repeat;
+			}
+
+			if (checkalias) {
+				ret = cpa_process_alias(cpa, vaddr);
+				if (ret < 0)
+					return ret;
+			}
 		}
 
 		/*
@@ -1374,11 +1368,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 */
 		BUG_ON(cpa->numpages > numpages || !cpa->numpages);
 		numpages -= cpa->numpages;
-		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
-			cpa->curpage++;
-		else
-			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
-
+		cpa_advance(cpa);
 	}
 	return 0;
 }
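
For illustration only (this is not part of the patch above), a minimal sketch of
how the early path mentioned in the mail could hand a page-table page to
__split_large_page() from memblock rather than from alloc_page()/__free_page().
The helper name early_split_large_page and its exact shape are hypothetical, and
it assumes the memblock_alloc()/memblock_free() interfaces that take and return
phys_addr_t:

#include <linux/memblock.h>
#include <linux/mm.h>

/*
 * Hypothetical early variant of the alloc_page() branch in the loop of
 * __change_page_attr_set_clr(): take the 4K page for the split page table
 * from memblock, so this can run before the buddy allocator is up.
 */
static int __init early_split_large_page(struct cpa_data *cpa, pte_t *kpte,
					 unsigned long address)
{
	phys_addr_t pa;
	struct page *base;

	/* Grab one page for the new (split) page table from memblock. */
	pa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!pa)
		return -ENOMEM;

	base = pfn_to_page(pa >> PAGE_SHIFT);

	/* If we lost the race against another CPU, give the page back. */
	if (__split_large_page(cpa, kpte, address, base))
		memblock_free(pa, PAGE_SIZE);

	return 0;
}

An __early_change_page_attr_set_clr() wrapper would then call a helper like this
instead of the alloc_page() branch above; keeping the memblock references in an
__init helper is also what avoids the .init.text/.meminit.text section mismatch
warnings quoted at the top of the mail.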