Le 20/02/2024 à 21:32, Maxwell Bland a écrit : > [Vous ne recevez pas souvent de courriers de mbland@xxxxxxxxxxxx. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > > While other descriptors (e.g. pud) allow allocations conditional on > which virtual address is allocated, pmd descriptor allocations do not. > However, adding support for this is straightforward and is beneficial to > future kernel development targeting the PMD memory granularity. > > As many architectures already implement pmd_populate_kernel in an > address-generic manner, it is necessary to roll out support > incrementally. For this purpose a preprocessor flag, Is it really worth it? There are only 48 call sites that need to be updated. Updating them all would avoid that preprocessor flag and avoid introducing pmd_populate_kernel_at() in the kernel core. $ git grep -l pmd_populate_kernel -- arch/ | wc -l 48 > __HAVE_ARCH_ADDR_COND_PMD is introduced to capture whether the > architecture supports some feature requiring PMD allocation conditional > on virtual address. Some microarchitectures (e.g. arm64) support > configurations for table descriptors, for example to enforce Privilege > eXecute Never, which benefit from knowing the virtual memory addresses > referenced by PMDs. > > Thus two major arguments in favor of this change are (1) unformity of > allocation between PMD and other table descriptor types and (2) the > capability of address-specific PMD allocation. Can you give more details on that uniformity? I can't find any function called pud_populate_kernel(). Previously, pmd_populate_kernel() had the same arguments as pmd_populate(). Why not also update pmd_populate() to keep consistency? (This can be done in a follow-up patch, not in this patch.) 
> > Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx> > --- > include/asm-generic/pgalloc.h | 18 ++++++++++++++++++ > include/linux/mm.h | 4 ++-- > mm/hugetlb_vmemmap.c | 4 ++-- > mm/kasan/init.c | 22 +++++++++++++--------- > mm/memory.c | 4 ++-- > mm/percpu.c | 2 +- > mm/pgalloc-track.h | 3 ++- > mm/sparse-vmemmap.c | 2 +- > 8 files changed, 41 insertions(+), 18 deletions(-) > > diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h > index 879e5f8aa5e9..e5cdce77c6e4 100644 > --- a/include/asm-generic/pgalloc.h > +++ b/include/asm-generic/pgalloc.h > @@ -142,6 +142,24 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) > } > #endif > > +#ifdef __HAVE_ARCH_ADDR_COND_PMD > +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, > + pte_t *ptep, unsigned long address); > +#else > +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, > + pte_t *ptep); > +#endif > + > +static inline void pmd_populate_kernel_at(struct mm_struct *mm, pmd_t *pmdp, > + pte_t *ptep, unsigned long address) > +{ > +#ifdef __HAVE_ARCH_ADDR_COND_PMD > + pmd_populate_kernel(mm, pmdp, ptep, address); > +#else > + pmd_populate_kernel(mm, pmdp, ptep); > +#endif > +} > + > #ifndef __HAVE_ARCH_PMD_FREE > static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) > { > diff --git a/include/linux/mm.h b/include/linux/mm.h > index f5a97dec5169..6a9d5ded428d 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2782,7 +2782,7 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} > #endif > > int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); > -int __pte_alloc_kernel(pmd_t *pmd); > +int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); > > #if defined(CONFIG_MMU) > > @@ -2977,7 +2977,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, > NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) > > #define pte_alloc_kernel(pmd, address) \ > - ((unlikely(pmd_none(*(pmd))) && 
__pte_alloc_kernel(pmd))? \ > + ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address)) ? \ > NULL: pte_offset_kernel(pmd, address)) > > #if USE_SPLIT_PMD_PTLOCKS > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c > index da177e49d956..1f5664b656f1 100644 > --- a/mm/hugetlb_vmemmap.c > +++ b/mm/hugetlb_vmemmap.c > @@ -58,7 +58,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start, > if (!pgtable) > return -ENOMEM; > > - pmd_populate_kernel(&init_mm, &__pmd, pgtable); > + pmd_populate_kernel_at(&init_mm, &__pmd, pgtable, addr); > > for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { > pte_t entry, *pte; > @@ -81,7 +81,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start, > > /* Make pte visible before pmd. See comment in pmd_install(). */ > smp_wmb(); > - pmd_populate_kernel(&init_mm, pmd, pgtable); > + pmd_populate_kernel_at(&init_mm, pmd, pgtable, addr); > if (!(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH)) > flush_tlb_kernel_range(start, start + PMD_SIZE); > } else { > diff --git a/mm/kasan/init.c b/mm/kasan/init.c > index 89895f38f722..1e31d965a14e 100644 > --- a/mm/kasan/init.c > +++ b/mm/kasan/init.c > @@ -116,8 +116,9 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, > next = pmd_addr_end(addr, end); > > if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { > - pmd_populate_kernel(&init_mm, pmd, > - lm_alias(kasan_early_shadow_pte)); > + pmd_populate_kernel_at(&init_mm, pmd, > + lm_alias(kasan_early_shadow_pte), > + addr); > continue; > } > > @@ -131,7 +132,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, > if (!p) > return -ENOMEM; > > - pmd_populate_kernel(&init_mm, pmd, p); > + pmd_populate_kernel_at(&init_mm, pmd, p, addr); > } > zero_pte_populate(pmd, addr, next); > } while (pmd++, addr = next, addr != end); > @@ -157,8 +158,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, > pud_populate(&init_mm, pud, 
> lm_alias(kasan_early_shadow_pmd)); > pmd = pmd_offset(pud, addr); > - pmd_populate_kernel(&init_mm, pmd, > - lm_alias(kasan_early_shadow_pte)); > + pmd_populate_kernel_at(&init_mm, pmd, > + lm_alias(kasan_early_shadow_pte), > + addr); > continue; > } > > @@ -203,8 +205,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, > pud_populate(&init_mm, pud, > lm_alias(kasan_early_shadow_pmd)); > pmd = pmd_offset(pud, addr); > - pmd_populate_kernel(&init_mm, pmd, > - lm_alias(kasan_early_shadow_pte)); > + pmd_populate_kernel_at(&init_mm, pmd, > + lm_alias(kasan_early_shadow_pte), > + addr); > continue; > } > > @@ -266,8 +269,9 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, > pud_populate(&init_mm, pud, > lm_alias(kasan_early_shadow_pmd)); > pmd = pmd_offset(pud, addr); > - pmd_populate_kernel(&init_mm, pmd, > - lm_alias(kasan_early_shadow_pte)); > + pmd_populate_kernel_at(&init_mm, pmd, > + lm_alias(kasan_early_shadow_pte), > + addr); > continue; > } > > diff --git a/mm/memory.c b/mm/memory.c > index 15f8b10ea17c..15702822d904 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -447,7 +447,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) > return 0; > } > > -int __pte_alloc_kernel(pmd_t *pmd) > +int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) > { > pte_t *new = pte_alloc_one_kernel(&init_mm); > if (!new) > @@ -456,7 +456,7 @@ int __pte_alloc_kernel(pmd_t *pmd) > spin_lock(&init_mm.page_table_lock); > if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ > smp_wmb(); /* See comment in pmd_install() */ > - pmd_populate_kernel(&init_mm, pmd, new); > + pmd_populate_kernel_at(&init_mm, pmd, new, address); > new = NULL; > } > spin_unlock(&init_mm.page_table_lock); > diff --git a/mm/percpu.c b/mm/percpu.c > index 4e11fc1e6def..7312e584c1b5 100644 > --- a/mm/percpu.c > +++ b/mm/percpu.c > @@ -3238,7 +3238,7 @@ void __init __weak pcpu_populate_pte(unsigned long addr) > new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); > if (!new) > goto err_alloc; > - pmd_populate_kernel(&init_mm, pmd, new); > + pmd_populate_kernel_at(&init_mm, pmd, new, addr); > } > > return; > diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h > index e9e879de8649..0984681c03d4 100644 > --- a/mm/pgalloc-track.h > +++ b/mm/pgalloc-track.h > @@ -45,7 +45,8 @@ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, > > #define pte_alloc_kernel_track(pmd, address, mask) \ > ((unlikely(pmd_none(*(pmd))) && \ > - (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ > + (__pte_alloc_kernel(pmd, address) || \ > + ({*(mask) |= PGTBL_PMD_MODIFIED; 0; }))) ? \ > NULL: pte_offset_kernel(pmd, address)) > > #endif /* _LINUX_PGALLOC_TRACK_H */ > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > index a2cbe44c48e1..d876cc4dc700 100644 > --- a/mm/sparse-vmemmap.c > +++ b/mm/sparse-vmemmap.c > @@ -191,7 +191,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) > void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); > if (!p) > return NULL; > - pmd_populate_kernel(&init_mm, pmd, p); > + pmd_populate_kernel_at(&init_mm, pmd, p, addr); > } > return pmd; > } > -- > 2.39.2 >