On 5/26/21 3:42 PM, Claudio Imbrenda wrote:
> Add support for 1M and 2G pages.
>
> Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
> ---
>  lib/s390x/mmu.h |  73 +++++++++++++-
>  lib/s390x/mmu.c | 260 +++++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 307 insertions(+), 26 deletions(-)
>
> diff --git a/lib/s390x/mmu.h b/lib/s390x/mmu.h
> index 603f289e..93208467 100644
> --- a/lib/s390x/mmu.h
> +++ b/lib/s390x/mmu.h
> @@ -10,9 +10,78 @@
>  #ifndef _ASMS390X_MMU_H_
>  #define _ASMS390X_MMU_H_
>
> -void protect_page(void *vaddr, unsigned long prot);
> +/*
> + * Splits the page tables down to the given DAT tables level.
> + * Returns a pointer to the DAT table entry of the given level.
> + * @pgtable root of the page table tree
> + * @vaddr address whose page tables are to be split
> + * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4kB pages)
> + */
> +void *split_page(pgd_t *pgtable, void *vaddr, unsigned int level);
> +
> +/*
> + * Applies the given protection bits to the given DAT tables level,
> + * splitting if necessary.
> + * @vaddr address whose protection bits are to be changed
> + * @prot the protection bits to set
> + * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4kB pages)
> + */
> +void protect_dat_entry(void *vaddr, unsigned long prot, unsigned int level);
> +
> +/*
> + * Clears the given protection bits from the given DAT tables level,
> + * splitting if necessary.
> + * @vaddr address whose protection bits are to be changed
> + * @prot the protection bits to clear
> + * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4kB pages)
> + */
> +void unprotect_dat_entry(void *vaddr, unsigned long prot, unsigned int level);
> +
> +/*
> + * Applies the given protection bits to the given 4kB pages range,
> + * splitting if necessary.
> + * @start starting address whose protection bits are to be changed
> + * @len size in bytes
> + * @prot the protection bits to set
> + */
>  void protect_range(void *start, unsigned long len, unsigned long prot);
> -void unprotect_page(void *vaddr, unsigned long prot);
> +/*
> + * Clears the given protection bits from the given 4kB pages range,
> + * splitting if necessary.
> + * @start starting address whose protection bits are to be changed
> + * @len size in bytes
> + * @prot the protection bits to clear
> + */
>  void unprotect_range(void *start, unsigned long len, unsigned long prot);
>
> +/*
> + * Similar to install_page, maps the virtual address to the physical address
> + * for the given page tables, using 1MB large pages.
> + * Returns a pointer to the DAT table entry.
> + * @pgtable root of the page table tree
> + * @phys physical address to map, must be 1MB aligned!
> + * @vaddr virtual address to map, must be 1MB aligned!
> + */
> +pmdval_t *install_large_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr);
> +
> +/*
> + * Similar to install_page, maps the virtual address to the physical address
> + * for the given page tables, using 2GB huge pages.
> + * Returns a pointer to the DAT table entry.
> + * @pgtable root of the page table tree
> + * @phys physical address to map, must be 2GB aligned!
> + * @vaddr virtual address to map, must be 2GB aligned!
> + */
> +pudval_t *install_huge_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr);
> +
> +static inline void protect_page(void *vaddr, unsigned long prot)
> +{
> +	protect_dat_entry(vaddr, prot, 5);
> +}
> +
> +static inline void unprotect_page(void *vaddr, unsigned long prot)
> +{
> +	unprotect_dat_entry(vaddr, prot, 5);
> +}
> +
> +void *get_dat_entry(pgd_t *pgtable, void *vaddr, unsigned int level);
> +
>  #endif /* _ASMS390X_MMU_H_ */
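To make sure I read the new interface right, here is a minimal, untested
sketch of how I would expect a test to use it. The root parameter and the
SEGMENT_ENTRY_P/PAGE_ENTRY_P protection constants are my assumptions, not
something this patch defines:

static void exercise_large_pages(pgd_t *root)
{
	void *vaddr = (void *)SZ_1M;	/* must be 1MB aligned */

	/* map one 1MB large page at vaddr */
	install_large_page(root, SZ_1M, vaddr);

	/* protect/unprotect the pmd itself (level 4), no split needed */
	protect_dat_entry(vaddr, SEGMENT_ENTRY_P, 4);
	unprotect_dat_entry(vaddr, SEGMENT_ENTRY_P, 4);

	/* protecting a 4kB subrange forces a split down to ptes */
	protect_range(vaddr, PAGE_SIZE, PAGE_ENTRY_P);
}

Is that the intended usage?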
> diff --git a/lib/s390x/mmu.c b/lib/s390x/mmu.c
> index 5c517366..def91334 100644
> --- a/lib/s390x/mmu.c
> +++ b/lib/s390x/mmu.c
> @@ -15,6 +15,18 @@
>  #include <vmalloc.h>
>  #include "mmu.h"
>
> +/*
> + * The naming convention used here is the same as used in the Linux kernel,
> + * and this is the corrispondence between the s390x architectural names and
> + * the Linux ones:

s/corrispondence/correspondence/

> + *
> + * pgd - region 1 table entry
> + * p4d - region 2 table entry
> + * pud - region 3 table entry
> + * pmd - segment table entry
> + * pte - page table entry
> + */
> +
>  static pgd_t *table_root;
>
>  void configure_dat(int enable)
> @@ -46,54 +58,254 @@ static void mmu_enable(pgd_t *pgtable)
>  	lc->pgm_new_psw.mask |= PSW_MASK_DAT;
>  }
>
> -static pteval_t *get_pte(pgd_t *pgtable, uintptr_t vaddr)
> +/*
> + * Get the pud (region 3) DAT table entry for the given address and root,
> + * allocating it if necessary.
> + */
> +static inline pud_t *get_pud(pgd_t *pgtable, uintptr_t vaddr)
>  {
>  	pgd_t *pgd = pgd_offset(pgtable, vaddr);
>  	p4d_t *p4d = p4d_alloc(pgd, vaddr);
>  	pud_t *pud = pud_alloc(p4d, vaddr);
> -	pmd_t *pmd = pmd_alloc(pud, vaddr);
> -	pte_t *pte = pte_alloc(pmd, vaddr);
>
> -	return &pte_val(*pte);
> +	return pud;
> +}
> +
> +/*
> + * Get the pmd (segment) DAT table entry for the given address and pud,
> + * allocating it if necessary.
> + * The pud must not be huge.
> + */
> +static inline pmd_t *get_pmd(pud_t *pud, uintptr_t vaddr)
> +{
> +	pmd_t *pmd;
> +
> +	assert(!pud_huge(*pud));
> +	pmd = pmd_alloc(pud, vaddr);

Don't we have the *_alloc_map() functions in the kernel which either map
or allocate? I'd prefer that naming over *_alloc() if these also map
when already allocated.

> +	return pmd;
> +}
> +
> +/*
> + * Get the pte (page) DAT table entry for the given address and pmd,
> + * allocating it if necessary.
> + * The pmd must not be large.
> + */
> +static inline pte_t *get_pte(pmd_t *pmd, uintptr_t vaddr)
> +{
> +	pte_t *pte;
> +
> +	assert(!pmd_large(*pmd));
> +	pte = pte_alloc(pmd, vaddr);
> +	return pte;
> +}
> +
> +/*
> + * Splits a large pmd (segment) DAT table entry into equivalent 4kB small
> + * pages.
> + * @pmd the pmd to split, it must be large.
> + * @va the virtual address corresponding to this pmd.
> + */
> +static void split_pmd(pmd_t *pmd, uintptr_t va)
> +{
> +	phys_addr_t pa = pmd_val(*pmd) & SEGMENT_ENTRY_SFAA;
> +	unsigned long i;
> +	pte_t *pte;
> +
> +	assert(pmd_large(*pmd));
> +	pte = alloc_pages(PAGE_TABLE_ORDER);
> +	for (i = 0; i < PAGE_TABLE_ENTRIES; i++)
> +		pte_val(pte[i]) = pa | PAGE_SIZE * i;
> +	idte_pmdp(va, &pmd_val(*pmd));
> +	pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;

Equivalent would mean we carry over protection, no?

> +
> +}
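Concretely, I would have expected the split to propagate the protection
bit, something like the following (sketch only; I am assuming the
SEGMENT_ENTRY_P and PAGE_ENTRY_P names for the protection bits):

static void split_pmd(pmd_t *pmd, uintptr_t va)
{
	phys_addr_t pa = pmd_val(*pmd) & SEGMENT_ENTRY_SFAA;
	/* carry the protection over from the segment to the new ptes */
	pteval_t prot = (pmd_val(*pmd) & SEGMENT_ENTRY_P) ? PAGE_ENTRY_P : 0;
	unsigned long i;
	pte_t *pte;

	assert(pmd_large(*pmd));
	pte = alloc_pages(PAGE_TABLE_ORDER);
	for (i = 0; i < PAGE_TABLE_ENTRIES; i++)
		pte_val(pte[i]) = pa | PAGE_SIZE * i | prot;
	idte_pmdp(va, &pmd_val(*pmd));
	pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;
}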
> +
> +/*
> + * Splits a huge pud (region 3) DAT table entry into equivalent 1MB large
> + * pages.
> + * @pud the pud to split, it must be huge.
> + * @va the virtual address corresponding to this pud.
> + */
> +static void split_pud(pud_t *pud, uintptr_t va)
> +{
> +	phys_addr_t pa = pud_val(*pud) & REGION3_ENTRY_RFAA;
> +	unsigned long i;
> +	pmd_t *pmd;
> +
> +	assert(pud_huge(*pud));
> +	pmd = alloc_pages(SEGMENT_TABLE_ORDER);
> +	for (i = 0; i < SEGMENT_TABLE_ENTRIES; i++)
> +		pmd_val(pmd[i]) = pa | SZ_1M * i | SEGMENT_ENTRY_FC | SEGMENT_ENTRY_TT_SEGMENT;
> +	idte_pudp(va, &pud_val(*pud));
> +	pud_val(*pud) = __pa(pmd) | REGION_ENTRY_TT_REGION3 | REGION_TABLE_LENGTH;
> +}
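The same question about carrying over protection applies to split_pud.
To check my end-to-end understanding of the splitting logic, this is
roughly the caller I have in mind (untested sketch against the mmu.h
declarations above; the root parameter and the SZ_2G constant are my
assumptions):

static void exercise_huge_page_split(pgd_t *root)
{
	void *vaddr = (void *)SZ_2G;	/* must be 2GB aligned */
	pteval_t *pte;

	/* back the whole 2GB region with a single huge pud */
	install_huge_page(root, SZ_2G, vaddr);

	/* level 5 = 4kB: splits the pud into pmds, then one pmd into ptes */
	pte = split_page(root, vaddr, 5);
	assert(pte);
}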