From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> "PBMT" - Page-Based Memory Types (see Link for detail), current it has defined 3 memory types [62:61] in PTE: - WB 00 "Cacheable 'main memory'" - NC 01 "Noncacheable 'main memory'" - IO 11 "Non-cacheable non-idempotent 'I/O'" The patch not only implements the current PBMT extension but also considers future scalability. It uses 3 words of image header to store 8 memory types' values plus a mask value. That means there are still 5 memory types reserved for future scalability. This patch does not introduce any manufacturer-defined attribute codes, because RISC-V requires manufacturers to follow the PBMT extension specification. However, if the manufacturer implements a custom PBMT code, it can be compatible by modifying image_hdr.pbmt[3] during the startup phase. The patch is trying to keep both below works together: - "Linux Keep real hardware work" (Allwinner D1 needs custom mt) - "riscv spec acceptance policy" (Svpbmt extension) Link: https://lists.riscv.org/g/tech-virt-mem/topic/simplified_latest_pbmt/83389883?p=,,,20,0,0,0::recentpostdate%2Fsticky,,,20,2,0,83389883 Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> Signed-off-by: Liu Shaohua <liush@xxxxxxxxxxxxxxxxx> Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Anup Patel <anup.patel@xxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Drew Fustini <drew@xxxxxxxxxxxxxxx> Cc: Wei Fu <wefu@xxxxxxxxxx> Cc: Wei Wu <lazyparser@xxxxxxxxx> Cc: Chen-Yu Tsai <wens@xxxxxxxx> Cc: Maxime Ripard <maxime@xxxxxxxxxx> Cc: Daniel Lustig <dlustig@xxxxxxxxxx> Cc: Greg Favor <gfavor@xxxxxxxxxxxxxxxx> Cc: Andrea Mondelli <andrea.mondelli@xxxxxxxxxx> Cc: Jonathan Behrens <behrensj@xxxxxxx> Cc: Xinhaoqu (Freddie) <xinhaoqu@xxxxxxxxxx> Cc: Bill Huffman <huffman@xxxxxxxxxxx> Cc: Nick Kossifidis <mick@xxxxxxxxxxxx> Cc: Allen Baum <allen.baum@xxxxxxxxxxxxxxxxx> Cc: Josh Scheid <jscheid@xxxxxxxxxxxxxxxx> Cc: Richard Trauben <rtrauben@xxxxxxxxx> --- arch/riscv/include/asm/image.h | 6 ++-- arch/riscv/include/asm/pgtable-64.h | 8 +++-- arch/riscv/include/asm/pgtable-bits.h | 55 +++++++++++++++++++++++++++++++++-- arch/riscv/include/asm/pgtable.h | 17 +++++------ arch/riscv/kernel/head.S | 6 ++++ arch/riscv/mm/init.c | 46 +++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 19 deletions(-) diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h index e0b319a..15e13de 100644 --- a/arch/riscv/include/asm/image.h +++ b/arch/riscv/include/asm/image.h @@ -38,8 +38,7 @@ * @image_size: Effective Image size (little endian) * @flags: kernel flags (little endian) * @version: version - * @res1: reserved - * @res2: reserved + * @pbmt[3]: Page-Based Memory Types (Encode within 3 words) * @magic: Magic number (RISC-V specific; deprecated) * @magic2: Magic number 2 (to match the ARM64 'magic' field pos) * @res3: reserved (will be used for PE COFF offset) @@ -55,8 +54,7 @@ struct riscv_image_header { u64 image_size; u64 flags; u32 version; - u32 res1; - u64 res2; + u32 pbmt[3]; u64 magic; u32 magic2; u32 res3; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index e3b7c5d..ecf10bc 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -61,12 +61,14 @@ static inline void pud_clear(pud_t *pudp) static inline unsigned long pud_page_vaddr(pud_t pud) { - return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt( + (pud_val(pud) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT); } static inline struct page *pud_page(pud_t pud) { - return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); + return pfn_to_page( + (pud_val(pud) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT); } static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) @@ -76,7 +78,7 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) static inline unsigned long _pmd_pfn(pmd_t pmd) { - return pmd_val(pmd) >> _PAGE_PFN_SHIFT; + return (pmd_val(pmd) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT; } #define pmd_ERROR(e) \ diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 2ee4139..1aadd00 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -7,7 +7,7 @@ #define _ASM_RISCV_PGTABLE_BITS_H /* - * PTE format: + * rv32 PTE format: * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 * PFN reserved for SW D A G U X W R V */ @@ -24,6 +24,49 @@ #define _PAGE_DIRTY (1 << 7) /* Set by hardware on any write */ #define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#ifdef CONFIG_64BIT +/* + * rv64 PTE format: + * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * N MT RSV PFN reserved for SW D A G U X W R V + * [62:61] Memory Type definitions: + * - WB: 00 + * - NC: 01 + * - IO: 11 + */ +#define _PAGE_MT_MASK (0x3 << 61) +#define _PAGE_MT_WB (0x0 << 61) +#define _PAGE_MT_NC (0x1 << 61) +#define _PAGE_MT_IO (0x2 << 61) + +/* + * Using 96 bits of image header to encode memory types. Every types occupy 10 + * bits and every word contains 3 elements. + * - word0: dma + mt[0] + mt[1] + * - word1: mt[2] + mt[3] + mt[4] + * - word2: mt[5] + mt[6] + mt[7] + */ +#define _IMG_HDR_MT_WORD0 (_PAGE_MT_MASK >> 54) | \ + (_PAGE_MT_WB >> 44) | \ + (_PAGE_MT_NC >> 34) +#define _IMG_HDR_MT_WORD1 (_PAGE_MT_IO >> 54) | \ + (0 >> 44) | \ + (0 >> 34) +#define _IMG_HDR_MT_WORD2 (0 >> 54) | \ + (0 >> 44) | \ + (0 >> 34) + +#define _PAGE_DMA_MASK __riscv_pbmt.mask +#define _PAGE_DMA_WB __riscv_pbmt.mt[0] +#define _PAGE_DMA_NC __riscv_pbmt.mt[1] +#define _PAGE_DMA_IO __riscv_pbmt.mt[2] +#else +#define _PAGE_DMA_MASK 0 +#define _PAGE_DMA_WB 0 +#define _PAGE_DMA_NC 0 +#define _PAGE_DMA_IO 0 +#endif + #define _PAGE_SPECIAL _PAGE_SOFT #define _PAGE_TABLE _PAGE_PRESENT @@ -35,10 +78,18 @@ #define _PAGE_PFN_SHIFT 10 +#ifndef __ASSEMBLY__ +extern struct __riscv_pbmt_struct { + unsigned long mask; + unsigned long mt[8]; +} __riscv_pbmt; +#endif + /* Set of bits to preserve across pte_modify() */ #define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ _PAGE_WRITE | _PAGE_EXEC | \ - _PAGE_USER | _PAGE_GLOBAL)) + _PAGE_USER | _PAGE_GLOBAL | \ + _PAGE_DMA_MASK)) /* * when all of R/W/X are zero, the PTE is a pointer to the next level * of the page table; otherwise, it is a leaf PTE. diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 46a8aa6..d59c1d3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -116,7 +116,7 @@ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) /* Page protection bits */ -#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) +#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER | _PAGE_DMA_WB) #define PAGE_NONE __pgprot(_PAGE_PROT_NONE) #define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ) @@ -137,7 +137,8 @@ | _PAGE_PRESENT \ | _PAGE_ACCESSED \ | _PAGE_DIRTY \ - | _PAGE_GLOBAL) + | _PAGE_GLOBAL \ + | _PAGE_DMA_WB) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) @@ -147,11 +148,7 @@ #define PAGE_TABLE __pgprot(_PAGE_TABLE) -/* - * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't - * change the properties of memory regions. - */ -#define _PAGE_IOREMAP _PAGE_KERNEL +#define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_DMA_MASK) | _PAGE_DMA_IO) extern pgd_t swapper_pg_dir[]; @@ -231,12 +228,12 @@ static inline unsigned long _pgd_pfn(pgd_t pgd) static inline struct page *pmd_page(pmd_t pmd) { - return pfn_to_page(pmd_val(pmd) >> _PAGE_PFN_SHIFT); + return pfn_to_page((pmd_val(pmd) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT); } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt((pmd_val(pmd) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT); } static inline pte_t pmd_pte(pmd_t pmd) @@ -252,7 +249,7 @@ static inline pte_t pud_pte(pud_t pud) /* Yields the page frame number (PFN) of a page table entry */ static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) >> _PAGE_PFN_SHIFT); + return ((pte_val(pte) & _PAGE_CHG_MASK) >> _PAGE_PFN_SHIFT); } #define pte_page(x) pfn_to_page(pte_pfn(x)) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 89cc58a..6037df9 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -63,8 +63,14 @@ ENTRY(_start) .dword _end - _start .dword __HEAD_FLAGS .word RISCV_HEADER_VERSION +#ifdef CONFIG_64BIT + .word _IMG_HDR_MT_WORD0 + .word _IMG_HDR_MT_WORD1 + .word _IMG_HDR_MT_WORD2 +#else .word 0 .dword 0 +#endif .ascii RISCV_IMAGE_MAGIC .balign 4 .ascii RISCV_IMAGE_MAGIC2 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4b55046..8c1978a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -21,6 +21,7 @@ #include <linux/crash_dump.h> #include <asm/fixmap.h> +#include <asm/image.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/soc.h> @@ -554,6 +555,27 @@ static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, bo } #endif +#ifdef CONFIG_64BIT +#define PBMT_HDR_TO_MT(val, off) ((unsigned long)((val >> off) & 0x3ff) << 54) + +static void __init setup_pbmt(void) +{ + unsigned int *pbmt = ((struct riscv_image_header *)(&_start))->pbmt; + + __riscv_pbmt.mask = PBMT_HDR_TO_MT(pbmt[0], 0); + __riscv_pbmt.mt[0] = PBMT_HDR_TO_MT(pbmt[0], 10); + __riscv_pbmt.mt[1] = PBMT_HDR_TO_MT(pbmt[0], 20); + + __riscv_pbmt.mt[2] = PBMT_HDR_TO_MT(pbmt[1], 0); + __riscv_pbmt.mt[3] = PBMT_HDR_TO_MT(pbmt[1], 10); + __riscv_pbmt.mt[4] = PBMT_HDR_TO_MT(pbmt[1], 20); + + __riscv_pbmt.mt[5] = PBMT_HDR_TO_MT(pbmt[2], 0); + __riscv_pbmt.mt[6] = PBMT_HDR_TO_MT(pbmt[2], 10); + __riscv_pbmt.mt[7] = PBMT_HDR_TO_MT(pbmt[2], 20); +} +#endif + static void __init setup_protection_map(void) { protection_map[0] = __P000; @@ -582,6 +604,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif +#ifdef CONFIG_64BIT + setup_pbmt(); +#endif setup_protection_map(); #ifdef CONFIG_XIP_KERNEL @@ -915,6 +940,19 @@ void __init paging_init(void) setup_vm_final(); } +#ifdef CONFIG_64BIT +void __init pbmt_init(void) +{ + struct device_node *cpu; + + cpu = of_find_node_by_path("/cpus"); + if (!cpu || of_property_read_bool(cpu, "pbmt-extension")) + return; + + memset(&__riscv_pbmt, 0, sizeof(__riscv_pbmt)); +} +#endif + void __init misc_mem_init(void) { early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); @@ -925,6 +963,9 @@ void __init misc_mem_init(void) reserve_crashkernel(); #endif memblock_dump_all(); +#ifdef CONFIG_64BIT + pbmt_init(); +#endif } #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -934,3 +975,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node, NULL); } #endif + +#ifdef CONFIG_64BIT +struct __riscv_pbmt_struct __riscv_pbmt __ro_after_init; +EXPORT_SYMBOL(__riscv_pbmt); +#endif -- 2.7.4