On Wed, Nov 2, 2022 at 3:24 PM Edgecombe, Rick P <rick.p.edgecombe@xxxxxxxxx> wrote: > > On Mon, 2022-10-31 at 15:25 -0700, Song Liu wrote: > > Allocate 2MB pages up to round_up(_etext, 2MB), and register memory > > [round_up(_etext, 4kb), round_up(_etext, 2MB)] with > > register_text_tail_vm > > so that we can use this part of memory for dynamic kernel text (BPF > > programs, etc.). > > > > Here is an example: > > > > [root@eth50-1 ~]# grep _etext /proc/kallsyms > > ffffffff82202a08 T _etext > > > > [root@eth50-1 ~]# grep bpf_prog_ /proc/kallsyms | tail -n 3 > > ffffffff8220f920 t > > bpf_prog_cc61a5364ac11d93_handle__sched_wakeup [bpf] > > ffffffff8220fa28 t > > bpf_prog_cc61a5364ac11d93_handle__sched_wakeup_new [bpf] > > ffffffff8220fad4 t > > bpf_prog_3bf73fa16f5e3d92_handle__sched_switch [bpf] > > > > [root@eth50-1 ~]# grep 0xffffffff82200000 > > /sys/kernel/debug/page_tables/kernel > > 0xffffffff82200000- > > 0xffffffff82400000 2M ro PSE x pmd > > > > ffffffff82200000-ffffffff82400000 is a 2MB page, serving kernel text, > > and > > bpf programs. > > > > Signed-off-by: Song Liu <song@xxxxxxxxxx> > > --- > > arch/x86/include/asm/pgtable_64_types.h | 1 + > > arch/x86/mm/init_64.c | 4 +++- > > include/linux/vmalloc.h | 4 ++++ > > 3 files changed, 8 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/include/asm/pgtable_64_types.h > > b/arch/x86/include/asm/pgtable_64_types.h > > index 04f36063ad54..c0f9cceb109a 100644 > > --- a/arch/x86/include/asm/pgtable_64_types.h > > +++ b/arch/x86/include/asm/pgtable_64_types.h > > @@ -101,6 +101,7 @@ extern unsigned int ptrs_per_p4d; > > #define PUD_MASK (~(PUD_SIZE - 1)) > > #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) > > #define PGDIR_MASK (~(PGDIR_SIZE - 1)) > > +#define PMD_ALIGN(x) (((unsigned long)(x) + (PMD_SIZE - 1)) & > > PMD_MASK) > > > > /* > > * See Documentation/x86/x86_64/mm.rst for a description of the > > memory map. 
> > diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c > > index 3f040c6e5d13..5b42fc0c6099 100644 > > --- a/arch/x86/mm/init_64.c > > +++ b/arch/x86/mm/init_64.c > > @@ -1373,7 +1373,7 @@ void mark_rodata_ro(void) > > unsigned long start = PFN_ALIGN(_text); > > unsigned long rodata_start = PFN_ALIGN(__start_rodata); > > unsigned long end = (unsigned long)__end_rodata_hpage_align; > > - unsigned long text_end = PFN_ALIGN(_etext); > > + unsigned long text_end = PMD_ALIGN(_etext); > > unsigned long rodata_end = PFN_ALIGN(__end_rodata); > > unsigned long all_end; > > Check out is_errata93(). Right now it assumes all text is between text- > etext and MODULES_VADDR-MODULES_END. It's quite an old erratum, but it > would be nice if we had an is_text_addr() helper or something, to help > keep track of the places where text might pop up. > > Speaking of which, it might be nice to update > Documentation/x86/x86_64/mm.rst with some hints that this area exists. > > > > > @@ -1414,6 +1414,8 @@ void mark_rodata_ro(void) > > (void *)rodata_end, (void *)_sdata); > > > > debug_checkwx(); > > + register_text_tail_vm(PFN_ALIGN((unsigned long)_etext), > > + PMD_ALIGN((unsigned long)_etext)); > > } > > > > int kern_addr_valid(unsigned long addr) > > diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h > > index 9b2042313c12..7365cf9c4e7f 100644 > > --- a/include/linux/vmalloc.h > > +++ b/include/linux/vmalloc.h > > @@ -132,11 +132,15 @@ extern void vm_unmap_aliases(void); > > #ifdef CONFIG_MMU > > extern void __init vmalloc_init(void); > > extern unsigned long vmalloc_nr_pages(void); > > +void register_text_tail_vm(unsigned long start, unsigned long end); > > #else > > static inline void vmalloc_init(void) > > { > > } > > static inline unsigned long vmalloc_nr_pages(void) { return 0; } > > +void register_text_tail_vm(unsigned long start, unsigned long end) > > +{ > > +} > > #endif > > This looks like it should be in the previous patch. Good catch! 
I will fix it in the next version. Thanks, Song > > > > > extern void *vmalloc(unsigned long size) __alloc_size(1);