On Thu, 19 Nov 2020 at 17:25, David Brazdil <dbrazdil@xxxxxxxxxx> wrote:
>
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
>
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
>
> This is done after kvm_compute_layout and before apply_alternatives.
>

If this ordering is significant enough to call out in the commit
message, please include the reason for it as well.

> Signed-off-by: David Brazdil <dbrazdil@xxxxxxxxxx>
> ---
>  arch/arm64/include/asm/kvm_mmu.h |  1 +
>  arch/arm64/kernel/smp.c          |  4 +-
>  arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
>  void kvm_update_va_mask(struct alt_instr *alt,
>  			__le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
>
>  static __always_inline unsigned long __kern_hyp_va(unsigned long v)
>  {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
>  			   "CPU: CPUs started in inconsistent modes");
>  	else
>  		pr_info("CPU: All CPU(s) started at EL1\n");
> -	if (IS_ENABLED(CONFIG_KVM))
> +	if (IS_ENABLED(CONFIG_KVM)) {
>  		kvm_compute_layout();
> +		kvm_fixup_hyp_relocations();
> +	}
>  }
>
>  void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
>  #include <asm/alternative.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/memory.h>
>
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
>  	init_hyp_physvirt_offset();
>  }
>
> +#define __load_elf_u64(s)					\
> +	({							\
> +		extern u64 s;					\
> +		u64 val;					\
> +								\
> +		asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));	\
> +		val;						\
> +	})
> +

Do you need this to ensure that the reference is absolute? There may
be more elegant ways to achieve that, using weak references for
instance.

Also, in the relocation startup code, I deliberately used a 32-bit
quantity here, as it won't get confused for an absolute virtual
address that needs relocation. (See the sketch below for what I have
in mind.)

> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> +	return start <= (char *)addr && (char *)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> +	return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> +	       __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
> +	       __is_within_bounds(addr,
> +				  CHOOSE_NVHE_SYM(__per_cpu_start),
> +				  CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +

It is slightly disappointing that we need to filter these one by one
like this, but I don't think there are any guarantees about the order
in which the R_AARCH64_RELATIVE entries appear.
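To be concrete about the 32-bit variant mentioned above, something
along these lines is what I had in mind. It is only an untested
sketch: the __load_elf_u32 name is made up for illustration, and it
assumes the linker script emits __rela_offset and __rela_size as
32-bit absolute quantities, the way the early relocation code in
head.S already consumes them with 'ldr w9, =__rela_offset':

#define __load_elf_u32(s)					\
	({							\
		extern u32 s;					\
		u32 val;					\
								\
		/* A 32-bit literal-pool load cannot be	*/	\
		/* mistaken for a 64-bit VA that still	*/	\
		/* needs relocating.			*/	\
		asm ("ldr %w0, =%1" : "=r"(val) : "S"(&s));	\
		val;						\
	})

The callers in __fixup_hyp_rela() would then stay unchanged apart
from the macro name.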
> +static void __fixup_hyp_rel(u64 addr)

__init ?

> +{
> +	u64 *ptr, kern_va, hyp_va;
> +
> +	/* Adjust the relocation address taken from ELF for KASLR. */
> +	addr += kaslr_offset();
> +
> +	/* Skip addresses not in any of the hyp sections. */
> +	if (!__is_in_hyp_section(addr))
> +		return;
> +
> +	/* Get the LM alias of the relocation address. */
> +	ptr = (u64 *)kvm_ksym_ref((void *)addr);
> +
> +	/*
> +	 * Read the value at the relocation address. It has already been
> +	 * relocated to the actual kernel kimg VA.
> +	 */
> +	kern_va = (u64)kvm_ksym_ref((void *)*ptr);
> +
> +	/* Convert to hyp VA. */
> +	hyp_va = __early_kern_hyp_va(kern_va);
> +
> +	/* Store hyp VA at the relocation address. */
> +	*ptr = hyp_va;
> +}
> +
> +static void __fixup_hyp_rela(void)

__init ?

> +{
> +	Elf64_Rela *rel;
> +	size_t i, n;
> +
> +	rel = (Elf64_Rela *)(kimage_vaddr + __load_elf_u64(__rela_offset));
> +	n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> +	for (i = 0; i < n; ++i)
> +		__fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)

It is more idiomatic to put the __init after the 'void', and someone
is undoubtedly going to send a patch to 'fix' that if we merge it
like this. (See the sketch at the end of this mail.)

> +{
> +	if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
> +		return;
> +
> +	__fixup_hyp_rela();
> +}
> +
>  static u32 compute_instruction(int n, u32 rd, u32 rn)
>  {
>  	u32 insn = AARCH64_BREAK_FAULT;
> --
> 2.29.2.299.gdc1121823c-goog
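To be explicit about the __init remarks, the signatures I would
expect look roughly like this (just a sketch, the function bodies
stay as they are):

-static void __fixup_hyp_rel(u64 addr)
+static void __init __fixup_hyp_rel(u64 addr)

-static void __fixup_hyp_rela(void)
+static void __init __fixup_hyp_rela(void)

-__init void kvm_fixup_hyp_relocations(void)
+void __init kvm_fixup_hyp_relocations(void)

Annotating the two helpers as well means their code gets discarded
together with their only caller once the init memory is freed after
boot.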