Excerpts from Alexey Kardashevskiy's message of April 9, 2021 1:57 pm:
>
>
> On 05/04/2021 11:19, Nicholas Piggin wrote:
>> Almost all logic is moved to C, by introducing a new in_guest mode for
>> the P9 path that branches very early in the KVM interrupt handler to
>> P9 exit code.
>>
>> The main P9 entry and exit assembly is now only about 160 lines of low
>> level stack setup and register save/restore, plus a bad-interrupt
>> handler.
>>
>> There are two motivations for this, the first is just make the code more
>> maintainable being in C. The second is to reduce the amount of code
>> running in a special KVM mode, "realmode". In quotes because with radix
>> it is no longer necessarily real-mode in the MMU, but it still has to be
>> treated specially because it may be in real-mode, and has various
>> important registers like PID, DEC, TB, etc set to guest. This is hostile
>> to the rest of Linux and can't use arbitrary kernel functionality or be
>> instrumented well.
>>
>> This initial patch is a reasonably faithful conversion of the asm code,
>> but it does lack any loop to return quickly back into the guest without
>> switching out of realmode in the case of unimportant or easily handled
>> interrupts. As explained in previous changes, handling HV interrupts
>> in real mode is not so important for P9.
>>
>> Use of Linux 64s interrupt entry code register conventions including
>> paca EX_ save areas are brought into the KVM code. There is no point
>> shuffling things into different paca save areas and making up a
>> different calling convention for KVM.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
>> ---
>>  arch/powerpc/include/asm/asm-prototypes.h |   3 +-
>>  arch/powerpc/include/asm/kvm_asm.h        |   3 +-
>>  arch/powerpc/include/asm/kvm_book3s_64.h  |   8 +
>>  arch/powerpc/include/asm/kvm_host.h       |   7 +-
>>  arch/powerpc/kernel/security.c            |   5 +-
>>  arch/powerpc/kvm/Makefile                 |   1 +
>>  arch/powerpc/kvm/book3s_64_entry.S        | 247 ++++++++++++++++++++++
>>  arch/powerpc/kvm/book3s_hv.c              |   9 +-
>>  arch/powerpc/kvm/book3s_hv_interrupt.c    | 218 +++++++++++++++++++
>>  arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 125 +----------
>>  10 files changed, 501 insertions(+), 125 deletions(-)
>>  create mode 100644 arch/powerpc/kvm/book3s_hv_interrupt.c
>>
>> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
>> index 939f3c94c8f3..7c74c80ed994 100644
>> --- a/arch/powerpc/include/asm/asm-prototypes.h
>> +++ b/arch/powerpc/include/asm/asm-prototypes.h
>> @@ -122,6 +122,7 @@ extern s32 patch__call_flush_branch_caches3;
>>  extern s32 patch__flush_count_cache_return;
>>  extern s32 patch__flush_link_stack_return;
>>  extern s32 patch__call_kvm_flush_link_stack;
>> +extern s32 patch__call_kvm_flush_link_stack_p9;
>>  extern s32 patch__memset_nocache, patch__memcpy_nocache;
>>
>>  extern long flush_branch_caches;
>> @@ -142,7 +143,7 @@ void kvmhv_load_host_pmu(void);
>>  void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
>>  void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
>>
>> -int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
>> +void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
>>
>>  long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
>>  long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
>> diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
>> index a3633560493b..b4f9996bd331 100644
>> --- a/arch/powerpc/include/asm/kvm_asm.h
>> +++ b/arch/powerpc/include/asm/kvm_asm.h
>> @@ -146,7 +146,8 @@
>>  #define KVM_GUEST_MODE_GUEST 1
>>  #define KVM_GUEST_MODE_SKIP 2
>>  #define KVM_GUEST_MODE_GUEST_HV 3
>> -#define KVM_GUEST_MODE_HOST_HV 4
>> +#define KVM_GUEST_MODE_GUEST_HV_FAST 4 /* ISA v3.0 with host radix mode */
>> +#define KVM_GUEST_MODE_HOST_HV 5
>>
>>  #define KVM_INST_FETCH_FAILED -1
>>
>> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
>> index 9bb9bb370b53..c214bcffb441 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
>> @@ -153,9 +153,17 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
>>  	return radix;
>>  }
>>
>> +int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
>> +
>>  #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
>>  #endif
>>
>> +/*
>> + * Invalid HDSISR value which is used to indicate when HW has not set the reg.
>> + * Used to work around an errata.
>> + */
>> +#define HDSISR_CANARY 0x7fff
>> +
>>  /*
>>   * We use a lock bit in HPTE dword 0 to synchronize updates and
>>   * accesses to each HPTE, and another bit to indicate non-present
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index 05fb00d37609..fa0083345b11 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -690,7 +690,12 @@ struct kvm_vcpu_arch {
>>  	ulong fault_dar;
>>  	u32 fault_dsisr;
>>  	unsigned long intr_msr;
>> -	ulong fault_gpa; /* guest real address of page fault (POWER9) */
>> +	/*
>> +	 * POWER9 and later, fault_gpa contains the guest real address of page
>> +	 * fault for a radix guest, or segment descriptor (equivalent to result
>> +	 * from slbmfev of SLB entry that translated the EA) for hash guests.
>> +	 */
>> +	ulong fault_gpa;
>>  #endif
>>
>>  #ifdef CONFIG_BOOKE
>> diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
>> index e4e1a94ccf6a..3a607c11f20f 100644
>> --- a/arch/powerpc/kernel/security.c
>> +++ b/arch/powerpc/kernel/security.c
>> @@ -430,16 +430,19 @@ device_initcall(stf_barrier_debugfs_init);
>>
>>  static void update_branch_cache_flush(void)
>>  {
>> -	u32 *site;
>> +	u32 *site, __maybe_unused *site2;
>>
>>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>>  	site = &patch__call_kvm_flush_link_stack;
>> +	site2 = &patch__call_kvm_flush_link_stack_p9;
>>  	// This controls the branch from guest_exit_cont to kvm_flush_link_stack
>>  	if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
>>  		patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
>> +		patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
>>  	} else {
>>  		// Could use HW flush, but that could also flush count cache
>>  		patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
>> +		patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
>>  	}
>>  #endif
>>
>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>> index cdd119028f64..ca7c86aa9360 100644
>> --- a/arch/powerpc/kvm/Makefile
>> +++ b/arch/powerpc/kvm/Makefile
>> @@ -88,6 +88,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>>
>>  ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>>  kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
>> +	book3s_hv_interrupt.o \
>>  	book3s_hv_hmi.o \
>>  	book3s_hv_rmhandlers.o \
>>  	book3s_hv_rm_mmu.o \
>> diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
>> index 0c79c89c6a4b..d98ad580fd98 100644
>> --- a/arch/powerpc/kvm/book3s_64_entry.S
>> +++ b/arch/powerpc/kvm/book3s_64_entry.S
>> @@ -1,11 +1,16 @@
>>  /* SPDX-License-Identifier: GPL-2.0-only */
>>  #include <asm/asm-offsets.h>
>>  #include <asm/cache.h>
>> +#include <asm/code-patching-asm.h>
>>  #include <asm/exception-64s.h>
>> +#include <asm/export.h>
>>  #include <asm/kvm_asm.h>
>>  #include <asm/kvm_book3s_asm.h>
>> +#include <asm/mmu.h>
>>  #include <asm/ppc_asm.h>
>> +#include <asm/ptrace.h>
>>  #include <asm/reg.h>
>> +#include <asm/ultravisor-api.h>
>>
>>  /*
>>   * These are branched to from interrupt handlers in exception-64s.S which set
>> @@ -29,10 +34,15 @@
>>  .global kvmppc_hcall
>>  .balign IFETCH_ALIGN_BYTES
>>  kvmppc_hcall:
>> +	lbz	r10,HSTATE_IN_GUEST(r13)
>> +	cmpwi	r10,KVM_GUEST_MODE_GUEST_HV_FAST
>> +	beq	kvmppc_p9_exit_hcall
>>  	ld	r10,PACA_EXGEN+EX_R13(r13)
>>  	SET_SCRATCH0(r10)
>>  	li	r10,0xc00
>>  	/* Now we look like kvmppc_interrupt */
>> +	li	r11,PACA_EXGEN
>> +	b	1f
>>
>>  /*
>>   * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
>> @@ -53,6 +63,12 @@ kvmppc_hcall:
>>  .global kvmppc_interrupt
>>  .balign IFETCH_ALIGN_BYTES
>>  kvmppc_interrupt:
>> +	std	r10,HSTATE_SCRATCH0(r13)
>> +	lbz	r10,HSTATE_IN_GUEST(r13)
>> +	cmpwi	r10,KVM_GUEST_MODE_GUEST_HV_FAST
>> +	beq	kvmppc_p9_exit_interrupt
>> +	ld	r10,HSTATE_SCRATCH0(r13)
>> +	lbz	r11,HSTATE_IN_GUEST(r13)
>>  	li	r11,PACA_EXGEN
>>  	cmpdi	r10,0x200
>>  	bgt+	1f
>> @@ -154,3 +170,234 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>>  	GET_SCRATCH0(r13)
>>  	HRFI_TO_KERNEL
>>  #endif
>> +
>> +/* Stack frame offsets for kvmppc_hv_entry */
>> +#define SFS (144 + STACK_FRAME_MIN_SIZE)
>> +#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
>> +
>> +/*
>> + * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
>> + *
>> + * Enter the guest on a ISAv3.0 or later system where we have exactly
>> + * one vcpu per vcore, and both the host and guest are radix, and threads
>> + * are set to "indepdent mode".
>> + */
>> +.balign IFETCH_ALIGN_BYTES
>> +_GLOBAL(kvmppc_p9_enter_guest)
>> +EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
>> +	mflr	r0
>> +	std	r0,PPC_LR_STKOFF(r1)
>> +	stdu	r1,-SFS(r1)
>> +
>> +	std	r1,HSTATE_HOST_R1(r13)
>> +
>> +	mfcr	r4
>> +	stw	r4,SFS+8(r1)
>> +
>> +	reg = 14
>> +	.rept	18
>> +	std	reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
>> +	reg = reg + 1
>> +	.endr
>> +
>> +	ld	r4,VCPU_LR(r3)
>> +	mtlr	r4
>> +	ld	r4,VCPU_CTR(r3)
>> +	mtctr	r4
>> +	ld	r4,VCPU_XER(r3)
>> +	mtspr	SPRN_XER,r4
>> +
>> +	ld	r1,VCPU_CR(r3)
>> +
>> +BEGIN_FTR_SECTION
>> +	ld	r4,VCPU_CFAR(r3)
>> +	mtspr	SPRN_CFAR,r4
>> +END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
>> +BEGIN_FTR_SECTION
>> +	ld	r4,VCPU_PPR(r3)
>> +	mtspr	SPRN_PPR,r4
>> +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>> +
>> +	reg = 4
>> +	.rept	28
>> +	ld	reg,__VCPU_GPR(reg)(r3)
>> +	reg = reg + 1
>> +	.endr
>> +
>> +	ld	r4,VCPU_KVM(r3)
>> +	lbz	r4,KVM_SECURE_GUEST(r4)
>
>
> This does not compile when CONFIG_KVM_BOOK3S_HV_POSSIBLE is not defined.

Thanks, I fixed that. I admittedly haven't tested all such combinations
well, so I'll have to script some more comprehensive compile jobs.

Thanks,
Nick
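
PS: to illustrate the kind of guard that keeps the P9 fast path out of
!CONFIG_KVM_BOOK3S_HV_POSSIBLE builds, here is a rough, untested sketch
against book3s_64_entry.S. The names are the ones from the patch above,
but the actual fix in the next revision may well end up shaped or placed
differently:

kvmppc_hcall:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/*
	 * kvmppc_p9_exit_hcall and the rest of the fast path only exist
	 * when HV KVM can be built, so compile the test out otherwise.
	 */
	lbz	r10,HSTATE_IN_GUEST(r13)
	cmpwi	r10,KVM_GUEST_MODE_GUEST_HV_FAST
	beq	kvmppc_p9_exit_hcall
#endif
	ld	r10,PACA_EXGEN+EX_R13(r13)

The same #ifdef/#endif pair would go around the equivalent check in
kvmppc_interrupt and around the new kvmppc_p9_enter_guest / kvmppc_p9_exit_*
code at the bottom of the file, which is presumably where the non-HV build
trips up.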