On Thu, Dec 01, 2016 at 06:18:10PM +1100, Nicholas Piggin wrote: > Change the calling convention to put the trap number together with > CR in two halves of r12, which frees up HSTATE_SCRATCH2 in the HV > handler, and r9 free. Cute idea! Some comments below... > The 64-bit PR handler entry translates the calling convention back > to match the previous call convention (i.e., shared with 32-bit), for > simplicity. > > Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> > --- > arch/powerpc/include/asm/exception-64s.h | 28 +++++++++++++++------------- > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 +++++++-------- > arch/powerpc/kvm/book3s_segment.S | 27 ++++++++++++++++++++------- > 3 files changed, 42 insertions(+), 28 deletions(-) > > diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h > index 9a3eee6..bc8fc45 100644 > --- a/arch/powerpc/include/asm/exception-64s.h > +++ b/arch/powerpc/include/asm/exception-64s.h > @@ -233,7 +233,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) > > #endif > > -#define __KVM_HANDLER_PROLOG(area, n) \ > +#define __KVM_HANDLER(area, h, n) \ > BEGIN_FTR_SECTION_NESTED(947) \ > ld r10,area+EX_CFAR(r13); \ > std r10,HSTATE_CFAR(r13); \ > @@ -243,30 +243,32 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) > std r10,HSTATE_PPR(r13); \ > END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ > ld r10,area+EX_R10(r13); \ > - stw r9,HSTATE_SCRATCH1(r13); \ > - ld r9,area+EX_R9(r13); \ > std r12,HSTATE_SCRATCH0(r13); \ > - > -#define __KVM_HANDLER(area, h, n) \ > - __KVM_HANDLER_PROLOG(area, n) \ > - li r12,n; \ > + li r12,(n); \ > + sldi r12,r12,32; \ > + or r12,r12,r9; \ Did you consider doing it the other way around, i.e. with r12 containing (cr << 32) | trap? That would save 1 instruction in each handler: + sldi r12,r9,32; \ + ori r12,r12,(n); \ > + ld r9,area+EX_R9(r13); \ > + std r9,HSTATE_SCRATCH1(r13); \ Why not put this std in kvmppc_interrupt[_hv] rather than in each handler? > b kvmppc_interrupt > > #define __KVM_HANDLER_SKIP(area, h, n) \ > cmpwi r10,KVM_GUEST_MODE_SKIP; \ > - ld r10,area+EX_R10(r13); \ > beq 89f; \ > - stw r9,HSTATE_SCRATCH1(r13); \ > BEGIN_FTR_SECTION_NESTED(948) \ > - ld r9,area+EX_PPR(r13); \ > - std r9,HSTATE_PPR(r13); \ > + ld r10,area+EX_PPR(r13); \ > + std r10,HSTATE_PPR(r13); \ > END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ > - ld r9,area+EX_R9(r13); \ > + ld r10,area+EX_R10(r13); \ > std r12,HSTATE_SCRATCH0(r13); \ > - li r12,n; \ > + li r12,(n); \ > + sldi r12,r12,32; \ > + or r12,r12,r9; \ > + ld r9,area+EX_R9(r13); \ > + std r9,HSTATE_SCRATCH1(r13); \ Same comment again, of course. > b kvmppc_interrupt; \ > 89: mtocrf 0x80,r9; \ > ld r9,area+EX_R9(r13); \ > + ld r10,area+EX_R10(r13); \ > b kvmppc_skip_##h##interrupt > > #ifdef CONFIG_KVM_BOOK3S_64_HANDLER > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index c3c1d1b..0536c73 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -1043,19 +1043,18 @@ hdec_soon: > kvmppc_interrupt_hv: > /* > * Register contents: > - * R12 = interrupt vector > + * R12 = (interrupt vector << 32) | guest CR > * R13 = PACA > - * guest CR, R12 saved in shadow VCPU SCRATCH1/0 > + * R9 = unused > + * guest R12, R9 saved in shadow VCPU SCRATCH0/1 respectively > * guest R13 saved in SPRN_SCRATCH0 > */ > - std r9, HSTATE_SCRATCH2(r13) > - > lbz r9, HSTATE_IN_GUEST(r13) > cmpwi r9, KVM_GUEST_MODE_HOST_HV > beq kvmppc_bad_host_intr > #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE > cmpwi r9, KVM_GUEST_MODE_GUEST > - ld r9, HSTATE_SCRATCH2(r13) > + ld r9, HSTATE_SCRATCH1(r13) > beq kvmppc_interrupt_pr > #endif > /* We're now back in the host but in guest MMU context */ > @@ -1075,14 +1074,13 @@ kvmppc_interrupt_hv: > std r6, VCPU_GPR(R6)(r9) > std r7, VCPU_GPR(R7)(r9) > std r8, VCPU_GPR(R8)(r9) > - ld r0, HSTATE_SCRATCH2(r13) > + ld r0, HSTATE_SCRATCH1(r13) > std r0, VCPU_GPR(R9)(r9) > std r10, VCPU_GPR(R10)(r9) > std r11, VCPU_GPR(R11)(r9) > ld r3, HSTATE_SCRATCH0(r13) > - lwz r4, HSTATE_SCRATCH1(r13) > std r3, VCPU_GPR(R12)(r9) > - stw r4, VCPU_CR(r9) > + stw r12, VCPU_CR(r9) /* CR is in the low half of r12 */ This would then need to be srdi r4, r12, 32; stw r4, VCPU_CR(r9) > BEGIN_FTR_SECTION > ld r3, HSTATE_CFAR(r13) > std r3, VCPU_CFAR(r9) > @@ -1100,6 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) > mfspr r11, SPRN_SRR1 > std r10, VCPU_SRR0(r9) > std r11, VCPU_SRR1(r9) > + srdi r12, r12, 32 /* trap is in the high half of r12 */ and this would become clrldi r12,r12,32 though arguably that's not totally necessary since we always do cmpwi/stw/lwz on r12 (but I'd feel safer with the clrldi in place). Cheers, Paul. -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html