This is the really low level guest entry/exit code.

Usually the Linux kernel resides in virtual memory from 0xc000000000000000
to 0xffffffffffffffff, and these addresses are mapped into every userspace
application. When entering a 32 bit guest this is perfectly fine, since a
32 bit guest can't access memory that high anyway. When entering a 64 bit
guest, however, the guest kernel probably lives in the same virtual memory
region as the host kernel, so we need to switch between the two.

The normal entry code still runs at those virtual addresses, so we need a
small wrapper in real mode memory that switches from the host's to the
guest's high SLB state and vice versa. To keep both host and guest state
in the SLB at the same time, we store the guest kernel's SLB entries in a
different ESID range (0x4000000000000000 - 0x7fffffffffffffff). For details
on which entries go where, please see the patch itself.
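As a rough illustration of that shuffling (a toy model only, not part of
the patch; the types and helpers are made up, and a segment is reduced to
the most significant nybble of its ESID), guest entry does the equivalent
of:

#include <stdbool.h>
#include <stdint.h>

/* Toy model of one SLB slot: just a valid bit and the ESID's top nybble. */
struct slb_slot {
	bool    valid;
	uint8_t top;
};

/* What the enter trampoline does to the SLB, per the tables in the patch. */
static void slb_switch_to_guest(struct slb_slot *slb, int n)
{
	int i;

	/* Pass 1: park the live host kernel segments, 11xx -> 10xx */
	for (i = 0; i < n; i++)
		if (slb[i].valid && (slb[i].top >> 2) == 3)
			slb[i].top &= ~0x4;

	/* Pass 2: activate the parked guest kernel segments, 01xx -> 11xx */
	for (i = 0; i < n; i++)
		if (slb[i].valid && (slb[i].top >> 2) == 1)
			slb[i].top |= 0x8;
}

The real code below walks the hardware SLB instead: slbmfee/slbmfev read an
entry's ESID and VSID, slbie flushes the stale translation when a live host
segment moves, and slbmte writes the modified entry back into the same slot.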
Signed-off-by: Alexander Graf <agraf@xxxxxxx>
---
 arch/powerpc/kvm/book3s_64_slb.S |  456 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 456 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_64_slb.S

diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 0000000..c5d2bf3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,456 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@xxxxxxx>
+ */
+
+/* Switch the SLB to the guest's context
+ *
+ * This function switches the SLB's contents over to the guest's. This is
+ * achieved by moving the host kernel's segments into an unused ESID range
+ * and moving the guest kernel's segments from their parking range into the
+ * kernel ESID range.
+ *
+ * Looking at the most significant nybble, we do the following on entry:
+ *
+ * 0 0000
+ * 1 0001
+ * 2 0010
+ * 3 0011
+ *
+ * 4 0100	-> c 1100
+ * 5 0101	-> d 1101
+ * 6 0110	-> e 1110
+ * 7 0111	-> f 1111
+ *
+ * 8 1000
+ * 9 1001
+ * a 1010
+ * b 1011
+ *
+ * c 1100	-> 8 1000
+ * d 1101	-> 9 1001
+ * e 1110	-> a 1010
+ * f 1111	-> b 1011
+ *
+ * That way we can run Linux as a guest, even though Linux as the host is
+ * already occupying the segments in question.
+ *
+ * Required state:
+ *
+ * MSR = ~(IR|DR)
+ * R13 = PACA
+ * R0 = free
+ * R9 = guest IP
+ * R10 = guest MSR
+ * R11 = free
+ * R12 = free
+ * PACA[PACA_EXMC + EX_R9] = guest R9
+ * PACA[PACA_EXMC + EX_R10] = guest R10
+ * PACA[PACA_EXMC + EX_R11] = guest R11
+ * PACA[PACA_EXMC + EX_R12] = guest R12
+ * PACA[PACA_EXMC + EX_R13] = guest R13
+ * PACA[PACA_EXMC + EX_CCR] = guest CR
+ * PACA[PACA_EXMC + EX_R3] = guest XER
+ * SPRG1 = guest R0
+ */
+
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+	mtsrr0	r9
+	mtsrr1	r10
+
+	/* Don't mess with the SLB magic for 32 bit guests */
+	rldicl.	r10, r10, 1, 63
+	beq	slb_do_enter
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	GUEST 1100
+	 * 5 0101	GUEST 1101
+	 * 6 0110	GUEST 1110
+	 * 7 0111	GUEST 1111
+	 *
+	 * 8 1000	-
+	 * 9 1001	-
+	 * a 1010	-
+	 * b 1011	-
+	 *
+	 * c 1100	HOST 1100
+	 * d 1101	HOST 1101
+	 * e 1110	HOST 1110
+	 * f 1111	HOST 1111
+	 */
+
+	/* Replace 11xx -> 10xx */
+
+	/* for (r11 = 0; r11 < slb_entries; r11++) */
+	li	r11, 0
+slb_loop_11xx_10xx:
+
+	/* r10 = esid(r11) */
+	slbmfee	r10, r11
+	/* r0 = leftmost 2 bits of esid */
+	rldicl	r0, r10, 2, 62
+	/* esid & 0xc... == 0xc... */
+	cmpwi	r0, 3
+	/* no? skip it then */
+	bne+	slb_loop_11xx_10xx_skip
+	/* Skip invalid entries (V=0) */
+	rldicl.	r0, r10, 37, 63
+	beq	slb_loop_11xx_10xx_skip
+	/* r9 = VSID */
+	slbmfev	r9, r11
+	/* r0 = esid & ESID_MASK */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID) */
+	rldic	r12, r9, 56 - 36, 36
+	or	r0, r0, r12
+	/* slbie(r0) */
+	slbie	r0
+	/* r0 = esid & 0xb... (clear the second-highest bit: 11xx -> 10xx) */
+	rldicr	r0, r10, 2, 62
+	rotrdi	r0, r0, 2
+	/* r0 |= r11 (SLB index) */
+	or	r0, r0, r11
+	/* slbmte(r9, r0) */
+	slbmte	r9, r0
+slb_loop_11xx_10xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_1)
+	cmpwi	r11, 0
+	blt	slb_loop_11xx_10xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	GUEST 1100
+	 * 5 0101	GUEST 1101
+	 * 6 0110	GUEST 1110
+	 * 7 0111	GUEST 1111
+	 *
+	 * 8 1000	HOST 1100
+	 * 9 1001	HOST 1101
+	 * a 1010	HOST 1110
+	 * b 1011	HOST 1111
+	 *
+	 * c 1100	-
+	 * d 1101	-
+	 * e 1110	-
+	 * f 1111	-
+	 */
+
+	/* Replace 01xx -> 11xx */
+
+	lis	r12, 0x8000000000000000@highest
+	rldicr	r12, r12, 32, 31
+	li	r11, 0
+slb_loop_01xx_11xx:
+	slbmfee	r10, r11
+	rldicl	r0, r10, 2, 62
+	cmpwi	r0, 1
+	bne+	slb_loop_01xx_11xx_skip
+	rldicl.	r0, r10, 37, 63
+	beq	slb_loop_01xx_11xx_skip
+	slbmfev	r9, r11
+	/* r0 = esid | 0x8... (set the top bit: 01xx -> 11xx) */
+	or	r0, r12, r10
+	or	r0, r0, r11
+	slbmte	r9, r0
+slb_loop_01xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_2)
+	cmpwi	r11, 0
+	blt	slb_loop_01xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	-
+	 * 5 0101	-
+	 * 6 0110	-
+	 * 7 0111	-
+	 *
+	 * 8 1000	HOST 1100
+	 * 9 1001	HOST 1101
+	 * a 1010	HOST 1110
+	 * b 1011	HOST 1111
+	 *
+	 * c 1100	GUEST 1100
+	 * d 1101	GUEST 1101
+	 * e 1110	GUEST 1110
+	 * f 1111	GUEST 1111
+	 */
+
+slb_do_enter:
+
+	/* Enter guest */
+
+	mfspr	r0, SPRN_SPRG1			/* guest R0 */
+
+	ld	r9, (PACA_EXMC+EX_R9)(r13)
+	ld	r10, (PACA_EXMC+EX_R10)(r13)
+	ld	r12, (PACA_EXMC+EX_R12)(r13)
+
+	lwz	r11, (PACA_EXMC+EX_CCR)(r13)
+	mtcr	r11
+
+	ld	r11, (PACA_EXMC+EX_R3)(r13)
+	mtxer	r11
+
+	ld	r11, (PACA_EXMC+EX_R11)(r13)
+	ld	r13, (PACA_EXMC+EX_R13)(r13)
+
+	RFI
+kvmppc_handler_trampoline_enter_end:
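The kvmppc_patch_slb_* labels above mark the "cmpwi r11, 0" loop bounds.
As written those loops would never iterate, because blt cannot be taken
once the counter has been incremented past 0 -- so the compare immediate
presumably gets patched at init time with the machine's actual number of
SLB entries. A hypothetical sketch of such patching (names made up, not
from this patch); the patched word would additionally need an icache
flush, e.g. via the kernel's flush_icache_range():

#include <stdint.h>

/* Rewrite the signed 16-bit immediate of a "cmpwi rX, 0" instruction
 * word so the loop bound becomes the real SLB entry count. */
static void patch_slb_loop_bound(uint32_t *insn, int16_t slb_nr)
{
	*insn = (*insn & 0xffff0000u) | (uint16_t)slb_nr;
}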
+
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+
+	/* Register usage at this point:
+	 *
+	 * SPRG0 = reserved
+	 * SPRG1 = guest R13
+	 * SPRG2 = guest CR
+	 * SPRG3 = virt. PACA
+	 * R01 = host R1
+	 * R02 = host R2
+	 * R10 = guest PC
+	 * R11 = guest MSR
+	 * R12 = exit handler id
+	 * R13 = PACA
+	 * PACA.exmc.R9 = guest R1
+	 * PACA.exmc.R10 = guest R10
+	 * PACA.exmc.R11 = guest R11
+	 * PACA.exmc.R12 = guest R12
+	 * PACA.exmc.R13 = guest R2
+	 *
+	 */
+
+	/* Save registers */
+
+	std	r0, (PACA_EXMC+EX_SRR0)(r13)
+	std	r8, (PACA_EXMC+EX_DSISR)(r13)
+	std	r9, (PACA_EXMC+EX_R3)(r13)
+	std	r10, (PACA_EXMC+EX_LR)(r13)
+	std	r11, (PACA_EXMC+EX_DAR)(r13)
+
+	/*
+	 * In order to easily get at the last instruction, the one we
+	 * got the #vmexit on, we exploit the fact that the virtual
+	 * layout is still the same here, so we can just ld from the
+	 * guest's PC address.
+	 */
+
+	/* We only load the last instruction when it's safe */
+	cmpwi	r12, BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_PROGRAM
+	beq	ld_last_inst
+
+	b	no_ld_last_inst
+
+ld_last_inst:
+	/* Save off the guest instruction we're at */
+	/* 1) enable paging for data */
+	mfmsr	r0
+	ori	r8, r0, MSR_DR			/* Enable paging for data */
+	mtmsr	r8
+	/* 2) fetch the instruction */
+	lwz	r8, 0(r10)
+	/* 3) disable paging again */
+	mtmsr	r0
+
+no_ld_last_inst:
+
+	/* Don't mess with the SLB magic for 32 bit guests */
+	rldicl.	r11, r11, 1, 63
+	beq	slb_do_exit
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	-
+	 * 5 0101	-
+	 * 6 0110	-
+	 * 7 0111	-
+	 *
+	 * 8 1000	HOST 1100
+	 * 9 1001	HOST 1101
+	 * a 1010	HOST 1110
+	 * b 1011	HOST 1111
+	 *
+	 * c 1100	GUEST 1100
+	 * d 1101	GUEST 1101
+	 * e 1110	GUEST 1110
+	 * f 1111	GUEST 1111
+	 */
+
+	/* Replace 11xx -> 01xx */
+
+	li	r11, 0
+slb_loop_11xx_01xx:
+
+	slbmfee	r10, r11
+	/* esid & 0xc... == 0xc... */
+	rldicl	r0, r10, 2, 62
+	cmpwi	r0, 3
+	bne+	slb_loop_11xx_01xx_skip
+	rldicl.	r0, r10, 37, 63		/* skip invalid entries (V=0) */
+	beq	slb_loop_11xx_01xx_skip
+	slbmfev	r9, r11
+	/* r0 = esid & ESID_MASK */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID) */
+	rldimi	r0, r9, 56 - 36, 36
+	/* slbie(r0) */
+	slbie	r0
+	/* r0 = esid & 0x7... (clear the top bit: 11xx -> 01xx) */
+	rldicl	r0, r10, 0, 1
+	or	r0, r0, r11
+	/* slbmte(r9, r0) */
+	slbmte	r9, r0
+slb_loop_11xx_01xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_3)
+	cmpwi	r11, 0
+	blt	slb_loop_11xx_01xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	GUEST 1100
+	 * 5 0101	GUEST 1101
+	 * 6 0110	GUEST 1110
+	 * 7 0111	GUEST 1111
+	 *
+	 * 8 1000	HOST 1100
+	 * 9 1001	HOST 1101
+	 * a 1010	HOST 1110
+	 * b 1011	HOST 1111
+	 *
+	 * c 1100	-
+	 * d 1101	-
+	 * e 1110	-
+	 * f 1111	-
+	 */
+
+
+	/* Replace 10xx -> 11xx */
+
+	li	r11, 0
+slb_loop_10xx_11xx:
+	slbmfee	r10, r11
+	rldicl	r0, r10, 2, 62
+	cmpwi	r0, 2
+	bne+	slb_loop_10xx_11xx_skip
+	rldicl.	r0, r10, 37, 63
+	beq	slb_loop_10xx_11xx_skip
+	slbmfev	r9, r11
+	/* r0 = esid | 0x4... (set the second-highest bit: 10xx -> 11xx) */
+	lis	r0, 0x4000000000000000@highest
+	rldicr	r0, r0, 32, 31
+	or	r0, r0, r10
+	or	r0, r0, r11
+	slbmte	r9, r0
+slb_loop_10xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_4)
+	cmpwi	r11, 0
+	blt	slb_loop_10xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0 0000	GUEST 0000
+	 * 1 0001	-
+	 * 2 0010	-
+	 * 3 0011	-
+	 *
+	 * 4 0100	GUEST 1100
+	 * 5 0101	GUEST 1101
+	 * 6 0110	GUEST 1110
+	 * 7 0111	GUEST 1111
+	 *
+	 * 8 1000	-
+	 * 9 1001	-
+	 * a 1010	-
+	 * b 1011	-
+	 *
+	 * c 1100	HOST 1100
+	 * d 1101	HOST 1101
+	 * e 1110	HOST 1110
+	 * f 1111	HOST 1111
+	 */
+
+slb_do_exit:
+
+	/* Restore registers */
+
+	ld	r11, (PACA_EXMC+EX_DAR)(r13)
+	ld	r10, (PACA_EXMC+EX_LR)(r13)
+	ld	r9, (PACA_EXMC+EX_R3)(r13)
+	/* Save last inst */
+	stw	r8, (PACA_EXMC+EX_LR)(r13)
+	/* Restore r8 */
+	ld	r8, (PACA_EXMC+EX_DSISR)(r13)
+
+	/* RFI into the highmem handler */
+	mfmsr	r0
+	ori	r0, r0, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
+	mtsrr1	r0
+	ld	r0, PACASAVEDMSR(r13)		/* Highmem handler address */
+	mtsrr0	r0
+
+	ld	r0, (PACA_EXMC+EX_SRR0)(r13)
+
+	RFI
+kvmppc_handler_trampoline_exit_end:
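For symmetry, the exit path's remapping in the same toy model as earlier,
plus a quick round trip check (again illustrative only; this reuses the
slb_slot struct and slb_switch_to_guest() from the sketch near the top of
this mail):

#include <assert.h>

/* What the exit trampoline does to the SLB, per the tables in the patch. */
static void slb_switch_to_host(struct slb_slot *slb, int n)
{
	int i;

	/* Pass 1: park the live guest kernel segments, 11xx -> 01xx */
	for (i = 0; i < n; i++)
		if (slb[i].valid && (slb[i].top >> 2) == 3)
			slb[i].top &= ~0x8;

	/* Pass 2: reactivate the parked host kernel segments, 10xx -> 11xx */
	for (i = 0; i < n; i++)
		if (slb[i].valid && (slb[i].top >> 2) == 2)
			slb[i].top |= 0x4;
}

int main(void)
{
	struct slb_slot slb[4] = {
		{ true, 0xc },	/* live host kernel segment */
		{ true, 0x5 },	/* parked guest kernel segment */
		{ true, 0x0 },	/* user segment, never touched */
	};

	slb_switch_to_guest(slb, 4);	/* 0xc -> 0x8, 0x5 -> 0xd */
	assert(slb[0].top == 0x8 && slb[1].top == 0xd && slb[2].top == 0x0);

	slb_switch_to_host(slb, 4);	/* and back again */
	assert(slb[0].top == 0xc && slb[1].top == 0x5 && slb[2].top == 0x0);
	return 0;
}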
--
1.6.0.2