When the guest accesses I/O memory, the access raises a data abort exception, which is handled by decoding the HSR information (physical address, read/write, length, register) and forwarding the read or write to QEMU, which performs the device emulation. Certain classes of load/store instructions do not provide valid syndrome information in the HSR, so we must be able to fetch the offending instruction from guest memory and decode it manually. This requires changing the general flow somewhat, since each new call to run the VCPU must check whether there is a pending MMIO load and, if so, write the loaded value to the destination register once userspace has made the data available. Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> --- arch/arm/include/asm/kvm_arm.h | 3 arch/arm/include/asm/kvm_emulate.h | 2 arch/arm/include/asm/kvm_mmu.h | 1 arch/arm/kvm/arm.c | 6 + arch/arm/kvm/emulate.c | 273 ++++++++++++++++++++++++++++++++++++ arch/arm/kvm/mmu.c | 162 +++++++++++++++++++++ arch/arm/kvm/trace.h | 21 +++ 7 files changed, 466 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 4cff3b7..21cb240 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -158,8 +158,11 @@ #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) #define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_SRT_SHIFT (16) +#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) #define HSR_FSC_TYPE (0x3c) +#define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) #define HSR_CV (1U << HSR_CV_SHIFT) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index d914029..d899fbb 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -52,6 +52,8 @@ static inline enum vcpu_mode vcpu_mode(struct kvm_vcpu *vcpu) } int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + 
unsigned long instr); void kvm_adjust_itstate(struct kvm_vcpu *vcpu); void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 11f4c3a..c3f90b0 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -38,6 +38,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size); +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 4eafdcd..31ddf56 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -565,6 +565,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!vcpu->arch.target)) return -ENOEXEC; + if (run->exit_reason == KVM_EXIT_MMIO) { + ret = kvm_handle_mmio_return(vcpu, vcpu->run); + if (ret) + return ret; + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index 93bd3e2..cc5fa89 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -132,11 +132,284 @@ u32 *vcpu_reg_mode(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode) return reg_array + vcpu_reg_offsets[mode][reg_num]; } +/****************************************************************************** + * Utility functions common for all emulation code + *****************************************************************************/ + +/* + * This one accepts a matrix where the first element is the + * bits as they must be, and the second element is the bitmask. 
+ */ +#define INSTR_NONE -1 +static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries) +{ + int i; + u32 mask; + + for (i = 0; i < table_entries; i++) { + mask = table[i][1]; + if ((table[i][0] & mask) == (instr & mask)) + return i; + } + return INSTR_NONE; +} + int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) { return 0; } + +/****************************************************************************** + * Load-Store instruction emulation + *****************************************************************************/ + +/* + * Must be ordered with LOADS first and WRITES afterwards + * for easy distinction when doing MMIO. + */ +#define NUM_LD_INSTR 9 +enum INSTR_LS_INDEXES { + INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB, + INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB, + INSTR_LS_LDRSH, + INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB, + INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH, + NUM_LS_INSTR +}; + +static u32 ls_instr[NUM_LS_INSTR][2] = { + {0x04700000, 0x0d700000}, /* LDRBT */ + {0x04300000, 0x0d700000}, /* LDRT */ + {0x04100000, 0x0c500000}, /* LDR */ + {0x04500000, 0x0c500000}, /* LDRB */ + {0x000000d0, 0x0e1000f0}, /* LDRD */ + {0x01900090, 0x0ff000f0}, /* LDREX */ + {0x001000b0, 0x0e1000f0}, /* LDRH */ + {0x001000d0, 0x0e1000f0}, /* LDRSB */ + {0x001000f0, 0x0e1000f0}, /* LDRSH */ + {0x04600000, 0x0d700000}, /* STRBT */ + {0x04200000, 0x0d700000}, /* STRT */ + {0x04000000, 0x0c500000}, /* STR */ + {0x04400000, 0x0c500000}, /* STRB */ + {0x000000f0, 0x0e1000f0}, /* STRD */ + {0x01800090, 0x0ff000f0}, /* STREX */ + {0x000000b0, 0x0e1000f0} /* STRH */ +}; + +static inline int get_arm_ls_instr_index(u32 instr) +{ + return kvm_instr_index(instr, ls_instr, NUM_LS_INSTR); +} + +/* + * Load-Store instruction decoding + */ +#define INSTR_LS_TYPE_BIT 26 +#define INSTR_LS_RD_MASK 0x0000f000 +#define INSTR_LS_RD_SHIFT 12 +#define INSTR_LS_RN_MASK 0x000f0000 +#define INSTR_LS_RN_SHIFT 16 
+#define INSTR_LS_RM_MASK 0x0000000f +#define INSTR_LS_OFFSET12_MASK 0x00000fff + +#define INSTR_LS_BIT_P 24 +#define INSTR_LS_BIT_U 23 +#define INSTR_LS_BIT_B 22 +#define INSTR_LS_BIT_W 21 +#define INSTR_LS_BIT_L 20 +#define INSTR_LS_BIT_S 6 +#define INSTR_LS_BIT_H 5 + +/* + * ARM addressing mode defines + */ +#define OFFSET_IMM_MASK 0x0e000000 +#define OFFSET_IMM_VALUE 0x04000000 +#define OFFSET_REG_MASK 0x0e000ff0 +#define OFFSET_REG_VALUE 0x06000000 +#define OFFSET_SCALE_MASK 0x0e000010 +#define OFFSET_SCALE_VALUE 0x06000000 + +#define SCALE_SHIFT_MASK 0x000000a0 +#define SCALE_SHIFT_SHIFT 5 +#define SCALE_SHIFT_LSL 0x0 +#define SCALE_SHIFT_LSR 0x1 +#define SCALE_SHIFT_ASR 0x2 +#define SCALE_SHIFT_ROR_RRX 0x3 +#define SCALE_SHIFT_IMM_MASK 0x00000f80 +#define SCALE_SHIFT_IMM_SHIFT 6 + +#define PSR_BIT_C 29 + +static unsigned long ls_word_calc_offset(struct kvm_vcpu *vcpu, + unsigned long instr) +{ + int offset = 0; + + if ((instr & OFFSET_IMM_MASK) == OFFSET_IMM_VALUE) { + /* Immediate offset/index */ + offset = instr & INSTR_LS_OFFSET12_MASK; + + if (!(instr & (1U << INSTR_LS_BIT_U))) + offset = -offset; + } + + if ((instr & OFFSET_REG_MASK) == OFFSET_REG_VALUE) { + /* Register offset/index */ + u8 rm = instr & INSTR_LS_RM_MASK; + offset = *vcpu_reg(vcpu, rm); + + if (!(instr & (1U << INSTR_LS_BIT_P))) + offset = 0; + } + + if ((instr & OFFSET_SCALE_MASK) == OFFSET_SCALE_VALUE) { + /* Scaled register offset */ + u8 rm = instr & INSTR_LS_RM_MASK; + u8 shift = (instr & SCALE_SHIFT_MASK) >> SCALE_SHIFT_SHIFT; + u32 shift_imm = (instr & SCALE_SHIFT_IMM_MASK) + >> SCALE_SHIFT_IMM_SHIFT; + offset = *vcpu_reg(vcpu, rm); + + switch (shift) { + case SCALE_SHIFT_LSL: + offset = offset << shift_imm; + break; + case SCALE_SHIFT_LSR: + if (shift_imm == 0) + offset = 0; + else + offset = ((u32)offset) >> shift_imm; + break; + case SCALE_SHIFT_ASR: + if (shift_imm == 0) { + if (offset & (1U << 31)) + offset = 0xffffffff; + else + offset = 0; + } else { + /* Ensure arithmetic 
shift */ + asm("mov %[r], %[op], ASR %[s]" : + [r] "=r" (offset) : + [op] "r" (offset), [s] "r" (shift_imm)); + } + break; + case SCALE_SHIFT_ROR_RRX: + if (shift_imm == 0) { + u32 C = (vcpu->arch.regs.cpsr & + (1U << PSR_BIT_C)); + offset = (C << 31) | offset >> 1; + } else { + /* Ensure arithmetic shift */ + asm("mov %[r], %[op], ASR %[s]" : + [r] "=r" (offset) : + [op] "r" (offset), [s] "r" (shift_imm)); + } + break; + } + + if (instr & (1U << INSTR_LS_BIT_U)) + return offset; + else + return -offset; + } + + if (instr & (1U << INSTR_LS_BIT_U)) + return offset; + else + return -offset; + + BUG(); +} + +static int kvm_ls_length(struct kvm_vcpu *vcpu, u32 instr) +{ + int index; + + index = get_arm_ls_instr_index(instr); + + if (instr & (1U << INSTR_LS_TYPE_BIT)) { + /* LS word or unsigned byte */ + if (instr & (1U << INSTR_LS_BIT_B)) + return sizeof(unsigned char); + else + return sizeof(u32); + } else { + /* LS halfword, doubleword or signed byte */ + u32 H = (instr & (1U << INSTR_LS_BIT_H)); + u32 S = (instr & (1U << INSTR_LS_BIT_S)); + u32 L = (instr & (1U << INSTR_LS_BIT_L)); + + if (!L && S) { + kvm_err("WARNING: d-word for MMIO\n"); + return 2 * sizeof(u32); + } else if (L && S && !H) + return sizeof(char); + else + return sizeof(u16); + } + + BUG(); +} + +/** + * kvm_emulate_mmio_ls - emulates load/store instructions made to I/O memory + * @vcpu: The vcpu pointer + * @fault_ipa: The IPA that caused the 2nd stage fault + * @instr: The instruction that caused the fault + * + * Handles emulation of load/store instructions which cannot be emulated through + * information found in the HSR on faults. It is necessary in this case to + * simply decode the offending instruction in software and determine the + * required operands. 
+ */ +int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + unsigned long instr) +{ + unsigned long rd, rn, offset, len; + int index; + bool is_write; + + trace_kvm_mmio_emulate(vcpu->arch.regs.pc, instr, vcpu->arch.regs.cpsr); + + index = get_arm_ls_instr_index(instr); + if (index == INSTR_NONE) { + kvm_err("Unknown load/store instruction\n"); + return -EINVAL; + } + + is_write = (index < NUM_LD_INSTR) ? false : true; + rd = (instr & INSTR_LS_RD_MASK) >> INSTR_LS_RD_SHIFT; + len = kvm_ls_length(vcpu, instr); + + vcpu->run->mmio.is_write = is_write; + vcpu->run->mmio.phys_addr = fault_ipa; + vcpu->run->mmio.len = len; + vcpu->arch.mmio_sign_extend = false; + vcpu->arch.mmio_rd = rd; + + trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE : + KVM_TRACE_MMIO_READ_UNSATISFIED, + len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0); + + /* Handle base register writeback */ + if (!(instr & (1U << INSTR_LS_BIT_P)) || + (instr & (1U << INSTR_LS_BIT_W))) { + rn = (instr & INSTR_LS_RN_MASK) >> INSTR_LS_RN_SHIFT; + offset = ls_word_calc_offset(vcpu, instr); + *vcpu_reg(vcpu, rn) += offset; + } + + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. 
+ */ + kvm_skip_instr(vcpu, is_wide_instruction(instr)); + vcpu->run->exit_reason = KVM_EXIT_MMIO; + return 0; +} + /** * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block * @vcpu: The VCPU pointer diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 448fbd6..3df4fa8 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> +#include <trace/events/kvm.h> #include <asm/idmap.h> #include <asm/pgalloc.h> #include <asm/kvm_arm.h> @@ -26,6 +27,9 @@ #include <asm/kvm_asm.h> #include <asm/mach/map.h> #include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> + +#include "trace.h" static DEFINE_MUTEX(kvm_hyp_pgd_mutex); @@ -540,6 +544,159 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } /** + * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + * + * This should only be called after returning from userspace for MMIO load + * emulation. + */ +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int *dest; + unsigned int len; + int mask; + + if (!run->mmio.is_write) { + dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd); + memset(dest, 0, sizeof(int)); + + len = run->mmio.len; + if (len > 4) + return -EINVAL; + + memcpy(dest, run->mmio.data, len); + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + *((u64 *)run->mmio.data)); + + if (vcpu->arch.mmio_sign_extend && len < 4) { + mask = 1U << ((len * 8) - 1); + *dest = (*dest ^ mask) - mask; + } + } + + return 0; +} + +/** + * invalid_io_mem_abort -- Handle I/O aborts when the ISV bit is clear + * + * @vcpu: The vcpu pointer + * @fault_ipa: The IPA that caused the 2nd stage fault + * + * Some load/store instructions cannot be emulated using the information + * presented in the HSR, for instance, register write-back instructions are not + * supported. 
We therefore need to fetch the instruction, decode it, and then + * emulate its behavior. + */ +static int invalid_io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + unsigned long instr; + phys_addr_t pc_ipa; + + if (vcpu->arch.pc_ipa & 1) { + kvm_err("I/O Abort from invalid instruction address? Wrong!\n"); + return -EINVAL; + } + + if (vcpu->arch.pc_ipa & (1U << 11)) { + /* LPAE PAR format */ + /* TODO: Check if this ever happens - called from Hyp mode */ + pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 32) - 1); + } else { + /* VMSAv7 PAR format */ + pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 40) - 1); + } + pc_ipa += vcpu->arch.regs.pc & ~PAGE_MASK; + + if (vcpu->arch.regs.cpsr & PSR_T_BIT) { + /* TODO: Check validity of PC IPA and IPA2!!! */ + /* Need to decode thumb instructions as well */ + kvm_err("Thumb guest support not there yet :(\n"); + return -EINVAL; + } + + if (kvm_read_guest(vcpu->kvm, pc_ipa, &instr, sizeof(instr))) { + kvm_err("Could not copy guest instruction\n"); + return -EFAULT; + } + + return kvm_emulate_mmio_ls(vcpu, fault_ipa, instr); +} + +static int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa, struct kvm_memory_slot *memslot) +{ + unsigned long rd, len, instr_len; + bool is_write, sign_extend; + + if (!(vcpu->arch.hsr & HSR_ISV)) + return invalid_io_mem_abort(vcpu, fault_ipa); + + if (((vcpu->arch.hsr >> 8) & 1)) { + kvm_err("Not supported, Cache operation on I/O addr.\n"); + return -EFAULT; + } + + if ((vcpu->arch.hsr >> 7) & 1) { + kvm_err("Translation table accesses I/O memory\n"); + return -EFAULT; + } + + switch ((vcpu->arch.hsr >> 22) & 0x3) { + case 0: + len = 1; + break; + case 1: + len = 2; + break; + case 2: + len = 4; + break; + default: + kvm_err("Invalid I/O abort\n"); + return -EFAULT; + } + + is_write = vcpu->arch.hsr & HSR_WNR; + sign_extend = vcpu->arch.hsr & HSR_SSE; + rd = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT; + BUG_ON(rd > 15); + + if (rd == 15) 
{ + kvm_err("I/O memory trying to read/write pc\n"); + return -EFAULT; + } + + /* Get instruction length in bytes */ + instr_len = (vcpu->arch.hsr & HSR_IL) ? 4 : 2; + + /* Export MMIO operations to user space */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + vcpu->arch.mmio_sign_extend = sign_extend; + vcpu->arch.mmio_rd = rd; + + trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE : + KVM_TRACE_MMIO_READ_UNSATISFIED, + len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0); + + if (is_write) + memcpy(run->mmio.data, vcpu_reg(vcpu, rd), len); + + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ + *vcpu_pc(vcpu) += instr_len; + kvm_adjust_itstate(vcpu); + run->exit_reason = KVM_EXIT_MMIO; + return 0; +} + +/** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer * @run: the kvm_run structure @@ -582,8 +739,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) return -EFAULT; } - kvm_pr_unimpl("I/O address abort..."); - return 0; + /* Adjust page offset */ + fault_ipa |= vcpu->arch.hdfar & ~PAGE_MASK; + return io_mem_abort(vcpu, run, fault_ipa, memslot); } memslot = gfn_to_memslot(vcpu->kvm, gfn); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index e474a0a..325106c 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -39,6 +39,27 @@ TRACE_EVENT(kvm_exit, TP_printk("PC: 0x%08lx", __entry->vcpu_pc) ); +TRACE_EVENT(kvm_mmio_emulate, + TP_PROTO(unsigned long vcpu_pc, unsigned long instr, + unsigned long cpsr), + TP_ARGS(vcpu_pc, instr, cpsr), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, instr ) + __field( unsigned long, cpsr ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->instr = instr; + __entry->cpsr = cpsr; + ), + + TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + __entry->vcpu_pc, __entry->instr, __entry->cpsr) +); + /* 
Architecturally implementation defined CP15 register access */ TRACE_EVENT(kvm_emulate_cp15_imp, TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn, -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html