Test various combinations of the AC bit and reading/writing into user pages at CPL=0. One notable missing test is implicit kernel reads and writes (e.g. reading the IDT/GDT/LDT/TSS). The interesting part of this is that AC must be ignored in ring 3; the processor always behaves as if AC=0. I skipped this because QEMU doesn't emulate this correctly, and because right now there's no kvm-unit-tests infrastructure to run code in ring 3 at all. Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- config-x86-common.mak | 4 +- lib/x86/processor.h | 13 ++++- x86/smap.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 x86/smap.c diff --git a/config-x86-common.mak b/config-x86-common.mak index aa5a439..93c9fee 100644 --- a/config-x86-common.mak +++ b/config-x86-common.mak @@ -37,7 +37,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \ $(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \ $(TEST_DIR)/s3.flat $(TEST_DIR)/pmu.flat \ $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \ - $(TEST_DIR)/init.flat + $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat ifdef API tests-common += api/api-sample @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o +$(TEST_DIR)/smap.elf: $(cstart.o) $(TEST_DIR)/smap.o + $(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o $(TEST_DIR)/vmx_tests.o $(TEST_DIR)/debug.elf: $(cstart.o) $(TEST_DIR)/debug.o diff --git a/lib/x86/processor.h b/lib/x86/processor.h index 9cc1112..7fc1026 100644 --- a/lib/x86/processor.h +++ b/lib/x86/processor.h @@ -25,6 +25,7 @@ #define X86_CR4_PSE 0x00000010 #define X86_CR4_PAE 0x00000020 #define X86_CR4_PCIDE 0x00020000 +#define X86_CR4_SMAP 0x00200000 #define X86_IA32_EFER 0xc0000080 #define X86_EFER_LMA (1UL << 8) @@ -39,6 +40,16 @@ static inline void barrier(void) asm volatile ("" : : : "memory"); } +static inline void clac(void) +{ + asm 
volatile (".byte 0x0f, 0x01, 0xca" : : : "memory"); +} + +static inline void stac(void) +{ + asm volatile (".byte 0x0f, 0x01, 0xcb" : : : "memory"); +} + static inline u16 read_cs(void) { unsigned val; @@ -330,7 +341,7 @@ static inline void irq_enable(void) asm volatile("sti"); } -static inline void invlpg(void *va) +static inline void invlpg(volatile void *va) { asm volatile("invlpg (%0)" ::"r" (va) : "memory"); } diff --git a/x86/smap.c b/x86/smap.c new file mode 100644 index 0000000..d0b9e07 --- /dev/null +++ b/x86/smap.c @@ -0,0 +1,156 @@ +#include "libcflat.h" +#include "lib/x86/desc.h" +#include "lib/x86/processor.h" +#include "lib/x86/vm.h" + +#define X86_FEATURE_SMAP 20 +#define X86_EFLAGS_AC (1 << 18) + +volatile int pf_count = 0; +volatile int save; +volatile unsigned test; + + +// When doing ring 3 tests, page fault handlers will always run on a +// separate stack (the ring 0 stack). Seems easier to use the alt_stack +// mechanism for both ring 0 and ring 3. + +void do_pf_tss(unsigned long error_code) +{ + pf_count++; + save = test; + +#ifndef __x86_64__ + tss.eflags |= X86_EFLAGS_AC; +#endif +} + +extern void pf_tss(void); +asm ("pf_tss:\n" +#ifdef __x86_64__ + // no task on x86_64, save/restore caller-save regs + "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n" + "push %r8; push %r9; push %r10; push %r11\n" + "mov 9*8(%rsp),%rsi\n" +#endif + "call do_pf_tss\n" +#ifdef __x86_64__ + "pop %r11; pop %r10; pop %r9; pop %r8\n" + "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n" +#endif + "add $"S", %"R "sp\n" +#ifdef __x86_64__ + "orl $" xstr(X86_EFLAGS_AC) ", 2*"S"(%"R "sp)\n" // set EFLAGS.AC and retry +#endif + "iret"W" \n\t" + "jmp pf_tss\n\t"); + + +#define USER_BASE (1 << 24) +#define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) + +static void init_test(int i) +{ + pf_count = 0; + if (i) { + invlpg(&test); + invlpg(&USER_VAR(test)); + } +} + +int main(int ac, char **av) +{ + unsigned long i; + + if 
(!(cpuid_indexed(7, 0).b & (1 << X86_FEATURE_SMAP))) { + printf("SMAP not enabled, exiting\n"); + exit(1); + } + + setup_vm(); + setup_alt_stack(); + set_intr_alt_stack(14, pf_tss); + + // Map first 16MB as supervisor pages + for (i = 0; i < USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PTE_USER; + invlpg((void *)i); + } + + // Present the same 16MB as user pages in the 16MB-32MB range + for (i = USER_BASE; i < 2 * USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~USER_BASE; + invlpg((void *)i); + } + + clac(); + write_cr4(read_cr4() | X86_CR4_SMAP); + write_cr3(read_cr3()); + + for (i = 0; i < 2; i++) { + if (i) + printf("testing with INVLPG\n"); + else + printf("testing without INVLPG\n"); + + init_test(i); + clac(); + test = 42; + report("write to supervisor page", pf_count == 0 && test == 42); + + init_test(i); + stac(); + (void)USER_VAR(test); + report("read from user page with AC=1", pf_count == 0); + + init_test(i); + clac(); + (void)USER_VAR(test); + report("read from user page with AC=0", pf_count == 1 && save == 42); + + init_test(i); + stac(); + save = 0; + USER_VAR(test) = 43; + report("write to user page with AC=1", pf_count == 0 && test == 43); + + init_test(i); + clac(); + USER_VAR(test) = 44; + report("write to user page with AC=0", pf_count == 1 && test == 44 && save == 43); + + init_test(i); + stac(); + test = -1; + asm("or $(" xstr(USER_BASE) "), %"R "sp \n" + "push $44 \n " + "decl test\n" + "and $~(" xstr(USER_BASE) "), %"R "sp \n" + "pop %"R "ax\n" + "movl %eax, test"); + report("write to user stack with AC=1", pf_count == 0 && test == 44); + + init_test(i); + clac(); + test = -1; + asm("or $(" xstr(USER_BASE) "), %"R "sp \n" + "push $45 \n " + "decl test\n" + "and $~(" xstr(USER_BASE) "), %"R "sp \n" + "pop %"R "ax\n" + "movl %eax, test"); + report("write to user stack with AC=0", pf_count == 1 && test == 45 && save == -1); + + /* This would be trapped by SMEP */ + 
init_test(i); + clac(); + asm("jmp 1f + "xstr(USER_BASE)" \n" + "1: jmp 2f - "xstr(USER_BASE)" \n" + "2:"); + report("executing on user page with AC=0", pf_count == 0); + } + + // TODO: implicit kernel access from ring 3 (e.g. int) + + return report_summary(); +} -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html