On Wed, Jul 27, 2011 at 11:34:21AM -0400, Andrew Lutomirski wrote: > On Wed, Jul 27, 2011 at 11:30 AM, Konrad Rzeszutek Wilk > <konrad.wilk@xxxxxxxxxx> wrote: > >> > Anyhow, removed the benchmark code and ran it on 64-bit: > >> > > >> > sh-4.1# /test_vsyscall test > >> > Testing gettimeofday... > >> > [ 109.552261] test_vsyscall[2462] trap invalid opcode ip:400c8d sp:7fff84fab470 error:0 in test_vsyscall[400000+2000] > >> > Illegal instruction > >> > sh-4.1# /test_vsyscall intcc > >> > About to execute int 0xcc from RIP = 400959 > >> > [ 114.137150] test_vsyscall[2463] illegal int 0xcc (exploit attempt?) ip:400959 cs:e033 sp:7fff8b328310 ax:2c si:0 di:7fff8b3280f0 > >> > Caught SIGSEGV: Segmentation fault (Signal sent by the kernel [(nil)])RIP = 400959 > >> > > >> > [This is on git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git #testing, which > >> > has today's linus/master and your patchset] > >> > > >> > >> I'll set up Xen. Something's clearly still buggy. > > > > You sure? This is what I get when I boot baremetal: > > > > sh-4.1# > > sh-4.1# xen-detect > > Not running on Xen. > > sh-4.1# /test_vsyscall test > > Testing gettimeo[ 84.442819] test_vsyscall[3175] trap invalid opcode ip:400c8d sp:7fffa8a72dc0 error:0fday... > > in test_vsyscall[400000+2000] > > $ test_vsyscall test > Testing gettimeofday... > vDSO offset = 0.000001s > vsyscall offset = 0.000001s > > Testing time... > vDSO offset = 0 > vsyscall offset = 0 > Testing getcpu... > ok! cpu=6 node=0 > > Can you send me your test_vsyscall binary so I can disassemble it? Here it is (also including source since I uncommented parts of it). One extra thing - I've been using AMD machines for this - I hadn't tried this on an Intel box.
Attachment:
test_vsyscall
Description: Binary data
/*
 * test_vsyscall: exercises the x86-64 vsyscall entry points, the matching
 * vDSO functions and the "int 0xcc" path, and compares their results with
 * the plain system calls.
 *
 * Commands (see main): test, bench, intcc, intcc32, call.
 */
#define _POSIX_SOURCE
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <dlfcn.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <errno.h>

/*
 * Thin wrapper around the raw modify_ldt system call.
 *
 * Returns 0 when the system call returns 0 and -1 otherwise.  syscall()
 * already reports failure by returning -1 with errno set, so errno is left
 * alone here; overwriting it with -ret would always store 1 and make
 * perror() print the wrong reason.
 */
static inline int modify_ldt(int mode, void *ptr, unsigned long size)
{
	int ret = syscall(__NR_modify_ldt, mode, ptr, size);

	return (ret == 0 ? 0 : -1);
}

/* vsyscalls and vDSO */

/* gettimeofday: fixed vsyscall address plus the vDSO symbol (may be NULL). */
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
const gtod_t vgtod = (gtod_t)0xffffffffff600000;
gtod_t vdso_gtod;

/* time: fixed vsyscall address plus the vDSO symbol (may be NULL). */
typedef long (*time_func_t)(time_t *t);
const time_func_t vtime = (time_func_t)0xffffffffff600400;
time_func_t vdso_time;

/* getcpu: fixed vsyscall address plus the vDSO symbol (may be NULL). */
typedef long (*getcpu_t)(unsigned *, unsigned *, struct getcpu_cache*);
const getcpu_t vgetcpu = (getcpu_t)0xffffffffff600800;
getcpu_t vdso_getcpu;

/*
 * Look up the vDSO entry points.
 *
 * Every lookup failure only prints a warning; the corresponding vdso_*
 * pointer then stays NULL and callers must check it before calling.
 */
void init_vdso()
{
	/* RTLD_NOLOAD: only succeed if the vDSO is already mapped. */
	void *vdso = dlopen("linux-vdso.so.1",
			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
	if (!vdso) {
		printf("Warning: failed to find vDSO\n");
		return;
	}

	vdso_gtod = (gtod_t)dlsym(vdso, "gettimeofday");
	if (!vdso_gtod)
		printf("Warning: failed to find gettimeofday in vDSO\n");

	vdso_time = (time_func_t)dlsym(vdso, "time");
	if (!vdso_time)
		printf("Warning: failed to find time in vDSO\n");

	vdso_getcpu = (getcpu_t)dlsym(vdso, "getcpu");
	if (!vdso_getcpu)
		printf("Warning: failed to find getcpu in vDSO\n");
}

/* syscalls */

/* gettimeofday through the real system call. */
static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
{
	return syscall(__NR_gettimeofday, tv, tz);
}

/* time through the real system call. */
static inline long sys_time(time_t *t)
{
	return syscall(__NR_time, t);
}

/* There is no sys_getcpu. */

/*
 * SIGSEGV handler: prints the signal information and the instruction
 * pointer saved in the signal context, then exits with status 1.
 */
static void segv(int sig, siginfo_t *info, void *ctx_void)
{
	psiginfo(info, "Caught SIGSEGV");
	ucontext_t *ctx = (ucontext_t*)ctx_void;
	/* Cast so the argument type matches %lx. */
	printf("RIP = %lx\n",
	       (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
	exit(1);
}

#if 0
/* benchmark helper */
template<typename Func>
void benchmark(const char *desc, Func f)
{
	struct timespec start, end;
	long loops = 0;

	printf("Benchmarking %s ... ", desc);
	fflush(stdout);

	if (clock_gettime(CLOCK_MONOTONIC, &start)) {
		perror("clock_gettime");
		exit(1);
	}

	while(true)
	{
		long loops_now = 1000;
		for(int i = 0; i < loops_now; i++)
			f();
		loops += loops_now;

		if (clock_gettime(CLOCK_MONOTONIC, &end)) {
			perror("clock_gettime");
			exit(1);
		}

		unsigned long long duration = (end.tv_nsec - start.tv_nsec) +
			1000000000ULL * (end.tv_sec - start.tv_sec);

		if (duration < 500000000ULL)
			continue;

		printf("%9ld loops in %.5fs = %7.2f nsec / loop\n",
		       loops, float(duration) * 1e-9,
		       float(duration) / loops);
		break;
	}
}
#endif

/* Returns a - b in seconds. */
static double tv_diff(const struct timeval &a, const struct timeval &b)
{
	return double(a.tv_sec - b.tv_sec) +
		double((int)a.tv_usec - (int)b.tv_usec) * 1e-6;
}

/*
 * "test" command: calls gettimeofday, time and getcpu through the system
 * call, the vDSO (when the symbol was found) and the vsyscall address, and
 * prints how the results compare.  Always returns 0; problems are reported
 * on stdout only.
 */
int test(int argc, char **argv)
{
	printf("Testing gettimeofday...\n");
	struct timeval tv_sys, tv_vdso, tv_vsys;
	struct timezone tz_sys, tz_vdso, tz_vsys;
	int ret_sys = sys_gtod(&tv_sys, &tz_sys);
	int ret_vdso = -1;
	if (vdso_gtod)
		ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
	int ret_vsys = vgtod(&tv_vsys, &tz_vsys);

	if (ret_sys) {
		printf(" syscall failed\n");
	} else {
		if (ret_vdso == 0) {
			if (tz_sys.tz_minuteswest != tz_vdso.tz_minuteswest ||
			    tz_sys.tz_dsttime != tz_vdso.tz_dsttime)
				printf(" vDSO tz mismatch\n");
			else
				printf(" vDSO offset = %.6fs\n",
				       tv_diff(tv_vdso, tv_sys));
		} else if (vdso_gtod) {
			printf(" vDSO failed\n");
		}
		if (ret_vsys == 0) {
			if (tz_sys.tz_minuteswest != tz_vsys.tz_minuteswest ||
			    tz_sys.tz_dsttime != tz_vsys.tz_dsttime)
				printf(" vsyscall tz mismatch\n");
			else
				printf(" vsyscall offset = %.6fs\n",
				       tv_diff(tv_vsys, tv_sys));
		}
	}

	printf("\nTesting time...\n");
	long t_sys, t_vdso = 0, t_vsys;
	long t2_sys = -1, t2_vdso = -1, t2_vsys = -1;
	t_sys = sys_time(&t2_sys);
	if (vdso_time)
		t_vdso = vdso_time(&t2_vdso);
	t_vsys = vtime(&t2_vsys);
	/* Each call must return the same value it stored through the pointer. */
	if (t_sys < 0 || t_sys != t2_sys) {
		printf(" syscall failed (ret:%ld output:%ld)\n",
		       t_sys, t2_sys);
	} else {
		if (vdso_time) {
			if (t_vdso < 0 || t_vdso != t2_vdso)
				printf(" vDSO failed (ret:%ld output:%ld)\n",
				       t_vdso, t2_vdso);
			else
				printf(" vDSO offset = %ld\n", t_vdso - t_sys);
		}

		if (t_vsys < 0 || t_vsys != t2_vsys)
			printf(" vsyscall failed (ret:%ld output:%ld)\n",
			       t_vsys, t2_vsys);
		else
			printf(" vsyscall offset = %ld\n", t_vsys - t_sys);
	}

	printf("Testing getcpu...\n");
	unsigned cpu_vdso = 0, cpu_vsys = 0, node_vdso = 0, node_vsys = 0;
	/* vdso_getcpu is NULL when init_vdso could not find it: skip it then. */
	ret_vdso = -1;
	if (vdso_getcpu)
		ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
	ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
	if (vdso_getcpu && ret_vdso)
		printf(" vDSO failed (ret:%ld)\n", (long)ret_vdso);
	if (ret_vsys)
		printf(" vsyscall failed (ret:%ld)\n", (long)ret_vsys);
	if (!vdso_getcpu) {
		/* Nothing to compare against; just show the vsyscall result. */
		if (ret_vsys == 0)
			printf(" vsyscall only: cpu=%u node=%u\n",
			       cpu_vsys, node_vsys);
	} else if (ret_vdso == 0 && ret_vsys == 0) {
		if (cpu_vdso != cpu_vsys)
			printf(" cpu mismatch (vdso:%u vsyscall:%u)!\n",
			       cpu_vdso, cpu_vsys);
		else if (node_vdso != node_vsys)
			printf(" node mismatch (vdso:%u vsyscall:%u)!\n",
			       node_vdso, node_vsys);
		else
			printf(" ok! cpu=%u node=%u\n", cpu_vdso, node_vdso);
	}

	return 0;
}

/*
 * "bench" command.  The benchmark body is compiled out together with the
 * benchmark() helper, so this currently does nothing and returns 0.
 */
int bench(int argc, char **argv)
{
	struct timeval tv;
	struct timezone tz;
#if 0
	benchmark(" syscall gettimeofday", [&]{sys_gtod(&tv, &tz);});
	benchmark(" vdso gettimeofday", [&]{vdso_gtod(&tv, &tz);});
	benchmark("vsyscall gettimeofday", [&]{vgtod(&tv, &tz);});

	printf("\n");
	time_t t;
	benchmark(" syscall time ", [&]{sys_time(&t);});
	if (vdso_time)
		benchmark(" vdso time ", [&]{vdso_time(&t);});
	benchmark("vsyscall time ", [&]{vtime(&t);});

	printf("\n");
	unsigned cpu, node;
	benchmark(" vdso getcpu ", [&]{vdso_getcpu(&cpu, &node, 0);});
	benchmark("vsyscall getcpu ", [&]{vgetcpu(&cpu, &node, 0);});

	printf("\n");
	benchmark("dummy syscall ", [&]{syscall(0xffffffff);});
#endif

	return 0;
}

/*
 * Parses the whole of s as an unsigned integer (base chosen by strtoull's
 * base-0 rules).  Returns false for an empty string, trailing characters or
 * an out-of-range value.
 */
static bool parse_ulong(const char *s, unsigned long *out)
{
	char *end;

	errno = 0;
	unsigned long long value = strtoull(s, &end, 0);
	if (end == s || *end != '\0' || errno == ERANGE)
		return false;

	*out = (unsigned long)value;
	return true;
}

/*
 * "call" command: call <addr> <rax> <arg1> <arg2> <arg3>
 *
 * Performs an indirect call to addr with rax, rdi, rsi and rdx preloaded
 * from the arguments and prints the value left in rax.  Returns 0 after the
 * call, 1 on a usage or parse error.
 */
int call(int argc, char **argv)
{
	if (argc != 5) {
		printf("Usage: call <addr> <rax> <arg1> <arg2> <arg3>\n");
		return 1;
	}

	unsigned long addr, rax, arg1, arg2, arg3;
	if (!parse_ulong(argv[0], &addr) ||
	    !parse_ulong(argv[1], &rax) ||
	    !parse_ulong(argv[2], &arg1) ||
	    !parse_ulong(argv[3], &arg2) ||
	    !parse_ulong(argv[4], &arg3)) {
		printf("Bad arg\n");
		return 1;
	}

	/*
	 * The target is arbitrary code, so tell the compiler that the
	 * argument registers may come back modified ("+" operands) and that
	 * the remaining call-clobbered registers, memory and flags are not
	 * preserved.
	 */
	asm volatile("call *%[addr]"
		     : "+a" (rax), "+D" (arg1), "+S" (arg2), "+d" (arg3)
		     : [addr] "rm" (addr)
		     : "rcx", "r8", "r9", "r10", "r11", "memory", "cc");
	printf("Return value = %ld\n", (long)rax);

	return 0;
}

/*
 * "intcc" command: prints the address of the int $0xcc instruction and then
 * executes it.  Takes no arguments; returns 1 on usage error, 0 if execution
 * continues past the instruction.
 */
int intcc(int argc, char **argv)
{
	if (argc != 0) {
		printf("Usage: intcc\n");
		return 1;
	}

	/* Label defined by the asm statement below. */
	extern char intcc_addr;
	printf("About to execute int 0xcc from RIP = %lX\n",
	       (unsigned long)&intcc_addr);
	asm volatile ("intcc_addr: int $0xcc");

	return 0;
}

/* Memory operand for a far jump: 32-bit offset followed by the selector. */
struct __attribute__((packed)) farptr {
	uint32_t offset;
	uint16_t sel;
};

/*
 * Fills *out with sel:offset.  Returns false if offset does not fit in the
 * 32-bit offset field (the stored value is then truncated).
 */
static bool to_farptr(farptr *out, uint16_t sel, void *offset)
{
	out->sel = sel;
	out->offset = (uint32_t)(unsigned long)offset;
	return out->offset == (unsigned long)offset;
}

/*
 * "intcc32" command: installs a 32-bit code segment in the LDT, far-jumps
 * into it, executes int $0xcc there and far-jumps back to the original code
 * segment.  Takes no arguments; returns 1 on usage or setup error, 0 if the
 * round trip completes.
 */
int intcc32(int argc, char **argv)
{
	if (argc != 0) {
		printf("Usage: intcc32\n");
		return 1;
	}

	// Install a 32-bit code descriptor
	struct user_desc desc;
	memset(&desc, 0, sizeof(desc));
	desc.entry_number = 0;
	desc.base_addr = 0;
	desc.limit = 0xFFFFF;
	desc.seg_32bit = 1;
	desc.contents = MODIFY_LDT_CONTENTS_CODE;
	desc.limit_in_pages = 1;

	/* Mode 1 writes an LDT entry; see modify_ldt(2). */
	if (modify_ldt(1, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}

	/* Load the initial CS. */
	uint16_t initial_cs;
	asm ("mov %%cs,%[initial_cs]" : [initial_cs] "=rm" (initial_cs));
	printf("Initial CS = 0x%04X (entry %d)\n",
	       (unsigned)initial_cs, (int)(initial_cs >> 3));

	/* Labels defined by the asm statement below. */
	extern char landing_32, landing_64;

	/*
	 * Set up the pointers.  Selector 0x4 is index 0 with the
	 * table-indicator bit set, i.e. the LDT entry installed above.
	 */
	static farptr ptr32, ptr64;
	if (!to_farptr(&ptr32, 0x4, &landing_32) ||
	    !to_farptr(&ptr64, initial_cs, &landing_64)) {
		printf("Something's mapped too high\n");
		return 1;
	}

	/*
	 * Go for it!  The "memory" clobber makes sure ptr32/ptr64 have
	 * really been stored before the far jumps read them.
	 */
	asm volatile (
		"mov %%rsp,%%rsi\n"		// Save rsp (avoids truncation).
		"ljmpl *(%%eax)\n"		// Switch to 32-bit mode.

		// 32-bit mode!
		// (Well, sort of. DS and ES are 0, so we can't use them.)
		".code32\n"
		"landing_32:\n"
		"\tint $0xcc\n"			// Try int 0xcc.
		"\tljmpl *%%cs:(%%ecx)\n"	// Switch back.

		// 64-bit mode again!
		".code64\n"
		"landing_64:\n"
		"\tmov %%rsi,%%rsp"
		:
		: "a" (&ptr32), "c" (&ptr64)
		: "rsi", "cc", "memory");

	printf("Holy cow! We survived!\n");

	return 0;
}

/*
 * Installs the SIGSEGV handler, resolves the vDSO symbols and dispatches to
 * the command named by argv[1], passing it the remaining arguments.
 * Returns the command's result, or 1 for a missing or unknown command.
 */
int main(int argc, char **argv)
{
	struct sigaction sa_segv;
	memset(&sa_segv, 0, sizeof(sa_segv));
	sa_segv.sa_sigaction = segv;
	sa_segv.sa_flags = SA_SIGINFO;
	sigemptyset(&sa_segv.sa_mask);
	if (sigaction(SIGSEGV, &sa_segv, 0))
		perror("sigaction");

	init_vdso();

	if (argc < 2) {
		printf("Usage: test_vsyscall <command> ...\n"
		       "command := { test, bench, intcc, intcc32, call }\n");
		return 1;
	}

	if (!strcmp(argv[1], "test"))
		return test(argc - 2, argv + 2);
	if (!strcmp(argv[1], "bench"))
		return bench(argc - 2, argv + 2);
	if (!strcmp(argv[1], "intcc"))
		return intcc(argc - 2, argv + 2);
	if (!strcmp(argv[1], "intcc32"))
		return intcc32(argc - 2, argv + 2);
	if (!strcmp(argv[1], "call"))
		return call(argc - 2, argv + 2);

	printf("Unknown command\n");
	return 1;
}
_______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/virtualization