Re: [PATCH 0/5] Collected vdso/vsyscall fixes for 3.1

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 27, 2011 at 11:34:21AM -0400, Andrew Lutomirski wrote:
> On Wed, Jul 27, 2011 at 11:30 AM, Konrad Rzeszutek Wilk
> <konrad.wilk@xxxxxxxxxx> wrote:
> >> > Anyhow, removed the benchmark code and ran it on 64-bit:
> >> >
> >> > sh-4.1# /test_vsyscall  test
> >> > Testing gettimeofday...
> >> > [  109.552261] test_vsyscall[2462] trap invalid opcode ip:400c8d sp:7fff84fab470 error:0 in test_vsyscall[400000+2000]
> >> > Illegal instruction
> >> > sh-4.1# /test_vsyscall  intcc
> >> > About to execute int 0xcc from RIP = 400959
> >> > [  114.137150] test_vsyscall[2463] illegal int 0xcc (exploit attempt?) ip:400959 cs:e033 sp:7fff8b328310 ax:2c si:0 di:7fff8b3280f0
> >> > Caught SIGSEGV: Segmentation fault (Signal sent by the kernel [(nil)])RIP = 400959
> >> >
> >> > [This is on git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git #testing, which
> >> > has today's linus/master and your patchset]
> >> >
> >>
> >> I'll set up Xen.  Something's clearly still buggy.
> >
> > You sure? This is what I get when I boot baremetal:
> >
> > sh-4.1#
> > sh-4.1# xen-detect
> > Not running on Xen.
> > sh-4.1# /test_vsyscall test
> > Testing gettimeo[   84.442819] test_vsyscall[3175] trap invalid opcode ip:400c8d sp:7fffa8a72dc0 error:0fday...
> >  in test_vsyscall[400000+2000]
> 
> $ test_vsyscall test
> Testing gettimeofday...
>   vDSO offset = 0.000001s
>   vsyscall offset = 0.000001s
> 
> Testing time...
>   vDSO offset = 0
>   vsyscall offset = 0
> Testing getcpu...
>   ok!  cpu=6 node=0
> 
> Can you send me your test_vsyscall binary so I can disassemble it?

Here it is (also including source since I uncommented parts of it).

One extra thing - I've been using AMD machines for this - I hadn't
tried this on an Intel box.

Attachment: test_vsyscall
Description: Binary data

#define _POSIX_SOURCE

#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <dlfcn.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <errno.h>

/*
 * Thin wrapper around the modify_ldt system call.
 *
 * mode, ptr and size are passed through unchanged as the three syscall
 * arguments.
 *
 * Returns the (non-negative) syscall result on success -- 0 for the write
 * request used in this file -- and -1 on failure.  On failure errno is left
 * exactly as syscall() set it, so perror()/strerror() report the real cause.
 */
static inline int modify_ldt(int mode, void *ptr, unsigned long size)
{
  long ret = syscall(__NR_modify_ldt, mode, ptr, size);

  /* syscall() has already translated a kernel error into -1 + errno;
     do not overwrite errno with a value derived from the -1. */
  if (ret < 0)
    return -1;
  return (int)ret;
}

/* vsyscalls and vDSO */

/*
 * gettimeofday-shaped entry point.  vgtod is a constant pointer to the fixed
 * address 0xffffffffff600000 (the vsyscall flavour); vdso_gtod is filled in
 * by init_vdso() and stays null if the vDSO or the symbol cannot be found.
 */
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
const gtod_t vgtod = (gtod_t)0xffffffffff600000;
gtod_t vdso_gtod;

/*
 * time-shaped entry point.  vtime sits 0x400 bytes after vgtod; vdso_time is
 * looked up by init_vdso() and may be null.
 */
typedef long (*time_func_t)(time_t *t);
const time_func_t vtime = (time_func_t)0xffffffffff600400;
time_func_t vdso_time;

/*
 * getcpu-shaped entry point (cpu, node, cache).  vgetcpu sits 0x800 bytes
 * after vgtod; vdso_getcpu is looked up by init_vdso() and may be null.
 * Callers in this file always pass 0 for the cache argument.
 */
typedef long (*getcpu_t)(unsigned *, unsigned *, struct getcpu_cache*);
const getcpu_t vgetcpu = (getcpu_t)0xffffffffff600800;
getcpu_t vdso_getcpu;

/*
 * Resolve the vDSO entry points used by the tests.
 *
 * Obtains a handle on "linux-vdso.so.1" (RTLD_NOLOAD: only if it is already
 * present in the process) and stores the addresses of gettimeofday, time and
 * getcpu in vdso_gtod, vdso_time and vdso_getcpu.  A warning is printed for
 * anything that cannot be found; the corresponding pointer is then null.
 * If the vDSO itself is missing, none of the pointers is touched.
 */
void init_vdso()
{
  void *handle = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
  if (handle == NULL) {
    printf("Warning: failed to find vDSO\n");
    return;
  }

  void *sym;

  sym = dlsym(handle, "gettimeofday");
  vdso_gtod = (gtod_t)sym;
  if (sym == NULL)
    printf("Warning: failed to find gettimeofday in vDSO\n");

  sym = dlsym(handle, "time");
  vdso_time = (time_func_t)sym;
  if (sym == NULL)
    printf("Warning: failed to find time in vDSO\n");

  sym = dlsym(handle, "getcpu");
  vdso_getcpu = (getcpu_t)sym;
  if (sym == NULL)
    printf("Warning: failed to find getcpu in vDSO\n");
}

/* syscalls */
/*
 * Issue gettimeofday through the generic syscall() entry point
 * (__NR_gettimeofday) and hand back whatever syscall() returns.
 */
static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
{
  const long result = syscall(__NR_gettimeofday, tv, tz);
  return result;
}

/*
 * Issue time through the generic syscall() entry point (__NR_time) and
 * hand back whatever syscall() returns; t is passed through unchanged.
 */
static inline long sys_time(time_t *t)
{
  const long seconds = syscall(__NR_time, t);
  return seconds;
}

/* There is no sys_getcpu. */

/*
 * SIGSEGV handler (SA_SIGINFO style): describe the signal, print the
 * faulting instruction pointer taken from the saved context, and exit
 * with status 1.
 *
 * sig      - signal number (unused; the handler is only installed for SIGSEGV)
 * info     - siginfo supplied by the kernel, forwarded to psiginfo()
 * ctx_void - really a ucontext_t *, as delivered to SA_SIGINFO handlers
 *
 * psiginfo/printf/exit are not async-signal-safe; that is tolerated here
 * because the process terminates immediately afterwards.
 */
static void segv(int sig, siginfo_t *info, void *ctx_void)
{
  (void)sig;

  psiginfo(info, "Caught SIGSEGV");

  const ucontext_t *ctx = (const ucontext_t *)ctx_void;
  /* gregs[] elements are greg_t, which is not 'unsigned long'; convert
     explicitly so the argument matches the %lx conversion. */
  printf("RIP = %lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);

  exit(1);
}

#if 0
/*
 * benchmark helper (currently compiled out by the surrounding #if 0)
 *
 * Calls f() in batches of 1000 until at least 0.5 s of CLOCK_MONOTONIC time
 * has elapsed since the start, then prints the total loop count, the elapsed
 * time in seconds and the average nanoseconds per call.
 *
 * desc - label printed before the measurement starts
 * f    - callable invoked with no arguments
 *
 * Exits the process with status 1 if clock_gettime() fails.
 */
template<typename Func> void benchmark(const char *desc, Func f)
{
  struct timespec start, end;
  long loops = 0;

  printf("Benchmarking %s ... ", desc);
  fflush(stdout);  /* the line has no newline yet; force it out now */

  if (clock_gettime(CLOCK_MONOTONIC, &start)) {
    perror("clock_gettime");
    exit(1);
  }

  while(true)
    {
      /* Run a fixed-size batch between clock reads. */
      long loops_now = 1000;
      for(int i = 0; i < loops_now; i++)
	f();
      loops += loops_now;

      if (clock_gettime(CLOCK_MONOTONIC, &end)) {
	perror("clock_gettime");
	exit(1);
      }

      /* Elapsed time in nanoseconds since 'start'. */
      unsigned long long duration = (end.tv_nsec - start.tv_nsec) +
	1000000000ULL * (end.tv_sec - start.tv_sec);

      /* Keep going until half a second has been accumulated. */
      if (duration < 500000000ULL)
	continue;

      printf("%9ld loops in %.5fs = %7.2f nsec / loop\n",
	     loops, float(duration) * 1e-9,
	     float(duration) / loops);
      break;
    }
}
#endif
/*
 * Return a - b in seconds as a double.
 *
 * The whole-second and microsecond parts are subtracted separately; the
 * microsecond fields are narrowed to int before subtracting, then scaled
 * by 1e-6 and added to the second difference.
 */
static double tv_diff(const struct timeval &a, const struct timeval &b)
{
  const double whole_seconds = double(a.tv_sec - b.tv_sec);
  const int usec_delta = (int)a.tv_usec - (int)b.tv_usec;

  return whole_seconds + double(usec_delta) * 1e-6;
}

int test(int argc, char **argv)
{
  printf("Testing gettimeofday...\n");
  struct timeval tv_sys, tv_vdso, tv_vsys;
  struct timezone tz_sys, tz_vdso, tz_vsys;
  int ret_sys = sys_gtod(&tv_sys, &tz_sys);
  int ret_vdso = -1;
  if (vdso_gtod)
    ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
  int ret_vsys = vgtod(&tv_vsys, &tz_vsys);

  if (ret_sys) {
    printf("  syscall failed\n");
  } else {
    if (ret_vdso == 0) {
      if (tz_sys.tz_minuteswest != tz_vdso.tz_minuteswest || tz_sys.tz_dsttime != tz_vdso.tz_dsttime)
	printf("  vDSO tz mismatch\n");
      else
	printf("  vDSO offset = %.6fs\n", tv_diff(tv_vdso, tv_sys));
    } else if (vdso_gtod) {
      printf("  vDSO failed\n");
    }
    if (ret_vsys == 0) {
      if (tz_sys.tz_minuteswest != tz_vsys.tz_minuteswest || tz_sys.tz_dsttime != tz_vsys.tz_dsttime)
	printf("  vsyscall tz mismatch\n");
      else
	printf("  vsyscall offset = %.6fs\n", tv_diff(tv_vsys, tv_sys));
    }
  }

  printf("\nTesting time...\n");
  long t_sys, t_vdso = 0, t_vsys; 
  long t2_sys = -1, t2_vdso = -1, t2_vsys = -1;
  t_sys = sys_time(&t2_sys);
  if (vdso_time)
    t_vdso = vdso_time(&t2_vdso);
  t_vsys = vtime(&t2_vsys);
  if (t_sys < 0 || t_sys != t2_sys) {
    printf("  syscall failed (ret:%ld output:%ld)\n", t_sys, t2_sys);
  } else {
    if (vdso_time) {
      if (t_vdso < 0 || t_vdso != t2_vdso)
	printf("  vDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
      else
	printf("  vDSO offset = %ld\n", t_vdso - t_sys);
    }

    if (t_vsys < 0 || t_vsys != t2_vsys)
      printf("  vsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
    else
      printf("  vsyscall offset = %ld\n", t_vsys - t_sys);
  }

  printf("Testing getcpu...\n");
  unsigned cpu_vdso, cpu_vsys, node_vdso, node_vsys;
  ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
  ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
  if (ret_vdso)
    printf("  vDSO failed (ret:%ld)\n", (unsigned long)ret_vdso);
  if (ret_vsys)
    printf("  vsyscall failed (ret:%ld)\n", (unsigned long)ret_vdso);
  if (ret_vdso == 0 && ret_vsys == 0) {
    if (cpu_vdso != cpu_vsys)
      printf("  cpu mismatch (vdso:%u vsyscall:%u)!\n", cpu_vdso, cpu_vsys);
    else if (node_vdso != node_vsys)
      printf("  node mismatch (vdso:%u vsyscall:%u)!\n", node_vdso, node_vsys);
    else
      printf("  ok!  cpu=%u node=%u\n", cpu_vdso, node_vdso);
  }

  return 0;
}

/*
 * "bench" command.
 *
 * NOTE(review): the whole measurement body is compiled out by #if 0, so as
 * built this command declares two unused locals, prints nothing and
 * returns 0.
 *
 * The disabled body would time gettimeofday, time and getcpu through the
 * syscall, vDSO and vsyscall paths plus a dummy syscall, using benchmark().
 * Only the vdso_time call is guarded against a null pointer there;
 * vdso_gtod and vdso_getcpu are called unconditionally.
 */
int bench(int argc, char **argv)
{
  struct timeval tv;
  struct timezone tz;
#if 0
  benchmark(" syscall gettimeofday", [&]{sys_gtod(&tv, &tz);});
  benchmark("    vdso gettimeofday", [&]{vdso_gtod(&tv, &tz);});
  benchmark("vsyscall gettimeofday", [&]{vgtod(&tv, &tz);});

  printf("\n");
  time_t t;
  benchmark(" syscall time        ", [&]{sys_time(&t);});
  if (vdso_time)
    benchmark("    vdso time        ", [&]{vdso_time(&t);});
  benchmark("vsyscall time        ", [&]{vtime(&t);});

  printf("\n");
  unsigned cpu, node;
  benchmark("    vdso getcpu      ", [&]{vdso_getcpu(&cpu, &node, 0);});
  benchmark("vsyscall getcpu      ", [&]{vgetcpu(&cpu, &node, 0);});

  printf("\n");
  /* 0xffffffff is used as the syscall number for the "dummy" measurement. */
  benchmark("dummy syscall        ", [&]{syscall(0xffffffff);});
#endif
  return 0;
}

/*
 * Parse one numeric command-line argument (base auto-detected by strtoul:
 * decimal, 0x hex or leading-0 octal).
 *
 * Returns 1 and stores the value in *out on success.  Returns 0 if the text
 * is empty, has trailing junk, or does not fit in an unsigned long.
 */
static int parse_ulong(const char *text, unsigned long *out)
{
  char *end;

  errno = 0;
  unsigned long value = strtoul(text, &end, 0);
  if (end == text || *end != '\0' || errno == ERANGE)
    return 0;

  *out = value;
  return 1;
}

/*
 * "call" command: call an arbitrary address with chosen register contents.
 *
 * argv[0..4] are <addr> <rax> <arg1> <arg2> <arg3>; rax is loaded into RAX
 * and arg1..arg3 into RDI, RSI and RDX before an indirect call to addr.
 * The value left in RAX is printed as a signed number.
 *
 * Returns 0 after the call returns, 1 on a usage or parse error.
 */
int call(int argc, char **argv)
{
  if (argc != 5) {
    printf("Usage: call <addr> <rax> <arg1> <arg2> <arg3>\n");
    return 1;
  }

  /* addr, rax, arg1, arg2, arg3 in command-line order. */
  unsigned long vals[5];
  for (int i = 0; i < 5; i++) {
    if (!parse_ulong(argv[i], &vals[i])) {
      printf("Bad arg\n");
      return 1;
    }
  }

  unsigned long addr = vals[0];
  unsigned long ret = vals[1];   /* RAX: input value, then return value */
  unsigned long arg1 = vals[2];
  unsigned long arg2 = vals[3];
  unsigned long arg3 = vals[4];

  /*
   * The callee is an ordinary function as far as the compiler must assume:
   * it may change every caller-saved register, memory and the flags.
   * RDI/RSI/RDX are therefore read-write operands rather than pure inputs,
   * and the remaining caller-saved registers are listed as clobbers.
   * RSP is moved down by 128 bytes around the call so the pushed return
   * address cannot land in the red zone; this keeps 16-byte alignment.
   * addr must live in a register ("r"), not in RSP-relative memory,
   * because RSP is adjusted before it is used.
   */
  asm volatile("sub $128, %%rsp\n\t"
	       "call *%[addr]\n\t"
	       "add $128, %%rsp"
	       : "+a" (ret), "+D" (arg1), "+S" (arg2), "+d" (arg3)
	       : [addr] "r" (addr)
	       : "rcx", "r8", "r9", "r10", "r11",
		 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
		 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
		 "xmm15", "memory", "cc");

  printf("Return value = %ld\n", (long)ret);

  return 0;
}

/*
 * "intcc" command: execute the instruction "int $0xcc" directly.
 *
 * Takes no arguments (argc must be 0, otherwise usage is printed and 1 is
 * returned).  Prints the address of the instruction first so it can be
 * matched against whatever the kernel or the SIGSEGV handler reports.
 * Returns 0 if execution continues past the instruction.
 */
int intcc(int argc, char **argv)
{
  if (argc != 0) {
    printf("Usage: intcc\n");
    return 1;
  }

  /* intcc_addr is not a C object: it is the assembler label defined in the
     asm statement below, declared here only so its address can be taken. */
  extern char intcc_addr;
  printf("About to execute int 0xcc from RIP = %lX\n",
	 (unsigned long)&intcc_addr);

  /* The label marks the int instruction itself.
     NOTE(review): the label is emitted once per copy of this asm statement,
     so this relies on the function body not being duplicated by the
     compiler -- confirm if optimisation settings change. */
  asm volatile ("intcc_addr: int $0xcc");
  return 0;
}

/*
 * In-memory operand for an indirect far jump: a 32-bit offset immediately
 * followed by a 16-bit segment selector.  Packed so that no padding is
 * inserted and the object is exactly 6 bytes.
 */
struct __attribute__((packed)) farptr {
  uint32_t offset;
  uint16_t sel;
};

/*
 * Fill *out with selector 'sel' and the low 32 bits of 'offset'.
 *
 * Returns true when the address fits in 32 bits, i.e. nothing was lost by
 * the truncation; false otherwise (*out is still written in that case).
 */
static bool to_farptr(farptr *out, uint16_t sel, void *offset)
{
  const unsigned long full_addr = (unsigned long)offset;
  const uint32_t low_bits = (uint32_t)full_addr;

  out->offset = low_bits;
  out->sel = sel;

  return full_addr == low_bits;
}

/*
 * "intcc32" command: execute "int $0xcc" from a 32-bit code segment.
 *
 * Steps:
 *   1. install a flat 32-bit code descriptor as LDT entry 0,
 *   2. far-jump into it, execute int $0xcc,
 *   3. far-jump back to the original 64-bit code segment.
 *
 * Takes no arguments (argc must be 0).  Returns 0 if the round trip
 * completes, 1 on usage error, modify_ldt failure, or if any address that
 * has to be used with 32-bit addressing lies above 4 GiB.
 */
int intcc32(int argc, char **argv)
{
  (void)argv;

  if (argc != 0) {
    printf("Usage: intcc32\n");
    return 1;
  }

  // Install a 32-bit code descriptor
  struct user_desc desc;
  memset(&desc, 0, sizeof(desc));
  desc.entry_number = 0;
  desc.base_addr = 0;
  desc.limit = 0xFFFFF;
  desc.seg_32bit = 1;
  desc.contents = MODIFY_LDT_CONTENTS_CODE;
  desc.limit_in_pages = 1;	/* limit counts pages: 0xFFFFF pages */

  if (modify_ldt(1, &desc, sizeof(desc)) != 0) {
    perror("modify_ldt");
    return 1;
  }

  /* Load the initial CS. */
  uint16_t initial_cs;
  asm ("mov %%cs,%[initial_cs]" : [initial_cs] "=rm" (initial_cs));
  printf("Initial CS = 0x%04X (entry %d)\n",
	 (unsigned)initial_cs, (int)(initial_cs >> 3));

  /* Assembler labels defined inside the asm block below. */
  extern char landing_32, landing_64;

  /* Set up the pointers.  Selector 0x4 = index 0 with the table-indicator
     bit set, i.e. the LDT entry installed above. */
  static farptr ptr32, ptr64;
  if (!to_farptr(&ptr32, 0x4, &landing_32) || !to_farptr(&ptr64, initial_cs, &landing_64)) {
    printf("Something's mapped too high\n");
    return 1;
  }

  /* The far pointers themselves are dereferenced through %eax / %ecx, so
     their own addresses must fit in 32 bits as well. */
  if ((unsigned long)&ptr32 > 0xFFFFFFFFUL || (unsigned long)&ptr64 > 0xFFFFFFFFUL) {
    printf("Something's mapped too high\n");
    return 1;
  }

  /* Go for it! */
  asm volatile (
		"mov %%rsp,%%rsi\n"		// Save rsp (avoids truncation).
		"ljmpl *(%%eax)\n"		// Switch to 32-bit mode.

		// 32-bit mode!
		// (Well, sort of.  DS and ES are 0, so we can't use them.)
		".code32\n"
		"landing_32:\n"
		"\tint $0xcc\n"			// Try int 0xcc.
		"\tljmpl *%%cs:(%%ecx)\n"	// Switch back.

		// 64-bit mode again!
		".code64\n"
		"landing_64:\n"
		"\tmov %%rsi,%%rsp"
		:
		: "a" (&ptr32), "c" (&ptr64)
		// "memory": the asm reads ptr32/ptr64 through the registers
		// above, so their stores must be complete before it runs.
		: "rsi", "cc", "memory");

  printf("Holy cow!  We survived!\n");

  return 0;
}

/*
 * Entry point: install the SIGSEGV reporter, resolve the vDSO symbols and
 * dispatch to the sub-command named by argv[1].
 *
 * Each sub-command receives the arguments that follow its name (argc - 2,
 * argv + 2) and its return value becomes the exit status.  Returns 1 when
 * no command or an unknown command is given.
 */
int main(int argc, char **argv)
{
  static const struct {
    const char *name;
    int (*run)(int argc, char **argv);
  } commands[] = {
    { "test",    test },
    { "bench",   bench },
    { "intcc",   intcc },
    { "intcc32", intcc32 },
    { "call",    call },
  };

  struct sigaction sa_segv;
  memset(&sa_segv, 0, sizeof(sa_segv));
  sa_segv.sa_sigaction = segv;
  sa_segv.sa_flags = SA_SIGINFO;
  sigemptyset(&sa_segv.sa_mask);
  /* Best effort: without the handler the tests still run, a fault is just
     reported less helpfully. */
  if (sigaction(SIGSEGV, &sa_segv, 0))
    perror("sigaction");

  init_vdso();
  if (argc < 2) {
    /* Keep this list in sync with commands[] above. */
    printf("Usage: test_vsyscall <command> ...\n"
	   "command := { test, bench, intcc, intcc32, call }\n");
    return 1;
  }

  for (unsigned i = 0; i < sizeof(commands) / sizeof(commands[0]); i++) {
    if (!strcmp(argv[1], commands[i].name))
      return commands[i].run(argc - 2, argv + 2);
  }

  printf("Unknown command\n");
  return 1;
}
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/virtualization

[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux