Re: [PATCH] mm: Enforce a minimal stack gap even against inaccessible VMAs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 8, 2024 at 1:40 PM Lorenzo Stoakes
<lorenzo.stoakes@xxxxxxxxxx> wrote:
> This is touching mm/mmap.c, please ensure to cc- the reviewers (me and
> Liam, I have cc'd Liam here) as listed in MAINTAINERS when submitting
> patches for this file.

Ah, my bad, sorry about that.

> Also this seems like a really speculative 'please discuss' change so should
> be an RFC imo.
>
> On Tue, Oct 08, 2024 at 12:55:39AM +0200, Jann Horn wrote:
> > As explained in the comment block this change adds, we can't tell what
> > userspace's intent is when the stack grows towards an inaccessible VMA.
> >
> > I have a (highly contrived) C testcase for 32-bit x86 userspace with glibc
> > that mixes malloc(), pthread creation, and recursion in just the right way
> > such that the main stack overflows into malloc() arena memory.
> > (Let me know if you want me to share that.)
>
> You are claiming a fixes from 2017 and cc'ing stable on a non-RFC so
> yeah... we're going to need more than your word for it :) we will want to
> be really sure this is a thing before we backport to basically every
> stable kernel.
>
> Surely this isn't that hard to demonstrate though? You mmap something
> PROT_NONE just stack gap below the stack, then intentionally extend stack
> to it, before mprotect()'ing the PROT_NONE region?

I've attached my test case that demonstrates this using basically only
malloc, free, pthread_create() and recursion, plus a bunch of ugly
read-only gunk and synchronization. It assumes that it runs under
glibc, as a 32-bit x86 binary. Usage:

$ clang -O2 -fstack-check -m32 -o grow-32 grow-32.c -pthread -ggdb &&
for i in {0..10}; do ./grow-32; done
corrupted thread_obj2 at depth 190528
corrupted thread_obj2 at depth 159517
corrupted thread_obj2 at depth 209777
corrupted thread_obj2 at depth 200119
corrupted thread_obj2 at depth 208093
corrupted thread_obj2 at depth 167705
corrupted thread_obj2 at depth 234523
corrupted thread_obj2 at depth 174528
corrupted thread_obj2 at depth 223823
corrupted thread_obj2 at depth 199816
grow-32: malloc failed: Cannot allocate memory

This demonstrates that it is possible for a userspace program that is
just using basic libc functionality, and whose only bug is unbounded
recursion, to corrupt memory despite being built with -fstack-check.

As you said, to just demonstrate the core issue in a more contrived
way, you can also use a simpler example:

$ cat basic-grow-repro.c
#include <err.h>
#include <stdlib.h>
#include <sys/mman.h>

#define STACK_POINTER() ({ void *__stack; asm volatile("mov %%rsp,
%0":"=r"(__stack)); __stack; })

int main(void) {
  char *ptr = (char*)(  (unsigned long)(STACK_POINTER() -
(1024*1024*4)/*4MiB*/) & ~0xfffUL  );
  if (mmap(ptr, 0x1000, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0) != ptr)
    err(1, "mmap");
  *(volatile char *)(ptr + 0x1000); /* expand stack */
  if (mprotect(ptr, 0x1000, PROT_READ|PROT_WRITE|PROT_EXEC))
    err(1, "mprotect");
  system("cat /proc/$PPID/maps");
}
$ gcc -o basic-grow-repro basic-grow-repro.c
$ ./basic-grow-repro
5600a0fef000-5600a0ff0000 r--p 00000000 fd:01 28313737
  [...]/basic-grow-repro
5600a0ff0000-5600a0ff1000 r-xp 00001000 fd:01 28313737
  [...]/basic-grow-repro
5600a0ff1000-5600a0ff2000 r--p 00002000 fd:01 28313737
  [...]/basic-grow-repro
5600a0ff2000-5600a0ff3000 r--p 00002000 fd:01 28313737
  [...]/basic-grow-repro
5600a0ff3000-5600a0ff4000 rw-p 00003000 fd:01 28313737
  [...]/basic-grow-repro
7f9a88553000-7f9a88556000 rw-p 00000000 00:00 0
7f9a88556000-7f9a8857c000 r--p 00000000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f9a8857c000-7f9a886d2000 r-xp 00026000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f9a886d2000-7f9a88727000 r--p 0017c000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f9a88727000-7f9a8872b000 r--p 001d0000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f9a8872b000-7f9a8872d000 rw-p 001d4000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f9a8872d000-7f9a8873a000 rw-p 00000000 00:00 0
7f9a88754000-7f9a88756000 rw-p 00000000 00:00 0
7f9a88756000-7f9a8875a000 r--p 00000000 00:00 0                          [vvar]
7f9a8875a000-7f9a8875c000 r-xp 00000000 00:00 0                          [vdso]
7f9a8875c000-7f9a8875d000 r--p 00000000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f9a8875d000-7f9a88782000 r-xp 00001000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f9a88782000-7f9a8878c000 r--p 00026000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f9a8878c000-7f9a8878e000 r--p 00030000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f9a8878e000-7f9a88790000 rw-p 00032000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7fff84664000-7fff84665000 rwxp 00000000 00:00 0
7fff84665000-7fff84a67000 rw-p 00000000 00:00 0                          [stack]
$


Though, while writing the above reproducer, I noticed another dodgy
scenario regarding the stack gap: MAP_FIXED_NOREPLACE apparently
ignores the stack guard region, because it only checks for VMA
intersection, see this example:

$ cat basic-grow-repro-ohno.c
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define STACK_POINTER() ({ void *__stack; asm volatile("mov %%rsp,
%0":"=r"(__stack)); __stack; })

int main(void) {
  setbuf(stdout, NULL);
  char *ptr = (char*)(  (unsigned long)(STACK_POINTER() -
(1024*1024*4)/*4MiB*/) & ~0xfffUL  );
  *(volatile char *)(ptr + 0x1000); /* expand stack to just above ptr */

  printf("trying to map at %p\n", ptr);
  system("cat /proc/$PPID/maps;echo");
  if (mmap(ptr, 0x1000, PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_FIXED_NOREPLACE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0) != ptr)
    err(1, "mmap");
  system("cat /proc/$PPID/maps");
}
$ gcc -o basic-grow-repro-ohno basic-grow-repro-ohno.c
$ ./basic-grow-repro-ohno
trying to map at 0x7ffc344ca000
560ee371d000-560ee371e000 r--p 00000000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee371e000-560ee371f000 r-xp 00001000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee371f000-560ee3720000 r--p 00002000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee3720000-560ee3721000 r--p 00002000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee3721000-560ee3722000 rw-p 00003000 fd:01 28313842
  [...]/basic-grow-repro-ohno
7f0d636ed000-7f0d636f0000 rw-p 00000000 00:00 0
7f0d636f0000-7f0d63716000 r--p 00000000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d63716000-7f0d6386c000 r-xp 00026000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d6386c000-7f0d638c1000 r--p 0017c000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c1000-7f0d638c5000 r--p 001d0000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c5000-7f0d638c7000 rw-p 001d4000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c7000-7f0d638d4000 rw-p 00000000 00:00 0
7f0d638ee000-7f0d638f0000 rw-p 00000000 00:00 0
7f0d638f0000-7f0d638f4000 r--p 00000000 00:00 0                          [vvar]
7f0d638f4000-7f0d638f6000 r-xp 00000000 00:00 0                          [vdso]
7f0d638f6000-7f0d638f7000 r--p 00000000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d638f7000-7f0d6391c000 r-xp 00001000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d6391c000-7f0d63926000 r--p 00026000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d63926000-7f0d63928000 r--p 00030000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d63928000-7f0d6392a000 rw-p 00032000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffc344cb000-7ffc348cd000 rw-p 00000000 00:00 0                          [stack]

560ee371d000-560ee371e000 r--p 00000000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee371e000-560ee371f000 r-xp 00001000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee371f000-560ee3720000 r--p 00002000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee3720000-560ee3721000 r--p 00002000 fd:01 28313842
  [...]/basic-grow-repro-ohno
560ee3721000-560ee3722000 rw-p 00003000 fd:01 28313842
  [...]/basic-grow-repro-ohno
7f0d636ed000-7f0d636f0000 rw-p 00000000 00:00 0
7f0d636f0000-7f0d63716000 r--p 00000000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d63716000-7f0d6386c000 r-xp 00026000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d6386c000-7f0d638c1000 r--p 0017c000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c1000-7f0d638c5000 r--p 001d0000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c5000-7f0d638c7000 rw-p 001d4000 fd:01 3417714
  /usr/lib/x86_64-linux-gnu/libc.so.6
7f0d638c7000-7f0d638d4000 rw-p 00000000 00:00 0
7f0d638ee000-7f0d638f0000 rw-p 00000000 00:00 0
7f0d638f0000-7f0d638f4000 r--p 00000000 00:00 0                          [vvar]
7f0d638f4000-7f0d638f6000 r-xp 00000000 00:00 0                          [vdso]
7f0d638f6000-7f0d638f7000 r--p 00000000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d638f7000-7f0d6391c000 r-xp 00001000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d6391c000-7f0d63926000 r--p 00026000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d63926000-7f0d63928000 r--p 00030000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7f0d63928000-7f0d6392a000 rw-p 00032000 fd:01 3409055
  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffc344ca000-7ffc344cb000 rwxp 00000000 00:00 0
7ffc344cb000-7ffc348cd000 rw-p 00000000 00:00 0                          [stack]
$

That could also be bad: MAP_FIXED_NOREPLACE exists, from what I
understand, partly so that malloc implementations can use it to grow
heap memory chunks (though glibc doesn't use it, I'm not sure who
actually uses it that way). We wouldn't want a malloc implementation
to grow a heap memory chunk until it is directly adjacent to a stack.

> > I don't know of any specific scenario where this is actually exploitable,
> > but it seems like it could be a security problem for sufficiently unlucky
> > userspace.
> >
> > I believe we should ensure that, as long as code is compiled with something
> > like -fstack-check, a stack overflow in it can never cause the main stack
> > to overflow into adjacent heap memory.
> >
> > My fix effectively reverts the behavior for !vma_is_accessible() VMAs to
> > the behavior before commit 1be7107fbe18 ("mm: larger stack guard gap,
> > between vmas"), so I think it should be a fairly safe change even in
> > case A.
> >
> > Fixes: 561b5e0709e4 ("mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes in the stack")
> > Cc: stable@xxxxxxxxxxxxxxx
> > Signed-off-by: Jann Horn <jannh@xxxxxxxxxx>
> > ---
> > I have tested that Libreoffice still launches after this change, though
> > I don't know if that means anything.
> >
> > Note that I haven't tested the growsup code.
> > ---
> >  mm/mmap.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
> >  1 file changed, 46 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index dd4b35a25aeb..971bfd6c1cea 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -1064,10 +1064,12 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
> >               gap_addr = TASK_SIZE;
> >
> >       next = find_vma_intersection(mm, vma->vm_end, gap_addr);
> > -     if (next && vma_is_accessible(next)) {
> > -             if (!(next->vm_flags & VM_GROWSUP))
> > +     if (next && !(next->vm_flags & VM_GROWSUP)) {
> > +             /* see comments in expand_downwards() */
> > +             if (vma_is_accessible(prev))
> > +                     return -ENOMEM;
> > +             if (address == next->vm_start)
> >                       return -ENOMEM;
> > -             /* Check that both stack segments have the same anon_vma? */
>
> I hate that we even maintain this for one single arch I believe at this point?

Looks that way, just parisc?

It would be so nice if we could somehow just get rid of this concept
of growing stacks in general...

> >       }
> >
> >       if (next)
> > @@ -1155,10 +1157,47 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
> >       /* Enforce stack_guard_gap */
> >       prev = vma_prev(&vmi);
> >       /* Check that both stack segments have the same anon_vma? */
> > -     if (prev) {
> > -             if (!(prev->vm_flags & VM_GROWSDOWN) &&
> > -                 vma_is_accessible(prev) &&
> > -                 (address - prev->vm_end < stack_guard_gap))
> > +     if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
> > +         (address - prev->vm_end < stack_guard_gap)) {
> > +             /*
> > +              * If the previous VMA is accessible, this is the normal case
> > +              * where the main stack is growing down towards some unrelated
> > +              * VMA. Enforce the full stack guard gap.
> > +              */
> > +             if (vma_is_accessible(prev))
> > +                     return -ENOMEM;
> > +
> > +             /*
> > +              * If the previous VMA is not accessible, we have a problem:
> > +              * We can't tell what userspace's intent is.
> > +              *
> > +              * Case A:
> > +              * Maybe userspace wants to use the previous VMA as a
> > +              * "guard region" at the bottom of the main stack, in which case
> > +              * userspace wants us to grow the stack until it is adjacent to
> > +              * the guard region. Apparently some Java runtime environments
> > +              * and Rust do that?
> > +              * That is kind of ugly, and in that case userspace really ought
> > +              * to ensure that the stack is fully expanded immediately, but
> > +              * we have to handle this case.
>
> Yeah we can't break userspace on this, no doubt somebody is relying on this
> _somewhere_.

It would have to be a new user who appeared after commit 1be7107fbe18.
And they'd have to install a "guard vma" somewhere below the main
stack, and they'd have to care so much about the size of the stack
that a single page makes a difference.

> That said, I wish we disallowed this altogether regardless of accessibility.
>
> > +              *
> > +              * Case B:
> > +              * But maybe the previous VMA is entirely unrelated to the stack
> > +              * and is only *temporarily* PROT_NONE. For example, glibc
> > +              * malloc arenas create a big PROT_NONE region and then
> > +              * progressively mark parts of it as writable.
> > +              * In that case, we must not let the stack become adjacent to
> > +              * the previous VMA. Otherwise, after the region later becomes
> > +              * writable, a stack overflow will cause the stack to grow into
> > +              * the previous VMA, and we won't have any stack gap to protect
> > +              * against this.
>
> Should be careful with terminology here, an mprotect() will not allow a
> merge so by 'grow into' you mean that a stack VMA could become immediately
> adjacent to a non-stack VMA prior to it which was later made writable.
>
> Perhaps I am being pedantic...

Ah, sorry, I worded that very confusingly. By "a stack overflow will
cause the stack to grow into the previous VMA", I meant that the stack
pointer moves into the previous VMA and the program uses part of the
previous VMA as stack memory, clobbering whatever was stored there
before. That part was not meant to talk about a change of VMA bounds.

> > +              *
> > +              * As an ugly tradeoff, enforce a single-page gap.
> > +              * A single page will hopefully be small enough to not be
> > +              * noticed in case A, while providing the same level of
> > +              * protection in case B that normal userspace threads get.
> > +              */
> > +             if (address == prev->vm_end)
> >                       return -ENOMEM;
>
> Ugh, yuck. Not a fan of this at all. Feels like a dreadful hack.

Oh, I agree, I just didn't see a better way to do it.

> You do raise an important point here, but it strikes me that we should be
> doing this check in the mprotect()/mmap() MAP_FIXED scenarios where it
> shouldn't be too costly to check against the next VMA (which we will be
> obtaining anyway for merge checks)?
>
> That way we don't need a hack like this, and can just disallow the
> operation. That'd probably be as liable to break the program as an -ENOMEM
> on a stack expansion would...

Hmm... yeah, I guess that would work. If someone hits this case, it
would probably be less obvious to the programmer what went wrong based
on the error code, but on the other hand, it would give userspace a
slightly better chance to recover from the issue...

I guess I can see if I can code that up.
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
#include <sys/eventfd.h>

#define STACK_POINTER() ({ void *__stack; asm volatile("mov %%esp, %0":"=r"(__stack)); __stack; })
#define IGNORE(x) asm volatile(""::"r"(x))

#define MAP_LEN 0x100000
#define UNMAP_COUNT 10

static int efd_to_main, efd_to_thread;
static void *malloc_start, *malloc_end;
static unsigned long *thread_obj2;

static void *thread_fn(void *dummy) {
  /* STEP 2 */
  void *thread_obj = malloc(0x1000);
  thread_obj2 = malloc(0x1000);
  *thread_obj2 = 0xdeadbeef;
  malloc_start = (void*)(((unsigned long)thread_obj) & ~0xfffUL);
  malloc_end = malloc_start + MAP_LEN;
  eventfd_write(efd_to_main, 1);
  eventfd_t dummy_val;
  eventfd_read(efd_to_thread, &dummy_val);

  /* STEP 4 */
  while (1) {
    void *p = malloc(0x400);
    if (p < malloc_start || p > malloc_end)
      break;
  }
  eventfd_write(efd_to_main, 1);

  while (1)
    pause();
}

__attribute__((noinline))
void recurse() {
  if (STACK_POINTER() > malloc_end+0xf00)
    recurse();
  asm volatile("");
}

__attribute__((noinline))
void recurse2(unsigned long depth) {
  if (*thread_obj2 != 0xdeadbeef) {
    printf("corrupted thread_obj2 at depth %lu\n", depth);
    exit(1);
  }
  recurse2(depth+1);
  asm volatile("");
}

void *top_maps[UNMAP_COUNT];
void top_maps_insert(void *ptr) {
  // find lowest element in top_maps
  size_t lowest = 0;
  for (size_t i = 0; i < UNMAP_COUNT; i++) {
    if (top_maps[i] < top_maps[lowest])
      lowest = i;
  }
  if (top_maps[lowest] < ptr)
    top_maps[lowest] = ptr;
}

int main(void) {
  efd_to_main = eventfd(0, EFD_SEMAPHORE);
  efd_to_thread = eventfd(0, EFD_SEMAPHORE);

  /* STEP 1 */
  void *stack = STACK_POINTER();
  IGNORE(malloc(1)); /* make sure our arena is initialized or whatever */
  while (1) {
    void *obj = malloc(MAP_LEN - 0xf00);
    if (obj == NULL)
      err(1, "malloc failed");
    if (obj > STACK_POINTER()) {
      free(obj);
      for (size_t i = 0; i < UNMAP_COUNT; i++)
        free(top_maps[i]);
      break;
    }
    top_maps_insert(obj);
  }
  pthread_t thread;
  if (pthread_create(&thread, NULL, thread_fn, NULL))
    errx(1, "pthread_create failed");
  eventfd_t dummy_val;
  eventfd_read(efd_to_main, &dummy_val);

  /* STEP 3 */
  recurse();
  eventfd_write(efd_to_thread, 1);
  eventfd_read(efd_to_main, &dummy_val);

  /* STEP 5 */
  recurse2(0);
}

[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux