Re: [PATCH 1/1] ksm: fix use after free with merge_across_nodes = 0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 05/12/2017 10:38 PM, Andrea Arcangeli wrote:
> If merge_across_nodes was manually set to 0 (not the default value) by
> the admin or a tuned profile on NUMA systems triggering cross-NODE
> page migrations, a stable_node use after free could materialize.
> 
> If the chain is collapsed stable_node would point to the old chain
> that was already freed. stable_node_dup would be the stable_node dup
> now converted to a regular stable_node and indexed in the rbtree in
> replacement of the freed stable_node chain (not anymore a dup).
> 
> This special case where the chain is collapsed in the NUMA replacement
> path, is now detected by setting stable_node to NULL by the
> chain_prune callee if it decides to collapse the chain. This tells the
> NUMA replacement code that even if stable_node and stable_node_dup are
> different, this is not a chain if stable_node is NULL, as the
> stable_node_dup was converted to a regular stable_node and the chain
> was collapsed.
> 
> It is generally safer for the callee to force the caller stable_node
> to NULL the moment it become stale so any other mistake like this
> would result in an instant Oops easier to debug than an use after free.
> 
> Otherwise the replace logic would act like if stable_node was a valid
> chain, when in fact it was freed. Notably
> stable_node_chain_add_dup(page_node, stable_node) would run on a
> stable stable_node.
> 
> Andrey Ryabinin found the source of the use after free in
> chain_prune().
> 
> Reported-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
> Reported-by: Evgheni Dereveanchin <ederevea@xxxxxxxxxx>
> Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> ---


Works for me,

Tested-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>


Bellow is reproducer which causes crash in ksm in several minutes without this fix.


$ cat  ksm_test.c

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <numaif.h>
#include <sys/types.h>
#include <sys/wait.h>

#define NR_NODES 4
#define MAP_SIZE 4096

#define NR_THREADS 1024

pid_t pids[NR_THREADS];

int merge_and_migrate(void)
{
        void *p;
        unsigned long rnd;
        unsigned long old_node, new_node;
        pid_t p_pid, pid;
        int j;

        p = mmap(NULL, MAP_SIZE, PROT_READ|PROT_WRITE,
                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                perror("mmap"), exit(1);

        memset(p, 0xff, MAP_SIZE);
        if (madvise(p, MAP_SIZE, MADV_MERGEABLE))
                perror("madvise"), exit(1);


        while (1) {
                sleep(0);
                rnd = rand() % 2;
                switch (rnd) {
                case 0: {
                        rnd = rand() % 128;
                        memset(p, rnd, MAP_SIZE);
                        break;
                }
                case 1: {
                        j = rand()%NR_NODES;
                        old_node = 1 << j;
                        new_node = 1<<((j+1)%NR_NODES);

                        migrate_pages(0, NR_NODES, &old_node, &new_node);
                        break;
                }
                }
        }
        return 0;
}

int main(void)
{
        int i,ret,j;
        pid_t pid;
        int wstatus;
        unsigned long old_node, new_node;

        for (i = 0; i < NR_THREADS; i++) {
                pid = fork();
                if (pid < 0) {
                        perror("fork");
                        return 1;
                }
                if (pid) {
                        pids[i] = pid;
                        continue;
                } else
                        merge_and_migrate();
        }

        while (1) {
                pid = waitpid(-1, &wstatus, WNOHANG);
                if (pid < 0) {
                        perror("waitpid failed");
                        return 1;
                }
                if (pid) {
                        for (i = 0; i< NR_THREADS; i++) {
                                if (pids[i] == pid) {
                                        pid = fork();
                                        if (pid < 0) {
                                                perror("fork in while");
                                                return 1;
                                        }
                                        if (pid) {
                                                pids[i] = pid;
                                                break;
                                        } else
                                                merge_and_migrate();
                                }
                        }
                        continue; /*while(1)*/
                }
                i = rand()%NR_THREADS;
                kill(pids[i], SIGKILL);
        }
        return 0;
}

$ cat run_ksm.sh
#!/bin/bash

gcc -lnuma -O2 ksm_test.c -o ksm_test
echo 1 > /sys/kernel/mm/ksm/run
echo 0 > /sys/kernel/mm/ksm/merge_across_nodes
echo 2 > /sys/kernel/mm/ksm/max_page_sharing
echo 0 > /sys/kernel/mm/ksm/stable_node_chains_prune_millisecs
./ksm_test


$ ./run_ksm.sh
[  203.251200] ==================================================================
[  203.251679] BUG: KASAN: use-after-free in stable_tree_search+0x1450/0x16f0
[  203.252229] Read of size 4 at addr ffff880037e9d938 by task ksmd/170
[  203.252800] 
[  203.252957] CPU: 2 PID: 170 Comm: ksmd Not tainted 4.12.0-rc1-next-20170515+ #639
[  203.253627] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
[  203.254670] Call Trace:
[  203.254907]  dump_stack+0x67/0x98
[  203.255222]  print_address_description+0x7c/0x290
[  203.255652]  ? stable_tree_search+0x1450/0x16f0
[  203.256073]  kasan_report+0x26e/0x350
[  203.256418]  __asan_report_load4_noabort+0x19/0x20
[  203.256852]  stable_tree_search+0x1450/0x16f0
[  203.257262]  ? __stable_node_chain+0x8a0/0x8a0
[  203.257668]  ? follow_page_mask+0x5f9/0xd80
[  203.258060]  ksm_scan_thread+0xb47/0x2790
[  203.258438]  ? stable_tree_search+0x16f0/0x16f0
[  203.258858]  ? __schedule+0x904/0x1ad0
[  203.259214]  ? clkdev_alloc+0xd0/0xd0
[  203.259553]  ? wake_atomic_t_function+0x2a0/0x2a0
[  203.259985]  ? trace_hardirqs_on+0xd/0x10
[  203.260361]  kthread+0x2d6/0x3d0
[  203.260658]  ? stable_tree_search+0x16f0/0x16f0
[  203.261073]  ? kthread_create_on_node+0xb0/0xb0
[  203.261485]  ret_from_fork+0x2e/0x40
[  203.261819] 
[  203.261936] Allocated by task 170:
[  203.262251]  save_stack_trace+0x1b/0x20
[  203.262601]  kasan_kmalloc+0xee/0x180
[  203.262938]  kasan_slab_alloc+0x12/0x20
[  203.263290]  kmem_cache_alloc+0x129/0x2d0
[  203.263654]  alloc_stable_node_chain+0x29/0x310
[  203.264072]  ksm_scan_thread+0x2048/0x2790
[  203.264444]  kthread+0x2d6/0x3d0
[  203.264744]  ret_from_fork+0x2e/0x40
[  203.265075] 
[  203.265220] Freed by task 170:
[  203.265503]  save_stack_trace+0x1b/0x20
[  203.265852]  kasan_slab_free+0xad/0x180
[  203.266208]  kmem_cache_free+0xc7/0x300
[  203.266558]  __stable_node_chain+0x68a/0x8a0
[  203.266948]  stable_tree_search+0x18e/0x16f0
[  203.267339]  ksm_scan_thread+0xb47/0x2790
[  203.267655]  kthread+0x2d6/0x3d0
[  203.267910]  ret_from_fork+0x2e/0x40





--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]
  Powered by Linux