Re: WARNING in handle_mm_fault

Dmitry Vyukov <dvyukov@xxxxxxxxxx> · Wed, 25 Nov 2015 18:21:02 +0100

On Wed, Nov 25, 2015 at 4:27 PM, Tetsuo Handa
<penguin-kernel@xxxxxxxxxxxxxxxxxxx> wrote:
> Dmitry Vyukov wrote:
>> If the race described in
>> http://www.spinics.net/lists/cgroups/msg14078.html does actually
>> happen, then there is nothing to check.
>> https://gcc.gnu.org/ml/gcc/2012-02/msg00005.html talks about different
>> memory locations; if there is store-widening involving different
>> memory locations, then this is a compiler bug. But the race happens on
>> a single memory location; in such a case the code is buggy.
>>
>
> All ->in_execve ->in_iowait ->sched_reset_on_fork ->sched_contributes_to_load
> ->sched_migrated ->memcg_may_oom ->memcg_kmem_skip_account ->brk_randomized
> share the same byte.
>
> sched_fork(p) modifies p->sched_reset_on_fork but p is not yet visible.
> __sched_setscheduler(p) modifies p->sched_reset_on_fork.
> try_to_wake_up(p) modifies p->sched_contributes_to_load.
> perf_event_task_migrate(p) modifies p->sched_migrated.
>
> Trying to reproduce this problem with
>
>  static __always_inline bool
>  perf_sw_migrate_enabled(void)
>  {
> -       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> -               return true;
>         return false;
>  }
>
> would help test the ->sched_migrated case.








I have made some progress.

With the following patch:

dvyukov@dvyukov-z840:~/src/linux-dvyukov$ git diff
include/linux/sched.h mm/memory.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2fae7d8..4c126a1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1455,6 +1455,8 @@ struct task_struct {
        /* Used for emulating ABI behavior of previous Linux versions */
        unsigned int personality;

+       union {
+       struct {
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
        unsigned in_iowait:1;
@@ -1463,18 +1465,24 @@ struct task_struct {
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
        unsigned sched_migrated:1;
+       unsigned dummy_a:1;
 #ifdef CONFIG_MEMCG
        unsigned memcg_may_oom:1;
 #endif
+       unsigned dummy_b:1;
 #ifdef CONFIG_MEMCG_KMEM
        unsigned memcg_kmem_skip_account:1;
 #endif
 #ifdef CONFIG_COMPAT_BRK
        unsigned brk_randomized:1;
 #endif
+       };
+       unsigned nonatomic_flags;
+       };

        unsigned long atomic_flags; /* Flags needing atomic access. */

+
        struct restart_block restart_block;

        pid_t pid;
diff --git a/mm/memory.c b/mm/memory.c
index deb679c..6351dac 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -62,6 +62,7 @@
 #include <linux/dma-debug.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/kasan.h>

 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3436,12 +3437,45 @@ int handle_mm_fault(struct mm_struct *mm,
struct vm_area_struct *vma,
         * Enable the memcg OOM handling for faults triggered in user
         * space.  Kernel faults are handled more gracefully.
         */
-       if (flags & FAULT_FLAG_USER)
+       if (flags & FAULT_FLAG_USER) {
+               volatile int x;
+               unsigned f0, f1;
+               f0 = READ_ONCE(current->nonatomic_flags);
+               for (x = 0; x < 1000; x++) {
+                       WRITE_ONCE(current->nonatomic_flags, 0xaeaeaeae);
+                       cpu_relax();
+                       WRITE_ONCE(current->nonatomic_flags, 0xaeaeaeab);
+                       cpu_relax();
+                       f1 = READ_ONCE(current->nonatomic_flags);
+                       if (f1 != 0xaeaeaeab) {
+                               pr_err("enable: flags 0x%x -> 0x%x\n", f0, f1);
+                               break;
+                       }
+               }
+               WRITE_ONCE(current->nonatomic_flags, f0);
+
                mem_cgroup_oom_enable();
+       }

        ret = __handle_mm_fault(mm, vma, address, flags);

        if (flags & FAULT_FLAG_USER) {
+               volatile int x;
+               unsigned f0, f1;
+               f0 = READ_ONCE(current->nonatomic_flags);
+               for (x = 0; x < 1000; x++) {
+                       WRITE_ONCE(current->nonatomic_flags, 0xaeaeaeae);
+                       cpu_relax();
+                       WRITE_ONCE(current->nonatomic_flags, 0xaeaeaeab);
+                       cpu_relax();
+                       f1 = READ_ONCE(current->nonatomic_flags);
+                       if (f1 != 0xaeaeaeab) {
+                               pr_err("enable: flags 0x%x -> 0x%x\n", f0, f1);
+                               break;
+                       }
+               }
+               WRITE_ONCE(current->nonatomic_flags, f0);
+
                mem_cgroup_oom_disable();
                 /*
                  * The task may have entered a memcg OOM situation but


I see:

[  153.484152] enable: flags 0x8 -> 0xaeaeaeaf
[  168.707786] enable: flags 0x8 -> 0xaeaeaeae
[  169.654966] enable: flags 0x40 -> 0xaeaeaeae
[  176.809080] enable: flags 0x48 -> 0xaeaeaeaa
[  177.496219] enable: flags 0x8 -> 0xaeaeaeaf
[  193.266703] enable: flags 0x0 -> 0xaeaeaeae
[  199.536435] enable: flags 0x8 -> 0xaeaeaeae
[  210.650809] enable: flags 0x48 -> 0xaeaeaeaf
[  210.869397] enable: flags 0x8 -> 0xaeaeaeaf
[  216.150804] enable: flags 0x8 -> 0xaeaeaeaa
[  231.607211] enable: flags 0x8 -> 0xaeaeaeaf
[  260.677408] enable: flags 0x48 -> 0xaeaeaeae
[  272.065364] enable: flags 0x40 -> 0xaeaeaeaf
[  281.594973] enable: flags 0x48 -> 0xaeaeaeaf
[  282.899860] enable: flags 0x8 -> 0xaeaeaeaf
[  286.472173] enable: flags 0x8 -> 0xaeaeaeae
[  286.763203] enable: flags 0x8 -> 0xaeaeaeaf
[  288.229107] enable: flags 0x0 -> 0xaeaeaeaf
[  291.336522] enable: flags 0x8 -> 0xaeaeaeae
[  310.082981] enable: flags 0x48 -> 0xaeaeaeaf
[  313.798935] enable: flags 0x8 -> 0xaeaeaeaf
[  343.340508] enable: flags 0x8 -> 0xaeaeaeaf
[  344.170635] enable: flags 0x48 -> 0xaeaeaeaf
[  357.568555] enable: flags 0x8 -> 0xaeaeaeaf
[  359.158179] enable: flags 0x48 -> 0xaeaeaeaf
[  361.188300] enable: flags 0x40 -> 0xaeaeaeaa
[  365.636639] enable: flags 0x8 -> 0xaeaeaeaf

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>