Hi Ingo, I encountered BUG at mm/slab.c. It tells me there is an inconsistency case. One of the panics, it shows obviously exclusive control in slab is not right. The console log; Unable to handle kernel paging request at ffff8108cf81d3c8 RIP: [<ffffffff8029e92f>] kmem_cache_alloc+0x5e/0xdb PGD 8063 PUD 0 Oops: 0000 [1] PREEMPT SMP CPU 1 Modules linked in: Pid: 32268, comm: dcachebench Not tainted 2.6.24.3-rt3 #3 RIP: 0010:[<ffffffff8029e92f>] [<ffffffff8029e92f>] kmem_cache_alloc+0x5e/0xdb RSP: 0018:ffff810048895e58 EFLAGS: 00010097 RAX: 00000000ffffffff RBX: 0000000000000246 RCX: 0000000000000000 RDX: ffff8100cf81d3b8 RSI: 0000000000000000 RDI: ffff8100cf80ee40 RBP: ffff8100cf80ee40 R08: 0000000000000c62 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000000000d0 R13: ffffffff802aa83d R14: 0000000000000006 R15: 0000000000000000 FS: 00002adc7b6686f0(0000) GS:ffff8100cf81f340(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff8108cf81d3c8 CR3: 00000000b36e0000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dcachebench (pid: 32268, threadinfo ffff810048894000, task ffff8100cd248080) Stack: 0000000000000000 ffff810097d1c000 0000000100000282 0000000000000006 00000000ffffff9c 0000000000000000 00000000005038b0 ffffffff802aa83d ffff8100cf811080 0000000000000006 00000000ffffff9c 0000000000000000 Call Trace: [<ffffffff802aa83d>] getname+0x1e/0x1f0 [<ffffffff802ac38c>] do_rmdir+0x17/0xfd [<ffffffff8026fb5f>] audit_syscall_entry+0x163/0x189 [<ffffffff8020c2f3>] tracesys+0x71/0xe1 [<ffffffff8020c35e>] tracesys+0xdc/0xe1 --------------------------- | preempt count: 00000001 ] | 1-level deep critical section nesting: ---------------------------------------- .. [<ffffffff80640737>] .... __spin_trylock+0xe/0x49 .....[<00000000>] .. ( <= 0x0) Code: 48 8b 54 c2 18 53 9d 4c 89 e9 44 89 e6 48 89 ef e8 f1 e1 ff RIP [<ffffffff8029e92f>] kmem_cache_alloc+0x5e/0xdb RSP <ffff810048895e58> The kernel dump; (gdb) info thr 4 process 32262 flush_tlb_others (cpumask={bits = {4}}, mm=0xffff8100cd16d450, va=46966178668544) at arch/x86/kernel/smp_64.c:195 3 process 32325 0xffffffff8020cad0 in invalidate_interrupt3 () 2 process 32268 0xffffffff8029e92f in kmem_cache_alloc (cachep=0xffff8100cf80ee40, flags=208) at mm/slab.c:3320 * 1 process 32326 0xffffffff806408c4 in __spin_lock (lock=0xffff8100cdc9a4d0) at kernel/spinlock.c:333 (gdb) thr 2 [Switching to thread 2 (process 32268)]#0 0xffffffff8029e92f in kmem_cache_alloc ( cachep=0xffff8100cf80ee40, flags=208) at mm/slab.c:3320 3320 mm/slab.c: No such file or directory. in mm/slab.c 0xffffffff8029e903 <kmem_cache_alloc+50>: callq 0xffffffff8029c292 <check_irq_off> 0xffffffff8029e908 <kmem_cache_alloc+55>: movslq 0x14(%rsp),%rax 0xffffffff8029e90d <kmem_cache_alloc+60>: mov 0x0(%rbp,%rax,8),%rdx 0xffffffff8029e912 <kmem_cache_alloc+65>: mov (%rdx),%eax 0xffffffff8029e914 <kmem_cache_alloc+67>: test %eax,%eax 0xffffffff8029e916 <kmem_cache_alloc+69>: je 0xffffffff8029e990 <kmem_cache_alloc+191> 1st. %eax, ac->avail is not 0. 0xffffffff8029e918 <kmem_cache_alloc+71>: lock incl 0xf8(%rbp) 0xffffffff8029e91f <kmem_cache_alloc+78>: movl $0x1,0xc(%rdx) 0xffffffff8029e926 <kmem_cache_alloc+85>: mov (%rdx),%eax 0xffffffff8029e928 <kmem_cache_alloc+87>: sub $0x1,%eax 0xffffffff8029e92b <kmem_cache_alloc+90>: mov %eax,(%rdx) 0xffffffff8029e92d <kmem_cache_alloc+92>: mov %eax,%eax 0xffffffff8029e92f <kmem_cache_alloc+94>: mov 0x18(%rdx,%rax,8),%rdx 2nd. %eax, ac->avail = 0xffffffff, it causes this panic. 0xffffffff8029e934 <kmem_cache_alloc+99>: push %rbx 0xffffffff8029e935 <kmem_cache_alloc+100>: popfq static inline void * ____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { void *objp; struct array_cache *ac; check_irq_off(); ac = cpu_cache_get(cachep, *this_cpu); if (likely(ac->avail)) { ---------------------------> 1st. check STATS_INC_ALLOCHIT(cachep); ac->touched = 1; objp = ac->entry[--ac->avail]; -------------> PANIC HERE } else { STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags, this_cpu); } return objp; } It means that ac->avail is changed by the other cpu. So I looked for who touch another cpu's data and found the root cause. I think, calling cache_grow() in cache_alloc_refill() can change cpu, but cache_grow() doesn't update this_cpu variable in spite of changing cpu. It makes the kernel access other cpu's data in cache_alloc_refill(). I'm testing this patch now. --- From: Hiroshi Shimamoto <h-shimamoto@xxxxxxxxxxxxx> On !PREEMPT_RT, an inconsistency case may be occurred. After releasing cpu by slab_irq_enable_nort() the cpu can be changed. In slab_irq_disable_nort() new cpu id should be gotten. If not, it makes data of other cpu corrupted. Signed-off-by: Hiroshi Shimamoto <h-shimamoto@xxxxxxxxxxxxx> --- mm/slab.c | 18 +++++++++--------- 1 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 4bf6ca8..7beb378 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -138,8 +138,8 @@ * * (On PREEMPT_RT, these are NOPs, but we have to drop/get the irq locks.) */ -# define slab_irq_disable_nort() local_irq_disable() -# define slab_irq_enable_nort() local_irq_enable() +# define slab_irq_disable_nort(cpu) slab_irq_disable(cpu) +# define slab_irq_enable_nort(cpu) slab_irq_enable(cpu) # define slab_irq_disable_rt(flags) do { (void)(flags); } while (0) # define slab_irq_enable_rt(flags) do { (void)(flags); } while (0) # define slab_spin_lock_irq(lock, cpu) \ @@ -160,8 +160,8 @@ DEFINE_PER_CPU_LOCKED(int, slab_irq_locks) = { 0, }; do { slab_irq_enable(cpu); (void) (flags); } while (0) # define slab_irq_disable_rt(cpu) slab_irq_disable(cpu) # define slab_irq_enable_rt(cpu) slab_irq_enable(cpu) -# define slab_irq_disable_nort() do { } while (0) -# define slab_irq_enable_nort() do { } while (0) +# define slab_irq_disable_nort(cpu) do { } while (0) +# define slab_irq_enable_nort(cpu) do { } while (0) # define slab_spin_lock_irq(lock, cpu) \ do { slab_irq_disable(cpu); spin_lock(lock); } while (0) # define slab_spin_unlock_irq(lock, cpu) \ @@ -2899,7 +2899,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, offset *= cachep->colour_off; if (local_flags & __GFP_WAIT) - slab_irq_enable_nort(); + slab_irq_enable_nort(*this_cpu); slab_irq_enable_rt(*this_cpu); /* @@ -2932,7 +2932,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - slab_irq_disable_nort(); + slab_irq_disable_nort(*this_cpu); check_irq_off(); spin_lock(&l3->list_lock); @@ -2948,7 +2948,7 @@ opps1: failed: slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - slab_irq_disable_nort(); + slab_irq_disable_nort(*this_cpu); return 0; } @@ -3397,7 +3397,7 @@ retry: * set and go into memory reserves if necessary. */ if (local_flags & __GFP_WAIT) - slab_irq_enable_nort(); + slab_irq_enable_nort(*this_cpu); slab_irq_enable_rt(*this_cpu); kmem_flagcheck(cache, flags); @@ -3405,7 +3405,7 @@ retry: slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - slab_irq_disable_nort(); + slab_irq_disable_nort(*this_cpu); if (obj) { /* -- 1.5.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html