Hi,
I hit a crash on a ppc64le machine.
Call chain: lookup_fast() -> __d_lookup_rcu() -> dentry_cmp() ->
dentry_string_cmp()
From the symbolized trace and the disassembly, when
dentry_string_cmp() runs,
the dentry's d_name.name is NULL, and dereferencing it triggered the crash.
The dentry's contents at the time of the crash: http://paste.ubuntu.com/19340635/.
The analysis of the crash vmcore is here, if you're interested:
http://paste.ubuntu.com/19359665/
I have also pasted the above traces into the attached txt file.
Can we add a check at the beginning of dentry_string_cmp(), as below?
Or maybe we should not silently ignore the NULL pointer.
static inline int dentry_string_cmp(const unsigned char *cs, const
unsigned char *ct, unsigned tcount)
{
do {
+ if (unlikely(!cs || !ct ))
+ return 1;
if (*cs != *ct)
return 1;
cs++;
Below is the stack trace:
---------------------------------------------------------------------------------------------------------
Stack trace output:
[387421.142576] Unable to handle kernel paging request for data at
address 0x00000000
[387421.142709] Faulting instruction address: 0xc000000000327f00
[387421.142769] Oops: Kernel access of bad area, sig: 11 [#1]
[387421.142816] SMP NR_CPUS=2048 NUMA PowerNV
[387421.142876] Modules linked in: iptable_mangle iptable_nat
nf_nat_ipv4 nf_nat
iptable_raw iptable_filter ip_tables binfmt_misc nf_conntrack_ipv4
nf_defrag_ipv4
...
[387421.143529] CPU: 69 PID: 39485 Comm: rsync Tainted: G W
------------ 3.10.0-327.18.2.el7.ppc64le #1
[387421.143622] task: c0000022787bd220 ti: c000001f06fc0000 task.ti:
c000001f06fc0000
[387421.143692] NIP: c000000000327f00 LR: c0000000003122f8 CTR:
0000000000000008
[387421.143761] REGS: c000001f06fc3820 TRAP: 0300 Tainted: G
W ------------ (3.10.0-327.18.2.el7.ppc64le)
[387421.143853] MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR:
22000882 XER: 00000000
[387421.144026] CFAR: c000000000009368 DAR: 0000000000000000 DSISR:
40000000 SOFTE: 1
GPR00: c0000000003122f8 c000001f06fc3aa0 c0000000011231b0 c000002611320300
GPR04: c000001f06fc3c60 0000000000000002 0000000000000007 0000000000000000
GPR08: 0000000000000008 ffffffffffffffff c0000029aa14b048 c0000029aa14b049
GPR12: 0000000000000000 c000000007b46d00 0000000000000003 0000000000000018
GPR16: 0000000000000000 00000000001cc131 00000100399fc3b0 0000000000000002
GPR20: 000000004ab52a5c 00003fffe2a2b328 0000000000000001 c000000001179650
GPR24: 0000000000000007 c0000029aa14b049 c000001f06fc3b20 c000001f06fc3c60
GPR28: 00000008d4908d9a c0000026113203c0 c000002611320300 c0000026113203c8
[387421.144948] NIP [c000000000327f00] __d_lookup_rcu+0x150/0x1d0
[387421.145006] LR [c0000000003122f8] lookup_fast+0x68/0x390
[387421.145053] Call Trace:
[387421.145077] [c000001f06fc3aa0] [c00000000031291c]
link_path_walk+0x2fc/0xba0 (unreliable)
[387421.145159] [c000001f06fc3b00] [c0000000003122f8]
lookup_fast+0x68/0x390
[387421.145228] [c000001f06fc3b70] [c00000000031352c]
path_lookupat+0x1bc/0xb60
[387421.145298] [c000001f06fc3c30] [c000000000319440]
user_path_at_empty+0xc0/0x430
[387421.145380] [c000001f06fc3d30] [c0000000003056f4]
vfs_fstatat+0x84/0x280
[387421.145449] [c000001f06fc3d90] [c0000000003059c4]
SyS_newlstat+0x34/0x60
[387421.145520] [c000001f06fc3e30] [c00000000000a17c] system_call+0x38/0xb4
[387421.145589] Instruction dump:
[387421.145651] 39180001 7d0903a6 3959ffff e93f0020 3929ffff 4800001c
60000000 60000000
[387421.145872] 60000000 60000000 60000000 60420000 <8ce90001> 8d0a0001
7f874000 409eff4c
[387421.146096] ---[ end trace 7c1c505a25279a32 ]---
[387421.157384]
[387421.157422] Sending IPI to other CPUs
[387421.158535] IPI complete
Thanks
Fiona
KERNEL: vmlinux
DUMPFILE: /home/fedora/vmcore [PARTIAL DUMP]
CPUS: 192
LOAD AVERAGE: 0.05, 0.09, 0.12
TASKS: 2445
RELEASE: 3.10.0-327.18.2.el7.ppc64le
VERSION: #1 SMP Fri Apr 8 05:10:45 EDT 2016
MACHINE: ppc64le (3525 Mhz)
MEMORY: 256 GB
PANIC: "Unable to handle kernel paging request for data at address 0x00000000"
PID: 39485
COMMAND: "rsync"
TASK: c0000022787bd220 [THREAD_INFO: c000001f06fc0000]
CPU: 69
STATE: TASK_RUNNING (PANIC)
--------------------------------------------------------------------------------------------------
crash> gdb l*(__d_lookup_rcu+0x150)
0xc000000000327f00 is in __d_lookup_rcu (fs/dcache.c:182).
177 #else
178
179 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
180 {
181 do {
182 if (*cs != *ct)
183 return 1;
184 cs++;
185 ct++;
186 tcount--;
--------------------------------------------------------------------------------------------------
crash>dis -l __d_lookup_rcu
...
/usr/src/debug/kernel-3.10.0-327.18.2.el7/linux-3.10.0-327.18.2.el7.ppc64le/fs/dcache.c: 212
0xc000000000327edc <__d_lookup_rcu+300>: ld r9,32(r31)
0xc000000000327ee0 <__d_lookup_rcu+304>: addi r9,r9,-1
0xc000000000327ee4 <__d_lookup_rcu+308>: b 0xc000000000327f00 <__d_lookup_rcu+336>
0xc000000000327ee8 <__d_lookup_rcu+312>: nop
0xc000000000327eec <__d_lookup_rcu+316>: nop
0xc000000000327ef0 <__d_lookup_rcu+320>: nop
0xc000000000327ef4 <__d_lookup_rcu+324>: nop
0xc000000000327ef8 <__d_lookup_rcu+328>: nop
0xc000000000327efc <__d_lookup_rcu+332>: ori r2,r2,0
/usr/src/debug/kernel-3.10.0-327.18.2.el7/linux-3.10.0-327.18.2.el7.ppc64le/fs/dcache.c: 182
0xc000000000327f00 <__d_lookup_rcu+336>: lbzu r7,1(r9) --->Crash Here(0xc000000000327f00)
0xc000000000327f04 <__d_lookup_rcu+340>: lbzu r8,1(r10)
0xc000000000327f08 <__d_lookup_rcu+344>: cmpw cr7,r7,r8
0xc000000000327f0c <__d_lookup_rcu+348>: bne cr7,0xc000000000327e58 <__d_lookup_rcu+168>
Line 182 of "/usr/src/debug/kernel-3.10.0-327.18.2.el7/linux-3.10.0-327.18.2.el7.ppc64le/fs/dcache.c" corresponds to this instruction:
0xc000000000327f00 <__d_lookup_rcu+336>: lbzu r7,1(r9) --->Crash here. (r9 holds 0xffffffffffffffff, i.e. the NULL name pointer minus 1, so the load from r9 + 1 = 0x0 hits a bad address.)
--------------------------------------------------------------------------------------------------
r31 (c0000026113203c8) is the address of dentry.d_hash, so the address of the struct dentry is 0xc0000026113203c0 (the name pointer was loaded with "ld r9,32(r31)")
crash> struct dentry 0xc0000026113203c0
struct dentry {
d_flags = 17301632,
d_seq = {
sequence = 2
},
d_hash = {
next = 0xc0000021b8995dc8,
pprev = 0xc000003618d25288
},
d_parent = 0xc000002611320300,
d_name = {
{
{
hash = 3566243226,
len = 8
},
hash_len = 37925981594
},
name = 0x0 ---> name is NULL
},
d_inode = 0xc0000018a8e5fdb8,
d_iname = "features\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000", ---> iname is not NULL
d_lockref = {
{
lock_count = 107374182400,
{
lock = {
{
rlock = {
raw_lock = {
slock = 0
}
}
}
},
count = 25
}
}
},
d_op = 0x0,
d_sb = 0xc000000fa0a2f800,
d_time = 0,
d_fsdata = 0x0,
d_lru = {
next = 0xc000002611320380,
prev = 0xc000002611320500
},
d_u = {
d_child = {
next = 0xc0000026113203a0,
prev = 0xc0000026113217d0
},
d_rcu = {
next = 0xc0000026113203a0,
func = 0xc0000026113217d0
}
},
d_subdirs = {
next = 0xc000002611321710,
prev = 0xc000002611320510
},
d_alias = {
next = 0x0,
pprev = 0xc0000018a8e5fed0
}
}