Hi Dave, I got an s390x dump of a Linux 2.6.36 system, where a task (kmcheck, pid=44) is missing in the ps output. I debugged the problem and I think that I found the reason: It looks like crash does not walk the linked list of the pid hash table to the end if it finds a NULL pointer in the pid.tasks[PIDTYPE_PID=0] array. Unfortunately, this condition is true for the struct pid that precedes our lost task in the linked list. Therefore crash does not find our task. The attached patch seems to fix this problem. Here is my crash debug log from the 2.6.36 dump: --------------------------------------------- Task "kmcheck" is in hash slot 2941 in the linked list at position 2: crash> print pid_hash[2941] $4 = { first = 0x3f5fb7f8 } crash> upid struct upid { int nr; struct pid_namespace *ns; struct hlist_node pid_chain; } SIZE: 32 crash> upid.pid_chain struct upid { [16] struct hlist_node pid_chain; } crash> eval 0x3f5fb7f8 - 16 hexadecimal: 3f5fb7e8 crash> upid 3f5fb7e8 <<<<---- the first upid in the list struct upid { nr = 565, ns = 0x81d8f8, pid_chain = { next = 0x3edea2b0, pprev = 0x96554e8 } } crash> pid struct pid { atomic_t count; unsigned int level; struct hlist_head tasks[3]; struct rcu_head rcu; struct upid numbers[1]; } SIZE: 80 crash> pid.numbers struct pid { [48] struct upid numbers[1]; } crash> eval 3f5fb7e8 - 48 hexadecimal: 3f5fb7b8 crash> pid 3f5fb7b8 struct pid { count = { counter = 1 }, level = 0, tasks = {{ first = 0x0 <<<----------- tasks[0] is NULL }, { first = 0x3d488620 }, { first = 0x0 }}, rcu = { next = 0x5a5a5a5a5a5a5a5a, func = 0x5a5a5a5a5a5a5a5a }, numbers = {{ nr = 565, ns = 0x81d8f8, pid_chain = { next = 0x3edea2b0, <<<--------- Pointer to second element in list pprev = 0x96554e8 } }} } crash> eval 0x3edea2b0 - 16 hexadecimal: 3edea2a0 <<<-- The second upid in the list crash> upid 0x3edea2a0 struct upid { nr = 44, <<<--- Our missing pid=44 (kmcheck) ns = 0x81d8f8, pid_chain = { next = 0x0, pprev = 0x3f5fb7f8 } } crash> eval 0x3edea2a0 - 
48 hexadecimal: 3edea270 crash> pid 3edea270 struct pid { count = { counter = 5 }, level = 0, tasks = {{ first = 0x3e799908 <<<--- Pointer to our task_struct.pids }, { first = 0x0 }, { first = 0x0 }}, rcu = { next = 0x5a5a5a5a5a5a5a5a, func = 0x5a5a5a5a5a5a5a5a }, numbers = {{ nr = 44, ns = 0x81d8f8, pid_chain = { next = 0x0, pprev = 0x3f5fb7f8 } }} } crash> task_struct.pids struct task_struct { [712] struct pid_link pids[3]; } crash> eval 0x3e799908 - 712 hexadecimal: 3e799640 crash> task_struct 3e799640 | grep comm comm = "kmcheck\000\000\000\000\000\000\000\000", <<<--- here it is --- task.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/task.c +++ b/task.c @@ -2006,7 +2006,7 @@ do_chained: } if (pid_tasks_0 == 0) - continue; + goto chain_next; next = pid_tasks_0 - OFFSET(task_struct_pids); @@ -2042,7 +2042,7 @@ do_chained: } cnt++; - +chain_next: if (pnext) { kpp = pnext; upid = pnext - OFFSET(upid_pid_chain);
Hi Dave, I got an s390x dump of a Linux 2.6.36 system, where a task (kmcheck, pid=44) is missing in the ps output. I debugged the problem and I think that I found the reason: It looks like crash does not walk the linked list of the pid hash table to the end if it finds a NULL pointer in the pid.tasks[PIDTYPE_PID=0] array. Unfortunately, this condition is true for the struct pid that precedes our lost task in the linked list. Therefore crash does not find our task. The attached patch seems to fix this problem. Here is my crash debug log from the 2.6.36 dump: --------------------------------------------- Task "kmcheck" is in hash slot 2941 in the linked list at position 2: crash> print pid_hash[2941] $4 = { first = 0x3f5fb7f8 } crash> upid struct upid { int nr; struct pid_namespace *ns; struct hlist_node pid_chain; } SIZE: 32 crash> upid.pid_chain struct upid { [16] struct hlist_node pid_chain; } crash> eval 0x3f5fb7f8 - 16 hexadecimal: 3f5fb7e8 crash> upid 3f5fb7e8 <<<<---- the first upid in the list struct upid { nr = 565, ns = 0x81d8f8, pid_chain = { next = 0x3edea2b0, pprev = 0x96554e8 } } crash> pid struct pid { atomic_t count; unsigned int level; struct hlist_head tasks[3]; struct rcu_head rcu; struct upid numbers[1]; } SIZE: 80 crash> pid.numbers struct pid { [48] struct upid numbers[1]; } crash> eval 3f5fb7e8 - 48 hexadecimal: 3f5fb7b8 crash> pid 3f5fb7b8 struct pid { count = { counter = 1 }, level = 0, tasks = {{ first = 0x0 <<<----------- tasks[0] is NULL }, { first = 0x3d488620 }, { first = 0x0 }}, rcu = { next = 0x5a5a5a5a5a5a5a5a, func = 0x5a5a5a5a5a5a5a5a }, numbers = {{ nr = 565, ns = 0x81d8f8, pid_chain = { next = 0x3edea2b0, <<<--------- Pointer to second element in list pprev = 0x96554e8 } }} } crash> eval 0x3edea2b0 - 16 hexadecimal: 3edea2a0 <<<-- The second upid in the list crash> upid 0x3edea2a0 struct upid { nr = 44, <<<--- Our missing pid=44 ns = 0x81d8f8, pid_chain = { next = 0x0, pprev = 0x3f5fb7f8 } } crash> eval 0x3edea2a0 - 48 
hexadecimal: 3edea270 crash> pid 3edea270 struct pid { count = { counter = 5 }, level = 0, tasks = {{ first = 0x3e799908 <<<--- Pointer to our task_struct.pids }, { first = 0x0 }, { first = 0x0 }}, rcu = { next = 0x5a5a5a5a5a5a5a5a, func = 0x5a5a5a5a5a5a5a5a }, numbers = {{ nr = 44, ns = 0x81d8f8, pid_chain = { next = 0x0, pprev = 0x3f5fb7f8 } }} } crash> task_struct.pids struct task_struct { [712] struct pid_link pids[3]; } crash> eval 0x3e799908 - 712 hexadecimal: 3e799640 crash> task_struct 3e799640 | grep comm comm = "kmcheck\000\000\000\000\000\000\000\000", <<<--- here it is --- task.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/task.c +++ b/task.c @@ -2006,7 +2006,7 @@ do_chained: } if (pid_tasks_0 == 0) - continue; + goto chain_next; next = pid_tasks_0 - OFFSET(task_struct_pids); @@ -2042,7 +2042,7 @@ do_chained: } cnt++; - +chain_next: if (pnext) { kpp = pnext; upid = pnext - OFFSET(upid_pid_chain);
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility