[PATCH] Show missing tasks in ps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Dave,

I got an s390x dump of a Linux 2.6.36 system, where a task (kmcheck, pid=44) is
missing in the ps output. I debugged the problem and I think that I found the
reason:

It looks like crash does not walk the linked list of the pid hash table
to the end if it finds a NULL pointer in the pid.tasks[PIDTYPE_PID=0]
array entry. Unfortunately, this condition is true for the struct pid that
precedes our lost task in the linked list. Therefore crash does not find our task.

The attached patch seems to fix this problem.

Here is my crash debug log with the 2.6.36 dump:
---------------------------------------------
Task "kmcheck" is in hash slot 2941 in the linked list at position 2:

crash> print pid_hash[2941]
$4 = {
  first = 0x3f5fb7f8
}

crash> upid
struct upid {
    int nr;
    struct pid_namespace *ns;
    struct hlist_node pid_chain;
}
SIZE: 32

crash> upid.pid_chain
struct upid {
  [16] struct hlist_node pid_chain;
}

crash> eval 0x3f5fb7f8 - 16
hexadecimal: 3f5fb7e8  

crash> upid 3f5fb7e8   <<<<---- the first upid in the list
struct upid {
  nr = 565, 
  ns = 0x81d8f8, 
  pid_chain = {
    next = 0x3edea2b0, 
    pprev = 0x96554e8
  }
}

crash> pid
struct pid {
    atomic_t count;
    unsigned int level;
    struct hlist_head tasks[3];
    struct rcu_head rcu;
    struct upid numbers[1];
}
SIZE: 80

crash> pid.numbers
struct pid {
  [48] struct upid numbers[1];
}

crash> eval 3f5fb7e8 - 48
hexadecimal: 3f5fb7b8  

crash> pid 3f5fb7b8
struct pid {
  count = {
    counter = 1
  }, 
  level = 0, 
  tasks = {{
      first = 0x0 <<<----------- tasks[0] is NULL
    }, {
      first = 0x3d488620
    }, {
      first = 0x0
    }}, 
  rcu = {
    next = 0x5a5a5a5a5a5a5a5a, 
    func = 0x5a5a5a5a5a5a5a5a
  }, 
  numbers = {{
      nr = 565, 
      ns = 0x81d8f8, 
      pid_chain = {
        next = 0x3edea2b0,  <<<--------- Pointer to second element in list
        pprev = 0x96554e8
      }
    }}
}

crash> eval 0x3edea2b0 - 16
hexadecimal: 3edea2a0   <<<-- The second upid in the list

crash> upid 0x3edea2a0
struct upid {
  nr = 44,                 <<<--- Our missing pid=44 (kmcheck)
  ns = 0x81d8f8, 
  pid_chain = {
    next = 0x0, 
    pprev = 0x3f5fb7f8
  }
}

crash> eval 0x3edea2a0 - 48
hexadecimal: 3edea270  

crash> pid 3edea270
struct pid {
  count = {
    counter = 5
  }, 
  level = 0, 
  tasks = {{
      first = 0x3e799908   <<<--- Pointer to our task_struct.pids
    }, {
      first = 0x0
    }, {
      first = 0x0
    }}, 
  rcu = {
    next = 0x5a5a5a5a5a5a5a5a, 
    func = 0x5a5a5a5a5a5a5a5a
  }, 
  numbers = {{
      nr = 44, 
      ns = 0x81d8f8, 
      pid_chain = {
        next = 0x0, 
        pprev = 0x3f5fb7f8
      }
    }}
}

crash> task_struct.pids
struct task_struct {
   [712] struct pid_link pids[3];
}

crash> eval 0x3e799908 - 712
hexadecimal: 3e799640  

crash> task_struct 3e799640 | grep comm
  comm = "kmcheck\000\000\000\000\000\000\000\000", <<<--- here it is
---
 task.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/task.c
+++ b/task.c
@@ -2006,7 +2006,7 @@ do_chained:
                 }
 
 		if (pid_tasks_0 == 0)
-			continue;
+			goto chain_next;
 
 		next = pid_tasks_0 - OFFSET(task_struct_pids);
 
@@ -2042,7 +2042,7 @@ do_chained:
 		}
 
 		cnt++;
-
+chain_next:
 		if (pnext) {
 			kpp = pnext;
 			upid = pnext - OFFSET(upid_pid_chain);

Hi Dave,

I got an s390x dump of a Linux 2.6.36 system, where a task (kmcheck, pid=44) is
missing in the ps output. I debugged the problem and I think that I found the
reason:

It looks like crash does not walk the linked list of the pid hash table
to the end if it finds a NULL pointer in the pid.tasks[PIDTYPE_PID=0]
array entry. Unfortunately, this condition is true for the struct pid that
precedes our lost task in the linked list. Therefore crash does not find our task.

The attached patch seems to fix this problem.

Here is my crash debug log with the 2.6.36 dump:
---------------------------------------------
Task "kmcheck" is in hash slot 2941 in the linked list at position 2:

crash> print pid_hash[2941]
$4 = {
  first = 0x3f5fb7f8
}

crash> upid
struct upid {
    int nr;
    struct pid_namespace *ns;
    struct hlist_node pid_chain;
}
SIZE: 32

crash> upid.pid_chain
struct upid {
  [16] struct hlist_node pid_chain;
}

crash> eval 0x3f5fb7f8 - 16
hexadecimal: 3f5fb7e8  

crash> upid 3f5fb7e8   <<<<---- the first upid in the list
struct upid {
  nr = 565, 
  ns = 0x81d8f8, 
  pid_chain = {
    next = 0x3edea2b0, 
    pprev = 0x96554e8
  }
}

crash> pid
struct pid {
    atomic_t count;
    unsigned int level;
    struct hlist_head tasks[3];
    struct rcu_head rcu;
    struct upid numbers[1];
}
SIZE: 80

crash> pid.numbers
struct pid {
  [48] struct upid numbers[1];
}

crash> eval 3f5fb7e8 - 48
hexadecimal: 3f5fb7b8  

crash> pid 3f5fb7b8
struct pid {
  count = {
    counter = 1
  }, 
  level = 0, 
  tasks = {{
      first = 0x0 <<<----------- tasks[0] is NULL
    }, {
      first = 0x3d488620
    }, {
      first = 0x0
    }}, 
  rcu = {
    next = 0x5a5a5a5a5a5a5a5a, 
    func = 0x5a5a5a5a5a5a5a5a
  }, 
  numbers = {{
      nr = 565, 
      ns = 0x81d8f8, 
      pid_chain = {
        next = 0x3edea2b0,  <<<--------- Pointer to second element in list
        pprev = 0x96554e8
      }
    }}
}

crash> eval 0x3edea2b0 - 16
hexadecimal: 3edea2a0   <<<-- The second upid in the list

crash> upid 0x3edea2a0
struct upid {
  nr = 44,                 <<<--- Our missing pid=44
  ns = 0x81d8f8, 
  pid_chain = {
    next = 0x0, 
    pprev = 0x3f5fb7f8
  }
}

crash> eval 0x3edea2a0 - 48
hexadecimal: 3edea270  

crash> pid 3edea270
struct pid {
  count = {
    counter = 5
  }, 
  level = 0, 
  tasks = {{
      first = 0x3e799908   <<<--- Pointer to our task_struct.pids
    }, {
      first = 0x0
    }, {
      first = 0x0
    }}, 
  rcu = {
    next = 0x5a5a5a5a5a5a5a5a, 
    func = 0x5a5a5a5a5a5a5a5a
  }, 
  numbers = {{
      nr = 44, 
      ns = 0x81d8f8, 
      pid_chain = {
        next = 0x0, 
        pprev = 0x3f5fb7f8
      }
    }}
}

crash> task_struct.pids
struct task_struct {
   [712] struct pid_link pids[3];
}

crash> eval 0x3e799908 - 712
hexadecimal: 3e799640  

crash> task_struct 3e799640 | grep comm
  comm = "kmcheck\000\000\000\000\000\000\000\000", <<<--- here it is
---
 task.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/task.c
+++ b/task.c
@@ -2006,7 +2006,7 @@ do_chained:
                 }
 
 		if (pid_tasks_0 == 0)
-			continue;
+			goto chain_next;
 
 		next = pid_tasks_0 - OFFSET(task_struct_pids);
 
@@ -2042,7 +2042,7 @@ do_chained:
 		}
 
 		cnt++;
-
+chain_next:
 		if (pnext) {
 			kpp = pnext;
 			upid = pnext - OFFSET(upid_pid_chain);
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux