Hello Dave,
Here are some notes I made while using crash this summer.
Thanks.
================================================================================
PROBLEM
TO START CRASH
================================================================================
Problem starting crash with this version:
[root@fedora4
crash-4.0-2.33]# cat /proc/version
Linux
version 2.6.16.1 (root@xxxxxxxxxxxxxxxxxx) (version gcc 4.0.0 20050519
(Red Hat 4.0.0-8)) #2 SMP PREEMPT Tue Apr 11 12:38:29
CEST 2006
crash
4.0-2.33
Copyright
(C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc.
Copyright
(C) 2004, 2005, 2006 IBM Corporation
Copyright
(C) 1999-2006 Hewlett-Packard Co
Copyright
(C) 2005 Fujitsu Limited
Copyright
(C) 2005 NEC Corporation
Copyright
(C) 1999, 2002 Silicon Graphics, Inc.
Copyright
(C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This
program is free software, covered by the GNU General Public License,
and
you are welcome to change it and/or distribute copies of it under
certain
conditions. Enter "help copying" to see the conditions.
This
program has absolutely no warranty. Enter "help warranty" for details.
GNU
gdb 6.1
Copyright
2004 Free Software Foundation, Inc.
GDB
is free software, covered by the GNU General Public License, and you are
welcome
to change it and/or distribute copies of it under certain conditions.
Type
"show copying" to see the conditions.
There
is absolutely no warranty for GDB. Type "show warranty" for details.
This
GDB was configured as "i686-pc-linux-gnu"...
crash:
invalid size request: 0 type: "hardirq_ctx"
Looks like it has something to do with kernels that are not built with CONFIG_4KSTACKS:
#ifdef CONFIG_4KSTACKS
/*
* per-CPU IRQ handling contexts (thread information and stack)
*
* Everything up to the matching #endif, including the hardirq_ctx and
* softirq_ctx arrays, is only compiled when CONFIG_4KSTACKS is defined.
*/
union irq_ctx {
/* thread information; shares its storage with stack[] below */
struct thread_info
tinfo;
/* THREAD_SIZE bytes of stack, declared as an array of u32 words */
u32
stack[THREAD_SIZE/sizeof(u32)];
};
/* two arrays of NR_CPUS pointers to irq_ctx: one for hardirq, one for softirq */
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
#endif
=================================================================
MAY
BE A PROBLEM WITH STRUCT AND ARRAYS
=================================================================
There may be a problem with the struct command:
crash>
struct kmem_cache_s f7ffdc80
struct
kmem_cache_s {
array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
batchcount = 12,
limit = 24,
lists = {
slabs_partial = {
next = 0xf7bdd000,
prev = 0xf701a000
},
slabs_full = {
next = 0xc1f3a000,
prev = 0xf7e8c000
},
[...]
name = 0xc0322190 "task_struct",
next = {
next = 0xf7ffde78,
prev = 0xf7ffdc78
}
}
crash>
struct kmem_cache_s.array f7ffdc80 <--- OTHER FIELDS ARE COMING WITH ARRAY
array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
batchcount = 12,
limit = 24,
lists = {
slabs_partial = {
next = 0xf7828000,
prev = 0xf7bdd000
},
slabs_full = {
next = 0xc1eb3000,
prev = 0xf7e8c000
},
slabs_free = {
next = 0xf7ffdd18,
prev = 0xf7ffdd18
},
free_objects = 13,
free_touched = 0,
next_reap = 11377353,
shared = 0xf7ff5000
},
crash>
It's in symbols.c: parse_for_member() where the closing "}" is not
at the same indent as the start. I don't know how to fix
this off-hand
without breaking other output where there are arrays within enclosed
data structures.
=================================================================
I
THINK THERE IS A POINTER PROBLEM WITH SIG
=================================================================
It
seems there is a pointer problem with sig.
(Also,
currently, sig -l and sig -s do not support RT signals).
I use a very simple program (procsig.c), which blocks SIGUSR2 and then sends
that signal to itself using tkill():
#include
<stdio.h>
#include
<stdlib.h>
#include
<signal.h>
#include
<linux/unistd.h>
#ifndef
GETTID
/*
 * Return the kernel thread ID of the calling thread, obtained with the
 * raw gettid system call.
 */
pid_t
gettid(void)
{
	return syscall(__NR_gettid);
}
#endif
#ifndef
TKILL
/*
 * Send signal "sig" to the thread whose kernel thread ID is "tid", using
 * the raw tkill system call.  Returns whatever syscall() returns.
 */
int
tkill(pid_t tid, int sig)
{
	return syscall(__NR_tkill, tid, sig);
}
#endif
/*
 * SIGUSR2 handler in the three-argument (SA_SIGINFO) form.
 * Deliberately does nothing with any of its arguments.
 */
void
sigusr2(int sig, siginfo_t *sp, void *uc)
{
	(void)sig;
	(void)sp;
	(void)uc;
}
void
sigcatch()
{
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction
= sigusr2;
if (sigaction(SIGUSR2, &sa, NULL)==-1) {
perror("sigaction() - SIGUSR2"); exit(1);
}
}
/*
 * Replace the signal mask with one containing only SIGUSR2, then send
 * SIGUSR2 to the calling thread with tkill().  If either step fails,
 * print the error and exit with status 1.
 */
void
sig_block()
{
	sigset_t blocked;
	pid_t self;

	sigemptyset(&blocked);
	sigaddset(&blocked, SIGUSR2);
	if (sigprocmask(SIG_SETMASK, &blocked, NULL) == -1) {
		perror("sigprocmask()");
		exit(1);
	}

	self = gettid();
	if (tkill(self, SIGUSR2) == -1) {
		perror("tkill()");
		exit(1);
	}
}
/*
 * procsig: block SIGUSR2 and send it to ourselves first (sig_block), then
 * install the SIGUSR2 handler (sigcatch), and finally wait in pause().
 *
 * The return type is now spelled out; implicit int was removed in C99.
 */
int
main(int argc)
{
	(void)argc;	/* unused */

	sig_block();
	sigcatch();
	pause();

	return 0;
}
[root@fedora4
~]# ./procsig &
[1]
4985
[root@fedora4
~]# crash -s
crash>
set 4985
PID: 4985
COMMAND:
"procsig"
TASK: f7e19020 [THREAD_INFO: f4a97000]
CPU: 0
STATE: TASK_INTERRUPTIBLE
crash>
sig >/tmp/res <----------- NEVER FINISHES, I MUST KILL THE TASK
This is what I get in the file /tmp/res:
PID:
4985 TASK: f7e19020 CPU: 0 COMMAND: "procsig"
SIGPENDING:
no
SIGNAL: 0000000000000800
BLOCKED: 0000000000000800
SIGNAL_STRUCT:
c1e57980 COUNT: 1
SIG SIGACTION HANDLER MASK
FLAGS
[1] c1f3e604 SIG_DFL 0000000000000000 0
[2] c1f3e618 SIG_DFL 0000000000000000 0
[3] c1f3e62c SIG_DFL 0000000000000000 0
[...]
[60]
c1f3eaa0 SIG_DFL 0000000000000000 0
[61]
c1f3eab4 SIG_DFL 0000000000000000 0
[62]
c1f3eac8 SIG_DFL 0000000000000000 0
[63]
c1f3eadc SIG_DFL 0000000000000000 0
SIGQUEUE:
SIG SIGINFO
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
.....
.....
I don't use that command much -- a fix would be appreciated...
=================================================================
IS
CRASH ABLE TO READ per_cpu__xxxxxx STRUCTURES ?
================================================================
Strange results with per_cpu__xxxxxxx.
Maybe I am doing something that is not correct. The "problem" exists only with
SMP.
crash>
sys
KERNEL: /boot/vmlinux-2.6.11-prep
DUMPFILE: /dev/crash
CPUS: 2
DATE: Fri Jul 21 16:30:51
2006
UPTIME: 00:09:11
LOAD
AVERAGE: 0.01, 0.11, 0.08
TASKS: 105
NODENAME: fedora4.domain.com
RELEASE: 2.6.11-prep
VERSION: #6 SMP Fri Jul 21 10:13:20
CEST 2006
MACHINE: i686 (2399 Mhz)
MEMORY: 1.5 GB
crash>
per_cpu__loopback_stats
per_cpu__loopback_stats
= $6 = {
rx_packets = 3435973836,
tx_packets = 3435973836,
rx_bytes = 3435973836,
tx_bytes = 3435973836,
rx_errors = 3435973836,
tx_errors = 3435973836,
rx_dropped = 3435973836,
tx_dropped = 3435973836,
multicast = 3435973836,
collisions = 3435973836,
rx_length_errors = 3435973836,
rx_over_errors = 3435973836,
rx_crc_errors = 3435973836,
rx_frame_errors = 3435973836,
rx_fifo_errors = 3435973836,
rx_missed_errors = 3435973836,
tx_aborted_errors = 3435973836,
tx_carrier_errors = 3435973836,
tx_fifo_errors = 3435973836,
tx_heartbeat_errors = 3435973836,
tx_window_errors = 3435973836,
rx_compressed = 3435973836,
tx_compressed = 3435973836
}
crash>
rd per_cpu__loopback_stats 10
c040c320:
cccccccc cccccccc cccccccc cccccccc ................
c040c330:
cccccccc cccccccc cccccccc cccccccc ................
c040c340:
cccccccc cccccccc
........
Another one:
crash>
per_cpu__irq_stat
per_cpu__irq_stat
= $1 = {
__softirq_pending = 1954047342, /* ???? */
idle_timestamp = 1668312320,
__nmi_count = 1835364191,
apic_timer_irqs = 1702521203
}
crash>
rd per_cpu__irq_stat 4
c0407380:
7478656e 63706d00 6d656f5f 657a6973 next.mpc_oemsize
per_cpu data structures cannot be read appropriately, except in the
cases where I need to have them, like the runqueues, where I
do the offset calculations.
=================================================================
A
SMALL DETAIL with ps
=================================================================
Using procthread: the very first time I run "ps procthread", I see VSZ
and RSS shown as 0 for PID 4844.
FIRST TIME
crash>
ps procthread
PID
PPID CPU TASK ST
%MEM VSZ RSS COMM
4844 1 1 f6646020
IN 0.0 0
0 procthread
4845 1 0 f6646560
IN 0.0 22108 552 procthread
>
4846 1 1 f78f5560
RU 0.0 22108 552 procthread
SECOND
TIME is OK
crash>
ps procthread
PID
PPID CPU TASK ST
%MEM VSZ RSS COMM
4844 1 1 f6646020
IN 0.0 22108 552 procthread
4845 1 0 f6646560
IN 0.0 22108 552 procthread
>
4846 1 1 f78f5560
RU 0.0 22108 552 procthread
Here
is procthread.c
#include
<stdio.h>
#include
<stdlib.h>
#include
<signal.h>
#include
<assert.h>
#include
<pthread.h>
#ifndef
GETTID
#include
<linux/unistd.h>
_syscall0(pid_t,gettid);
#endif
#define
NBR_THREADS 2
pthread_t
tid[NBR_THREADS];
/*
 * Thread start routine: print the parent PID, the thread-group ID and this
 * thread's kernel TID, then loop forever.  Never returns.
 *
 * A thread with an odd TID sleeps one second per iteration; a thread with
 * an even TID spins without sleeping.
 *
 * lwp_num is unused.
 */
void *
lwp_pr(void *lwp_num)
{
	/* Named "self" so it does not shadow the file-scope tid[] array. */
	pid_t self = gettid();

	(void)lwp_num;

	printf("PPID %d TGID %d TID %d\n",
	       getppid(), getpid(), self);

	for (;;) {
		if (self % 2)
			sleep(1);
	}
}
/*
 * procthread: print the main thread's PPID/TGID/TID, start NBR_THREADS
 * threads running lwp_pr(), then wait in pause().
 *
 * The return type is now spelled out; implicit int was removed in C99.
 */
int
main(int argc)
{
	int i, ret;

	(void)argc;	/* unused */

	printf("main PPID %d TGID %d TID %d\n",
	       getppid(), getpid(), gettid());

	for (i = 0; i < NBR_THREADS; i++) {
		/* The call stays outside assert() so it still runs under NDEBUG. */
		ret = pthread_create(&tid[i], NULL, lwp_pr, NULL);
		assert(ret == 0);
	}

	pause();

	return 0;
}
I don't know what the issue is there...
=================================================================
A
SMALL DETAIL WITH SYS -c
=================================================================
A
small detail with sys -c
Many obsolete or unimplemented system calls are mapped (#define) to sys_ni_syscall.
[root@fedora4
boot]# grep sys_ni_syscall System.map-2.6.11-prep
c0136278
T sys_ni_syscall
[root@fedora4
boot]# grep c0136278 System.map-2.6.11-prep
c0136278
W compat_sys_futex
c0136278
W compat_sys_get_mempolicy
c0136278
W compat_sys_keyctl
c0136278
W compat_sys_mbind
c0136278
W compat_sys_mq_getsetattr
c0136278
W compat_sys_mq_notify
c0136278
W compat_sys_mq_open
c0136278
W compat_sys_mq_timedreceive
c0136278
W compat_sys_mq_timedsend
c0136278
W compat_sys_set_mempolicy
c0136278
W compat_sys_socketcall
c0136278
W ppc_rtas
c0136278
W sys32_ipc
c0136278
W sys32_sysctl
c0136278
W sys_get_mempolicy
c0136278
W sys_mbind
c0136278
T sys_ni_syscall
c0136278
W sys_pciconfig_iobase
c0136278
W sys_pciconfig_read
c0136278
W sys_pciconfig_write
c0136278
W sys_set_mempolicy
crash>
sys -c
NUM
SYSTEM CALL
FILE AND LINE NUMBER
0 sys_restart_syscall ../kernel/signal.c:
2037
1
sys_exit
../kernel/exit.c: 870
2
sys_fork
../arch/i386/kernel/process.c: 650
3 sys_read
../fs/read_write.c: 313
4 sys_write
../fs/read_write.c: 331
5 sys_open
../fs/open.c: 938
6 sys_close
../fs/open.c: 1018
7 sys_waitpid
../kernel/exit.c: 1533
8 sys_creat
../fs/open.c: 974
9 sys_link
../fs/namei.c: 2015
10 sys_unlink
../fs/namei.c: 1863
11 sys_execve
../arch/i386/kernel/process.c: 688
12 sys_chdir
../fs/open.c: 519
13 sys_time
../kernel/time.c: 59
14 sys_mknod
../fs/namei.c: 1620
15 sys_chmod
../fs/open.c: 635
16 sys_lchown16
../kernel/uid16.c: 26
17 sys_set_mempolicy
../kernel/sys_ni.c: 13 <--- in fact, "it is" sys_ni_syscall
crash>
sys -c mempolicy
NUM
SYSTEM CALL
FILE AND LINE NUMBER
17 sys_set_mempolicy
../kernel/sys_ni.c: 13
31 sys_set_mempolicy
../kernel/sys_ni.c: 13
32 sys_set_mempolicy
../kernel/sys_ni.c: 13
35 sys_set_mempolicy
../kernel/sys_ni.c: 13
44 sys_set_mempolicy
../kernel/sys_ni.c: 13
53 sys_set_mempolicy
../kernel/sys_ni.c: 13
56 sys_set_mempolicy
../kernel/sys_ni.c: 13
58 sys_set_mempolicy
../kernel/sys_ni.c: 13
98 sys_set_mempolicy
../kernel/sys_ni.c: 13
112
sys_set_mempolicy
../kernel/sys_ni.c: 13
127
sys_set_mempolicy
../kernel/sys_ni.c: 13
130
sys_set_mempolicy
../kernel/sys_ni.c: 13
137
sys_set_mempolicy
../kernel/sys_ni.c: 13
167
sys_set_mempolicy
../kernel/sys_ni.c: 13
188
sys_set_mempolicy
../kernel/sys_ni.c: 13
189
sys_set_mempolicy
../kernel/sys_ni.c: 13
223
sys_set_mempolicy
../kernel/sys_ni.c: 13
251
sys_set_mempolicy
../kernel/sys_ni.c: 13
273
sys_set_mempolicy
../kernel/sys_ni.c: 13
274
sys_set_mempolicy
../kernel/sys_ni.c: 13
275
sys_set_mempolicy
../kernel/sys_ni.c: 13
276
sys_set_mempolicy
../kernel/sys_ni.c: 13
283
sys_set_mempolicy
../kernel/sys_ni.c: 13
285
sys_set_mempolicy
../kernel/sys_ni.c: 13
It would be clearer if we could "force" sys_ni_syscall (of course, we have
the reference to sys_ni.c).
Haven't used that command in a long time. Send in a fix...
==============================================================================
Command
irq x should be extended for recent systems
==============================================================================
irq only accepts IRQ numbers between 0 and 15 (old PICs).
Again, I don't use it -- send in a fix if you need it...
==============================================================================
repeat
is not abortable in case of a mistake ?
==============================================================================
If I type "repeat 2 xxxxxxx" (instead of "repeat -2 xxxxxx"), I must kill crash.
Yeah, the ctrl-C entries don't seem to be making it through if scrolling
is turned off.
============================================================================================
MOUNT DOES NOT SHOW NAMESPACE
=============================================================================================
About mount: a -n option to show namespaces (when supported) would be very
useful.
Again, send in a patch...