Hello Dave, Here are some “notes” I made while using crash this summer. Thanks.
================================================================================ PROBLEM TO START CRASH ================================================================================
Problem starting crash with this version: [root@fedora4 crash-4.0-2.33]# cat /proc/version Linux version 2.6.16.1 (root@xxxxxxxxxxxxxxxxxx) (version gcc 4.0.0 20050519 (Red Hat 4.0.0-8)) #2 SMP PREEMPT Tue Apr 11 12:38:29 CEST 2006
crash 4.0-2.33 Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. Copyright (C) 2004, 2005, 2006 IBM Corporation Copyright (C) 1999-2006 Hewlett-Packard Co Copyright (C) 2005 Fujitsu Limited Copyright (C) 2005 NEC Corporation Copyright (C) 1999, 2002 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Enter "help copying" to see the conditions. This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb 6.1 Copyright 2004 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "i686-pc-linux-gnu"...
crash: invalid size request: 0 type: "hardirq_ctx"
================================================================= MAY BE A PROBLEM WITH STRUCT AND ARRAYS ================================================================= There may be a problem with the struct command when a single array member is requested:
crash> struct kmem_cache_s f7ffdc80 struct kmem_cache_s { array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, batchcount = 12, limit = 24, lists = { slabs_partial = { next = 0xf7bdd000, prev = 0xf701a000 }, slabs_full = { next = 0xc1f3a000, prev = 0xf7e8c000 }, [...] name = 0xc0322190 "task_struct", next = { next = 0xf7ffde78, prev = 0xf7ffdc78 } }
crash> struct kmem_cache_s.array f7ffdc80 <--- OTHER FIELDS ARE COMING WITH ARRAY array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, batchcount = 12, limit = 24, lists = { slabs_partial = { next = 0xf7828000, prev = 0xf7bdd000 }, slabs_full = { next = 0xc1eb3000, prev = 0xf7e8c000 }, slabs_free = { next = 0xf7ffdd18, prev = 0xf7ffdd18 }, free_objects = 13, free_touched = 0, next_reap = 11377353, shared = 0xf7ff5000 }, crash>
================================================================= I THINK THERE IS A POINTER PROBLEM WITH SIG ================================================================= It seems there is a pointer problem with sig. (Also, currently, sig -l and sig -s do not support RT signals).
I use a very simple program (procsig.c) which blocks a signal and then sends it to itself using tkill():
#include <stdio.h> #include <stdlib.h> #include <signal.h> #include <linux/unistd.h>
#ifndef GETTID pid_t gettid(void) {return syscall(__NR_gettid);} #endif #ifndef TKILL int tkill(pid_t tid, int sig) {return syscall (__NR_tkill,tid,sig); } #endif
void sigusr2(int sig, siginfo_t *sp, void *uc) { }
void sigcatch() { struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sa.sa_sigaction = sigusr2; if (sigaction(SIGUSR2, &sa, NULL)==-1) { perror("sigaction() - SIGUSR2"); exit(1); } } void sig_block() { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGUSR2); if (sigprocmask(SIG_SETMASK, &set,NULL) == -1) { perror("sigprocmask()"); exit(1); } if(tkill(gettid(), SIGUSR2)==-1){ perror("tkill()"); exit(1); } }
main(int argc) { sig_block(); sigcatch();
pause(); }
[root@fedora4 ~]# ./procsig & [1] 4985 [root@fedora4 ~]# crash -s crash> set 4985 PID: 4985 COMMAND: "procsig" TASK: f7e19020 [THREAD_INFO: f4a97000] CPU: 0 STATE: TASK_INTERRUPTIBLE crash> sig >/tmp/res <----------- NEVER FINISHES, I MUST KILL THE TASK
This is what I get in the file /tmp/res:
PID: 4985 TASK: f7e19020 CPU: 0 COMMAND: "procsig" SIGPENDING: no SIGNAL: 0000000000000800 BLOCKED: 0000000000000800 SIGNAL_STRUCT: c1e57980 COUNT: 1 SIG SIGACTION HANDLER MASK FLAGS [1] c1f3e604 SIG_DFL 0000000000000000 0 [2] c1f3e618 SIG_DFL 0000000000000000 0 [3] c1f3e62c SIG_DFL 0000000000000000 0 [...] [60] c1f3eaa0 SIG_DFL 0000000000000000 0 [61] c1f3eab4 SIG_DFL 0000000000000000 0 [62] c1f3eac8 SIG_DFL 0000000000000000 0 [63] c1f3eadc SIG_DFL 0000000000000000 0 SIGQUEUE: SIG SIGINFO 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 0 f7e194a4 12 f7ac1344 ..... ..... ================================================================= IS CRASH ABLE TO READ per_cpu__xxxxxx STRUCTURES ? ================================================================ I get strange results with per_cpu__xxxxxxx symbols. Maybe I am doing something incorrectly. The "problem" exists only with SMP.
crash> sys KERNEL: /boot/vmlinux-2.6.11-prep DUMPFILE: /dev/crash CPUS: 2 DATE: Fri Jul 21 16:30:51 2006 UPTIME: 00:09:11 LOAD AVERAGE: 0.01, 0.11, 0.08 TASKS: 105 NODENAME: fedora4.domain.com RELEASE: 2.6.11-prep VERSION: #6 SMP Fri Jul 21 10:13:20 CEST 2006 MACHINE: i686 (2399 Mhz) MEMORY: 1.5 GB
crash> per_cpu__loopback_stats per_cpu__loopback_stats = $6 = { rx_packets = 3435973836, tx_packets = 3435973836, rx_bytes = 3435973836, tx_bytes = 3435973836, rx_errors = 3435973836, tx_errors = 3435973836, rx_dropped = 3435973836, tx_dropped = 3435973836, multicast = 3435973836, collisions = 3435973836, rx_length_errors = 3435973836, rx_over_errors = 3435973836, rx_crc_errors = 3435973836, rx_frame_errors = 3435973836, rx_fifo_errors = 3435973836, rx_missed_errors = 3435973836, tx_aborted_errors = 3435973836, tx_carrier_errors = 3435973836, tx_fifo_errors = 3435973836, tx_heartbeat_errors = 3435973836, tx_window_errors = 3435973836, rx_compressed = 3435973836, tx_compressed = 3435973836 } crash> rd per_cpu__loopback_stats 10 c040c320: cccccccc cccccccc cccccccc cccccccc ................ c040c330: cccccccc cccccccc cccccccc cccccccc ................ c040c340: cccccccc cccccccc ........
Another one: crash> per_cpu__irq_stat per_cpu__irq_stat = $1 = { __softirq_pending = 1954047342, /* ???? */ idle_timestamp = 1668312320, __nmi_count = 1835364191, apic_timer_irqs = 1702521203 } crash> rd per_cpu__irq_stat 4 c0407380: 7478656e 63706d00 6d656f5f 657a6973 next.mpc_oemsize
================================================================= A SMALL DETAIL with ps ================================================================= When using procthread, the very first time I run "ps procthread" I see VSZ and RSS shown as 0:
FIRST TIME crash> ps procthread PID PPID CPU TASK ST %MEM VSZ RSS COMM 4844 1 1 f6646020 IN 0.0 0 0 procthread 4845 1 0 f6646560 IN 0.0 22108 552 procthread > 4846 1 1 f78f5560 RU 0.0 22108 552 procthread
SECOND TIME is OK crash> ps procthread PID PPID CPU TASK ST %MEM VSZ RSS COMM 4844 1 1 f6646020 IN 0.0 22108 552 procthread 4845 1 0 f6646560 IN 0.0 22108 552 procthread > 4846 1 1 f78f5560 RU 0.0 22108 552 procthread
Here is procthread.c
#include <stdio.h> #include <stdlib.h> #include <signal.h> #include <assert.h> #include <pthread.h>
#ifndef GETTID #include <linux/unistd.h> _syscall0(pid_t,gettid); #endif
#define NBR_THREADS 2 pthread_t tid[NBR_THREADS];
void *lwp_pr(void *lwp_num) { int ret; pid_t tid = gettid(); printf("PPID %d TGID %d TID %d\n", getppid(), getpid(), tid); while(1) if (tid % 2) sleep(1); }
main(int argc) { int i, ret; printf("main PPID %d TGID %d TID %d\n", getppid(), getpid(), gettid()); for (i=0 ; i <NBR_THREADS ; i++) { ret = pthread_create( &tid[i],NULL, lwp_pr, NULL); assert(ret == 0); } pause(); }
================================================================= A SMALL DETAIL WITH SYS -c ================================================================= A small detail with sys -c: many obsolete or unimplemented system calls are mapped (#define) to sys_ni_syscall.
[root@fedora4 boot]# grep sys_ni_syscall System.map-2.6.11-prep c0136278 T sys_ni_syscall [root@fedora4 boot]# grep c0136278 System.map-2.6.11-prep c0136278 W compat_sys_futex c0136278 W compat_sys_get_mempolicy c0136278 W compat_sys_keyctl c0136278 W compat_sys_mbind c0136278 W compat_sys_mq_getsetattr c0136278 W compat_sys_mq_notify c0136278 W compat_sys_mq_open c0136278 W compat_sys_mq_timedreceive c0136278 W compat_sys_mq_timedsend c0136278 W compat_sys_set_mempolicy c0136278 W compat_sys_socketcall c0136278 W ppc_rtas c0136278 W sys32_ipc c0136278 W sys32_sysctl c0136278 W sys_get_mempolicy c0136278 W sys_mbind c0136278 T sys_ni_syscall c0136278 W sys_pciconfig_iobase c0136278 W sys_pciconfig_read c0136278 W sys_pciconfig_write c0136278 W sys_set_mempolicy
crash> sys -c NUM SYSTEM CALL FILE AND LINE NUMBER 0 sys_restart_syscall ../kernel/signal.c: 2037 1 sys_exit ../kernel/exit.c: 870 2 sys_fork ../arch/i386/kernel/process.c: 650 3 sys_read ../fs/read_write.c: 313 4 sys_write ../fs/read_write.c: 331 5 sys_open ../fs/open.c: 938 6 sys_close ../fs/open.c: 1018 7 sys_waitpid ../kernel/exit.c: 1533 8 sys_creat ../fs/open.c: 974 9 sys_link ../fs/namei.c: 2015 10 sys_unlink ../fs/namei.c: 1863 11 sys_execve ../arch/i386/kernel/process.c: 688 12 sys_chdir ../fs/open.c: 519 13 sys_time ../kernel/time.c: 59 14 sys_mknod ../fs/namei.c: 1620 15 sys_chmod ../fs/open.c: 635 16 sys_lchown16 ../kernel/uid16.c: 26 17 sys_set_mempolicy ../kernel/sys_ni.c: 13 <--- in fact, "it is" sys_ni_syscall
crash> sys -c mempolicy NUM SYSTEM CALL FILE AND LINE NUMBER 17 sys_set_mempolicy ../kernel/sys_ni.c: 13 31 sys_set_mempolicy ../kernel/sys_ni.c: 13 32 sys_set_mempolicy ../kernel/sys_ni.c: 13 35 sys_set_mempolicy ../kernel/sys_ni.c: 13 44 sys_set_mempolicy ../kernel/sys_ni.c: 13 53 sys_set_mempolicy ../kernel/sys_ni.c: 13 56 sys_set_mempolicy ../kernel/sys_ni.c: 13 58 sys_set_mempolicy ../kernel/sys_ni.c: 13 98 sys_set_mempolicy ../kernel/sys_ni.c: 13 112 sys_set_mempolicy ../kernel/sys_ni.c: 13 127 sys_set_mempolicy ../kernel/sys_ni.c: 13 130 sys_set_mempolicy ../kernel/sys_ni.c: 13 137 sys_set_mempolicy ../kernel/sys_ni.c: 13 167 sys_set_mempolicy ../kernel/sys_ni.c: 13 188 sys_set_mempolicy ../kernel/sys_ni.c: 13 189 sys_set_mempolicy ../kernel/sys_ni.c: 13 223 sys_set_mempolicy ../kernel/sys_ni.c: 13 251 sys_set_mempolicy ../kernel/sys_ni.c: 13 273 sys_set_mempolicy ../kernel/sys_ni.c: 13 274 sys_set_mempolicy ../kernel/sys_ni.c: 13 275 sys_set_mempolicy ../kernel/sys_ni.c: 13 276 sys_set_mempolicy ../kernel/sys_ni.c: 13 283 sys_set_mempolicy ../kernel/sys_ni.c: 13 285 sys_set_mempolicy ../kernel/sys_ni.c: 13
It would be clearer if we could "force" sys_ni_syscall to be displayed (of course, we do have the reference to sys_ni.c).
============================================================================== Command irq x should be extended for recent systems ============================================================================== The irq command only accepts values between 0 and 15 (old PICs).
============================================================================== repeat is not abortable in case of a mistake ? ============================================================================== If I type "repeat 2 xxxxxxx" (instead of "repeat -2 xxxxxx"), I must kill crash.
============================================================================================ MOUNT DOES NOT SHOW NAMESPACE ============================================================================================= About mount: a -n option to show namespaces (when supported) would be very useful.
|
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility