Hello! I'm working on a Linux kernel module and I am experiencing a crash with the following oops message: ====================================================================== BUG: unable to handle kernel paging request at 00003336 IP: [<f8a82f9c>] :nf_conntrack:_ipfix_send_msg+0x4d/0x96 Oops: 0000 [#1] SMP Modules linked in: xt_NOTRACK nf_conntrack_netlink nfnetlink ipt_set ip_set_iphash ip_set softdog af_packet cls_fw cls_u32 nfs sch_sfq lockd sunrpc sch_htb ipt_REDIRECT xt_MARK ipt_ULOG ipt_REJECT xt_tcpudp iptable_mangle iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack cn iptable_filter ip_tables x_tables ip_gre 8021q garp stp llc dummy parport_pc parport pcspkr rtc r8169 i2c_i801 i2c_core ehci_hcd uhci_hcd usbcore ext3 jbd dm_mod skge 8139too atl1 mii sd_mod w83627ehf hwmon_vid Pid: 0, comm: swapper Tainted: G W (2.6.27.5 #1) EIP: 0060:[<f8a82f9c>] EFLAGS: 00010046 CPU: 1 EIP is at _ipfix_send_msg+0x4d/0x96 [nf_conntrack] EAX: ee5d4a00 EBX: 0000056e ECX: 00000000 EDX: 00000001 ESI: f8a90968 EDI: f8a90ed6 EBP: 0000332e ESP: f786fc2c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 0, ti=f786e000 task=f783e700 task.ti=f786e000) Stack: 00000246 00000000 f6a18d5e f8a90ed6 00000013 f8a831b2 0000005d 00000130 0000000d f8a8397f f786fc74 00000003 f786fcac f786fc80 f786fcc0 00000013 f6a79bc0 f786fc90 00000000 efaff45c e4213790 0000000d 00000000 f8a83f6b Call Trace: [<f8a831b2>] ipfix_export_flush+0x6b/0x9e [nf_conntrack] [<f8a8397f>] ipfix_export_array+0xa9/0x240 [nf_conntrack] [<f8a83f6b>] __ct_nf_bi_export+0x19e/0x1a8 [nf_conntrack] [<f8a7f2f2>] __nf_ct_refresh_acct+0x202/0x28b [nf_conntrack] [<f8a81ebc>] tcp_packet+0x4d9/0x4e8 [nf_conntrack] [<f8a7e469>] __nf_conntrack_find+0xd7/0xf7 [nf_conntrack] [<f8a7ef97>] nf_conntrack_in+0x23b/0x2cb [nf_conntrack] [<c024437d>] dev_queue_xmit+0x249/0x252 [<c0259b51>] nf_iterate+0x40/0x60 [<c025e8ed>] ip_rcv_finish+0x0/0x27f [<c0259bb3>] nf_hook_slow+0x42/0xa2 [<c025e8ed>] ip_rcv_finish+0x0/0x27f BUG: unable to handle 
kernel paging request at 00003336 IP: [<f8a82f9c>] :nf_conntrack:_ipfix_send_msg+0x4d/0x96 Oops: 0000 [#1] SMP Modules linked in: xt_NOTRACK nf_conntrack_netlink nfnetlink ipt_set ip_set_iphash ip_set softdog af_packet cls_fw cls_u32 nfs sch_sfq lockd sunrpc sch_htb ipt_REDIRECT xt_MARK ipt_ULOG ipt_REJECT xt_tcpudp iptable_mangle iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack cn iptable_filter ip_tables x_tables ip_gre 8021q garp stp llc dummy parport_pc parport pcspkr rtc r8169 i2c_i801 i2c_core ehci_hcd uhci_hcd usbcore ext3 jbd dm_mod skge 8139too atl1 mii sd_mod w83627ehf hwmon_vid Pid: 0, comm: swapper Tainted: G W (2.6.27.5 #1) EIP: 0060:[<f8a82f9c>] EFLAGS: 00010046 CPU: 1 EIP is at _ipfix_send_msg+0x4d/0x96 [nf_conntrack] EAX: ee5d4a00 EBX: 0000056e ECX: 00000000 EDX: 00000001 ESI: f8a90968 EDI: f8a90ed6 EBP: 0000332e ESP: f786fc2c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 0, ti=f786e000 task=f783e700 task.ti=f786e000) Stack: 00000246 00000000 f6a18d5e f8a90ed6 00000013 f8a831b2 0000005d 00000130 0000000d f8a8397f f786fc74 00000003 f786fcac f786fc80 f786fcc0 00000013 f6a79bc0 f786fc90 00000000 efaff45c e4213790 0000000d 00000000 f8a83f6b Call Trace: [<f8a831b2>] ipfix_export_flush+0x6b/0x9e [nf_conntrack] [<f8a8397f>] ipfix_export_array+0xa9/0x240 [nf_conntrack] [<f8a83f6b>] __ct_nf_bi_export+0x19e/0x1a8 [nf_conntrack] [<f8a7f2f2>] __nf_ct_refresh_acct+0x202/0x28b [nf_conntrack] [<f8a81ebc>] tcp_packet+0x4d9/0x4e8 [nf_conntrack] [<f8a7e469>] __nf_conntrack_find+0xd7/0xf7 [nf_conntrack] [<f8a7ef97>] nf_conntrack_in+0x23b/0x2cb [nf_conntrack] [<c024437d>] dev_queue_xmit+0x249/0x252 [<c0259b51>] nf_iterate+0x40/0x60 [<c025e8ed>] ip_rcv_finish+0x0/0x27f [<c0259bb3>] nf_hook_slow+0x42/0xa2 [<c025e8ed>] ip_rcv_finish+0x0/0x27f [<c025ed59>] ip_rcv+0x1ed/0x224 [<c025e8ed>] ip_rcv_finish+0x0/0x27f [<c02449c4>] netif_receive_skb+0x32a/0x34c [<f885d34d>] rtl8169_rx_interrupt+0x2eb/0x3a7 [r8169] [<f885d561>] rtl8169_poll+0x23/0x89 
[r8169] [<c0244ba2>] net_rx_action+0x99/0x1ad [<c011f7d9>] __do_softirq+0x63/0xc1 [<c011f868>] do_softirq+0x31/0x35 [<c0104c7e>] do_IRQ+0x52/0x62 [<c010351b>] common_interrupt+0x23/0x28 [<c0108171>] mwait_idle+0x2b/0x30 [<c010155d>] cpu_idle+0x92/0xaa ======================= Code: 72 a8 f8 e8 27 92 69 c7 59 b8 01 00 00 00 eb 5e 8d 73 08 a1 44 09 a9 f8 8b 5b 04 9c 8f 04 24 fa 64 8b 15 04 80 3d c0 8b 6c 90 20 <8b> 55 08 01 da 3b 50 04 76 0b 89 da 89 e8 e8 5e 90 6b c7 89 c3 EIP: [<f8a82f9c>] _ipfix_send_msg+0x4d/0x96 [nf_conntrack] SS:ESP 0068:f786fc2c Kernel panic - not syncing: Fatal exception in interrupt ====================================================================== My module uses Relay to pass data to user space. I disassembled that part of the code: ====================================================================== 0000066f <_ipfix_send_msg>: 66f: 55 push %ebp 670: 57 push %edi 671: 56 push %esi 672: 53 push %ebx 673: 53 push %ebx 674: 89 c3 mov %eax,%ebx 676: 83 3d dc 0b 00 00 00 cmpl $0x0,0xbdc 67d: 74 22 je 6a1 <_ipfix_send_msg+0x32> 67f: 31 c0 xor %eax,%eax 681: ba 60 06 00 00 mov $0x660,%edx 686: e8 fc ff ff ff call 687 <_ipfix_send_msg+0x18> 68b: 85 c0 test %eax,%eax 68d: 7e 12 jle 6a1 <_ipfix_send_msg+0x32> 68f: 68 da 11 00 00 push $0x11da 694: e8 fc ff ff ff call 695 <_ipfix_send_msg+0x26> 699: 59 pop %ecx 69a: b8 01 00 00 00 mov $0x1,%eax 69f: eb 5e jmp 6ff <_ipfix_send_msg+0x90> 6a1: 8d 73 08 lea 0x8(%ebx),%esi 6a4: a1 24 00 00 00 mov 0x24,%eax 6a9: 8b 5b 04 mov 0x4(%ebx),%ebx 6ac: 9c pushf 6ad: 8f 04 24 popl (%esp) 6b0: fa cli 6b1: 64 8b 15 00 00 00 00 mov %fs:0x0,%edx 6b8: 8b 6c 90 20 mov 0x20(%eax,%edx,4),%ebp 6bc: 8b 55 08 mov 0x8(%ebp),%edx 6bf: 01 da add %ebx,%edx 6c1: 3b 50 04 cmp 0x4(%eax),%edx 6c4: 76 0b jbe 6d1 <_ipfix_send_msg+0x62> ====================================================================== There are no debug symbols here, but the panic happens at the instruction at offset 6bc. Then I compiled with debug symbols and disassembled again. 
====================================================================== c02601ef <_ipfix_send_msg>: int _ipfix_send_msg (struct ipfix_iobuf *buf) { c02601ef: 55 push %ebp c02601f0: 57 push %edi c02601f1: 56 push %esi c02601f2: 53 push %ebx c02601f3: 53 push %ebx c02601f4: 89 c3 mov %eax,%ebx if (if_msg.offset) { c02601f6: 83 3d 9c 40 42 c0 00 cmpl $0x0,0xc042409c c02601fd: 74 22 je c0260221 <_ipfix_send_msg+0x32> if (_ipfix_send_message(0, &if_msg) > 0) { c02601ff: 31 c0 xor %eax,%eax c0260201: ba 20 3b 42 c0 mov $0xc0423b20,%edx c0260206: e8 03 ff ff ff call c026010e <_ipfix_send_message> c026020b: 85 c0 test %eax,%eax c026020d: 7e 12 jle c0260221 <_ipfix_send_msg+0x32> printk("_ipfix_send_msg: _ipfix_send_message() (sending templates failed)\n"); c026020f: 68 2a d2 31 c0 push $0xc031d22a c0260214: e8 8f bf eb ff call c011c1a8 <printk> return 1; c0260219: 59 pop %ecx c026021a: b8 01 00 00 00 mov $0x1,%eax c026021f: eb 5e jmp c026027f <_ipfix_send_msg+0x90> */ static inline void relay_write(struct rchan *chan, const void *data, size_t length) { c0260221: 8d 73 08 lea 0x8(%ebx),%esi c0260224: a1 e4 34 42 c0 mov 0xc04234e4,%eax c0260229: 8b 5b 04 mov 0x4(%ebx),%ebx static inline unsigned long native_save_fl(void) { unsigned long flags; asm volatile("# __raw_save_flags\n\t" c026022c: 9c pushf c026022d: 8f 04 24 popl (%esp) "pushf ; pop %0" : "=g" (flags) : /* no input */ : "memory"); return flags; } static inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ :"g" (flags) :"memory", "cc"); } static inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); c0260230: fa cli unsigned long flags; struct rchan_buf *buf; local_irq_save(flags); buf = chan->buf[smp_processor_id()]; c0260231: 64 8b 15 04 60 3e c0 mov %fs:0xc03e6004,%edx c0260238: 8b 6c 90 20 mov 0x20(%eax,%edx,4),%ebp if (unlikely(buf->offset + length > chan->subbuf_size)) c026023c: 8b 55 08 mov 0x8(%ebp),%edx c026023f: 01 da add %ebx,%edx c0260241: 
3b 50 04 cmp 0x4(%eax),%edx c0260244: 76 0b jbe c0260251 <_ipfix_send_msg+0x62> length = relay_switch_subbuf(buf, length); c0260246: 89 da mov %ebx,%edx c0260248: 89 e8 mov %ebp,%eax c026024a: e8 4e be ed ff call c013c09d <relay_switch_subbuf> c026024f: 89 c3 mov %eax,%ebx c0260251: 8b 7d 04 mov 0x4(%ebp),%edi static __always_inline void *__memcpy(void *to, const void *from, size_t n) { int d0, d1, d2; asm volatile("rep ; movsl\n\t" c0260254: 89 d9 mov %ebx,%ecx c0260256: c1 e9 02 shr $0x2,%ecx c0260259: 03 7d 08 add 0x8(%ebp),%edi c026025c: f3 a5 repz movsl %ds:(%esi),%es:(%edi) c026025e: 89 d9 mov %ebx,%ecx c0260260: 83 e1 03 and $0x3,%ecx c0260263: 74 02 je c0260267 <_ipfix_send_msg+0x78> c0260265: f3 a4 repz movsb %ds:(%esi),%es:(%edi) buf = chan->buf[smp_processor_id()]; if (unlikely(buf->offset + length > chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; c0260267: 01 5d 08 add %ebx,0x8(%ebp) } static inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" c026026a: ff 34 24 pushl (%esp) c026026d: 9d popf } } relay_write(nf_pool.rchan, buf->buf, buf->buflen); if_lastaccess = jiffies_to_msecs(jiffies); c026026e: a1 00 bb 35 c0 mov 0xc035bb00,%eax c0260273: e8 2c ef eb ff call c011f1a4 <jiffies_to_msecs> c0260278: a3 04 3b 42 c0 mov %eax,0xc0423b04 return 0; c026027d: 31 c0 xor %eax,%eax } c026027f: 5a pop %edx c0260280: 5b pop %ebx c0260281: 5e pop %esi c0260282: 5f pop %edi c0260283: 5d pop %ebp c0260284: c3 ret ====================================================================== The problem happens at the line at address c026023c: if (unlikely(buf->offset + length > chan->subbuf_size)) c026023c: 8b 55 08 mov 0x8(%ebp),%edx c026023f: 01 da add %ebx,%edx c0260241: 3b 50 04 cmp 0x4(%eax),%edx c0260244: 76 0b jbe c0260251 <_ipfix_send_msg+0x62> Can anyone tell me what the problem is here? Is 'length' broken, or could this be a Relay bug? 
Also, it seems that the problem only happens on SMP systems; there are no crashes on UP. Thanks in advance, -- Alexey. -- To unsubscribe from this list: send an email with "unsubscribe kernelnewbies" to ecartis@xxxxxxxxxxxx Please read the FAQ at http://kernelnewbies.org/FAQ