* Ingo Molnar <mingo@xxxxxxx> wrote: > 100.000000 total > ................ > 2.118525 skb_release_head_state hits (total: 211852) ......... ffffffff8048938e: 967 <skb_release_head_state>: ffffffff8048938e: 967 53 push %rbx ffffffff8048938f: 3975 48 89 fb mov %rdi,%rbx ffffffff80489392: 17 48 8b 7f 28 mov 0x28(%rdi),%rdi ffffffff80489396: 0 e8 9c 93 00 00 callq ffffffff80492737 <dst_release> ffffffff8048939b: 6 48 8b 7b 30 mov 0x30(%rbx),%rdi ffffffff8048939f: 2887 48 85 ff test %rdi,%rdi ffffffff804893a2: 859 74 0f je ffffffff804893b3 <skb_release_head_state+0x25> ffffffff804893a4: 0 f0 ff 0f lock decl (%rdi) ffffffff804893a7: 0 0f 94 c0 sete %al ffffffff804893aa: 0 84 c0 test %al,%al ffffffff804893ac: 0 74 05 je ffffffff804893b3 <skb_release_head_state+0x25> ffffffff804893ae: 0 e8 7a 14 06 00 callq ffffffff804ea82d <__secpath_destroy> ffffffff804893b3: 16 48 83 bb 80 00 00 00 cmpq $0x0,0x80(%rbx) ffffffff804893ba: 0 00 ffffffff804893bb: 4294 74 31 je ffffffff804893ee <skb_release_head_state+0x60> ffffffff804893bd: 0 65 48 8b 04 25 10 00 mov %gs:0x10,%rax ffffffff804893c4: 0 00 00 ffffffff804893c6: 6540 48 63 80 48 e0 ff ff movslq -0x1fb8(%rax),%rax ffffffff804893cd: 14 a9 00 00 ff 0f test $0xfff0000,%eax ffffffff804893d2: 471 74 11 je ffffffff804893e5 <skb_release_head_state+0x57> ffffffff804893d4: 0 be 89 01 00 00 mov $0x189,%esi ffffffff804893d9: 0 48 c7 c7 cc b1 6a 80 mov $0xffffffff806ab1cc,%rdi ffffffff804893e0: 0 e8 d0 cd da ff callq ffffffff802361b5 <warn_on_slowpath> ffffffff804893e5: 0 48 89 df mov %rbx,%rdi ffffffff804893e8: 1733 ff 93 80 00 00 00 callq *0x80(%rbx) ffffffff804893ee: 888 48 8b bb 88 00 00 00 mov 0x88(%rbx),%rdi ffffffff804893f5: 3959 48 85 ff test %rdi,%rdi ffffffff804893f8: 0 74 0f je ffffffff80489409 <skb_release_head_state+0x7b> ffffffff804893fa: 0 f0 ff 0f lock decl (%rdi) ffffffff804893fd: 0 0f 94 c0 sete %al ffffffff80489400: 0 84 c0 test %al,%al ffffffff80489402: 0 74 05 je ffffffff80489409 <skb_release_head_state+0x7b> 
ffffffff80489404: 0 e8 48 f2 01 00 callq ffffffff804a8651 <nf_conntrack_destroy> ffffffff80489409: 0 48 8b bb 90 00 00 00 mov 0x90(%rbx),%rdi ffffffff80489410: 3132 48 85 ff test %rdi,%rdi ffffffff80489413: 1 74 05 je ffffffff8048941a <skb_release_head_state+0x8c> ffffffff80489415: 0 e8 d7 f7 ff ff callq ffffffff80488bf1 <kfree_skb> ffffffff8048941a: 958 48 8b bb 98 00 00 00 mov 0x98(%rbx),%rdi ffffffff80489421: 1999 48 85 ff test %rdi,%rdi ffffffff80489424: 0 74 0f je ffffffff80489435 <skb_release_head_state+0xa7> ffffffff80489426: 0 f0 ff 0f lock decl (%rdi) ffffffff80489429: 0 0f 94 c0 sete %al ffffffff8048942c: 0 84 c0 test %al,%al ffffffff8048942e: 0 74 05 je ffffffff80489435 <skb_release_head_state+0xa7> ffffffff80489430: 0 e8 a7 5f e0 ff callq ffffffff8028f3dc <kfree> ffffffff80489435: 0 66 c7 83 a6 00 00 00 movw $0x0,0xa6(%rbx) ffffffff8048943c: 0 00 00 ffffffff8048943e: 6503 66 c7 83 a8 00 00 00 movw $0x0,0xa8(%rbx) ffffffff80489445: 0 00 00 ffffffff80489447: 174101 5b pop %rbx ffffffff80489448: 0 c3 retq this function _really_ hurts from a 16-bit op: ffffffff8048943e: 6503 66 c7 83 a8 00 00 00 movw $0x0,0xa8(%rbx) ffffffff80489445: 0 00 00 ffffffff80489447: 174101 5b pop %rbx (gdb) list *0xffffffff8048943e 0xffffffff8048943e is in skb_release_head_state (net/core/skbuff.c:407). 402 #endif 403 /* XXX: IS this still necessary? - JHS */ 404 #ifdef CONFIG_NET_SCHED 405 skb->tc_index = 0; 406 #ifdef CONFIG_NET_CLS_ACT 407 skb->tc_verd = 0; 408 #endif 409 #endif 410 } 411 dirtying skb->tc_verd. I do have: CONFIG_NET_CLS_ACT=y BUT, on a second look, I don't think it's really this 16-bit op that hurts us. 
The wider context is: ffffffff80489426: 0 f0 ff 0f lock decl (%rdi) ffffffff80489429: 0 0f 94 c0 sete %al ffffffff8048942c: 0 84 c0 test %al,%al ffffffff8048942e: 0 74 05 je ffffffff80489435 <skb_release_head_state+0xa7> ffffffff80489430: 0 e8 a7 5f e0 ff callq ffffffff8028f3dc <kfree> ffffffff80489435: 0 66 c7 83 a6 00 00 00 movw $0x0,0xa6(%rbx) ffffffff8048943c: 0 00 00 ffffffff8048943e: 6503 66 c7 83 a8 00 00 00 movw $0x0,0xa8(%rbx) ffffffff80489445: 0 00 00 ffffffff80489447: 174101 5b pop %rbx ffffffff80489448: 0 c3 retq look how we jump over the callq most of the time - so what we are seeing here, I believe, is the cost of the atomic op at ffffffff80489426. That comes from: (gdb) list *0xffffffff8048942e 0xffffffff8048942e is in skb_release_head_state (include/linux/skbuff.h:1783). 1778 } 1779 #endif 1780 #ifdef CONFIG_BRIDGE_NETFILTER 1781 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) 1782 { 1783 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) 1784 kfree(nf_bridge); 1785 } 1786 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) 1787 { and ouch does that global dec on &nf_bridge->use hurt! I do have: CONFIG_BRIDGE_NETFILTER=y (this is a Fedora distro kernel derived .config) Ingo -- To unsubscribe from this list: send the line "unsubscribe kernel-testers" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html