Soft lockup in "__udp4_lib_lookup", maybe a GCC bug

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi guys,

I'm using linux-3.2. Yes, I know it's pretty old, and I'm going to
move to the latest stable version.

I hit a soft lockup in the function `__udp4_lib_lookup`. It turns out
that the lookup obtained a hlist_nulls_node from one hash slot, but
that hlist_nulls_node actually belongs to another hash slot, so the
code spins in the following loop:

```
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != slot)
                goto begin;
  
```

After analyzing the disassembly, I suspect that this may be GCC's
fault: it appears to incorrectly reuse the register `r8`, so that
`hslot->head` is not re-read when `sk_nulls_for_each_rcu()` is restarted.

The GCC I'm using is 4.5.1; it is also pretty old, yes, I know.
Please look at the following disassembly (I added some inline comments):

Dump of assembler code for function __udp4_lib_lookup:
linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c98f <+0>:	push   %rbp
   0xffffffff8134c990 <+1>:	mov    %rsp,%rbp
   0xffffffff8134c993 <+4>:	push   %r15
   0xffffffff8134c995 <+6>:	push   %r14
   0xffffffff8134c997 <+8>:	push   %r13
   0xffffffff8134c999 <+10>:	push   %r12
   0xffffffff8134c99b <+12>:	push   %rbx
   0xffffffff8134c99c <+13>:	sub    $0x48,%rsp
   0xffffffff8134c9a0 <+17>:	callq  0xffffffff813a2e80 <mcount>

include/linux/swab.h:
51		return ___constant_swab16(val);
   0xffffffff8134c9a5 <+22>:	rol    $0x8,%r8w

/linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c9aa <+27>:	mov    0x10(%rbp),%r13

include/linux/swab.h:
51		return ___constant_swab16(val);
   0xffffffff8134c9ae <+31>:	mov    %r8w,-0x32(%rbp)

/linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c9b3 <+36>:	mov    %ecx,%r15d

452		struct sock *sk, *result;
453		struct hlist_nulls_node *node;
454		unsigned short hnum = ntohs(dport);
455		unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
   0xffffffff8134c9b6 <+39>:	mov    0x10(%r13),%r8d
   0xffffffff8134c9ba <+43>:	movzwl -0x32(%rbp),%r14d

include/net/netns/hash.h:
16		return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
   0xffffffff8134c9bf <+48>:	mov    %rdi,%rax

/linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c9c2 <+51>:	mov    %rdi,%r12

include/net/netns/hash.h:
16		return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
   0xffffffff8134c9c5 <+54>:	shr    $0x6,%rax

/linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c9c9 <+58>:	mov    %esi,-0x38(%rbp)

include/linux/udp.h:
52		return (num + net_hash_mix(net)) & mask;
   0xffffffff8134c9cc <+61>:	lea    (%r14,%rax,1),%eax

/linux-3.2/net/ipv4/udp.c:
451	{
   0xffffffff8134c9d0 <+65>:	mov    %r9d,-0x3c(%rbp)

456		struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
   0xffffffff8134c9d4 <+69>:	and    %r8d,%eax

451	{
   0xffffffff8134c9d7 <+72>:	mov    %dx,-0x3e(%rbp)

456		struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
   0xffffffff8134c9db <+76>:	mov    %rax,%rbx
   0xffffffff8134c9de <+79>:	mov    %rax,-0x48(%rbp)
   0xffffffff8134c9e2 <+83>:	shl    $0x5,%rbx
   0xffffffff8134c9e6 <+87>:	add    0x0(%r13),%rbx
                                       ^~~~~~~~~~~~~~ rbx is hslot

457		int score, badness;
458	
459		rcu_read_lock();
460		if (hslot->count > 10) {
   0xffffffff8134c9ea <+91>:	mov    0x8(%rbx),%ecx
   0xffffffff8134c9ed <+94>:	cmp    $0xa,%ecx
   0xffffffff8134c9f0 <+97>:	jle    0xffffffff8134ca9e <__udp4_lib_lookup+271>

461			hash2 = udp4_portaddr_hash(net, daddr, hnum);
   0xffffffff8134c9f6 <+103>:	mov    %r14d,%edx
   0xffffffff8134c9f9 <+106>:	mov    %ecx,-0x60(%rbp)
   0xffffffff8134c9fc <+109>:	mov    %r8d,-0x58(%rbp)
   0xffffffff8134ca00 <+113>:	mov    %r15d,%esi
   0xffffffff8134ca03 <+116>:	callq  0xffffffff8134a74f <udp4_portaddr_hash>

462			slot2 = hash2 & udptable->mask;
   0xffffffff8134ca08 <+121>:	mov    -0x58(%rbp),%r8d

464			if (hslot->count < hslot2->count)
   0xffffffff8134ca0c <+125>:	mov    -0x60(%rbp),%ecx

462			slot2 = hash2 & udptable->mask;
   0xffffffff8134ca0f <+128>:	and    %r8d,%eax

463			hslot2 = &udptable->hash2[slot2];
   0xffffffff8134ca12 <+131>:	mov    %eax,%edx
   0xffffffff8134ca14 <+133>:	shl    $0x5,%rdx
   0xffffffff8134ca18 <+137>:	add    0x8(%r13),%rdx

464			if (hslot->count < hslot2->count)
   0xffffffff8134ca1c <+141>:	cmp    0x8(%rdx),%ecx
   0xffffffff8134ca1f <+144>:	jl     0xffffffff8134ca9e <__udp4_lib_lookup+271>

465				goto begin;
466	
467			result = udp4_lib_lookup2(net, saddr, sport,
   0xffffffff8134ca21 <+146>:	movzwl -0x3e(%rbp),%ecx
   0xffffffff8134ca25 <+150>:	mov    %rdx,(%rsp)
   0xffffffff8134ca29 <+154>:	mov    %ecx,-0x4c(%rbp)
   0xffffffff8134ca2c <+157>:	mov    %r12,%rdi
   0xffffffff8134ca2f <+160>:	mov    %eax,0x8(%rsp)
   0xffffffff8134ca33 <+164>:	mov    -0x3c(%rbp),%r9d
   0xffffffff8134ca37 <+168>:	mov    %r14d,%r8d
   0xffffffff8134ca3a <+171>:	mov    %r15d,%ecx
   0xffffffff8134ca3d <+174>:	mov    -0x4c(%rbp),%edx
   0xffffffff8134ca40 <+177>:	mov    -0x38(%rbp),%esi
   0xffffffff8134ca43 <+180>:	callq  0xffffffff8134c7bd <udp4_lib_lookup2>
   0xffffffff8134ca48 <+185>:	mov    %rax,%rdi

468						  daddr, hnum, dif,
469						  hslot2, slot2);
470			if (!result) {
   0xffffffff8134ca4b <+188>:	test   %rax,%rax
   0xffffffff8134ca4e <+191>:	jne    0xffffffff8134cc0f <__udp4_lib_lookup+640>

471				hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
   0xffffffff8134ca54 <+197>:	mov    %r14d,%edx
   0xffffffff8134ca57 <+200>:	xor    %esi,%esi
   0xffffffff8134ca59 <+202>:	mov    %r12,%rdi
   0xffffffff8134ca5c <+205>:	callq  0xffffffff8134a74f <udp4_portaddr_hash>

472				slot2 = hash2 & udptable->mask;
   0xffffffff8134ca61 <+210>:	and    0x10(%r13),%eax

473				hslot2 = &udptable->hash2[slot2];
   0xffffffff8134ca65 <+214>:	mov    %eax,%edx
   0xffffffff8134ca67 <+216>:	shl    $0x5,%rdx
   0xffffffff8134ca6b <+220>:	add    0x8(%r13),%rdx

474				if (hslot->count < hslot2->count)
   0xffffffff8134ca6f <+224>:	mov    0x8(%rdx),%ecx
   0xffffffff8134ca72 <+227>:	cmp    %ecx,0x8(%rbx)
   0xffffffff8134ca75 <+230>:	jl     0xffffffff8134ca9e <__udp4_lib_lookup+271>

475					goto begin;
476	
477				result = udp4_lib_lookup2(net, saddr, sport,
   0xffffffff8134ca77 <+232>:	mov    %rdx,(%rsp)
   0xffffffff8134ca7b <+236>:	mov    %r12,%rdi
   0xffffffff8134ca7e <+239>:	mov    %eax,0x8(%rsp)
   0xffffffff8134ca82 <+243>:	mov    -0x3c(%rbp),%r9d
   0xffffffff8134ca86 <+247>:	mov    %r14d,%r8d
   0xffffffff8134ca89 <+250>:	xor    %ecx,%ecx
   0xffffffff8134ca8b <+252>:	mov    -0x4c(%rbp),%edx
   0xffffffff8134ca8e <+255>:	mov    -0x38(%rbp),%esi
   0xffffffff8134ca91 <+258>:	callq  0xffffffff8134c7bd <udp4_lib_lookup2>
   0xffffffff8134ca96 <+263>:	mov    %rax,%rdi
   0xffffffff8134ca99 <+266>:	jmpq   0xffffffff8134cc0f <__udp4_lib_lookup+640>
   0xffffffff8134ca9e <+271>:	mov    -0x32(%rbp),%r13w

487		sk_nulls_for_each_rcu(sk, node, &hslot->head) {
   0xffffffff8134caa3 <+276>:	mov    (%rbx),%r8  <==
                                       ^~~~~~~~~~ Here! hslot->head is loaded into r8!

   0xffffffff8134caa6 <+279>:	jmpq   0xffffffff8134cb2c <__udp4_lib_lookup+413>

333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134caab <+284>:	cmp    %r13w,-0x30(%rcx)
   0xffffffff8134cab0 <+289>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

334				!ipv6_only_sock(sk)) {
   0xffffffff8134cab2 <+291>:	mov    0xc(%rsi),%eax

333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134cab5 <+294>:	cmp    $0xa,%ax
   0xffffffff8134cab9 <+298>:	jne    0xffffffff8134cac9 <__udp4_lib_lookup+314>

334				!ipv6_only_sock(sk)) {
   0xffffffff8134cabb <+300>:	mov    0x270(%rsi),%r9
   0xffffffff8134cac2 <+307>:	testb  $0x10,0x6a(%r9)
   0xffffffff8134cac7 <+312>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

335			struct inet_sock *inet = inet_sk(sk);
336	
337			score = (sk->sk_family == PF_INET ? 1 : 0);
   0xffffffff8134cac9 <+314>:	cmp    $0x2,%ax

338			if (inet->inet_rcv_saddr) {
   0xffffffff8134cacd <+318>:	mov    0x4(%rsi),%r9d

337			score = (sk->sk_family == PF_INET ? 1 : 0);
   0xffffffff8134cad1 <+322>:	sete   %al

338			if (inet->inet_rcv_saddr) {
   0xffffffff8134cad4 <+325>:	test   %r9d,%r9d

337			score = (sk->sk_family == PF_INET ? 1 : 0);
   0xffffffff8134cad7 <+328>:	movzbl %al,%eax

338			if (inet->inet_rcv_saddr) {
   0xffffffff8134cada <+331>:	je     0xffffffff8134cae4 <__udp4_lib_lookup+341>

339				if (inet->inet_rcv_saddr != daddr)
   0xffffffff8134cadc <+333>:	cmp    %r15d,%r9d
   0xffffffff8134cadf <+336>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

340					return -1;
341				score += 2;
   0xffffffff8134cae1 <+338>:	add    $0x2,%eax

342			}
343			if (inet->inet_daddr) {
   0xffffffff8134cae4 <+341>:	mov    (%rsi),%r9d
   0xffffffff8134cae7 <+344>:	test   %r9d,%r9d
   0xffffffff8134caea <+347>:	je     0xffffffff8134caf5 <__udp4_lib_lookup+358>

344				if (inet->inet_daddr != saddr)
   0xffffffff8134caec <+349>:	cmp    -0x38(%rbp),%r9d
   0xffffffff8134caf0 <+353>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

345					return -1;
346				score += 2;
   0xffffffff8134caf2 <+355>:	add    $0x2,%eax

347			}
348			if (inet->inet_dport) {
   0xffffffff8134caf5 <+358>:	mov    0x278(%rsi),%r9d
   0xffffffff8134cafc <+365>:	test   %r9w,%r9w
   0xffffffff8134cb00 <+369>:	je     0xffffffff8134cb0c <__udp4_lib_lookup+381>

349				if (inet->inet_dport != sport)
   0xffffffff8134cb02 <+371>:	cmp    -0x3e(%rbp),%r9w
   0xffffffff8134cb07 <+376>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

351				score += 2;
   0xffffffff8134cb09 <+378>:	add    $0x2,%eax

352			}
353			if (sk->sk_bound_dev_if) {
   0xffffffff8134cb0c <+381>:	mov    0x10(%rsi),%r9d
   0xffffffff8134cb10 <+385>:	test   %r9d,%r9d
   0xffffffff8134cb13 <+388>:	je     0xffffffff8134cb1e <__udp4_lib_lookup+399>

354				if (sk->sk_bound_dev_if != dif)
   0xffffffff8134cb15 <+390>:	cmp    -0x3c(%rbp),%r9d
   0xffffffff8134cb19 <+394>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>

355					return -1;
356				score += 2;
   0xffffffff8134cb1b <+396>:	add    $0x2,%eax

488			score = compute_score(sk, net, saddr, hnum, sport,
489					      daddr, dport, dif);
490			if (score > badness) {
   0xffffffff8134cb1e <+399>:	cmp    %edx,%eax
   0xffffffff8134cb20 <+401>:	jle    0xffffffff8134cb27 <__udp4_lib_lookup+408>
   0xffffffff8134cb22 <+403>:	mov    %eax,%edx

491				result = sk;
   0xffffffff8134cb24 <+405>:	mov    %rsi,%rdi

487		sk_nulls_for_each_rcu(sk, node, &hslot->head) {
   0xffffffff8134cb27 <+408>:	mov    (%rcx),%rcx
   0xffffffff8134cb2a <+411>:	jmp    0xffffffff8134cb34 <__udp4_lib_lookup+421>
   0xffffffff8134cb2c <+413>:	mov    %r8,%rcx  <== 
                                       ^~~~~~~~ Here, r8 is copied into rcx each time the loop is (re)started,
                                                which means that the original hslot->head is cached in r8.
                                                Shouldn't it re-read (%rbx) to get the value of hslot->head again?

486		badness = -1;
   0xffffffff8134cb2f <+416>:	or     $0xffffffff,%edx

485		result = NULL;
   0xffffffff8134cb32 <+419>:	xor    %edi,%edi

487		sk_nulls_for_each_rcu(sk, node, &hslot->head) {
   0xffffffff8134cb34 <+421>:	test   $0x1,%cl
   0xffffffff8134cb37 <+424>:	jne    0xffffffff8134cb48 <__udp4_lib_lookup+441>
   0xffffffff8134cb39 <+426>:	lea    -0x38(%rcx),%rsi

333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134cb3d <+430>:	cmp    %r12,-0x8(%rcx)
   0xffffffff8134cb41 <+434>:	jne    0xffffffff8134cb27 <__udp4_lib_lookup+408>
   0xffffffff8134cb43 <+436>:	jmpq   0xffffffff8134caab <__udp4_lib_lookup+284>

include/linux/list_nulls.h:
46		return ((unsigned long)ptr) >> 1;
   0xffffffff8134cb48 <+441>:	shr    %rcx

/linux-3.2/net/ipv4/udp.c:
500		if (get_nulls_value(node) != slot)
   0xffffffff8134cb4b <+444>:	cmp    -0x48(%rbp),%rcx
   0xffffffff8134cb4f <+448>:	jne    0xffffffff8134cb2c <__udp4_lib_lookup+413> => jumps to +413, but that is not equivalent to "goto begin".

501			goto begin;
502	
503		if (result) {
   0xffffffff8134cb51 <+450>:	test   %rdi,%rdi
   0xffffffff8134cb54 <+453>:	je     0xffffffff8134cc0f <__udp4_lib_lookup+640>
   0xffffffff8134cb5a <+459>:	mov    $0x2,%ecx
   0xffffffff8134cb5f <+464>:	jmp    0xffffffff8134cb63 <__udp4_lib_lookup+468>

include/linux/atomic.h:
55		} while (c);
   0xffffffff8134cb61 <+466>:	mov    %eax,%ecx

51			val = atomic_cmpxchg(v, c, c + 1);
   0xffffffff8134cb63 <+468>:	lea    0x1(%rcx),%esi

/linux-3.2/arch/x86/include/asm/atomic.h:
211		return cmpxchg(&v->counter, old, new);
   0xffffffff8134cb66 <+471>:	mov    %ecx,%eax
   0xffffffff8134cb68 <+473>:	lock cmpxchg %esi,0x4c(%rdi)

include/linux/atomic.h:
52			if (val == c)
   0xffffffff8134cb6d <+478>:	cmp    %ecx,%eax
   0xffffffff8134cb6f <+480>:	je     0xffffffff8134cc21 <__udp4_lib_lookup+658>

53				return 1;
54			c = val;
55		} while (c);
   0xffffffff8134cb75 <+486>:	test   %eax,%eax
   0xffffffff8134cb77 <+488>:	jne    0xffffffff8134cb61 <__udp4_lib_lookup+466>

/linux-3.2/net/ipv4/udp.c:
505				result = NULL;
   0xffffffff8134cb79 <+490>:	xor    %edi,%edi
   0xffffffff8134cb7b <+492>:	jmpq   0xffffffff8134cc0f <__udp4_lib_lookup+640>

331		int score = -1;
   0xffffffff8134cb80 <+497>:	or     $0xffffffff,%eax

332	
333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134cb83 <+500>:	cmp    %r13w,0x8(%rdi)
   0xffffffff8134cb88 <+505>:	jne    0xffffffff8134cbf2 <__udp4_lib_lookup+611>

334				!ipv6_only_sock(sk)) {
   0xffffffff8134cb8a <+507>:	mov    0xc(%rdi),%ecx

333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134cb8d <+510>:	cmp    $0xa,%cx
   0xffffffff8134cb91 <+514>:	jne    0xffffffff8134cba0 <__udp4_lib_lookup+529>

334				!ipv6_only_sock(sk)) {
   0xffffffff8134cb93 <+516>:	mov    0x270(%rdi),%rsi
   0xffffffff8134cb9a <+523>:	testb  $0x10,0x6a(%rsi)
   0xffffffff8134cb9e <+527>:	jne    0xffffffff8134cbf2 <__udp4_lib_lookup+611>

335			struct inet_sock *inet = inet_sk(sk);
336	
337			score = (sk->sk_family == PF_INET ? 1 : 0);
   0xffffffff8134cba0 <+529>:	xor    %eax,%eax
   0xffffffff8134cba2 <+531>:	cmp    $0x2,%cx

338			if (inet->inet_rcv_saddr) {
   0xffffffff8134cba6 <+535>:	mov    0x4(%rdi),%ecx

337			score = (sk->sk_family == PF_INET ? 1 : 0);
   0xffffffff8134cba9 <+538>:	sete   %al

338			if (inet->inet_rcv_saddr) {
   0xffffffff8134cbac <+541>:	test   %ecx,%ecx
   0xffffffff8134cbae <+543>:	je     0xffffffff8134cbb8 <__udp4_lib_lookup+553>

339				if (inet->inet_rcv_saddr != daddr)
   0xffffffff8134cbb0 <+545>:	cmp    %r15d,%ecx
   0xffffffff8134cbb3 <+548>:	jne    0xffffffff8134cbef <__udp4_lib_lookup+608>

340					return -1;
341				score += 2;
   0xffffffff8134cbb5 <+550>:	add    $0x2,%eax

342			}
343			if (inet->inet_daddr) {
   0xffffffff8134cbb8 <+553>:	mov    (%rdi),%ecx
   0xffffffff8134cbba <+555>:	test   %ecx,%ecx
   0xffffffff8134cbbc <+557>:	je     0xffffffff8134cbc6 <__udp4_lib_lookup+567>

344				if (inet->inet_daddr != saddr)
   0xffffffff8134cbbe <+559>:	cmp    -0x38(%rbp),%ecx
   0xffffffff8134cbc1 <+562>:	jne    0xffffffff8134cbef <__udp4_lib_lookup+608>

345					return -1;
346				score += 2;
   0xffffffff8134cbc3 <+564>:	add    $0x2,%eax

347			}
348			if (inet->inet_dport) {
   0xffffffff8134cbc6 <+567>:	mov    0x278(%rdi),%ecx
   0xffffffff8134cbcc <+573>:	test   %cx,%cx
   0xffffffff8134cbcf <+576>:	je     0xffffffff8134cbda <__udp4_lib_lookup+587>

349				if (inet->inet_dport != sport)
   0xffffffff8134cbd1 <+578>:	cmp    -0x3e(%rbp),%cx
   0xffffffff8134cbd5 <+582>:	jne    0xffffffff8134cbef <__udp4_lib_lookup+608>

351				score += 2;
   0xffffffff8134cbd7 <+584>:	add    $0x2,%eax

352			}
353			if (sk->sk_bound_dev_if) {
   0xffffffff8134cbda <+587>:	mov    0x10(%rdi),%ecx
   0xffffffff8134cbdd <+590>:	test   %ecx,%ecx
   0xffffffff8134cbdf <+592>:	je     0xffffffff8134cbf2 <__udp4_lib_lookup+611>

355					return -1;
356				score += 2;
   0xffffffff8134cbe1 <+594>:	lea    0x2(%rax),%esi
   0xffffffff8134cbe4 <+597>:	or     $0xffffffff,%eax
   0xffffffff8134cbe7 <+600>:	cmp    -0x3c(%rbp),%ecx
   0xffffffff8134cbea <+603>:	cmove  %esi,%eax
   0xffffffff8134cbed <+606>:	jmp    0xffffffff8134cbf2 <__udp4_lib_lookup+611>

350					return -1;
   0xffffffff8134cbef <+608>:	or     $0xffffffff,%eax

506			else if (unlikely(compute_score(result, net, saddr, hnum, sport,
   0xffffffff8134cbf2 <+611>:	cmp    %edx,%eax
   0xffffffff8134cbf4 <+613>:	jge    0xffffffff8134cc0f <__udp4_lib_lookup+640>

/linux-3.2/arch/x86/include/asm/atomic.h:
123		asm volatile(LOCK_PREFIX "decl %0; sete %1"
   0xffffffff8134cbf6 <+615>:	lock decl 0x4c(%rdi)
   0xffffffff8134cbfa <+619>:	sete   %al

include/net/sock.h:
1257		if (atomic_dec_and_test(&sk->sk_refcnt))
   0xffffffff8134cbfd <+622>:	test   %al,%al
   0xffffffff8134cbff <+624>:	je     0xffffffff8134caa3 <__udp4_lib_lookup+276>

1258			sk_free(sk);
   0xffffffff8134cc05 <+630>:	callq  0xffffffff812e873f <sk_free>
   0xffffffff8134cc0a <+635>:	jmpq   0xffffffff8134caa3 <__udp4_lib_lookup+276>

/linux-3.2/net/ipv4/udp.c:
514	}
   0xffffffff8134cc0f <+640>:	add    $0x48,%rsp
   0xffffffff8134cc13 <+644>:	mov    %rdi,%rax
   0xffffffff8134cc16 <+647>:	pop    %rbx
   0xffffffff8134cc17 <+648>:	pop    %r12
   0xffffffff8134cc19 <+650>:	pop    %r13
   0xffffffff8134cc1b <+652>:	pop    %r14
   0xffffffff8134cc1d <+654>:	pop    %r15
   0xffffffff8134cc1f <+656>:	leaveq 
   0xffffffff8134cc20 <+657>:	retq   

333		if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
   0xffffffff8134cc21 <+658>:	cmp    %r12,0x30(%rdi)
   0xffffffff8134cc25 <+662>:	jne    0xffffffff8134cbef <__udp4_lib_lookup+608>
   0xffffffff8134cc27 <+664>:	jmpq   0xffffffff8134cb80 <__udp4_lib_lookup+497>
End of assembler dump.


The value of r8 is copied into rcx each time the loop is (re)started,
which means that the original hslot->head is cached in r8. Shouldn't the
code re-read (%rbx) to get the value of hslot->head again?

I would greatly appreciate it if you could give me some feedback.

Best regards,
Jason Cai





[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]