Re: test_and_set_bit implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Matthew Wilcox wrote:

The original code and your rewrite both access memory twice in the loop.
Why don't we do it with one memory reference per loop instead?

{
	CMPXCHG_BUGCHECK_DECL

	u32 *m = (u32 *)addr + (nr >> 5);
	u32 bit = 1 << (nr & 31);

	u32 old = *m;
	while (!(old & bit)) {
		u32 new = old | bit;
		u32 prev = cmpxchg_acq(m, old, new);
		CMPXCHG_BUGCHECK(m);
		if (prev == old)
			return 1;
		old = prev;
	}

	return 0;
}

Looking at the disassembly of grab_block() in fs/ext2/balloc.c, I don't
see much difference.  The ld4.acq turns into a regular ld4 (because
'm' is no longer tagged as volatile), and is hoisted out of the loop.
Interestingly, gcc chooses to reorder the tests, and make the loop four
bundles long instead of three, but will 'goto repeat' in two bundles
instead of four.  Using likely()/unlikely() doesn't persuade gcc to
change the order of the two branches, so I assume it actually is better
to do it this way.

I like this code with the following slight modifications:
- let's keep "m" as pointer to volatile
- let's keep on using "__u32" types
- not sure we need "new"
- return the old bit

	volatile __u32 *m = (volatile __u32 *)addr + (nr >> 5);
	__u32 bit = 1 << (nr & 31);

	__u32 old = *m;
	while (!(old & bit)) {
		__u32 prev = cmpxchg_acq(m, old, old | bit);
		CMPXCHG_BUGCHECK(m);
		if (prev == old)
			return 0;
		old = prev;
	}
	return 1;

It compiles to:

  0:         and r15=31,r32
  6:         mov r14=1
  c:         extr r32=r32,5,27;;
 10:         shladd r18=r32,2,r33;;
 16:         ld4.acq r16=[r18]
 1c:         shl r17=r14,r15;;
 20:         and r14=r17,r16;;
 26:         nop.m 0x0
 2c:         cmp4.eq p7,p6=0,r14
 30:         nop.b 0x0
 36:         nop.b 0x0
 3c:   (p06) br.cond.dpnt.few a0 <+0xa0>
 40:         nop.m 0x0
 46:         zxt4 r14=r16
 4c:         nop.b 0x0;;
 50:         mov.m ar.ccv=r14;;
 56:         nop.m 0x0
 5c:         or r14=r17,r16
 60:         nop.m 0x0;;
 66:         cmpxchg4.acq r14=[r18],r14,ar.ccv
 6c:         nop.i 0x0;;
 70:         cmp4.eq p7,p6=r14,r16
 76:         nop.f 0x0
 7c:         and r15=r17,r14
 80:         mov r16=r14
 86:         nop.f 0x0
 8c:   (p07) br.cond.dpnt.few b0 <+0xb0>;;
 90:         nop.m 0x0
 96:         cmp4.eq p7,p6=0,r15
 9c:   (p07) br.cond.dptk.few 40 <+0x40>
 a0:         nop.m 0x0
 a6:         mov r8=1
 ac:         br.ret.sptk.many b0;;
 b0:         nop.m 0x0
 b6:         mov r8=r0
 bc:         br.ret.sptk.many b0;;

It seems to be o.k., thanks.

Zoltán Menyhárt


-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [Sparc Linux]     [DCCP]     [Linux ARM]     [Yosemite News]     [Linux SCSI]     [Linux x86_64]     [Linux for Ham Radio]

  Powered by Linux