how to correctly pass volatile pointer to _mm_loadu_ps?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi,

i'm trying to read 128-bits of unaligned data in one atomic move using 'movups' opcode.
the protoype with _mm_loadu_ps was optimized out by the compiler.

$ gcc46 hw_reg.c -Wall -c -O2 -m64 --save-temps
hw_reg.c: In function 'read_mapped_register_128':
hw_reg.c:19:2: warning: passing argument 1 of '_mm_loadu_ps' from incompatible pointer type [enabled by default]
/opt/gcc46/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/include/xmmintrin.h:904:1: note: expected 'const float *' but argument is of type 'const volatile __vector(4) float *'

i've tested another idea with intermediate volatile value:

static __m128 read_mapped_register_128( __m128 volatile const* address, ptrdiff_t index )
{
        __m128 volatile const* p = address + index;
        __m128 volatile const v = _mm_loadu_ps( p );
        return v;
}

but it generates 3 moves while one is enough:

unused_read_128_with_side_effects:
        salq    $4, %rsi
        movups  (%rdi,%rsi), %xmm0
        movaps  %xmm0, -24(%rsp)
        movaps  -24(%rsp), %xmm0
        ret

is there a nice solution in C language for such (one-move) access?

BR,
Pawel.
#include <stddef.h>
#include <xmmintrin.h>

static unsigned read_mapped_register_32( unsigned volatile const* address, ptrdiff_t index )
{
	unsigned volatile const* p = address + index;
	return *p;
}

static __m64 read_mapped_register_64( __m64 volatile const* address, ptrdiff_t index )
{
	__m64 volatile const* p = address + index;
	return *p;
}

static __m128 read_mapped_register_128( __m128 volatile const* address, ptrdiff_t index )
{
	__m128 volatile const* p = address + index;
	return _mm_loadu_ps( p );
}

void unused_read_32_with_side_effects( unsigned volatile const* address, ptrdiff_t index )
{ read_mapped_register_32( address, index ); }
void unused_read_64_with_side_effects( __m64 volatile const* address, ptrdiff_t index )
{ read_mapped_register_64( address, index );}
void unused_read_128_with_side_effects( __m128 volatile const* address, ptrdiff_t index )
{ read_mapped_register_128( address, index ); }
	.file	"hw_reg.c"
	.text
	.p2align 4,,15
	.globl	unused_read_32_with_side_effects
	.type	unused_read_32_with_side_effects, @function
unused_read_32_with_side_effects:
.LFB519:
	.cfi_startproc
	leaq	(%rdi,%rsi,4), %rax
	movl	(%rax), %eax
	ret
	.cfi_endproc
.LFE519:
	.size	unused_read_32_with_side_effects, .-unused_read_32_with_side_effects
	.p2align 4,,15
	.globl	unused_read_64_with_side_effects
	.type	unused_read_64_with_side_effects, @function
unused_read_64_with_side_effects:
.LFB520:
	.cfi_startproc
	leaq	(%rdi,%rsi,8), %rax
	movq	(%rax), %rax
	ret
	.cfi_endproc
.LFE520:
	.size	unused_read_64_with_side_effects, .-unused_read_64_with_side_effects
	.p2align 4,,15
	.globl	unused_read_128_with_side_effects
	.type	unused_read_128_with_side_effects, @function
unused_read_128_with_side_effects:
.LFB521:
	.cfi_startproc
	rep
	ret
	.cfi_endproc
.LFE521:
	.size	unused_read_128_with_side_effects, .-unused_read_128_with_side_effects
	.ident	"GCC: (GNU) 4.6.0 20110122 (experimental)"
	.section	.note.GNU-stack,"",@progbits

[Index of Archives]     [Linux C Programming]     [Linux Kernel]     [eCos]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [The DWARVES Debugging Tools]     [Yosemite Campsites]     [Yosemite News]     [Linux GCC]

  Powered by Linux