I hoped to do something "clever" with a function of the form:
/* A 64-byte character buffer wrapped in a struct so that it can be
 * returned from a function by value. */
typedef struct { char s[64] ; } qerr_str_t ;
/* Format 'err' as the text "errno=<err>" into a local qerr_str_t and
 * return that struct by value. */
extern qerr_str_t
qerrst0(int err)
{
qerr_str_t st ;
/* Output is bounded by the size of the embedded buffer (64 bytes). */
snprintf(st.s, sizeof(st.s), "errno=%d", err) ;
return st ;
}
but was disappointed to find that this compiles (gcc 8.3 and others,
-O2) to this:
.LC0:
.string "errno=%d"
qerrst0:
pushq %rbx
movl %esi, %ecx
movq %rdi, %rbx
movl $.LC0, %edx
movl $64, %esi
xorl %eax, %eax
subq $64, %rsp
movq %rsp, %rdi
call snprintf
movdqa (%rsp), %xmm0
movq %rbx, %rax
movdqa 16(%rsp), %xmm1
movdqa 32(%rsp), %xmm2
movdqa 48(%rsp), %xmm3
movups %xmm0, (%rbx)
movups %xmm1, 16(%rbx)
movups %xmm2, 32(%rbx)
movups %xmm3, 48(%rbx)
addq $64, %rsp
popq %rbx
ret
On reflection, the compiler is playing safe and not writing to whatever
the "hidden" pointer %rdi is pointing at, until the implicit assignment.
So I have no right to be disappointed.
The object of the exercise is to create temporary strings for use like this:
/* Example caller: prints argv[0] followed by the string built by
 * qerrst0(argc). */
int
main(int argc, char* argv[])
{
/* The struct returned by qerrst0() is never named: its .s member is
 * passed straight to printf(). */
printf("%s: %s\n", argv[0], qerrst0(argc).s) ;
}
where the "hidden" pointer passed to qerrst0() does not, in fact, point
to anything accessible. Sadly, even when qerrst0() is inlined, I find:
.LC0:
.string "errno=%d"
.LC1:
.string "%s: %s\n"
main:
pushq %rbx
movl %edi, %ecx
movq %rsi, %rbx
movl $.LC0, %edx
movl $64, %esi
xorl %eax, %eax
addq $-128, %rsp
leaq 64(%rsp), %rdi
call snprintf
movdqa 64(%rsp), %xmm0
movq (%rbx), %rsi
xorl %eax, %eax
movdqa 80(%rsp), %xmm1
movdqa 96(%rsp), %xmm2
movq %rsp, %rdx
movl $.LC1, %edi
movdqa 112(%rsp), %xmm3
movaps %xmm0, (%rsp)
movaps %xmm1, 16(%rsp)
movaps %xmm2, 32(%rsp)
movaps %xmm3, 48(%rsp)
call printf
subq $-128, %rsp
xorl %eax, %eax
popq %rbx
ret
where there is still an (unnecessary) 64-byte copy going on, from 64(%rsp) to (%rsp)!
I tried something simpler:
/* Minimal test case: store 'err' (converted to char) in the first byte of
 * a local qerr_str_t and return the struct by value.  The other 63 bytes
 * are never written. */
extern qerr_str_t
qerrst1(int err)
{
qerr_str_t st ;
st.s[0] = err ;
return st ;
}
which compiles to:
qerrst1:
movq %rdi, %rax
movb %sil, (%rdi)
ret
...so a trivial case optimises as one might hope.
As does:
/* Like qerrst1(), but writes through a pointer to the buffer rather than
 * indexing the member directly: stores 'err' in the first and last bytes
 * (offsets 0 and 63) and returns the struct by value. */
extern qerr_str_t
qerrst2(int err)
{
qerr_str_t st ;
/* q points at the local struct's buffer. */
char* q = st.s ;
q[0] = err ;
q[63] = err ;
return st ;
}
qerrst2:
movq %rdi, %rax
movb %sil, (%rdi)
movb %sil, 63(%rdi)
ret
The following are also optimised:
/* Return a qerr_str_t whose buffer is initialised from the empty string
 * literal, i.e. all 64 bytes are zero.  'err' is not used. */
extern qerr_str_t
qerrst3a(int err)
{
qerr_str_t st = { "" } ;
return st ;
}
/* Return a qerr_str_t whose 64-byte buffer has been zeroed with memset()
 * through a pointer to it.  'err' is not used. */
extern qerr_str_t
qerrst3b(int err)
{
qerr_str_t st ;
/* q points at the local struct's buffer. */
char* q = st.s ;
memset(q, 0, sizeof(st.s)) ;
return st ;
}
to the same code:
qerrst3a/b:
pxor %xmm0, %xmm0
movq %rdi, %rax
movups %xmm0, (%rdi)
movups %xmm0, 16(%rdi)
movups %xmm0, 32(%rdi)
movups %xmm0, 48(%rdi)
ret
However, ever so slightly more complicated:
/* Fill the first (err & 63) bytes of a local qerr_str_t with the values
 * err, err-1, err-2, ... (each converted to char) and return the struct by
 * value.  The number of bytes written is only known at run time (0..63);
 * the remaining bytes are never written. */
extern qerr_str_t
qerrst4(int err)
{
qerr_str_t st ;
for (int i = 0 ; i < (err & 63) ; ++i)
st.s[i] = err - i ;
return st ;
}
qerrst4:
movl %esi, %edx
movq %rdi, %rax
andl $63, %edx
je .L12
subl $1, %edx
leaq -71(%rsp,%rdx), %r8
leaq -72(%rsp), %rdx
addl %edx, %esi
.L11:
movl %esi, %ecx
subl %edx, %ecx
addq $1, %rdx
movb %cl, -1(%rdx)
cmpq %r8, %rdx
jne .L11
.L12:
movdqa -72(%rsp), %xmm0
movdqa -56(%rsp), %xmm1
movdqa -40(%rsp), %xmm2
movdqa -24(%rsp), %xmm3
movups %xmm0, (%rax)
movups %xmm1, 16(%rax)
movups %xmm2, 32(%rax)
movups %xmm3, 48(%rax)
ret
Which is a puzzle :-(
Interestingly, I also found (after a little effort):
/* Test case mixing writes to the result with reads through an unrelated
 * pointer: byte 0 of the result is set from 'err', and bytes 2, 4, 6, 8
 * and 10 are set from fred[8], fred[6], fred[4], fred[2] and fred[0]
 * respectively.  The struct is returned by value.
 *
 * 'st' is a local object, so every fred[] read is expected to see fred's
 * contents as they were on entry, wherever the caller puts the result. */
extern qerr_str_t
qerrst5(int err, char* fred)
{
qerr_str_t st ;
st.s[ 0] = err ;
st.s[ 2] = fred[ 8] ;
st.s[ 4] = fred[ 6] ;
st.s[ 6] = fred[ 4] ;
st.s[ 8] = fred[ 2] ;
st.s[10] = fred[ 0] ;
return st ;
}
qerrst5:
movq %rdi, %rax
movzbl 8(%rdx), %r9d
movzbl 6(%rdx), %r8d
movzbl 4(%rdx), %edi
movzbl 2(%rdx), %ecx
movb %sil, (%rax)       # BUG iff %rax == %rdx: this store overwrites fred[0] ...
movzbl (%rdx), %edx     # ... before fred[0] is read here !
movb %r9b, 2(%rax)
movb %r8b, 4(%rax)
movb %dil, 6(%rax)
movb %cl, 8(%rax)
movb %dl, 10(%rax)
ret
which is very nearly correct... except, as noted, if fred points at the
final destination!!
For this to do what I had hoped (and I imagine is the majority case),
what is needed is a way to mark the declaration of 'qerr_str_t st' in
the function as a "clone" of the final destination 'qerr_str_t' in the
caller -- so that the compiler could Just Do It.
I looked for an __attribute__(()) for this... but could not find one.
Is there any way in which I can persuade the compiler that a function
returning a struct does not need to worry about preserving the value of
the final destination (i.e. the struct that %rdi points to)?
Chris