> Actually the mmx support isn't about whether the compiler supports it > or not. With inline asm, the MMX-enabled code is always included in > alsa-lib as well as the code without MMX, then which one to be > executed is dynamically switched at runtime by probing the CPU > capability. Yes and the problem is that if MMX technology is enabled, the compiler could technically choose to store some data in these registers; data that will be overwritten by the chunk. The role of the clobber list is to inform the compiler that these registers are used and modified. If the MMX technology is not enabled, the compiler will not use them anyway, so there is no need to put them in clobbers. However (this is the beauty of x86...), they will clobber x87 floating point registers so, I would say: if HAVE_MMX -> "mm0" , "mm1" else -> "st" , "st(1)" , "st(2)" , "st(3)" , "st(4)" , "st(5)" , "st(6)" , "st(7)" Regards, Frédéric Recoules ----- Mail original ----- De: "tiwai" <tiwai@xxxxxxx> À: "FRÉDÉRIC RECOULES" <frederic.recoules@xxxxxxxxxxxxxxxxxxxxxx> Cc: "alsa-devel" <alsa-devel@xxxxxxxxxxxxxxxx> Envoyé: Lundi 27 Avril 2020 17:12:28 Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces Please don't drop Cc to ML. And about the comments: On Mon, 27 Apr 2020 16:35:32 +0200, FRÉDÉRIC RECOULES wrote: > > I wrongly assumed that the option -mmmx was passed when compiling (re) > mix_areas_16_mmx. > I know how to fix it (inspired of ffmpeg): > - the configure test if mmx is supported by the compiler option and set a > macro HAVE_MMX accordingly (maybe something already exist?). Actually the mmx support isn't about whether the compiler supports it or not. With inline asm, the MMX-enabled code is always included in alsa-lib as well as the code without MMX, then which one to be executed is dynamically switched at runtime by probing the CPU capability. thanks, Takashi > - declare a macro MMX_CLOBBERS(list) that will output the list when HAVE_MMX > is true and nothing otherwise. > I will resubmit a patch soon. > > Regards, > Frédéric Recoules > > ------------------------------------------------------------------------------ > De: "tiwai" <tiwai@xxxxxxx> > À: "frederic recoules" <frederic.recoules@xxxxxxxxxxxxxxxxxxxxxx> > Cc: "alsa-devel" <alsa-devel@xxxxxxxxxxxxxxxx>, "frederic recoules" > <frederic.recoules@xxxxxxxxx> > Envoyé: Lundi 27 Avril 2020 15:47:02 > Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk > interfaces > > On Mon, 27 Apr 2020 09:36:04 +0200, > frederic.recoules@xxxxxxxxxxxxxxxxxxxxxx wrote: > > > > From: Frédéric Recoules <frederic.recoules@xxxxxxxxx> > > > > Main changes are: > > - move 'size' and 'old_ebx' to the output list since they are clobbered > > - add the "memory" keyword since input pointers are dereferenced > > - add mmx registers in the clobber list and add an initialization for mm1 > > - add ebx in clobbers via a set of macro when GCC is newer than 5.0 > > (it will work for other compilers or non-PIC mode too) > > > > Minor changes are: > > - keep consistent the token numbering in the template > > - remove the manual save/restore ebx when it is in the clobber list > > - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates > > - allows 'size' to be given by register (e.g. ebp) > > - add "cc" keyword since the eflag register is clobbered > > > > Signed-off-by: Frédéric Recoules <frederic.recoules@xxxxxxxxx> > > When I apply this and build for i386 with gcc9, I got the following > error: > pcm_dmix_i386.h: In function 'mix_areas_16_mmx': > pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm' > 180 | __asm__ __volatile__ ( > | ^~~~~~~ > pcm_dmix_i386.h:180:2: error: unknown register name 'mm0' in 'asm' > In file included from pcm_dmix_i386.c:31, > from pcm_dmix.c:144: > pcm_dmix_i386.h: In function 'remix_areas_16_mmx': > pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm' > 180 | __asm__ __volatile__ ( > | ^~~~~~~ > .... > > Could you check those errors? > > thanks, > > Takashi > > > --- > > src/pcm/pcm_dmix_i386.h | 168 ++++++++++++++++++++++------------------ > > 1 file changed, 93 insertions(+), 75 deletions(-) > > > > diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h > > index 2778cb1d..af2f4630 100644 > > --- a/src/pcm/pcm_dmix_i386.h > > +++ b/src/pcm/pcm_dmix_i386.h > > @@ -26,6 +26,13 @@ > > * > > */ > > > > +#define COMMA , > > +#if __GNUC__ < 5 && defined(__PIC__) > > +# define GCC_PIC_SWITCH(before,after) before > > +#else > > +# define GCC_PIC_SWITCH(before,after) after > > +#endif > > + > > /* > > * for plain i386 > > */ > > @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size, > > __asm__ __volatile__ ( > > "\n" > > > > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * > / > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) > > /* > > * initialization, load ESI, EDI, EBX registers > > */ > > - "\tmovl %1, %%edi\n" > > - "\tmovl %2, %%esi\n" > > - "\tmovl %3, %%ebx\n" > > + "\tmovl %2, %%edi\n" > > + "\tmovl %3, %%esi\n" > > + "\tmovl %4, %%ebx\n" > > "\tcmpl $0, %0\n" > > "\tjnz 2f\n" > > "\tjmp 7f\n" > > @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size, > > */ > > "\t.p2align 4,,15\n" > > "1:" > > - "\tadd %4, %%edi\n" > > - "\tadd %5, %%esi\n" > > - "\tadd %6, %%ebx\n" > > + "\tadd %5, %%edi\n" > > + "\tadd %6, %%esi\n" > > + "\tadd %7, %%ebx\n" > > > > /* > > * sample = *src; > > @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size, > > "\tjnz 4b\n" > > "\tdecl %0\n" > > "\tjnz 1b\n" > > - > > - "7:" > > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * > / > > > > - : /* no output regs */ > > - : "m" (size), "m" (dst), "m" (src), > > - "m" (sum), "m" (dst_step), "m" (src_step), > > - "m" (sum_step), "m" (old_ebx) > > - : "esi", "edi", "edx", "ecx", "eax" > > + "7:" > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) > > + > > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) > > + : "m" (dst), "m" (src), "m" (sum), > > + "im" (dst_step), "im" (src_step), "im" (sum_step) > > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) > "eax", > > + "memory", "cc" > > ); > > } > > > > @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size, > > __asm__ __volatile__ ( > > "\n" > > > > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * > / > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) > > /* > > - * initialization, load ESI, EDI, EBX registers > > + * initialization, load ESI, EDI, EBX registers, clear MM1 > > */ > > - "\tmovl %1, %%edi\n" > > - "\tmovl %2, %%esi\n" > > - "\tmovl %3, %%ebx\n" > > + "\tpxor %%mm1, %%mm1\n" > > + "\tmovl %2, %%edi\n" > > + "\tmovl %3, %%esi\n" > > + "\tmovl %4, %%ebx\n" > > "\tcmpl $0, %0\n" > > "\tjnz 2f\n" > > "\tjmp 5f\n" > > > > "\t.p2align 4,,15\n" > > "1:" > > - "\tadd %4, %%edi\n" > > - "\tadd %5, %%esi\n" > > - "\tadd %6, %%ebx\n" > > + "\tadd %5, %%edi\n" > > + "\tadd %6, %%esi\n" > > + "\tadd %7, %%ebx\n" > > > > "2:" > > /* > > @@ -230,13 +241,14 @@ static void MIX_AREAS_16_MMX(unsigned int size, > > "\tjnz 1b\n" > > "\temms\n" > > "5:" > > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * > / > > - > > - : /* no output regs */ > > - : "m" (size), "m" (dst), "m" (src), > > - "m" (sum), "m" (dst_step), "m" (src_step), > > - "m" (sum_step), "m" (old_ebx) > > - : "esi", "edi", "edx", "ecx", "eax" > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) > > + > > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) > > + : "m" (dst), "m" (src), "m" (sum), > > + "im" (dst_step), "im" (src_step), "im" (sum_step) > > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) > "eax", > > + "mm0", "mm1", "memory", "cc" > > ); > > } > > > > @@ -261,13 +273,14 @@ static void MIX_AREAS_32(unsigned int size, > > __asm__ __volatile__ ( > > "\n" > > > > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * > / > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) > > /* > > * initialization, load ESI, EDI, EBX registers > > */ > > - "\tmovl %1, %%edi\n" > > - "\tmovl %2, %%esi\n" > > - "\tmovl %3, %%ebx\n" > > + "\tmovl %2, %%edi\n" > > + "\tmovl %3, %%esi\n" > > + "\tmovl %4, %%ebx\n" > > "\tcmpl $0, %0\n" > > "\tjnz 1f\n" > > "\tjmp 6f\n" > > @@ -337,19 +350,20 @@ static void MIX_AREAS_32(unsigned int size, > > */ > > "\tdecl %0\n" > > "\tjz 6f\n" > > - "\tadd %4, %%edi\n" > > - "\tadd %5, %%esi\n" > > - "\tadd %6, %%ebx\n" > > + "\tadd %5, %%edi\n" > > + "\tadd %6, %%esi\n" > > + "\tadd %7, %%ebx\n" > > "\tjmp 1b\n" > > - > > - "6:" > > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * > / > > > > - : /* no output regs */ > > - : "m" (size), "m" (dst), "m" (src), > > - "m" (sum), "m" (dst_step), "m" (src_step), > > - "m" (sum_step), "m" (old_ebx) > > - : "esi", "edi", "edx", "ecx", "eax" > > + "6:" > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) > > + > > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) > > + : "m" (dst), "m" (src), "m" (sum), > > + "im" (dst_step), "im" (src_step), "im" (sum_step) > > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) > "eax", > > + "memory", "cc" > > ); > > } > > > > @@ -374,13 +388,14 @@ static void MIX_AREAS_24(unsigned int size, > > __asm__ __volatile__ ( > > "\n" > > > > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * > / > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) > > /* > > * initialization, load ESI, EDI, EBX registers > > */ > > - "\tmovl %1, %%edi\n" > > - "\tmovl %2, %%esi\n" > > - "\tmovl %3, %%ebx\n" > > + "\tmovl %2, %%edi\n" > > + "\tmovl %3, %%esi\n" > > + "\tmovl %4, %%ebx\n" > > "\tcmpl $0, %0\n" > > "\tjnz 1f\n" > > "\tjmp 6f\n" > > @@ -443,19 +458,20 @@ static void MIX_AREAS_24(unsigned int size, > > */ > > "\tdecl %0\n" > > "\tjz 6f\n" > > - "\tadd %4, %%edi\n" > > - "\tadd %5, %%esi\n" > > - "\tadd %6, %%ebx\n" > > + "\tadd %5, %%edi\n" > > + "\tadd %6, %%esi\n" > > + "\tadd %7, %%ebx\n" > > "\tjmp 1b\n" > > - > > - "6:" > > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * > / > > > > - : /* no output regs */ > > - : "m" (size), "m" (dst), "m" (src), > > - "m" (sum), "m" (dst_step), "m" (src_step), > > - "m" (sum_step), "m" (old_ebx) > > - : "esi", "edi", "edx", "ecx", "eax" > > + "6:" > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) > > + > > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) > > + : "m" (dst), "m" (src), "m" (sum), > > + "im" (dst_step), "im" (src_step), "im" (sum_step) > > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) > "eax", > > + "memory", "cc" > > ); > > } > > > > @@ -480,13 +496,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size, > > __asm__ __volatile__ ( > > "\n" > > > > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * > / > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) > > /* > > * initialization, load ESI, EDI, EBX registers > > */ > > - "\tmovl %1, %%edi\n" > > - "\tmovl %2, %%esi\n" > > - "\tmovl %3, %%ebx\n" > > + "\tmovl %2, %%edi\n" > > + "\tmovl %3, %%esi\n" > > + "\tmovl %4, %%ebx\n" > > "\tcmpl $0, %0\n" > > "\tjz 6f\n" > > > > @@ -541,19 +558,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size, > > /* > > * while (size-- > 0) > > */ > > - "\tadd %4, %%edi\n" > > - "\tadd %5, %%esi\n" > > - "\tadd %6, %%ebx\n" > > + "\tadd %5, %%edi\n" > > + "\tadd %6, %%esi\n" > > + "\tadd %7, %%ebx\n" > > "\tdecl %0\n" > > "\tjnz 1b\n" > > - > > - "6:" > > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * > / > > > > - : /* no output regs */ > > - : "m" (size), "m" (dst), "m" (src), > > - "m" (sum), "m" (dst_step), "m" (src_step), > > - "m" (sum_step), "m" (old_ebx) > > - : "esi", "edi", "edx", "ecx", "eax" > > + "6:" > > + /* ebx is GOT pointer (-fPIC) */ > > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) > > + > > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) > > + : "m" (dst), "m" (src), "m" (sum), > > + "im" (dst_step), "im" (src_step), "im" (sum_step) > > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) > "eax", > > + "memory", "cc" > > ); > > } > > -- > > 2.17.1 > > > >