[PATCH 2/2] endianmacros: Replace borked PA_FLOAT32_SWAP() with PA_READ_FLOAT32RE() / PA_WRITE_FLOAT32RE()

pmeerw@xxxxxxxxxx (Peter Meerwald) · Thu, 4 Sep 2014 10:01:48 +0200 (CEST)



> I was considering whether "const void *p"  should instead be a "const float
> *p" (in the function prototypes), but maybe not - a reversed float cannot be
> considered a float anymore, due to the NaN problems you describe below.
> 
> OTOH, that leaves us with a few float pointers in the callers to these
> functions, which should perhaps be converted to void* too, but then you can't
> index them...
> 
> So hey, it fixes a bug, and I assume you have run the relevant tests and they
> succeeded. So while it's slightly inconsistent to mix float* and void*, this
> one is acked too.

tested by Felipe, see
https://buildd.debian.org/status/logs.php?pkg=pulseaudio&suite=experimental

pushed

thanks, p.

> On 2014-09-03 02:25, Peter Meerwald wrote:
> > From: Peter Meerwald <pmeerw at debian>
> > 
> > building PA with -O0 leads to test failure in mix-test on i386
> > 
> > issue reported by Felipe, see
> > http://lists.freedesktop.org/archives/pulseaudio-discuss/2014-August/021406.html
> > 
> > the problem is the value 0xbeffbd7f: when byte-swapped it becomes 0x7fbdffbe
> > and according
> > to IEEE-754 represents a signalling NaN (starting with s111 1111 10, see
> > http://en.wikipedia.org/wiki/NaN)
> > 
> > when this value is assigned to a floating point register, it becomes
> > 0x7ffdffbe, representing
> > a quiet NaN (starting with s111 1111 11) -- a signalling NaN is turned into
> > a quiet NaN!
> > 
> > so PA_FLOAT32_SWAP(PA_FLOAT32_SWAP(x)) != x for certain values, uhuh!
> > 
> > the following test code can be used; due to volatile, it will always
> > demonstrate the issue;
> > without volatile, it depends on the optimization level (i386, 32-bit, gcc
> > 4.9):
> > 
> > // snip
> > 
> > static inline float PA_FLOAT32_SWAP(float x) {
> >      union {
> >          float f;
> >          uint32_t u;
> >      } t;
> > 
> >      t.f = x;
> >      t.u = bswap_32(t.u);
> >      return t.f;
> > }
> > 
> > int main() {
> >    unsigned x = 0xbeffbd7f;
> >    volatile float f = PA_FLOAT32_SWAP(*(float *)&x);
> >    printf("%08x %08x %08x %f\n", 0xbeffbd7f, *(unsigned *)&f,
> > bswap_32(*(unsigned *)&f), f);
> > }
> > // snip
> > 
> > the problem goes away with optimization when no temporary floating point
> > registers are used
> > 
> > the proposed solution is to avoid passing swapped floating point data in a
> > float; this is done with new functions PA_READ_FLOAT32RE() and
> > PA_WRITE_FLOAT32RE()
> > which use uint32_t to dereference a pointer and byte-swap the data, hence no
> > temporary
> > float variable is used
> > 
> > also delete PA_FLOAT32_TO_LE()/_BE(), not used
> > 
> > Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>
> > Reported-by: Felipe Sateler <fsateler at debian.org>
> > ---
> >   src/pulsecore/endianmacros.h | 23 ++++++++++++-----------
> >   src/pulsecore/mix.c          | 11 ++++-------
> >   src/pulsecore/sample-util.c  |  4 ++--
> >   src/pulsecore/sconv-s16le.c  | 22 ++++++++--------------
> >   src/pulsecore/svolume_c.c    |  4 ++--
> >   src/tests/mix-test.c         |  4 ++--
> >   src/tests/resampler-test.c   |  4 ++--
> >   7 files changed, 32 insertions(+), 40 deletions(-)
> > 
> > diff --git a/src/pulsecore/endianmacros.h b/src/pulsecore/endianmacros.h
> > index 4822fc4..2e73136 100644
> > --- a/src/pulsecore/endianmacros.h
> > +++ b/src/pulsecore/endianmacros.h
> > @@ -71,25 +71,32 @@ static inline void PA_WRITE24LE(uint8_t *p, uint32_t u)
> > {
> >       p[0] = (uint8_t) u;
> >   }
> > 
> > -static inline float PA_FLOAT32_SWAP(float x) {
> > +static inline float PA_READ_FLOAT32RE(const void *p) {
> >       union {
> >           float f;
> >           uint32_t u;
> >       } t;
> > 
> > -    t.f = x;
> > -    t.u = PA_UINT32_SWAP(t.u);
> > +    t.u = PA_UINT32_SWAP(*(uint32_t *) p);
> >       return t.f;
> >   }
> > 
> > +static inline void PA_WRITE_FLOAT32RE(void *p, float x) {
> > +    union {
> > +        float f;
> > +        uint32_t u;
> > +    } t;
> > +
> > +    t.f = x;
> > +    *(uint32_t *) p = PA_UINT32_SWAP(t.u);
> > +}
> > +
> >   #define PA_MAYBE_INT16_SWAP(c,x) ((c) ? PA_INT16_SWAP(x) : (x))
> >   #define PA_MAYBE_UINT16_SWAP(c,x) ((c) ? PA_UINT16_SWAP(x) : (x))
> > 
> >   #define PA_MAYBE_INT32_SWAP(c,x) ((c) ? PA_INT32_SWAP(x) : (x))
> >   #define PA_MAYBE_UINT32_SWAP(c,x) ((c) ? PA_UINT32_SWAP(x) : (x))
> > 
> > -#define PA_MAYBE_FLOAT32_SWAP(c,x) ((c) ? PA_FLOAT32_SWAP(x) : (x))
> > -
> >   #ifdef WORDS_BIGENDIAN
> >    #define PA_INT16_FROM_LE(x) PA_INT16_SWAP(x)
> >    #define PA_INT16_FROM_BE(x) ((int16_t)(x))
> > @@ -115,9 +122,6 @@ static inline float PA_FLOAT32_SWAP(float x) {
> >    #define PA_UINT32_TO_LE(x) PA_UINT32_SWAP(x)
> >    #define PA_UINT32_TO_BE(x) ((uint32_t)(x))
> > 
> > - #define PA_FLOAT32_TO_LE(x) PA_FLOAT32_SWAP(x)
> > - #define PA_FLOAT32_TO_BE(x) ((float) (x))
> > -
> >    #define PA_READ24NE(x) PA_READ24BE(x)
> >    #define PA_WRITE24NE(x,y) PA_WRITE24BE((x),(y))
> > 
> > @@ -148,9 +152,6 @@ static inline float PA_FLOAT32_SWAP(float x) {
> >    #define PA_UINT32_TO_LE(x) ((uint32_t)(x))
> >    #define PA_UINT32_TO_BE(x) PA_UINT32_SWAP(x)
> > 
> > - #define PA_FLOAT32_TO_LE(x) ((float) (x))
> > - #define PA_FLOAT32_TO_BE(x) PA_FLOAT32_SWAP(x)
> > -
> >    #define PA_READ24NE(x) PA_READ24LE(x)
> >    #define PA_WRITE24NE(x,y) PA_WRITE24LE((x),(y))
> > 
> > diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
> > index 10567d6..03c71f0 100644
> > --- a/src/pulsecore/mix.c
> > +++ b/src/pulsecore/mix.c
> > @@ -576,17 +576,14 @@ static void pa_mix_float32re_c(pa_mix_info streams[],
> > unsigned nstreams, unsigne
> > 
> >           for (i = 0; i < nstreams; i++) {
> >               pa_mix_info *m = streams + i;
> > -            float v, cv = m->linear[channel].f;
> > +            float cv = m->linear[channel].f;
> > 
> > -            if (PA_LIKELY(cv > 0)) {
> > -                v = PA_FLOAT32_SWAP(*(float*) m->ptr);
> > -                v *= cv;
> > -                sum += v;
> > -            }
> > +            if (PA_LIKELY(cv > 0))
> > +                sum += PA_READ_FLOAT32RE(m->ptr) * cv;
> >               m->ptr = (uint8_t*) m->ptr + sizeof(float);
> >           }
> > 
> > -        *data = PA_FLOAT32_SWAP(sum);
> > +        PA_WRITE_FLOAT32RE(data, sum);
> > 
> >           if (PA_UNLIKELY(++channel >= channels))
> >               channel = 0;
> > diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
> > index dc5c1fd..539f3b8 100644
> > --- a/src/pulsecore/sample-util.c
> > +++ b/src/pulsecore/sample-util.c
> > @@ -296,9 +296,9 @@ void pa_sample_clamp(pa_sample_format_t format, void
> > *dst, size_t dstr, const vo
> >           for (; n > 0; n--) {
> >               float f;
> > 
> > -            f = PA_FLOAT32_SWAP(*s);
> > +            f = PA_READ_FLOAT32RE(s);
> >               f = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
> > -            *d = PA_FLOAT32_SWAP(f);
> > +            PA_WRITE_FLOAT32RE(d, f);
> > 
> >               s = (const float*) ((const uint8_t*) s + sstr);
> >               d = (float*) ((uint8_t*) d + dstr);
> > diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c
> > index 45cf972..0c1d16c 100644
> > --- a/src/pulsecore/sconv-s16le.c
> > +++ b/src/pulsecore/sconv-s16le.c
> > @@ -157,8 +157,7 @@ void pa_sconv_s16le_to_float32re(unsigned n, const
> > int16_t *a, float *b) {
> >       for (; n > 0; n--) {
> >           int16_t s = *(a++);
> >           float k = INT16_FROM(s) * (1.0f / (1 << 15));
> > -        k = PA_FLOAT32_SWAP(k);
> > -        *(b++) = k;
> > +        PA_WRITE_FLOAT32RE(b++, k);
> >       }
> >   }
> > 
> > @@ -169,8 +168,7 @@ void pa_sconv_s32le_to_float32re(unsigned n, const
> > int32_t *a, float *b) {
> >       for (; n > 0; n--) {
> >           int32_t s = *(a++);
> >           float k = INT32_FROM(s) * (1.0f / (1U << 31));
> > -        k = PA_FLOAT32_SWAP(k);
> > -        *(b++) = k;
> > +        PA_WRITE_FLOAT32RE(b++, k);
> >       }
> >   }
> > 
> > @@ -180,8 +178,7 @@ void pa_sconv_s16le_from_float32re(unsigned n, const
> > float *a, int16_t *b) {
> > 
> >       for (; n > 0; n--) {
> >           int16_t s;
> > -        float v = *(a++);
> > -        v = PA_FLOAT32_SWAP(v) * (1 << 15);
> > +        float v = PA_READ_FLOAT32RE(a++) * (1 << 15);
> >           s = (int16_t) PA_CLAMP_UNLIKELY(lrintf(v), -0x8000, 0x7FFF);
> >           *(b++) = INT16_TO(s);
> >       }
> > @@ -193,8 +190,7 @@ void pa_sconv_s32le_from_float32re(unsigned n, const
> > float *a, int32_t *b) {
> > 
> >       for (; n > 0; n--) {
> >           int32_t s;
> > -        float v = *(a++);
> > -        v = PA_FLOAT32_SWAP(v) * (1U << 31);
> > +        float v = PA_READ_FLOAT32RE(a++) * (1U << 31);
> >           s = (int32_t) PA_CLAMP_UNLIKELY(llrintf(v), -0x80000000LL,
> > 0x7FFFFFFFLL);
> >           *(b++) = INT32_TO(s);
> >       }
> > @@ -325,7 +321,7 @@ void pa_sconv_s24le_to_float32re(unsigned n, const
> > uint8_t *a, float *b) {
> >       for (; n > 0; n--) {
> >           int32_t s = READ24(a) << 8;
> >           float k = s * (1.0f / (1U << 31));
> > -        *b = PA_FLOAT32_SWAP(k);
> > +        PA_WRITE_FLOAT32RE(b, k);
> >           a += 3;
> >           b++;
> >       }
> > @@ -337,8 +333,7 @@ void pa_sconv_s24le_from_float32re(unsigned n, const
> > float *a, uint8_t *b) {
> > 
> >       for (; n > 0; n--) {
> >           int32_t s;
> > -        float v = *a;
> > -        v = PA_FLOAT32_SWAP(v) * (1U << 31);
> > +        float v = PA_READ_FLOAT32RE(a) * (1U << 31);
> >           s = (int32_t) PA_CLAMP_UNLIKELY(llrint(v), -0x80000000LL,
> > 0x7FFFFFFFLL);
> >           WRITE24(b, ((uint32_t) s) >> 8);
> >           a++;
> > @@ -411,7 +406,7 @@ void pa_sconv_s24_32le_to_float32re(unsigned n, const
> > uint32_t *a, float *b) {
> >       for (; n > 0; n--) {
> >           int32_t s = (int32_t) (UINT32_FROM(*a) << 8);
> >           float k = s * (1.0f / (1U << 31));
> > -        *b = PA_FLOAT32_SWAP(k);
> > +        PA_WRITE_FLOAT32RE(b, k);
> >           a++;
> >           b++;
> >       }
> > @@ -437,8 +432,7 @@ void pa_sconv_s24_32le_from_float32re(unsigned n, const
> > float *a, uint32_t *b) {
> > 
> >       for (; n > 0; n--) {
> >           int32_t s;
> > -        float v = *a;
> > -        v = PA_FLOAT32_SWAP(v) * (1U << 31);
> > +        float v = PA_READ_FLOAT32RE(a) * (1U << 31);
> >           s = (int32_t) PA_CLAMP_UNLIKELY(llrint(v), -0x80000000LL,
> > 0x7FFFFFFFLL);
> >           *b = UINT32_TO(((uint32_t) s) >> 8);
> >           a++;
> > diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
> > index ad2df18..2e2db12 100644
> > --- a/src/pulsecore/svolume_c.c
> > +++ b/src/pulsecore/svolume_c.c
> > @@ -125,9 +125,9 @@ static void pa_volume_float32re_c(float *samples, const
> > float *volumes, unsigned
> >       for (channel = 0; length; length--) {
> >           float t;
> > 
> > -        t = PA_FLOAT32_SWAP(*samples);
> > +        t = PA_READ_FLOAT32RE(samples);
> >           t *= volumes[channel];
> > -        *samples++ = PA_FLOAT32_SWAP(t);
> > +        PA_WRITE_FLOAT32RE(samples++, t);
> > 
> >           if (PA_UNLIKELY(++channel >= channels))
> >               channel = 0;
> > diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c
> > index d5bae13..e95607d 100644
> > --- a/src/tests/mix-test.c
> > +++ b/src/tests/mix-test.c
> > @@ -150,7 +150,7 @@ static void compare_block(const pa_sample_spec *ss,
> > const pa_memchunk *chunk, in
> >               float *u = d;
> > 
> >               for (i = 0; i < chunk->length / pa_frame_size(ss); i++) {
> > -                float uu = PA_MAYBE_FLOAT32_SWAP(ss->format !=
> > PA_SAMPLE_FLOAT32NE, *u);
> > +                float uu = ss->format == PA_SAMPLE_FLOAT32NE ? *u :
> > PA_READ_FLOAT32RE(u);
> >                   fail_unless(fabsf(uu - *v) <= 1e-6f, NULL);
> >                   ++u;
> >                   ++v;
> > @@ -264,7 +264,7 @@ static pa_memblock* generate_block(pa_mempool *pool,
> > const pa_sample_spec *ss) {
> >               if (ss->format == PA_SAMPLE_FLOAT32RE) {
> >                   float *u = d;
> >                   for (i = 0; i < 10; i++)
> > -                    u[i] = PA_FLOAT32_SWAP(float32ne_result[0][i]);
> > +                    PA_WRITE_FLOAT32RE(&u[i], float32ne_result[0][i]);
> >               } else
> >                   memcpy(d, float32ne_result[0],
> > sizeof(float32ne_result[0]));
> > 
> > diff --git a/src/tests/resampler-test.c b/src/tests/resampler-test.c
> > index 60345af..f547770 100644
> > --- a/src/tests/resampler-test.c
> > +++ b/src/tests/resampler-test.c
> > @@ -98,7 +98,7 @@ static void dump_block(const char *label, const
> > pa_sample_spec *ss, const pa_mem
> >               float *u = d;
> > 
> >               for (i = 0; i < chunk->length / pa_frame_size(ss); i++) {
> > -                printf("%4.3g ", ss->format == PA_SAMPLE_FLOAT32NE ? *u :
> > PA_FLOAT32_SWAP(*u));
> > +                printf("%4.3g ", ss->format == PA_SAMPLE_FLOAT32NE ? *u :
> > PA_READ_FLOAT32RE(u));
> >                   u++;
> >               }
> > 
> > @@ -222,7 +222,7 @@ static pa_memblock* generate_block(pa_mempool *pool,
> > const pa_sample_spec *ss) {
> > 
> >               if (ss->format == PA_SAMPLE_FLOAT32RE)
> >                   for (i = 0; i < 10; i++)
> > -                    u[i] = PA_FLOAT32_SWAP(u[i]);
> > +                    PA_WRITE_FLOAT32RE(&u[i], u[i]);
> > 
> >               break;
> >           }
> > 
> 
> 

-- 

Peter Meerwald
+43-664-2444418 (mobile)