[PATCH 10/10] tests: Add remap-special-test

pmeerw@xxxxxxxxxx (Peter Meerwald) · Fri, 29 Mar 2013 16:56:51 +0100

From: Peter Meerwald <p.meerwald@xxxxxxxxxxxxxxxxxx>

Beware: the test duplicates a lot of the runtime remap code;
it is for illustration purposes only :)

compiled with Ubuntu/Linaro gcc 4.6.3:
arm-linux-gnueabi-gcc -O2 -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon

Runtime of remap-special-test measured on a BeagleBoard-xM:
Testing s16 2-channel-to-mono remap performance with 3 sample alignment
func: 1128848 usec (avg: 11288.5, min = 11200, max = 11871, stddev = 92.7675).
orig: 2821201 usec (avg: 28212, min = 28106, max = 28595, stddev = 91.0587).
Testing s16 4-channel-to-mono remap performance with 3 sample alignment
func: 1536317 usec (avg: 15363.2, min = 15289, max = 15900, stddev = 82.5403).
orig: 5551764 usec (avg: 55517.6, min = 55359, max = 56702, stddev = 196.81).
Testing s16 mono-to-2-channel remap performance with 3 sample alignment
func: 415250 usec (avg: 4152.5, min = 4119, max = 4456, stddev = 52.7611).
orig: 1537903 usec (avg: 15379, min = 15289, max = 15869, stddev = 93.078).
Testing s16 mono-to-4-channel remap performance with 3 sample alignment
func: 710513 usec (avg: 7105.13, min = 7019, max = 7630, stddev = 86.6658).
orig: 3062413 usec (avg: 30624.1, min = 30518, max = 31128, stddev = 109.456).
Testing float 2-channel-to-mono remap performance with 3 sample alignment
func: 4049533 usec (avg: 40495.3, min = 40374, max = 41046, stddev = 128.187).
orig: 12115179 usec (avg: 121152, min = 120910, max = 122071, stddev = 178.598).
Testing float 4-channel-to-mono remap performance with 3 sample alignment
func: 7940282 usec (avg: 79402.8, min = 79254, max = 80627, stddev = 175.874).
orig: 24183727 usec (avg: 241837, min = 241577, max = 242737, stddev = 208.3).
Testing float mono-to-2-channel remap performance with 3 sample alignment
func: 677094 usec (avg: 6770.94, min = 6073, max = 7843, stddev = 624.805).
orig: 8505589 usec (avg: 85055.9, min = 84838, max = 85511, stddev = 138.815).
Testing float mono-to-4-channel remap performance with 3 sample alignment
func: 1240600 usec (avg: 12406, min = 11078, max = 15839, stddev = 1195.99).
orig: 11538670 usec (avg: 115387, min = 115203, max = 115753, stddev = 152.72).

on Core i7, 64bit:
Testing s16 2-channel-to-mono remap performance with 3 sample alignment
func: 108723 usec (avg: 1087.23, min = 939, max = 3023, stddev = 449.7).
orig: 185219 usec (avg: 1852.19, min = 1796, max = 2210, stddev = 76.6693).
Testing s16 4-channel-to-mono remap performance with 3 sample alignment
func: 130692 usec (avg: 1306.92, min = 1272, max = 1448, stddev = 42.0549).
orig: 365576 usec (avg: 3655.76, min = 3580, max = 4376, stddev = 123.333).
Testing s16 mono-to-2-channel remap performance with 3 sample alignment
func: 60965 usec (avg: 609.65, min = 581, max = 775, stddev = 34.7144).
orig: 183806 usec (avg: 1838.06, min = 1802, max = 2231, stddev = 71.464).
Testing s16 mono-to-4-channel remap performance with 3 sample alignment
func: 118915 usec (avg: 1189.15, min = 1149, max = 1395, stddev = 48.0912).
orig: 365320 usec (avg: 3653.2, min = 3598, max = 4203, stddev = 89.2894).
Testing float 2-channel-to-mono remap performance with 3 sample alignment
func: 71056 usec (avg: 710.56, min = 582, max = 1775, stddev = 322.095).
orig: 186387 usec (avg: 1863.87, min = 1805, max = 2374, stddev = 79.5336).
Testing float 4-channel-to-mono remap performance with 3 sample alignment
func: 117826 usec (avg: 1178.26, min = 1157, max = 1277, stddev = 29.9315).
orig: 358471 usec (avg: 3584.71, min = 3539, max = 4025, stddev = 87.0788).
Testing float mono-to-2-channel remap performance with 3 sample alignment
func: 61018 usec (avg: 610.18, min = 580, max = 735, stddev = 28.4445).
orig: 190701 usec (avg: 1907.01, min = 1870, max = 2101, stddev = 52.3818).
Testing float mono-to-4-channel remap performance with 3 sample alignment
func: 117890 usec (avg: 1178.9, min = 1157, max = 1297, stddev = 32.613).
orig: 380944 usec (avg: 3809.44, min = 3743, max = 4232, stddev = 88.4906).

Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>
---
 src/.gitignore                 |    1 +
 src/Makefile.am                |    8 +-
 src/tests/remap-special-test.c |  651 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 659 insertions(+), 1 deletion(-)
 create mode 100644 src/tests/remap-special-test.c

diff --git a/src/.gitignore b/src/.gitignore
index cd9c51a..33d8212 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -74,3 +74,4 @@ usergroup-test
 utf8-test
 volume-test
 mult-s16-test
+remap-special-test
diff --git a/src/Makefile.am b/src/Makefile.am
index 915c177..eab4a15 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -247,7 +247,8 @@ TESTS_default = \
 		cpu-test \
 		lock-autospawn-test \
 		mult-s16-test \
-		mix-special-test
+		mix-special-test \
+		remap-special-test
 
 TESTS_norun = \
 		ipacl-test \
@@ -518,6 +519,11 @@ mix_special_test_LDADD = $(AM_LDADD) libpulsecore-@PA_MAJORMINOR@.la libpulse.la
 mix_special_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
 mix_special_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
 
+remap_special_test_SOURCES = tests/remap-special-test.c
+remap_special_test_LDADD = $(AM_LDADD) libpulsecore-@PA_MAJORMINOR@.la libpulse.la libpulsecommon-@PA_MAJORMINOR@.la
+remap_special_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
+remap_special_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
+
 rtstutter_SOURCES = tests/rtstutter.c
 rtstutter_LDADD = $(AM_LDADD) libpulsecore-@PA_MAJORMINOR@.la libpulse.la libpulsecommon-@PA_MAJORMINOR@.la
 rtstutter_CFLAGS = $(AM_CFLAGS)
diff --git a/src/tests/remap-special-test.c b/src/tests/remap-special-test.c
new file mode 100644
index 0000000..4d7a0cf
--- /dev/null
+++ b/src/tests/remap-special-test.c
@@ -0,0 +1,651 @@
+/***
+  This file is part of PulseAudio.
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <check.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <pulse/rtclock.h>
+#include <pulse/volume.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/remap.h>
+
+#define PA_CPU_TEST_RUN_START(l, t1, t2)                        \
+{                                                               \
+    int _j, _k;                                                 \
+    int _times = (t1), _times2 = (t2);                          \
+    pa_usec_t _start, _stop;                                    \
+    pa_usec_t _min = INT_MAX, _max = 0;                         \
+    double _s1 = 0, _s2 = 0;                                    \
+    const char *_label = (l);                                   \
+    /* _times2 timed batches, each running the following statement _times times */ \
+    for (_k = 0; _k < _times2; _k++) {                          \
+        _start = pa_rtclock_now();                              \
+        for (_j = 0; _j < _times; _j++)
+/* Ends a PA_CPU_TEST_RUN_START section: closes its loop and block, then logs the timing statistics. */
+#define PA_CPU_TEST_RUN_STOP                                    \
+        _stop = pa_rtclock_now();                               \
+        /* per batch: update min/max, the sum (_s1) and the sum of squares (_s2) */ \
+        if (_min > (_stop - _start)) _min = _stop - _start;     \
+        if (_max < (_stop - _start)) _max = _stop - _start;     \
+        _s1 += _stop - _start;                                  \
+        _s2 += (_stop - _start) * (_stop - _start);             \
+    }                                                           \
+    pa_log_debug("%s: %llu usec (avg: %g, min = %llu, max = %llu, stddev = %g).", _label, \
+            (long long unsigned int)_s1,                        \
+            ((double)_s1 / _times2),                            \
+            (long long unsigned int)_min,                       \
+            (long long unsigned int)_max,                       \
+            sqrt(_times2 * _s2 - _s1 * _s1) / _times2);         \
+}
+
+static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+    /* Mono -> stereo: copy each of the n input samples to two adjacent output samples. */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d, *s;
+
+            d = (float *) dst;
+            s = (float *) src;
+            /* Four input samples per iteration; the n & 3 leftovers are copied one by one. */
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = s[0];
+                d[2] = d[3] = s[1];
+                d[4] = d[5] = s[2];
+                d[6] = d[7] = s[3];
+                s += 4;
+                d += 8;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d, *s;
+
+            d = (int16_t *) dst;
+            s = (int16_t *) src;
+            /* Same copy pattern as the float case, on 16-bit samples. */
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = s[0];
+                d[2] = d[3] = s[1];
+                d[4] = d[5] = s[2];
+                d[6] = d[7] = s[3];
+                s += 4;
+                d += 8;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+static void remap_mono_to_ch4_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+    /* Mono -> 4 channels: copy each of the n input samples to four adjacent output samples. */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d, *s;
+
+            d = (float *) dst;
+            s = (float *) src;
+            /* Four input samples (16 outputs) per iteration; the n & 3 leftovers follow. */
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = d[2] = d[3] = s[0];
+                d[4] = d[5] = d[6] = d[7] = s[1];
+                d[8] = d[9] = d[10] = d[11] = s[2];
+                d[12] = d[13] = d[14] = d[15] = s[3];
+                s += 4;
+                d += 16;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = d[2] = d[3] = s[0];
+                s++;
+                d += 4;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d, *s;
+
+            d = (int16_t *) dst;
+            s = (int16_t *) src;
+            /* Same copy pattern as the float case, on 16-bit samples. */
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = d[2] = d[3] = s[0];
+                d[4] = d[5] = d[6] = d[7] = s[1];
+                d[8] = d[9] = d[10] = d[11] = s[2];
+                d[12] = d[13] = d[14] = d[15] = s[3];
+                s += 4;
+                d += 16;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = d[2] = d[3] = s[0];
+                s++;
+                d += 4;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+    /* Stereo -> mono: dst[k] becomes the average of src[2k] and src[2k+1], for k < n. */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d = (float *) dst, *s = (float *) src;
+            /* Four output samples per iteration, then the n & 3 leftovers. */
+            for (i = n >> 2; i > 0; i--) {
+                d[0] = (s[0] + s[1])*0.5f;
+                d[1] = (s[2] + s[3])*0.5f;
+                d[2] = (s[4] + s[5])*0.5f;
+                d[3] = (s[6] + s[7])*0.5f;
+                s += 8;
+                d += 4;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = (s[0] + s[1])*0.5f;
+                s += 2;
+                d += 1;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d = (int16_t *) dst, *s = (int16_t *) src;
+            /* Store (not accumulate) the average, so earlier dst contents never leak into the result. */
+            for (i = n >> 2; i > 0; i--) {
+                d[0] = (s[0] + s[1])/2;
+                d[1] = (s[2] + s[3])/2;
+                d[2] = (s[4] + s[5])/2;
+                d[3] = (s[6] + s[7])/2;
+                s += 8;
+                d += 4;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = (s[0] + s[1])/2;
+                s += 2;
+                d += 1;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+static void remap_ch4_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+    /* 4 channels -> mono: each output sample is the average of four adjacent input samples. */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d = (float *) dst, *s = (float *) src;
+            /* Four output samples (16 inputs) per iteration, then the n & 3 leftovers. */
+            for (i = n >> 2; i > 0; i--) {
+                d[0] = (s[0] + s[1] + s[2] + s[3])*0.25f;
+                d[1] = (s[4] + s[5] + s[6] + s[7])*0.25f;
+                d[2] = (s[8] + s[9] + s[10] + s[11])*0.25f;
+                d[3] = (s[12] + s[13] + s[14] + s[15])*0.25f;
+                s += 16;
+                d += 4;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = (s[0] + s[1] + s[2] + s[3])*0.25f;
+                s += 4;
+                d += 1;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d = (int16_t *) dst, *s = (int16_t *) src; /* sums below are computed in int */
+            for (i = n >> 2; i > 0; i--) {
+                d[0] = (s[0] + s[1] + s[2] + s[3])/4;
+                d[1] = (s[4] + s[5] + s[6] + s[7])/4;
+                d[2] = (s[8] + s[9] + s[10] + s[11])/4;
+                d[3] = (s[12] + s[13] + s[14] + s[15])/4;
+                s += 16;
+                d += 4;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = (s[0] + s[1] + s[2] + s[3])/4;
+                s += 4;
+                d += 1;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+static void remap_channels_matrix_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned oc, ic, i;
+    unsigned n_ic, n_oc;
+    /* Generic remap: every output channel is a gain-weighted sum of the input channels. */
+    n_ic = m->i_ss->channels;
+    n_oc = m->o_ss->channels;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d, *s;
+            /* Clear the n output frames first; the loops below only accumulate into them. */
+            memset(dst, 0, n * sizeof(float) * n_oc);
+
+            for (oc = 0; oc < n_oc; oc++) {
+
+                for (ic = 0; ic < n_ic; ic++) {
+                    float vol;
+
+                    vol = m->map_table_f[oc][ic];
+                    /* Non-positive gain: this input channel does not feed this output channel. */
+                    if (vol <= 0.0)
+                        continue;
+
+                    d = (float *)dst + oc;
+                    s = (float *)src + ic;
+                    /* Gains of 1.0 or more are added unscaled; smaller gains scale the sample. */
+                    if (vol >= 1.0) {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s;
+                    } else {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s * vol;
+                    }
+                }
+            }
+
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d, *s;
+
+            memset(dst, 0, n * sizeof(int16_t) * n_oc);
+
+            for (oc = 0; oc < n_oc; oc++) {
+
+                for (ic = 0; ic < n_ic; ic++) {
+                    int32_t vol;
+
+                    vol = m->map_table_i[oc][ic];
+
+                    if (vol <= 0)
+                        continue;
+
+                    d = (int16_t *)dst + oc;
+                    s = (int16_t *)src + ic;
+                    /* Integer gains: 0x10000 or more is added unscaled; smaller ones scale via (s * vol) >> 16. */
+                    if (vol >= 0x10000) {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s;
+                    } else {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += (int16_t) (((int32_t)*s * vol) >> 16);
+                    }
+                }
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+#define SAMPLES 1028 /* element count of the mono-side test buffers */
+#define TIMES 1000 /* passed as t1 to PA_CPU_TEST_RUN_START: calls per timed batch */
+#define TIMES2 100 /* passed as t2 to PA_CPU_TEST_RUN_START: number of timed batches */
+
+static void run_remap_test_mono_channels_float(
+        pa_remap_t *remap,
+        pa_do_remap_func_t func, pa_do_remap_func_t orig_func,
+        int channels,
+        int align, pa_bool_t correct, pa_bool_t perf) {
+    /* Runs func and orig_func on the same random float mono input; checks and/or times them. */
+    PA_DECLARE_ALIGNED(8, float, out_ref[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, float, out[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, float, in[SAMPLES]) = { 0 };
+    float *ch, *ch_ref;
+    float *mono;
+    int i, nsamples;
+
+    assert(channels == 2 || channels == 4 || channels == 8);
+
+    /* Force sample alignment as requested */
+    ch = out + (8 - align);
+    ch_ref = out_ref + (8 - align);
+    mono = in + (8 - align);
+    nsamples = SAMPLES - (8 - align);
+    /* Random input, roughly in [-1.05, 1.05] */
+    for (i = 0; i < nsamples; i++)
+        mono[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f);
+    /* Correctness: every output sample of func must be within 0.0001 of orig_func's */
+    if (correct) {
+        orig_func(remap, ch_ref, mono, nsamples);
+        func(remap, ch, mono, nsamples);
+
+        for (i = 0; i < nsamples * channels; i++) {
+            if (fabsf(ch[i] - ch_ref[i]) > 0.0001) {
+                pa_log_debug("Correctness test failed: align=%d", align);
+                pa_log_debug("%d: %.24f != %.24f (%.24f)\n", i, ch[i], ch_ref[i], mono[i]);
+                fail();
+            }
+        }
+    }
+    /* Performance: TIMES2 batches of TIMES calls, first for func, then for orig_func */
+    if (perf) {
+        pa_log_debug("Testing float mono-to-%d-channel remap performance with %d sample alignment", channels, align);
+
+        PA_CPU_TEST_RUN_START("func", TIMES, TIMES2) {
+            func(remap, ch, mono, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+
+        PA_CPU_TEST_RUN_START("orig", TIMES, TIMES2) {
+            orig_func(remap, ch_ref, mono, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+    }
+}
+
+static void run_remap_test_mono_channels_s16(
+        pa_remap_t *remap,
+        pa_do_remap_func_t func, pa_do_remap_func_t orig_func,
+        int channels,
+        int align, pa_bool_t correct, pa_bool_t perf) {
+    /* Runs func and orig_func on the same int16_t mono input; checks and/or times them. */
+    PA_DECLARE_ALIGNED(8, int16_t, out_ref[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, out[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, in[SAMPLES]) = { 0 };
+    int16_t *ch, *ch_ref;
+    int16_t *mono;
+    int i, nsamples;
+
+    assert(channels == 2 || channels == 4 || channels == 8);
+
+    /* Force sample alignment as requested */
+    ch = out + (8 - align);
+    ch_ref = out_ref + (8 - align);
+    mono = in + (8 - align);
+    nsamples = SAMPLES - (8 - align);
+    /* Input comes from pa_random() (presumably random bytes; confirm in pulsecore/random.h) */
+    pa_random(mono, nsamples * sizeof(int16_t));
+    /* Correctness: outputs of func and orig_func may differ by at most 1 */
+    if (correct) {
+        orig_func(remap, ch_ref, mono, nsamples);
+        func(remap, ch, mono, nsamples);
+
+        for (i = 0; i < nsamples * channels; i++) {
+            if (abs(ch[i] - ch_ref[i]) > 1) {
+                pa_log_debug("Correctness test failed: align=%d", align);
+                pa_log_debug("%d: %d != %d (%d)\n", i, ch[i], ch_ref[i], mono[i]);
+                fail();
+            }
+        }
+    }
+    /* Performance: TIMES2 batches of TIMES calls, first for func, then for orig_func */
+    if (perf) {
+        pa_log_debug("Testing s16 mono-to-%d-channel remap performance with %d sample alignment", channels, align);
+
+        PA_CPU_TEST_RUN_START("func", TIMES, TIMES2) {
+            func(remap, ch, mono, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+
+        PA_CPU_TEST_RUN_START("orig", TIMES, TIMES2) {
+            orig_func(remap, ch_ref, mono, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+    }
+}
+
+static void remap_test_mono_channels(pa_sample_format_t format, int channels) {
+    pa_sample_format_t sf;
+    pa_remap_t remap;
+    pa_sample_spec iss, oss;
+    pa_do_remap_func_t orig_func, func;
+    int i;
+    /* Describe a 1 -> channels remap in which every output takes the input at gain 1.0 / PA_VOLUME_NORM. */
+    iss.format = oss.format = sf = format;
+    iss.channels = 1;
+    oss.channels = channels;
+    remap.format = &sf;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    for (i = 0; i < channels; i++) {
+        remap.map_table_f[i][0] = 1.0f;
+        remap.map_table_i[i][0] = PA_VOLUME_NORM;
+    }
+    /* Reference is the generic matrix remap; the candidate is the special case for this channel count. */
+    orig_func = remap_channels_matrix_c;
+    if (channels == 2) func = remap_mono_to_stereo_c;
+    else if (channels == 4) func = remap_mono_to_ch4_c;
+    else pa_assert_not_reached();
+    /* Correctness is checked for align 0..3; performance is measured only on the align=3 call. */
+    if (format == PA_SAMPLE_FLOAT32NE) {
+        run_remap_test_mono_channels_float(&remap, func, orig_func, channels, 0, TRUE, FALSE);
+        run_remap_test_mono_channels_float(&remap, func, orig_func, channels, 1, TRUE, FALSE);
+        run_remap_test_mono_channels_float(&remap, func, orig_func, channels, 2, TRUE, FALSE);
+        run_remap_test_mono_channels_float(&remap, func, orig_func, channels, 3, TRUE, TRUE);
+    } else if (format == PA_SAMPLE_S16NE) {
+        run_remap_test_mono_channels_s16(&remap, func, orig_func, channels, 0, TRUE, FALSE);
+        run_remap_test_mono_channels_s16(&remap, func, orig_func, channels, 1, TRUE, FALSE);
+        run_remap_test_mono_channels_s16(&remap, func, orig_func, channels, 2, TRUE, FALSE);
+        run_remap_test_mono_channels_s16(&remap, func, orig_func, channels, 3, TRUE, TRUE);
+    } else pa_assert_not_reached();
+}
+
+static void run_remap_test_channels_mono_float(
+        pa_remap_t *remap,
+        pa_do_remap_func_t func, pa_do_remap_func_t orig_func,
+        int channels, int align, pa_bool_t correct, pa_bool_t perf) {
+    /* Runs func and orig_func on the same random float multi-channel input; checks and/or times them. */
+    PA_DECLARE_ALIGNED(8, float, in[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, float, out[SAMPLES]) = { 0 };
+    PA_DECLARE_ALIGNED(8, float, out_ref[SAMPLES]) = { 0 };
+    float *ch;
+    float *mono, *mono_ref;
+    int i, nsamples;
+
+    assert(channels == 2 || channels == 4 || channels == 8);
+
+    /* Force sample alignment as requested */
+    ch = in + (8 - align);
+    mono_ref = out_ref + (8 - align);
+    mono = out + (8 - align);
+    nsamples = SAMPLES - (8 - align);
+    /* Random input, roughly in [-1.05, 1.05], for all nsamples * channels input samples */
+    for (i = 0; i < nsamples * channels; i++)
+        ch[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f);
+    /* Correctness: every mono output of func must be within 0.0001 of orig_func's */
+    if (correct) {
+        orig_func(remap, mono_ref, ch, nsamples);
+        func(remap, mono, ch, nsamples);
+
+        for (i = 0; i < nsamples; i++) {
+            if (fabsf(mono[i] - mono_ref[i]) > 0.0001) {
+                pa_log_debug("Correctness test failed: align=%d", align);
+                pa_log_debug("%d: %.24f != %.24f\n", i, mono[i], mono_ref[i]);
+                fail();
+            }
+        }
+    }
+    /* Performance: TIMES2 batches of TIMES calls, first for func, then for orig_func */
+    if (perf) {
+        pa_log_debug("Testing float %d-channel-to-mono remap performance with %d sample alignment", channels, align);
+
+        PA_CPU_TEST_RUN_START("func", TIMES, TIMES2) {
+            func(remap, mono, ch, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+
+        PA_CPU_TEST_RUN_START("orig", TIMES, TIMES2) {
+            orig_func(remap, mono_ref, ch, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+    }
+}
+
+static void run_remap_test_channels_mono_s16(
+        pa_remap_t *remap,
+        pa_do_remap_func_t func, pa_do_remap_func_t orig_func,
+        int channels, int align, pa_bool_t correct, pa_bool_t perf) {
+    /* Runs func and orig_func on the same int16_t multi-channel input; checks and/or times them. */
+    PA_DECLARE_ALIGNED(8, int16_t, in[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, out[SAMPLES]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, out_ref[SAMPLES]) = { 0 };
+    int16_t *ch;
+    int16_t *mono, *mono_ref;
+    int i, nsamples;
+
+    assert(channels == 2 || channels == 4 || channels == 8);
+
+    /* Force sample alignment as requested */
+    ch = in + (8 - align);
+    mono_ref = out_ref + (8 - align);
+    mono = out + (8 - align);
+    nsamples = SAMPLES - (8 - align);
+    /* Input comes from pa_random() (presumably random bytes; confirm in pulsecore/random.h) */
+    pa_random(ch, nsamples * channels * sizeof(int16_t));
+    /* Correctness: mono outputs of func and orig_func may differ by at most 3 */
+    if (correct) {
+        orig_func(remap, mono_ref, ch, nsamples);
+        func(remap, mono, ch, nsamples);
+
+        for (i = 0; i < nsamples; i++) {
+            if (abs(mono[i] - mono_ref[i]) > 3) {
+                pa_log_debug("Correctness test failed: align=%d", align);
+                pa_log_debug("%d: %hd != %hd", i, mono[i], mono_ref[i]);
+                fail();
+            }
+        }
+    }
+    /* Performance: TIMES2 batches of TIMES calls, first for func, then for orig_func */
+    if (perf) {
+        pa_log_debug("Testing s16 %d-channel-to-mono remap performance with %d sample alignment", channels, align);
+
+        PA_CPU_TEST_RUN_START("func", TIMES, TIMES2) {
+            func(remap, mono, ch, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+
+        PA_CPU_TEST_RUN_START("orig", TIMES, TIMES2) {
+            orig_func(remap, mono_ref, ch, nsamples);
+        } PA_CPU_TEST_RUN_STOP
+    }
+}
+
+static void remap_test_channels_mono(pa_sample_format_t format, int channels) {
+    pa_sample_format_t sf;
+    pa_remap_t remap;
+    pa_sample_spec iss, oss;
+    pa_do_remap_func_t orig_func, func;
+    int i;
+    /* Describe a channels -> 1 remap in which every input contributes with gain 1/channels. */
+    iss.format = oss.format = sf = format;
+    iss.channels = channels;
+    oss.channels = 1;
+    remap.format = &sf;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    for (i = 0; i < channels; i++) {
+        remap.map_table_f[0][i] = 1.0f / channels;
+        remap.map_table_i[0][i] = PA_VOLUME_NORM / channels;
+    }
+    /* Reference is the generic matrix remap; the candidate is the special case for this channel count. */
+    orig_func = remap_channels_matrix_c;
+    if (channels == 2) func = remap_stereo_to_mono_c;
+    else if (channels == 4) func = remap_ch4_to_mono_c;
+    else pa_assert_not_reached();
+    /* Correctness is checked for align 0..3; performance is measured only on the align=3 call. */
+    if (format == PA_SAMPLE_FLOAT32NE) {
+        run_remap_test_channels_mono_float(&remap, func, orig_func, channels, 0, TRUE, FALSE);
+        run_remap_test_channels_mono_float(&remap, func, orig_func, channels, 1, TRUE, FALSE);
+        run_remap_test_channels_mono_float(&remap, func, orig_func, channels, 2, TRUE, FALSE);
+        run_remap_test_channels_mono_float(&remap, func, orig_func, channels, 3, TRUE, TRUE);
+    } else if (format == PA_SAMPLE_S16NE) {
+        run_remap_test_channels_mono_s16(&remap, func, orig_func, channels, 0, TRUE, FALSE);
+        run_remap_test_channels_mono_s16(&remap, func, orig_func, channels, 1, TRUE, FALSE);
+        run_remap_test_channels_mono_s16(&remap, func, orig_func, channels, 2, TRUE, FALSE);
+        run_remap_test_channels_mono_s16(&remap, func, orig_func, channels, 3, TRUE, TRUE);
+    } else pa_assert_not_reached();
+}
+
+START_TEST (remap_special_s16_test) {
+    remap_test_channels_mono(PA_SAMPLE_S16NE, 2);
+    remap_test_channels_mono(PA_SAMPLE_S16NE, 4);
+    /* N -> mono cases above, mono -> N cases below, for N = 2 and 4 (s16 samples) */
+    remap_test_mono_channels(PA_SAMPLE_S16NE, 2);
+    remap_test_mono_channels(PA_SAMPLE_S16NE, 4);
+}
+END_TEST
+
+START_TEST (remap_special_float_test) {
+    remap_test_channels_mono(PA_SAMPLE_FLOAT32NE, 2);
+    remap_test_channels_mono(PA_SAMPLE_FLOAT32NE, 4);
+    /* N -> mono cases above, mono -> N cases below, for N = 2 and 4 (float samples) */
+    remap_test_mono_channels(PA_SAMPLE_FLOAT32NE, 2);
+    remap_test_mono_channels(PA_SAMPLE_FLOAT32NE, 4);
+}
+END_TEST
+
+int main(int argc, char *argv[]) {
+    int failed = 0;
+    Suite *s;
+    TCase *tc;
+    SRunner *sr;
+    /* Debug-level logging (which carries the timing output) is enabled unless MAKE_CHECK is set. */
+    if (!getenv("MAKE_CHECK"))
+        pa_log_set_level(PA_LOG_DEBUG);
+    /* One test case per sample format, each with its timeout set to 120. */
+    s = suite_create("Remap-special");
+    tc = tcase_create("Remap-special s16");
+    tcase_add_test(tc, remap_special_s16_test);
+    tcase_set_timeout(tc, 120);
+    suite_add_tcase(s, tc);
+    tc = tcase_create("Remap-special float");
+    tcase_add_test(tc, remap_special_float_test);
+    tcase_set_timeout(tc, 120);
+    suite_add_tcase(s, tc);
+
+    sr = srunner_create(s);
+    srunner_run_all(sr, CK_NORMAL);
+    failed = srunner_ntests_failed(sr);
+    srunner_free(sr);
+    /* Exit status is EXIT_SUCCESS only when no test failed. */
+    return (failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
-- 
1.7.9.5