[PATCH 11/11] remap: Add stereo to mono and 4-channel special case remapping

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Peter Meerwald <p.meerwald@xxxxxxxxxxxxxxxxxx>

The generic matrix remapping is rather inefficient; special-case code
improves performance by 3x easily.

v4: split into s16 and float code, 4-channel remapping
v3: fix remap_mono_to_stereo_c(), use assignment
v2: use consistent array addressing

on Intel Core i7-870 @ 2.93 GHz (GCC 4.6, 64-bit):

Checking special remap (float, mono->stereo)
func: 70392 usec (avg: 703.92, min = 583, max = 1879, stddev = 295.192).
orig: 193042 usec (avg: 1930.42, min = 1457, max = 2269, stddev = 89.9045).
Checking special remap (float, mono->4-channel)
func: 118408 usec (avg: 1184.08, min = 1151, max = 1454, stddev = 57.1244).
orig: 380074 usec (avg: 3800.74, min = 3740, max = 4180, stddev = 96.3389).
Checking special remap (s16, mono->stereo)
func: 60574 usec (avg: 605.74, min = 582, max = 659, stddev = 20.7681).
orig: 188262 usec (avg: 1882.62, min = 1804, max = 2167, stddev = 79.17).
Checking special remap (s16, mono->4-channel)
func: 120331 usec (avg: 1203.31, min = 1151, max = 1429, stddev = 55.2863).
orig: 376028 usec (avg: 3760.28, min = 3609, max = 4096, stddev = 122.043).
Checking special remap (float, stereo->mono)
func: 61408 usec (avg: 614.08, min = 580, max = 867, stddev = 50.933).
orig: 186484 usec (avg: 1864.84, min = 1808, max = 2121, stddev = 65.3967).
Checking special remap (float, 4-channel->mono)
func: 118101 usec (avg: 1181.01, min = 1157, max = 1383, stddev = 36.4474).
orig: 365191 usec (avg: 3651.91, min = 3540, max = 4083, stddev = 117.509).
Checking special remap (s16, stereo->mono)
func: 82908 usec (avg: 829.08, min = 795, max = 953, stddev = 33.3409).
orig: 182565 usec (avg: 1825.65, min = 1774, max = 2117, stddev = 65.5401).
Checking special remap (s16, 4-channel->mono)
func: 132025 usec (avg: 1320.25, min = 1284, max = 1509, stddev = 47.0133).
orig: 363347 usec (avg: 3633.47, min = 3560, max = 4012, stddev = 111.259).

on ARM Cortex-A8 (TI OMAP3 DM3730 @ 1GHz) (Linaro GCC 4.6):

Checking special remap (float, mono->stereo)
func: 1213562 usec (avg: 12135.6, min = 4669, max = 16266, stddev = 2067.64).
orig: 9251927 usec (avg: 92519.3, min = 87372, max = 134216, stddev = 5965.79).
Checking special remap (float, mono->4-channel)
func: 2479550 usec (avg: 24795.5, min = 7507, max = 29358, stddev = 2690.16).
orig: 13186133 usec (avg: 131861, min = 119843, max = 263855, stddev = 27309).
Checking special remap (s16, mono->stereo)
func: 471894 usec (avg: 4718.94, min = 4058, max = 9583, stddev = 1302.7).
orig: 1673826 usec (avg: 16738.3, min = 14679, max = 31342, stddev = 2271.67).
Checking special remap (s16, mono->4-channel)
func: 869508 usec (avg: 8695.08, min = 7019, max = 19165, stddev = 1866.94).
orig: 3317020 usec (avg: 33170.2, min = 29327, max = 47577, stddev = 2029.11).
Checking special remap (float, stereo->mono)
func: 4405182 usec (avg: 44051.8, min = 41443, max = 77912, stddev = 4160.54).
orig: 13245064 usec (avg: 132451, min = 125244, max = 182282, stddev = 8543.93).
Checking special remap (float, 4-channel->mono)
func: 8607974 usec (avg: 86079.7, min = 81909, max = 116608, stddev = 4311.52).
orig: 26326036 usec (avg: 263260, min = 255097, max = 312928, stddev = 10111.5).
Checking special remap (s16, stereo->mono)
func: 1209135 usec (avg: 12091.4, min = 10742, max = 16632, stddev = 1633.88).
orig: 3081515 usec (avg: 30815.2, min = 27008, max = 50537, stddev = 3124.35).
Checking special remap (s16, 4-channel->mono)
func: 1653868 usec (avg: 16538.7, min = 14648, max = 20721, stddev = 1834.52).
orig: 6017854 usec (avg: 60178.5, min = 56061, max = 89569, stddev = 4052.86).

benchmark code will be posted as follow-up patches

Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>
---
 src/pulsecore/remap.c |  128 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
index b2575c0..adff2a5 100644
--- a/src/pulsecore/remap.c
+++ b/src/pulsecore/remap.c
@@ -70,6 +70,114 @@ static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const fl
     }
 }
 
+static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        dst[0] = (src[0] + src[1])/2;
+        dst[1] = (src[2] + src[3])/2;
+        dst[2] = (src[4] + src[5])/2;
+        dst[3] = (src[6] + src[7])/2;
+        src += 8;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = (src[0] + src[1])/2;
+        src += 2;
+        dst += 1;
+    }
+}
+
+static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        dst[0] = (src[0] + src[1])*0.5f;
+        dst[1] = (src[2] + src[3])*0.5f;
+        dst[2] = (src[4] + src[5])*0.5f;
+        dst[3] = (src[6] + src[7])*0.5f;
+        src += 8;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = (src[0] + src[1])*0.5f;
+        src += 2;
+        dst += 1;
+    }
+}
+
+static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        dst[4] = dst[5] = dst[6] = dst[7] = src[1];
+        dst[8] = dst[9] = dst[10] = dst[11] = src[2];
+        dst[12] = dst[13] = dst[14] = dst[15] = src[3];
+        src += 4;
+        dst += 16;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        src++;
+        dst += 4;
+    }
+}
+
+static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        dst[4] = dst[5] = dst[6] = dst[7] = src[1];
+        dst[8] = dst[9] = dst[10] = dst[11] = src[2];
+        dst[12] = dst[13] = dst[14] = dst[15] = src[3];
+        src += 4;
+        dst += 16;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        src++;
+        dst += 4;
+    }
+}
+
+static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        dst[0] = (src[0] + src[1] + src[2] + src[3])/4;
+        dst[1] = (src[4] + src[5] + src[6] + src[7])/4;
+        dst[2] = (src[8] + src[9] + src[10] + src[11])/4;
+        dst[3] = (src[12] + src[13] + src[14] + src[15])/4;
+        src += 16;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = (src[0] + src[1] + src[2] + src[3])/4;
+        src += 4;
+        dst += 1;
+    }
+}
+
+static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        dst[0] = (src[0] + src[1] + src[2] + src[3])*0.25f;
+        dst[1] = (src[4] + src[5] + src[6] + src[7])*0.25f;
+        dst[2] = (src[8] + src[9] + src[10] + src[11])*0.25f;
+        dst[3] = (src[12] + src[13] + src[14] + src[15])*0.25f;
+        src += 16;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = (src[0] + src[1] + src[2] + src[3])*0.25f;
+        src += 4;
+        dst += 1;
+    }
+}
+
 static void remap_channels_matrix_s16ne_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
     unsigned oc, ic, i;
     unsigned n_ic, n_oc;
@@ -249,6 +357,26 @@ static void init_remap_c(pa_remap_t *m) {
         pa_log_info("Using mono to stereo remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
             (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
+    } else if (n_ic == 2 && n_oc == 1 &&
+            m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
+
+        pa_log_info("Using stereo to mono remapping");
+        pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
+            (pa_do_remap_func_t)remap_stereo_to_mono_float32ne_c);
+    } else if (n_ic == 1 && n_oc == 4 &&
+            m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
+            m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) {
+
+        pa_log_info("Using mono to 4-channel remapping");
+        pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
+            (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
+    } else if (n_ic == 4 && n_oc == 1 &&
+            m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
+            m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) {
+
+        pa_log_info("Using 4-channel to mono remapping");
+        pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
+            (pa_do_remap_func_t)remap_ch4_to_mono_float32ne_c);
     } else if (pa_setup_remap_arrange(m, arrange) && n_oc == 2) {
 
         pa_log_info("Using stereo arrange remapping");
-- 
1.7.9.5



[Index of Archives]     [Linux Audio Users]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux