From: Luiz Augusto von Dentz <luiz.von.dentz@xxxxxxxxx> SBC shared library is now available at: http://git.kernel.org/?p=bluetooth/sbc.git;a=summary --- configure.ac | 3 + src/Makefile.am | 29 +- src/modules/bluetooth/module-bluetooth-device.c | 3 +- src/modules/bluetooth/sbc/sbc.c | 1241 --------------------- src/modules/bluetooth/sbc/sbc.h | 113 -- src/modules/bluetooth/sbc/sbc_math.h | 61 - src/modules/bluetooth/sbc/sbc_primitives.c | 554 --------- src/modules/bluetooth/sbc/sbc_primitives.h | 80 -- src/modules/bluetooth/sbc/sbc_primitives_armv6.c | 299 ----- src/modules/bluetooth/sbc/sbc_primitives_armv6.h | 52 - src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c | 304 ----- src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h | 42 - src/modules/bluetooth/sbc/sbc_primitives_mmx.c | 375 ------- src/modules/bluetooth/sbc/sbc_primitives_mmx.h | 41 - src/modules/bluetooth/sbc/sbc_primitives_neon.c | 893 --------------- src/modules/bluetooth/sbc/sbc_primitives_neon.h | 41 - src/modules/bluetooth/sbc/sbc_tables.h | 662 ----------- 17 files changed, 9 insertions(+), 4784 deletions(-) delete mode 100644 src/modules/bluetooth/sbc/sbc.c delete mode 100644 src/modules/bluetooth/sbc/sbc.h delete mode 100644 src/modules/bluetooth/sbc/sbc_math.h delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives.c delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives.h delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_armv6.c delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_armv6.h delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.c delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.h delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.c delete mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.h delete mode 100644 src/modules/bluetooth/sbc/sbc_tables.h diff --git a/configure.ac b/configure.ac index ffb2a35..06923e3 100644 --- a/configure.ac +++ b/configure.ac @@ -965,6 +965,9 @@ AC_ARG_ENABLE([bluez], AS_IF([test "x$enable_bluez" != "xno"], [PKG_CHECK_MODULES(BLUEZ, [ bluez >= 3.0 ], HAVE_BLUEZ=1, HAVE_BLUEZ=0)], HAVE_BLUEZ=0) +AS_IF([test "x$enable_bluez" != "xno"], + [PKG_CHECK_MODULES(SBC, [ sbc >= 1.0 ], HAVE_BLUEZ=1, HAVE_BLUEZ=0)], + HAVE_BLUEZ=0) AS_IF([test "x$HAVE_DBUS" != "x1"], HAVE_BLUEZ=0) AS_IF([test "x$enable_bluez" = "xyes" && test "x$HAVE_BLUEZ" = "x0"], diff --git a/src/Makefile.am b/src/Makefile.am index faebe2a..f030102 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1250,7 +1250,6 @@ modlibexec_LTLIBRARIES += \ libbluetooth-util.la \ module-bluetooth-proximity.la \ module-bluetooth-discover.la \ - libbluetooth-sbc.la \ module-bluetooth-policy.la \ module-bluetooth-device.la @@ -1938,19 +1937,6 @@ module_bluetooth_discover_la_LDFLAGS = $(MODULE_LDFLAGS) module_bluetooth_discover_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la module_bluetooth_discover_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) -libbluetooth_sbc_la_SOURCES = \ - modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h \ - modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives.h \ - modules/bluetooth/sbc/sbc_primitives_armv6.h modules/bluetooth/sbc/sbc_primitives_armv6.c \ - modules/bluetooth/sbc/sbc_primitives_iwmmxt.h modules/bluetooth/sbc/sbc_primitives_iwmmxt.c \ - modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_mmx.h \ - modules/bluetooth/sbc/sbc_primitives_neon.c modules/bluetooth/sbc/sbc_primitives_neon.h \ - modules/bluetooth/sbc/sbc_math.h \ - modules/bluetooth/sbc/sbc_tables.h -libbluetooth_sbc_la_LDFLAGS = -avoid-version -libbluetooth_sbc_la_LIBADD = $(MODULE_LIBADD) -libbluetooth_sbc_la_CFLAGS = $(AM_CFLAGS) -I$(top_srcdir)/src/modules/bluetooth/sbc -BLUETOOTH_SBC_FILES = $(subst modules/bluetooth/,,$(libbluetooth_sbc_la_SOURCES)) libbluetooth_util_la_SOURCES = modules/bluetooth/bluetooth-util.c modules/bluetooth/bluetooth-util.h libbluetooth_util_la_LDFLAGS = -avoid-version @@ -1959,8 +1945,8 @@ libbluetooth_util_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) module_bluetooth_device_la_SOURCES = modules/bluetooth/module-bluetooth-device.c modules/bluetooth/rtp.h module_bluetooth_device_la_LDFLAGS = $(MODULE_LDFLAGS) -module_bluetooth_device_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la libbluetooth-sbc.la -module_bluetooth_device_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) -I$(top_srcdir)/src/modules/bluetooth/sbc +module_bluetooth_device_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) $(SBC_LIBS) libbluetooth-util.la +module_bluetooth_device_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) $(SBC_CFLAGS) module_bluetooth_policy_la_SOURCES = modules/bluetooth/module-bluetooth-policy.c module_bluetooth_policy_la_LDFLAGS = $(MODULE_LDFLAGS) @@ -2022,13 +2008,6 @@ massif: pulseaudio update-ffmpeg: wget -O pulsecore/ffmpeg/resample2.c http://svn.mplayerhq.hu/ffmpeg/trunk/libavcodec/resample2.c?view=co -# We get things twice here, because sometimes gitweb will us just give a "Generating..." otherwise. -update-sbc: - for i in $(BLUETOOTH_SBC_FILES) ; do \ - wget -O /dev/null http://git.kernel.org/\?p=bluetooth/bluez.git\;a=blob_plain\;f=$$i ; \ - wget -O $(top_srcdir)/src/modules/bluetooth/$$i http://git.kernel.org/\?p=bluetooth/bluez.git\;a=blob_plain\;f=$$i ; \ - done - update-reserve: for i in reserve.c reserve.h reserve-monitor.c reserve-monitor.h ; do \ wget -O $(top_srcdir)/src/modules/$$i http://git.0pointer.de/\?p=reserve.git\;a=blob_plain\;f=$$i\;hb=master ; \ @@ -2048,7 +2027,7 @@ update-map-file: echo "*;" ; \ echo "};" ) > $(srcdir)/map-file -update-all: update-ffmpeg update-sbc update-map-file +update-all: update-ffmpeg update-map-file # Force installation order of libraries. libtool relinks on install time, in # which case libpulsecommon has to be installed before others, but the padsp @@ -2087,4 +2066,4 @@ coverage: @echo "" endif -.PHONY: massif update-all update-ffmpeg update-sbc update-map-file coverage +.PHONY: massif update-all update-ffmpeg update-map-file diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 19d62a6..6e3f81e 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -50,9 +50,10 @@ #include <pulsecore/namereg.h> #include <pulsecore/dbus-shared.h> +#include <sbc/sbc.h> + #include "module-bluetooth-device-symdef.h" #include "ipc.h" -#include "sbc.h" #include "a2dp-codecs.h" #include "rtp.h" #include "bluetooth-util.h" diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c deleted file mode 100644 index c5015ab..0000000 --- a/src/modules/bluetooth/sbc/sbc.c +++ /dev/null @@ -1,1241 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2008 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -/* todo items: - - use a log2 table for byte integer scale factors calculation (sum log2 results - for high and low bytes) fill bitpool by 16 bits instead of one at a time in - bits allocation/bitpool generation port to the dsp - -*/ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <stdlib.h> -#include <sys/types.h> -#include <limits.h> - -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc.h" -#include "sbc_primitives.h" - -#define SBC_SYNCWORD 0x9C - -/* This structure contains an unpacked SBC frame. - Yes, there is probably quite some unused space herein */ -struct sbc_frame { - uint8_t frequency; - uint8_t block_mode; - uint8_t blocks; - enum { - MONO = SBC_MODE_MONO, - DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL, - STEREO = SBC_MODE_STEREO, - JOINT_STEREO = SBC_MODE_JOINT_STEREO - } mode; - uint8_t channels; - enum { - LOUDNESS = SBC_AM_LOUDNESS, - SNR = SBC_AM_SNR - } allocation; - uint8_t subband_mode; - uint8_t subbands; - uint8_t bitpool; - uint16_t codesize; - uint8_t length; - - /* bit number x set means joint stereo has been used in subband x */ - uint8_t joint; - - /* only the lower 4 bits of every element are to be used */ - uint32_t SBC_ALIGNED scale_factor[2][8]; - - /* raw integer subband samples in the frame */ - int32_t SBC_ALIGNED sb_sample_f[16][2][8]; - - /* modified subband samples */ - int32_t SBC_ALIGNED sb_sample[16][2][8]; - - /* original pcm audio samples */ - int16_t SBC_ALIGNED pcm_sample[2][16*8]; -}; - -struct sbc_decoder_state { - int subbands; - int32_t V[2][170]; - int offset[2][16]; -}; - -/* - * Calculates the CRC-8 of the first len bits in data - */ -static const uint8_t crc_table[256] = { - 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, - 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB, - 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E, - 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, - 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4, - 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C, - 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, - 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1, - 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40, - 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, - 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D, - 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65, - 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, - 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F, - 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A, - 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, - 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75, - 0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D, - 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, - 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50, - 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2, - 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, - 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F, - 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7, - 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, - 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E, - 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB, - 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, - 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1, - 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09, - 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, - 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4 -}; - -static uint8_t sbc_crc8(const uint8_t *data, size_t len) -{ - uint8_t crc = 0x0f; - size_t i; - uint8_t octet; - - for (i = 0; i < len / 8; i++) - crc = crc_table[crc ^ data[i]]; - - octet = data[i]; - for (i = 0; i < len % 8; i++) { - char bit = ((octet ^ crc) & 0x80) >> 7; - - crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0); - - octet = octet << 1; - } - - return crc; -} - -/* - * Code straight from the spec to calculate the bits array - * Takes a pointer to the frame in question, a pointer to the bits array and - * the sampling frequency (as 2 bit integer) - */ -static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal( - const struct sbc_frame *frame, int (*bits)[8], int subbands) -{ - uint8_t sf = frame->frequency; - - if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) { - int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; - int ch, sb; - - for (ch = 0; ch < frame->channels; ch++) { - max_bitneed = 0; - if (frame->allocation == SNR) { - for (sb = 0; sb < subbands; sb++) { - bitneed[ch][sb] = frame->scale_factor[ch][sb]; - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } else { - for (sb = 0; sb < subbands; sb++) { - if (frame->scale_factor[ch][sb] == 0) - bitneed[ch][sb] = -5; - else { - if (subbands == 4) - loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; - else - loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; - if (loudness > 0) - bitneed[ch][sb] = loudness / 2; - else - bitneed[ch][sb] = loudness; - } - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - - bitcount = 0; - slicecount = 0; - bitslice = max_bitneed + 1; - do { - bitslice--; - bitcount += slicecount; - slicecount = 0; - for (sb = 0; sb < subbands; sb++) { - if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) - slicecount++; - else if (bitneed[ch][sb] == bitslice + 1) - slicecount += 2; - } - } while (bitcount + slicecount < frame->bitpool); - - if (bitcount + slicecount == frame->bitpool) { - bitcount += slicecount; - bitslice--; - } - - for (sb = 0; sb < subbands; sb++) { - if (bitneed[ch][sb] < bitslice + 2) - bits[ch][sb] = 0; - else { - bits[ch][sb] = bitneed[ch][sb] - bitslice; - if (bits[ch][sb] > 16) - bits[ch][sb] = 16; - } - } - - for (sb = 0; bitcount < frame->bitpool && - sb < subbands; sb++) { - if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { - bits[ch][sb]++; - bitcount++; - } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { - bits[ch][sb] = 2; - bitcount += 2; - } - } - - for (sb = 0; bitcount < frame->bitpool && - sb < subbands; sb++) { - if (bits[ch][sb] < 16) { - bits[ch][sb]++; - bitcount++; - } - } - - } - - } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) { - int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; - int ch, sb; - - max_bitneed = 0; - if (frame->allocation == SNR) { - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < subbands; sb++) { - bitneed[ch][sb] = frame->scale_factor[ch][sb]; - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - } else { - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < subbands; sb++) { - if (frame->scale_factor[ch][sb] == 0) - bitneed[ch][sb] = -5; - else { - if (subbands == 4) - loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; - else - loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; - if (loudness > 0) - bitneed[ch][sb] = loudness / 2; - else - bitneed[ch][sb] = loudness; - } - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - } - - bitcount = 0; - slicecount = 0; - bitslice = max_bitneed + 1; - do { - bitslice--; - bitcount += slicecount; - slicecount = 0; - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < subbands; sb++) { - if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) - slicecount++; - else if (bitneed[ch][sb] == bitslice + 1) - slicecount += 2; - } - } - } while (bitcount + slicecount < frame->bitpool); - - if (bitcount + slicecount == frame->bitpool) { - bitcount += slicecount; - bitslice--; - } - - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < subbands; sb++) { - if (bitneed[ch][sb] < bitslice + 2) { - bits[ch][sb] = 0; - } else { - bits[ch][sb] = bitneed[ch][sb] - bitslice; - if (bits[ch][sb] > 16) - bits[ch][sb] = 16; - } - } - } - - ch = 0; - sb = 0; - while (bitcount < frame->bitpool) { - if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { - bits[ch][sb]++; - bitcount++; - } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { - bits[ch][sb] = 2; - bitcount += 2; - } - if (ch == 1) { - ch = 0; - sb++; - if (sb >= subbands) - break; - } else - ch = 1; - } - - ch = 0; - sb = 0; - while (bitcount < frame->bitpool) { - if (bits[ch][sb] < 16) { - bits[ch][sb]++; - bitcount++; - } - if (ch == 1) { - ch = 0; - sb++; - if (sb >= subbands) - break; - } else - ch = 1; - } - - } - -} - -static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) -{ - if (frame->subbands == 4) - sbc_calculate_bits_internal(frame, bits, 4); - else - sbc_calculate_bits_internal(frame, bits, 8); -} - -/* - * Unpacks a SBC frame at the beginning of the stream in data, - * which has at most len bytes into frame. - * Returns the length in bytes of the packed frame, or a negative - * value on error. The error codes are: - * - * -1 Data stream too short - * -2 Sync byte incorrect - * -3 CRC8 incorrect - * -4 Bitpool value out of bounds - */ -static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, - size_t len) -{ - unsigned int consumed; - /* Will copy the parts of the header that are relevant to crc - * calculation here */ - uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int crc_pos = 0; - int32_t temp; - - uint32_t audio_sample; - int ch, sb, blk, bit; /* channel, subband, block and bit standard - counters */ - int bits[2][8]; /* bits distribution */ - uint32_t levels[2][8]; /* levels derived from that */ - - if (len < 4) - return -1; - - if (data[0] != SBC_SYNCWORD) - return -2; - - frame->frequency = (data[1] >> 6) & 0x03; - - frame->block_mode = (data[1] >> 4) & 0x03; - switch (frame->block_mode) { - case SBC_BLK_4: - frame->blocks = 4; - break; - case SBC_BLK_8: - frame->blocks = 8; - break; - case SBC_BLK_12: - frame->blocks = 12; - break; - case SBC_BLK_16: - frame->blocks = 16; - break; - } - - frame->mode = (data[1] >> 2) & 0x03; - switch (frame->mode) { - case MONO: - frame->channels = 1; - break; - case DUAL_CHANNEL: /* fall-through */ - case STEREO: - case JOINT_STEREO: - frame->channels = 2; - break; - } - - frame->allocation = (data[1] >> 1) & 0x01; - - frame->subband_mode = (data[1] & 0x01); - frame->subbands = frame->subband_mode ? 8 : 4; - - frame->bitpool = data[2]; - - if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && - frame->bitpool > 16 * frame->subbands) - return -4; - - if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && - frame->bitpool > 32 * frame->subbands) - return -4; - - /* data[3] is crc, we're checking it later */ - - consumed = 32; - - crc_header[0] = data[1]; - crc_header[1] = data[2]; - crc_pos = 16; - - if (frame->mode == JOINT_STEREO) { - if (len * 8 < consumed + frame->subbands) - return -1; - - frame->joint = 0x00; - for (sb = 0; sb < frame->subbands - 1; sb++) - frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb; - if (frame->subbands == 4) - crc_header[crc_pos / 8] = data[4] & 0xf0; - else - crc_header[crc_pos / 8] = data[4]; - - consumed += frame->subbands; - crc_pos += frame->subbands; - } - - if (len * 8 < consumed + (4 * frame->subbands * frame->channels)) - return -1; - - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - /* FIXME assert(consumed % 4 == 0); */ - frame->scale_factor[ch][sb] = - (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F; - crc_header[crc_pos >> 3] |= - frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7)); - - consumed += 4; - crc_pos += 4; - } - } - - if (data[3] != sbc_crc8(crc_header, crc_pos)) - return -3; - - sbc_calculate_bits(frame, bits); - - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) - levels[ch][sb] = (1 << bits[ch][sb]) - 1; - } - - for (blk = 0; blk < frame->blocks; blk++) { - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - uint32_t shift; - - if (levels[ch][sb] == 0) { - frame->sb_sample[blk][ch][sb] = 0; - continue; - } - - shift = frame->scale_factor[ch][sb] + - 1 + SBCDEC_FIXED_EXTRA_BITS; - - audio_sample = 0; - for (bit = 0; bit < bits[ch][sb]; bit++) { - if (consumed > len * 8) - return -1; - - if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01) - audio_sample |= 1 << (bits[ch][sb] - bit - 1); - - consumed++; - } - - frame->sb_sample[blk][ch][sb] = (int32_t) - (((((uint64_t) audio_sample << 1) | 1) << shift) / - levels[ch][sb]) - (1 << shift); - } - } - } - - if (frame->mode == JOINT_STEREO) { - for (blk = 0; blk < frame->blocks; blk++) { - for (sb = 0; sb < frame->subbands; sb++) { - if (frame->joint & (0x01 << sb)) { - temp = frame->sb_sample[blk][0][sb] + - frame->sb_sample[blk][1][sb]; - frame->sb_sample[blk][1][sb] = - frame->sb_sample[blk][0][sb] - - frame->sb_sample[blk][1][sb]; - frame->sb_sample[blk][0][sb] = temp; - } - } - } - } - - if ((consumed & 0x7) != 0) - consumed += 8 - (consumed & 0x7); - - return consumed >> 3; -} - -static void sbc_decoder_init(struct sbc_decoder_state *state, - const struct sbc_frame *frame) -{ - int i, ch; - - memset(state->V, 0, sizeof(state->V)); - state->subbands = frame->subbands; - - for (ch = 0; ch < 2; ch++) - for (i = 0; i < frame->subbands * 2; i++) - state->offset[ch][i] = (10 * i + 10); -} - -static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s) -{ - if (s > 0x7FFF) - return 0x7FFF; - else if (s < -0x8000) - return -0x8000; - else - return s; -} - -static inline void sbc_synthesize_four(struct sbc_decoder_state *state, - struct sbc_frame *frame, int ch, int blk) -{ - int i, k, idx; - int32_t *v = state->V[ch]; - int *offset = state->offset[ch]; - - for (i = 0; i < 8; i++) { - /* Shifting */ - offset[i]--; - if (offset[i] < 0) { - offset[i] = 79; - memcpy(v + 80, v, 9 * sizeof(*v)); - } - - /* Distribute the new matrix value to the shifted position */ - v[offset[i]] = SCALE4_STAGED1( - MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], - MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], - MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], - MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); - } - - /* Compute the samples */ - for (idx = 0, i = 0; i < 4; i++, idx += 5) { - k = (i + 4) & 0xf; - - /* Store in output, Q0 */ - frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1( - MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], - MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], - MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], - MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], - MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], - MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], - MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], - MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], - MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], - MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4])))))))))))); - } -} - -static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, - struct sbc_frame *frame, int ch, int blk) -{ - int i, j, k, idx; - int *offset = state->offset[ch]; - - for (i = 0; i < 16; i++) { - /* Shifting */ - offset[i]--; - if (offset[i] < 0) { - offset[i] = 159; - for (j = 0; j < 9; j++) - state->V[ch][j + 160] = state->V[ch][j]; - } - - /* Distribute the new matrix value to the shifted position */ - state->V[ch][offset[i]] = SCALE8_STAGED1( - MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], - MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], - MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], - MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], - MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], - MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], - MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], - MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); - } - - /* Compute the samples */ - for (idx = 0, i = 0; i < 8; i++, idx += 5) { - k = (i + 8) & 0xf; - - /* Store in output, Q0 */ - frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1( - MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], - MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], - MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], - MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], - MULA(state->V[ch][offset[i] + 4], sbc_proto_8_80m0[idx + 2], - MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], - MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], - MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], - MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], - MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4])))))))))))); - } -} - -static int sbc_synthesize_audio(struct sbc_decoder_state *state, - struct sbc_frame *frame) -{ - int ch, blk; - - switch (frame->subbands) { - case 4: - for (ch = 0; ch < frame->channels; ch++) { - for (blk = 0; blk < frame->blocks; blk++) - sbc_synthesize_four(state, frame, ch, blk); - } - return frame->blocks * 4; - - case 8: - for (ch = 0; ch < frame->channels; ch++) { - for (blk = 0; blk < frame->blocks; blk++) - sbc_synthesize_eight(state, frame, ch, blk); - } - return frame->blocks * 8; - - default: - return -EIO; - } -} - -static int sbc_analyze_audio(struct sbc_encoder_state *state, - struct sbc_frame *frame) -{ - int ch, blk; - int16_t *x; - - switch (frame->subbands) { - case 4: - for (ch = 0; ch < frame->channels; ch++) { - x = &state->X[ch][state->position - 16 + - frame->blocks * 4]; - for (blk = 0; blk < frame->blocks; blk += 4) { - state->sbc_analyze_4b_4s( - x, - frame->sb_sample_f[blk][ch], - frame->sb_sample_f[blk + 1][ch] - - frame->sb_sample_f[blk][ch]); - x -= 16; - } - } - return frame->blocks * 4; - - case 8: - for (ch = 0; ch < frame->channels; ch++) { - x = &state->X[ch][state->position - 32 + - frame->blocks * 8]; - for (blk = 0; blk < frame->blocks; blk += 4) { - state->sbc_analyze_4b_8s( - x, - frame->sb_sample_f[blk][ch], - frame->sb_sample_f[blk + 1][ch] - - frame->sb_sample_f[blk][ch]); - x -= 32; - } - } - return frame->blocks * 8; - - default: - return -EIO; - } -} - -/* Supplementary bitstream writing macros for 'sbc_pack_frame' */ - -#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ - do { \ - bits_cache = (v) | (bits_cache << (n)); \ - bits_count += (n); \ - if (bits_count >= 16) { \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - } \ - } while (0) - -#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ - do { \ - while (bits_count >= 8) { \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - } \ - if (bits_count > 0) \ - *data_ptr++ = (uint8_t) \ - (bits_cache << (8 - bits_count)); \ - } while (0) - -/* - * Packs the SBC frame from frame into the memory at data. At most len - * bytes will be used, should more memory be needed an appropriate - * error code will be returned. Returns the length of the packed frame - * on success or a negative value on error. - * - * The error codes are: - * -1 Not enough memory reserved - * -2 Unsupported sampling rate - * -3 Unsupported number of blocks - * -4 Unsupported number of subbands - * -5 Bitpool value out of bounds - * -99 not implemented - */ - -static SBC_ALWAYS_INLINE ssize_t sbc_pack_frame_internal(uint8_t *data, - struct sbc_frame *frame, size_t len, - int frame_subbands, int frame_channels, - int joint) -{ - /* Bitstream writer starts from the fourth byte */ - uint8_t *data_ptr = data + 4; - uint32_t bits_cache = 0; - uint32_t bits_count = 0; - - /* Will copy the header parts for CRC-8 calculation here */ - uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int crc_pos = 0; - - uint32_t audio_sample; - - int ch, sb, blk; /* channel, subband, block and bit counters */ - int bits[2][8]; /* bits distribution */ - uint32_t levels[2][8]; /* levels are derived from that */ - uint32_t sb_sample_delta[2][8]; - - data[0] = SBC_SYNCWORD; - - data[1] = (frame->frequency & 0x03) << 6; - - data[1] |= (frame->block_mode & 0x03) << 4; - - data[1] |= (frame->mode & 0x03) << 2; - - data[1] |= (frame->allocation & 0x01) << 1; - - switch (frame_subbands) { - case 4: - /* Nothing to do */ - break; - case 8: - data[1] |= 0x01; - break; - default: - return -4; - break; - } - - data[2] = frame->bitpool; - - if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && - frame->bitpool > frame_subbands << 4) - return -5; - - if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && - frame->bitpool > frame_subbands << 5) - return -5; - - /* Can't fill in crc yet */ - - crc_header[0] = data[1]; - crc_header[1] = data[2]; - crc_pos = 16; - - if (frame->mode == JOINT_STEREO) { - PUT_BITS(data_ptr, bits_cache, bits_count, - joint, frame_subbands); - crc_header[crc_pos >> 3] = joint; - crc_pos += frame_subbands; - } - - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - PUT_BITS(data_ptr, bits_cache, bits_count, - frame->scale_factor[ch][sb] & 0x0F, 4); - crc_header[crc_pos >> 3] <<= 4; - crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; - crc_pos += 4; - } - } - - /* align the last crc byte */ - if (crc_pos % 8) - crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8); - - data[3] = sbc_crc8(crc_header, crc_pos); - - sbc_calculate_bits(frame, bits); - - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << - (32 - (frame->scale_factor[ch][sb] + - SCALE_OUT_BITS + 2)); - sb_sample_delta[ch][sb] = (uint32_t) 1 << - (frame->scale_factor[ch][sb] + - SCALE_OUT_BITS + 1); - } - } - - for (blk = 0; blk < frame->blocks; blk++) { - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - - if (bits[ch][sb] == 0) - continue; - - audio_sample = ((uint64_t) levels[ch][sb] * - (sb_sample_delta[ch][sb] + - frame->sb_sample_f[blk][ch][sb])) >> 32; - - PUT_BITS(data_ptr, bits_cache, bits_count, - audio_sample, bits[ch][sb]); - } - } - } - - FLUSH_BITS(data_ptr, bits_cache, bits_count); - - return data_ptr - data; -} - -static ssize_t sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, - int joint) -{ - if (frame->subbands == 4) { - if (frame->channels == 1) - return sbc_pack_frame_internal( - data, frame, len, 4, 1, joint); - else - return sbc_pack_frame_internal( - data, frame, len, 4, 2, joint); - } else { - if (frame->channels == 1) - return sbc_pack_frame_internal( - data, frame, len, 8, 1, joint); - else - return sbc_pack_frame_internal( - data, frame, len, 8, 2, joint); - } -} - -static void sbc_encoder_init(struct sbc_encoder_state *state, - const struct sbc_frame *frame) -{ - memset(&state->X, 0, sizeof(state->X)); - state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7; - - sbc_init_primitives(state); -} - -struct sbc_priv { - int init; - struct SBC_ALIGNED sbc_frame frame; - struct SBC_ALIGNED sbc_decoder_state dec_state; - struct SBC_ALIGNED sbc_encoder_state enc_state; -}; - -static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) -{ - sbc->frequency = SBC_FREQ_44100; - sbc->mode = SBC_MODE_STEREO; - sbc->subbands = SBC_SB_8; - sbc->blocks = SBC_BLK_16; - sbc->bitpool = 32; -#if __BYTE_ORDER == __LITTLE_ENDIAN - sbc->endian = SBC_LE; -#elif __BYTE_ORDER == __BIG_ENDIAN - sbc->endian = SBC_BE; -#else -#error "Unknown byte order" -#endif -} - -int sbc_init(sbc_t *sbc, unsigned long flags) -{ - if (!sbc) - return -EIO; - - memset(sbc, 0, sizeof(sbc_t)); - - sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); - if (!sbc->priv_alloc_base) - return -ENOMEM; - - sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + - SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); - - memset(sbc->priv, 0, sizeof(struct sbc_priv)); - - sbc_set_defaults(sbc, flags); - - return 0; -} - -ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len) -{ - return sbc_decode(sbc, input, input_len, NULL, 0, NULL); -} - -ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written) -{ - struct sbc_priv *priv; - char *ptr; - int i, ch, framelen, samples; - - if (!sbc || !input) - return -EIO; - - priv = sbc->priv; - - framelen = sbc_unpack_frame(input, &priv->frame, input_len); - - if (!priv->init) { - sbc_decoder_init(&priv->dec_state, &priv->frame); - priv->init = 1; - - sbc->frequency = priv->frame.frequency; - sbc->mode = priv->frame.mode; - sbc->subbands = priv->frame.subband_mode; - sbc->blocks = priv->frame.block_mode; - sbc->allocation = priv->frame.allocation; - sbc->bitpool = priv->frame.bitpool; - - priv->frame.codesize = sbc_get_codesize(sbc); - priv->frame.length = framelen; - } else if (priv->frame.bitpool != sbc->bitpool) { - priv->frame.length = framelen; - sbc->bitpool = priv->frame.bitpool; - } - - if (!output) - return framelen; - - if (written) - *written = 0; - - if (framelen <= 0) - return framelen; - - samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame); - - ptr = output; - - if (output_len < (size_t) (samples * priv->frame.channels * 2)) - samples = output_len / (priv->frame.channels * 2); - - for (i = 0; i < samples; i++) { - for (ch = 0; ch < priv->frame.channels; ch++) { - int16_t s; - s = priv->frame.pcm_sample[ch][i]; - - if (sbc->endian == SBC_BE) { - *ptr++ = (s & 0xff00) >> 8; - *ptr++ = (s & 0x00ff); - } else { - *ptr++ = (s & 0x00ff); - *ptr++ = (s & 0xff00) >> 8; - } - } - } - - if (written) - *written = samples * priv->frame.channels * 2; - - return framelen; -} - -ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, ssize_t *written) -{ - struct sbc_priv *priv; - int samples; - ssize_t framelen; - int (*sbc_enc_process_input)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - - if (!sbc || !input) - return -EIO; - - priv = sbc->priv; - - if (written) - *written = 0; - - if (!priv->init) { - priv->frame.frequency = sbc->frequency; - priv->frame.mode = sbc->mode; - priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; - priv->frame.allocation = sbc->allocation; - priv->frame.subband_mode = sbc->subbands; - priv->frame.subbands = sbc->subbands ? 8 : 4; - priv->frame.block_mode = sbc->blocks; - priv->frame.blocks = 4 + (sbc->blocks * 4); - priv->frame.bitpool = sbc->bitpool; - priv->frame.codesize = sbc_get_codesize(sbc); - priv->frame.length = sbc_get_frame_length(sbc); - - sbc_encoder_init(&priv->enc_state, &priv->frame); - priv->init = 1; - } else if (priv->frame.bitpool != sbc->bitpool) { - priv->frame.length = sbc_get_frame_length(sbc); - priv->frame.bitpool = sbc->bitpool; - } - - /* input must be large enough to encode a complete frame */ - if (input_len < priv->frame.codesize) - return 0; - - /* output must be large enough to receive the encoded frame */ - if (!output || output_len < priv->frame.length) - return -ENOSPC; - - /* Select the needed input data processing function and call it */ - if (priv->frame.subbands == 8) { - if (sbc->endian == SBC_BE) - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_8s_be; - else - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_8s_le; - } else { - if (sbc->endian == SBC_BE) - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_4s_be; - else - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_4s_le; - } - - priv->enc_state.position = sbc_enc_process_input( - priv->enc_state.position, (const uint8_t *) input, - priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, - priv->frame.channels); - - samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); - - if (priv->frame.mode == JOINT_STEREO) { - int j = priv->enc_state.sbc_calc_scalefactors_j( - priv->frame.sb_sample_f, priv->frame.scale_factor, - priv->frame.blocks, priv->frame.subbands); - framelen = sbc_pack_frame(output, &priv->frame, output_len, j); - } else { - priv->enc_state.sbc_calc_scalefactors( - priv->frame.sb_sample_f, priv->frame.scale_factor, - priv->frame.blocks, priv->frame.channels, - priv->frame.subbands); - framelen = sbc_pack_frame(output, &priv->frame, output_len, 0); - } - - if (written) - *written = framelen; - - return samples * priv->frame.channels * 2; -} - -void sbc_finish(sbc_t *sbc) -{ - if (!sbc) - return; - - free(sbc->priv_alloc_base); - - memset(sbc, 0, sizeof(sbc_t)); -} - -size_t sbc_get_frame_length(sbc_t *sbc) -{ - int ret; - uint8_t subbands, channels, blocks, joint, bitpool; - struct sbc_priv *priv; - - priv = sbc->priv; - if (priv->init && priv->frame.bitpool == sbc->bitpool) - return priv->frame.length; - - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; - joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0; - bitpool = sbc->bitpool; - - ret = 4 + (4 * subbands * channels) / 8; - /* This term is not always evenly divide so we round it up */ - if (channels == 1) - ret += ((blocks * channels * bitpool) + 7) / 8; - else - ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; - - return ret; -} - -unsigned sbc_get_frame_duration(sbc_t *sbc) -{ - uint8_t subbands, blocks; - uint16_t frequency; - struct sbc_priv *priv; - - priv = sbc->priv; - if (!priv->init) { - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - } else { - subbands = priv->frame.subbands; - blocks = priv->frame.blocks; - } - - switch (sbc->frequency) { - case SBC_FREQ_16000: - frequency = 16000; - break; - - case SBC_FREQ_32000: - frequency = 32000; - break; - - case SBC_FREQ_44100: - frequency = 44100; - break; - - case SBC_FREQ_48000: - frequency = 48000; - break; - default: - return 0; - } - - return (1000000 * blocks * subbands) / frequency; -} - -size_t sbc_get_codesize(sbc_t *sbc) -{ - uint16_t subbands, channels, blocks; - struct sbc_priv *priv; - - priv = sbc->priv; - if (!priv->init) { - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; - } else { - subbands = priv->frame.subbands; - blocks = priv->frame.blocks; - channels = priv->frame.channels; - } - - return subbands * blocks * channels * 2; -} - -const char *sbc_get_implementation_info(sbc_t *sbc) -{ - struct sbc_priv *priv; - - if (!sbc) - return NULL; - - priv = sbc->priv; - if (!priv) - return NULL; - - return priv->enc_state.implementation_info; -} - -int sbc_reinit(sbc_t *sbc, unsigned long flags) -{ - struct sbc_priv *priv; - - if (!sbc || !sbc->priv) - return -EIO; - - priv = sbc->priv; - - if (priv->init == 1) - memset(sbc->priv, 0, sizeof(struct sbc_priv)); - - sbc_set_defaults(sbc, flags); - - return 0; -} diff --git a/src/modules/bluetooth/sbc/sbc.h b/src/modules/bluetooth/sbc/sbc.h deleted file mode 100644 index 2f830ad..0000000 --- a/src/modules/bluetooth/sbc/sbc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_H -#define __SBC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdint.h> -#include <sys/types.h> - -/* sampling frequency */ -#define SBC_FREQ_16000 0x00 -#define SBC_FREQ_32000 0x01 -#define SBC_FREQ_44100 0x02 -#define SBC_FREQ_48000 0x03 - -/* blocks */ -#define SBC_BLK_4 0x00 -#define SBC_BLK_8 0x01 -#define SBC_BLK_12 0x02 -#define SBC_BLK_16 0x03 - -/* channel mode */ -#define SBC_MODE_MONO 0x00 -#define SBC_MODE_DUAL_CHANNEL 0x01 -#define SBC_MODE_STEREO 0x02 -#define SBC_MODE_JOINT_STEREO 0x03 - -/* allocation method */ -#define SBC_AM_LOUDNESS 0x00 -#define SBC_AM_SNR 0x01 - -/* subbands */ -#define SBC_SB_4 0x00 -#define SBC_SB_8 0x01 - -/* Data endianess */ -#define SBC_LE 0x00 -#define SBC_BE 0x01 - -struct sbc_struct { - unsigned long flags; - - uint8_t frequency; - uint8_t blocks; - uint8_t subbands; - uint8_t mode; - uint8_t allocation; - uint8_t bitpool; - uint8_t endian; - - void *priv; - void *priv_alloc_base; -}; - -typedef struct sbc_struct sbc_t; - -int sbc_init(sbc_t *sbc, unsigned long flags); -int sbc_reinit(sbc_t *sbc, unsigned long flags); - -ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len); - -/* Decodes ONE input block into ONE output block */ -ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written); - -/* Encodes ONE input block into ONE output block */ -ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, ssize_t *written); - -/* Returns the output block size in bytes */ -size_t sbc_get_frame_length(sbc_t *sbc); - -/* Returns the time one input/output block takes to play in msec*/ -unsigned sbc_get_frame_duration(sbc_t *sbc); - -/* Returns the input block size in bytes */ -size_t sbc_get_codesize(sbc_t *sbc); - -const char *sbc_get_implementation_info(sbc_t *sbc); -void sbc_finish(sbc_t *sbc); - -#ifdef __cplusplus -} -#endif - -#endif /* __SBC_H */ diff --git a/src/modules/bluetooth/sbc/sbc_math.h b/src/modules/bluetooth/sbc/sbc_math.h deleted file mode 100644 index 5476860..0000000 --- a/src/modules/bluetooth/sbc/sbc_math.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2008 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#define fabs(x) ((x) < 0 ? -(x) : (x)) -/* C does not provide an explicit arithmetic shift right but this will - always be correct and every compiler *should* generate optimal code */ -#define ASR(val, bits) ((-2 >> 1 == -1) ? \ - ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) - -#define SCALE_SPROTO4_TBL 12 -#define SCALE_SPROTO8_TBL 14 -#define SCALE_NPROTO4_TBL 11 -#define SCALE_NPROTO8_TBL 11 -#define SCALE4_STAGED1_BITS 15 -#define SCALE4_STAGED2_BITS 16 -#define SCALE8_STAGED1_BITS 15 -#define SCALE8_STAGED2_BITS 16 - -typedef int32_t sbc_fixed_t; - -#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS) -#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS) -#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS) -#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS) - -#define SBC_FIXED_0(val) { val = 0; } -#define MUL(a, b) ((a) * (b)) -#if defined(__arm__) && (!defined(__thumb__) || defined(__thumb2__)) -#define MULA(a, b, res) ({ \ - int tmp = res; \ - __asm__( \ - "mla %0, %2, %3, %0" \ - : "=&r" (tmp) \ - : "0" (tmp), "r" (a), "r" (b)); \ - tmp; }) -#else -#define MULA(a, b, res) ((a) * (b) + (res)) -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c deleted file mode 100644 index ad780d0..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives.c +++ /dev/null @@ -1,554 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include <string.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives.h" -#include "sbc_primitives_mmx.h" -#include "sbc_primitives_iwmmxt.h" -#include "sbc_primitives_neon.h" -#include "sbc_primitives_armv6.h" - -/* - * A reference C code of analysis filter with SIMD-friendly tables - * reordering and code layout. This code can be used to develop platform - * specific SIMD optimizations. Also it may be used as some kind of test - * for compiler autovectorization capabilities (who knows, if the compiler - * is very good at this stuff, hand optimized assembly may be not strictly - * needed for some platform). - * - * Note: It is also possible to make a simple variant of analysis filter, - * which needs only a single constants table without taking care about - * even/odd cases. This simple variant of filter can be implemented without - * input data permutation. The only thing that would be lost is the - * possibility to use pairwise SIMD multiplications. But for some simple - * CPU cores without SIMD extensions it can be useful. If anybody is - * interested in implementing such variant of a filter, sourcecode from - * bluez versions 4.26/4.27 can be used as a reference and the history of - * the changes in git repository done around that time may be worth checking. - */ - -static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - FIXED_A t1[4]; - FIXED_T t2[4]; - int hop = 0; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = - (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 40; hop += 8) { - t1[0] += (FIXED_A) in[hop] * consts[hop]; - t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; - t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; - t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; - t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; - t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; - t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; - t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; - - /* do the cos transform */ - t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; - t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; - t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; - t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; - t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; - t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; - t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; - t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; - - t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; - t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; - t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; - t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; - t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; - t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; - t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; - t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; - - out[0] = t1[0] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - out[1] = t1[1] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - out[2] = t1[2] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - out[3] = t1[3] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); -} - -static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - FIXED_A t1[8]; - FIXED_T t2[8]; - int i, hop; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = - (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 80; hop += 16) { - t1[0] += (FIXED_A) in[hop] * consts[hop]; - t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; - t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; - t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; - t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; - t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; - t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; - t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; - t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; - t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; - t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; - t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; - t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; - t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; - t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; - t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; - t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; - t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; - t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; - t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; - - - /* do the cos transform */ - t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; - - for (i = 0; i < 4; i++) { - t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; - t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; - t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; - t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; - t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; - t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; - t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; - t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; - t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; - t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; - t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; - t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; - t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; - t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; - t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; - t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; - } - - for (i = 0; i < 8; i++) - out[i] = t1[i] >> - (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); -} - -static inline void sbc_analyze_4b_4s_simd(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_simd(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); -} - -static inline int16_t unaligned16_be(const uint8_t *ptr) -{ - return (int16_t) ((ptr[0] << 8) | ptr[1]); -} - -static inline int16_t unaligned16_le(const uint8_t *ptr) -{ - return (int16_t) (ptr[0] | (ptr[1] << 8)); -} - -/* - * Internal helper functions for input data processing. In order to get - * optimal performance, it is important to have "nsamples", "nchannels" - * and "big_endian" arguments used with this inline function as compile - * time constants. - */ - -static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels, int big_endian) -{ - /* handle X buffer wraparound */ - if (position < nsamples) { - if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], - 36 * sizeof(int16_t)); - if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], - 36 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 40; - } - - #define PCM(i) (big_endian ? \ - unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) - - /* copy/permutate audio samples */ - while ((nsamples -= 8) >= 0) { - position -= 8; - if (nchannels > 0) { - int16_t *x = &X[0][position]; - x[0] = PCM(0 + 7 * nchannels); - x[1] = PCM(0 + 3 * nchannels); - x[2] = PCM(0 + 6 * nchannels); - x[3] = PCM(0 + 4 * nchannels); - x[4] = PCM(0 + 0 * nchannels); - x[5] = PCM(0 + 2 * nchannels); - x[6] = PCM(0 + 1 * nchannels); - x[7] = PCM(0 + 5 * nchannels); - } - if (nchannels > 1) { - int16_t *x = &X[1][position]; - x[0] = PCM(1 + 7 * nchannels); - x[1] = PCM(1 + 3 * nchannels); - x[2] = PCM(1 + 6 * nchannels); - x[3] = PCM(1 + 4 * nchannels); - x[4] = PCM(1 + 0 * nchannels); - x[5] = PCM(1 + 2 * nchannels); - x[6] = PCM(1 + 1 * nchannels); - x[7] = PCM(1 + 5 * nchannels); - } - pcm += 16 * nchannels; - } - #undef PCM - - return position; -} - -static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels, int big_endian) -{ - /* handle X buffer wraparound */ - if (position < nsamples) { - if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], - 72 * sizeof(int16_t)); - if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], - 72 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 72; - } - - #define PCM(i) (big_endian ? \ - unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) - - /* copy/permutate audio samples */ - while ((nsamples -= 16) >= 0) { - position -= 16; - if (nchannels > 0) { - int16_t *x = &X[0][position]; - x[0] = PCM(0 + 15 * nchannels); - x[1] = PCM(0 + 7 * nchannels); - x[2] = PCM(0 + 14 * nchannels); - x[3] = PCM(0 + 8 * nchannels); - x[4] = PCM(0 + 13 * nchannels); - x[5] = PCM(0 + 9 * nchannels); - x[6] = PCM(0 + 12 * nchannels); - x[7] = PCM(0 + 10 * nchannels); - x[8] = PCM(0 + 11 * nchannels); - x[9] = PCM(0 + 3 * nchannels); - x[10] = PCM(0 + 6 * nchannels); - x[11] = PCM(0 + 0 * nchannels); - x[12] = PCM(0 + 5 * nchannels); - x[13] = PCM(0 + 1 * nchannels); - x[14] = PCM(0 + 4 * nchannels); - x[15] = PCM(0 + 2 * nchannels); - } - if (nchannels > 1) { - int16_t *x = &X[1][position]; - x[0] = PCM(1 + 15 * nchannels); - x[1] = PCM(1 + 7 * nchannels); - x[2] = PCM(1 + 14 * nchannels); - x[3] = PCM(1 + 8 * nchannels); - x[4] = PCM(1 + 13 * nchannels); - x[5] = PCM(1 + 9 * nchannels); - x[6] = PCM(1 + 12 * nchannels); - x[7] = PCM(1 + 10 * nchannels); - x[8] = PCM(1 + 11 * nchannels); - x[9] = PCM(1 + 3 * nchannels); - x[10] = PCM(1 + 6 * nchannels); - x[11] = PCM(1 + 0 * nchannels); - x[12] = PCM(1 + 5 * nchannels); - x[13] = PCM(1 + 1 * nchannels); - x[14] = PCM(1 + 4 * nchannels); - x[15] = PCM(1 + 2 * nchannels); - } - pcm += 32 * nchannels; - } - #undef PCM - - return position; -} - -/* - * Input data processing functions. The data is endian converted if needed, - * channels are deintrleaved and audio samples are reordered for use in - * SIMD-friendly analysis filter function. The results are put into "X" - * array, getting appended to the previous data (or it is better to say - * prepended, as the buffer is filled from top to bottom). Old data is - * discarded when neededed, but availability of (10 * nrof_subbands) - * contiguous samples is always guaranteed for the input to the analysis - * filter. This is achieved by copying a sufficient part of old data - * to the top of the buffer on buffer wraparound. - */ - -static int sbc_enc_process_input_4s_le(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 2, 0); - else - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 1, 0); -} - -static int sbc_enc_process_input_4s_be(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 2, 1); - else - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 1, 1); -} - -static int sbc_enc_process_input_8s_le(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 2, 0); - else - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 1, 0); -} - -static int sbc_enc_process_input_8s_be(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 2, 1); - else - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 1, 1); -} - -/* Supplementary function to count the number of leading zeros */ - -static inline int sbc_clz(uint32_t x) -{ -#ifdef __GNUC__ - return __builtin_clz(x); -#else - /* TODO: this should be replaced with something better if good - * performance is wanted when using compilers other than gcc */ - int cnt = 0; - while (x) { - cnt++; - x >>= 1; - } - return 32 - cnt; -#endif -} - -static void sbc_calc_scalefactors( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands) -{ - int ch, sb, blk; - for (ch = 0; ch < channels; ch++) { - for (sb = 0; sb < subbands; sb++) { - uint32_t x = 1 << SCALE_OUT_BITS; - for (blk = 0; blk < blocks; blk++) { - int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); - if (tmp != 0) - x |= tmp - 1; - } - scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - - sbc_clz(x); - } - } -} - -static int sbc_calc_scalefactors_j( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int subbands) -{ - int blk, joint = 0; - int32_t tmp0, tmp1; - uint32_t x, y; - - /* last subband does not use joint stereo */ - int sb = subbands - 1; - x = 1 << SCALE_OUT_BITS; - y = 1 << SCALE_OUT_BITS; - for (blk = 0; blk < blocks; blk++) { - tmp0 = fabs(sb_sample_f[blk][0][sb]); - tmp1 = fabs(sb_sample_f[blk][1][sb]); - if (tmp0 != 0) - x |= tmp0 - 1; - if (tmp1 != 0) - y |= tmp1 - 1; - } - scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); - scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); - - /* the rest of subbands can use joint stereo */ - while (--sb >= 0) { - int32_t sb_sample_j[16][2]; - x = 1 << SCALE_OUT_BITS; - y = 1 << SCALE_OUT_BITS; - for (blk = 0; blk < blocks; blk++) { - tmp0 = sb_sample_f[blk][0][sb]; - tmp1 = sb_sample_f[blk][1][sb]; - sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); - sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); - tmp0 = fabs(tmp0); - tmp1 = fabs(tmp1); - if (tmp0 != 0) - x |= tmp0 - 1; - if (tmp1 != 0) - y |= tmp1 - 1; - } - scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - - sbc_clz(x); - scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - - sbc_clz(y); - x = 1 << SCALE_OUT_BITS; - y = 1 << SCALE_OUT_BITS; - for (blk = 0; blk < blocks; blk++) { - tmp0 = fabs(sb_sample_j[blk][0]); - tmp1 = fabs(sb_sample_j[blk][1]); - if (tmp0 != 0) - x |= tmp0 - 1; - if (tmp1 != 0) - y |= tmp1 - 1; - } - x = (31 - SCALE_OUT_BITS) - sbc_clz(x); - y = (31 - SCALE_OUT_BITS) - sbc_clz(y); - - /* decide whether to use joint stereo for this subband */ - if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { - joint |= 1 << (subbands - 1 - sb); - scale_factor[0][sb] = x; - scale_factor[1][sb] = y; - for (blk = 0; blk < blocks; blk++) { - sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; - sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; - } - } - } - - /* bitmask with the information about subbands using joint stereo */ - return joint; -} - -/* - * Detect CPU features and setup function pointers - */ -void sbc_init_primitives(struct sbc_encoder_state *state) -{ - /* Default implementation for analyze functions */ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; - - /* Default implementation for input reordering / deinterleaving */ - state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; - state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; - state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; - state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; - - /* Default implementation for scale factors calculation */ - state->sbc_calc_scalefactors = sbc_calc_scalefactors; - state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; - state->implementation_info = "Generic C"; - - /* X86/AMD64 optimizations */ -#ifdef SBC_BUILD_WITH_MMX_SUPPORT - sbc_init_primitives_mmx(state); -#endif - - /* ARM optimizations */ -#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT - sbc_init_primitives_armv6(state); -#endif -#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT - sbc_init_primitives_iwmmxt(state); -#endif -#ifdef SBC_BUILD_WITH_NEON_SUPPORT - sbc_init_primitives_neon(state); -#endif -} diff --git a/src/modules/bluetooth/sbc/sbc_primitives.h b/src/modules/bluetooth/sbc/sbc_primitives.h deleted file mode 100644 index 17ad4f7..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_H -#define __SBC_PRIMITIVES_H - -#define SCALE_OUT_BITS 15 -#define SBC_X_BUFFER_SIZE 328 - -#ifdef __GNUC__ -#define SBC_ALWAYS_INLINE inline __attribute__((always_inline)) -#else -#define SBC_ALWAYS_INLINE inline -#endif - -struct sbc_encoder_state { - int position; - int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; - /* Polyphase analysis filter for 4 subbands configuration, - * it handles 4 blocks at once */ - void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); - /* Polyphase analysis filter for 8 subbands configuration, - * it handles 4 blocks at once */ - void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); - /* Process input data (deinterleave, endian conversion, reordering), - * depending on the number of subbands and input data byte order */ - int (*sbc_enc_process_input_4s_le)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int (*sbc_enc_process_input_4s_be)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int (*sbc_enc_process_input_8s_le)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int (*sbc_enc_process_input_8s_be)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - /* Scale factors calculation */ - void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands); - /* Scale factors calculation with joint stereo support */ - int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int subbands); - const char *implementation_info; -}; - -/* - * Initialize pointers to the functions which are the basic "building bricks" - * of SBC codec. Best implementation is selected based on target CPU - * capabilities. - */ -void sbc_init_primitives(struct sbc_encoder_state *encoder_state); - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.c b/src/modules/bluetooth/sbc/sbc_primitives_armv6.c deleted file mode 100644 index b321272..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_armv6.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_armv6.h" - -/* - * ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. - */ - -#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT - -static void __attribute__((naked)) sbc_analyze_four_armv6() -{ - /* r0 = in, r1 = out, r2 = consts */ - __asm__ volatile ( - "push {r1, r4-r7, lr}\n" - "push {r8-r11}\n" - "ldrd r4, r5, [r0, #0]\n" - "ldrd r6, r7, [r2, #0]\n" - "ldrd r8, r9, [r0, #16]\n" - "ldrd r10, r11, [r2, #16]\n" - "mov r14, #0x8000\n" - "smlad r3, r4, r6, r14\n" - "smlad r12, r5, r7, r14\n" - "ldrd r4, r5, [r0, #32]\n" - "ldrd r6, r7, [r2, #32]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #48]\n" - "ldrd r10, r11, [r2, #48]\n" - "smlad r3, r4, r6, r3\n" - "smlad r12, r5, r7, r12\n" - "ldrd r4, r5, [r0, #64]\n" - "ldrd r6, r7, [r2, #64]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #8]\n" - "ldrd r10, r11, [r2, #8]\n" - "smlad r3, r4, r6, r3\n" /* t1[0] is done */ - "smlad r12, r5, r7, r12\n" /* t1[1] is done */ - "ldrd r4, r5, [r0, #24]\n" - "ldrd r6, r7, [r2, #24]\n" - "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ - "smlad r12, r8, r10, r14\n" - "smlad r14, r9, r11, r14\n" - "ldrd r8, r9, [r0, #40]\n" - "ldrd r10, r11, [r2, #40]\n" - "smlad r12, r4, r6, r12\n" - "smlad r14, r5, r7, r14\n" - "ldrd r4, r5, [r0, #56]\n" - "ldrd r6, r7, [r2, #56]\n" - "smlad r12, r8, r10, r12\n" - "smlad r14, r9, r11, r14\n" - "ldrd r8, r9, [r0, #72]\n" - "ldrd r10, r11, [r2, #72]\n" - "smlad r12, r4, r6, r12\n" - "smlad r14, r5, r7, r14\n" - "ldrd r4, r5, [r2, #80]\n" /* start loading cos table */ - "smlad r12, r8, r10, r12\n" /* t1[2] is done */ - "smlad r14, r9, r11, r14\n" /* t1[3] is done */ - "ldrd r6, r7, [r2, #88]\n" - "ldrd r8, r9, [r2, #96]\n" - "ldrd r10, r11, [r2, #104]\n" /* cos table fully loaded */ - "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ - "smuad r4, r3, r4\n" - "smuad r5, r3, r5\n" - "smlad r4, r12, r8, r4\n" - "smlad r5, r12, r9, r5\n" - "smuad r6, r3, r6\n" - "smuad r7, r3, r7\n" - "smlad r6, r12, r10, r6\n" - "smlad r7, r12, r11, r7\n" - "pop {r8-r11}\n" - "stmia r1, {r4, r5, r6, r7}\n" - "pop {r1, r4-r7, pc}\n" - ); -} - -#define sbc_analyze_four(in, out, consts) \ - ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ - sbc_analyze_four_armv6)((in), (out), (consts)) - -static void __attribute__((naked)) sbc_analyze_eight_armv6() -{ - /* r0 = in, r1 = out, r2 = consts */ - __asm__ volatile ( - "push {r1, r4-r7, lr}\n" - "push {r8-r11}\n" - "ldrd r4, r5, [r0, #24]\n" - "ldrd r6, r7, [r2, #24]\n" - "ldrd r8, r9, [r0, #56]\n" - "ldrd r10, r11, [r2, #56]\n" - "mov r14, #0x8000\n" - "smlad r3, r4, r6, r14\n" - "smlad r12, r5, r7, r14\n" - "ldrd r4, r5, [r0, #88]\n" - "ldrd r6, r7, [r2, #88]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #120]\n" - "ldrd r10, r11, [r2, #120]\n" - "smlad r3, r4, r6, r3\n" - "smlad r12, r5, r7, r12\n" - "ldrd r4, r5, [r0, #152]\n" - "ldrd r6, r7, [r2, #152]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #16]\n" - "ldrd r10, r11, [r2, #16]\n" - "smlad r3, r4, r6, r3\n" /* t1[6] is done */ - "smlad r12, r5, r7, r12\n" /* t1[7] is done */ - "ldrd r4, r5, [r0, #48]\n" - "ldrd r6, r7, [r2, #48]\n" - "pkhtb r3, r12, r3, asr #16\n" /* combine t1[6] and t1[7] */ - "str r3, [sp, #-4]!\n" /* save to stack */ - "smlad r3, r8, r10, r14\n" - "smlad r12, r9, r11, r14\n" - "ldrd r8, r9, [r0, #80]\n" - "ldrd r10, r11, [r2, #80]\n" - "smlad r3, r4, r6, r3\n" - "smlad r12, r5, r7, r12\n" - "ldrd r4, r5, [r0, #112]\n" - "ldrd r6, r7, [r2, #112]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #144]\n" - "ldrd r10, r11, [r2, #144]\n" - "smlad r3, r4, r6, r3\n" - "smlad r12, r5, r7, r12\n" - "ldrd r4, r5, [r0, #0]\n" - "ldrd r6, r7, [r2, #0]\n" - "smlad r3, r8, r10, r3\n" /* t1[4] is done */ - "smlad r12, r9, r11, r12\n" /* t1[5] is done */ - "ldrd r8, r9, [r0, #32]\n" - "ldrd r10, r11, [r2, #32]\n" - "pkhtb r3, r12, r3, asr #16\n" /* combine t1[4] and t1[5] */ - "str r3, [sp, #-4]!\n" /* save to stack */ - "smlad r3, r4, r6, r14\n" - "smlad r12, r5, r7, r14\n" - "ldrd r4, r5, [r0, #64]\n" - "ldrd r6, r7, [r2, #64]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #96]\n" - "ldrd r10, r11, [r2, #96]\n" - "smlad r3, r4, r6, r3\n" - "smlad r12, r5, r7, r12\n" - "ldrd r4, r5, [r0, #128]\n" - "ldrd r6, r7, [r2, #128]\n" - "smlad r3, r8, r10, r3\n" - "smlad r12, r9, r11, r12\n" - "ldrd r8, r9, [r0, #8]\n" - "ldrd r10, r11, [r2, #8]\n" - "smlad r3, r4, r6, r3\n" /* t1[0] is done */ - "smlad r12, r5, r7, r12\n" /* t1[1] is done */ - "ldrd r4, r5, [r0, #40]\n" - "ldrd r6, r7, [r2, #40]\n" - "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ - "smlad r12, r8, r10, r14\n" - "smlad r14, r9, r11, r14\n" - "ldrd r8, r9, [r0, #72]\n" - "ldrd r10, r11, [r2, #72]\n" - "smlad r12, r4, r6, r12\n" - "smlad r14, r5, r7, r14\n" - "ldrd r4, r5, [r0, #104]\n" - "ldrd r6, r7, [r2, #104]\n" - "smlad r12, r8, r10, r12\n" - "smlad r14, r9, r11, r14\n" - "ldrd r8, r9, [r0, #136]\n" - "ldrd r10, r11, [r2, #136]!\n" - "smlad r12, r4, r6, r12\n" - "smlad r14, r5, r7, r14\n" - "ldrd r4, r5, [r2, #(160 - 136 + 0)]\n" - "smlad r12, r8, r10, r12\n" /* t1[2] is done */ - "smlad r14, r9, r11, r14\n" /* t1[3] is done */ - "ldrd r6, r7, [r2, #(160 - 136 + 8)]\n" - "smuad r4, r3, r4\n" - "smuad r5, r3, r5\n" - "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ - /* r3 = t2[0:1] */ - /* r12 = t2[2:3] */ - "pop {r0, r14}\n" /* t2[4:5], t2[6:7] */ - "ldrd r8, r9, [r2, #(160 - 136 + 32)]\n" - "smuad r6, r3, r6\n" - "smuad r7, r3, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 40)]\n" - "smlad r4, r12, r8, r4\n" - "smlad r5, r12, r9, r5\n" - "ldrd r8, r9, [r2, #(160 - 136 + 64)]\n" - "smlad r6, r12, r10, r6\n" - "smlad r7, r12, r11, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 72)]\n" - "smlad r4, r0, r8, r4\n" - "smlad r5, r0, r9, r5\n" - "ldrd r8, r9, [r2, #(160 - 136 + 96)]\n" - "smlad r6, r0, r10, r6\n" - "smlad r7, r0, r11, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 104)]\n" - "smlad r4, r14, r8, r4\n" - "smlad r5, r14, r9, r5\n" - "ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]\n" - "smlad r6, r14, r10, r6\n" - "smlad r7, r14, r11, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]\n" - "stmia r1!, {r4, r5}\n" - "smuad r4, r3, r8\n" - "smuad r5, r3, r9\n" - "ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]\n" - "stmia r1!, {r6, r7}\n" - "smuad r6, r3, r10\n" - "smuad r7, r3, r11\n" - "ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]\n" - "smlad r4, r12, r8, r4\n" - "smlad r5, r12, r9, r5\n" - "ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]\n" - "smlad r6, r12, r10, r6\n" - "smlad r7, r12, r11, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]\n" - "smlad r4, r0, r8, r4\n" - "smlad r5, r0, r9, r5\n" - "ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]\n" - "smlad r6, r0, r10, r6\n" - "smlad r7, r0, r11, r7\n" - "ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]\n" - "smlad r4, r14, r8, r4\n" - "smlad r5, r14, r9, r5\n" - "smlad r6, r14, r10, r6\n" - "smlad r7, r14, r11, r7\n" - "pop {r8-r11}\n" - "stmia r1!, {r4, r5, r6, r7}\n" - "pop {r1, r4-r7, pc}\n" - ); -} - -#define sbc_analyze_eight(in, out, consts) \ - ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ - sbc_analyze_eight_armv6)((in), (out), (consts)) - -static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight(x + 0, out, analysis_consts_fixed8_simd_even); -} - -void sbc_init_primitives_armv6(struct sbc_encoder_state *state) -{ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6; - state->implementation_info = "ARMv6 SIMD"; -} - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.h b/src/modules/bluetooth/sbc/sbc_primitives_armv6.h deleted file mode 100644 index 6a9efe5..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_armv6.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_ARMV6_H -#define __SBC_PRIMITIVES_ARMV6_H - -#include "sbc_primitives.h" - -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7M__) -#define SBC_HAVE_ARMV6 1 -#endif - -#if !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) && \ - defined(__GNUC__) && defined(SBC_HAVE_ARMV6) && \ - defined(__ARM_EABI__) && !defined(__ARM_NEON__) && \ - (!defined(__thumb__) || defined(__thumb2__)) - -#define SBC_BUILD_WITH_ARMV6_SUPPORT - -void sbc_init_primitives_armv6(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c deleted file mode 100644 index e0bd060..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2010 Keith Mok <ek9852 at gmail.com> - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_iwmmxt.h" - -/* - * IWMMXT optimizations - */ - -#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT - -static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - __asm__ volatile ( - "wldrd wr0, [%0]\n" - "tbcstw wr4, %2\n" - "wldrd wr2, [%1]\n" - "wldrd wr1, [%0, #8]\n" - "wldrd wr3, [%1, #8]\n" - "wmadds wr0, wr2, wr0\n" - " wldrd wr6, [%0, #16]\n" - "wmadds wr1, wr3, wr1\n" - " wldrd wr7, [%0, #24]\n" - "waddwss wr0, wr0, wr4\n" - " wldrd wr8, [%1, #16]\n" - "waddwss wr1, wr1, wr4\n" - " wldrd wr9, [%1, #24]\n" - " wmadds wr6, wr8, wr6\n" - " wldrd wr2, [%0, #32]\n" - " wmadds wr7, wr9, wr7\n" - " wldrd wr3, [%0, #40]\n" - " waddwss wr0, wr6, wr0\n" - " wldrd wr4, [%1, #32]\n" - " waddwss wr1, wr7, wr1\n" - " wldrd wr5, [%1, #40]\n" - " wmadds wr2, wr4, wr2\n" - "wldrd wr6, [%0, #48]\n" - " wmadds wr3, wr5, wr3\n" - "wldrd wr7, [%0, #56]\n" - " waddwss wr0, wr2, wr0\n" - "wldrd wr8, [%1, #48]\n" - " waddwss wr1, wr3, wr1\n" - "wldrd wr9, [%1, #56]\n" - "wmadds wr6, wr8, wr6\n" - " wldrd wr2, [%0, #64]\n" - "wmadds wr7, wr9, wr7\n" - " wldrd wr3, [%0, #72]\n" - "waddwss wr0, wr6, wr0\n" - " wldrd wr4, [%1, #64]\n" - "waddwss wr1, wr7, wr1\n" - " wldrd wr5, [%1, #72]\n" - " wmadds wr2, wr4, wr2\n" - "tmcr wcgr0, %4\n" - " wmadds wr3, wr5, wr3\n" - " waddwss wr0, wr2, wr0\n" - " waddwss wr1, wr3, wr1\n" - "\n" - "wsrawg wr0, wr0, wcgr0\n" - " wldrd wr4, [%1, #80]\n" - "wsrawg wr1, wr1, wcgr0\n" - " wldrd wr5, [%1, #88]\n" - "wpackwss wr0, wr0, wr0\n" - " wldrd wr6, [%1, #96]\n" - "wpackwss wr1, wr1, wr1\n" - "wmadds wr2, wr5, wr0\n" - " wldrd wr7, [%1, #104]\n" - "wmadds wr0, wr4, wr0\n" - "\n" - " wmadds wr3, wr7, wr1\n" - " wmadds wr1, wr6, wr1\n" - " waddwss wr2, wr3, wr2\n" - " waddwss wr0, wr1, wr0\n" - "\n" - "wstrd wr0, [%3]\n" - "wstrd wr2, [%3, #8]\n" - : - : "r" (in), "r" (consts), - "r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out), - "r" (SBC_PROTO_FIXED4_SCALE) - : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", - "wr8", "wr9", "wcgr0", "memory"); -} - -static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - __asm__ volatile ( - "wldrd wr0, [%0]\n" - "tbcstw wr15, %2\n" - "wldrd wr1, [%0, #8]\n" - "wldrd wr2, [%0, #16]\n" - "wldrd wr3, [%0, #24]\n" - "wldrd wr4, [%1]\n" - "wldrd wr5, [%1, #8]\n" - "wldrd wr6, [%1, #16]\n" - "wldrd wr7, [%1, #24]\n" - "wmadds wr0, wr0, wr4\n" - " wldrd wr8, [%1, #32]\n" - "wmadds wr1, wr1, wr5\n" - " wldrd wr9, [%1, #40]\n" - "wmadds wr2, wr2, wr6\n" - " wldrd wr10, [%1, #48]\n" - "wmadds wr3, wr3, wr7\n" - " wldrd wr11, [%1, #56]\n" - "waddwss wr0, wr0, wr15\n" - " wldrd wr4, [%0, #32]\n" - "waddwss wr1, wr1, wr15\n" - " wldrd wr5, [%0, #40]\n" - "waddwss wr2, wr2, wr15\n" - " wldrd wr6, [%0, #48]\n" - "waddwss wr3, wr3, wr15\n" - " wldrd wr7, [%0, #56]\n" - " wmadds wr4, wr4, wr8\n" - " wldrd wr12, [%0, #64]\n" - " wmadds wr5, wr5, wr9\n" - " wldrd wr13, [%0, #72]\n" - " wmadds wr6, wr6, wr10\n" - " wldrd wr14, [%0, #80]\n" - " wmadds wr7, wr7, wr11\n" - " wldrd wr15, [%0, #88]\n" - " waddwss wr0, wr4, wr0\n" - " wldrd wr8, [%1, #64]\n" - " waddwss wr1, wr5, wr1\n" - " wldrd wr9, [%1, #72]\n" - " waddwss wr2, wr6, wr2\n" - " wldrd wr10, [%1, #80]\n" - " waddwss wr3, wr7, wr3\n" - " wldrd wr11, [%1, #88]\n" - " wmadds wr12, wr12, wr8\n" - "wldrd wr4, [%0, #96]\n" - " wmadds wr13, wr13, wr9\n" - "wldrd wr5, [%0, #104]\n" - " wmadds wr14, wr14, wr10\n" - "wldrd wr6, [%0, #112]\n" - " wmadds wr15, wr15, wr11\n" - "wldrd wr7, [%0, #120]\n" - " waddwss wr0, wr12, wr0\n" - "wldrd wr8, [%1, #96]\n" - " waddwss wr1, wr13, wr1\n" - "wldrd wr9, [%1, #104]\n" - " waddwss wr2, wr14, wr2\n" - "wldrd wr10, [%1, #112]\n" - " waddwss wr3, wr15, wr3\n" - "wldrd wr11, [%1, #120]\n" - "wmadds wr4, wr4, wr8\n" - " wldrd wr12, [%0, #128]\n" - "wmadds wr5, wr5, wr9\n" - " wldrd wr13, [%0, #136]\n" - "wmadds wr6, wr6, wr10\n" - " wldrd wr14, [%0, #144]\n" - "wmadds wr7, wr7, wr11\n" - " wldrd wr15, [%0, #152]\n" - "waddwss wr0, wr4, wr0\n" - " wldrd wr8, [%1, #128]\n" - "waddwss wr1, wr5, wr1\n" - " wldrd wr9, [%1, #136]\n" - "waddwss wr2, wr6, wr2\n" - " wldrd wr10, [%1, #144]\n" - " waddwss wr3, wr7, wr3\n" - " wldrd wr11, [%1, #152]\n" - " wmadds wr12, wr12, wr8\n" - "tmcr wcgr0, %4\n" - " wmadds wr13, wr13, wr9\n" - " wmadds wr14, wr14, wr10\n" - " wmadds wr15, wr15, wr11\n" - " waddwss wr0, wr12, wr0\n" - " waddwss wr1, wr13, wr1\n" - " waddwss wr2, wr14, wr2\n" - " waddwss wr3, wr15, wr3\n" - "\n" - "wsrawg wr0, wr0, wcgr0\n" - "wsrawg wr1, wr1, wcgr0\n" - "wsrawg wr2, wr2, wcgr0\n" - "wsrawg wr3, wr3, wcgr0\n" - "\n" - "wpackwss wr0, wr0, wr0\n" - "wpackwss wr1, wr1, wr1\n" - " wldrd wr4, [%1, #160]\n" - "wpackwss wr2, wr2, wr2\n" - " wldrd wr5, [%1, #168]\n" - "wpackwss wr3, wr3, wr3\n" - " wldrd wr6, [%1, #192]\n" - " wmadds wr4, wr4, wr0\n" - " wldrd wr7, [%1, #200]\n" - " wmadds wr5, wr5, wr0\n" - " wldrd wr8, [%1, #224]\n" - " wmadds wr6, wr6, wr1\n" - " wldrd wr9, [%1, #232]\n" - " wmadds wr7, wr7, wr1\n" - " waddwss wr4, wr6, wr4\n" - " waddwss wr5, wr7, wr5\n" - " wmadds wr8, wr8, wr2\n" - "wldrd wr6, [%1, #256]\n" - " wmadds wr9, wr9, wr2\n" - "wldrd wr7, [%1, #264]\n" - "waddwss wr4, wr8, wr4\n" - " waddwss wr5, wr9, wr5\n" - "wmadds wr6, wr6, wr3\n" - "wmadds wr7, wr7, wr3\n" - "waddwss wr4, wr6, wr4\n" - "waddwss wr5, wr7, wr5\n" - "\n" - "wstrd wr4, [%3]\n" - "wstrd wr5, [%3, #8]\n" - "\n" - "wldrd wr6, [%1, #176]\n" - "wldrd wr5, [%1, #184]\n" - "wmadds wr5, wr5, wr0\n" - "wldrd wr8, [%1, #208]\n" - "wmadds wr0, wr6, wr0\n" - "wldrd wr9, [%1, #216]\n" - "wmadds wr9, wr9, wr1\n" - "wldrd wr6, [%1, #240]\n" - "wmadds wr1, wr8, wr1\n" - "wldrd wr7, [%1, #248]\n" - "waddwss wr0, wr1, wr0\n" - "waddwss wr5, wr9, wr5\n" - "wmadds wr7, wr7, wr2\n" - "wldrd wr8, [%1, #272]\n" - "wmadds wr2, wr6, wr2\n" - "wldrd wr9, [%1, #280]\n" - "waddwss wr0, wr2, wr0\n" - "waddwss wr5, wr7, wr5\n" - "wmadds wr9, wr9, wr3\n" - "wmadds wr3, wr8, wr3\n" - "waddwss wr0, wr3, wr0\n" - "waddwss wr5, wr9, wr5\n" - "\n" - "wstrd wr0, [%3, #16]\n" - "wstrd wr5, [%3, #24]\n" - : - : "r" (in), "r" (consts), - "r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out), - "r" (SBC_PROTO_FIXED8_SCALE) - : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", - "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15", - "wcgr0", "memory"); -} - -static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even); -} - -void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state) -{ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt; - state->implementation_info = "IWMMXT"; -} - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h deleted file mode 100644 index b535e68..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2010 Keith Mok <ek9852 at gmail.com> - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_IWMMXT_H -#define __SBC_PRIMITIVES_IWMMXT_H - -#include "sbc_primitives.h" - -#if defined(__GNUC__) && defined(__IWMMXT__) && \ - !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) - -#define SBC_BUILD_WITH_IWMMXT_SUPPORT - -void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c deleted file mode 100644 index 27e9a56..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_mmx.h" - -/* - * MMX optimizations - */ - -#ifdef SBC_BUILD_WITH_MMX_SUPPORT - -static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - static const SBC_ALIGNED int32_t round_c[2] = { - 1 << (SBC_PROTO_FIXED4_SCALE - 1), - 1 << (SBC_PROTO_FIXED4_SCALE - 1), - }; - __asm__ volatile ( - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "pmaddwd (%1), %%mm0\n" - "pmaddwd 8(%1), %%mm1\n" - "paddd (%2), %%mm0\n" - "paddd (%2), %%mm1\n" - "\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "pmaddwd 16(%1), %%mm2\n" - "pmaddwd 24(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 32(%0), %%mm2\n" - "movq 40(%0), %%mm3\n" - "pmaddwd 32(%1), %%mm2\n" - "pmaddwd 40(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 48(%0), %%mm2\n" - "movq 56(%0), %%mm3\n" - "pmaddwd 48(%1), %%mm2\n" - "pmaddwd 56(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 64(%0), %%mm2\n" - "movq 72(%0), %%mm3\n" - "pmaddwd 64(%1), %%mm2\n" - "pmaddwd 72(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "psrad %4, %%mm0\n" - "psrad %4, %%mm1\n" - "packssdw %%mm0, %%mm0\n" - "packssdw %%mm1, %%mm1\n" - "\n" - "movq %%mm0, %%mm2\n" - "pmaddwd 80(%1), %%mm0\n" - "pmaddwd 88(%1), %%mm2\n" - "\n" - "movq %%mm1, %%mm3\n" - "pmaddwd 96(%1), %%mm1\n" - "pmaddwd 104(%1), %%mm3\n" - "paddd %%mm1, %%mm0\n" - "paddd %%mm3, %%mm2\n" - "\n" - "movq %%mm0, (%3)\n" - "movq %%mm2, 8(%3)\n" - : - : "r" (in), "r" (consts), "r" (&round_c), "r" (out), - "i" (SBC_PROTO_FIXED4_SCALE) - : "cc", "memory"); -} - -static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - static const SBC_ALIGNED int32_t round_c[2] = { - 1 << (SBC_PROTO_FIXED8_SCALE - 1), - 1 << (SBC_PROTO_FIXED8_SCALE - 1), - }; - __asm__ volatile ( - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "pmaddwd (%1), %%mm0\n" - "pmaddwd 8(%1), %%mm1\n" - "pmaddwd 16(%1), %%mm2\n" - "pmaddwd 24(%1), %%mm3\n" - "paddd (%2), %%mm0\n" - "paddd (%2), %%mm1\n" - "paddd (%2), %%mm2\n" - "paddd (%2), %%mm3\n" - "\n" - "movq 32(%0), %%mm4\n" - "movq 40(%0), %%mm5\n" - "movq 48(%0), %%mm6\n" - "movq 56(%0), %%mm7\n" - "pmaddwd 32(%1), %%mm4\n" - "pmaddwd 40(%1), %%mm5\n" - "pmaddwd 48(%1), %%mm6\n" - "pmaddwd 56(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 64(%0), %%mm4\n" - "movq 72(%0), %%mm5\n" - "movq 80(%0), %%mm6\n" - "movq 88(%0), %%mm7\n" - "pmaddwd 64(%1), %%mm4\n" - "pmaddwd 72(%1), %%mm5\n" - "pmaddwd 80(%1), %%mm6\n" - "pmaddwd 88(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 96(%0), %%mm4\n" - "movq 104(%0), %%mm5\n" - "movq 112(%0), %%mm6\n" - "movq 120(%0), %%mm7\n" - "pmaddwd 96(%1), %%mm4\n" - "pmaddwd 104(%1), %%mm5\n" - "pmaddwd 112(%1), %%mm6\n" - "pmaddwd 120(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 128(%0), %%mm4\n" - "movq 136(%0), %%mm5\n" - "movq 144(%0), %%mm6\n" - "movq 152(%0), %%mm7\n" - "pmaddwd 128(%1), %%mm4\n" - "pmaddwd 136(%1), %%mm5\n" - "pmaddwd 144(%1), %%mm6\n" - "pmaddwd 152(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "psrad %4, %%mm0\n" - "psrad %4, %%mm1\n" - "psrad %4, %%mm2\n" - "psrad %4, %%mm3\n" - "\n" - "packssdw %%mm0, %%mm0\n" - "packssdw %%mm1, %%mm1\n" - "packssdw %%mm2, %%mm2\n" - "packssdw %%mm3, %%mm3\n" - "\n" - "movq %%mm0, %%mm4\n" - "movq %%mm0, %%mm5\n" - "pmaddwd 160(%1), %%mm4\n" - "pmaddwd 168(%1), %%mm5\n" - "\n" - "movq %%mm1, %%mm6\n" - "movq %%mm1, %%mm7\n" - "pmaddwd 192(%1), %%mm6\n" - "pmaddwd 200(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm2, %%mm6\n" - "movq %%mm2, %%mm7\n" - "pmaddwd 224(%1), %%mm6\n" - "pmaddwd 232(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm3, %%mm6\n" - "movq %%mm3, %%mm7\n" - "pmaddwd 256(%1), %%mm6\n" - "pmaddwd 264(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm4, (%3)\n" - "movq %%mm5, 8(%3)\n" - "\n" - "movq %%mm0, %%mm5\n" - "pmaddwd 176(%1), %%mm0\n" - "pmaddwd 184(%1), %%mm5\n" - "\n" - "movq %%mm1, %%mm7\n" - "pmaddwd 208(%1), %%mm1\n" - "pmaddwd 216(%1), %%mm7\n" - "paddd %%mm1, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm2, %%mm7\n" - "pmaddwd 240(%1), %%mm2\n" - "pmaddwd 248(%1), %%mm7\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm3, %%mm7\n" - "pmaddwd 272(%1), %%mm3\n" - "pmaddwd 280(%1), %%mm7\n" - "paddd %%mm3, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm0, 16(%3)\n" - "movq %%mm5, 24(%3)\n" - : - : "r" (in), "r" (consts), "r" (&round_c), "r" (out), - "i" (SBC_PROTO_FIXED8_SCALE) - : "cc", "memory"); -} - -static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); - - __asm__ volatile ("emms\n"); -} - -static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); - - __asm__ volatile ("emms\n"); -} - -static void sbc_calc_scalefactors_mmx( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands) -{ - static const SBC_ALIGNED int32_t consts[2] = { - 1 << SCALE_OUT_BITS, - 1 << SCALE_OUT_BITS, - }; - int ch, sb; - intptr_t blk; - for (ch = 0; ch < channels; ch++) { - for (sb = 0; sb < subbands; sb += 2) { - blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] - - (char *) &sb_sample_f[0][0][0])); - __asm__ volatile ( - "movq (%4), %%mm0\n" - "1:\n" - "movq (%1, %0), %%mm1\n" - "pxor %%mm2, %%mm2\n" - "pcmpgtd %%mm2, %%mm1\n" - "paddd (%1, %0), %%mm1\n" - "pcmpgtd %%mm1, %%mm2\n" - "pxor %%mm2, %%mm1\n" - - "por %%mm1, %%mm0\n" - - "sub %2, %0\n" - "jns 1b\n" - - "movd %%mm0, %k0\n" - "psrlq $32, %%mm0\n" - "bsrl %k0, %k0\n" - "subl %5, %k0\n" - "movl %k0, (%3)\n" - - "movd %%mm0, %k0\n" - "bsrl %k0, %k0\n" - "subl %5, %k0\n" - "movl %k0, 4(%3)\n" - : "+r" (blk) - : "r" (&sb_sample_f[0][ch][sb]), - "i" ((char *) &sb_sample_f[1][0][0] - - (char *) &sb_sample_f[0][0][0]), - "r" (&scale_factor[ch][sb]), - "r" (&consts), - "i" (SCALE_OUT_BITS) - : "cc", "memory"); - } - } - __asm__ volatile ("emms\n"); -} - -static int check_mmx_support(void) -{ -#ifdef __amd64__ - return 1; /* We assume that all 64-bit processors have MMX support */ -#else - int cpuid_feature_information; - __asm__ volatile ( - /* According to Intel manual, CPUID instruction is supported - * if the value of ID bit (bit 21) in EFLAGS can be modified */ - "pushf\n" - "movl (%%esp), %0\n" - "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ - "popf\n" - "pushf\n" - "xorl (%%esp), %0\n" /* check if ID bit changed */ - "jz 1f\n" - "push %%eax\n" - "push %%ebx\n" - "push %%ecx\n" - "mov $1, %%eax\n" - "cpuid\n" - "pop %%ecx\n" - "pop %%ebx\n" - "pop %%eax\n" - "1:\n" - "popf\n" - : "=d" (cpuid_feature_information) - : - : "cc"); - return cpuid_feature_information & (1 << 23); -#endif -} - -void sbc_init_primitives_mmx(struct sbc_encoder_state *state) -{ - if (check_mmx_support()) { - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; - state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx; - state->implementation_info = "MMX"; - } -} - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.h b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h deleted file mode 100644 index e0e728b..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_mmx.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_MMX_H -#define __SBC_PRIMITIVES_MMX_H - -#include "sbc_primitives.h" - -#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ - !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) - -#define SBC_BUILD_WITH_MMX_SUPPORT - -void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c deleted file mode 100644 index 5d4d0e3..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.c +++ /dev/null @@ -1,893 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_neon.h" - -/* - * ARM NEON optimizations - */ - -#ifdef SBC_BUILD_WITH_NEON_SUPPORT - -static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - __asm__ volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vmlal.s16 q1, d5, d9\n" - - "vpadd.s32 d0, d0, d1\n" - "vpadd.s32 d1, d2, d3\n" - - "vrshrn.s32 d0, q0, %3\n" - - "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" - - "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vmull.s16 q3, d2, d0\n" - "vmull.s16 q4, d3, d0\n" - "vmlal.s16 q3, d4, d1\n" - "vmlal.s16 q4, d5, d1\n" - - "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED4_SCALE) - : "memory", - "d0", "d1", "d2", "d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11"); -} - -static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - __asm__ volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmull.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmull.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q8, d6, d10\n" - "vmlal.s16 q9, d7, d11\n" - - "vpadd.s32 d0, d12, d13\n" - "vpadd.s32 d1, d14, d15\n" - "vpadd.s32 d2, d16, d17\n" - "vpadd.s32 d3, d18, d19\n" - - "vrshr.s32 q0, q0, %3\n" - "vrshr.s32 q1, q1, %3\n" - "vmovn.s32 d0, q0\n" - "vmovn.s32 d1, q1\n" - - "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ - "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmull.s16 q6, d4, d0\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmull.s16 q7, d5, d0\n" - "vmull.s16 q8, d6, d0\n" - "vmull.s16 q9, d7, d0\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d1\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d1\n" - "vmlal.s16 q8, d6, d1\n" - "vmlal.s16 q9, d7, d1\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d2\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d2\n" - "vmlal.s16 q8, d6, d2\n" - "vmlal.s16 q9, d7, d2\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d3\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d3\n" - "vmlal.s16 q8, d6, d3\n" - "vmlal.s16 q9, d7, d3\n" - - "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ - "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ - "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED8_SCALE) - : "memory", - "d0", "d1", "d2", "d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11", - "d12", "d13", "d14", "d15", "d16", "d17", - "d18", "d19"); -} - -static inline void sbc_analyze_4b_4s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); -} - -static void sbc_calc_scalefactors_neon( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands) -{ - int ch, sb; - for (ch = 0; ch < channels; ch++) { - for (sb = 0; sb < subbands; sb += 4) { - int blk = blocks; - int32_t *in = &sb_sample_f[0][ch][sb]; - __asm__ volatile ( - "vmov.s32 q0, #0\n" - "vmov.s32 q1, %[c1]\n" - "vmov.s32 q14, #1\n" - "vmov.s32 q15, %[c2]\n" - "vadd.s32 q1, q1, q14\n" - "1:\n" - "vld1.32 {d16, d17}, [%[in], :128], %[inc]\n" - "vabs.s32 q8, q8\n" - "vld1.32 {d18, d19}, [%[in], :128], %[inc]\n" - "vabs.s32 q9, q9\n" - "vld1.32 {d20, d21}, [%[in], :128], %[inc]\n" - "vabs.s32 q10, q10\n" - "vld1.32 {d22, d23}, [%[in], :128], %[inc]\n" - "vabs.s32 q11, q11\n" - "vmax.s32 q0, q0, q8\n" - "vmax.s32 q1, q1, q9\n" - "vmax.s32 q0, q0, q10\n" - "vmax.s32 q1, q1, q11\n" - "subs %[blk], %[blk], #4\n" - "bgt 1b\n" - "vmax.s32 q0, q0, q1\n" - "vsub.s32 q0, q0, q14\n" - "vclz.s32 q0, q0\n" - "vsub.s32 q0, q15, q0\n" - "vst1.32 {d0, d1}, [%[out], :128]\n" - : - [blk] "+r" (blk), - [in] "+r" (in) - : - [inc] "r" ((char *) &sb_sample_f[1][0][0] - - (char *) &sb_sample_f[0][0][0]), - [out] "r" (&scale_factor[ch][sb]), - [c1] "i" (1 << SCALE_OUT_BITS), - [c2] "i" (31 - SCALE_OUT_BITS) - : "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", - "d20", "d21", "d22", "d23", "d24", "d25", "d26", - "d27", "d28", "d29", "d30", "d31", "cc", "memory"); - } - } -} - -int sbc_calc_scalefactors_j_neon( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int subbands) -{ - static SBC_ALIGNED int32_t joint_bits_mask[8] = { - 8, 4, 2, 1, 128, 64, 32, 16 - }; - int joint, i; - int32_t *in0, *in1; - int32_t *in = &sb_sample_f[0][0][0]; - uint32_t *out0, *out1; - uint32_t *out = &scale_factor[0][0]; - int32_t *consts = joint_bits_mask; - - i = subbands; - - __asm__ volatile ( - /* - * constants: q13 = (31 - SCALE_OUT_BITS), q14 = 1 - * input: q0 = ((1 << SCALE_OUT_BITS) + 1) - * %[in0] - samples for channel 0 - * %[in1] - samples for shannel 1 - * output: q0, q1 - scale factors without joint stereo - * q2, q3 - scale factors with joint stereo - * q15 - joint stereo selection mask - */ - ".macro calc_scalefactors\n" - "vmov.s32 q1, q0\n" - "vmov.s32 q2, q0\n" - "vmov.s32 q3, q0\n" - "mov %[i], %[blocks]\n" - "1:\n" - "vld1.32 {d18, d19}, [%[in1], :128], %[inc]\n" - "vbic.s32 q11, q9, q14\n" - "vld1.32 {d16, d17}, [%[in0], :128], %[inc]\n" - "vhadd.s32 q10, q8, q11\n" - "vhsub.s32 q11, q8, q11\n" - "vabs.s32 q8, q8\n" - "vabs.s32 q9, q9\n" - "vabs.s32 q10, q10\n" - "vabs.s32 q11, q11\n" - "vmax.s32 q0, q0, q8\n" - "vmax.s32 q1, q1, q9\n" - "vmax.s32 q2, q2, q10\n" - "vmax.s32 q3, q3, q11\n" - "subs %[i], %[i], #1\n" - "bgt 1b\n" - "vsub.s32 q0, q0, q14\n" - "vsub.s32 q1, q1, q14\n" - "vsub.s32 q2, q2, q14\n" - "vsub.s32 q3, q3, q14\n" - "vclz.s32 q0, q0\n" - "vclz.s32 q1, q1\n" - "vclz.s32 q2, q2\n" - "vclz.s32 q3, q3\n" - "vsub.s32 q0, q13, q0\n" - "vsub.s32 q1, q13, q1\n" - "vsub.s32 q2, q13, q2\n" - "vsub.s32 q3, q13, q3\n" - ".endm\n" - /* - * constants: q14 = 1 - * input: q15 - joint stereo selection mask - * %[in0] - value set by calc_scalefactors macro - * %[in1] - value set by calc_scalefactors macro - */ - ".macro update_joint_stereo_samples\n" - "sub %[out1], %[in1], %[inc]\n" - "sub %[out0], %[in0], %[inc]\n" - "sub %[in1], %[in1], %[inc], asl #1\n" - "sub %[in0], %[in0], %[inc], asl #1\n" - "vld1.32 {d18, d19}, [%[in1], :128]\n" - "vbic.s32 q11, q9, q14\n" - "vld1.32 {d16, d17}, [%[in0], :128]\n" - "vld1.32 {d2, d3}, [%[out1], :128]\n" - "vbic.s32 q3, q1, q14\n" - "vld1.32 {d0, d1}, [%[out0], :128]\n" - "vhsub.s32 q10, q8, q11\n" - "vhadd.s32 q11, q8, q11\n" - "vhsub.s32 q2, q0, q3\n" - "vhadd.s32 q3, q0, q3\n" - "vbif.s32 q10, q9, q15\n" - "vbif.s32 d22, d16, d30\n" - "sub %[inc], %[zero], %[inc], asl #1\n" - "sub %[i], %[blocks], #2\n" - "2:\n" - "vbif.s32 d23, d17, d31\n" - "vst1.32 {d20, d21}, [%[in1], :128], %[inc]\n" - "vbif.s32 d4, d2, d30\n" - "vld1.32 {d18, d19}, [%[in1], :128]\n" - "vbif.s32 d5, d3, d31\n" - "vst1.32 {d22, d23}, [%[in0], :128], %[inc]\n" - "vbif.s32 d6, d0, d30\n" - "vld1.32 {d16, d17}, [%[in0], :128]\n" - "vbif.s32 d7, d1, d31\n" - "vst1.32 {d4, d5}, [%[out1], :128], %[inc]\n" - "vbic.s32 q11, q9, q14\n" - "vld1.32 {d2, d3}, [%[out1], :128]\n" - "vst1.32 {d6, d7}, [%[out0], :128], %[inc]\n" - "vbic.s32 q3, q1, q14\n" - "vld1.32 {d0, d1}, [%[out0], :128]\n" - "vhsub.s32 q10, q8, q11\n" - "vhadd.s32 q11, q8, q11\n" - "vhsub.s32 q2, q0, q3\n" - "vhadd.s32 q3, q0, q3\n" - "vbif.s32 q10, q9, q15\n" - "vbif.s32 d22, d16, d30\n" - "subs %[i], %[i], #2\n" - "bgt 2b\n" - "sub %[inc], %[zero], %[inc], asr #1\n" - "vbif.s32 d23, d17, d31\n" - "vst1.32 {d20, d21}, [%[in1], :128]\n" - "vbif.s32 q2, q1, q15\n" - "vst1.32 {d22, d23}, [%[in0], :128]\n" - "vbif.s32 q3, q0, q15\n" - "vst1.32 {d4, d5}, [%[out1], :128]\n" - "vst1.32 {d6, d7}, [%[out0], :128]\n" - ".endm\n" - - "vmov.s32 q14, #1\n" - "vmov.s32 q13, %[c2]\n" - - "cmp %[i], #4\n" - "bne 8f\n" - - "4:\n" /* 4 subbands */ - "add %[in0], %[in], #0\n" - "add %[in1], %[in], #32\n" - "add %[out0], %[out], #0\n" - "add %[out1], %[out], #32\n" - "vmov.s32 q0, %[c1]\n" - "vadd.s32 q0, q0, q14\n" - - "calc_scalefactors\n" - - /* check whether to use joint stereo for subbands 0, 1, 2 */ - "vadd.s32 q15, q0, q1\n" - "vadd.s32 q9, q2, q3\n" - "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ - "vld1.32 {d16, d17}, [%[consts], :128]!\n" - "vcgt.s32 q15, q15, q9\n" - - /* calculate and save to memory 'joint' variable */ - /* update and save scale factors to memory */ - " vand.s32 q8, q8, q15\n" - "vbit.s32 q0, q2, q15\n" - " vpadd.s32 d16, d16, d17\n" - "vbit.s32 q1, q3, q15\n" - " vpadd.s32 d16, d16, d16\n" - "vst1.32 {d0, d1}, [%[out0], :128]\n" - "vst1.32 {d2, d3}, [%[out1], :128]\n" - " vst1.32 {d16[0]}, [%[joint]]\n" - - "update_joint_stereo_samples\n" - "b 9f\n" - - "8:\n" /* 8 subbands */ - "add %[in0], %[in], #16\n\n" - "add %[in1], %[in], #48\n" - "add %[out0], %[out], #16\n\n" - "add %[out1], %[out], #48\n" - "vmov.s32 q0, %[c1]\n" - "vadd.s32 q0, q0, q14\n" - - "calc_scalefactors\n" - - /* check whether to use joint stereo for subbands 4, 5, 6 */ - "vadd.s32 q15, q0, q1\n" - "vadd.s32 q9, q2, q3\n" - "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ - "vld1.32 {d16, d17}, [%[consts], :128]!\n" - "vcgt.s32 q15, q15, q9\n" - - /* calculate part of 'joint' variable and save it to d24 */ - /* update and save scale factors to memory */ - " vand.s32 q8, q8, q15\n" - "vbit.s32 q0, q2, q15\n" - " vpadd.s32 d16, d16, d17\n" - "vbit.s32 q1, q3, q15\n" - "vst1.32 {d0, d1}, [%[out0], :128]\n" - "vst1.32 {d2, d3}, [%[out1], :128]\n" - " vpadd.s32 d24, d16, d16\n" - - "update_joint_stereo_samples\n" - - "add %[in0], %[in], #0\n" - "add %[in1], %[in], #32\n" - "add %[out0], %[out], #0\n\n" - "add %[out1], %[out], #32\n" - "vmov.s32 q0, %[c1]\n" - "vadd.s32 q0, q0, q14\n" - - "calc_scalefactors\n" - - /* check whether to use joint stereo for subbands 0, 1, 2, 3 */ - "vadd.s32 q15, q0, q1\n" - "vadd.s32 q9, q2, q3\n" - "vld1.32 {d16, d17}, [%[consts], :128]!\n" - "vcgt.s32 q15, q15, q9\n" - - /* combine last part of 'joint' with d24 and save to memory */ - /* update and save scale factors to memory */ - " vand.s32 q8, q8, q15\n" - "vbit.s32 q0, q2, q15\n" - " vpadd.s32 d16, d16, d17\n" - "vbit.s32 q1, q3, q15\n" - " vpadd.s32 d16, d16, d16\n" - "vst1.32 {d0, d1}, [%[out0], :128]\n" - " vadd.s32 d16, d16, d24\n" - "vst1.32 {d2, d3}, [%[out1], :128]\n" - " vst1.32 {d16[0]}, [%[joint]]\n" - - "update_joint_stereo_samples\n" - "9:\n" - ".purgem calc_scalefactors\n" - ".purgem update_joint_stereo_samples\n" - : - [i] "+&r" (i), - [in] "+&r" (in), - [in0] "=&r" (in0), - [in1] "=&r" (in1), - [out] "+&r" (out), - [out0] "=&r" (out0), - [out1] "=&r" (out1), - [consts] "+&r" (consts) - : - [inc] "r" ((char *) &sb_sample_f[1][0][0] - - (char *) &sb_sample_f[0][0][0]), - [blocks] "r" (blocks), - [joint] "r" (&joint), - [c1] "i" (1 << SCALE_OUT_BITS), - [c2] "i" (31 - SCALE_OUT_BITS), - [zero] "r" (0) - : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "d16", "d17", "d18", "d19", "d20", "d21", "d22", - "d23", "d24", "d25", "d26", "d27", "d28", "d29", - "d30", "d31", "cc", "memory"); - - return joint; -} - -#define PERM_BE(a, b, c, d) { \ - (a * 2) + 1, (a * 2) + 0, \ - (b * 2) + 1, (b * 2) + 0, \ - (c * 2) + 1, (c * 2) + 0, \ - (d * 2) + 1, (d * 2) + 0 \ - } -#define PERM_LE(a, b, c, d) { \ - (a * 2) + 0, (a * 2) + 1, \ - (b * 2) + 0, (b * 2) + 1, \ - (c * 2) + 0, (c * 2) + 1, \ - (d * 2) + 0, (d * 2) + 1 \ - } - -static SBC_ALWAYS_INLINE int sbc_enc_process_input_4s_neon_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels, int big_endian) -{ - static SBC_ALIGNED uint8_t perm_be[2][8] = { - PERM_BE(7, 3, 6, 4), - PERM_BE(0, 2, 1, 5) - }; - static SBC_ALIGNED uint8_t perm_le[2][8] = { - PERM_LE(7, 3, 6, 4), - PERM_LE(0, 2, 1, 5) - }; - /* handle X buffer wraparound */ - if (position < nsamples) { - int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 40]; - int16_t *src = &X[0][position]; - __asm__ volatile ( - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0}, [%[src], :64]!\n" - "vst1.16 {d0}, [%[dst], :64]!\n" - : - [dst] "+r" (dst), - [src] "+r" (src) - : : "memory", "d0", "d1", "d2", "d3"); - if (nchannels > 1) { - dst = &X[1][SBC_X_BUFFER_SIZE - 40]; - src = &X[1][position]; - __asm__ volatile ( - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0}, [%[src], :64]!\n" - "vst1.16 {d0}, [%[dst], :64]!\n" - : - [dst] "+r" (dst), - [src] "+r" (src) - : : "memory", "d0", "d1", "d2", "d3"); - } - position = SBC_X_BUFFER_SIZE - 40; - } - - if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { - /* poor 'pcm' alignment */ - int16_t *x = &X[0][position]; - int16_t *y = &X[1][position]; - __asm__ volatile ( - "vld1.8 {d0, d1}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #16\n" - "sub %[y], %[y], #16\n" - "sub %[position], %[position], #8\n" - "vld1.8 {d4, d5}, [%[pcm]]!\n" - "vuzp.16 d4, d5\n" - "vld1.8 {d20, d21}, [%[pcm]]!\n" - "vuzp.16 d20, d21\n" - "vswp d5, d20\n" - "vtbl.8 d16, {d4, d5}, d0\n" - "vtbl.8 d17, {d4, d5}, d1\n" - "vtbl.8 d18, {d20, d21}, d0\n" - "vtbl.8 d19, {d20, d21}, d1\n" - "vst1.16 {d16, d17}, [%[x], :128]\n" - "vst1.16 {d18, d19}, [%[y], :128]\n" - "subs %[nsamples], %[nsamples], #8\n" - "bgt 1b\n" - : - [x] "+r" (x), - [y] "+r" (y), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19", - "d20", "d21", "d22", "d23"); - } else if (nchannels > 1) { - /* proper 'pcm' alignment */ - int16_t *x = &X[0][position]; - int16_t *y = &X[1][position]; - __asm__ volatile ( - "vld1.8 {d0, d1}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #16\n" - "sub %[y], %[y], #16\n" - "sub %[position], %[position], #8\n" - "vld2.16 {d4, d5}, [%[pcm]]!\n" - "vld2.16 {d20, d21}, [%[pcm]]!\n" - "vswp d5, d20\n" - "vtbl.8 d16, {d4, d5}, d0\n" - "vtbl.8 d17, {d4, d5}, d1\n" - "vtbl.8 d18, {d20, d21}, d0\n" - "vtbl.8 d19, {d20, d21}, d1\n" - "vst1.16 {d16, d17}, [%[x], :128]\n" - "vst1.16 {d18, d19}, [%[y], :128]\n" - "subs %[nsamples], %[nsamples], #8\n" - "bgt 1b\n" - : - [x] "+r" (x), - [y] "+r" (y), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19", - "d20", "d21", "d22", "d23"); - } else { - int16_t *x = &X[0][position]; - __asm__ volatile ( - "vld1.8 {d0, d1}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #16\n" - "sub %[position], %[position], #8\n" - "vld1.8 {d4, d5}, [%[pcm]]!\n" - "vtbl.8 d16, {d4, d5}, d0\n" - "vtbl.8 d17, {d4, d5}, d1\n" - "vst1.16 {d16, d17}, [%[x], :128]\n" - "subs %[nsamples], %[nsamples], #8\n" - "bgt 1b\n" - : - [x] "+r" (x), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19"); - } - return position; -} - -static SBC_ALWAYS_INLINE int sbc_enc_process_input_8s_neon_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels, int big_endian) -{ - static SBC_ALIGNED uint8_t perm_be[4][8] = { - PERM_BE(15, 7, 14, 8), - PERM_BE(13, 9, 12, 10), - PERM_BE(11, 3, 6, 0), - PERM_BE(5, 1, 4, 2) - }; - static SBC_ALIGNED uint8_t perm_le[4][8] = { - PERM_LE(15, 7, 14, 8), - PERM_LE(13, 9, 12, 10), - PERM_LE(11, 3, 6, 0), - PERM_LE(5, 1, 4, 2) - }; - /* handle X buffer wraparound */ - if (position < nsamples) { - int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 72]; - int16_t *src = &X[0][position]; - __asm__ volatile ( - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1}, [%[src], :128]!\n" - "vst1.16 {d0, d1}, [%[dst], :128]!\n" - : - [dst] "+r" (dst), - [src] "+r" (src) - : : "memory", "d0", "d1", "d2", "d3"); - if (nchannels > 1) { - dst = &X[1][SBC_X_BUFFER_SIZE - 72]; - src = &X[1][position]; - __asm__ volatile ( - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" - "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" - "vld1.16 {d0, d1}, [%[src], :128]!\n" - "vst1.16 {d0, d1}, [%[dst], :128]!\n" - : - [dst] "+r" (dst), - [src] "+r" (src) - : : "memory", "d0", "d1", "d2", "d3"); - } - position = SBC_X_BUFFER_SIZE - 72; - } - - if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { - /* poor 'pcm' alignment */ - int16_t *x = &X[0][position]; - int16_t *y = &X[1][position]; - __asm__ volatile ( - "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #32\n" - "sub %[y], %[y], #32\n" - "sub %[position], %[position], #16\n" - "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" - "vuzp.16 q2, q3\n" - "vld1.8 {d20, d21, d22, d23}, [%[pcm]]!\n" - "vuzp.16 q10, q11\n" - "vswp q3, q10\n" - "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" - "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" - "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" - "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" - "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" - "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" - "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" - "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" - "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" - "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" - "subs %[nsamples], %[nsamples], #16\n" - "bgt 1b\n" - : - [x] "+r" (x), - [y] "+r" (y), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19", - "d20", "d21", "d22", "d23"); - } else if (nchannels > 1) { - /* proper 'pcm' alignment */ - int16_t *x = &X[0][position]; - int16_t *y = &X[1][position]; - __asm__ volatile ( - "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #32\n" - "sub %[y], %[y], #32\n" - "sub %[position], %[position], #16\n" - "vld2.16 {d4, d5, d6, d7}, [%[pcm]]!\n" - "vld2.16 {d20, d21, d22, d23}, [%[pcm]]!\n" - "vswp q3, q10\n" - "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" - "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" - "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" - "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" - "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" - "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" - "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" - "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" - "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" - "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" - "subs %[nsamples], %[nsamples], #16\n" - "bgt 1b\n" - : - [x] "+r" (x), - [y] "+r" (y), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19", - "d20", "d21", "d22", "d23"); - } else { - int16_t *x = &X[0][position]; - __asm__ volatile ( - "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" - "1:\n" - "sub %[x], %[x], #32\n" - "sub %[position], %[position], #16\n" - "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" - "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" - "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" - "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" - "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" - "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" - "subs %[nsamples], %[nsamples], #16\n" - "bgt 1b\n" - : - [x] "+r" (x), - [pcm] "+r" (pcm), - [nsamples] "+r" (nsamples), - [position] "+r" (position) - : - [perm] "r" (big_endian ? perm_be : perm_le) - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", - "d5", "d6", "d7", "d16", "d17", "d18", "d19"); - } - return position; -} - -#undef PERM_BE -#undef PERM_LE - -static int sbc_enc_process_input_4s_be_neon(int position, const uint8_t *pcm, - int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - return sbc_enc_process_input_4s_neon_internal( - position, pcm, X, nsamples, nchannels, 1); -} - -static int sbc_enc_process_input_4s_le_neon(int position, const uint8_t *pcm, - int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - return sbc_enc_process_input_4s_neon_internal( - position, pcm, X, nsamples, nchannels, 0); -} - -static int sbc_enc_process_input_8s_be_neon(int position, const uint8_t *pcm, - int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - return sbc_enc_process_input_8s_neon_internal( - position, pcm, X, nsamples, nchannels, 1); -} - -static int sbc_enc_process_input_8s_le_neon(int position, const uint8_t *pcm, - int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - return sbc_enc_process_input_8s_neon_internal( - position, pcm, X, nsamples, nchannels, 0); -} - -void sbc_init_primitives_neon(struct sbc_encoder_state *state) -{ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; - state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; - state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon; - state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le_neon; - state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be_neon; - state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le_neon; - state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be_neon; - state->implementation_info = "NEON"; -} - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.h b/src/modules/bluetooth/sbc/sbc_primitives_neon.h deleted file mode 100644 index ea3da06..0000000 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_NEON_H -#define __SBC_PRIMITIVES_NEON_H - -#include "sbc_primitives.h" - -#if defined(__GNUC__) && defined(__ARM_NEON__) && \ - !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) - -#define SBC_BUILD_WITH_NEON_SUPPORT - -void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc/sbc_tables.h b/src/modules/bluetooth/sbc/sbc_tables.h deleted file mode 100644 index 25e24e6..0000000 --- a/src/modules/bluetooth/sbc/sbc_tables.h +++ /dev/null @@ -1,662 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2008-2010 Nokia Corporation - * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -/* A2DP specification: Appendix B, page 69 */ -static const int sbc_offset4[4][4] = { - { -1, 0, 0, 0 }, - { -2, 0, 0, 1 }, - { -2, 0, 0, 1 }, - { -2, 0, 0, 1 } -}; - -/* A2DP specification: Appendix B, page 69 */ -static const int sbc_offset8[4][8] = { - { -2, 0, 0, 0, 0, 0, 0, 1 }, - { -3, 0, 0, 0, 0, 0, 1, 2 }, - { -4, 0, 0, 0, 0, 0, 1, 2 }, - { -4, 0, 0, 0, 0, 0, 1, 2 } -}; - -/* extra bits of precision for the synthesis filter input data */ -#define SBCDEC_FIXED_EXTRA_BITS 2 - -#define SS4(val) ASR(val, SCALE_SPROTO4_TBL) -#define SS8(val) ASR(val, SCALE_SPROTO8_TBL) -#define SN4(val) ASR(val, SCALE_NPROTO4_TBL + 1 + SBCDEC_FIXED_EXTRA_BITS) -#define SN8(val) ASR(val, SCALE_NPROTO8_TBL + 1 + SBCDEC_FIXED_EXTRA_BITS) - -static const int32_t sbc_proto_4_40m0[] = { - SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8), - SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8), - SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330), - SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00), - SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7) -}; - -static const int32_t sbc_proto_4_40m1[] = { - SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475), - SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370), - SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b), - SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b), - SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7) -}; - -static const int32_t sbc_proto_8_80m0[] = { - SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100), - SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0), - SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c), - SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705), - SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db), - SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948), - SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200), - SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358), - SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31), - SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170) -}; - -static const int32_t sbc_proto_8_80m1[] = { - SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620), - SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40), - SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01), - SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94), - SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b), - SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68), - SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20), - SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410), - SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da), - SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a) -}; - -static const int32_t synmatrix4[8][4] = { - { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) }, - { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) }, - { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) }, - { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) }, - { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) }, - { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }, - { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) }, - { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } -}; - -static const int32_t synmatrix8[16][8] = { - { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798), - SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) }, - { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988), - SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) }, - { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac), - SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) }, - { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10), - SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) }, - { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), - SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) }, - { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0), - SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) }, - { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54), - SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) }, - { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678), - SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) }, - { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868), - SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) }, - { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), - SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }, - { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), - SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, - { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), - SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, - { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), - SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) }, - { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), - SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, - { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), - SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, - { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), - SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } -}; - -/* Uncomment the following line to enable high precision build of SBC encoder */ - -/* #define SBC_HIGH_PRECISION */ - -#ifdef SBC_HIGH_PRECISION -#define FIXED_A int64_t /* data type for fixed point accumulator */ -#define FIXED_T int32_t /* data type for fixed point constants */ -#define SBC_FIXED_EXTRA_BITS 16 -#else -#define FIXED_A int32_t /* data type for fixed point accumulator */ -#define FIXED_T int16_t /* data type for fixed point constants */ -#define SBC_FIXED_EXTRA_BITS 0 -#endif - -/* A2DP specification: Section 12.8 Tables - * - * Original values are premultiplied by 2 for better precision (that is the - * maximum which is possible without overflows) - * - * Note: in each block of 8 numbers sign was changed for elements 2 and 7 - * in order to compensate the same change applied to cos_table_fixed_4 - */ -#define SBC_PROTO_FIXED4_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_PROTO4(x) -static const FIXED_T _sbc_proto_fixed4[40] = { - F(0.00000000E+00), F(5.36548976E-04), - -F(1.49188357E-03), F(2.73370904E-03), - F(3.83720193E-03), F(3.89205149E-03), - F(1.86581691E-03), F(3.06012286E-03), - - F(1.09137620E-02), F(2.04385087E-02), - -F(2.88757392E-02), F(3.21939290E-02), - F(2.58767811E-02), F(6.13245186E-03), - -F(2.88217274E-02), F(7.76463494E-02), - - F(1.35593274E-01), F(1.94987841E-01), - -F(2.46636662E-01), F(2.81828203E-01), - F(2.94315332E-01), F(2.81828203E-01), - F(2.46636662E-01), -F(1.94987841E-01), - - -F(1.35593274E-01), -F(7.76463494E-02), - F(2.88217274E-02), F(6.13245186E-03), - F(2.58767811E-02), F(3.21939290E-02), - F(2.88757392E-02), -F(2.04385087E-02), - - -F(1.09137620E-02), -F(3.06012286E-03), - -F(1.86581691E-03), F(3.89205149E-03), - F(3.83720193E-03), F(2.73370904E-03), - F(1.49188357E-03), -F(5.36548976E-04), -}; -#undef F - -/* - * To produce this cosine matrix in Octave: - * - * b = zeros(4, 8); - * for i = 0:3 - * for j = 0:7 b(i+1, j+1) = cos((i + 0.5) * (j - 2) * (pi/4)) - * endfor - * endfor; - * printf("%.10f, ", b'); - * - * Note: in each block of 8 numbers sign was changed for elements 2 and 7 - * - * Change of sign for element 2 allows to replace constant 1.0 (not - * representable in Q15 format) with -1.0 (fine with Q15). - * Changed sign for element 7 allows to have more similar constants - * and simplify subband filter function code. - */ -#define SBC_COS_TABLE_FIXED4_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F_COS4(x) (FIXED_A) ((x) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_COS4(x) -static const FIXED_T cos_table_fixed_4[32] = { - F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), - F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), - - -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), - -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), - - -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), - -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), - - F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), - F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), -}; -#undef F - -/* A2DP specification: Section 12.8 Tables - * - * Original values are premultiplied by 4 for better precision (that is the - * maximum which is possible without overflows) - * - * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 - * in order to compensate the same change applied to cos_table_fixed_8 - */ -#define SBC_PROTO_FIXED8_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_PROTO8(x) -static const FIXED_T _sbc_proto_fixed8[80] = { - F(0.00000000E+00), F(1.56575398E-04), - F(3.43256425E-04), F(5.54620202E-04), - -F(8.23919506E-04), F(1.13992507E-03), - F(1.47640169E-03), F(1.78371725E-03), - F(2.01182542E-03), F(2.10371989E-03), - F(1.99454554E-03), F(1.61656283E-03), - F(9.02154502E-04), F(1.78805361E-04), - F(1.64973098E-03), F(3.49717454E-03), - - F(5.65949473E-03), F(8.02941163E-03), - F(1.04584443E-02), F(1.27472335E-02), - -F(1.46525263E-02), F(1.59045603E-02), - F(1.62208471E-02), F(1.53184106E-02), - F(1.29371806E-02), F(8.85757540E-03), - F(2.92408442E-03), -F(4.91578024E-03), - -F(1.46404076E-02), F(2.61098752E-02), - F(3.90751381E-02), F(5.31873032E-02), - - F(6.79989431E-02), F(8.29847578E-02), - F(9.75753918E-02), F(1.11196689E-01), - -F(1.23264548E-01), F(1.33264415E-01), - F(1.40753505E-01), F(1.45389847E-01), - F(1.46955068E-01), F(1.45389847E-01), - F(1.40753505E-01), F(1.33264415E-01), - F(1.23264548E-01), -F(1.11196689E-01), - -F(9.75753918E-02), -F(8.29847578E-02), - - -F(6.79989431E-02), -F(5.31873032E-02), - -F(3.90751381E-02), -F(2.61098752E-02), - F(1.46404076E-02), -F(4.91578024E-03), - F(2.92408442E-03), F(8.85757540E-03), - F(1.29371806E-02), F(1.53184106E-02), - F(1.62208471E-02), F(1.59045603E-02), - F(1.46525263E-02), -F(1.27472335E-02), - -F(1.04584443E-02), -F(8.02941163E-03), - - -F(5.65949473E-03), -F(3.49717454E-03), - -F(1.64973098E-03), -F(1.78805361E-04), - -F(9.02154502E-04), F(1.61656283E-03), - F(1.99454554E-03), F(2.10371989E-03), - F(2.01182542E-03), F(1.78371725E-03), - F(1.47640169E-03), F(1.13992507E-03), - F(8.23919506E-04), -F(5.54620202E-04), - -F(3.43256425E-04), -F(1.56575398E-04), -}; -#undef F - -/* - * To produce this cosine matrix in Octave: - * - * b = zeros(8, 16); - * for i = 0:7 - * for j = 0:15 b(i+1, j+1) = cos((i + 0.5) * (j - 4) * (pi/8)) - * endfor endfor; - * printf("%.10f, ", b'); - * - * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 - * - * Change of sign for element 4 allows to replace constant 1.0 (not - * representable in Q15 format) with -1.0 (fine with Q15). - * Changed signs for elements 13, 14, 15 allow to have more similar constants - * and simplify subband filter function code. - */ -#define SBC_COS_TABLE_FIXED8_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F_COS8(x) (FIXED_A) ((x) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_COS8(x) -static const FIXED_T cos_table_fixed_8[128] = { - F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), - -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), - F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), - F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), - - -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), - -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), - -F(0.7071067812), -F(0.9807852804), -F(0.9238795325), -F(0.5555702330), - -F(0.0000000000), -F(0.5555702330), -F(0.9238795325), -F(0.9807852804), - - -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), - -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), - -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), - F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), - - F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), - -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), - F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), - -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), - - F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), - -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), - F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), - F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), - - -F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), - -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), - -F(0.7071067812), -F(0.1950903220), F(0.9238795325), -F(0.8314696123), - -F(0.0000000000), -F(0.8314696123), F(0.9238795325), -F(0.1950903220), - - -F(0.7071067812), F(0.1950903220), F(0.3826834324), -F(0.8314696123), - -F(1.0000000000), -F(0.8314696123), F(0.3826834324), F(0.1950903220), - -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), - -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), - - F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), - -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), - F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), - -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), -}; -#undef F - -/* - * Enforce 16 byte alignment for the data, which is supposed to be used - * with SIMD optimized code. - */ - -#define SBC_ALIGN_BITS 4 -#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) - -#ifdef __GNUC__ -#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) -#else -#define SBC_ALIGNED -#endif - -/* - * Constant tables for the use in SIMD optimized analysis filters - * Each table consists of two parts: - * 1. reordered "proto" table - * 2. reordered "cos" table - * - * Due to non-symmetrical reordering, separate tables for "even" - * and "odd" cases are needed - */ - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { -#define C0 1.0932568993 -#define C1 1.3056875580 -#define C2 1.3056875580 -#define C3 1.6772280856 - -#define F(x) F_PROTO4(x) - F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), - F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), - F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), - F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), - F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), - F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), - F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), - F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), - F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), - F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), - -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), - F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), - -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), - -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), - -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), - F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), - -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), - -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), - -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), - F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), -#undef F -#define F(x) F_COS4(x) - F(0.7071067812 / C0), F(0.9238795325 / C1), - -F(0.7071067812 / C0), F(0.3826834324 / C1), - -F(0.7071067812 / C0), -F(0.3826834324 / C1), - F(0.7071067812 / C0), -F(0.9238795325 / C1), - F(0.3826834324 / C2), -F(1.0000000000 / C3), - -F(0.9238795325 / C2), -F(1.0000000000 / C3), - F(0.9238795325 / C2), -F(1.0000000000 / C3), - -F(0.3826834324 / C2), -F(1.0000000000 / C3), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { -#define C0 1.3056875580 -#define C1 1.6772280856 -#define C2 1.0932568993 -#define C3 1.3056875580 - -#define F(x) F_PROTO4(x) - F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), - -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), - F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), - F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), - F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), - -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), - F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), - F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), - F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), - -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), - F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), - F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), - F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), - F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), - F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), - F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), - F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), - -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), - F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), - F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), -#undef F -#define F(x) F_COS4(x) - F(0.9238795325 / C0), -F(1.0000000000 / C1), - F(0.3826834324 / C0), -F(1.0000000000 / C1), - -F(0.3826834324 / C0), -F(1.0000000000 / C1), - -F(0.9238795325 / C0), -F(1.0000000000 / C1), - F(0.7071067812 / C2), F(0.3826834324 / C3), - -F(0.7071067812 / C2), -F(0.9238795325 / C3), - -F(0.7071067812 / C2), F(0.9238795325 / C3), - F(0.7071067812 / C2), -F(0.3826834324 / C3), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { -#define C0 2.7906148894 -#define C1 2.4270044280 -#define C2 2.8015616024 -#define C3 3.1710363741 -#define C4 2.5377944043 -#define C5 2.4270044280 -#define C6 2.8015616024 -#define C7 3.1710363741 - -#define F(x) F_PROTO8(x) - F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), - F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), - F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), - F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), - -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), - F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), - F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), - F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), - F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), - F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), - F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), - F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), - -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), - F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), - F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), - -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), - F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), - F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), - F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), - F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), - -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), - F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), - F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), - F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), - -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), - -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), - -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), - -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), - F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), - F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), - F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), - F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), - -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), - -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), - -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), - -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), - -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), - F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), - F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), - F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), -#undef F -#define F(x) F_COS8(x) - F(0.7071067812 / C0), F(0.8314696123 / C1), - -F(0.7071067812 / C0), -F(0.1950903220 / C1), - -F(0.7071067812 / C0), -F(0.9807852804 / C1), - F(0.7071067812 / C0), -F(0.5555702330 / C1), - F(0.7071067812 / C0), F(0.5555702330 / C1), - -F(0.7071067812 / C0), F(0.9807852804 / C1), - -F(0.7071067812 / C0), F(0.1950903220 / C1), - F(0.7071067812 / C0), -F(0.8314696123 / C1), - F(0.9238795325 / C2), F(0.9807852804 / C3), - F(0.3826834324 / C2), F(0.8314696123 / C3), - -F(0.3826834324 / C2), F(0.5555702330 / C3), - -F(0.9238795325 / C2), F(0.1950903220 / C3), - -F(0.9238795325 / C2), -F(0.1950903220 / C3), - -F(0.3826834324 / C2), -F(0.5555702330 / C3), - F(0.3826834324 / C2), -F(0.8314696123 / C3), - F(0.9238795325 / C2), -F(0.9807852804 / C3), - -F(1.0000000000 / C4), F(0.5555702330 / C5), - -F(1.0000000000 / C4), -F(0.9807852804 / C5), - -F(1.0000000000 / C4), F(0.1950903220 / C5), - -F(1.0000000000 / C4), F(0.8314696123 / C5), - -F(1.0000000000 / C4), -F(0.8314696123 / C5), - -F(1.0000000000 / C4), -F(0.1950903220 / C5), - -F(1.0000000000 / C4), F(0.9807852804 / C5), - -F(1.0000000000 / C4), -F(0.5555702330 / C5), - F(0.3826834324 / C6), F(0.1950903220 / C7), - -F(0.9238795325 / C6), -F(0.5555702330 / C7), - F(0.9238795325 / C6), F(0.8314696123 / C7), - -F(0.3826834324 / C6), -F(0.9807852804 / C7), - -F(0.3826834324 / C6), F(0.9807852804 / C7), - F(0.9238795325 / C6), -F(0.8314696123 / C7), - -F(0.9238795325 / C6), F(0.5555702330 / C7), - F(0.3826834324 / C6), -F(0.1950903220 / C7), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -#undef C4 -#undef C5 -#undef C6 -#undef C7 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { -#define C0 2.5377944043 -#define C1 2.4270044280 -#define C2 2.8015616024 -#define C3 3.1710363741 -#define C4 2.7906148894 -#define C5 2.4270044280 -#define C6 2.8015616024 -#define C7 3.1710363741 - -#define F(x) F_PROTO8(x) - F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), - F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), - F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), - F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), - F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), - F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), - F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), - F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), - F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), - F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), - F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), - F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), - F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), - F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), - F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), - -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), - F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), - F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), - F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), - F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), - F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), - F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), - F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), - F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), - F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), - -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), - -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), - -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), - F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), - F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), - F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), - F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), - F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), - -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), - -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), - -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), - F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), - F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), - F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), - F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), -#undef F -#define F(x) F_COS8(x) - -F(1.0000000000 / C0), F(0.8314696123 / C1), - -F(1.0000000000 / C0), -F(0.1950903220 / C1), - -F(1.0000000000 / C0), -F(0.9807852804 / C1), - -F(1.0000000000 / C0), -F(0.5555702330 / C1), - -F(1.0000000000 / C0), F(0.5555702330 / C1), - -F(1.0000000000 / C0), F(0.9807852804 / C1), - -F(1.0000000000 / C0), F(0.1950903220 / C1), - -F(1.0000000000 / C0), -F(0.8314696123 / C1), - F(0.9238795325 / C2), F(0.9807852804 / C3), - F(0.3826834324 / C2), F(0.8314696123 / C3), - -F(0.3826834324 / C2), F(0.5555702330 / C3), - -F(0.9238795325 / C2), F(0.1950903220 / C3), - -F(0.9238795325 / C2), -F(0.1950903220 / C3), - -F(0.3826834324 / C2), -F(0.5555702330 / C3), - F(0.3826834324 / C2), -F(0.8314696123 / C3), - F(0.9238795325 / C2), -F(0.9807852804 / C3), - F(0.7071067812 / C4), F(0.5555702330 / C5), - -F(0.7071067812 / C4), -F(0.9807852804 / C5), - -F(0.7071067812 / C4), F(0.1950903220 / C5), - F(0.7071067812 / C4), F(0.8314696123 / C5), - F(0.7071067812 / C4), -F(0.8314696123 / C5), - -F(0.7071067812 / C4), -F(0.1950903220 / C5), - -F(0.7071067812 / C4), F(0.9807852804 / C5), - F(0.7071067812 / C4), -F(0.5555702330 / C5), - F(0.3826834324 / C6), F(0.1950903220 / C7), - -F(0.9238795325 / C6), -F(0.5555702330 / C7), - F(0.9238795325 / C6), F(0.8314696123 / C7), - -F(0.3826834324 / C6), -F(0.9807852804 / C7), - -F(0.3826834324 / C6), F(0.9807852804 / C7), - F(0.9238795325 / C6), -F(0.8314696123 / C7), - -F(0.9238795325 / C6), F(0.5555702330 / C7), - F(0.3826834324 / C6), -F(0.1950903220 / C7), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -#undef C4 -#undef C5 -#undef C6 -#undef C7 -}; -- 1.7.11.2