Hello, I found that floating point processing eliminates all the quality issues that were mentioned here. So I added a floating point mode to SBC, along with code for minimizing quantization errors; the mode is enabled by defining SBC_FLOAT in sbc_tables.h. It reduces noise by several dB and greatly improves the reproduction of the higher bands. There is room for further improvements, such as adaptive clipping prevention and a high resolution input format, but with the current code base they are somewhat hard to implement. The attached patch may cause some trouble when applied, because one part of it is rejected. I hope this will not prevent those who are interested from applying and testing it. -- Sebastian Olter
diff -bc5 bluez-4.99-orig/sbc//sbc_primitives.c bluez-4.99/sbc//sbc_primitives.c *** bluez-4.99-orig/sbc//sbc_primitives.c 2010-11-20 21:25:14.000000000 +0100 --- bluez-4.99/sbc//sbc_primitives.c 2012-03-25 01:12:29.502123931 +0100 *************** *** 35,44 **** --- 35,47 ---- #include "sbc_primitives_mmx.h" #include "sbc_primitives_iwmmxt.h" #include "sbc_primitives_neon.h" #include "sbc_primitives_armv6.h" + #define ANTICLIP 0.8 + + /* * A reference C code of analysis filter with SIMD-friendly tables * reordering and code layout. This code can be used to develop platform * specific SIMD optimizations. Also it may be used as some kind of test * for compiler autovectorization capabilities (who knows, if the compiler *************** *** 59,73 **** static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, const FIXED_T *consts) { FIXED_A t1[4]; FIXED_T t2[4]; ! int hop = 0; ! /* rounding coefficient */ t1[0] = t1[1] = t1[2] = t1[3] = (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); /* low pass polyphase filter */ for (hop = 0; hop < 40; hop += 8) { t1[0] += (FIXED_A) in[hop] * consts[hop]; t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; --- 62,82 ---- static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, const FIXED_T *consts) { FIXED_A t1[4]; FIXED_T t2[4]; ! int hop = 0, i = 0; ! #ifdef SBC_FLOAT ! #define SBC_PROTO_SCALE_FIXED4(x) x; ! /* rounding coefficient */ ! t1[0] = t1[1] = t1[2] = t1[3] = 1; ! #else ! #define SBC_PROTO_SCALE_FIXED4(x) x >> SBC_PROTO_FIXED4_SCALE /* rounding coefficient */ t1[0] = t1[1] = t1[2] = t1[3] = (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + #endif /* low pass polyphase filter */ for (hop = 0; hop < 40; hop += 8) { t1[0] += (FIXED_A) in[hop] * consts[hop]; t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; *************** *** 78,91 **** t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; } /* scaling */ ! t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; ! 
t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; ! t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; ! t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; /* do the cos transform */ t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; --- 87,100 ---- t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; } /* scaling */ ! t2[0] = SBC_PROTO_SCALE_FIXED4(t1[0]); ! t2[1] = SBC_PROTO_SCALE_FIXED4(t1[1]); ! t2[2] = SBC_PROTO_SCALE_FIXED4(t1[2]); ! t2[3] = SBC_PROTO_SCALE_FIXED4(t1[3]); /* do the cos transform */ t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; *************** *** 102,131 **** t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; ! out[0] = t1[0] >> ! (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); ! out[1] = t1[1] >> ! (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); ! out[2] = t1[2] >> ! (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); ! out[3] = t1[3] >> (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); } static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, const FIXED_T *consts) { FIXED_A t1[8]; FIXED_T t2[8]; int i, hop; ! /* rounding coefficient */ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); /* low pass polyphase filter */ for (hop = 0; hop < 80; hop += 16) { t1[0] += (FIXED_A) in[hop] * consts[hop]; t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; --- 111,154 ---- t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; ! #ifdef SBC_FLOAT ! /* Tries to redistribute quantization errors so that exactly half of them ! * is positive. 
This reduces overall distortion, especially in high bands ! * and increases chances for clipping, hence the ANTICLIP coefficient. */ ! double accumError = 0; ! for (i = 0; i < 4; i++){ ! t1[i] *= (1 << SCALE_OUT_BITS) * ANTICLIP; ! accumError += t1[i] - (int32_t)t1[i]; ! } ! for (i = 0; i < 4; i++) ! out[i] = t1[i] + accumError / 4; ! #else ! for (i = 0; i < 4; i++) ! out[i] = t1[i] >> (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + #endif } static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, const FIXED_T *consts) { FIXED_A t1[8]; FIXED_T t2[8]; int i, hop; ! #ifdef SBC_FLOAT ! #define SBC_PROTO_SCALE_FIXED8(x) x; ! /* rounding coefficient */ ! t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 1; ! #else ! #define SBC_PROTO_SCALE_FIXED8(x) x >> SBC_PROTO_FIXED8_SCALE /* rounding coefficient */ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + #endif /* low pass polyphase filter */ for (hop = 0; hop < 80; hop += 16) { t1[0] += (FIXED_A) in[hop] * consts[hop]; t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; *************** *** 144,161 **** t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; } /* scaling */ ! t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; ! t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; ! t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; ! t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; ! t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; ! t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; ! t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; ! t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; /* do the cos transform */ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; --- 167,184 ---- t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; } /* scaling */ ! t2[0] = SBC_PROTO_SCALE_FIXED8(t1[0]); ! t2[1] = SBC_PROTO_SCALE_FIXED8(t1[1]); ! t2[2] = SBC_PROTO_SCALE_FIXED8(t1[2]); ! 
t2[3] = SBC_PROTO_SCALE_FIXED8(t1[3]); ! t2[4] = SBC_PROTO_SCALE_FIXED8(t1[4]); ! t2[5] = SBC_PROTO_SCALE_FIXED8(t1[5]); ! t2[6] = SBC_PROTO_SCALE_FIXED8(t1[6]); ! t2[7] = SBC_PROTO_SCALE_FIXED8(t1[7]); /* do the cos transform */ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; *************** *** 176,188 **** --- 199,227 ---- t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; } + #ifdef SBC_FLOAT + /* Tries to redistribute quantization errors so that exactly half of them + * is positive. This reduces overall distortion, especially in high bands + * and increases chances for clipping, hence the ANTICLIP coefficient. */ + double accumError = 0; + for (i = 0; i < 8; i++){ + t1[i] *= (1 << SCALE_OUT_BITS) * ANTICLIP; + accumError += t1[i] - (int32_t)t1[i]; + } + for (i = 0; i < 8; i++) + out[i] = t1[i] + accumError / 8; + /* + out[i] = t1[i]; + */ + #else for (i = 0; i < 8; i++) out[i] = t1[i] >> (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); + #endif } static inline void sbc_analyze_4b_4s_simd(int16_t *x, int32_t *out, int out_stride) { diff -bc5 bluez-4.99-orig/sbc//sbc_tables.h bluez-4.99/sbc//sbc_tables.h *** bluez-4.99-orig/sbc//sbc_tables.h 2011-12-21 23:53:54.000000000 +0100 --- bluez-4.99/sbc//sbc_tables.h 2012-03-21 18:08:15.559689237 +0100 *************** *** 136,154 **** SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } }; /* Uncomment the following line to enable high precision build of SBC encoder */ ! 
/* #define SBC_HIGH_PRECISION */ #ifdef SBC_HIGH_PRECISION #define FIXED_A int64_t /* data type for fixed point accumulator */ #define FIXED_T int32_t /* data type for fixed point constants */ #define SBC_FIXED_EXTRA_BITS 16 #else #define FIXED_A int32_t /* data type for fixed point accumulator */ #define FIXED_T int16_t /* data type for fixed point constants */ #define SBC_FIXED_EXTRA_BITS 0 #endif /* A2DP specification: Section 12.8 Tables * --- 136,175 ---- SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } }; /* Uncomment the following line to enable high precision build of SBC encoder */ ! //#define SBC_HIGH_PRECISION ! #define SBC_FLOAT #ifdef SBC_HIGH_PRECISION #define FIXED_A int64_t /* data type for fixed point accumulator */ #define FIXED_T int32_t /* data type for fixed point constants */ + #define FIXED_T_BITS (sizeof(FIXED_T) * CHAR_BIT - 1) + #define F_PROTO4(x) (FIXED_A) ((x * 2) * (1LL << FIXED_T_BITS) + 0.5) + #define F_COS4(x) ((x) * (1LL << FIXED_T_BITS) + 0.5) + #define F_PROTO8(x) ((x * 2) * (1LL << FIXED_T_BITS) + 0.5) + #define F_COS8(x) ((x) * (1LL << FIXED_T_BITS) + 0.5) #define SBC_FIXED_EXTRA_BITS 16 + #elif defined(SBC_FLOAT) + #define SBC_HIGH_PRECISION // disable MMX + #define FIXED_A double /* data type for fixed point accumulator */ + #define FIXED_T float /* data type for fixed point constants */ + #define FIXED_T_BITS (sizeof(FIXED_T) * CHAR_BIT - 1) + #define F_PROTO4(x) (x) + #define F_COS4(x) (x) + #define F_PROTO8(x) (x) + #define F_COS8(x) (x) + #define SBC_FIXED_EXTRA_BITS 0 #else #define FIXED_A int32_t /* data type for fixed point accumulator */ #define FIXED_T int16_t /* data type for fixed point constants */ + #define FIXED_T_BITS (sizeof(FIXED_T) * CHAR_BIT - 1) + #define F_PROTO4(x) (FIXED_A) ((x * 2) * (1 << FIXED_T_BITS) + 0.5) + #define F_COS4(x) ((x) * (1 << FIXED_T_BITS) + 0.5) + #define F_PROTO8(x) ((x * 2) * (1 << FIXED_T_BITS) + 0.5) + #define F_COS8(x) ((x) * (1 << FIXED_T_BITS) + 0.5) #define 
SBC_FIXED_EXTRA_BITS 0 #endif /* A2DP specification: Section 12.8 Tables * *************** *** 156,191 **** * maximum which is possible without overflows) * * Note: in each block of 8 numbers sign was changed for elements 2 and 7 * in order to compensate the same change applied to cos_table_fixed_4 */ ! #define SBC_PROTO_FIXED4_SCALE \ ! ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) ! #define F_PROTO4(x) (FIXED_A) ((x * 2) * \ ! ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) #define F(x) F_PROTO4(x) static const FIXED_T _sbc_proto_fixed4[40] = { F(0.00000000E+00), F(5.36548976E-04), -F(1.49188357E-03), F(2.73370904E-03), F(3.83720193E-03), F(3.89205149E-03), F(1.86581691E-03), F(3.06012286E-03), - F(1.09137620E-02), F(2.04385087E-02), -F(2.88757392E-02), F(3.21939290E-02), F(2.58767811E-02), F(6.13245186E-03), -F(2.88217274E-02), F(7.76463494E-02), - F(1.35593274E-01), F(1.94987841E-01), -F(2.46636662E-01), F(2.81828203E-01), F(2.94315332E-01), F(2.81828203E-01), F(2.46636662E-01), -F(1.94987841E-01), - -F(1.35593274E-01), -F(7.76463494E-02), F(2.88217274E-02), F(6.13245186E-03), F(2.58767811E-02), F(3.21939290E-02), F(2.88757392E-02), -F(2.04385087E-02), - -F(1.09137620E-02), -F(3.06012286E-03), -F(1.86581691E-03), F(3.89205149E-03), F(3.83720193E-03), F(2.73370904E-03), F(1.49188357E-03), -F(5.36548976E-04), }; --- 177,205 ---- * maximum which is possible without overflows) * * Note: in each block of 8 numbers sign was changed for elements 2 and 7 * in order to compensate the same change applied to cos_table_fixed_4 */ ! 
#define SBC_PROTO_FIXED4_SCALE (FIXED_T_BITS - SBC_FIXED_EXTRA_BITS + 1) #define F(x) F_PROTO4(x) static const FIXED_T _sbc_proto_fixed4[40] = { F(0.00000000E+00), F(5.36548976E-04), -F(1.49188357E-03), F(2.73370904E-03), F(3.83720193E-03), F(3.89205149E-03), F(1.86581691E-03), F(3.06012286E-03), F(1.09137620E-02), F(2.04385087E-02), -F(2.88757392E-02), F(3.21939290E-02), F(2.58767811E-02), F(6.13245186E-03), -F(2.88217274E-02), F(7.76463494E-02), F(1.35593274E-01), F(1.94987841E-01), -F(2.46636662E-01), F(2.81828203E-01), F(2.94315332E-01), F(2.81828203E-01), F(2.46636662E-01), -F(1.94987841E-01), -F(1.35593274E-01), -F(7.76463494E-02), F(2.88217274E-02), F(6.13245186E-03), F(2.58767811E-02), F(3.21939290E-02), F(2.88757392E-02), -F(2.04385087E-02), -F(1.09137620E-02), -F(3.06012286E-03), -F(1.86581691E-03), F(3.89205149E-03), F(3.83720193E-03), F(2.73370904E-03), F(1.49188357E-03), -F(5.36548976E-04), }; *************** *** 206,219 **** * Change of sign for element 2 allows to replace constant 1.0 (not * representable in Q15 format) with -1.0 (fine with Q15). * Changed sign for element 7 allows to have more similar constants * and simplify subband filter function code. */ ! #define SBC_COS_TABLE_FIXED4_SCALE \ ! ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) ! #define F_COS4(x) (FIXED_A) ((x) * \ ! ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) #define F(x) F_COS4(x) static const FIXED_T cos_table_fixed_4[32] = { F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), --- 220,230 ---- * Change of sign for element 2 allows to replace constant 1.0 (not * representable in Q15 format) with -1.0 (fine with Q15). * Changed sign for element 7 allows to have more similar constants * and simplify subband filter function code. */ ! 
#define SBC_COS_TABLE_FIXED4_SCALE (FIXED_T_BITS + SBC_FIXED_EXTRA_BITS) #define F(x) F_COS4(x) static const FIXED_T cos_table_fixed_4[32] = { F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), *************** *** 234,247 **** * maximum which is possible without overflows) * * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 * in order to compensate the same change applied to cos_table_fixed_8 */ ! #define SBC_PROTO_FIXED8_SCALE \ ! ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) ! #define F_PROTO8(x) (FIXED_A) ((x * 2) * \ ! ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) #define F(x) F_PROTO8(x) static const FIXED_T _sbc_proto_fixed8[80] = { F(0.00000000E+00), F(1.56575398E-04), F(3.43256425E-04), F(5.54620202E-04), -F(8.23919506E-04), F(1.13992507E-03), --- 245,255 ---- * maximum which is possible without overflows) * * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 * in order to compensate the same change applied to cos_table_fixed_8 */ ! #define SBC_PROTO_FIXED8_SCALE (FIXED_T_BITS - SBC_FIXED_EXTRA_BITS + 1) #define F(x) F_PROTO8(x) static const FIXED_T _sbc_proto_fixed8[80] = { F(0.00000000E+00), F(1.56575398E-04), F(3.43256425E-04), F(5.54620202E-04), -F(8.23919506E-04), F(1.13992507E-03), *************** *** 303,316 **** * Change of sign for element 4 allows to replace constant 1.0 (not * representable in Q15 format) with -1.0 (fine with Q15). * Changed signs for elements 13, 14, 15 allow to have more similar constants * and simplify subband filter function code. */ ! #define SBC_COS_TABLE_FIXED8_SCALE \ ! ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) ! #define F_COS8(x) (FIXED_A) ((x) * \ ! 
((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) #define F(x) F_COS8(x) static const FIXED_T cos_table_fixed_8[128] = { F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), --- 311,321 ---- * Change of sign for element 4 allows to replace constant 1.0 (not * representable in Q15 format) with -1.0 (fine with Q15). * Changed signs for elements 13, 14, 15 allow to have more similar constants * and simplify subband filter function code. */ ! #define SBC_COS_TABLE_FIXED8_SCALE (FIXED_T_BITS + SBC_FIXED_EXTRA_BITS) #define F(x) F_COS8(x) static const FIXED_T cos_table_fixed_8[128] = { F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220),