Re: [PATCH] sbc: bitstream packing optimization for encoder.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Friday 12 December 2008 03:39:34 ext Marcel Holtmann wrote:
> Hi Siarhei,
>
> > Appears that bitstream packing is also one of the most CPU hungry
> > operations in SBC encoder, which did not get proper attention yet.
> > Could somebody review this patch and provide feedback/comments?
>
> again thanks for working on this. I am actually willing to include your
> patches quickly in one of the next releases to get more wider testing
> audience.

Well, after doing a bit more testing, appears that a bug slipped in:
+					PUT_BITS(audio_sample, bits[ch][sb]);

There should be the following line instead:
+					PUT_BITS(audio_sample & levels[ch][sb], bits[ch][sb]);

Updated patch is attached. Any additional testing/feedback is very much
welcome.

Best regards
Siarhei Siamashka
>From 0a3818bd86342c60cda2fa87c59ecd5f9d6d4591 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@xxxxxxxxx>
Date: Thu, 11 Dec 2008 21:21:28 +0200
Subject: [PATCH] sbc: bitstream writing optimization for encoder.

SBC encoder performance improvement up to 1.5x for ARM11
and almost twice faster for Intel Core2 in some cases.

Signed-off-by: Siarhei Siamashka <siarhei.siamashka@xxxxxxxxx>
---
 sbc/sbc.c |   65 ++++++++++++++++++++++++++++++++++--------------------------
 1 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/sbc/sbc.c b/sbc/sbc.c
index 7072673..c495a75 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -909,6 +909,29 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state,
 	}
 }
 
+/* Supplementary bitstream writing macros for 'sbc_pack_frame' */
+
+#define PUT_BITS(v, n)\
+	{\
+		bits_cache = (v) | (bits_cache << (n));\
+		bits_count += (n);\
+		if (bits_count >= 16) {\
+			bits_count -= 8;\
+			*data_ptr++ = (uint8_t)(bits_cache >> bits_count);\
+			bits_count -= 8;\
+			*data_ptr++ = (uint8_t)(bits_cache >> bits_count);\
+		}\
+	} while (0);\
+
+#define FLUSH_BITS()\
+	while (bits_count >= 8) {\
+		bits_count -= 8;\
+		*data_ptr++ = (uint8_t)(bits_cache >> bits_count);\
+	}\
+	if (bits_count > 0) {\
+	    *data_ptr++ = (uint8_t)(bits_cache << (8 - bits_count));\
+	}\
+
 /*
  * Packs the SBC frame from frame into the memory at data. At most len
  * bytes will be used, should more memory be needed an appropriate
@@ -926,14 +949,18 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state,
 
 static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 {
-	int produced;
+	/* Bitstream writer starts from the fourth byte */
+	uint8_t *data_ptr = data + 4;
+	uint32_t bits_cache = 0;
+	uint32_t bits_count = 0;
+
 	/* Will copy the header parts for CRC-8 calculation here */
 	uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 	int crc_pos = 0;
 
 	uint16_t audio_sample;
 
-	int ch, sb, blk, bit;	/* channel, subband, block and bit counters */
+	int ch, sb, blk;	/* channel, subband, block and bit counters */
 	int bits[2][8];		/* bits distribution */
 	int levels[2][8];	/* levels are derived from that */
 
@@ -973,8 +1000,6 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 
 	/* Can't fill in crc yet */
 
-	produced = 32;
-
 	crc_header[0] = data[1];
 	crc_header[1] = data[2];
 	crc_pos = 16;
@@ -999,6 +1024,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 		u_int32_t scalefactor_j[2];
 		uint8_t scale_factor_j[2];
 
+		uint8_t joint = 0;
 		frame->joint = 0;
 
 		for (sb = 0; sb < frame->subbands - 1; sb++) {
@@ -1031,6 +1057,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 			if ((scalefactor[0][sb] + scalefactor[1][sb]) >
 					(scalefactor_j[0] + scalefactor_j[1]) ) {
 				/* use joint stereo for this subband */
+				joint |= 1 << (frame->subbands - 1 - sb);
 				frame->joint |= 1 << sb;
 				frame->scale_factor[0][sb] = scale_factor_j[0];
 				frame->scale_factor[1][sb] = scale_factor_j[1];
@@ -1045,24 +1072,16 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 			}
 		}
 
-		data[4] = 0;
-		for (sb = 0; sb < frame->subbands - 1; sb++)
-			data[4] |= ((frame->joint >> sb) & 0x01) << (frame->subbands - 1 - sb);
-
-		crc_header[crc_pos >> 3] = data[4];
-
-		produced += frame->subbands;
+		PUT_BITS(joint, frame->subbands);
+		crc_header[crc_pos >> 3] = joint;
 		crc_pos += frame->subbands;
 	}
 
 	for (ch = 0; ch < frame->channels; ch++) {
 		for (sb = 0; sb < frame->subbands; sb++) {
-			data[produced >> 3] <<= 4;
+			PUT_BITS(frame->scale_factor[ch][sb] & 0x0F, 4);
 			crc_header[crc_pos >> 3] <<= 4;
-			data[produced >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
 			crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
-
-			produced += 4;
 			crc_pos += 4;
 		}
 	}
@@ -1088,25 +1107,15 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 						(uint16_t) ((((frame->sb_sample_f[blk][ch][sb]*levels[ch][sb]) >>
 									(frame->scale_factor[ch][sb] + 1)) +
 								levels[ch][sb]) >> 1);
-					audio_sample <<= 16 - bits[ch][sb];
-					for (bit = 0; bit < bits[ch][sb]; bit++) {
-						data[produced >> 3] <<= 1;
-						if (audio_sample & 0x8000)
-							data[produced >> 3] |= 0x1;
-						audio_sample <<= 1;
-						produced++;
-					}
+					PUT_BITS(audio_sample & levels[ch][sb], bits[ch][sb]);
 				}
 			}
 		}
 	}
 
-	/* align the last byte */
-	if (produced % 8) {
-		data[produced >> 3] <<= 8 - (produced % 8);
-	}
+	FLUSH_BITS();
 
-	return (produced + 7) >> 3;
+	return data_ptr - data;
 }
 
 struct sbc_priv {
-- 
1.5.6.5


[Index of Archives]     [Bluez Devel]     [Linux Wireless Networking]     [Linux Wireless Personal Area Networking]     [Linux ATH6KL]     [Linux USB Devel]     [Linux Media Drivers]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux