[PATCH] do not unroll big stuff in twofish key setup if OPTIMIZE_FOR_SIZE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello Herbert,

Currently, the twofish cipher key setup code
has unrolled loops: approximately 70-100
instructions are repeated 40 times.

As a result, the twofish module is the biggest module
in crypto/*.

The attached patch makes this unrolling conditional on
CONFIG_CC_OPTIMIZE_FOR_SIZE. Presumably, people who
want to use -Os will also prefer not to have these loops
unrolled:

$ size */twofish_common.o
   text    data     bss     dec     hex filename
  37920       0       0   37920    9420 crypto.org/twofish_common.o
  13209       0       0   13209    3399 crypto/twofish_common.o

Run-tested (modprobe tcrypt reports ok). Please apply.

Signed-off-by: Denys Vlasenko <vda.linux@xxxxxxxxxxxxxx>
--
vda
--- linux-2.6.23.crypto/crypto/twofish_common0.c	Sun Oct 21 18:30:14 2007
+++ linux-2.6.23.crypto/crypto/twofish_common.c	Sun Oct 21 18:17:36 2007
@@ -655,6 +655,23 @@
 			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
 		}
 
+		/* Unrolling produces x2.5 more code (+18k on i386),
+		 * and speeds up key setup by 7%:
+		 * unrolled: twofish_setkey/sec: 41128
+		 *     loop: twofish_setkey/sec: 38148
+		 * CALC_K256: ~100 insns each
+		 * CALC_K192: ~90 insns
+		 *    CALC_K: ~70 insns
+		 */
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+		/* Calculate whitening and round subkeys */
+		for ( i = 0; i < 8; i += 2 ) {
+			CALC_K256 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+		}
+		for ( i = 0; i < 32; i += 2 ) {
+			CALC_K256 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+		}
+#else
 		/* Calculate whitening and round subkeys.  The constants are
 		 * indices of subkeys, preprocessed through q0 and q1. */
 		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
@@ -677,12 +694,22 @@
 		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
 		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
 		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+#endif
 	} else if (key_len == 24) { /* 192-bit key */
 		/* Compute the S-boxes. */
 		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
 		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
 		}
 
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+		/* Calculate whitening and round subkeys */
+		for ( i = 0; i < 8; i += 2 ) {
+			CALC_K192 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+		}
+		for ( i = 0; i < 32; i += 2 ) {
+			CALC_K192 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+		}
+#else
 		/* Calculate whitening and round subkeys.  The constants are
 		 * indices of subkeys, preprocessed through q0 and q1. */
 		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
@@ -705,12 +732,22 @@
 		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
 		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
 		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+#endif
 	} else { /* 128-bit key */
 		/* Compute the S-boxes. */
 		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
 			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
 		}
 
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+		/* Calculate whitening and round subkeys */
+		for ( i = 0; i < 8; i += 2 ) {
+			CALC_K (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
+		}
+		for ( i = 0; i < 32; i += 2 ) {
+			CALC_K (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
+		}
+#else
 		/* Calculate whitening and round subkeys.  The constants are
 		 * indices of subkeys, preprocessed through q0 and q1. */
 		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
@@ -733,6 +770,7 @@
 		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
 		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
 		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+#endif
 	}
 
 	return 0;

[Index of Archives]     [Kernel]     [Gnu Classpath]     [Gnu Crypto]     [DM Crypt]     [Netfilter]     [Bugtraq]

  Powered by Linux