[PATCH 1/4] crypto/arm64: ghash - reduce performance impact of NEON yield checks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



As reported by Vakul, checking the TIF_NEED_RESCHED flag after every
iteration of the GHASH and AES-GCM core routines is having a considerable
performance impact on cores such as the Cortex-A53 with Crypto Extensions
implemented.

GHASH performance is down by 22% for large block sizes, and AES-GCM is
down by 16% for large block sizes and 128 bit keys. This appears to be
a result of the high performance of the crypto instructions on the one
hand (2.0 cycles per byte for GHASH, 3.0 cpb for AES-GCM), combined with
the relatively poor load/store performance of this simple core.

So let's reduce this performance impact by only doing the yield check
once every 32 blocks for GHASH (or 4 when using the version based on
8-bit polynomial multiplication), and once every 16 blocks for AES-GCM.
This way, we recover most of the performance while still limiting the
duration of scheduling blackouts due to disabling preemption to ~1000
cycles.

Cc: Vakul Garg <vakul.garg@xxxxxxx>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
 arch/arm64/crypto/ghash-ce-core.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index dcffb9e77589..9c14beaabeee 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -212,7 +212,7 @@
 	ushr		XL.2d, XL.2d, #1
 	.endm
 
-	.macro		__pmull_ghash, pn
+	.macro		__pmull_ghash, pn, yield_count
 	frame_push	5
 
 	mov		x19, x0
@@ -259,6 +259,9 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b
 
+	tst		w19, #(\yield_count - 1)
+	b.ne		1b
+
 	cbz		w19, 3f
 
 	if_will_cond_yield_neon
@@ -279,11 +282,11 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update_p64)
-	__pmull_ghash	p64
+	__pmull_ghash	p64, 32
 ENDPROC(pmull_ghash_update_p64)
 
 ENTRY(pmull_ghash_update_p8)
-	__pmull_ghash	p8
+	__pmull_ghash	p8, 4
 ENDPROC(pmull_ghash_update_p8)
 
 	KS		.req	v8
@@ -428,6 +431,9 @@ CPU_LE(	rev		x28, x28	)
 	st1		{INP.16b}, [x21], #16
 	.endif
 
+	tst		w19, #0xf			// do yield check only
+	b.ne		1b				// once every 16 blocks
+
 	cbz		w19, 3f
 
 	if_will_cond_yield_neon
-- 
2.11.0




[Index of Archives]     [Kernel]     [Gnu Classpath]     [Gnu Crypto]     [DM Crypt]     [Netfilter]     [Bugtraq]

  Powered by Linux