[PATCH] staging: skein: threefish_block: Use ror64

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Use the ror64() inline helper instead of open-coded shift-and-or
rotate expressions to improve readability and shorten the code a little.

Done with perl:

$ perl -p -i -e 's/\((\w+) \>\> (\d+)\) \| \(\1 \<\< \(64 \- \2\)\)/ror64(\1, \2)/g' drivers/staging/skein/threefish_block.c

Signed-off-by: Joe Perches <joe@xxxxxxxxxxx>
---
 drivers/staging/skein/threefish_block.c | 2144 +++++++++++++++----------------
 1 file changed, 1072 insertions(+), 1072 deletions(-)

diff --git a/drivers/staging/skein/threefish_block.c b/drivers/staging/skein/threefish_block.c
index e19ac43..a95563f 100644
--- a/drivers/staging/skein/threefish_block.c
+++ b/drivers/staging/skein/threefish_block.c
@@ -512,622 +512,622 @@ void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
 	b2 -= k0 + t1;
 	b3 -= k1 + 18;
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k2;
 	b1 -= k3 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k4 + t0;
 	b3 -= k0 + 17;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k1;
 	b1 -= k2 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k3 + t2;
 	b3 -= k4 + 16;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k0;
 	b1 -= k1 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k2 + t1;
 	b3 -= k3 + 15;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k4;
 	b1 -= k0 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k1 + t0;
 	b3 -= k2 + 14;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k3;
 	b1 -= k4 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k0 + t2;
 	b3 -= k1 + 13;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k2;
 	b1 -= k3 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k4 + t1;
 	b3 -= k0 + 12;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k1;
 	b1 -= k2 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k3 + t0;
 	b3 -= k4 + 11;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k0;
 	b1 -= k1 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k2 + t2;
 	b3 -= k3 + 10;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k4;
 	b1 -= k0 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k1 + t1;
 	b3 -= k2 + 9;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k3;
 	b1 -= k4 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k0 + t0;
 	b3 -= k1 + 8;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k2;
 	b1 -= k3 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k4 + t2;
 	b3 -= k0 + 7;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k1;
 	b1 -= k2 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k3 + t1;
 	b3 -= k4 + 6;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k0;
 	b1 -= k1 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k2 + t0;
 	b3 -= k3 + 5;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k4;
 	b1 -= k0 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k1 + t2;
 	b3 -= k2 + 4;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k3;
 	b1 -= k4 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k0 + t1;
 	b3 -= k1 + 3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k2;
 	b1 -= k3 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k4 + t0;
 	b3 -= k0 + 2;
 
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b3 = ror64(tmp, 32);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b1 = ror64(tmp, 32);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b1 = ror64(tmp, 58);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b3 = ror64(tmp, 46);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b1 = ror64(tmp, 12);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b0 -= b1 + k1;
 	b1 -= k2 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b3 = ror64(tmp, 33);
 	b2 -= b3 + k3 + t2;
 	b3 -= k4 + 1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b3 = ror64(tmp, 5);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b1 = ror64(tmp, 37);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b0 -= b1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b3 = ror64(tmp, 40);
 	b2 -= b3;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b3 = ror64(tmp, 52);
 	b0 -= b3;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b1 = ror64(tmp, 57);
 	b2 -= b1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b1 = ror64(tmp, 14);
 	b0 -= b1 + k0;
 	b1 -= k1 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b2 -= b3 + k2 + t1;
 	b3 -= k3;
 
@@ -2125,1226 +2125,1226 @@ void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
 	b7 -= k7 + 18;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k5 + t0;
 	b7 -= k6 + 17;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k3;
 	b5 -= k4 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k1;
 	b3 -= k2;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k8;
 	b1 -= k0;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k4 + t2;
 	b7 -= k5 + 16;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k2;
 	b5 -= k3 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k0;
 	b3 -= k1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k7;
 	b1 -= k8;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k3 + t1;
 	b7 -= k4 + 15;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k1;
 	b5 -= k2 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k8;
 	b3 -= k0;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k6;
 	b1 -= k7;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k2 + t0;
 	b7 -= k3 + 14;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k0;
 	b5 -= k1 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k7;
 	b3 -= k8;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k5;
 	b1 -= k6;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k1 + t2;
 	b7 -= k2 + 13;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k8;
 	b5 -= k0 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k6;
 	b3 -= k7;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k4;
 	b1 -= k5;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k0 + t1;
 	b7 -= k1 + 12;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k7;
 	b5 -= k8 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k5;
 	b3 -= k6;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k3;
 	b1 -= k4;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k8 + t0;
 	b7 -= k0 + 11;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k6;
 	b5 -= k7 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k4;
 	b3 -= k5;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k2;
 	b1 -= k3;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k7 + t2;
 	b7 -= k8 + 10;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k5;
 	b5 -= k6 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k3;
 	b3 -= k4;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k1;
 	b1 -= k2;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k6 + t1;
 	b7 -= k7 + 9;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k4;
 	b5 -= k5 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k2;
 	b3 -= k3;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k0;
 	b1 -= k1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k5 + t0;
 	b7 -= k6 + 8;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k3;
 	b5 -= k4 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k1;
 	b3 -= k2;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k8;
 	b1 -= k0;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k4 + t2;
 	b7 -= k5 + 7;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k2;
 	b5 -= k3 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k0;
 	b3 -= k1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k7;
 	b1 -= k8;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k3 + t1;
 	b7 -= k4 + 6;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k1;
 	b5 -= k2 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k8;
 	b3 -= k0;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k6;
 	b1 -= k7;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k2 + t0;
 	b7 -= k3 + 5;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k0;
 	b5 -= k1 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k7;
 	b3 -= k8;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k5;
 	b1 -= k6;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k1 + t2;
 	b7 -= k2 + 4;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k8;
 	b5 -= k0 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k6;
 	b3 -= k7;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k4;
 	b1 -= k5;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k0 + t1;
 	b7 -= k1 + 3;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k7;
 	b5 -= k8 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k5;
 	b3 -= k6;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k3;
 	b1 -= k4;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k8 + t0;
 	b7 -= k0 + 2;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k6;
 	b5 -= k7 + t2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k4;
 	b3 -= k5;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k2;
 	b1 -= k3;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b3 = ror64(tmp, 22);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b5 = ror64(tmp, 56);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b7 = ror64(tmp, 35);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b1 = ror64(tmp, 8);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b7 = ror64(tmp, 43);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b5 = ror64(tmp, 39);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b3 = ror64(tmp, 29);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b1 = ror64(tmp, 25);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b3 = ror64(tmp, 17);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b5 = ror64(tmp, 10);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b7 = ror64(tmp, 50);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b7 = ror64(tmp, 24);
 	b6 -= b7 + k7 + t2;
 	b7 -= k8 + 1;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b5 = ror64(tmp, 34);
 	b4 -= b5 + k5;
 	b5 -= k6 + t1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b3 = ror64(tmp, 30);
 	b2 -= b3 + k3;
 	b3 -= k4;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b1 = ror64(tmp, 39);
 	b0 -= b1 + k1;
 	b1 -= k2;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b3 = ror64(tmp, 56);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b5 = ror64(tmp, 54);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b7 = ror64(tmp, 9);
 	b0 -= b7;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b1 = ror64(tmp, 44);
 	b6 -= b1;
 
 	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b7 = ror64(tmp, 39);
 	b2 -= b7;
 
 	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b5 = ror64(tmp, 36);
 	b0 -= b5;
 
 	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b3 = ror64(tmp, 49);
 	b6 -= b3;
 
 	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b1 = ror64(tmp, 17);
 	b4 -= b1;
 
 	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b3 = ror64(tmp, 42);
 	b0 -= b3;
 
 	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b5 = ror64(tmp, 14);
 	b6 -= b5;
 
 	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b7 = ror64(tmp, 27);
 	b4 -= b7;
 
 	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b1 = ror64(tmp, 33);
 	b2 -= b1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b7 = ror64(tmp, 37);
 	b6 -= b7 + k6 + t1;
 	b7 -= k7;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b5 = ror64(tmp, 19);
 	b4 -= b5 + k4;
 	b5 -= k5 + t0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b3 = ror64(tmp, 36);
 	b2 -= b3 + k2;
 	b3 -= k3;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b0 -= b1 + k0;
 	b1 -= k1;
 
@@ -5521,2722 +5521,2722 @@ void threefish_decrypt_1024(struct threefish_key *key_ctx, u64 *input,
 	b14 -= k0 + t0;
 	b15 -= k1 + 20;
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k16 + t2;
 	b15 -= k0 + 19;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k14;
 	b13 -= k15 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k12;
 	b11 -= k13;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k10;
 	b9 -= k11;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k8;
 	b7 -= k9;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k6;
 	b5 -= k7;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k4;
 	b3 -= k5;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k2;
 	b1 -= k3;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k15 + t1;
 	b15 -= k16 + 18;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k13;
 	b13 -= k14 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k11;
 	b11 -= k12;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k9;
 	b9 -= k10;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k7;
 	b7 -= k8;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k5;
 	b5 -= k6;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k3;
 	b3 -= k4;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k1;
 	b1 -= k2;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k14 + t0;
 	b15 -= k15 + 17;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k12;
 	b13 -= k13 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k10;
 	b11 -= k11;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k8;
 	b9 -= k9;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k6;
 	b7 -= k7;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k4;
 	b5 -= k5;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k2;
 	b3 -= k3;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k0;
 	b1 -= k1;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k13 + t2;
 	b15 -= k14 + 16;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k11;
 	b13 -= k12 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k9;
 	b11 -= k10;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k7;
 	b9 -= k8;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k5;
 	b7 -= k6;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k3;
 	b5 -= k4;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k1;
 	b3 -= k2;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k16;
 	b1 -= k0;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k12 + t1;
 	b15 -= k13 + 15;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k10;
 	b13 -= k11 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k8;
 	b11 -= k9;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k6;
 	b9 -= k7;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k4;
 	b7 -= k5;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k2;
 	b5 -= k3;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k0;
 	b3 -= k1;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k15;
 	b1 -= k16;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k11 + t0;
 	b15 -= k12 + 14;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k9;
 	b13 -= k10 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k7;
 	b11 -= k8;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k5;
 	b9 -= k6;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k3;
 	b7 -= k4;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k1;
 	b5 -= k2;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k16;
 	b3 -= k0;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k14;
 	b1 -= k15;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k10 + t2;
 	b15 -= k11 + 13;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k8;
 	b13 -= k9 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k6;
 	b11 -= k7;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k4;
 	b9 -= k5;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k2;
 	b7 -= k3;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k0;
 	b5 -= k1;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k15;
 	b3 -= k16;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k13;
 	b1 -= k14;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k9 + t1;
 	b15 -= k10 + 12;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k7;
 	b13 -= k8 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k5;
 	b11 -= k6;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k3;
 	b9 -= k4;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k1;
 	b7 -= k2;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k16;
 	b5 -= k0;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k14;
 	b3 -= k15;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k12;
 	b1 -= k13;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k8 + t0;
 	b15 -= k9 + 11;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k6;
 	b13 -= k7 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k4;
 	b11 -= k5;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k2;
 	b9 -= k3;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k0;
 	b7 -= k1;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k15;
 	b5 -= k16;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k13;
 	b3 -= k14;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k11;
 	b1 -= k12;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k7 + t2;
 	b15 -= k8 + 10;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k5;
 	b13 -= k6 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k3;
 	b11 -= k4;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k1;
 	b9 -= k2;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k16;
 	b7 -= k0;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k14;
 	b5 -= k15;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k12;
 	b3 -= k13;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k10;
 	b1 -= k11;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k6 + t1;
 	b15 -= k7 + 9;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k4;
 	b13 -= k5 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k2;
 	b11 -= k3;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k0;
 	b9 -= k1;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k15;
 	b7 -= k16;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k13;
 	b5 -= k14;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k11;
 	b3 -= k12;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k9;
 	b1 -= k10;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k5 + t0;
 	b15 -= k6 + 8;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k3;
 	b13 -= k4 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k1;
 	b11 -= k2;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k16;
 	b9 -= k0;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k14;
 	b7 -= k15;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k12;
 	b5 -= k13;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k10;
 	b3 -= k11;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k8;
 	b1 -= k9;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k4 + t2;
 	b15 -= k5 + 7;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k2;
 	b13 -= k3 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k0;
 	b11 -= k1;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k15;
 	b9 -= k16;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k13;
 	b7 -= k14;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k11;
 	b5 -= k12;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k9;
 	b3 -= k10;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k7;
 	b1 -= k8;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k3 + t1;
 	b15 -= k4 + 6;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k1;
 	b13 -= k2 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k16;
 	b11 -= k0;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k14;
 	b9 -= k15;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k12;
 	b7 -= k13;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k10;
 	b5 -= k11;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k8;
 	b3 -= k9;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k6;
 	b1 -= k7;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k2 + t0;
 	b15 -= k3 + 5;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k0;
 	b13 -= k1 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k15;
 	b11 -= k16;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k13;
 	b9 -= k14;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k11;
 	b7 -= k12;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k9;
 	b5 -= k10;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k7;
 	b3 -= k8;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k5;
 	b1 -= k6;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k1 + t2;
 	b15 -= k2 + 4;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k16;
 	b13 -= k0 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k14;
 	b11 -= k15;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k12;
 	b9 -= k13;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k10;
 	b7 -= k11;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k8;
 	b5 -= k9;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k6;
 	b3 -= k7;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k4;
 	b1 -= k5;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k0 + t1;
 	b15 -= k1 + 3;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k15;
 	b13 -= k16 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k13;
 	b11 -= k14;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k11;
 	b9 -= k12;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k9;
 	b7 -= k10;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k7;
 	b5 -= k8;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k5;
 	b3 -= k6;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k3;
 	b1 -= k4;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k16 + t0;
 	b15 -= k0 + 2;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k14;
 	b13 -= k15 + t2;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k12;
 	b11 -= k13;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k10;
 	b9 -= k11;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k8;
 	b7 -= k9;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k6;
 	b5 -= k7;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k4;
 	b3 -= k5;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k2;
 	b1 -= k3;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 20) | (tmp << (64 - 20));
+	b7 = ror64(tmp, 20);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 37) | (tmp << (64 - 37));
+	b3 = ror64(tmp, 37);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 31) | (tmp << (64 - 31));
+	b5 = ror64(tmp, 31);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b1 = ror64(tmp, 23);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 52) | (tmp << (64 - 52));
+	b9 = ror64(tmp, 52);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 35) | (tmp << (64 - 35));
+	b13 = ror64(tmp, 35);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 48) | (tmp << (64 - 48));
+	b11 = ror64(tmp, 48);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 9) | (tmp << (64 - 9));
+	b15 = ror64(tmp, 9);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 25) | (tmp << (64 - 25));
+	b9 = ror64(tmp, 25);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 44) | (tmp << (64 - 44));
+	b11 = ror64(tmp, 44);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 42) | (tmp << (64 - 42));
+	b13 = ror64(tmp, 42);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 19) | (tmp << (64 - 19));
+	b15 = ror64(tmp, 19);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b1 = ror64(tmp, 46);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 47) | (tmp << (64 - 47));
+	b3 = ror64(tmp, 47);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 44) | (tmp << (64 - 44));
+	b5 = ror64(tmp, 44);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 42) | (tmp << (64 - 42));
+	b5 = ror64(tmp, 42);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 53) | (tmp << (64 - 53));
+	b3 = ror64(tmp, 53);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 4) | (tmp << (64 - 4));
+	b7 = ror64(tmp, 4);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 51) | (tmp << (64 - 51));
+	b15 = ror64(tmp, 51);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 56) | (tmp << (64 - 56));
+	b11 = ror64(tmp, 56);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 34) | (tmp << (64 - 34));
+	b13 = ror64(tmp, 34);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 16) | (tmp << (64 - 16));
+	b9 = ror64(tmp, 16);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 30) | (tmp << (64 - 30));
+	b15 = ror64(tmp, 30);
 	b14 -= b15 + k15 + t2;
 	b15 -= k16 + 1;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 44) | (tmp << (64 - 44));
+	b13 = ror64(tmp, 44);
 	b12 -= b13 + k13;
 	b13 -= k14 + t1;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 47) | (tmp << (64 - 47));
+	b11 = ror64(tmp, 47);
 	b10 -= b11 + k11;
 	b11 -= k12;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 12) | (tmp << (64 - 12));
+	b9 = ror64(tmp, 12);
 	b8 -= b9 + k9;
 	b9 -= k10;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 31) | (tmp << (64 - 31));
+	b7 = ror64(tmp, 31);
 	b6 -= b7 + k7;
 	b7 -= k8;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 37) | (tmp << (64 - 37));
+	b5 = ror64(tmp, 37);
 	b4 -= b5 + k5;
 	b5 -= k6;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 9) | (tmp << (64 - 9));
+	b3 = ror64(tmp, 9);
 	b2 -= b3 + k3;
 	b3 -= k4;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 41) | (tmp << (64 - 41));
+	b1 = ror64(tmp, 41);
 	b0 -= b1 + k1;
 	b1 -= k2;
 
 	tmp = b7 ^ b12;
-	b7 = (tmp >> 25) | (tmp << (64 - 25));
+	b7 = ror64(tmp, 25);
 	b12 -= b7;
 
 	tmp = b3 ^ b10;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b3 = ror64(tmp, 16);
 	b10 -= b3;
 
 	tmp = b5 ^ b8;
-	b5 = (tmp >> 28) | (tmp << (64 - 28));
+	b5 = ror64(tmp, 28);
 	b8 -= b5;
 
 	tmp = b1 ^ b14;
-	b1 = (tmp >> 47) | (tmp << (64 - 47));
+	b1 = ror64(tmp, 47);
 	b14 -= b1;
 
 	tmp = b9 ^ b4;
-	b9 = (tmp >> 41) | (tmp << (64 - 41));
+	b9 = ror64(tmp, 41);
 	b4 -= b9;
 
 	tmp = b13 ^ b6;
-	b13 = (tmp >> 48) | (tmp << (64 - 48));
+	b13 = ror64(tmp, 48);
 	b6 -= b13;
 
 	tmp = b11 ^ b2;
-	b11 = (tmp >> 20) | (tmp << (64 - 20));
+	b11 = ror64(tmp, 20);
 	b2 -= b11;
 
 	tmp = b15 ^ b0;
-	b15 = (tmp >> 5) | (tmp << (64 - 5));
+	b15 = ror64(tmp, 5);
 	b0 -= b15;
 
 	tmp = b9 ^ b10;
-	b9 = (tmp >> 17) | (tmp << (64 - 17));
+	b9 = ror64(tmp, 17);
 	b10 -= b9;
 
 	tmp = b11 ^ b8;
-	b11 = (tmp >> 59) | (tmp << (64 - 59));
+	b11 = ror64(tmp, 59);
 	b8 -= b11;
 
 	tmp = b13 ^ b14;
-	b13 = (tmp >> 41) | (tmp << (64 - 41));
+	b13 = ror64(tmp, 41);
 	b14 -= b13;
 
 	tmp = b15 ^ b12;
-	b15 = (tmp >> 34) | (tmp << (64 - 34));
+	b15 = ror64(tmp, 34);
 	b12 -= b15;
 
 	tmp = b1 ^ b6;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b1 = ror64(tmp, 13);
 	b6 -= b1;
 
 	tmp = b3 ^ b4;
-	b3 = (tmp >> 51) | (tmp << (64 - 51));
+	b3 = ror64(tmp, 51);
 	b4 -= b3;
 
 	tmp = b5 ^ b2;
-	b5 = (tmp >> 4) | (tmp << (64 - 4));
+	b5 = ror64(tmp, 4);
 	b2 -= b5;
 
 	tmp = b7 ^ b0;
-	b7 = (tmp >> 33) | (tmp << (64 - 33));
+	b7 = ror64(tmp, 33);
 	b0 -= b7;
 
 	tmp = b1 ^ b8;
-	b1 = (tmp >> 52) | (tmp << (64 - 52));
+	b1 = ror64(tmp, 52);
 	b8 -= b1;
 
 	tmp = b5 ^ b14;
-	b5 = (tmp >> 23) | (tmp << (64 - 23));
+	b5 = ror64(tmp, 23);
 	b14 -= b5;
 
 	tmp = b3 ^ b12;
-	b3 = (tmp >> 18) | (tmp << (64 - 18));
+	b3 = ror64(tmp, 18);
 	b12 -= b3;
 
 	tmp = b7 ^ b10;
-	b7 = (tmp >> 49) | (tmp << (64 - 49));
+	b7 = ror64(tmp, 49);
 	b10 -= b7;
 
 	tmp = b15 ^ b4;
-	b15 = (tmp >> 55) | (tmp << (64 - 55));
+	b15 = ror64(tmp, 55);
 	b4 -= b15;
 
 	tmp = b11 ^ b6;
-	b11 = (tmp >> 10) | (tmp << (64 - 10));
+	b11 = ror64(tmp, 10);
 	b6 -= b11;
 
 	tmp = b13 ^ b2;
-	b13 = (tmp >> 19) | (tmp << (64 - 19));
+	b13 = ror64(tmp, 19);
 	b2 -= b13;
 
 	tmp = b9 ^ b0;
-	b9 = (tmp >> 38) | (tmp << (64 - 38));
+	b9 = ror64(tmp, 38);
 	b0 -= b9;
 
 	tmp = b15 ^ b14;
-	b15 = (tmp >> 37) | (tmp << (64 - 37));
+	b15 = ror64(tmp, 37);
 	b14 -= b15 + k14 + t1;
 	b15 -= k15;
 
 	tmp = b13 ^ b12;
-	b13 = (tmp >> 22) | (tmp << (64 - 22));
+	b13 = ror64(tmp, 22);
 	b12 -= b13 + k12;
 	b13 -= k13 + t0;
 
 	tmp = b11 ^ b10;
-	b11 = (tmp >> 17) | (tmp << (64 - 17));
+	b11 = ror64(tmp, 17);
 	b10 -= b11 + k10;
 	b11 -= k11;
 
 	tmp = b9 ^ b8;
-	b9 = (tmp >> 8) | (tmp << (64 - 8));
+	b9 = ror64(tmp, 8);
 	b8 -= b9 + k8;
 	b9 -= k9;
 
 	tmp = b7 ^ b6;
-	b7 = (tmp >> 47) | (tmp << (64 - 47));
+	b7 = ror64(tmp, 47);
 	b6 -= b7 + k6;
 	b7 -= k7;
 
 	tmp = b5 ^ b4;
-	b5 = (tmp >> 8) | (tmp << (64 - 8));
+	b5 = ror64(tmp, 8);
 	b4 -= b5 + k4;
 	b5 -= k5;
 
 	tmp = b3 ^ b2;
-	b3 = (tmp >> 13) | (tmp << (64 - 13));
+	b3 = ror64(tmp, 13);
 	b2 -= b3 + k2;
 	b3 -= k3;
 
 	tmp = b1 ^ b0;
-	b1 = (tmp >> 24) | (tmp << (64 - 24));
+	b1 = ror64(tmp, 24);
 	b0 -= b1 + k0;
 	b1 -= k1;
 
-- 
2.6.3.368.gf34be46

_______________________________________________
devel mailing list
devel@xxxxxxxxxxxxxxxxxxxxxx
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel




[Index of Archives]     [Linux Driver Backports]     [DMA Engine]     [Linux GPIO]     [Linux SPI]     [Video for Linux]     [Linux USB Devel]     [Linux Coverity]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]
  Powered by Linux