On Mon, 2018-08-27 at 15:28 -0700, Tim Chen wrote: > On 08/22/2018 01:51 AM, Ard Biesheuvel wrote: > > As it turns out, the AVX2 multibuffer SHA routines are currently > > broken [0], in a way that would have likely been noticed if this > > code were in wide use. Since the code is too complicated to be > > maintained by anyone except the original authors, and since the > > performance benefits for real-world use cases are debatable to > > begin with, it is better to drop it entirely for the moment. > > > > [0] https://marc.info/?l=linux-crypto-vger&m=153476243825350&w=2 > > Sorry I was out of the loop for a while and haven't been following > the code too closely. > > Megha is maintaining the code now. Before we pull the code, > please give us a chance to fix it first. > > Thanks. > > Tim > Hi, I am working to find a fix for these corner cases. If possible, we would like to fix the issues instead of removing the code altogether. -Megha > > > > Suggested-by: Eric Biggers <ebiggers@xxxxxxxxxx> > > Cc: Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> > > Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx> > > Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx> > > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> > > --- > > MAINTAINERS | 8 - > > arch/m68k/configs/amiga_defconfig | 1 - > > arch/m68k/configs/apollo_defconfig | 1 - > > arch/m68k/configs/atari_defconfig | 1 - > > arch/m68k/configs/bvme6000_defconfig | 1 - > > arch/m68k/configs/hp300_defconfig | 1 - > > arch/m68k/configs/mac_defconfig | 1 - > > arch/m68k/configs/multi_defconfig | 1 - > > arch/m68k/configs/mvme147_defconfig | 1 - > > arch/m68k/configs/mvme16x_defconfig | 1 - > > arch/m68k/configs/q40_defconfig | 1 - > > arch/m68k/configs/sun3_defconfig | 1 - > > arch/m68k/configs/sun3x_defconfig | 1 - > > arch/s390/configs/debug_defconfig | 1 - > > arch/s390/configs/performance_defconfig | 1 - > > arch/x86/crypto/Makefile | 3 - > > arch/x86/crypto/sha1-mb/Makefile | 14 - > > arch/x86/crypto/sha1-mb/sha1_mb.c | 1011 ---------------- > > arch/x86/crypto/sha1-mb/sha1_mb_ctx.h | 134 --- > > arch/x86/crypto/sha1-mb/sha1_mb_mgr.h | 110 -- > > .../crypto/sha1-mb/sha1_mb_mgr_datastruct.S | 287 ----- > > .../crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S | 304 ----- > > .../crypto/sha1-mb/sha1_mb_mgr_init_avx2.c | 64 - > > .../crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S | 209 ---- > > arch/x86/crypto/sha1-mb/sha1_x8_avx2.S | 492 -------- > > arch/x86/crypto/sha256-mb/Makefile | 14 - > > arch/x86/crypto/sha256-mb/sha256_mb.c | 1013 ---------------- > > arch/x86/crypto/sha256-mb/sha256_mb_ctx.h | 134 --- > > arch/x86/crypto/sha256-mb/sha256_mb_mgr.h | 108 -- > > .../sha256-mb/sha256_mb_mgr_datastruct.S | 304 ----- > > .../sha256-mb/sha256_mb_mgr_flush_avx2.S | 307 ----- > > .../sha256-mb/sha256_mb_mgr_init_avx2.c | 65 - > > .../sha256-mb/sha256_mb_mgr_submit_avx2.S | 214 ---- > > arch/x86/crypto/sha256-mb/sha256_x8_avx2.S | 598 ---------- > > arch/x86/crypto/sha512-mb/Makefile | 12 - > > arch/x86/crypto/sha512-mb/sha512_mb.c | 1047 ----------------- > > arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 128 -- > > arch/x86/crypto/sha512-mb/sha512_mb_mgr.h | 104 -- > > .../sha512-mb/sha512_mb_mgr_datastruct.S | 281 ----- > > .../sha512-mb/sha512_mb_mgr_flush_avx2.S | 297 ----- > > .../sha512-mb/sha512_mb_mgr_init_avx2.c | 69 -- > > .../sha512-mb/sha512_mb_mgr_submit_avx2.S | 224 ---- > > arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 531 --------- > > crypto/Kconfig | 62 - > > crypto/Makefile | 1 - > > crypto/mcryptd.c | 675 ----------- > > include/crypto/mcryptd.h | 114 -- > > 47 files changed, 8952 deletions(-) > > delete mode 100644 arch/x86/crypto/sha1-mb/Makefile > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb.c > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_x8_avx2.S > > delete mode 100644 arch/x86/crypto/sha256-mb/Makefile > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb.c > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > > delete mode 100644 arch/x86/crypto/sha512-mb/Makefile > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb.c > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > > delete mode 100644 crypto/mcryptd.c > > delete mode 100644 include/crypto/mcryptd.h > > > > diff --git a/MAINTAINERS b/MAINTAINERS > > index 24b200d91b30..05747b8ac88e 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -7487,14 +7487,6 @@ S: Supported > > F: drivers/infiniband/hw/i40iw/ > > F: include/uapi/rdma/i40iw-abi.h > > > > -INTEL SHA MULTIBUFFER DRIVER > > -M: Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > -R: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > -L: linux-crypto@xxxxxxxxxxxxxxx > > -S: Supported > > -F: arch/x86/crypto/sha*-mb/ > > -F: crypto/mcryptd.c > > - > > INTEL TELEMETRY DRIVER > > M: Souvik Kumar Chakravarty <souvik.k.chakravarty@xxxxxxxxx> > > L: platform-driver-x86@xxxxxxxxxxxxxxx > > diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig > > index 1d5483f6e457..70b10d712624 100644 > > --- a/arch/m68k/configs/amiga_defconfig > > +++ b/arch/m68k/configs/amiga_defconfig > > @@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig > > index 52a0af127951..211eec5859e8 100644 > > --- a/arch/m68k/configs/apollo_defconfig > > +++ b/arch/m68k/configs/apollo_defconfig > > @@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig > > index b3103e51268a..0da45c6084f7 100644 > > --- a/arch/m68k/configs/atari_defconfig > > +++ b/arch/m68k/configs/atari_defconfig > > @@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig > > index fb7d651a4cab..c09ae7219416 100644 > > --- a/arch/m68k/configs/bvme6000_defconfig > > +++ b/arch/m68k/configs/bvme6000_defconfig > > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig > > index 6b37f5537c39..8c4775b30748 100644 > > --- a/arch/m68k/configs/hp300_defconfig > > +++ b/arch/m68k/configs/hp300_defconfig > > @@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig > > index c717bf879449..48ad520e2f2d 100644 > > --- a/arch/m68k/configs/mac_defconfig > > +++ b/arch/m68k/configs/mac_defconfig > > @@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig > > index 226c994ce794..3a3cccb9f625 100644 > > --- a/arch/m68k/configs/multi_defconfig > > +++ b/arch/m68k/configs/multi_defconfig > > @@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig > > index b383327fd77a..63dc311f94ff 100644 > > --- a/arch/m68k/configs/mvme147_defconfig > > +++ b/arch/m68k/configs/mvme147_defconfig > > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig > > index 9783d3deb9e9..1ae39d1f9bb5 100644 > > --- a/arch/m68k/configs/mvme16x_defconfig > > +++ b/arch/m68k/configs/mvme16x_defconfig > > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig > > index a35d10ee10cb..ba2f351811da 100644 > > --- a/arch/m68k/configs/q40_defconfig > > +++ b/arch/m68k/configs/q40_defconfig > > @@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig > > index 573bf922d448..544b7475ff6a 100644 > > --- a/arch/m68k/configs/sun3_defconfig > > +++ b/arch/m68k/configs/sun3_defconfig > > @@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig > > index efb27a7fcc55..149edafbb9f9 100644 > > --- a/arch/m68k/configs/sun3x_defconfig > > +++ b/arch/m68k/configs/sun3x_defconfig > > @@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m > > CONFIG_CRYPTO_MANAGER=y > > CONFIG_CRYPTO_USER=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_AEGIS128=m > > diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig > > index 941d8cc6c9f5..259d1698ac50 100644 > > --- a/arch/s390/configs/debug_defconfig > > +++ b/arch/s390/configs/debug_defconfig > > @@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m > > # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set > > CONFIG_CRYPTO_PCRYPT=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_LRW=m > > diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig > > index eb6f75f24208..37fd60c20e22 100644 > > --- a/arch/s390/configs/performance_defconfig > > +++ b/arch/s390/configs/performance_defconfig > > @@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m > > # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set > > CONFIG_CRYPTO_PCRYPT=m > > CONFIG_CRYPTO_CRYPTD=m > > -CONFIG_CRYPTO_MCRYPTD=m > > CONFIG_CRYPTO_TEST=m > > CONFIG_CRYPTO_CHACHA20POLY1305=m > > CONFIG_CRYPTO_LRW=m > > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile > > index a450ad573dcb..9edfa5469f9f 100644 > > --- a/arch/x86/crypto/Makefile > > +++ b/arch/x86/crypto/Makefile > > @@ -60,9 +60,6 @@ endif > > ifeq ($(avx2_supported),yes) > > obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o > > obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o > > - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ > > - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ > > - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ > > > > obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o > > endif > > diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile > > deleted file mode 100644 > > index 815ded3ba90e..000000000000 > > --- a/arch/x86/crypto/sha1-mb/Makefile > > +++ /dev/null > > @@ -1,14 +0,0 @@ > > -# SPDX-License-Identifier: GPL-2.0 > > -# > > -# Arch-specific CryptoAPI modules. > > -# > > - > > -OBJECT_FILES_NON_STANDARD := y > > - > > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > > - $(comma)4)$(comma)%ymm2,yes,no) > > -ifeq ($(avx2_supported),yes) > > - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o > > - sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ > > - sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o > > -endif > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c > > deleted file mode 100644 > > index b93805664c1d..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb.c > > +++ /dev/null > > @@ -1,1011 +0,0 @@ > > -/* > > - * Multi buffer SHA1 algorithm Glue Code > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > - > > -#include <crypto/internal/hash.h> > > -#include <linux/init.h> > > -#include <linux/module.h> > > -#include <linux/mm.h> > > -#include <linux/cryptohash.h> > > -#include <linux/types.h> > > -#include <linux/list.h> > > -#include <crypto/scatterwalk.h> > > -#include <crypto/sha.h> > > -#include <crypto/mcryptd.h> > > -#include <crypto/crypto_wq.h> > > -#include <asm/byteorder.h> > > -#include <linux/hardirq.h> > > -#include <asm/fpu/api.h> > > -#include "sha1_mb_ctx.h" > > - > > -#define FLUSH_INTERVAL 1000 /* in usec */ > > - > > -static struct mcryptd_alg_state sha1_mb_alg_state; > > - > > -struct sha1_mb_ctx { > > - struct mcryptd_ahash *mcryptd_tfm; > > -}; > > - > > -static inline struct mcryptd_hash_request_ctx > > - *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) > > -{ > > - struct ahash_request *areq; > > - > > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > > -} > > - > > -static inline struct ahash_request > > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > > -{ > > - return container_of((void *) ctx, struct ahash_request, __ctx); > > -} > > - > > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > > - struct ahash_request *areq) > > -{ > > - rctx->flag = HASH_UPDATE; > > -} > > - > > -static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); > > -static asmlinkage struct job_sha1* (*sha1_job_mgr_submit) > > - (struct sha1_mb_mgr *state, struct job_sha1 *job); > > -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) > > - (struct sha1_mb_mgr *state); > > -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) > > - (struct sha1_mb_mgr *state); > > - > > -static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], > > - uint64_t total_len) > > -{ > > - uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); > > - > > - memset(&padblock[i], 0, SHA1_BLOCK_SIZE); > > - padblock[i] = 0x80; > > - > > - i += ((SHA1_BLOCK_SIZE - 1) & > > - (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) > > - + 1 + SHA1_PADLENGTHFIELD_SIZE; > > - > > -#if SHA1_PADLENGTHFIELD_SIZE == 16 > > - *((uint64_t *) &padblock[i - 16]) = 0; > > -#endif > > - > > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > > - > > - /* Number of extra blocks to hash */ > > - return i >> SHA1_LOG2_BLOCK_SIZE; > > -} > > - > > -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, > > - struct sha1_hash_ctx *ctx) > > -{ > > - while (ctx) { > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Clear PROCESSING bit */ > > - ctx->status = HASH_CTX_STS_COMPLETE; > > - return ctx; > > - } > > - > > - /* > > - * If the extra blocks are empty, begin hashing what remains > > - * in the user's buffer. > > - */ > > - if (ctx->partial_block_buffer_length == 0 && > > - ctx->incoming_buffer_length) { > > - > > - const void *buffer = ctx->incoming_buffer; > > - uint32_t len = ctx->incoming_buffer_length; > > - uint32_t copy_len; > > - > > - /* > > - * Only entire blocks can be hashed. > > - * Copy remainder to extra blocks buffer. > > - */ > > - copy_len = len & (SHA1_BLOCK_SIZE-1); > > - > > - if (copy_len) { > > - len -= copy_len; > > - memcpy(ctx->partial_block_buffer, > > - ((const char *) buffer + len), > > - copy_len); > > - ctx->partial_block_buffer_length = copy_len; > > - } > > - > > - ctx->incoming_buffer_length = 0; > > - > > - /* len should be a multiple of the block size now */ > > - assert((len % SHA1_BLOCK_SIZE) == 0); > > - > > - /* Set len to the number of blocks to be hashed */ > > - len >>= SHA1_LOG2_BLOCK_SIZE; > > - > > - if (len) { > > - > > - ctx->job.buffer = (uint8_t *) buffer; > > - ctx->job.len = len; > > - ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, > > - &ctx->job); > > - continue; > > - } > > - } > > - > > - /* > > - * If the extra blocks are not empty, then we are > > - * either on the last block(s) or we need more > > - * user input before continuing. > > - */ > > - if (ctx->status & HASH_CTX_STS_LAST) { > > - > > - uint8_t *buf = ctx->partial_block_buffer; > > - uint32_t n_extra_blocks = > > - sha1_pad(buf, ctx->total_length); > > - > > - ctx->status = (HASH_CTX_STS_PROCESSING | > > - HASH_CTX_STS_COMPLETE); > > - ctx->job.buffer = buf; > > - ctx->job.len = (uint32_t) n_extra_blocks; > > - ctx = (struct sha1_hash_ctx *) > > - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); > > - continue; > > - } > > - > > - ctx->status = HASH_CTX_STS_IDLE; > > - return ctx; > > - } > > - > > - return NULL; > > -} > > - > > -static struct sha1_hash_ctx > > - *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) > > -{ > > - /* > > - * If get_comp_job returns NULL, there are no jobs complete. > > - * If get_comp_job returns a job, verify that it is safe to return to > > - * the user. > > - * If it is not ready, resubmit the job to finish processing. > > - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. > > - * Otherwise, all jobs currently being managed by the hash_ctx_mgr > > - * still need processing. > > - */ > > - struct sha1_hash_ctx *ctx; > > - > > - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); > > - return sha1_ctx_mgr_resubmit(mgr, ctx); > > -} > > - > > -static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) > > -{ > > - sha1_job_mgr_init(&mgr->mgr); > > -} > > - > > -static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, > > - struct sha1_hash_ctx *ctx, > > - const void *buffer, > > - uint32_t len, > > - int flags) > > -{ > > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > > - /* User should not pass anything other than UPDATE or LAST */ > > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > > - return ctx; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > > - /* Cannot submit to a currently processing job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > > - return ctx; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Cannot update a finished job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > > - return ctx; > > - } > > - > > - /* > > - * If we made it here, there were no errors during this call to > > - * submit > > - */ > > - ctx->error = HASH_CTX_ERROR_NONE; > > - > > - /* Store buffer ptr info from user */ > > - ctx->incoming_buffer = buffer; > > - ctx->incoming_buffer_length = len; > > - > > - /* > > - * Store the user's request flags and mark this ctx as currently > > - * being processed. > > - */ > > - ctx->status = (flags & HASH_LAST) ? > > - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > > - HASH_CTX_STS_PROCESSING; > > - > > - /* Advance byte counter */ > > - ctx->total_length += len; > > - > > - /* > > - * If there is anything currently buffered in the extra blocks, > > - * append to it until it contains a whole block. > > - * Or if the user's buffer contains less than a whole block, > > - * append as much as possible to the extra block. > > - */ > > - if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { > > - /* > > - * Compute how many bytes to copy from user buffer into > > - * extra block > > - */ > > - uint32_t copy_len = SHA1_BLOCK_SIZE - > > - ctx->partial_block_buffer_length; > > - if (len < copy_len) > > - copy_len = len; > > - > > - if (copy_len) { > > - /* Copy and update relevant pointers and counters */ > > - memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], > > - buffer, copy_len); > > - > > - ctx->partial_block_buffer_length += copy_len; > > - ctx->incoming_buffer = (const void *) > > - ((const char *)buffer + copy_len); > > - ctx->incoming_buffer_length = len - copy_len; > > - } > > - > > - /* > > - * The extra block should never contain more than 1 block > > - * here > > - */ > > - assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); > > - > > - /* > > - * If the extra block buffer contains exactly 1 block, it can > > - * be hashed. > > - */ > > - if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { > > - ctx->partial_block_buffer_length = 0; > > - > > - ctx->job.buffer = ctx->partial_block_buffer; > > - ctx->job.len = 1; > > - ctx = (struct sha1_hash_ctx *) > > - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); > > - } > > - } > > - > > - return sha1_ctx_mgr_resubmit(mgr, ctx); > > -} > > - > > -static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) > > -{ > > - struct sha1_hash_ctx *ctx; > > - > > - while (1) { > > - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); > > - > > - /* If flush returned 0, there are no more jobs in flight. */ > > - if (!ctx) > > - return NULL; > > - > > - /* > > - * If flush returned a job, resubmit the job to finish > > - * processing. > > - */ > > - ctx = sha1_ctx_mgr_resubmit(mgr, ctx); > > - > > - /* > > - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be > > - * returned. Otherwise, all jobs currently being managed by the > > - * sha1_ctx_mgr still need processing. Loop. > > - */ > > - if (ctx) > > - return ctx; > > - } > > -} > > - > > -static int sha1_mb_init(struct ahash_request *areq) > > -{ > > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - hash_ctx_init(sctx); > > - sctx->job.result_digest[0] = SHA1_H0; > > - sctx->job.result_digest[1] = SHA1_H1; > > - sctx->job.result_digest[2] = SHA1_H2; > > - sctx->job.result_digest[3] = SHA1_H3; > > - sctx->job.result_digest[4] = SHA1_H4; > > - sctx->total_length = 0; > > - sctx->partial_block_buffer_length = 0; > > - sctx->status = HASH_CTX_STS_IDLE; > > - > > - return 0; > > -} > > - > > -static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > > -{ > > - int i; > > - struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > > - __be32 *dst = (__be32 *) rctx->out; > > - > > - for (i = 0; i < 5; ++i) > > - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); > > - > > - return 0; > > -} > > - > > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > > - struct mcryptd_alg_cstate *cstate, bool flush) > > -{ > > - int flag = HASH_UPDATE; > > - int nbytes, err = 0; > > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > > - struct sha1_hash_ctx *sha_ctx; > > - > > - /* more work ? */ > > - while (!(rctx->flag & HASH_DONE)) { > > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > > - if (nbytes < 0) { > > - err = nbytes; > > - goto out; > > - } > > - /* check if the walk is done */ > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - if (rctx->flag & HASH_FINAL) > > - flag |= HASH_LAST; > > - > > - } > > - sha_ctx = (struct sha1_hash_ctx *) > > - ahash_request_ctx(&rctx->areq); > > - kernel_fpu_begin(); > > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, > > - rctx->walk.data, nbytes, flag); > > - if (!sha_ctx) { > > - if (flush) > > - sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); > > - } > > - kernel_fpu_end(); > > - if (sha_ctx) > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - else { > > - rctx = NULL; > > - goto out; > > - } > > - } > > - > > - /* copy the results */ > > - if (rctx->flag & HASH_FINAL) > > - sha1_mb_set_results(rctx); > > - > > -out: > > - *ret_rctx = rctx; > > - return err; > > -} > > - > > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate, > > - int err) > > -{ > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha1_hash_ctx *sha_ctx; > > - struct mcryptd_hash_request_ctx *req_ctx; > > - int ret; > > - > > - /* remove from work list */ > > - spin_lock(&cstate->work_lock); > > - list_del(&rctx->waiter); > > - spin_unlock(&cstate->work_lock); > > - > > - if (irqs_disabled()) > > - rctx->complete(&req->base, err); > > - else { > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > - } > > - > > - /* check to see if there are other jobs that are done */ > > - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); > > - while (sha_ctx) { > > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&req_ctx, cstate, false); > > - if (req_ctx) { > > - spin_lock(&cstate->work_lock); > > - list_del(&req_ctx->waiter); > > - spin_unlock(&cstate->work_lock); > > - > > - req = cast_mcryptd_ctx_to_req(req_ctx); > > - if (irqs_disabled()) > > - req_ctx->complete(&req->base, ret); > > - else { > > - local_bh_disable(); > > - req_ctx->complete(&req->base, ret); > > - local_bh_enable(); > > - } > > - } > > - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); > > - } > > - > > - return 0; > > -} > > - > > -static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate) > > -{ > > - unsigned long next_flush; > > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > > - > > - /* initialize tag */ > > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > > - rctx->tag.seq_num = cstate->next_seq_num++; > > - next_flush = rctx->tag.arrival + delay; > > - rctx->tag.expire = next_flush; > > - > > - spin_lock(&cstate->work_lock); > > - list_add_tail(&rctx->waiter, &cstate->work_list); > > - spin_unlock(&cstate->work_lock); > > - > > - mcryptd_arm_flusher(cstate, delay); > > -} > > - > > -static int sha1_mb_update(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha1_hash_ctx *sha_ctx; > > - int ret = 0, nbytes; > > - > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) > > - rctx->flag |= HASH_DONE; > > - > > - /* submit */ > > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > > - sha1_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > > - nbytes, HASH_UPDATE); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha1_mb_finup(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha1_hash_ctx *sha_ctx; > > - int ret = 0, flag = HASH_UPDATE, nbytes; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - flag = HASH_LAST; > > - } > > - > > - /* submit */ > > - rctx->flag |= HASH_FINAL; > > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > > - sha1_mb_add_list(rctx, cstate); > > - > > - kernel_fpu_begin(); > > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > > - nbytes, flag); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha1_mb_final(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > > - > > - struct sha1_hash_ctx *sha_ctx; > > - int ret = 0; > > - u8 data; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - rctx->flag |= HASH_DONE | HASH_FINAL; > > - > > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > > - /* flag HASH_FINAL and 0 data size */ > > - sha1_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, > > - HASH_LAST); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha1_mb_export(struct ahash_request *areq, void *out) > > -{ > > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(out, sctx, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha1_mb_import(struct ahash_request *areq, const void *in) > > -{ > > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(sctx, in, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) > > -{ > > - struct mcryptd_ahash *mcryptd_tfm; > > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - struct mcryptd_hash_ctx *mctx; > > - > > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", > > - CRYPTO_ALG_INTERNAL, > > - CRYPTO_ALG_INTERNAL); > > - if (IS_ERR(mcryptd_tfm)) > > - return PTR_ERR(mcryptd_tfm); > > - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > > - mctx->alg_state = &sha1_mb_alg_state; > > - ctx->mcryptd_tfm = mcryptd_tfm; > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > > - > > - return 0; > > -} > > - > > -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm) > > -{ > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - sizeof(struct sha1_hash_ctx)); > > - > > - return 0; > > -} > > - > > -static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static struct ahash_alg sha1_mb_areq_alg = { > > - .init = sha1_mb_init, > > - .update = sha1_mb_update, > > - .final = sha1_mb_final, > > - .finup = sha1_mb_finup, > > - .export = sha1_mb_export, > > - .import = sha1_mb_import, > > - .halg = { > > - .digestsize = SHA1_DIGEST_SIZE, > > - .statesize = sizeof(struct sha1_hash_ctx), > > - .base = { > > - .cra_name = "__sha1-mb", > > - .cra_driver_name = "__intel_sha1-mb", > > - .cra_priority = 100, > > - /* > > - * use ASYNC flag as some buffers in multi-buffer > > - * algo may not have completed before hashing thread > > - * sleep > > - */ > > - .cra_flags = CRYPTO_ALG_ASYNC | > > - CRYPTO_ALG_INTERNAL, > > - .cra_blocksize = SHA1_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT > > - (sha1_mb_areq_alg.halg.base.cra_list), > > - .cra_init = sha1_mb_areq_init_tfm, > > - .cra_exit = sha1_mb_areq_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha1_hash_ctx), > > - } > > - } > > -}; > > - > > -static int sha1_mb_async_init(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_init(mcryptd_req); > > -} > > - > > -static int sha1_mb_async_update(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_update(mcryptd_req); > > -} > > - > > -static int sha1_mb_async_finup(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_finup(mcryptd_req); > > -} > > - > > -static int sha1_mb_async_final(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_final(mcryptd_req); > > -} > > - > > -static int sha1_mb_async_digest(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_digest(mcryptd_req); > > -} > > - > > -static int sha1_mb_async_export(struct ahash_request *req, void *out) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_export(mcryptd_req, out); > > -} > > - > > -static int sha1_mb_async_import(struct ahash_request *req, const void *in) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > > - struct mcryptd_hash_request_ctx *rctx; > > - struct ahash_request *areq; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - rctx = ahash_request_ctx(mcryptd_req); > > - areq = &rctx->areq; > > - > > - ahash_request_set_tfm(areq, child); > > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > > - rctx->complete, req); > > - > > - return crypto_ahash_import(mcryptd_req, in); > > -} > > - > > -static struct ahash_alg sha1_mb_async_alg = { > > - .init = sha1_mb_async_init, > > - .update = sha1_mb_async_update, > > - .final = sha1_mb_async_final, > > - .finup = sha1_mb_async_finup, > > - .digest = sha1_mb_async_digest, > > - .export = sha1_mb_async_export, > > - .import = sha1_mb_async_import, > > - .halg = { > > - .digestsize = SHA1_DIGEST_SIZE, > > - .statesize = sizeof(struct sha1_hash_ctx), > > - .base = { > > - .cra_name = "sha1", > > - .cra_driver_name = "sha1_mb", > > - /* > > - * Low priority, since with few concurrent hash requests > > - * this is extremely slow due to the flush delay. Users > > - * whose workloads would benefit from this can request > > - * it explicitly by driver name, or can increase its > > - * priority at runtime using NETLINK_CRYPTO. > > - */ > > - .cra_priority = 50, > > - .cra_flags = CRYPTO_ALG_ASYNC, > > - .cra_blocksize = SHA1_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list), > > - .cra_init = sha1_mb_async_init_tfm, > > - .cra_exit = sha1_mb_async_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha1_mb_ctx), > > - .cra_alignmask = 0, > > - }, > > - }, > > -}; > > - > > -static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) > > -{ > > - struct mcryptd_hash_request_ctx *rctx; > > - unsigned long cur_time; > > - unsigned long next_flush = 0; > > - struct sha1_hash_ctx *sha_ctx; > > - > > - > > - cur_time = jiffies; > > - > > - while (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - if (time_before(cur_time, rctx->tag.expire)) > > - break; > > - kernel_fpu_begin(); > > - sha_ctx = (struct sha1_hash_ctx *) > > - sha1_ctx_mgr_flush(cstate->mgr); > > - kernel_fpu_end(); > > - if (!sha_ctx) { > > - pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); > > - break; > > - } > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - sha_finish_walk(&rctx, cstate, true); > > - sha_complete_job(rctx, cstate, 0); > > - } > > - > > - if (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - /* get the hash context and then flush time */ > > - next_flush = rctx->tag.expire; > > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > > - } > > - return next_flush; > > -} > > - > > -static int __init sha1_mb_mod_init(void) > > -{ > > - > > - int cpu; > > - int err; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - /* check for dependent cpu features */ > > - if (!boot_cpu_has(X86_FEATURE_AVX2) || > > - !boot_cpu_has(X86_FEATURE_BMI2)) > > - return -ENODEV; > > - > > - /* initialize multibuffer structures */ > > - sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); > > - > > - sha1_job_mgr_init = sha1_mb_mgr_init_avx2; > > - sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; > > - sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; > > - sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; > > - > > - if (!sha1_mb_alg_state.alg_cstate) > > - return -ENOMEM; > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > > - cpu_state->next_flush = 0; > > - cpu_state->next_seq_num = 0; > > - cpu_state->flusher_engaged = false; > > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > > - cpu_state->cpu = cpu; > > - cpu_state->alg_state = &sha1_mb_alg_state; > > - cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr), > > - GFP_KERNEL); > > - if (!cpu_state->mgr) > > - goto err2; > > - sha1_ctx_mgr_init(cpu_state->mgr); > > - INIT_LIST_HEAD(&cpu_state->work_list); > > - spin_lock_init(&cpu_state->work_lock); > > - } > > - sha1_mb_alg_state.flusher = &sha1_mb_flusher; > > - > > - err = crypto_register_ahash(&sha1_mb_areq_alg); > > - if (err) > > - goto err2; > > - err = crypto_register_ahash(&sha1_mb_async_alg); > > - if (err) > > - goto err1; > > - > > - > > - return 0; > > -err1: > > - crypto_unregister_ahash(&sha1_mb_areq_alg); > > -err2: > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha1_mb_alg_state.alg_cstate); > > - return -ENODEV; > > -} > > - > > -static void __exit sha1_mb_mod_fini(void) > > -{ > > - int cpu; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - crypto_unregister_ahash(&sha1_mb_async_alg); > > - crypto_unregister_ahash(&sha1_mb_areq_alg); > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha1_mb_alg_state.alg_cstate); > > -} > > - > > -module_init(sha1_mb_mod_init); > > -module_exit(sha1_mb_mod_fini); > > - > > -MODULE_LICENSE("GPL"); > > -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated"); > > - > > -MODULE_ALIAS_CRYPTO("sha1"); > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > > deleted file mode 100644 > > index 9454bd16f9f8..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > > +++ /dev/null > > @@ -1,134 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA context > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#ifndef _SHA_MB_CTX_INTERNAL_H > > -#define _SHA_MB_CTX_INTERNAL_H > > - > > -#include "sha1_mb_mgr.h" > > - > > -#define HASH_UPDATE 0x00 > > -#define HASH_LAST 0x01 > > -#define HASH_DONE 0x02 > > -#define HASH_FINAL 0x04 > > - > > -#define HASH_CTX_STS_IDLE 0x00 > > -#define HASH_CTX_STS_PROCESSING 0x01 > > -#define HASH_CTX_STS_LAST 0x02 > > -#define HASH_CTX_STS_COMPLETE 0x04 > > - > > -enum hash_ctx_error { > > - HASH_CTX_ERROR_NONE = 0, > > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > > - > > -#ifdef HASH_CTX_DEBUG > > - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, > > -#endif > > -}; > > - > > - > > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > > -#define hash_ctx_status(ctx) ((ctx)->status) > > -#define hash_ctx_error(ctx) ((ctx)->error) > > -#define hash_ctx_init(ctx) \ > > - do { \ > > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > > - } while (0) > > - > > - > > -/* Hash Constants and Typedefs */ > > -#define SHA1_DIGEST_LENGTH 5 > > -#define SHA1_LOG2_BLOCK_SIZE 6 > > - > > -#define SHA1_PADLENGTHFIELD_SIZE 8 > > - > > -#ifdef SHA_MB_DEBUG > > -#define assert(expr) \ > > -do { \ > > - if (unlikely(!(expr))) { \ > > - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ > > - #expr, __FILE__, __func__, __LINE__); \ > > - } \ > > -} while (0) > > -#else > > -#define assert(expr) do {} while (0) > > -#endif > > - > > -struct sha1_ctx_mgr { > > - struct sha1_mb_mgr mgr; > > -}; > > - > > -/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */ > > - > > -struct sha1_hash_ctx { > > - /* Must be at struct offset 0 */ > > - struct job_sha1 job; > > - /* status flag */ > > - int status; > > - /* error flag */ > > - int error; > > - > > - uint64_t total_length; > > - const void *incoming_buffer; > > - uint32_t incoming_buffer_length; > > - uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; > > - uint32_t partial_block_buffer_length; > > - void *user_data; > > -}; > > - > > -#endif > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > > deleted file mode 100644 > > index 08ad1a9acfd7..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > > +++ /dev/null > > @@ -1,110 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA1 algorithm manager > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * James Guilford <james.guilford@xxxxxxxxx> > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > -#ifndef __SHA_MB_MGR_H > > -#define __SHA_MB_MGR_H > > - > > - > > -#include <linux/types.h> > > - > > -#define NUM_SHA1_DIGEST_WORDS 5 > > - > > -enum job_sts { STS_UNKNOWN = 0, > > - STS_BEING_PROCESSED = 1, > > - STS_COMPLETED = 2, > > - STS_INTERNAL_ERROR = 3, > > - STS_ERROR = 4 > > -}; > > - > > -struct job_sha1 { > > - u8 *buffer; > > - u32 len; > > - u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); > > - enum job_sts status; > > - void *user_data; > > -}; > > - > > -/* SHA1 out-of-order scheduler */ > > - > > -/* typedef uint32_t sha1_digest_array[5][8]; */ > > - > > -struct sha1_args_x8 { > > - uint32_t digest[5][8]; > > - uint8_t *data_ptr[8]; > > -}; > > - > > -struct sha1_lane_data { > > - struct job_sha1 *job_in_lane; > > -}; > > - > > -struct sha1_mb_mgr { > > - struct sha1_args_x8 args; > > - > > - uint32_t lens[8]; > > - > > - /* each byte is index (0...7) of unused lanes */ > > - uint64_t unused_lanes; > > - /* byte 4 is set to FF as a flag */ > > - struct sha1_lane_data ldata[8]; > > -}; > > - > > - > > -#define SHA1_MB_MGR_NUM_LANES_AVX2 8 > > - > > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state); > > -struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state, > > - struct job_sha1 *job); > > -struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state); > > -struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state); > > - > > -#endif > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > > deleted file mode 100644 > > index 86688c6e7a25..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > > +++ /dev/null > > @@ -1,287 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA1 algorithm data structure > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * James Guilford <james.guilford@xxxxxxxxx> > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -# Macros for defining data structures > > - > > -# Usage example > > - > > -#START_FIELDS # JOB_AES > > -### name size align > > -#FIELD _plaintext, 8, 8 # pointer to plaintext > > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > > -#FIELD _IV, 16, 8 # IV > > -#FIELD _keys, 8, 8 # pointer to keys > > -#FIELD _len, 4, 4 # length in bytes > > -#FIELD _status, 4, 4 # status enumeration > > -#FIELD _user_data, 8, 8 # pointer to user data > > -#UNION _union, size1, align1, \ > > -# size2, align2, \ > > -# size3, align3, \ > > -# ... > > -#END_FIELDS > > -#%assign _JOB_AES_size _FIELD_OFFSET > > -#%assign _JOB_AES_align _STRUCT_ALIGN > > - > > -######################################################################### > > - > > -# Alternate "struc-like" syntax: > > -# STRUCT job_aes2 > > -# RES_Q .plaintext, 1 > > -# RES_Q .ciphertext, 1 > > -# RES_DQ .IV, 1 > > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > > -# RES_U .union, size1, align1, \ > > -# size2, align2, \ > > -# ... > > -# ENDSTRUCT > > -# # Following only needed if nesting > > -# %assign job_aes2_size _FIELD_OFFSET > > -# %assign job_aes2_align _STRUCT_ALIGN > > -# > > -# RES_* macros take a name, a count and an optional alignment. > > -# The count in in terms of the base size of the macro, and the > > -# default alignment is the base size. > > -# The macros are: > > -# Macro Base size > > -# RES_B 1 > > -# RES_W 2 > > -# RES_D 4 > > -# RES_Q 8 > > -# RES_DQ 16 > > -# RES_Y 32 > > -# RES_Z 64 > > -# > > -# RES_U defines a union. It's arguments are a name and two or more > > -# pairs of "size, alignment" > > -# > > -# The two assigns are only needed if this structure is being nested > > -# within another. Even if the assigns are not done, one can still use > > -# STRUCT_NAME_size as the size of the structure. > > -# > > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > > -# > > -# The differences between this and using "struc" directly are that each > > -# type is implicitly aligned to its natural length (although this can be > > -# over-ridden with an explicit third parameter), and that the structure > > -# is padded at the end to its overall alignment. > > -# > > - > > -######################################################################### > > - > > -#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ > > -#define _SHA1_MB_MGR_DATASTRUCT_ASM_ > > - > > -## START_FIELDS > > -.macro START_FIELDS > > - _FIELD_OFFSET = 0 > > - _STRUCT_ALIGN = 0 > > -.endm > > - > > -## FIELD name size align > > -.macro FIELD name size align > > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > > - \name = _FIELD_OFFSET > > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > > -.if (\align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = \align > > -.endif > > -.endm > > - > > -## END_FIELDS > > -.macro END_FIELDS > > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > > -.endm > > - > > -######################################################################## > > - > > -.macro STRUCT p1 > > -START_FIELDS > > -.struc \p1 > > -.endm > > - > > -.macro ENDSTRUCT > > - tmp = _FIELD_OFFSET > > - END_FIELDS > > - tmp = (_FIELD_OFFSET - %%tmp) > > -.if (tmp > 0) > > - .lcomm tmp > > -.endif > > -.endstruc > > -.endm > > - > > -## RES_int name size align > > -.macro RES_int p1 p2 p3 > > - name = \p1 > > - size = \p2 > > - align = .\p3 > > - > > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > > -.align align > > -.lcomm name size > > - _FIELD_OFFSET = _FIELD_OFFSET + (size) > > -.if (align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = align > > -.endif > > -.endm > > - > > - > > - > > -# macro RES_B name, size [, align] > > -.macro RES_B _name, _size, _align=1 > > -RES_int _name _size _align > > -.endm > > - > > -# macro RES_W name, size [, align] > > -.macro RES_W _name, _size, _align=2 > > -RES_int _name 2*(_size) _align > > -.endm > > - > > -# macro RES_D name, size [, align] > > -.macro RES_D _name, _size, _align=4 > > -RES_int _name 4*(_size) _align > > -.endm > > - > > -# macro RES_Q name, size [, align] > > -.macro RES_Q _name, _size, _align=8 > > -RES_int _name 8*(_size) _align > > -.endm > > - > > -# macro RES_DQ name, size [, align] > > -.macro RES_DQ _name, _size, _align=16 > > -RES_int _name 16*(_size) _align > > -.endm > > - > > -# macro RES_Y name, size [, align] > > -.macro RES_Y _name, _size, _align=32 > > -RES_int _name 32*(_size) _align > > -.endm > > - > > -# macro RES_Z name, size [, align] > > -.macro RES_Z _name, _size, _align=64 > > -RES_int _name 64*(_size) _align > > -.endm > > - > > - > > -#endif > > - > > -######################################################################## > > -#### Define constants > > -######################################################################## > > - > > -######################################################################## > > -#### Define SHA1 Out Of Order Data Structures > > -######################################################################## > > - > > -START_FIELDS # LANE_DATA > > -### name size align > > -FIELD _job_in_lane, 8, 8 # pointer to job object > > -END_FIELDS > > - > > -_LANE_DATA_size = _FIELD_OFFSET > > -_LANE_DATA_align = _STRUCT_ALIGN > > - > > -######################################################################## > > - > > -START_FIELDS # SHA1_ARGS_X8 > > -### name size align > > -FIELD _digest, 4*5*8, 16 # transposed digest > > -FIELD _data_ptr, 8*8, 8 # array of pointers to data > > -END_FIELDS > > - > > -_SHA1_ARGS_X4_size = _FIELD_OFFSET > > -_SHA1_ARGS_X4_align = _STRUCT_ALIGN > > -_SHA1_ARGS_X8_size = _FIELD_OFFSET > > -_SHA1_ARGS_X8_align = _STRUCT_ALIGN > > - > > -######################################################################## > > - > > -START_FIELDS # MB_MGR > > -### name size align > > -FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align > > -FIELD _lens, 4*8, 8 > > -FIELD _unused_lanes, 8, 8 > > -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align > > -END_FIELDS > > - > > -_MB_MGR_size = _FIELD_OFFSET > > -_MB_MGR_align = _STRUCT_ALIGN > > - > > -_args_digest = _args + _digest > > -_args_data_ptr = _args + _data_ptr > > - > > - > > -######################################################################## > > -#### Define constants > > -######################################################################## > > - > > -#define STS_UNKNOWN 0 > > -#define STS_BEING_PROCESSED 1 > > -#define STS_COMPLETED 2 > > - > > -######################################################################## > > -#### Define JOB_SHA1 structure > > -######################################################################## > > - > > -START_FIELDS # JOB_SHA1 > > - > > -### name size align > > -FIELD _buffer, 8, 8 # pointer to buffer > > -FIELD _len, 4, 4 # length in bytes > > -FIELD _result_digest, 5*4, 32 # Digest (output) > > -FIELD _status, 4, 4 > > -FIELD _user_data, 8, 8 > > -END_FIELDS > > - > > -_JOB_SHA1_size = _FIELD_OFFSET > > -_JOB_SHA1_align = _STRUCT_ALIGN > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > > deleted file mode 100644 > > index 7cfba738f104..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > > +++ /dev/null > > @@ -1,304 +0,0 @@ > > -/* > > - * Flush routine for SHA1 multibuffer > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * James Guilford <james.guilford@xxxxxxxxx> > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha1_mb_mgr_datastruct.S" > > - > > - > > -.extern sha1_x8_avx2 > > - > > -# LINUX register definitions > > -#define arg1 %rdi > > -#define arg2 %rsi > > - > > -# Common definitions > > -#define state arg1 > > -#define job arg2 > > -#define len2 arg2 > > - > > -# idx must be a register not clobbered by sha1_x8_avx2 > > -#define idx %r8 > > -#define DWORD_idx %r8d > > - > > -#define unused_lanes %rbx > > -#define lane_data %rbx > > -#define tmp2 %rbx > > -#define tmp2_w %ebx > > - > > -#define job_rax %rax > > -#define tmp1 %rax > > -#define size_offset %rax > > -#define tmp %rax > > -#define start_offset %rax > > - > > -#define tmp3 %arg1 > > - > > -#define extra_blocks %arg2 > > -#define p %arg2 > > - > > -.macro LABEL prefix n > > -\prefix\n\(): > > -.endm > > - > > -.macro JNE_SKIP i > > -jne skip_\i > > -.endm > > - > > -.altmacro > > -.macro SET_OFFSET _offset > > -offset = \_offset > > -.endm > > -.noaltmacro > > - > > -# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) > > -# arg 1 : rcx : state > > -ENTRY(sha1_mb_mgr_flush_avx2) > > - FRAME_BEGIN > > - push %rbx > > - > > - # If bit (32+3) is set, then all lanes are empty > > - mov _unused_lanes(state), unused_lanes > > - bt $32+3, unused_lanes > > - jc return_null > > - > > - # find a lane with a non-null job > > - xor idx, idx > > - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne one(%rip), idx > > - offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne two(%rip), idx > > - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne three(%rip), idx > > - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne four(%rip), idx > > - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne five(%rip), idx > > - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne six(%rip), idx > > - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne seven(%rip), idx > > - > > - # copy idx to empty lanes > > -copy_lane_data: > > - offset = (_args + _data_ptr) > > - mov offset(state,idx,8), tmp > > - > > - I = 0 > > -.rep 8 > > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > -.altmacro > > - JNE_SKIP %I > > - offset = (_args + _data_ptr + 8*I) > > - mov tmp, offset(state) > > - offset = (_lens + 4*I) > > - movl $0xFFFFFFFF, offset(state) > > -LABEL skip_ %I > > - I = (I+1) > > -.noaltmacro > > -.endr > > - > > - # Find min length > > - vmovdqu _lens+0*16(state), %xmm0 > > - vmovdqu _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - mov idx, len2 > > - and $0xF, idx > > - shr $4, len2 > > - jz len_is_0 > > - > > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > > - vpshufd $0, %xmm2, %xmm2 > > - > > - vpsubd %xmm2, %xmm0, %xmm0 > > - vpsubd %xmm2, %xmm1, %xmm1 > > - > > - vmovdqu %xmm0, _lens+0*16(state) > > - vmovdqu %xmm1, _lens+1*16(state) > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha1_x8_avx2 > > - # state and idx are intact > > - > > - > > -len_is_0: > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens(state, idx, 4) > > - > > - vmovd _args_digest(state , idx, 4) , %xmm0 > > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > > - movl _args_digest+4*32(state, idx, 4), tmp2_w > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - offset = (_result_digest + 1*16) > > - mov tmp2_w, offset(job_rax) > > - > > -return: > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > -ENDPROC(sha1_mb_mgr_flush_avx2) > > - > > - > > -################################################################# > > - > > -.align 16 > > -ENTRY(sha1_mb_mgr_get_comp_job_avx2) > > - push %rbx > > - > > - ## if bit 32+3 is set, then all lanes are empty > > - mov _unused_lanes(state), unused_lanes > > - bt $(32+3), unused_lanes > > - jc .return_null > > - > > - # Find min length > > - vmovdqu _lens(state), %xmm0 > > - vmovdqu _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - test $~0xF, idx > > - jnz .return_null > > - > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens(state, idx, 4) > > - > > - vmovd _args_digest(state, idx, 4), %xmm0 > > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > > - movl _args_digest+4*32(state, idx, 4), tmp2_w > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - movl tmp2_w, _result_digest+1*16(job_rax) > > - > > - pop %rbx > > - > > - ret > > - > > -.return_null: > > - xor job_rax, job_rax > > - pop %rbx > > - ret > > -ENDPROC(sha1_mb_mgr_get_comp_job_avx2) > > - > > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > > -.align 16 > > -clear_low_nibble: > > -.octa 0x000000000000000000000000FFFFFFF0 > > - > > -.section .rodata.cst8, "aM", @progbits, 8 > > -.align 8 > > -one: > > -.quad 1 > > -two: > > -.quad 2 > > -three: > > -.quad 3 > > -four: > > -.quad 4 > > -five: > > -.quad 5 > > -six: > > -.quad 6 > > -seven: > > -.quad 7 > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > > deleted file mode 100644 > > index d2add0d35f43..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > > +++ /dev/null > > @@ -1,64 +0,0 @@ > > -/* > > - * Initialization code for multi buffer SHA1 algorithm for AVX2 > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include "sha1_mb_mgr.h" > > - > > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) > > -{ > > - unsigned int j; > > - state->unused_lanes = 0xF76543210ULL; > > - for (j = 0; j < 8; j++) { > > - state->lens[j] = 0xFFFFFFFF; > > - state->ldata[j].job_in_lane = NULL; > > - } > > -} > > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > > deleted file mode 100644 > > index 7a93b1c0d69a..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > > +++ /dev/null > > @@ -1,209 +0,0 @@ > > -/* > > - * Buffer submit code for multi buffer SHA1 algorithm > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * James Guilford <james.guilford@xxxxxxxxx> > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha1_mb_mgr_datastruct.S" > > - > > - > > -.extern sha1_x8_avx > > - > > -# LINUX register definitions > > -arg1 = %rdi > > -arg2 = %rsi > > -size_offset = %rcx > > -tmp2 = %rcx > > -extra_blocks = %rdx > > - > > -# Common definitions > > -#define state arg1 > > -#define job %rsi > > -#define len2 arg2 > > -#define p2 arg2 > > - > > -# idx must be a register not clobberred by sha1_x8_avx2 > > -idx = %r8 > > -DWORD_idx = %r8d > > -last_len = %r8 > > - > > -p = %r11 > > -start_offset = %r11 > > - > > -unused_lanes = %rbx > > -BYTE_unused_lanes = %bl > > - > > -job_rax = %rax > > -len = %rax > > -DWORD_len = %eax > > - > > -lane = %r12 > > -tmp3 = %r12 > > - > > -tmp = %r9 > > -DWORD_tmp = %r9d > > - > > -lane_data = %r10 > > - > > -# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) > > -# arg 1 : rcx : state > > -# arg 2 : rdx : job > > -ENTRY(sha1_mb_mgr_submit_avx2) > > - FRAME_BEGIN > > - push %rbx > > - push %r12 > > - > > - mov _unused_lanes(state), unused_lanes > > - mov unused_lanes, lane > > - and $0xF, lane > > - shr $4, unused_lanes > > - imul $_LANE_DATA_size, lane, lane_data > > - movl $STS_BEING_PROCESSED, _status(job) > > - lea _ldata(state, lane_data), lane_data > > - mov unused_lanes, _unused_lanes(state) > > - movl _len(job), DWORD_len > > - > > - mov job, _job_in_lane(lane_data) > > - shl $4, len > > - or lane, len > > - > > - movl DWORD_len, _lens(state , lane, 4) > > - > > - # Load digest words from result_digest > > - vmovdqu _result_digest(job), %xmm0 > > - mov _result_digest+1*16(job), DWORD_tmp > > - vmovd %xmm0, _args_digest(state, lane, 4) > > - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) > > - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) > > - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) > > - movl DWORD_tmp, _args_digest+4*32(state , lane, 4) > > - > > - mov _buffer(job), p > > - mov p, _args_data_ptr(state, lane, 8) > > - > > - cmp $0xF, unused_lanes > > - jne return_null > > - > > -start_loop: > > - # Find min length > > - vmovdqa _lens(state), %xmm0 > > - vmovdqa _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - mov idx, len2 > > - and $0xF, idx > > - shr $4, len2 > > - jz len_is_0 > > - > > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > > - vpshufd $0, %xmm2, %xmm2 > > - > > - vpsubd %xmm2, %xmm0, %xmm0 > > - vpsubd %xmm2, %xmm1, %xmm1 > > - > > - vmovdqa %xmm0, _lens + 0*16(state) > > - vmovdqa %xmm1, _lens + 1*16(state) > > - > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha1_x8_avx2 > > - > > - # state and idx are intact > > - > > -len_is_0: > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - mov _unused_lanes(state), unused_lanes > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens(state, idx, 4) > > - > > - vmovd _args_digest(state, idx, 4), %xmm0 > > - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 > > - movl _args_digest+4*32(state, idx, 4), DWORD_tmp > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - movl DWORD_tmp, _result_digest+1*16(job_rax) > > - > > -return: > > - pop %r12 > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > - > > -ENDPROC(sha1_mb_mgr_submit_avx2) > > - > > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > > -.align 16 > > -clear_low_nibble: > > - .octa 0x000000000000000000000000FFFFFFF0 > > diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S > > deleted file mode 100644 > > index 20f77aa633de..000000000000 > > --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S > > +++ /dev/null > > @@ -1,492 +0,0 @@ > > -/* > > - * Multi-buffer SHA1 algorithm hash compute routine > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * James Guilford <james.guilford@xxxxxxxxx> > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2014 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include "sha1_mb_mgr_datastruct.S" > > - > > -## code to compute oct SHA1 using SSE-256 > > -## outer calling routine takes care of save and restore of XMM registers > > - > > -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 > > -## > > -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 > > -## Linux preserves: rdi rbp r8 > > -## > > -## clobbers ymm0-15 > > - > > - > > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 > > -# "transpose" data in {r0...r7} using temps {t0...t1} > > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > > -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} > > -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} > > -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} > > -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} > > -# > > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > > -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} > > -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} > > -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} > > -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} > > -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} > > -# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} > > -# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} > > -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} > > -# > > - > > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 > > - # process top half (r0..r3) {a...d} > > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > > - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} > > - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} > > - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} > > - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} > > - > > - # use r2 in place of t0 > > - # process bottom half (r4..r7) {e...h} > > - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} > > - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} > > - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} > > - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} > > - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} > > - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} > > - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} > > - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} > > - > > - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 > > - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 > > - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 > > - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 > > - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 > > - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 > > - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 > > - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 > > - > > -.endm > > -## > > -## Magic functions defined in FIPS 180-1 > > -## > > -# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) > > -.macro MAGIC_F0 regF regB regC regD regT > > - vpxor \regD, \regC, \regF > > - vpand \regB, \regF, \regF > > - vpxor \regD, \regF, \regF > > -.endm > > - > > -# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) > > -.macro MAGIC_F1 regF regB regC regD regT > > - vpxor \regC, \regD, \regF > > - vpxor \regB, \regF, \regF > > -.endm > > - > > -# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) > > -.macro MAGIC_F2 regF regB regC regD regT > > - vpor \regC, \regB, \regF > > - vpand \regC, \regB, \regT > > - vpand \regD, \regF, \regF > > - vpor \regT, \regF, \regF > > -.endm > > - > > -# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) > > -.macro MAGIC_F3 regF regB regC regD regT > > - MAGIC_F1 \regF,\regB,\regC,\regD,\regT > > -.endm > > - > > -# PROLD reg, imm, tmp > > -.macro PROLD reg imm tmp > > - vpsrld $(32-\imm), \reg, \tmp > > - vpslld $\imm, \reg, \reg > > - vpor \tmp, \reg, \reg > > -.endm > > - > > -.macro PROLD_nd reg imm tmp src > > - vpsrld $(32-\imm), \src, \tmp > > - vpslld $\imm, \src, \reg > > - vpor \tmp, \reg, \reg > > -.endm > > - > > -.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC > > - vpaddd \immCNT, \regE, \regE > > - vpaddd \memW*32(%rsp), \regE, \regE > > - PROLD_nd \regT, 5, \regF, \regA > > - vpaddd \regT, \regE, \regE > > - \MAGIC \regF, \regB, \regC, \regD, \regT > > - PROLD \regB, 30, \regT > > - vpaddd \regF, \regE, \regE > > -.endm > > - > > -.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC > > - vpaddd \immCNT, \regE, \regE > > - offset = ((\memW - 14) & 15) * 32 > > - vmovdqu offset(%rsp), W14 > > - vpxor W14, W16, W16 > > - offset = ((\memW - 8) & 15) * 32 > > - vpxor offset(%rsp), W16, W16 > > - offset = ((\memW - 3) & 15) * 32 > > - vpxor offset(%rsp), W16, W16 > > - vpsrld $(32-1), W16, \regF > > - vpslld $1, W16, W16 > > - vpor W16, \regF, \regF > > - > > - ROTATE_W > > - > > - offset = ((\memW - 0) & 15) * 32 > > - vmovdqu \regF, offset(%rsp) > > - vpaddd \regF, \regE, \regE > > - PROLD_nd \regT, 5, \regF, \regA > > - vpaddd \regT, \regE, \regE > > - \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) > > - PROLD \regB,30, \regT > > - vpaddd \regF, \regE, \regE > > -.endm > > - > > -######################################################################## > > -######################################################################## > > -######################################################################## > > - > > -## FRAMESZ plus pushes must be an odd multiple of 8 > > -YMM_SAVE = (15-15)*32 > > -FRAMESZ = 32*16 + YMM_SAVE > > -_YMM = FRAMESZ - YMM_SAVE > > - > > -#define VMOVPS vmovups > > - > > -IDX = %rax > > -inp0 = %r9 > > -inp1 = %r10 > > -inp2 = %r11 > > -inp3 = %r12 > > -inp4 = %r13 > > -inp5 = %r14 > > -inp6 = %r15 > > -inp7 = %rcx > > -arg1 = %rdi > > -arg2 = %rsi > > -RSP_SAVE = %rdx > > - > > -# ymm0 A > > -# ymm1 B > > -# ymm2 C > > -# ymm3 D > > -# ymm4 E > > -# ymm5 F AA > > -# ymm6 T0 BB > > -# ymm7 T1 CC > > -# ymm8 T2 DD > > -# ymm9 T3 EE > > -# ymm10 T4 TMP > > -# ymm11 T5 FUN > > -# ymm12 T6 K > > -# ymm13 T7 W14 > > -# ymm14 T8 W15 > > -# ymm15 T9 W16 > > - > > - > > -A = %ymm0 > > -B = %ymm1 > > -C = %ymm2 > > -D = %ymm3 > > -E = %ymm4 > > -F = %ymm5 > > -T0 = %ymm6 > > -T1 = %ymm7 > > -T2 = %ymm8 > > -T3 = %ymm9 > > -T4 = %ymm10 > > -T5 = %ymm11 > > -T6 = %ymm12 > > -T7 = %ymm13 > > -T8 = %ymm14 > > -T9 = %ymm15 > > - > > -AA = %ymm5 > > -BB = %ymm6 > > -CC = %ymm7 > > -DD = %ymm8 > > -EE = %ymm9 > > -TMP = %ymm10 > > -FUN = %ymm11 > > -K = %ymm12 > > -W14 = %ymm13 > > -W15 = %ymm14 > > -W16 = %ymm15 > > - > > -.macro ROTATE_ARGS > > - TMP_ = E > > - E = D > > - D = C > > - C = B > > - B = A > > - A = TMP_ > > -.endm > > - > > -.macro ROTATE_W > > -TMP_ = W16 > > -W16 = W15 > > -W15 = W14 > > -W14 = TMP_ > > -.endm > > - > > -# 8 streams x 5 32bit words per digest x 4 bytes per word > > -#define DIGEST_SIZE (8*5*4) > > - > > -.align 32 > > - > > -# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) > > -# arg 1 : pointer to array[4] of pointer to input data > > -# arg 2 : size (in blocks) ;; assumed to be >= 1 > > -# > > -ENTRY(sha1_x8_avx2) > > - > > - # save callee-saved clobbered registers to comply with C function ABI > > - push %r12 > > - push %r13 > > - push %r14 > > - push %r15 > > - > > - #save rsp > > - mov %rsp, RSP_SAVE > > - sub $FRAMESZ, %rsp > > - > > - #align rsp to 32 Bytes > > - and $~0x1F, %rsp > > - > > - ## Initialize digests > > - vmovdqu 0*32(arg1), A > > - vmovdqu 1*32(arg1), B > > - vmovdqu 2*32(arg1), C > > - vmovdqu 3*32(arg1), D > > - vmovdqu 4*32(arg1), E > > - > > - ## transpose input onto stack > > - mov _data_ptr+0*8(arg1),inp0 > > - mov _data_ptr+1*8(arg1),inp1 > > - mov _data_ptr+2*8(arg1),inp2 > > - mov _data_ptr+3*8(arg1),inp3 > > - mov _data_ptr+4*8(arg1),inp4 > > - mov _data_ptr+5*8(arg1),inp5 > > - mov _data_ptr+6*8(arg1),inp6 > > - mov _data_ptr+7*8(arg1),inp7 > > - > > - xor IDX, IDX > > -lloop: > > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F > > - I=0 > > -.rep 2 > > - VMOVPS (inp0, IDX), T0 > > - VMOVPS (inp1, IDX), T1 > > - VMOVPS (inp2, IDX), T2 > > - VMOVPS (inp3, IDX), T3 > > - VMOVPS (inp4, IDX), T4 > > - VMOVPS (inp5, IDX), T5 > > - VMOVPS (inp6, IDX), T6 > > - VMOVPS (inp7, IDX), T7 > > - > > - TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 > > - vpshufb F, T0, T0 > > - vmovdqu T0, (I*8)*32(%rsp) > > - vpshufb F, T1, T1 > > - vmovdqu T1, (I*8+1)*32(%rsp) > > - vpshufb F, T2, T2 > > - vmovdqu T2, (I*8+2)*32(%rsp) > > - vpshufb F, T3, T3 > > - vmovdqu T3, (I*8+3)*32(%rsp) > > - vpshufb F, T4, T4 > > - vmovdqu T4, (I*8+4)*32(%rsp) > > - vpshufb F, T5, T5 > > - vmovdqu T5, (I*8+5)*32(%rsp) > > - vpshufb F, T6, T6 > > - vmovdqu T6, (I*8+6)*32(%rsp) > > - vpshufb F, T7, T7 > > - vmovdqu T7, (I*8+7)*32(%rsp) > > - add $32, IDX > > - I = (I+1) > > -.endr > > - # save old digests > > - vmovdqu A,AA > > - vmovdqu B,BB > > - vmovdqu C,CC > > - vmovdqu D,DD > > - vmovdqu E,EE > > - > > -## > > -## perform 0-79 steps > > -## > > - vmovdqu K00_19(%rip), K > > -## do rounds 0...15 > > - I = 0 > > -.rep 16 > > - SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 > > - ROTATE_ARGS > > - I = (I+1) > > -.endr > > - > > -## do rounds 16...19 > > - vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 > > - vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 > > -.rep 4 > > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 > > - ROTATE_ARGS > > - I = (I+1) > > -.endr > > - > > -## do rounds 20...39 > > - vmovdqu K20_39(%rip), K > > -.rep 20 > > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 > > - ROTATE_ARGS > > - I = (I+1) > > -.endr > > - > > -## do rounds 40...59 > > - vmovdqu K40_59(%rip), K > > -.rep 20 > > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 > > - ROTATE_ARGS > > - I = (I+1) > > -.endr > > - > > -## do rounds 60...79 > > - vmovdqu K60_79(%rip), K > > -.rep 20 > > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 > > - ROTATE_ARGS > > - I = (I+1) > > -.endr > > - > > - vpaddd AA,A,A > > - vpaddd BB,B,B > > - vpaddd CC,C,C > > - vpaddd DD,D,D > > - vpaddd EE,E,E > > - > > - sub $1, arg2 > > - jne lloop > > - > > - # write out digests > > - vmovdqu A, 0*32(arg1) > > - vmovdqu B, 1*32(arg1) > > - vmovdqu C, 2*32(arg1) > > - vmovdqu D, 3*32(arg1) > > - vmovdqu E, 4*32(arg1) > > - > > - # update input pointers > > - add IDX, inp0 > > - add IDX, inp1 > > - add IDX, inp2 > > - add IDX, inp3 > > - add IDX, inp4 > > - add IDX, inp5 > > - add IDX, inp6 > > - add IDX, inp7 > > - mov inp0, _data_ptr (arg1) > > - mov inp1, _data_ptr + 1*8(arg1) > > - mov inp2, _data_ptr + 2*8(arg1) > > - mov inp3, _data_ptr + 3*8(arg1) > > - mov inp4, _data_ptr + 4*8(arg1) > > - mov inp5, _data_ptr + 5*8(arg1) > > - mov inp6, _data_ptr + 6*8(arg1) > > - mov inp7, _data_ptr + 7*8(arg1) > > - > > - ################ > > - ## Postamble > > - > > - mov RSP_SAVE, %rsp > > - > > - # restore callee-saved clobbered registers > > - pop %r15 > > - pop %r14 > > - pop %r13 > > - pop %r12 > > - > > - ret > > -ENDPROC(sha1_x8_avx2) > > - > > - > > -.section .rodata.cst32.K00_19, "aM", @progbits, 32 > > -.align 32 > > -K00_19: > > -.octa 0x5A8279995A8279995A8279995A827999 > > -.octa 0x5A8279995A8279995A8279995A827999 > > - > > -.section .rodata.cst32.K20_39, "aM", @progbits, 32 > > -.align 32 > > -K20_39: > > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 > > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 > > - > > -.section .rodata.cst32.K40_59, "aM", @progbits, 32 > > -.align 32 > > -K40_59: > > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC > > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC > > - > > -.section .rodata.cst32.K60_79, "aM", @progbits, 32 > > -.align 32 > > -K60_79: > > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 > > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 > > - > > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > > -.align 32 > > -PSHUFFLE_BYTE_FLIP_MASK: > > -.octa 0x0c0d0e0f08090a0b0405060700010203 > > -.octa 0x0c0d0e0f08090a0b0405060700010203 > > diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile > > deleted file mode 100644 > > index 53ad6e7db747..000000000000 > > --- a/arch/x86/crypto/sha256-mb/Makefile > > +++ /dev/null > > @@ -1,14 +0,0 @@ > > -# SPDX-License-Identifier: GPL-2.0 > > -# > > -# Arch-specific CryptoAPI modules. > > -# > > - > > -OBJECT_FILES_NON_STANDARD := y > > - > > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > > - $(comma)4)$(comma)%ymm2,yes,no) > > -ifeq ($(avx2_supported),yes) > > - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o > > - sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ > > - sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o > > -endif > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c > > deleted file mode 100644 > > index 97c5fc43e115..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb.c > > +++ /dev/null > > @@ -1,1013 +0,0 @@ > > -/* > > - * Multi buffer SHA256 algorithm Glue Code > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > - > > -#include <crypto/internal/hash.h> > > -#include <linux/init.h> > > -#include <linux/module.h> > > -#include <linux/mm.h> > > -#include <linux/cryptohash.h> > > -#include <linux/types.h> > > -#include <linux/list.h> > > -#include <crypto/scatterwalk.h> > > -#include <crypto/sha.h> > > -#include <crypto/mcryptd.h> > > -#include <crypto/crypto_wq.h> > > -#include <asm/byteorder.h> > > -#include <linux/hardirq.h> > > -#include <asm/fpu/api.h> > > -#include "sha256_mb_ctx.h" > > - > > -#define FLUSH_INTERVAL 1000 /* in usec */ > > - > > -static struct mcryptd_alg_state sha256_mb_alg_state; > > - > > -struct sha256_mb_ctx { > > - struct mcryptd_ahash *mcryptd_tfm; > > -}; > > - > > -static inline struct mcryptd_hash_request_ctx > > - *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) > > -{ > > - struct ahash_request *areq; > > - > > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > > -} > > - > > -static inline struct ahash_request > > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > > -{ > > - return container_of((void *) ctx, struct ahash_request, __ctx); > > -} > > - > > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > > - struct ahash_request *areq) > > -{ > > - rctx->flag = HASH_UPDATE; > > -} > > - > > -static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); > > -static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) > > - (struct sha256_mb_mgr *state, struct job_sha256 *job); > > -static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) > > - (struct sha256_mb_mgr *state); > > -static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) > > - (struct sha256_mb_mgr *state); > > - > > -inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], > > - uint64_t total_len) > > -{ > > - uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); > > - > > - memset(&padblock[i], 0, SHA256_BLOCK_SIZE); > > - padblock[i] = 0x80; > > - > > - i += ((SHA256_BLOCK_SIZE - 1) & > > - (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) > > - + 1 + SHA256_PADLENGTHFIELD_SIZE; > > - > > -#if SHA256_PADLENGTHFIELD_SIZE == 16 > > - *((uint64_t *) &padblock[i - 16]) = 0; > > -#endif > > - > > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > > - > > - /* Number of extra blocks to hash */ > > - return i >> SHA256_LOG2_BLOCK_SIZE; > > -} > > - > > -static struct sha256_hash_ctx > > - *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, > > - struct sha256_hash_ctx *ctx) > > -{ > > - while (ctx) { > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Clear PROCESSING bit */ > > - ctx->status = HASH_CTX_STS_COMPLETE; > > - return ctx; > > - } > > - > > - /* > > - * If the extra blocks are empty, begin hashing what remains > > - * in the user's buffer. > > - */ > > - if (ctx->partial_block_buffer_length == 0 && > > - ctx->incoming_buffer_length) { > > - > > - const void *buffer = ctx->incoming_buffer; > > - uint32_t len = ctx->incoming_buffer_length; > > - uint32_t copy_len; > > - > > - /* > > - * Only entire blocks can be hashed. > > - * Copy remainder to extra blocks buffer. > > - */ > > - copy_len = len & (SHA256_BLOCK_SIZE-1); > > - > > - if (copy_len) { > > - len -= copy_len; > > - memcpy(ctx->partial_block_buffer, > > - ((const char *) buffer + len), > > - copy_len); > > - ctx->partial_block_buffer_length = copy_len; > > - } > > - > > - ctx->incoming_buffer_length = 0; > > - > > - /* len should be a multiple of the block size now */ > > - assert((len % SHA256_BLOCK_SIZE) == 0); > > - > > - /* Set len to the number of blocks to be hashed */ > > - len >>= SHA256_LOG2_BLOCK_SIZE; > > - > > - if (len) { > > - > > - ctx->job.buffer = (uint8_t *) buffer; > > - ctx->job.len = len; > > - ctx = (struct sha256_hash_ctx *) > > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > > - continue; > > - } > > - } > > - > > - /* > > - * If the extra blocks are not empty, then we are > > - * either on the last block(s) or we need more > > - * user input before continuing. > > - */ > > - if (ctx->status & HASH_CTX_STS_LAST) { > > - > > - uint8_t *buf = ctx->partial_block_buffer; > > - uint32_t n_extra_blocks = > > - sha256_pad(buf, ctx->total_length); > > - > > - ctx->status = (HASH_CTX_STS_PROCESSING | > > - HASH_CTX_STS_COMPLETE); > > - ctx->job.buffer = buf; > > - ctx->job.len = (uint32_t) n_extra_blocks; > > - ctx = (struct sha256_hash_ctx *) > > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > > - continue; > > - } > > - > > - ctx->status = HASH_CTX_STS_IDLE; > > - return ctx; > > - } > > - > > - return NULL; > > -} > > - > > -static struct sha256_hash_ctx > > - *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) > > -{ > > - /* > > - * If get_comp_job returns NULL, there are no jobs complete. > > - * If get_comp_job returns a job, verify that it is safe to return to > > - * the user. If it is not ready, resubmit the job to finish processing. > > - * If sha256_ctx_mgr_resubmit returned a job, it is ready to be > > - * returned. Otherwise, all jobs currently being managed by the > > - * hash_ctx_mgr still need processing. > > - */ > > - struct sha256_hash_ctx *ctx; > > - > > - ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); > > - return sha256_ctx_mgr_resubmit(mgr, ctx); > > -} > > - > > -static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) > > -{ > > - sha256_job_mgr_init(&mgr->mgr); > > -} > > - > > -static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, > > - struct sha256_hash_ctx *ctx, > > - const void *buffer, > > - uint32_t len, > > - int flags) > > -{ > > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > > - /* User should not pass anything other than UPDATE or LAST */ > > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > > - return ctx; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > > - /* Cannot submit to a currently processing job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > > - return ctx; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Cannot update a finished job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > > - return ctx; > > - } > > - > > - /* If we made it here, there was no error during this call to submit */ > > - ctx->error = HASH_CTX_ERROR_NONE; > > - > > - /* Store buffer ptr info from user */ > > - ctx->incoming_buffer = buffer; > > - ctx->incoming_buffer_length = len; > > - > > - /* > > - * Store the user's request flags and mark this ctx as currently > > - * being processed. > > - */ > > - ctx->status = (flags & HASH_LAST) ? > > - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > > - HASH_CTX_STS_PROCESSING; > > - > > - /* Advance byte counter */ > > - ctx->total_length += len; > > - > > - /* > > - * If there is anything currently buffered in the extra blocks, > > - * append to it until it contains a whole block. > > - * Or if the user's buffer contains less than a whole block, > > - * append as much as possible to the extra block. > > - */ > > - if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { > > - /* > > - * Compute how many bytes to copy from user buffer into > > - * extra block > > - */ > > - uint32_t copy_len = SHA256_BLOCK_SIZE - > > - ctx->partial_block_buffer_length; > > - if (len < copy_len) > > - copy_len = len; > > - > > - if (copy_len) { > > - /* Copy and update relevant pointers and counters */ > > - memcpy( > > - &ctx->partial_block_buffer[ctx->partial_block_buffer_length], > > - buffer, copy_len); > > - > > - ctx->partial_block_buffer_length += copy_len; > > - ctx->incoming_buffer = (const void *) > > - ((const char *)buffer + copy_len); > > - ctx->incoming_buffer_length = len - copy_len; > > - } > > - > > - /* The extra block should never contain more than 1 block */ > > - assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); > > - > > - /* > > - * If the extra block buffer contains exactly 1 block, > > - * it can be hashed. > > - */ > > - if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { > > - ctx->partial_block_buffer_length = 0; > > - > > - ctx->job.buffer = ctx->partial_block_buffer; > > - ctx->job.len = 1; > > - ctx = (struct sha256_hash_ctx *) > > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > > - } > > - } > > - > > - return sha256_ctx_mgr_resubmit(mgr, ctx); > > -} > > - > > -static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) > > -{ > > - struct sha256_hash_ctx *ctx; > > - > > - while (1) { > > - ctx = (struct sha256_hash_ctx *) > > - sha256_job_mgr_flush(&mgr->mgr); > > - > > - /* If flush returned 0, there are no more jobs in flight. */ > > - if (!ctx) > > - return NULL; > > - > > - /* > > - * If flush returned a job, resubmit the job to finish > > - * processing. > > - */ > > - ctx = sha256_ctx_mgr_resubmit(mgr, ctx); > > - > > - /* > > - * If sha256_ctx_mgr_resubmit returned a job, it is ready to > > - * be returned. Otherwise, all jobs currently being managed by > > - * the sha256_ctx_mgr still need processing. Loop. > > - */ > > - if (ctx) > > - return ctx; > > - } > > -} > > - > > -static int sha256_mb_init(struct ahash_request *areq) > > -{ > > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - hash_ctx_init(sctx); > > - sctx->job.result_digest[0] = SHA256_H0; > > - sctx->job.result_digest[1] = SHA256_H1; > > - sctx->job.result_digest[2] = SHA256_H2; > > - sctx->job.result_digest[3] = SHA256_H3; > > - sctx->job.result_digest[4] = SHA256_H4; > > - sctx->job.result_digest[5] = SHA256_H5; > > - sctx->job.result_digest[6] = SHA256_H6; > > - sctx->job.result_digest[7] = SHA256_H7; > > - sctx->total_length = 0; > > - sctx->partial_block_buffer_length = 0; > > - sctx->status = HASH_CTX_STS_IDLE; > > - > > - return 0; > > -} > > - > > -static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > > -{ > > - int i; > > - struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > > - __be32 *dst = (__be32 *) rctx->out; > > - > > - for (i = 0; i < 8; ++i) > > - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); > > - > > - return 0; > > -} > > - > > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > > - struct mcryptd_alg_cstate *cstate, bool flush) > > -{ > > - int flag = HASH_UPDATE; > > - int nbytes, err = 0; > > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > > - struct sha256_hash_ctx *sha_ctx; > > - > > - /* more work ? */ > > - while (!(rctx->flag & HASH_DONE)) { > > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > > - if (nbytes < 0) { > > - err = nbytes; > > - goto out; > > - } > > - /* check if the walk is done */ > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - if (rctx->flag & HASH_FINAL) > > - flag |= HASH_LAST; > > - > > - } > > - sha_ctx = (struct sha256_hash_ctx *) > > - ahash_request_ctx(&rctx->areq); > > - kernel_fpu_begin(); > > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, > > - rctx->walk.data, nbytes, flag); > > - if (!sha_ctx) { > > - if (flush) > > - sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); > > - } > > - kernel_fpu_end(); > > - if (sha_ctx) > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - else { > > - rctx = NULL; > > - goto out; > > - } > > - } > > - > > - /* copy the results */ > > - if (rctx->flag & HASH_FINAL) > > - sha256_mb_set_results(rctx); > > - > > -out: > > - *ret_rctx = rctx; > > - return err; > > -} > > - > > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate, > > - int err) > > -{ > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha256_hash_ctx *sha_ctx; > > - struct mcryptd_hash_request_ctx *req_ctx; > > - int ret; > > - > > - /* remove from work list */ > > - spin_lock(&cstate->work_lock); > > - list_del(&rctx->waiter); > > - spin_unlock(&cstate->work_lock); > > - > > - if (irqs_disabled()) > > - rctx->complete(&req->base, err); > > - else { > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > - } > > - > > - /* check to see if there are other jobs that are done */ > > - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); > > - while (sha_ctx) { > > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&req_ctx, cstate, false); > > - if (req_ctx) { > > - spin_lock(&cstate->work_lock); > > - list_del(&req_ctx->waiter); > > - spin_unlock(&cstate->work_lock); > > - > > - req = cast_mcryptd_ctx_to_req(req_ctx); > > - if (irqs_disabled()) > > - req_ctx->complete(&req->base, ret); > > - else { > > - local_bh_disable(); > > - req_ctx->complete(&req->base, ret); > > - local_bh_enable(); > > - } > > - } > > - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); > > - } > > - > > - return 0; > > -} > > - > > -static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate) > > -{ > > - unsigned long next_flush; > > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > > - > > - /* initialize tag */ > > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > > - rctx->tag.seq_num = cstate->next_seq_num++; > > - next_flush = rctx->tag.arrival + delay; > > - rctx->tag.expire = next_flush; > > - > > - spin_lock(&cstate->work_lock); > > - list_add_tail(&rctx->waiter, &cstate->work_list); > > - spin_unlock(&cstate->work_lock); > > - > > - mcryptd_arm_flusher(cstate, delay); > > -} > > - > > -static int sha256_mb_update(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha256_hash_ctx *sha_ctx; > > - int ret = 0, nbytes; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) > > - rctx->flag |= HASH_DONE; > > - > > - /* submit */ > > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > > - sha256_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > > - nbytes, HASH_UPDATE); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha256_mb_finup(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha256_hash_ctx *sha_ctx; > > - int ret = 0, flag = HASH_UPDATE, nbytes; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - flag = HASH_LAST; > > - } > > - > > - /* submit */ > > - rctx->flag |= HASH_FINAL; > > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > > - sha256_mb_add_list(rctx, cstate); > > - > > - kernel_fpu_begin(); > > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > > - nbytes, flag); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha256_mb_final(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, > > - areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > > - > > - struct sha256_hash_ctx *sha_ctx; > > - int ret = 0; > > - u8 data; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - rctx->flag |= HASH_DONE | HASH_FINAL; > > - > > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > > - /* flag HASH_FINAL and 0 data size */ > > - sha256_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, > > - HASH_LAST); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha256_mb_export(struct ahash_request *areq, void *out) > > -{ > > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(out, sctx, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha256_mb_import(struct ahash_request *areq, const void *in) > > -{ > > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(sctx, in, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) > > -{ > > - struct mcryptd_ahash *mcryptd_tfm; > > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - struct mcryptd_hash_ctx *mctx; > > - > > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", > > - CRYPTO_ALG_INTERNAL, > > - CRYPTO_ALG_INTERNAL); > > - if (IS_ERR(mcryptd_tfm)) > > - return PTR_ERR(mcryptd_tfm); > > - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > > - mctx->alg_state = &sha256_mb_alg_state; > > - ctx->mcryptd_tfm = mcryptd_tfm; > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > > - > > - return 0; > > -} > > - > > -static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) > > -{ > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - sizeof(struct sha256_hash_ctx)); > > - > > - return 0; > > -} > > - > > -static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static struct ahash_alg sha256_mb_areq_alg = { > > - .init = sha256_mb_init, > > - .update = sha256_mb_update, > > - .final = sha256_mb_final, > > - .finup = sha256_mb_finup, > > - .export = sha256_mb_export, > > - .import = sha256_mb_import, > > - .halg = { > > - .digestsize = SHA256_DIGEST_SIZE, > > - .statesize = sizeof(struct sha256_hash_ctx), > > - .base = { > > - .cra_name = "__sha256-mb", > > - .cra_driver_name = "__intel_sha256-mb", > > - .cra_priority = 100, > > - /* > > - * use ASYNC flag as some buffers in multi-buffer > > - * algo may not have completed before hashing thread > > - * sleep > > - */ > > - .cra_flags = CRYPTO_ALG_ASYNC | > > - CRYPTO_ALG_INTERNAL, > > - .cra_blocksize = SHA256_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT > > - (sha256_mb_areq_alg.halg.base.cra_list), > > - .cra_init = sha256_mb_areq_init_tfm, > > - .cra_exit = sha256_mb_areq_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha256_hash_ctx), > > - } > > - } > > -}; > > - > > -static int sha256_mb_async_init(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_init(mcryptd_req); > > -} > > - > > -static int sha256_mb_async_update(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_update(mcryptd_req); > > -} > > - > > -static int sha256_mb_async_finup(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_finup(mcryptd_req); > > -} > > - > > -static int sha256_mb_async_final(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_final(mcryptd_req); > > -} > > - > > -static int sha256_mb_async_digest(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_digest(mcryptd_req); > > -} > > - > > -static int sha256_mb_async_export(struct ahash_request *req, void *out) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_export(mcryptd_req, out); > > -} > > - > > -static int sha256_mb_async_import(struct ahash_request *req, const void *in) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > > - struct mcryptd_hash_request_ctx *rctx; > > - struct ahash_request *areq; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - rctx = ahash_request_ctx(mcryptd_req); > > - areq = &rctx->areq; > > - > > - ahash_request_set_tfm(areq, child); > > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > > - rctx->complete, req); > > - > > - return crypto_ahash_import(mcryptd_req, in); > > -} > > - > > -static struct ahash_alg sha256_mb_async_alg = { > > - .init = sha256_mb_async_init, > > - .update = sha256_mb_async_update, > > - .final = sha256_mb_async_final, > > - .finup = sha256_mb_async_finup, > > - .export = sha256_mb_async_export, > > - .import = sha256_mb_async_import, > > - .digest = sha256_mb_async_digest, > > - .halg = { > > - .digestsize = SHA256_DIGEST_SIZE, > > - .statesize = sizeof(struct sha256_hash_ctx), > > - .base = { > > - .cra_name = "sha256", > > - .cra_driver_name = "sha256_mb", > > - /* > > - * Low priority, since with few concurrent hash requests > > - * this is extremely slow due to the flush delay. Users > > - * whose workloads would benefit from this can request > > - * it explicitly by driver name, or can increase its > > - * priority at runtime using NETLINK_CRYPTO. > > - */ > > - .cra_priority = 50, > > - .cra_flags = CRYPTO_ALG_ASYNC, > > - .cra_blocksize = SHA256_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT > > - (sha256_mb_async_alg.halg.base.cra_list), > > - .cra_init = sha256_mb_async_init_tfm, > > - .cra_exit = sha256_mb_async_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha256_mb_ctx), > > - .cra_alignmask = 0, > > - }, > > - }, > > -}; > > - > > -static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) > > -{ > > - struct mcryptd_hash_request_ctx *rctx; > > - unsigned long cur_time; > > - unsigned long next_flush = 0; > > - struct sha256_hash_ctx *sha_ctx; > > - > > - > > - cur_time = jiffies; > > - > > - while (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - if (time_before(cur_time, rctx->tag.expire)) > > - break; > > - kernel_fpu_begin(); > > - sha_ctx = (struct sha256_hash_ctx *) > > - sha256_ctx_mgr_flush(cstate->mgr); > > - kernel_fpu_end(); > > - if (!sha_ctx) { > > - pr_err("sha256_mb error: nothing got" > > - " flushed for non-empty list\n"); > > - break; > > - } > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - sha_finish_walk(&rctx, cstate, true); > > - sha_complete_job(rctx, cstate, 0); > > - } > > - > > - if (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - /* get the hash context and then flush time */ > > - next_flush = rctx->tag.expire; > > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > > - } > > - return next_flush; > > -} > > - > > -static int __init sha256_mb_mod_init(void) > > -{ > > - > > - int cpu; > > - int err; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - /* check for dependent cpu features */ > > - if (!boot_cpu_has(X86_FEATURE_AVX2) || > > - !boot_cpu_has(X86_FEATURE_BMI2)) > > - return -ENODEV; > > - > > - /* initialize multibuffer structures */ > > - sha256_mb_alg_state.alg_cstate = alloc_percpu > > - (struct mcryptd_alg_cstate); > > - > > - sha256_job_mgr_init = sha256_mb_mgr_init_avx2; > > - sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; > > - sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; > > - sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; > > - > > - if (!sha256_mb_alg_state.alg_cstate) > > - return -ENOMEM; > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > > - cpu_state->next_flush = 0; > > - cpu_state->next_seq_num = 0; > > - cpu_state->flusher_engaged = false; > > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > > - cpu_state->cpu = cpu; > > - cpu_state->alg_state = &sha256_mb_alg_state; > > - cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), > > - GFP_KERNEL); > > - if (!cpu_state->mgr) > > - goto err2; > > - sha256_ctx_mgr_init(cpu_state->mgr); > > - INIT_LIST_HEAD(&cpu_state->work_list); > > - spin_lock_init(&cpu_state->work_lock); > > - } > > - sha256_mb_alg_state.flusher = &sha256_mb_flusher; > > - > > - err = crypto_register_ahash(&sha256_mb_areq_alg); > > - if (err) > > - goto err2; > > - err = crypto_register_ahash(&sha256_mb_async_alg); > > - if (err) > > - goto err1; > > - > > - > > - return 0; > > -err1: > > - crypto_unregister_ahash(&sha256_mb_areq_alg); > > -err2: > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha256_mb_alg_state.alg_cstate); > > - return -ENODEV; > > -} > > - > > -static void __exit sha256_mb_mod_fini(void) > > -{ > > - int cpu; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - crypto_unregister_ahash(&sha256_mb_async_alg); > > - crypto_unregister_ahash(&sha256_mb_areq_alg); > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha256_mb_alg_state.alg_cstate); > > -} > > - > > -module_init(sha256_mb_mod_init); > > -module_exit(sha256_mb_mod_fini); > > - > > -MODULE_LICENSE("GPL"); > > -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); > > - > > -MODULE_ALIAS_CRYPTO("sha256"); > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > > deleted file mode 100644 > > index 7c432543dc7f..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > > +++ /dev/null > > @@ -1,134 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA256 context > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#ifndef _SHA_MB_CTX_INTERNAL_H > > -#define _SHA_MB_CTX_INTERNAL_H > > - > > -#include "sha256_mb_mgr.h" > > - > > -#define HASH_UPDATE 0x00 > > -#define HASH_LAST 0x01 > > -#define HASH_DONE 0x02 > > -#define HASH_FINAL 0x04 > > - > > -#define HASH_CTX_STS_IDLE 0x00 > > -#define HASH_CTX_STS_PROCESSING 0x01 > > -#define HASH_CTX_STS_LAST 0x02 > > -#define HASH_CTX_STS_COMPLETE 0x04 > > - > > -enum hash_ctx_error { > > - HASH_CTX_ERROR_NONE = 0, > > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > > - > > -#ifdef HASH_CTX_DEBUG > > - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, > > -#endif > > -}; > > - > > - > > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > > -#define hash_ctx_status(ctx) ((ctx)->status) > > -#define hash_ctx_error(ctx) ((ctx)->error) > > -#define hash_ctx_init(ctx) \ > > - do { \ > > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > > - } while (0) > > - > > - > > -/* Hash Constants and Typedefs */ > > -#define SHA256_DIGEST_LENGTH 8 > > -#define SHA256_LOG2_BLOCK_SIZE 6 > > - > > -#define SHA256_PADLENGTHFIELD_SIZE 8 > > - > > -#ifdef SHA_MB_DEBUG > > -#define assert(expr) \ > > -do { \ > > - if (unlikely(!(expr))) { \ > > - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ > > - #expr, __FILE__, __func__, __LINE__); \ > > - } \ > > -} while (0) > > -#else > > -#define assert(expr) do {} while (0) > > -#endif > > - > > -struct sha256_ctx_mgr { > > - struct sha256_mb_mgr mgr; > > -}; > > - > > -/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ > > - > > -struct sha256_hash_ctx { > > - /* Must be at struct offset 0 */ > > - struct job_sha256 job; > > - /* status flag */ > > - int status; > > - /* error flag */ > > - int error; > > - > > - uint64_t total_length; > > - const void *incoming_buffer; > > - uint32_t incoming_buffer_length; > > - uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; > > - uint32_t partial_block_buffer_length; > > - void *user_data; > > -}; > > - > > -#endif > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > > deleted file mode 100644 > > index b01ae408c56d..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > > +++ /dev/null > > @@ -1,108 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA256 algorithm manager > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > -#ifndef __SHA_MB_MGR_H > > -#define __SHA_MB_MGR_H > > - > > -#include <linux/types.h> > > - > > -#define NUM_SHA256_DIGEST_WORDS 8 > > - > > -enum job_sts { STS_UNKNOWN = 0, > > - STS_BEING_PROCESSED = 1, > > - STS_COMPLETED = 2, > > - STS_INTERNAL_ERROR = 3, > > - STS_ERROR = 4 > > -}; > > - > > -struct job_sha256 { > > - u8 *buffer; > > - u32 len; > > - u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); > > - enum job_sts status; > > - void *user_data; > > -}; > > - > > -/* SHA256 out-of-order scheduler */ > > - > > -/* typedef uint32_t sha8_digest_array[8][8]; */ > > - > > -struct sha256_args_x8 { > > - uint32_t digest[8][8]; > > - uint8_t *data_ptr[8]; > > -}; > > - > > -struct sha256_lane_data { > > - struct job_sha256 *job_in_lane; > > -}; > > - > > -struct sha256_mb_mgr { > > - struct sha256_args_x8 args; > > - > > - uint32_t lens[8]; > > - > > - /* each byte is index (0...7) of unused lanes */ > > - uint64_t unused_lanes; > > - /* byte 4 is set to FF as a flag */ > > - struct sha256_lane_data ldata[8]; > > -}; > > - > > - > > -#define SHA256_MB_MGR_NUM_LANES_AVX2 8 > > - > > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); > > -struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, > > - struct job_sha256 *job); > > -struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); > > -struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); > > - > > -#endif > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > > deleted file mode 100644 > > index 5c377bac21d0..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > > +++ /dev/null > > @@ -1,304 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA256 algorithm data structure > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -# Macros for defining data structures > > - > > -# Usage example > > - > > -#START_FIELDS # JOB_AES > > -### name size align > > -#FIELD _plaintext, 8, 8 # pointer to plaintext > > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > > -#FIELD _IV, 16, 8 # IV > > -#FIELD _keys, 8, 8 # pointer to keys > > -#FIELD _len, 4, 4 # length in bytes > > -#FIELD _status, 4, 4 # status enumeration > > -#FIELD _user_data, 8, 8 # pointer to user data > > -#UNION _union, size1, align1, \ > > -# size2, align2, \ > > -# size3, align3, \ > > -# ... > > -#END_FIELDS > > -#%assign _JOB_AES_size _FIELD_OFFSET > > -#%assign _JOB_AES_align _STRUCT_ALIGN > > - > > -######################################################################### > > - > > -# Alternate "struc-like" syntax: > > -# STRUCT job_aes2 > > -# RES_Q .plaintext, 1 > > -# RES_Q .ciphertext, 1 > > -# RES_DQ .IV, 1 > > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > > -# RES_U .union, size1, align1, \ > > -# size2, align2, \ > > -# ... > > -# ENDSTRUCT > > -# # Following only needed if nesting > > -# %assign job_aes2_size _FIELD_OFFSET > > -# %assign job_aes2_align _STRUCT_ALIGN > > -# > > -# RES_* macros take a name, a count and an optional alignment. > > -# The count in in terms of the base size of the macro, and the > > -# default alignment is the base size. > > -# The macros are: > > -# Macro Base size > > -# RES_B 1 > > -# RES_W 2 > > -# RES_D 4 > > -# RES_Q 8 > > -# RES_DQ 16 > > -# RES_Y 32 > > -# RES_Z 64 > > -# > > -# RES_U defines a union. It's arguments are a name and two or more > > -# pairs of "size, alignment" > > -# > > -# The two assigns are only needed if this structure is being nested > > -# within another. Even if the assigns are not done, one can still use > > -# STRUCT_NAME_size as the size of the structure. > > -# > > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > > -# > > -# The differences between this and using "struc" directly are that each > > -# type is implicitly aligned to its natural length (although this can be > > -# over-ridden with an explicit third parameter), and that the structure > > -# is padded at the end to its overall alignment. > > -# > > - > > -######################################################################### > > - > > -#ifndef _DATASTRUCT_ASM_ > > -#define _DATASTRUCT_ASM_ > > - > > -#define SZ8 8*SHA256_DIGEST_WORD_SIZE > > -#define ROUNDS 64*SZ8 > > -#define PTR_SZ 8 > > -#define SHA256_DIGEST_WORD_SIZE 4 > > -#define MAX_SHA256_LANES 8 > > -#define SHA256_DIGEST_WORDS 8 > > -#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) > > -#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) > > -#define SHA256_BLK_SZ 64 > > - > > -# START_FIELDS > > -.macro START_FIELDS > > - _FIELD_OFFSET = 0 > > - _STRUCT_ALIGN = 0 > > -.endm > > - > > -# FIELD name size align > > -.macro FIELD name size align > > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > > - \name = _FIELD_OFFSET > > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > > -.if (\align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = \align > > -.endif > > -.endm > > - > > -# END_FIELDS > > -.macro END_FIELDS > > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > > -.endm > > - > > -######################################################################## > > - > > -.macro STRUCT p1 > > -START_FIELDS > > -.struc \p1 > > -.endm > > - > > -.macro ENDSTRUCT > > - tmp = _FIELD_OFFSET > > - END_FIELDS > > - tmp = (_FIELD_OFFSET - %%tmp) > > -.if (tmp > 0) > > - .lcomm tmp > > -.endif > > -.endstruc > > -.endm > > - > > -## RES_int name size align > > -.macro RES_int p1 p2 p3 > > - name = \p1 > > - size = \p2 > > - align = .\p3 > > - > > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > > -.align align > > -.lcomm name size > > - _FIELD_OFFSET = _FIELD_OFFSET + (size) > > -.if (align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = align > > -.endif > > -.endm > > - > > -# macro RES_B name, size [, align] > > -.macro RES_B _name, _size, _align=1 > > -RES_int _name _size _align > > -.endm > > - > > -# macro RES_W name, size [, align] > > -.macro RES_W _name, _size, _align=2 > > -RES_int _name 2*(_size) _align > > -.endm > > - > > -# macro RES_D name, size [, align] > > -.macro RES_D _name, _size, _align=4 > > -RES_int _name 4*(_size) _align > > -.endm > > - > > -# macro RES_Q name, size [, align] > > -.macro RES_Q _name, _size, _align=8 > > -RES_int _name 8*(_size) _align > > -.endm > > - > > -# macro RES_DQ name, size [, align] > > -.macro RES_DQ _name, _size, _align=16 > > -RES_int _name 16*(_size) _align > > -.endm > > - > > -# macro RES_Y name, size [, align] > > -.macro RES_Y _name, _size, _align=32 > > -RES_int _name 32*(_size) _align > > -.endm > > - > > -# macro RES_Z name, size [, align] > > -.macro RES_Z _name, _size, _align=64 > > -RES_int _name 64*(_size) _align > > -.endm > > - > > -#endif > > - > > - > > -######################################################################## > > -#### Define SHA256 Out Of Order Data Structures > > -######################################################################## > > - > > -START_FIELDS # LANE_DATA > > -### name size align > > -FIELD _job_in_lane, 8, 8 # pointer to job object > > -END_FIELDS > > - > > - _LANE_DATA_size = _FIELD_OFFSET > > - _LANE_DATA_align = _STRUCT_ALIGN > > - > > -######################################################################## > > - > > -START_FIELDS # SHA256_ARGS_X4 > > -### name size align > > -FIELD _digest, 4*8*8, 4 # transposed digest > > -FIELD _data_ptr, 8*8, 8 # array of pointers to data > > -END_FIELDS > > - > > - _SHA256_ARGS_X4_size = _FIELD_OFFSET > > - _SHA256_ARGS_X4_align = _STRUCT_ALIGN > > - _SHA256_ARGS_X8_size = _FIELD_OFFSET > > - _SHA256_ARGS_X8_align = _STRUCT_ALIGN > > - > > -####################################################################### > > - > > -START_FIELDS # MB_MGR > > -### name size align > > -FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align > > -FIELD _lens, 4*8, 8 > > -FIELD _unused_lanes, 8, 8 > > -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align > > -END_FIELDS > > - > > - _MB_MGR_size = _FIELD_OFFSET > > - _MB_MGR_align = _STRUCT_ALIGN > > - > > -_args_digest = _args + _digest > > -_args_data_ptr = _args + _data_ptr > > - > > -####################################################################### > > - > > -START_FIELDS #STACK_FRAME > > -### name size align > > -FIELD _data, 16*SZ8, 1 # transposed digest > > -FIELD _digest, 8*SZ8, 1 # array of pointers to data > > -FIELD _ytmp, 4*SZ8, 1 > > -FIELD _rsp, 8, 1 > > -END_FIELDS > > - > > - _STACK_FRAME_size = _FIELD_OFFSET > > - _STACK_FRAME_align = _STRUCT_ALIGN > > - > > -####################################################################### > > - > > -######################################################################## > > -#### Define constants > > -######################################################################## > > - > > -#define STS_UNKNOWN 0 > > -#define STS_BEING_PROCESSED 1 > > -#define STS_COMPLETED 2 > > - > > -######################################################################## > > -#### Define JOB_SHA256 structure > > -######################################################################## > > - > > -START_FIELDS # JOB_SHA256 > > - > > -### name size align > > -FIELD _buffer, 8, 8 # pointer to buffer > > -FIELD _len, 8, 8 # length in bytes > > -FIELD _result_digest, 8*4, 32 # Digest (output) > > -FIELD _status, 4, 4 > > -FIELD _user_data, 8, 8 > > -END_FIELDS > > - > > - _JOB_SHA256_size = _FIELD_OFFSET > > - _JOB_SHA256_align = _STRUCT_ALIGN > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > > deleted file mode 100644 > > index d2364c55bbde..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > > +++ /dev/null > > @@ -1,307 +0,0 @@ > > -/* > > - * Flush routine for SHA256 multibuffer > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha256_mb_mgr_datastruct.S" > > - > > -.extern sha256_x8_avx2 > > - > > -#LINUX register definitions > > -#define arg1 %rdi > > -#define arg2 %rsi > > - > > -# Common register definitions > > -#define state arg1 > > -#define job arg2 > > -#define len2 arg2 > > - > > -# idx must be a register not clobberred by sha1_mult > > -#define idx %r8 > > -#define DWORD_idx %r8d > > - > > -#define unused_lanes %rbx > > -#define lane_data %rbx > > -#define tmp2 %rbx > > -#define tmp2_w %ebx > > - > > -#define job_rax %rax > > -#define tmp1 %rax > > -#define size_offset %rax > > -#define tmp %rax > > -#define start_offset %rax > > - > > -#define tmp3 %arg1 > > - > > -#define extra_blocks %arg2 > > -#define p %arg2 > > - > > -.macro LABEL prefix n > > -\prefix\n\(): > > -.endm > > - > > -.macro JNE_SKIP i > > -jne skip_\i > > -.endm > > - > > -.altmacro > > -.macro SET_OFFSET _offset > > -offset = \_offset > > -.endm > > -.noaltmacro > > - > > -# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) > > -# arg 1 : rcx : state > > -ENTRY(sha256_mb_mgr_flush_avx2) > > - FRAME_BEGIN > > - push %rbx > > - > > - # If bit (32+3) is set, then all lanes are empty > > - mov _unused_lanes(state), unused_lanes > > - bt $32+3, unused_lanes > > - jc return_null > > - > > - # find a lane with a non-null job > > - xor idx, idx > > - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne one(%rip), idx > > - offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne two(%rip), idx > > - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne three(%rip), idx > > - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne four(%rip), idx > > - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne five(%rip), idx > > - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne six(%rip), idx > > - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne seven(%rip), idx > > - > > - # copy idx to empty lanes > > -copy_lane_data: > > - offset = (_args + _data_ptr) > > - mov offset(state,idx,8), tmp > > - > > - I = 0 > > -.rep 8 > > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > -.altmacro > > - JNE_SKIP %I > > - offset = (_args + _data_ptr + 8*I) > > - mov tmp, offset(state) > > - offset = (_lens + 4*I) > > - movl $0xFFFFFFFF, offset(state) > > -LABEL skip_ %I > > - I = (I+1) > > -.noaltmacro > > -.endr > > - > > - # Find min length > > - vmovdqu _lens+0*16(state), %xmm0 > > - vmovdqu _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - mov idx, len2 > > - and $0xF, idx > > - shr $4, len2 > > - jz len_is_0 > > - > > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > > - vpshufd $0, %xmm2, %xmm2 > > - > > - vpsubd %xmm2, %xmm0, %xmm0 > > - vpsubd %xmm2, %xmm1, %xmm1 > > - > > - vmovdqu %xmm0, _lens+0*16(state) > > - vmovdqu %xmm1, _lens+1*16(state) > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha256_x8_avx2 > > - # state and idx are intact > > - > > -len_is_0: > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - > > - mov unused_lanes, _unused_lanes(state) > > - movl $0xFFFFFFFF, _lens(state,idx,4) > > - > > - vmovd _args_digest(state , idx, 4) , %xmm0 > > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > > - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 > > - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 > > - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - offset = (_result_digest + 1*16) > > - vmovdqu %xmm1, offset(job_rax) > > - > > -return: > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > -ENDPROC(sha256_mb_mgr_flush_avx2) > > - > > -############################################################################## > > - > > -.align 16 > > -ENTRY(sha256_mb_mgr_get_comp_job_avx2) > > - push %rbx > > - > > - ## if bit 32+3 is set, then all lanes are empty > > - mov _unused_lanes(state), unused_lanes > > - bt $(32+3), unused_lanes > > - jc .return_null > > - > > - # Find min length > > - vmovdqu _lens(state), %xmm0 > > - vmovdqu _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - test $~0xF, idx > > - jnz .return_null > > - > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens(state, idx, 4) > > - > > - vmovd _args_digest(state, idx, 4), %xmm0 > > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > > - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 > > - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 > > - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - offset = (_result_digest + 1*16) > > - vmovdqu %xmm1, offset(job_rax) > > - > > - pop %rbx > > - > > - ret > > - > > -.return_null: > > - xor job_rax, job_rax > > - pop %rbx > > - ret > > -ENDPROC(sha256_mb_mgr_get_comp_job_avx2) > > - > > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > > -.align 16 > > -clear_low_nibble: > > -.octa 0x000000000000000000000000FFFFFFF0 > > - > > -.section .rodata.cst8, "aM", @progbits, 8 > > -.align 8 > > -one: > > -.quad 1 > > -two: > > -.quad 2 > > -three: > > -.quad 3 > > -four: > > -.quad 4 > > -five: > > -.quad 5 > > -six: > > -.quad 6 > > -seven: > > -.quad 7 > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > > deleted file mode 100644 > > index b0c498371e67..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > > +++ /dev/null > > @@ -1,65 +0,0 @@ > > -/* > > - * Initialization code for multi buffer SHA256 algorithm for AVX2 > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include "sha256_mb_mgr.h" > > - > > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) > > -{ > > - unsigned int j; > > - > > - state->unused_lanes = 0xF76543210ULL; > > - for (j = 0; j < 8; j++) { > > - state->lens[j] = 0xFFFFFFFF; > > - state->ldata[j].job_in_lane = NULL; > > - } > > -} > > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > > deleted file mode 100644 > > index b36ae7454084..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > > +++ /dev/null > > @@ -1,214 +0,0 @@ > > -/* > > - * Buffer submit code for multi buffer SHA256 algorithm > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha256_mb_mgr_datastruct.S" > > - > > -.extern sha256_x8_avx2 > > - > > -# LINUX register definitions > > -arg1 = %rdi > > -arg2 = %rsi > > -size_offset = %rcx > > -tmp2 = %rcx > > -extra_blocks = %rdx > > - > > -# Common definitions > > -#define state arg1 > > -#define job %rsi > > -#define len2 arg2 > > -#define p2 arg2 > > - > > -# idx must be a register not clobberred by sha1_x8_avx2 > > -idx = %r8 > > -DWORD_idx = %r8d > > -last_len = %r8 > > - > > -p = %r11 > > -start_offset = %r11 > > - > > -unused_lanes = %rbx > > -BYTE_unused_lanes = %bl > > - > > -job_rax = %rax > > -len = %rax > > -DWORD_len = %eax > > - > > -lane = %r12 > > -tmp3 = %r12 > > - > > -tmp = %r9 > > -DWORD_tmp = %r9d > > - > > -lane_data = %r10 > > - > > -# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job) > > -# arg 1 : rcx : state > > -# arg 2 : rdx : job > > -ENTRY(sha256_mb_mgr_submit_avx2) > > - FRAME_BEGIN > > - push %rbx > > - push %r12 > > - > > - mov _unused_lanes(state), unused_lanes > > - mov unused_lanes, lane > > - and $0xF, lane > > - shr $4, unused_lanes > > - imul $_LANE_DATA_size, lane, lane_data > > - movl $STS_BEING_PROCESSED, _status(job) > > - lea _ldata(state, lane_data), lane_data > > - mov unused_lanes, _unused_lanes(state) > > - movl _len(job), DWORD_len > > - > > - mov job, _job_in_lane(lane_data) > > - shl $4, len > > - or lane, len > > - > > - movl DWORD_len, _lens(state , lane, 4) > > - > > - # Load digest words from result_digest > > - vmovdqu _result_digest(job), %xmm0 > > - vmovdqu _result_digest+1*16(job), %xmm1 > > - vmovd %xmm0, _args_digest(state, lane, 4) > > - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) > > - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) > > - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) > > - vmovd %xmm1, _args_digest+4*32(state , lane, 4) > > - > > - vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4) > > - vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4) > > - vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4) > > - > > - mov _buffer(job), p > > - mov p, _args_data_ptr(state, lane, 8) > > - > > - cmp $0xF, unused_lanes > > - jne return_null > > - > > -start_loop: > > - # Find min length > > - vmovdqa _lens(state), %xmm0 > > - vmovdqa _lens+1*16(state), %xmm1 > > - > > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > > - > > - vmovd %xmm2, DWORD_idx > > - mov idx, len2 > > - and $0xF, idx > > - shr $4, len2 > > - jz len_is_0 > > - > > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > > - vpshufd $0, %xmm2, %xmm2 > > - > > - vpsubd %xmm2, %xmm0, %xmm0 > > - vpsubd %xmm2, %xmm1, %xmm1 > > - > > - vmovdqa %xmm0, _lens + 0*16(state) > > - vmovdqa %xmm1, _lens + 1*16(state) > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha256_x8_avx2 > > - > > - # state and idx are intact > > - > > -len_is_0: > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - mov _unused_lanes(state), unused_lanes > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - shl $4, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens(state,idx,4) > > - > > - vmovd _args_digest(state, idx, 4), %xmm0 > > - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 > > - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 > > - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 > > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > > - > > - vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 > > - vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 > > - vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > > - > > -return: > > - pop %r12 > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > - > > -ENDPROC(sha256_mb_mgr_submit_avx2) > > - > > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > > -.align 16 > > -clear_low_nibble: > > - .octa 0x000000000000000000000000FFFFFFF0 > > diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > > deleted file mode 100644 > > index 1687c80c5995..000000000000 > > --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > > +++ /dev/null > > @@ -1,598 +0,0 @@ > > -/* > > - * Multi-buffer SHA256 algorithm hash compute routine > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include "sha256_mb_mgr_datastruct.S" > > - > > -## code to compute oct SHA256 using SSE-256 > > -## outer calling routine takes care of save and restore of XMM registers > > -## Logic designed/laid out by JDG > > - > > -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 > > -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 > > -## Linux preserves: rdi rbp r8 > > -## > > -## clobbers %ymm0-15 > > - > > -arg1 = %rdi > > -arg2 = %rsi > > -reg3 = %rcx > > -reg4 = %rdx > > - > > -# Common definitions > > -STATE = arg1 > > -INP_SIZE = arg2 > > - > > -IDX = %rax > > -ROUND = %rbx > > -TBL = reg3 > > - > > -inp0 = %r9 > > -inp1 = %r10 > > -inp2 = %r11 > > -inp3 = %r12 > > -inp4 = %r13 > > -inp5 = %r14 > > -inp6 = %r15 > > -inp7 = reg4 > > - > > -a = %ymm0 > > -b = %ymm1 > > -c = %ymm2 > > -d = %ymm3 > > -e = %ymm4 > > -f = %ymm5 > > -g = %ymm6 > > -h = %ymm7 > > - > > -T1 = %ymm8 > > - > > -a0 = %ymm12 > > -a1 = %ymm13 > > -a2 = %ymm14 > > -TMP = %ymm15 > > -TMP0 = %ymm6 > > -TMP1 = %ymm7 > > - > > -TT0 = %ymm8 > > -TT1 = %ymm9 > > -TT2 = %ymm10 > > -TT3 = %ymm11 > > -TT4 = %ymm12 > > -TT5 = %ymm13 > > -TT6 = %ymm14 > > -TT7 = %ymm15 > > - > > -# Define stack usage > > - > > -# Assume stack aligned to 32 bytes before call > > -# Therefore FRAMESZ mod 32 must be 32-8 = 24 > > - > > -#define FRAMESZ 0x388 > > - > > -#define VMOVPS vmovups > > - > > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 > > -# "transpose" data in {r0...r7} using temps {t0...t1} > > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > > -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} > > -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} > > -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} > > -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} > > -# > > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > > -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} > > -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} > > -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} > > -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} > > -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} > > -# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} > > -# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} > > -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} > > -# > > - > > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 > > - # process top half (r0..r3) {a...d} > > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > > - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} > > - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} > > - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} > > - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} > > - > > - # use r2 in place of t0 > > - # process bottom half (r4..r7) {e...h} > > - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} > > - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} > > - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} > > - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} > > - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} > > - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} > > - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} > > - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} > > - > > - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 > > - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 > > - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 > > - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 > > - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 > > - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 > > - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 > > - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 > > - > > -.endm > > - > > -.macro ROTATE_ARGS > > -TMP_ = h > > -h = g > > -g = f > > -f = e > > -e = d > > -d = c > > -c = b > > -b = a > > -a = TMP_ > > -.endm > > - > > -.macro _PRORD reg imm tmp > > - vpslld $(32-\imm),\reg,\tmp > > - vpsrld $\imm,\reg, \reg > > - vpor \tmp,\reg, \reg > > -.endm > > - > > -# PRORD_nd reg, imm, tmp, src > > -.macro _PRORD_nd reg imm tmp src > > - vpslld $(32-\imm), \src, \tmp > > - vpsrld $\imm, \src, \reg > > - vpor \tmp, \reg, \reg > > -.endm > > - > > -# PRORD dst/src, amt > > -.macro PRORD reg imm > > - _PRORD \reg,\imm,TMP > > -.endm > > - > > -# PRORD_nd dst, src, amt > > -.macro PRORD_nd reg tmp imm > > - _PRORD_nd \reg, \imm, TMP, \tmp > > -.endm > > - > > -# arguments passed implicitly in preprocessor symbols i, a...h > > -.macro ROUND_00_15 _T1 i > > - PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) > > - > > - vpxor g, f, a2 # ch: a2 = f^g > > - vpand e,a2, a2 # ch: a2 = (f^g)&e > > - vpxor g, a2, a2 # a2 = ch > > - > > - PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) > > - > > - vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) > > - vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K > > - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) > > - PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) > > - vpaddd a2, h, h # h = h + ch > > - PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) > > - vpaddd \_T1,h, h # h = h + ch + W + K > > - vpxor a1, a0, a0 # a0 = sigma1 > > - PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) > > - vpxor c, a, \_T1 # maj: T1 = a^c > > - add $SZ8, ROUND # ROUND++ > > - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b > > - vpaddd a0, h, h > > - vpaddd h, d, d > > - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) > > - PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) > > - vpxor a1, a2, a2 # a2 = sig0 > > - vpand c, a, a1 # maj: a1 = a&c > > - vpor \_T1, a1, a1 # a1 = maj > > - vpaddd a1, h, h # h = h + ch + W + K + maj > > - vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 > > - ROTATE_ARGS > > -.endm > > - > > -# arguments passed implicitly in preprocessor symbols i, a...h > > -.macro ROUND_16_XX _T1 i > > - vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 > > - vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 > > - vmovdqu \_T1, a0 > > - PRORD \_T1,11 > > - vmovdqu a1, a2 > > - PRORD a1,2 > > - vpxor a0, \_T1, \_T1 > > - PRORD \_T1, 7 > > - vpxor a2, a1, a1 > > - PRORD a1, 17 > > - vpsrld $3, a0, a0 > > - vpxor a0, \_T1, \_T1 > > - vpsrld $10, a2, a2 > > - vpxor a2, a1, a1 > > - vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 > > - vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 > > - vpaddd a1, \_T1, \_T1 > > - > > - ROUND_00_15 \_T1,\i > > -.endm > > - > > -# SHA256_ARGS: > > -# UINT128 digest[8]; // transposed digests > > -# UINT8 *data_ptr[4]; > > - > > -# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); > > -# arg 1 : STATE : pointer to array of pointers to input data > > -# arg 2 : INP_SIZE : size of input in blocks > > - # general registers preserved in outer calling routine > > - # outer calling routine saves all the XMM registers > > - # save rsp, allocate 32-byte aligned for local variables > > -ENTRY(sha256_x8_avx2) > > - > > - # save callee-saved clobbered registers to comply with C function ABI > > - push %r12 > > - push %r13 > > - push %r14 > > - push %r15 > > - > > - mov %rsp, IDX > > - sub $FRAMESZ, %rsp > > - and $~0x1F, %rsp > > - mov IDX, _rsp(%rsp) > > - > > - # Load the pre-transposed incoming digest. > > - vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a > > - vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b > > - vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c > > - vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d > > - vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e > > - vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f > > - vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g > > - vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h > > - > > - lea K256_8(%rip),TBL > > - > > - # load the address of each of the 4 message lanes > > - # getting ready to transpose input onto stack > > - mov _args_data_ptr+0*PTR_SZ(STATE),inp0 > > - mov _args_data_ptr+1*PTR_SZ(STATE),inp1 > > - mov _args_data_ptr+2*PTR_SZ(STATE),inp2 > > - mov _args_data_ptr+3*PTR_SZ(STATE),inp3 > > - mov _args_data_ptr+4*PTR_SZ(STATE),inp4 > > - mov _args_data_ptr+5*PTR_SZ(STATE),inp5 > > - mov _args_data_ptr+6*PTR_SZ(STATE),inp6 > > - mov _args_data_ptr+7*PTR_SZ(STATE),inp7 > > - > > - xor IDX, IDX > > -lloop: > > - xor ROUND, ROUND > > - > > - # save old digest > > - vmovdqu a, _digest(%rsp) > > - vmovdqu b, _digest+1*SZ8(%rsp) > > - vmovdqu c, _digest+2*SZ8(%rsp) > > - vmovdqu d, _digest+3*SZ8(%rsp) > > - vmovdqu e, _digest+4*SZ8(%rsp) > > - vmovdqu f, _digest+5*SZ8(%rsp) > > - vmovdqu g, _digest+6*SZ8(%rsp) > > - vmovdqu h, _digest+7*SZ8(%rsp) > > - i = 0 > > -.rep 2 > > - VMOVPS i*32(inp0, IDX), TT0 > > - VMOVPS i*32(inp1, IDX), TT1 > > - VMOVPS i*32(inp2, IDX), TT2 > > - VMOVPS i*32(inp3, IDX), TT3 > > - VMOVPS i*32(inp4, IDX), TT4 > > - VMOVPS i*32(inp5, IDX), TT5 > > - VMOVPS i*32(inp6, IDX), TT6 > > - VMOVPS i*32(inp7, IDX), TT7 > > - vmovdqu g, _ytmp(%rsp) > > - vmovdqu h, _ytmp+1*SZ8(%rsp) > > - TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 > > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 > > - vmovdqu _ytmp(%rsp), g > > - vpshufb TMP1, TT0, TT0 > > - vpshufb TMP1, TT1, TT1 > > - vpshufb TMP1, TT2, TT2 > > - vpshufb TMP1, TT3, TT3 > > - vpshufb TMP1, TT4, TT4 > > - vpshufb TMP1, TT5, TT5 > > - vpshufb TMP1, TT6, TT6 > > - vpshufb TMP1, TT7, TT7 > > - vmovdqu _ytmp+1*SZ8(%rsp), h > > - vmovdqu TT4, _ytmp(%rsp) > > - vmovdqu TT5, _ytmp+1*SZ8(%rsp) > > - vmovdqu TT6, _ytmp+2*SZ8(%rsp) > > - vmovdqu TT7, _ytmp+3*SZ8(%rsp) > > - ROUND_00_15 TT0,(i*8+0) > > - vmovdqu _ytmp(%rsp), TT0 > > - ROUND_00_15 TT1,(i*8+1) > > - vmovdqu _ytmp+1*SZ8(%rsp), TT1 > > - ROUND_00_15 TT2,(i*8+2) > > - vmovdqu _ytmp+2*SZ8(%rsp), TT2 > > - ROUND_00_15 TT3,(i*8+3) > > - vmovdqu _ytmp+3*SZ8(%rsp), TT3 > > - ROUND_00_15 TT0,(i*8+4) > > - ROUND_00_15 TT1,(i*8+5) > > - ROUND_00_15 TT2,(i*8+6) > > - ROUND_00_15 TT3,(i*8+7) > > - i = (i+1) > > -.endr > > - add $64, IDX > > - i = (i*8) > > - > > - jmp Lrounds_16_xx > > -.align 16 > > -Lrounds_16_xx: > > -.rep 16 > > - ROUND_16_XX T1, i > > - i = (i+1) > > -.endr > > - > > - cmp $ROUNDS,ROUND > > - jb Lrounds_16_xx > > - > > - # add old digest > > - vpaddd _digest+0*SZ8(%rsp), a, a > > - vpaddd _digest+1*SZ8(%rsp), b, b > > - vpaddd _digest+2*SZ8(%rsp), c, c > > - vpaddd _digest+3*SZ8(%rsp), d, d > > - vpaddd _digest+4*SZ8(%rsp), e, e > > - vpaddd _digest+5*SZ8(%rsp), f, f > > - vpaddd _digest+6*SZ8(%rsp), g, g > > - vpaddd _digest+7*SZ8(%rsp), h, h > > - > > - sub $1, INP_SIZE # unit is blocks > > - jne lloop > > - > > - # write back to memory (state object) the transposed digest > > - vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) > > - vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) > > - > > - # update input pointers > > - add IDX, inp0 > > - mov inp0, _args_data_ptr+0*8(STATE) > > - add IDX, inp1 > > - mov inp1, _args_data_ptr+1*8(STATE) > > - add IDX, inp2 > > - mov inp2, _args_data_ptr+2*8(STATE) > > - add IDX, inp3 > > - mov inp3, _args_data_ptr+3*8(STATE) > > - add IDX, inp4 > > - mov inp4, _args_data_ptr+4*8(STATE) > > - add IDX, inp5 > > - mov inp5, _args_data_ptr+5*8(STATE) > > - add IDX, inp6 > > - mov inp6, _args_data_ptr+6*8(STATE) > > - add IDX, inp7 > > - mov inp7, _args_data_ptr+7*8(STATE) > > - > > - # Postamble > > - mov _rsp(%rsp), %rsp > > - > > - # restore callee-saved clobbered registers > > - pop %r15 > > - pop %r14 > > - pop %r13 > > - pop %r12 > > - > > - ret > > -ENDPROC(sha256_x8_avx2) > > - > > -.section .rodata.K256_8, "a", @progbits > > -.align 64 > > -K256_8: > > - .octa 0x428a2f98428a2f98428a2f98428a2f98 > > - .octa 0x428a2f98428a2f98428a2f98428a2f98 > > - .octa 0x71374491713744917137449171374491 > > - .octa 0x71374491713744917137449171374491 > > - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf > > - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf > > - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 > > - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 > > - .octa 0x3956c25b3956c25b3956c25b3956c25b > > - .octa 0x3956c25b3956c25b3956c25b3956c25b > > - .octa 0x59f111f159f111f159f111f159f111f1 > > - .octa 0x59f111f159f111f159f111f159f111f1 > > - .octa 0x923f82a4923f82a4923f82a4923f82a4 > > - .octa 0x923f82a4923f82a4923f82a4923f82a4 > > - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 > > - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 > > - .octa 0xd807aa98d807aa98d807aa98d807aa98 > > - .octa 0xd807aa98d807aa98d807aa98d807aa98 > > - .octa 0x12835b0112835b0112835b0112835b01 > > - .octa 0x12835b0112835b0112835b0112835b01 > > - .octa 0x243185be243185be243185be243185be > > - .octa 0x243185be243185be243185be243185be > > - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 > > - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 > > - .octa 0x72be5d7472be5d7472be5d7472be5d74 > > - .octa 0x72be5d7472be5d7472be5d7472be5d74 > > - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe > > - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe > > - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 > > - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 > > - .octa 0xc19bf174c19bf174c19bf174c19bf174 > > - .octa 0xc19bf174c19bf174c19bf174c19bf174 > > - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 > > - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 > > - .octa 0xefbe4786efbe4786efbe4786efbe4786 > > - .octa 0xefbe4786efbe4786efbe4786efbe4786 > > - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 > > - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 > > - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc > > - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc > > - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f > > - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f > > - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa > > - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa > > - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc > > - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc > > - .octa 0x76f988da76f988da76f988da76f988da > > - .octa 0x76f988da76f988da76f988da76f988da > > - .octa 0x983e5152983e5152983e5152983e5152 > > - .octa 0x983e5152983e5152983e5152983e5152 > > - .octa 0xa831c66da831c66da831c66da831c66d > > - .octa 0xa831c66da831c66da831c66da831c66d > > - .octa 0xb00327c8b00327c8b00327c8b00327c8 > > - .octa 0xb00327c8b00327c8b00327c8b00327c8 > > - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 > > - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 > > - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 > > - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 > > - .octa 0xd5a79147d5a79147d5a79147d5a79147 > > - .octa 0xd5a79147d5a79147d5a79147d5a79147 > > - .octa 0x06ca635106ca635106ca635106ca6351 > > - .octa 0x06ca635106ca635106ca635106ca6351 > > - .octa 0x14292967142929671429296714292967 > > - .octa 0x14292967142929671429296714292967 > > - .octa 0x27b70a8527b70a8527b70a8527b70a85 > > - .octa 0x27b70a8527b70a8527b70a8527b70a85 > > - .octa 0x2e1b21382e1b21382e1b21382e1b2138 > > - .octa 0x2e1b21382e1b21382e1b21382e1b2138 > > - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc > > - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc > > - .octa 0x53380d1353380d1353380d1353380d13 > > - .octa 0x53380d1353380d1353380d1353380d13 > > - .octa 0x650a7354650a7354650a7354650a7354 > > - .octa 0x650a7354650a7354650a7354650a7354 > > - .octa 0x766a0abb766a0abb766a0abb766a0abb > > - .octa 0x766a0abb766a0abb766a0abb766a0abb > > - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e > > - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e > > - .octa 0x92722c8592722c8592722c8592722c85 > > - .octa 0x92722c8592722c8592722c8592722c85 > > - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 > > - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 > > - .octa 0xa81a664ba81a664ba81a664ba81a664b > > - .octa 0xa81a664ba81a664ba81a664ba81a664b > > - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 > > - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 > > - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 > > - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 > > - .octa 0xd192e819d192e819d192e819d192e819 > > - .octa 0xd192e819d192e819d192e819d192e819 > > - .octa 0xd6990624d6990624d6990624d6990624 > > - .octa 0xd6990624d6990624d6990624d6990624 > > - .octa 0xf40e3585f40e3585f40e3585f40e3585 > > - .octa 0xf40e3585f40e3585f40e3585f40e3585 > > - .octa 0x106aa070106aa070106aa070106aa070 > > - .octa 0x106aa070106aa070106aa070106aa070 > > - .octa 0x19a4c11619a4c11619a4c11619a4c116 > > - .octa 0x19a4c11619a4c11619a4c11619a4c116 > > - .octa 0x1e376c081e376c081e376c081e376c08 > > - .octa 0x1e376c081e376c081e376c081e376c08 > > - .octa 0x2748774c2748774c2748774c2748774c > > - .octa 0x2748774c2748774c2748774c2748774c > > - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 > > - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 > > - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 > > - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 > > - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a > > - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a > > - .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f > > - .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f > > - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 > > - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 > > - .octa 0x748f82ee748f82ee748f82ee748f82ee > > - .octa 0x748f82ee748f82ee748f82ee748f82ee > > - .octa 0x78a5636f78a5636f78a5636f78a5636f > > - .octa 0x78a5636f78a5636f78a5636f78a5636f > > - .octa 0x84c8781484c8781484c8781484c87814 > > - .octa 0x84c8781484c8781484c8781484c87814 > > - .octa 0x8cc702088cc702088cc702088cc70208 > > - .octa 0x8cc702088cc702088cc702088cc70208 > > - .octa 0x90befffa90befffa90befffa90befffa > > - .octa 0x90befffa90befffa90befffa90befffa > > - .octa 0xa4506ceba4506ceba4506ceba4506ceb > > - .octa 0xa4506ceba4506ceba4506ceba4506ceb > > - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 > > - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 > > - .octa 0xc67178f2c67178f2c67178f2c67178f2 > > - .octa 0xc67178f2c67178f2c67178f2c67178f2 > > - > > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > > -.align 32 > > -PSHUFFLE_BYTE_FLIP_MASK: > > -.octa 0x0c0d0e0f08090a0b0405060700010203 > > -.octa 0x0c0d0e0f08090a0b0405060700010203 > > - > > -.section .rodata.cst256.K256, "aM", @progbits, 256 > > -.align 64 > > -.global K256 > > -K256: > > - .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 > > - .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 > > - .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 > > - .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 > > - .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc > > - .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da > > - .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 > > - .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 > > - .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 > > - .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 > > - .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 > > - .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 > > - .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 > > - .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 > > - .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 > > - .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 > > diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile > > deleted file mode 100644 > > index 90f1ef69152e..000000000000 > > --- a/arch/x86/crypto/sha512-mb/Makefile > > +++ /dev/null > > @@ -1,12 +0,0 @@ > > -# SPDX-License-Identifier: GPL-2.0 > > -# > > -# Arch-specific CryptoAPI modules. > > -# > > - > > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > > - $(comma)4)$(comma)%ymm2,yes,no) > > -ifeq ($(avx2_supported),yes) > > - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o > > - sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ > > - sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o > > -endif > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c > > deleted file mode 100644 > > index 26b85678012d..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb.c > > +++ /dev/null > > @@ -1,1047 +0,0 @@ > > -/* > > - * Multi buffer SHA512 algorithm Glue Code > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > - > > -#include <crypto/internal/hash.h> > > -#include <linux/init.h> > > -#include <linux/module.h> > > -#include <linux/mm.h> > > -#include <linux/cryptohash.h> > > -#include <linux/types.h> > > -#include <linux/list.h> > > -#include <crypto/scatterwalk.h> > > -#include <crypto/sha.h> > > -#include <crypto/mcryptd.h> > > -#include <crypto/crypto_wq.h> > > -#include <asm/byteorder.h> > > -#include <linux/hardirq.h> > > -#include <asm/fpu/api.h> > > -#include "sha512_mb_ctx.h" > > - > > -#define FLUSH_INTERVAL 1000 /* in usec */ > > - > > -static struct mcryptd_alg_state sha512_mb_alg_state; > > - > > -struct sha512_mb_ctx { > > - struct mcryptd_ahash *mcryptd_tfm; > > -}; > > - > > -static inline struct mcryptd_hash_request_ctx > > - *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) > > -{ > > - struct ahash_request *areq; > > - > > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > > -} > > - > > -static inline struct ahash_request > > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > > -{ > > - return container_of((void *) ctx, struct ahash_request, __ctx); > > -} > > - > > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > > - struct ahash_request *areq) > > -{ > > - rctx->flag = HASH_UPDATE; > > -} > > - > > -static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); > > -static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) > > - (struct sha512_mb_mgr *state, > > - struct job_sha512 *job); > > -static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) > > - (struct sha512_mb_mgr *state); > > -static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) > > - (struct sha512_mb_mgr *state); > > - > > -inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], > > - uint64_t total_len) > > -{ > > - uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); > > - > > - memset(&padblock[i], 0, SHA512_BLOCK_SIZE); > > - padblock[i] = 0x80; > > - > > - i += ((SHA512_BLOCK_SIZE - 1) & > > - (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) > > - + 1 + SHA512_PADLENGTHFIELD_SIZE; > > - > > -#if SHA512_PADLENGTHFIELD_SIZE == 16 > > - *((uint64_t *) &padblock[i - 16]) = 0; > > -#endif > > - > > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > > - > > - /* Number of extra blocks to hash */ > > - return i >> SHA512_LOG2_BLOCK_SIZE; > > -} > > - > > -static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit > > - (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) > > -{ > > - while (ctx) { > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Clear PROCESSING bit */ > > - ctx->status = HASH_CTX_STS_COMPLETE; > > - return ctx; > > - } > > - > > - /* > > - * If the extra blocks are empty, begin hashing what remains > > - * in the user's buffer. > > - */ > > - if (ctx->partial_block_buffer_length == 0 && > > - ctx->incoming_buffer_length) { > > - > > - const void *buffer = ctx->incoming_buffer; > > - uint32_t len = ctx->incoming_buffer_length; > > - uint32_t copy_len; > > - > > - /* > > - * Only entire blocks can be hashed. > > - * Copy remainder to extra blocks buffer. > > - */ > > - copy_len = len & (SHA512_BLOCK_SIZE-1); > > - > > - if (copy_len) { > > - len -= copy_len; > > - memcpy(ctx->partial_block_buffer, > > - ((const char *) buffer + len), > > - copy_len); > > - ctx->partial_block_buffer_length = copy_len; > > - } > > - > > - ctx->incoming_buffer_length = 0; > > - > > - /* len should be a multiple of the block size now */ > > - assert((len % SHA512_BLOCK_SIZE) == 0); > > - > > - /* Set len to the number of blocks to be hashed */ > > - len >>= SHA512_LOG2_BLOCK_SIZE; > > - > > - if (len) { > > - > > - ctx->job.buffer = (uint8_t *) buffer; > > - ctx->job.len = len; > > - ctx = (struct sha512_hash_ctx *) > > - sha512_job_mgr_submit(&mgr->mgr, > > - &ctx->job); > > - continue; > > - } > > - } > > - > > - /* > > - * If the extra blocks are not empty, then we are > > - * either on the last block(s) or we need more > > - * user input before continuing. > > - */ > > - if (ctx->status & HASH_CTX_STS_LAST) { > > - > > - uint8_t *buf = ctx->partial_block_buffer; > > - uint32_t n_extra_blocks = > > - sha512_pad(buf, ctx->total_length); > > - > > - ctx->status = (HASH_CTX_STS_PROCESSING | > > - HASH_CTX_STS_COMPLETE); > > - ctx->job.buffer = buf; > > - ctx->job.len = (uint32_t) n_extra_blocks; > > - ctx = (struct sha512_hash_ctx *) > > - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); > > - continue; > > - } > > - > > - if (ctx) > > - ctx->status = HASH_CTX_STS_IDLE; > > - return ctx; > > - } > > - > > - return NULL; > > -} > > - > > -static struct sha512_hash_ctx > > - *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) > > -{ > > - /* > > - * If get_comp_job returns NULL, there are no jobs complete. > > - * If get_comp_job returns a job, verify that it is safe to return to > > - * the user. > > - * If it is not ready, resubmit the job to finish processing. > > - * If sha512_ctx_mgr_resubmit returned a job, it is ready to be > > - * returned. > > - * Otherwise, all jobs currently being managed by the hash_ctx_mgr > > - * still need processing. > > - */ > > - struct sha512_ctx_mgr *mgr; > > - struct sha512_hash_ctx *ctx; > > - unsigned long flags; > > - > > - mgr = cstate->mgr; > > - spin_lock_irqsave(&cstate->work_lock, flags); > > - ctx = (struct sha512_hash_ctx *) > > - sha512_job_mgr_get_comp_job(&mgr->mgr); > > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > > - spin_unlock_irqrestore(&cstate->work_lock, flags); > > - return ctx; > > -} > > - > > -static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) > > -{ > > - sha512_job_mgr_init(&mgr->mgr); > > -} > > - > > -static struct sha512_hash_ctx > > - *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, > > - struct sha512_hash_ctx *ctx, > > - const void *buffer, > > - uint32_t len, > > - int flags) > > -{ > > - struct sha512_ctx_mgr *mgr; > > - unsigned long irqflags; > > - > > - mgr = cstate->mgr; > > - spin_lock_irqsave(&cstate->work_lock, irqflags); > > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > > - /* User should not pass anything other than UPDATE or LAST */ > > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > > - goto unlock; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > > - /* Cannot submit to a currently processing job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > > - goto unlock; > > - } > > - > > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > > - /* Cannot update a finished job. */ > > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > > - goto unlock; > > - } > > - > > - /* > > - * If we made it here, there were no errors during this call to > > - * submit > > - */ > > - ctx->error = HASH_CTX_ERROR_NONE; > > - > > - /* Store buffer ptr info from user */ > > - ctx->incoming_buffer = buffer; > > - ctx->incoming_buffer_length = len; > > - > > - /* > > - * Store the user's request flags and mark this ctx as currently being > > - * processed. > > - */ > > - ctx->status = (flags & HASH_LAST) ? > > - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > > - HASH_CTX_STS_PROCESSING; > > - > > - /* Advance byte counter */ > > - ctx->total_length += len; > > - > > - /* > > - * If there is anything currently buffered in the extra blocks, > > - * append to it until it contains a whole block. > > - * Or if the user's buffer contains less than a whole block, > > - * append as much as possible to the extra block. > > - */ > > - if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { > > - /* Compute how many bytes to copy from user buffer into extra > > - * block > > - */ > > - uint32_t copy_len = SHA512_BLOCK_SIZE - > > - ctx->partial_block_buffer_length; > > - if (len < copy_len) > > - copy_len = len; > > - > > - if (copy_len) { > > - /* Copy and update relevant pointers and counters */ > > - memcpy > > - (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], > > - buffer, copy_len); > > - > > - ctx->partial_block_buffer_length += copy_len; > > - ctx->incoming_buffer = (const void *) > > - ((const char *)buffer + copy_len); > > - ctx->incoming_buffer_length = len - copy_len; > > - } > > - > > - /* The extra block should never contain more than 1 block > > - * here > > - */ > > - assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); > > - > > - /* If the extra block buffer contains exactly 1 block, it can > > - * be hashed. > > - */ > > - if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { > > - ctx->partial_block_buffer_length = 0; > > - > > - ctx->job.buffer = ctx->partial_block_buffer; > > - ctx->job.len = 1; > > - ctx = (struct sha512_hash_ctx *) > > - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); > > - } > > - } > > - > > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > > -unlock: > > - spin_unlock_irqrestore(&cstate->work_lock, irqflags); > > - return ctx; > > -} > > - > > -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) > > -{ > > - struct sha512_ctx_mgr *mgr; > > - struct sha512_hash_ctx *ctx; > > - unsigned long flags; > > - > > - mgr = cstate->mgr; > > - spin_lock_irqsave(&cstate->work_lock, flags); > > - while (1) { > > - ctx = (struct sha512_hash_ctx *) > > - sha512_job_mgr_flush(&mgr->mgr); > > - > > - /* If flush returned 0, there are no more jobs in flight. */ > > - if (!ctx) > > - break; > > - > > - /* > > - * If flush returned a job, resubmit the job to finish > > - * processing. > > - */ > > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > > - > > - /* > > - * If sha512_ctx_mgr_resubmit returned a job, it is ready to > > - * be returned. Otherwise, all jobs currently being managed by > > - * the sha512_ctx_mgr still need processing. Loop. > > - */ > > - if (ctx) > > - break; > > - } > > - spin_unlock_irqrestore(&cstate->work_lock, flags); > > - return ctx; > > -} > > - > > -static int sha512_mb_init(struct ahash_request *areq) > > -{ > > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - hash_ctx_init(sctx); > > - sctx->job.result_digest[0] = SHA512_H0; > > - sctx->job.result_digest[1] = SHA512_H1; > > - sctx->job.result_digest[2] = SHA512_H2; > > - sctx->job.result_digest[3] = SHA512_H3; > > - sctx->job.result_digest[4] = SHA512_H4; > > - sctx->job.result_digest[5] = SHA512_H5; > > - sctx->job.result_digest[6] = SHA512_H6; > > - sctx->job.result_digest[7] = SHA512_H7; > > - sctx->total_length = 0; > > - sctx->partial_block_buffer_length = 0; > > - sctx->status = HASH_CTX_STS_IDLE; > > - > > - return 0; > > -} > > - > > -static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > > -{ > > - int i; > > - struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > > - __be64 *dst = (__be64 *) rctx->out; > > - > > - for (i = 0; i < 8; ++i) > > - dst[i] = cpu_to_be64(sctx->job.result_digest[i]); > > - > > - return 0; > > -} > > - > > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > > - struct mcryptd_alg_cstate *cstate, bool flush) > > -{ > > - int flag = HASH_UPDATE; > > - int nbytes, err = 0; > > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > > - struct sha512_hash_ctx *sha_ctx; > > - > > - /* more work ? */ > > - while (!(rctx->flag & HASH_DONE)) { > > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > > - if (nbytes < 0) { > > - err = nbytes; > > - goto out; > > - } > > - /* check if the walk is done */ > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - if (rctx->flag & HASH_FINAL) > > - flag |= HASH_LAST; > > - > > - } > > - sha_ctx = (struct sha512_hash_ctx *) > > - ahash_request_ctx(&rctx->areq); > > - kernel_fpu_begin(); > > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, > > - rctx->walk.data, nbytes, flag); > > - if (!sha_ctx) { > > - if (flush) > > - sha_ctx = sha512_ctx_mgr_flush(cstate); > > - } > > - kernel_fpu_end(); > > - if (sha_ctx) > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - else { > > - rctx = NULL; > > - goto out; > > - } > > - } > > - > > - /* copy the results */ > > - if (rctx->flag & HASH_FINAL) > > - sha512_mb_set_results(rctx); > > - > > -out: > > - *ret_rctx = rctx; > > - return err; > > -} > > - > > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate, > > - int err) > > -{ > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha512_hash_ctx *sha_ctx; > > - struct mcryptd_hash_request_ctx *req_ctx; > > - int ret; > > - unsigned long flags; > > - > > - /* remove from work list */ > > - spin_lock_irqsave(&cstate->work_lock, flags); > > - list_del(&rctx->waiter); > > - spin_unlock_irqrestore(&cstate->work_lock, flags); > > - > > - if (irqs_disabled()) > > - rctx->complete(&req->base, err); > > - else { > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > - } > > - > > - /* check to see if there are other jobs that are done */ > > - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); > > - while (sha_ctx) { > > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&req_ctx, cstate, false); > > - if (req_ctx) { > > - spin_lock_irqsave(&cstate->work_lock, flags); > > - list_del(&req_ctx->waiter); > > - spin_unlock_irqrestore(&cstate->work_lock, flags); > > - > > - req = cast_mcryptd_ctx_to_req(req_ctx); > > - if (irqs_disabled()) > > - req_ctx->complete(&req->base, ret); > > - else { > > - local_bh_disable(); > > - req_ctx->complete(&req->base, ret); > > - local_bh_enable(); > > - } > > - } > > - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); > > - } > > - > > - return 0; > > -} > > - > > -static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > > - struct mcryptd_alg_cstate *cstate) > > -{ > > - unsigned long next_flush; > > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > > - unsigned long flags; > > - > > - /* initialize tag */ > > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > > - rctx->tag.seq_num = cstate->next_seq_num++; > > - next_flush = rctx->tag.arrival + delay; > > - rctx->tag.expire = next_flush; > > - > > - spin_lock_irqsave(&cstate->work_lock, flags); > > - list_add_tail(&rctx->waiter, &cstate->work_list); > > - spin_unlock_irqrestore(&cstate->work_lock, flags); > > - > > - mcryptd_arm_flusher(cstate, delay); > > -} > > - > > -static int sha512_mb_update(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, > > - areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha512_hash_ctx *sha_ctx; > > - int ret = 0, nbytes; > > - > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) > > - rctx->flag |= HASH_DONE; > > - > > - /* submit */ > > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > > - sha512_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, > > - nbytes, HASH_UPDATE); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha512_mb_finup(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, > > - areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > > - > > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > > - struct sha512_hash_ctx *sha_ctx; > > - int ret = 0, flag = HASH_UPDATE, nbytes; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > > - > > - if (nbytes < 0) { > > - ret = nbytes; > > - goto done; > > - } > > - > > - if (crypto_ahash_walk_last(&rctx->walk)) { > > - rctx->flag |= HASH_DONE; > > - flag = HASH_LAST; > > - } > > - > > - /* submit */ > > - rctx->flag |= HASH_FINAL; > > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > > - sha512_mb_add_list(rctx, cstate); > > - > > - kernel_fpu_begin(); > > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, > > - nbytes, flag); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha512_mb_final(struct ahash_request *areq) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = > > - container_of(areq, struct mcryptd_hash_request_ctx, > > - areq); > > - struct mcryptd_alg_cstate *cstate = > > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > > - > > - struct sha512_hash_ctx *sha_ctx; > > - int ret = 0; > > - u8 data; > > - > > - /* sanity check */ > > - if (rctx->tag.cpu != smp_processor_id()) { > > - pr_err("mcryptd error: cpu clash\n"); > > - goto done; > > - } > > - > > - /* need to init context */ > > - req_ctx_init(rctx, areq); > > - > > - rctx->flag |= HASH_DONE | HASH_FINAL; > > - > > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > > - /* flag HASH_FINAL and 0 data size */ > > - sha512_mb_add_list(rctx, cstate); > > - kernel_fpu_begin(); > > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); > > - kernel_fpu_end(); > > - > > - /* check if anything is returned */ > > - if (!sha_ctx) > > - return -EINPROGRESS; > > - > > - if (sha_ctx->error) { > > - ret = sha_ctx->error; > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - goto done; > > - } > > - > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - ret = sha_finish_walk(&rctx, cstate, false); > > - if (!rctx) > > - return -EINPROGRESS; > > -done: > > - sha_complete_job(rctx, cstate, ret); > > - return ret; > > -} > > - > > -static int sha512_mb_export(struct ahash_request *areq, void *out) > > -{ > > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(out, sctx, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha512_mb_import(struct ahash_request *areq, const void *in) > > -{ > > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > > - > > - memcpy(sctx, in, sizeof(*sctx)); > > - > > - return 0; > > -} > > - > > -static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) > > -{ > > - struct mcryptd_ahash *mcryptd_tfm; > > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - struct mcryptd_hash_ctx *mctx; > > - > > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", > > - CRYPTO_ALG_INTERNAL, > > - CRYPTO_ALG_INTERNAL); > > - if (IS_ERR(mcryptd_tfm)) > > - return PTR_ERR(mcryptd_tfm); > > - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > > - mctx->alg_state = &sha512_mb_alg_state; > > - ctx->mcryptd_tfm = mcryptd_tfm; > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > > - > > - return 0; > > -} > > - > > -static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) > > -{ > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct ahash_request) + > > - sizeof(struct sha512_hash_ctx)); > > - > > - return 0; > > -} > > - > > -static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - mcryptd_free_ahash(ctx->mcryptd_tfm); > > -} > > - > > -static struct ahash_alg sha512_mb_areq_alg = { > > - .init = sha512_mb_init, > > - .update = sha512_mb_update, > > - .final = sha512_mb_final, > > - .finup = sha512_mb_finup, > > - .export = sha512_mb_export, > > - .import = sha512_mb_import, > > - .halg = { > > - .digestsize = SHA512_DIGEST_SIZE, > > - .statesize = sizeof(struct sha512_hash_ctx), > > - .base = { > > - .cra_name = "__sha512-mb", > > - .cra_driver_name = "__intel_sha512-mb", > > - .cra_priority = 100, > > - /* > > - * use ASYNC flag as some buffers in multi-buffer > > - * algo may not have completed before hashing thread > > - * sleep > > - */ > > - .cra_flags = CRYPTO_ALG_ASYNC | > > - CRYPTO_ALG_INTERNAL, > > - .cra_blocksize = SHA512_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT > > - (sha512_mb_areq_alg.halg.base.cra_list), > > - .cra_init = sha512_mb_areq_init_tfm, > > - .cra_exit = sha512_mb_areq_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha512_hash_ctx), > > - } > > - } > > -}; > > - > > -static int sha512_mb_async_init(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_init(mcryptd_req); > > -} > > - > > -static int sha512_mb_async_update(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_update(mcryptd_req); > > -} > > - > > -static int sha512_mb_async_finup(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_finup(mcryptd_req); > > -} > > - > > -static int sha512_mb_async_final(struct ahash_request *req) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_final(mcryptd_req); > > -} > > - > > -static int sha512_mb_async_digest(struct ahash_request *req) > > -{ > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_digest(mcryptd_req); > > -} > > - > > -static int sha512_mb_async_export(struct ahash_request *req, void *out) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - return crypto_ahash_export(mcryptd_req, out); > > -} > > - > > -static int sha512_mb_async_import(struct ahash_request *req, const void *in) > > -{ > > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > > - struct mcryptd_hash_request_ctx *rctx; > > - struct ahash_request *areq; > > - > > - memcpy(mcryptd_req, req, sizeof(*req)); > > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > > - rctx = ahash_request_ctx(mcryptd_req); > > - > > - areq = &rctx->areq; > > - > > - ahash_request_set_tfm(areq, child); > > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > > - rctx->complete, req); > > - > > - return crypto_ahash_import(mcryptd_req, in); > > -} > > - > > -static struct ahash_alg sha512_mb_async_alg = { > > - .init = sha512_mb_async_init, > > - .update = sha512_mb_async_update, > > - .final = sha512_mb_async_final, > > - .finup = sha512_mb_async_finup, > > - .digest = sha512_mb_async_digest, > > - .export = sha512_mb_async_export, > > - .import = sha512_mb_async_import, > > - .halg = { > > - .digestsize = SHA512_DIGEST_SIZE, > > - .statesize = sizeof(struct sha512_hash_ctx), > > - .base = { > > - .cra_name = "sha512", > > - .cra_driver_name = "sha512_mb", > > - /* > > - * Low priority, since with few concurrent hash requests > > - * this is extremely slow due to the flush delay. Users > > - * whose workloads would benefit from this can request > > - * it explicitly by driver name, or can increase its > > - * priority at runtime using NETLINK_CRYPTO. > > - */ > > - .cra_priority = 50, > > - .cra_flags = CRYPTO_ALG_ASYNC, > > - .cra_blocksize = SHA512_BLOCK_SIZE, > > - .cra_module = THIS_MODULE, > > - .cra_list = LIST_HEAD_INIT > > - (sha512_mb_async_alg.halg.base.cra_list), > > - .cra_init = sha512_mb_async_init_tfm, > > - .cra_exit = sha512_mb_async_exit_tfm, > > - .cra_ctxsize = sizeof(struct sha512_mb_ctx), > > - .cra_alignmask = 0, > > - }, > > - }, > > -}; > > - > > -static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) > > -{ > > - struct mcryptd_hash_request_ctx *rctx; > > - unsigned long cur_time; > > - unsigned long next_flush = 0; > > - struct sha512_hash_ctx *sha_ctx; > > - > > - > > - cur_time = jiffies; > > - > > - while (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - if time_before(cur_time, rctx->tag.expire) > > - break; > > - kernel_fpu_begin(); > > - sha_ctx = (struct sha512_hash_ctx *) > > - sha512_ctx_mgr_flush(cstate); > > - kernel_fpu_end(); > > - if (!sha_ctx) { > > - pr_err("sha512_mb error: nothing got flushed for" > > - " non-empty list\n"); > > - break; > > - } > > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > > - sha_finish_walk(&rctx, cstate, true); > > - sha_complete_job(rctx, cstate, 0); > > - } > > - > > - if (!list_empty(&cstate->work_list)) { > > - rctx = list_entry(cstate->work_list.next, > > - struct mcryptd_hash_request_ctx, waiter); > > - /* get the hash context and then flush time */ > > - next_flush = rctx->tag.expire; > > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > > - } > > - return next_flush; > > -} > > - > > -static int __init sha512_mb_mod_init(void) > > -{ > > - > > - int cpu; > > - int err; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - /* check for dependent cpu features */ > > - if (!boot_cpu_has(X86_FEATURE_AVX2) || > > - !boot_cpu_has(X86_FEATURE_BMI2)) > > - return -ENODEV; > > - > > - /* initialize multibuffer structures */ > > - sha512_mb_alg_state.alg_cstate = > > - alloc_percpu(struct mcryptd_alg_cstate); > > - > > - sha512_job_mgr_init = sha512_mb_mgr_init_avx2; > > - sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; > > - sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; > > - sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; > > - > > - if (!sha512_mb_alg_state.alg_cstate) > > - return -ENOMEM; > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > > - cpu_state->next_flush = 0; > > - cpu_state->next_seq_num = 0; > > - cpu_state->flusher_engaged = false; > > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > > - cpu_state->cpu = cpu; > > - cpu_state->alg_state = &sha512_mb_alg_state; > > - cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), > > - GFP_KERNEL); > > - if (!cpu_state->mgr) > > - goto err2; > > - sha512_ctx_mgr_init(cpu_state->mgr); > > - INIT_LIST_HEAD(&cpu_state->work_list); > > - spin_lock_init(&cpu_state->work_lock); > > - } > > - sha512_mb_alg_state.flusher = &sha512_mb_flusher; > > - > > - err = crypto_register_ahash(&sha512_mb_areq_alg); > > - if (err) > > - goto err2; > > - err = crypto_register_ahash(&sha512_mb_async_alg); > > - if (err) > > - goto err1; > > - > > - > > - return 0; > > -err1: > > - crypto_unregister_ahash(&sha512_mb_areq_alg); > > -err2: > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha512_mb_alg_state.alg_cstate); > > - return -ENODEV; > > -} > > - > > -static void __exit sha512_mb_mod_fini(void) > > -{ > > - int cpu; > > - struct mcryptd_alg_cstate *cpu_state; > > - > > - crypto_unregister_ahash(&sha512_mb_async_alg); > > - crypto_unregister_ahash(&sha512_mb_areq_alg); > > - for_each_possible_cpu(cpu) { > > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > > - kfree(cpu_state->mgr); > > - } > > - free_percpu(sha512_mb_alg_state.alg_cstate); > > -} > > - > > -module_init(sha512_mb_mod_init); > > -module_exit(sha512_mb_mod_fini); > > - > > -MODULE_LICENSE("GPL"); > > -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); > > - > > -MODULE_ALIAS("sha512"); > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > > deleted file mode 100644 > > index e5c465bd821e..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > > +++ /dev/null > > @@ -1,128 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA512 context > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#ifndef _SHA_MB_CTX_INTERNAL_H > > -#define _SHA_MB_CTX_INTERNAL_H > > - > > -#include "sha512_mb_mgr.h" > > - > > -#define HASH_UPDATE 0x00 > > -#define HASH_LAST 0x01 > > -#define HASH_DONE 0x02 > > -#define HASH_FINAL 0x04 > > - > > -#define HASH_CTX_STS_IDLE 0x00 > > -#define HASH_CTX_STS_PROCESSING 0x01 > > -#define HASH_CTX_STS_LAST 0x02 > > -#define HASH_CTX_STS_COMPLETE 0x04 > > - > > -enum hash_ctx_error { > > - HASH_CTX_ERROR_NONE = 0, > > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > > -}; > > - > > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > > -#define hash_ctx_status(ctx) ((ctx)->status) > > -#define hash_ctx_error(ctx) ((ctx)->error) > > -#define hash_ctx_init(ctx) \ > > - do { \ > > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > > - } while (0) > > - > > -/* Hash Constants and Typedefs */ > > -#define SHA512_DIGEST_LENGTH 8 > > -#define SHA512_LOG2_BLOCK_SIZE 7 > > - > > -#define SHA512_PADLENGTHFIELD_SIZE 16 > > - > > -#ifdef SHA_MB_DEBUG > > -#define assert(expr) \ > > -do { \ > > - if (unlikely(!(expr))) { \ > > - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ > > - #expr, __FILE__, __func__, __LINE__); \ > > - } \ > > -} while (0) > > -#else > > -#define assert(expr) do {} while (0) > > -#endif > > - > > -struct sha512_ctx_mgr { > > - struct sha512_mb_mgr mgr; > > -}; > > - > > -/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ > > - > > -struct sha512_hash_ctx { > > - /* Must be at struct offset 0 */ > > - struct job_sha512 job; > > - /* status flag */ > > - int status; > > - /* error flag */ > > - int error; > > - > > - uint64_t total_length; > > - const void *incoming_buffer; > > - uint32_t incoming_buffer_length; > > - uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; > > - uint32_t partial_block_buffer_length; > > - void *user_data; > > -}; > > - > > -#endif > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > > deleted file mode 100644 > > index 178f17eef382..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > > +++ /dev/null > > @@ -1,104 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA512 algorithm manager > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#ifndef __SHA_MB_MGR_H > > -#define __SHA_MB_MGR_H > > - > > -#include <linux/types.h> > > - > > -#define NUM_SHA512_DIGEST_WORDS 8 > > - > > -enum job_sts {STS_UNKNOWN = 0, > > - STS_BEING_PROCESSED = 1, > > - STS_COMPLETED = 2, > > - STS_INTERNAL_ERROR = 3, > > - STS_ERROR = 4 > > -}; > > - > > -struct job_sha512 { > > - u8 *buffer; > > - u64 len; > > - u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); > > - enum job_sts status; > > - void *user_data; > > -}; > > - > > -struct sha512_args_x4 { > > - uint64_t digest[8][4]; > > - uint8_t *data_ptr[4]; > > -}; > > - > > -struct sha512_lane_data { > > - struct job_sha512 *job_in_lane; > > -}; > > - > > -struct sha512_mb_mgr { > > - struct sha512_args_x4 args; > > - > > - uint64_t lens[4]; > > - > > - /* each byte is index (0...7) of unused lanes */ > > - uint64_t unused_lanes; > > - /* byte 4 is set to FF as a flag */ > > - struct sha512_lane_data ldata[4]; > > -}; > > - > > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 > > - > > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); > > -struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, > > - struct job_sha512 *job); > > -struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); > > -struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); > > - > > -#endif > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > > deleted file mode 100644 > > index cf2636d4c9ba..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > > +++ /dev/null > > @@ -1,281 +0,0 @@ > > -/* > > - * Header file for multi buffer SHA256 algorithm data structure > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -# Macros for defining data structures > > - > > -# Usage example > > - > > -#START_FIELDS # JOB_AES > > -### name size align > > -#FIELD _plaintext, 8, 8 # pointer to plaintext > > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > > -#FIELD _IV, 16, 8 # IV > > -#FIELD _keys, 8, 8 # pointer to keys > > -#FIELD _len, 4, 4 # length in bytes > > -#FIELD _status, 4, 4 # status enumeration > > -#FIELD _user_data, 8, 8 # pointer to user data > > -#UNION _union, size1, align1, \ > > -# size2, align2, \ > > -# size3, align3, \ > > -# ... > > -#END_FIELDS > > -#%assign _JOB_AES_size _FIELD_OFFSET > > -#%assign _JOB_AES_align _STRUCT_ALIGN > > - > > -######################################################################### > > - > > -# Alternate "struc-like" syntax: > > -# STRUCT job_aes2 > > -# RES_Q .plaintext, 1 > > -# RES_Q .ciphertext, 1 > > -# RES_DQ .IV, 1 > > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > > -# RES_U .union, size1, align1, \ > > -# size2, align2, \ > > -# ... > > -# ENDSTRUCT > > -# # Following only needed if nesting > > -# %assign job_aes2_size _FIELD_OFFSET > > -# %assign job_aes2_align _STRUCT_ALIGN > > -# > > -# RES_* macros take a name, a count and an optional alignment. > > -# The count in in terms of the base size of the macro, and the > > -# default alignment is the base size. > > -# The macros are: > > -# Macro Base size > > -# RES_B 1 > > -# RES_W 2 > > -# RES_D 4 > > -# RES_Q 8 > > -# RES_DQ 16 > > -# RES_Y 32 > > -# RES_Z 64 > > -# > > -# RES_U defines a union. It's arguments are a name and two or more > > -# pairs of "size, alignment" > > -# > > -# The two assigns are only needed if this structure is being nested > > -# within another. Even if the assigns are not done, one can still use > > -# STRUCT_NAME_size as the size of the structure. > > -# > > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > > -# > > -# The differences between this and using "struc" directly are that each > > -# type is implicitly aligned to its natural length (although this can be > > -# over-ridden with an explicit third parameter), and that the structure > > -# is padded at the end to its overall alignment. > > -# > > - > > -######################################################################### > > - > > -#ifndef _DATASTRUCT_ASM_ > > -#define _DATASTRUCT_ASM_ > > - > > -#define PTR_SZ 8 > > -#define SHA512_DIGEST_WORD_SIZE 8 > > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 > > -#define NUM_SHA512_DIGEST_WORDS 8 > > -#define SZ4 4*SHA512_DIGEST_WORD_SIZE > > -#define ROUNDS 80*SZ4 > > -#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) > > - > > -# START_FIELDS > > -.macro START_FIELDS > > - _FIELD_OFFSET = 0 > > - _STRUCT_ALIGN = 0 > > -.endm > > - > > -# FIELD name size align > > -.macro FIELD name size align > > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > > - \name = _FIELD_OFFSET > > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > > -.if (\align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = \align > > -.endif > > -.endm > > - > > -# END_FIELDS > > -.macro END_FIELDS > > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > > -.endm > > - > > -.macro STRUCT p1 > > -START_FIELDS > > -.struc \p1 > > -.endm > > - > > -.macro ENDSTRUCT > > - tmp = _FIELD_OFFSET > > - END_FIELDS > > - tmp = (_FIELD_OFFSET - ##tmp) > > -.if (tmp > 0) > > - .lcomm tmp > > -.endm > > - > > -## RES_int name size align > > -.macro RES_int p1 p2 p3 > > - name = \p1 > > - size = \p2 > > - align = .\p3 > > - > > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > > -.align align > > -.lcomm name size > > - _FIELD_OFFSET = _FIELD_OFFSET + (size) > > -.if (align > _STRUCT_ALIGN) > > - _STRUCT_ALIGN = align > > -.endif > > -.endm > > - > > -# macro RES_B name, size [, align] > > -.macro RES_B _name, _size, _align=1 > > -RES_int _name _size _align > > -.endm > > - > > -# macro RES_W name, size [, align] > > -.macro RES_W _name, _size, _align=2 > > -RES_int _name 2*(_size) _align > > -.endm > > - > > -# macro RES_D name, size [, align] > > -.macro RES_D _name, _size, _align=4 > > -RES_int _name 4*(_size) _align > > -.endm > > - > > -# macro RES_Q name, size [, align] > > -.macro RES_Q _name, _size, _align=8 > > -RES_int _name 8*(_size) _align > > -.endm > > - > > -# macro RES_DQ name, size [, align] > > -.macro RES_DQ _name, _size, _align=16 > > -RES_int _name 16*(_size) _align > > -.endm > > - > > -# macro RES_Y name, size [, align] > > -.macro RES_Y _name, _size, _align=32 > > -RES_int _name 32*(_size) _align > > -.endm > > - > > -# macro RES_Z name, size [, align] > > -.macro RES_Z _name, _size, _align=64 > > -RES_int _name 64*(_size) _align > > -.endm > > - > > -#endif > > - > > -################################################################### > > -### Define SHA512 Out Of Order Data Structures > > -################################################################### > > - > > -START_FIELDS # LANE_DATA > > -### name size align > > -FIELD _job_in_lane, 8, 8 # pointer to job object > > -END_FIELDS > > - > > - _LANE_DATA_size = _FIELD_OFFSET > > - _LANE_DATA_align = _STRUCT_ALIGN > > - > > -#################################################################### > > - > > -START_FIELDS # SHA512_ARGS_X4 > > -### name size align > > -FIELD _digest, 8*8*4, 4 # transposed digest > > -FIELD _data_ptr, 8*4, 8 # array of pointers to data > > -END_FIELDS > > - > > - _SHA512_ARGS_X4_size = _FIELD_OFFSET > > - _SHA512_ARGS_X4_align = _STRUCT_ALIGN > > - > > -##################################################################### > > - > > -START_FIELDS # MB_MGR > > -### name size align > > -FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align > > -FIELD _lens, 8*4, 8 > > -FIELD _unused_lanes, 8, 8 > > -FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align > > -END_FIELDS > > - > > - _MB_MGR_size = _FIELD_OFFSET > > - _MB_MGR_align = _STRUCT_ALIGN > > - > > -_args_digest = _args + _digest > > -_args_data_ptr = _args + _data_ptr > > - > > -####################################################################### > > - > > -####################################################################### > > -#### Define constants > > -####################################################################### > > - > > -#define STS_UNKNOWN 0 > > -#define STS_BEING_PROCESSED 1 > > -#define STS_COMPLETED 2 > > - > > -####################################################################### > > -#### Define JOB_SHA512 structure > > -####################################################################### > > - > > -START_FIELDS # JOB_SHA512 > > -### name size align > > -FIELD _buffer, 8, 8 # pointer to buffer > > -FIELD _len, 8, 8 # length in bytes > > -FIELD _result_digest, 8*8, 32 # Digest (output) > > -FIELD _status, 4, 4 > > -FIELD _user_data, 8, 8 > > -END_FIELDS > > - > > - _JOB_SHA512_size = _FIELD_OFFSET > > - _JOB_SHA512_align = _STRUCT_ALIGN > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > > deleted file mode 100644 > > index 7c629caebc05..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > > +++ /dev/null > > @@ -1,297 +0,0 @@ > > -/* > > - * Flush routine for SHA512 multibuffer > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha512_mb_mgr_datastruct.S" > > - > > -.extern sha512_x4_avx2 > > - > > -# LINUX register definitions > > -#define arg1 %rdi > > -#define arg2 %rsi > > - > > -# idx needs to be other than arg1, arg2, rbx, r12 > > -#define idx %rdx > > - > > -# Common definitions > > -#define state arg1 > > -#define job arg2 > > -#define len2 arg2 > > - > > -#define unused_lanes %rbx > > -#define lane_data %rbx > > -#define tmp2 %rbx > > - > > -#define job_rax %rax > > -#define tmp1 %rax > > -#define size_offset %rax > > -#define tmp %rax > > -#define start_offset %rax > > - > > -#define tmp3 arg1 > > - > > -#define extra_blocks arg2 > > -#define p arg2 > > - > > -#define tmp4 %r8 > > -#define lens0 %r8 > > - > > -#define lens1 %r9 > > -#define lens2 %r10 > > -#define lens3 %r11 > > - > > -.macro LABEL prefix n > > -\prefix\n\(): > > -.endm > > - > > -.macro JNE_SKIP i > > -jne skip_\i > > -.endm > > - > > -.altmacro > > -.macro SET_OFFSET _offset > > -offset = \_offset > > -.endm > > -.noaltmacro > > - > > -# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) > > -# arg 1 : rcx : state > > -ENTRY(sha512_mb_mgr_flush_avx2) > > - FRAME_BEGIN > > - push %rbx > > - > > - # If bit (32+3) is set, then all lanes are empty > > - mov _unused_lanes(state), unused_lanes > > - bt $32+7, unused_lanes > > - jc return_null > > - > > - # find a lane with a non-null job > > - xor idx, idx > > - offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne one(%rip), idx > > - offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne two(%rip), idx > > - offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > - cmovne three(%rip), idx > > - > > - # copy idx to empty lanes > > -copy_lane_data: > > - offset = (_args + _data_ptr) > > - mov offset(state,idx,8), tmp > > - > > - I = 0 > > -.rep 4 > > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > > - cmpq $0, offset(state) > > -.altmacro > > - JNE_SKIP %I > > - offset = (_args + _data_ptr + 8*I) > > - mov tmp, offset(state) > > - offset = (_lens + 8*I +4) > > - movl $0xFFFFFFFF, offset(state) > > -LABEL skip_ %I > > - I = (I+1) > > -.noaltmacro > > -.endr > > - > > - # Find min length > > - mov _lens + 0*8(state),lens0 > > - mov lens0,idx > > - mov _lens + 1*8(state),lens1 > > - cmp idx,lens1 > > - cmovb lens1,idx > > - mov _lens + 2*8(state),lens2 > > - cmp idx,lens2 > > - cmovb lens2,idx > > - mov _lens + 3*8(state),lens3 > > - cmp idx,lens3 > > - cmovb lens3,idx > > - mov idx,len2 > > - and $0xF,idx > > - and $~0xFF,len2 > > - jz len_is_0 > > - > > - sub len2, lens0 > > - sub len2, lens1 > > - sub len2, lens2 > > - sub len2, lens3 > > - shr $32,len2 > > - mov lens0, _lens + 0*8(state) > > - mov lens1, _lens + 1*8(state) > > - mov lens2, _lens + 2*8(state) > > - mov lens3, _lens + 3*8(state) > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha512_x4_avx2 > > - # state and idx are intact > > - > > -len_is_0: > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $8, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens+4(state, idx, 8) > > - > > - vmovq _args_digest+0*32(state, idx, 8), %xmm0 > > - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 > > - vmovq _args_digest+2*32(state, idx, 8), %xmm1 > > - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 > > - vmovq _args_digest+4*32(state, idx, 8), %xmm2 > > - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 > > - vmovq _args_digest+6*32(state, idx, 8), %xmm3 > > - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 > > - > > - vmovdqu %xmm0, _result_digest(job_rax) > > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > > - vmovdqu %xmm2, _result_digest+2*16(job_rax) > > - vmovdqu %xmm3, _result_digest+3*16(job_rax) > > - > > -return: > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > -ENDPROC(sha512_mb_mgr_flush_avx2) > > -.align 16 > > - > > -ENTRY(sha512_mb_mgr_get_comp_job_avx2) > > - push %rbx > > - > > - mov _unused_lanes(state), unused_lanes > > - bt $(32+7), unused_lanes > > - jc .return_null > > - > > - # Find min length > > - mov _lens(state),lens0 > > - mov lens0,idx > > - mov _lens+1*8(state),lens1 > > - cmp idx,lens1 > > - cmovb lens1,idx > > - mov _lens+2*8(state),lens2 > > - cmp idx,lens2 > > - cmovb lens2,idx > > - mov _lens+3*8(state),lens3 > > - cmp idx,lens3 > > - cmovb lens3,idx > > - test $~0xF,idx > > - jnz .return_null > > - and $0xF,idx > > - > > - #process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - mov _unused_lanes(state), unused_lanes > > - shl $8, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF, _lens+4(state, idx, 8) > > - > > - vmovq _args_digest(state, idx, 8), %xmm0 > > - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 > > - vmovq _args_digest+2*32(state, idx, 8), %xmm1 > > - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 > > - vmovq _args_digest+4*32(state, idx, 8), %xmm2 > > - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 > > - vmovq _args_digest+6*32(state, idx, 8), %xmm3 > > - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 > > - > > - vmovdqu %xmm0, _result_digest+0*16(job_rax) > > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > > - vmovdqu %xmm2, _result_digest+2*16(job_rax) > > - vmovdqu %xmm3, _result_digest+3*16(job_rax) > > - > > - pop %rbx > > - > > - ret > > - > > -.return_null: > > - xor job_rax, job_rax > > - pop %rbx > > - ret > > -ENDPROC(sha512_mb_mgr_get_comp_job_avx2) > > - > > -.section .rodata.cst8.one, "aM", @progbits, 8 > > -.align 8 > > -one: > > -.quad 1 > > - > > -.section .rodata.cst8.two, "aM", @progbits, 8 > > -.align 8 > > -two: > > -.quad 2 > > - > > -.section .rodata.cst8.three, "aM", @progbits, 8 > > -.align 8 > > -three: > > -.quad 3 > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > > deleted file mode 100644 > > index d08805032f01..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > > +++ /dev/null > > @@ -1,69 +0,0 @@ > > -/* > > - * Initialization code for multi buffer SHA256 algorithm for AVX2 > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include "sha512_mb_mgr.h" > > - > > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) > > -{ > > - unsigned int j; > > - > > - /* initially all lanes are unused */ > > - state->lens[0] = 0xFFFFFFFF00000000; > > - state->lens[1] = 0xFFFFFFFF00000001; > > - state->lens[2] = 0xFFFFFFFF00000002; > > - state->lens[3] = 0xFFFFFFFF00000003; > > - > > - state->unused_lanes = 0xFF03020100; > > - for (j = 0; j < 4; j++) > > - state->ldata[j].job_in_lane = NULL; > > -} > > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > > deleted file mode 100644 > > index 4ba709ba78e5..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > > +++ /dev/null > > @@ -1,224 +0,0 @@ > > -/* > > - * Buffer submit code for multi buffer SHA512 algorithm > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -#include <linux/linkage.h> > > -#include <asm/frame.h> > > -#include "sha512_mb_mgr_datastruct.S" > > - > > -.extern sha512_x4_avx2 > > - > > -#define arg1 %rdi > > -#define arg2 %rsi > > - > > -#define idx %rdx > > -#define last_len %rdx > > - > > -#define size_offset %rcx > > -#define tmp2 %rcx > > - > > -# Common definitions > > -#define state arg1 > > -#define job arg2 > > -#define len2 arg2 > > -#define p2 arg2 > > - > > -#define p %r11 > > -#define start_offset %r11 > > - > > -#define unused_lanes %rbx > > - > > -#define job_rax %rax > > -#define len %rax > > - > > -#define lane %r12 > > -#define tmp3 %r12 > > -#define lens3 %r12 > > - > > -#define extra_blocks %r8 > > -#define lens0 %r8 > > - > > -#define tmp %r9 > > -#define lens1 %r9 > > - > > -#define lane_data %r10 > > -#define lens2 %r10 > > - > > -#define DWORD_len %eax > > - > > -# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) > > -# arg 1 : rcx : state > > -# arg 2 : rdx : job > > -ENTRY(sha512_mb_mgr_submit_avx2) > > - FRAME_BEGIN > > - push %rbx > > - push %r12 > > - > > - mov _unused_lanes(state), unused_lanes > > - movzb %bl,lane > > - shr $8, unused_lanes > > - imul $_LANE_DATA_size, lane,lane_data > > - movl $STS_BEING_PROCESSED, _status(job) > > - lea _ldata(state, lane_data), lane_data > > - mov unused_lanes, _unused_lanes(state) > > - movl _len(job), DWORD_len > > - > > - mov job, _job_in_lane(lane_data) > > - movl DWORD_len,_lens+4(state , lane, 8) > > - > > - # Load digest words from result_digest > > - vmovdqu _result_digest+0*16(job), %xmm0 > > - vmovdqu _result_digest+1*16(job), %xmm1 > > - vmovdqu _result_digest+2*16(job), %xmm2 > > - vmovdqu _result_digest+3*16(job), %xmm3 > > - > > - vmovq %xmm0, _args_digest(state, lane, 8) > > - vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) > > - vmovq %xmm1, _args_digest+2*32(state , lane, 8) > > - vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) > > - vmovq %xmm2, _args_digest+4*32(state , lane, 8) > > - vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) > > - vmovq %xmm3, _args_digest+6*32(state , lane, 8) > > - vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) > > - > > - mov _buffer(job), p > > - mov p, _args_data_ptr(state, lane, 8) > > - > > - cmp $0xFF, unused_lanes > > - jne return_null > > - > > -start_loop: > > - > > - # Find min length > > - mov _lens+0*8(state),lens0 > > - mov lens0,idx > > - mov _lens+1*8(state),lens1 > > - cmp idx,lens1 > > - cmovb lens1, idx > > - mov _lens+2*8(state),lens2 > > - cmp idx,lens2 > > - cmovb lens2,idx > > - mov _lens+3*8(state),lens3 > > - cmp idx,lens3 > > - cmovb lens3,idx > > - mov idx,len2 > > - and $0xF,idx > > - and $~0xFF,len2 > > - jz len_is_0 > > - > > - sub len2,lens0 > > - sub len2,lens1 > > - sub len2,lens2 > > - sub len2,lens3 > > - shr $32,len2 > > - mov lens0, _lens + 0*8(state) > > - mov lens1, _lens + 1*8(state) > > - mov lens2, _lens + 2*8(state) > > - mov lens3, _lens + 3*8(state) > > - > > - # "state" and "args" are the same address, arg1 > > - # len is arg2 > > - call sha512_x4_avx2 > > - # state and idx are intact > > - > > -len_is_0: > > - > > - # process completed job "idx" > > - imul $_LANE_DATA_size, idx, lane_data > > - lea _ldata(state, lane_data), lane_data > > - > > - mov _job_in_lane(lane_data), job_rax > > - mov _unused_lanes(state), unused_lanes > > - movq $0, _job_in_lane(lane_data) > > - movl $STS_COMPLETED, _status(job_rax) > > - shl $8, unused_lanes > > - or idx, unused_lanes > > - mov unused_lanes, _unused_lanes(state) > > - > > - movl $0xFFFFFFFF,_lens+4(state,idx,8) > > - vmovq _args_digest+0*32(state , idx, 8), %xmm0 > > - vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 > > - vmovq _args_digest+2*32(state , idx, 8), %xmm1 > > - vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 > > - vmovq _args_digest+4*32(state , idx, 8), %xmm2 > > - vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 > > - vmovq _args_digest+6*32(state , idx, 8), %xmm3 > > - vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 > > - > > - vmovdqu %xmm0, _result_digest + 0*16(job_rax) > > - vmovdqu %xmm1, _result_digest + 1*16(job_rax) > > - vmovdqu %xmm2, _result_digest + 2*16(job_rax) > > - vmovdqu %xmm3, _result_digest + 3*16(job_rax) > > - > > -return: > > - pop %r12 > > - pop %rbx > > - FRAME_END > > - ret > > - > > -return_null: > > - xor job_rax, job_rax > > - jmp return > > -ENDPROC(sha512_mb_mgr_submit_avx2) > > - > > -/* UNUSED? > > -.section .rodata.cst16, "aM", @progbits, 16 > > -.align 16 > > -H0: .int 0x6a09e667 > > -H1: .int 0xbb67ae85 > > -H2: .int 0x3c6ef372 > > -H3: .int 0xa54ff53a > > -H4: .int 0x510e527f > > -H5: .int 0x9b05688c > > -H6: .int 0x1f83d9ab > > -H7: .int 0x5be0cd19 > > -*/ > > diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > > deleted file mode 100644 > > index e22e907643a6..000000000000 > > --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > > +++ /dev/null > > @@ -1,531 +0,0 @@ > > -/* > > - * Multi-buffer SHA512 algorithm hash compute routine > > - * > > - * This file is provided under a dual BSD/GPLv2 license. When using or > > - * redistributing this file, you may do so under either license. > > - * > > - * GPL LICENSE SUMMARY > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * This program is free software; you can redistribute it and/or modify > > - * it under the terms of version 2 of the GNU General Public License as > > - * published by the Free Software Foundation. > > - * > > - * This program is distributed in the hope that it will be useful, but > > - * WITHOUT ANY WARRANTY; without even the implied warranty of > > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - * General Public License for more details. > > - * > > - * Contact Information: > > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > > - * > > - * BSD LICENSE > > - * > > - * Copyright(c) 2016 Intel Corporation. > > - * > > - * Redistribution and use in source and binary forms, with or without > > - * modification, are permitted provided that the following conditions > > - * are met: > > - * > > - * * Redistributions of source code must retain the above copyright > > - * notice, this list of conditions and the following disclaimer. > > - * * Redistributions in binary form must reproduce the above copyright > > - * notice, this list of conditions and the following disclaimer in > > - * the documentation and/or other materials provided with the > > - * distribution. > > - * * Neither the name of Intel Corporation nor the names of its > > - * contributors may be used to endorse or promote products derived > > - * from this software without specific prior written permission. > > - * > > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > - */ > > - > > -# code to compute quad SHA512 using AVX2 > > -# use YMMs to tackle the larger digest size > > -# outer calling routine takes care of save and restore of XMM registers > > -# Logic designed/laid out by JDG > > - > > -# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 > > -# Stack must be aligned to 32 bytes before call > > -# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 > > -# Linux preserves: rcx rdx rdi rbp r13 r14 r15 > > -# clobbers ymm0-15 > > - > > -#include <linux/linkage.h> > > -#include "sha512_mb_mgr_datastruct.S" > > - > > -arg1 = %rdi > > -arg2 = %rsi > > - > > -# Common definitions > > -STATE = arg1 > > -INP_SIZE = arg2 > > - > > -IDX = %rax > > -ROUND = %rbx > > -TBL = %r8 > > - > > -inp0 = %r9 > > -inp1 = %r10 > > -inp2 = %r11 > > -inp3 = %r12 > > - > > -a = %ymm0 > > -b = %ymm1 > > -c = %ymm2 > > -d = %ymm3 > > -e = %ymm4 > > -f = %ymm5 > > -g = %ymm6 > > -h = %ymm7 > > - > > -a0 = %ymm8 > > -a1 = %ymm9 > > -a2 = %ymm10 > > - > > -TT0 = %ymm14 > > -TT1 = %ymm13 > > -TT2 = %ymm12 > > -TT3 = %ymm11 > > -TT4 = %ymm10 > > -TT5 = %ymm9 > > - > > -T1 = %ymm14 > > -TMP = %ymm15 > > - > > -# Define stack usage > > -STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 > > - > > -#define VMOVPD vmovupd > > -_digest = SZ4*16 > > - > > -# transpose r0, r1, r2, r3, t0, t1 > > -# "transpose" data in {r0..r3} using temps {t0..t3} > > -# Input looks like: {r0 r1 r2 r3} > > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > > -# > > -# output looks like: {t0 r1 r0 r3} > > -# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} > > -# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} > > -# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} > > -# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} > > - > > -.macro TRANSPOSE r0 r1 r2 r3 t0 t1 > > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > > - > > - vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 > > - vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 > > - vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 > > - vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 > > -.endm > > - > > -.macro ROTATE_ARGS > > -TMP_ = h > > -h = g > > -g = f > > -f = e > > -e = d > > -d = c > > -c = b > > -b = a > > -a = TMP_ > > -.endm > > - > > -# PRORQ reg, imm, tmp > > -# packed-rotate-right-double > > -# does a rotate by doing two shifts and an or > > -.macro _PRORQ reg imm tmp > > - vpsllq $(64-\imm),\reg,\tmp > > - vpsrlq $\imm,\reg, \reg > > - vpor \tmp,\reg, \reg > > -.endm > > - > > -# non-destructive > > -# PRORQ_nd reg, imm, tmp, src > > -.macro _PRORQ_nd reg imm tmp src > > - vpsllq $(64-\imm), \src, \tmp > > - vpsrlq $\imm, \src, \reg > > - vpor \tmp, \reg, \reg > > -.endm > > - > > -# PRORQ dst/src, amt > > -.macro PRORQ reg imm > > - _PRORQ \reg, \imm, TMP > > -.endm > > - > > -# PRORQ_nd dst, src, amt > > -.macro PRORQ_nd reg tmp imm > > - _PRORQ_nd \reg, \imm, TMP, \tmp > > -.endm > > - > > -#; arguments passed implicitly in preprocessor symbols i, a...h > > -.macro ROUND_00_15 _T1 i > > - PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) > > - > > - vpxor g, f, a2 # ch: a2 = f^g > > - vpand e,a2, a2 # ch: a2 = (f^g)&e > > - vpxor g, a2, a2 # a2 = ch > > - > > - PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) > > - > > - offset = SZ4*(\i & 0xf) > > - vmovdqu \_T1,offset(%rsp) > > - vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K > > - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) > > - PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) > > - vpaddq a2, h, h # h = h + ch > > - PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) > > - vpaddq \_T1,h, h # h = h + ch + W + K > > - vpxor a1, a0, a0 # a0 = sigma1 > > - vmovdqu a,\_T1 > > - PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) > > - vpxor c, \_T1, \_T1 # maj: T1 = a^c > > - add $SZ4, ROUND # ROUND++ > > - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b > > - vpaddq a0, h, h > > - vpaddq h, d, d > > - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) > > - PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) > > - vpxor a1, a2, a2 # a2 = sig0 > > - vpand c, a, a1 # maj: a1 = a&c > > - vpor \_T1, a1, a1 # a1 = maj > > - vpaddq a1, h, h # h = h + ch + W + K + maj > > - vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 > > - ROTATE_ARGS > > -.endm > > - > > - > > -#; arguments passed implicitly in preprocessor symbols i, a...h > > -.macro ROUND_16_XX _T1 i > > - vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 > > - vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 > > - vmovdqu \_T1, a0 > > - PRORQ \_T1,7 > > - vmovdqu a1, a2 > > - PRORQ a1,42 > > - vpxor a0, \_T1, \_T1 > > - PRORQ \_T1, 1 > > - vpxor a2, a1, a1 > > - PRORQ a1, 19 > > - vpsrlq $7, a0, a0 > > - vpxor a0, \_T1, \_T1 > > - vpsrlq $6, a2, a2 > > - vpxor a2, a1, a1 > > - vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 > > - vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 > > - vpaddq a1, \_T1, \_T1 > > - > > - ROUND_00_15 \_T1,\i > > -.endm > > - > > - > > -# void sha512_x4_avx2(void *STATE, const int INP_SIZE) > > -# arg 1 : STATE : pointer to input data > > -# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) > > -ENTRY(sha512_x4_avx2) > > - # general registers preserved in outer calling routine > > - # outer calling routine saves all the XMM registers > > - # save callee-saved clobbered registers to comply with C function ABI > > - push %r12 > > - push %r13 > > - push %r14 > > - push %r15 > > - > > - sub $STACK_SPACE1, %rsp > > - > > - # Load the pre-transposed incoming digest. > > - vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a > > - vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b > > - vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c > > - vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d > > - vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e > > - vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f > > - vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g > > - vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h > > - > > - lea K512_4(%rip),TBL > > - > > - # load the address of each of the 4 message lanes > > - # getting ready to transpose input onto stack > > - mov _data_ptr+0*PTR_SZ(STATE),inp0 > > - mov _data_ptr+1*PTR_SZ(STATE),inp1 > > - mov _data_ptr+2*PTR_SZ(STATE),inp2 > > - mov _data_ptr+3*PTR_SZ(STATE),inp3 > > - > > - xor IDX, IDX > > -lloop: > > - xor ROUND, ROUND > > - > > - # save old digest > > - vmovdqu a, _digest(%rsp) > > - vmovdqu b, _digest+1*SZ4(%rsp) > > - vmovdqu c, _digest+2*SZ4(%rsp) > > - vmovdqu d, _digest+3*SZ4(%rsp) > > - vmovdqu e, _digest+4*SZ4(%rsp) > > - vmovdqu f, _digest+5*SZ4(%rsp) > > - vmovdqu g, _digest+6*SZ4(%rsp) > > - vmovdqu h, _digest+7*SZ4(%rsp) > > - i = 0 > > -.rep 4 > > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP > > - VMOVPD i*32(inp0, IDX), TT2 > > - VMOVPD i*32(inp1, IDX), TT1 > > - VMOVPD i*32(inp2, IDX), TT4 > > - VMOVPD i*32(inp3, IDX), TT3 > > - TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 > > - vpshufb TMP, TT0, TT0 > > - vpshufb TMP, TT1, TT1 > > - vpshufb TMP, TT2, TT2 > > - vpshufb TMP, TT3, TT3 > > - ROUND_00_15 TT0,(i*4+0) > > - ROUND_00_15 TT1,(i*4+1) > > - ROUND_00_15 TT2,(i*4+2) > > - ROUND_00_15 TT3,(i*4+3) > > - i = (i+1) > > -.endr > > - add $128, IDX > > - > > - i = (i*4) > > - > > - jmp Lrounds_16_xx > > -.align 16 > > -Lrounds_16_xx: > > -.rep 16 > > - ROUND_16_XX T1, i > > - i = (i+1) > > -.endr > > - cmp $0xa00,ROUND > > - jb Lrounds_16_xx > > - > > - # add old digest > > - vpaddq _digest(%rsp), a, a > > - vpaddq _digest+1*SZ4(%rsp), b, b > > - vpaddq _digest+2*SZ4(%rsp), c, c > > - vpaddq _digest+3*SZ4(%rsp), d, d > > - vpaddq _digest+4*SZ4(%rsp), e, e > > - vpaddq _digest+5*SZ4(%rsp), f, f > > - vpaddq _digest+6*SZ4(%rsp), g, g > > - vpaddq _digest+7*SZ4(%rsp), h, h > > - > > - sub $1, INP_SIZE # unit is blocks > > - jne lloop > > - > > - # write back to memory (state object) the transposed digest > > - vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) > > - vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) > > - > > - # update input data pointers > > - add IDX, inp0 > > - mov inp0, _data_ptr+0*PTR_SZ(STATE) > > - add IDX, inp1 > > - mov inp1, _data_ptr+1*PTR_SZ(STATE) > > - add IDX, inp2 > > - mov inp2, _data_ptr+2*PTR_SZ(STATE) > > - add IDX, inp3 > > - mov inp3, _data_ptr+3*PTR_SZ(STATE) > > - > > - #;;;;;;;;;;;;;;; > > - #; Postamble > > - add $STACK_SPACE1, %rsp > > - # restore callee-saved clobbered registers > > - > > - pop %r15 > > - pop %r14 > > - pop %r13 > > - pop %r12 > > - > > - # outer calling routine restores XMM and other GP registers > > - ret > > -ENDPROC(sha512_x4_avx2) > > - > > -.section .rodata.K512_4, "a", @progbits > > -.align 64 > > -K512_4: > > - .octa 0x428a2f98d728ae22428a2f98d728ae22,\ > > - 0x428a2f98d728ae22428a2f98d728ae22 > > - .octa 0x7137449123ef65cd7137449123ef65cd,\ > > - 0x7137449123ef65cd7137449123ef65cd > > - .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ > > - 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f > > - .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ > > - 0xe9b5dba58189dbbce9b5dba58189dbbc > > - .octa 0x3956c25bf348b5383956c25bf348b538,\ > > - 0x3956c25bf348b5383956c25bf348b538 > > - .octa 0x59f111f1b605d01959f111f1b605d019,\ > > - 0x59f111f1b605d01959f111f1b605d019 > > - .octa 0x923f82a4af194f9b923f82a4af194f9b,\ > > - 0x923f82a4af194f9b923f82a4af194f9b > > - .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ > > - 0xab1c5ed5da6d8118ab1c5ed5da6d8118 > > - .octa 0xd807aa98a3030242d807aa98a3030242,\ > > - 0xd807aa98a3030242d807aa98a3030242 > > - .octa 0x12835b0145706fbe12835b0145706fbe,\ > > - 0x12835b0145706fbe12835b0145706fbe > > - .octa 0x243185be4ee4b28c243185be4ee4b28c,\ > > - 0x243185be4ee4b28c243185be4ee4b28c > > - .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ > > - 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 > > - .octa 0x72be5d74f27b896f72be5d74f27b896f,\ > > - 0x72be5d74f27b896f72be5d74f27b896f > > - .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ > > - 0x80deb1fe3b1696b180deb1fe3b1696b1 > > - .octa 0x9bdc06a725c712359bdc06a725c71235,\ > > - 0x9bdc06a725c712359bdc06a725c71235 > > - .octa 0xc19bf174cf692694c19bf174cf692694,\ > > - 0xc19bf174cf692694c19bf174cf692694 > > - .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ > > - 0xe49b69c19ef14ad2e49b69c19ef14ad2 > > - .octa 0xefbe4786384f25e3efbe4786384f25e3,\ > > - 0xefbe4786384f25e3efbe4786384f25e3 > > - .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ > > - 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 > > - .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ > > - 0x240ca1cc77ac9c65240ca1cc77ac9c65 > > - .octa 0x2de92c6f592b02752de92c6f592b0275,\ > > - 0x2de92c6f592b02752de92c6f592b0275 > > - .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ > > - 0x4a7484aa6ea6e4834a7484aa6ea6e483 > > - .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ > > - 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 > > - .octa 0x76f988da831153b576f988da831153b5,\ > > - 0x76f988da831153b576f988da831153b5 > > - .octa 0x983e5152ee66dfab983e5152ee66dfab,\ > > - 0x983e5152ee66dfab983e5152ee66dfab > > - .octa 0xa831c66d2db43210a831c66d2db43210,\ > > - 0xa831c66d2db43210a831c66d2db43210 > > - .octa 0xb00327c898fb213fb00327c898fb213f,\ > > - 0xb00327c898fb213fb00327c898fb213f > > - .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ > > - 0xbf597fc7beef0ee4bf597fc7beef0ee4 > > - .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ > > - 0xc6e00bf33da88fc2c6e00bf33da88fc2 > > - .octa 0xd5a79147930aa725d5a79147930aa725,\ > > - 0xd5a79147930aa725d5a79147930aa725 > > - .octa 0x06ca6351e003826f06ca6351e003826f,\ > > - 0x06ca6351e003826f06ca6351e003826f > > - .octa 0x142929670a0e6e70142929670a0e6e70,\ > > - 0x142929670a0e6e70142929670a0e6e70 > > - .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ > > - 0x27b70a8546d22ffc27b70a8546d22ffc > > - .octa 0x2e1b21385c26c9262e1b21385c26c926,\ > > - 0x2e1b21385c26c9262e1b21385c26c926 > > - .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ > > - 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed > > - .octa 0x53380d139d95b3df53380d139d95b3df,\ > > - 0x53380d139d95b3df53380d139d95b3df > > - .octa 0x650a73548baf63de650a73548baf63de,\ > > - 0x650a73548baf63de650a73548baf63de > > - .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ > > - 0x766a0abb3c77b2a8766a0abb3c77b2a8 > > - .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ > > - 0x81c2c92e47edaee681c2c92e47edaee6 > > - .octa 0x92722c851482353b92722c851482353b,\ > > - 0x92722c851482353b92722c851482353b > > - .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ > > - 0xa2bfe8a14cf10364a2bfe8a14cf10364 > > - .octa 0xa81a664bbc423001a81a664bbc423001,\ > > - 0xa81a664bbc423001a81a664bbc423001 > > - .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ > > - 0xc24b8b70d0f89791c24b8b70d0f89791 > > - .octa 0xc76c51a30654be30c76c51a30654be30,\ > > - 0xc76c51a30654be30c76c51a30654be30 > > - .octa 0xd192e819d6ef5218d192e819d6ef5218,\ > > - 0xd192e819d6ef5218d192e819d6ef5218 > > - .octa 0xd69906245565a910d69906245565a910,\ > > - 0xd69906245565a910d69906245565a910 > > - .octa 0xf40e35855771202af40e35855771202a,\ > > - 0xf40e35855771202af40e35855771202a > > - .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ > > - 0x106aa07032bbd1b8106aa07032bbd1b8 > > - .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ > > - 0x19a4c116b8d2d0c819a4c116b8d2d0c8 > > - .octa 0x1e376c085141ab531e376c085141ab53,\ > > - 0x1e376c085141ab531e376c085141ab53 > > - .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ > > - 0x2748774cdf8eeb992748774cdf8eeb99 > > - .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ > > - 0x34b0bcb5e19b48a834b0bcb5e19b48a8 > > - .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ > > - 0x391c0cb3c5c95a63391c0cb3c5c95a63 > > - .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ > > - 0x4ed8aa4ae3418acb4ed8aa4ae3418acb > > - .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ > > - 0x5b9cca4f7763e3735b9cca4f7763e373 > > - .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ > > - 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 > > - .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ > > - 0x748f82ee5defb2fc748f82ee5defb2fc > > - .octa 0x78a5636f43172f6078a5636f43172f60,\ > > - 0x78a5636f43172f6078a5636f43172f60 > > - .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ > > - 0x84c87814a1f0ab7284c87814a1f0ab72 > > - .octa 0x8cc702081a6439ec8cc702081a6439ec,\ > > - 0x8cc702081a6439ec8cc702081a6439ec > > - .octa 0x90befffa23631e2890befffa23631e28,\ > > - 0x90befffa23631e2890befffa23631e28 > > - .octa 0xa4506cebde82bde9a4506cebde82bde9,\ > > - 0xa4506cebde82bde9a4506cebde82bde9 > > - .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ > > - 0xbef9a3f7b2c67915bef9a3f7b2c67915 > > - .octa 0xc67178f2e372532bc67178f2e372532b,\ > > - 0xc67178f2e372532bc67178f2e372532b > > - .octa 0xca273eceea26619cca273eceea26619c,\ > > - 0xca273eceea26619cca273eceea26619c > > - .octa 0xd186b8c721c0c207d186b8c721c0c207,\ > > - 0xd186b8c721c0c207d186b8c721c0c207 > > - .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ > > - 0xeada7dd6cde0eb1eeada7dd6cde0eb1e > > - .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ > > - 0xf57d4f7fee6ed178f57d4f7fee6ed178 > > - .octa 0x06f067aa72176fba06f067aa72176fba,\ > > - 0x06f067aa72176fba06f067aa72176fba > > - .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ > > - 0x0a637dc5a2c898a60a637dc5a2c898a6 > > - .octa 0x113f9804bef90dae113f9804bef90dae,\ > > - 0x113f9804bef90dae113f9804bef90dae > > - .octa 0x1b710b35131c471b1b710b35131c471b,\ > > - 0x1b710b35131c471b1b710b35131c471b > > - .octa 0x28db77f523047d8428db77f523047d84,\ > > - 0x28db77f523047d8428db77f523047d84 > > - .octa 0x32caab7b40c7249332caab7b40c72493,\ > > - 0x32caab7b40c7249332caab7b40c72493 > > - .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ > > - 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc > > - .octa 0x431d67c49c100d4c431d67c49c100d4c,\ > > - 0x431d67c49c100d4c431d67c49c100d4c > > - .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ > > - 0x4cc5d4becb3e42b64cc5d4becb3e42b6 > > - .octa 0x597f299cfc657e2a597f299cfc657e2a,\ > > - 0x597f299cfc657e2a597f299cfc657e2a > > - .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ > > - 0x5fcb6fab3ad6faec5fcb6fab3ad6faec > > - .octa 0x6c44198c4a4758176c44198c4a475817,\ > > - 0x6c44198c4a4758176c44198c4a475817 > > - > > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > > -.align 32 > > -PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 > > - .octa 0x18191a1b1c1d1e1f1011121314151617 > > diff --git a/crypto/Kconfig b/crypto/Kconfig > > index f3e40ac56d93..4ee600bdefdb 100644 > > --- a/crypto/Kconfig > > +++ b/crypto/Kconfig > > @@ -213,20 +213,6 @@ config CRYPTO_CRYPTD > > converts an arbitrary synchronous software crypto algorithm > > into an asynchronous algorithm that executes in a kernel thread. > > > > -config CRYPTO_MCRYPTD > > - tristate "Software async multi-buffer crypto daemon" > > - select CRYPTO_BLKCIPHER > > - select CRYPTO_HASH > > - select CRYPTO_MANAGER > > - select CRYPTO_WORKQUEUE > > - help > > - This is a generic software asynchronous crypto daemon that > > - provides the kernel thread to assist multi-buffer crypto > > - algorithms for submitting jobs and flushing jobs in multi-buffer > > - crypto algorithms. Multi-buffer crypto algorithms are executed > > - in the context of this kernel thread and drivers can post > > - their crypto request asynchronously to be processed by this daemon. > > - > > config CRYPTO_AUTHENC > > tristate "Authenc support" > > select CRYPTO_AEAD > > @@ -848,54 +834,6 @@ config CRYPTO_SHA1_PPC_SPE > > SHA-1 secure hash standard (DFIPS 180-4) implemented > > using powerpc SPE SIMD instruction set. > > > > -config CRYPTO_SHA1_MB > > - tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" > > - depends on X86 && 64BIT > > - select CRYPTO_SHA1 > > - select CRYPTO_HASH > > - select CRYPTO_MCRYPTD > > - help > > - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > > - using multi-buffer technique. This algorithm computes on > > - multiple data lanes concurrently with SIMD instructions for > > - better throughput. It should not be enabled by default but > > - used when there is significant amount of work to keep the keep > > - the data lanes filled to get performance benefit. If the data > > - lanes remain unfilled, a flush operation will be initiated to > > - process the crypto jobs, adding a slight latency. > > - > > -config CRYPTO_SHA256_MB > > - tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)" > > - depends on X86 && 64BIT > > - select CRYPTO_SHA256 > > - select CRYPTO_HASH > > - select CRYPTO_MCRYPTD > > - help > > - SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > > - using multi-buffer technique. This algorithm computes on > > - multiple data lanes concurrently with SIMD instructions for > > - better throughput. It should not be enabled by default but > > - used when there is significant amount of work to keep the keep > > - the data lanes filled to get performance benefit. If the data > > - lanes remain unfilled, a flush operation will be initiated to > > - process the crypto jobs, adding a slight latency. > > - > > -config CRYPTO_SHA512_MB > > - tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)" > > - depends on X86 && 64BIT > > - select CRYPTO_SHA512 > > - select CRYPTO_HASH > > - select CRYPTO_MCRYPTD > > - help > > - SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > > - using multi-buffer technique. This algorithm computes on > > - multiple data lanes concurrently with SIMD instructions for > > - better throughput. It should not be enabled by default but > > - used when there is significant amount of work to keep the keep > > - the data lanes filled to get performance benefit. If the data > > - lanes remain unfilled, a flush operation will be initiated to > > - process the crypto jobs, adding a slight latency. > > - > > config CRYPTO_SHA256 > > tristate "SHA224 and SHA256 digest algorithm" > > select CRYPTO_HASH > > diff --git a/crypto/Makefile b/crypto/Makefile > > index 6d1d40eeb964..80e3da755cbf 100644 > > --- a/crypto/Makefile > > +++ b/crypto/Makefile > > @@ -93,7 +93,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o > > obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o > > obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o > > obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o > > -obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o > > obj-$(CONFIG_CRYPTO_DES) += des_generic.o > > obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o > > obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o > > diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c > > deleted file mode 100644 > > index f14152147ce8..000000000000 > > --- a/crypto/mcryptd.c > > +++ /dev/null > > @@ -1,675 +0,0 @@ > > -/* > > - * Software multibuffer async crypto daemon. > > - * > > - * Copyright (c) 2014 Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * Adapted from crypto daemon. > > - * > > - * This program is free software; you can redistribute it and/or modify it > > - * under the terms of the GNU General Public License as published by the Free > > - * Software Foundation; either version 2 of the License, or (at your option) > > - * any later version. > > - * > > - */ > > - > > -#include <crypto/algapi.h> > > -#include <crypto/internal/hash.h> > > -#include <crypto/internal/aead.h> > > -#include <crypto/mcryptd.h> > > -#include <crypto/crypto_wq.h> > > -#include <linux/err.h> > > -#include <linux/init.h> > > -#include <linux/kernel.h> > > -#include <linux/list.h> > > -#include <linux/module.h> > > -#include <linux/scatterlist.h> > > -#include <linux/sched.h> > > -#include <linux/sched/stat.h> > > -#include <linux/slab.h> > > - > > -#define MCRYPTD_MAX_CPU_QLEN 100 > > -#define MCRYPTD_BATCH 9 > > - > > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, > > - unsigned int tail); > > - > > -struct mcryptd_flush_list { > > - struct list_head list; > > - struct mutex lock; > > -}; > > - > > -static struct mcryptd_flush_list __percpu *mcryptd_flist; > > - > > -struct hashd_instance_ctx { > > - struct crypto_ahash_spawn spawn; > > - struct mcryptd_queue *queue; > > -}; > > - > > -static void mcryptd_queue_worker(struct work_struct *work); > > - > > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay) > > -{ > > - struct mcryptd_flush_list *flist; > > - > > - if (!cstate->flusher_engaged) { > > - /* put the flusher on the flush list */ > > - flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); > > - mutex_lock(&flist->lock); > > - list_add_tail(&cstate->flush_list, &flist->list); > > - cstate->flusher_engaged = true; > > - cstate->next_flush = jiffies + delay; > > - queue_delayed_work_on(smp_processor_id(), kcrypto_wq, > > - &cstate->flush, delay); > > - mutex_unlock(&flist->lock); > > - } > > -} > > -EXPORT_SYMBOL(mcryptd_arm_flusher); > > - > > -static int mcryptd_init_queue(struct mcryptd_queue *queue, > > - unsigned int max_cpu_qlen) > > -{ > > - int cpu; > > - struct mcryptd_cpu_queue *cpu_queue; > > - > > - queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue); > > - pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue); > > - if (!queue->cpu_queue) > > - return -ENOMEM; > > - for_each_possible_cpu(cpu) { > > - cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); > > - pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); > > - crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); > > - INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); > > - spin_lock_init(&cpu_queue->q_lock); > > - } > > - return 0; > > -} > > - > > -static void mcryptd_fini_queue(struct mcryptd_queue *queue) > > -{ > > - int cpu; > > - struct mcryptd_cpu_queue *cpu_queue; > > - > > - for_each_possible_cpu(cpu) { > > - cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); > > - BUG_ON(cpu_queue->queue.qlen); > > - } > > - free_percpu(queue->cpu_queue); > > -} > > - > > -static int mcryptd_enqueue_request(struct mcryptd_queue *queue, > > - struct crypto_async_request *request, > > - struct mcryptd_hash_request_ctx *rctx) > > -{ > > - int cpu, err; > > - struct mcryptd_cpu_queue *cpu_queue; > > - > > - cpu_queue = raw_cpu_ptr(queue->cpu_queue); > > - spin_lock(&cpu_queue->q_lock); > > - cpu = smp_processor_id(); > > - rctx->tag.cpu = smp_processor_id(); > > - > > - err = crypto_enqueue_request(&cpu_queue->queue, request); > > - pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", > > - cpu, cpu_queue, request); > > - spin_unlock(&cpu_queue->q_lock); > > - queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); > > - > > - return err; > > -} > > - > > -/* > > - * Try to opportunisticlly flush the partially completed jobs if > > - * crypto daemon is the only task running. > > - */ > > -static void mcryptd_opportunistic_flush(void) > > -{ > > - struct mcryptd_flush_list *flist; > > - struct mcryptd_alg_cstate *cstate; > > - > > - flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); > > - while (single_task_running()) { > > - mutex_lock(&flist->lock); > > - cstate = list_first_entry_or_null(&flist->list, > > - struct mcryptd_alg_cstate, flush_list); > > - if (!cstate || !cstate->flusher_engaged) { > > - mutex_unlock(&flist->lock); > > - return; > > - } > > - list_del(&cstate->flush_list); > > - cstate->flusher_engaged = false; > > - mutex_unlock(&flist->lock); > > - cstate->alg_state->flusher(cstate); > > - } > > -} > > - > > -/* > > - * Called in workqueue context, do one real cryption work (via > > - * req->complete) and reschedule itself if there are more work to > > - * do. > > - */ > > -static void mcryptd_queue_worker(struct work_struct *work) > > -{ > > - struct mcryptd_cpu_queue *cpu_queue; > > - struct crypto_async_request *req, *backlog; > > - int i; > > - > > - /* > > - * Need to loop through more than once for multi-buffer to > > - * be effective. > > - */ > > - > > - cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); > > - i = 0; > > - while (i < MCRYPTD_BATCH || single_task_running()) { > > - > > - spin_lock_bh(&cpu_queue->q_lock); > > - backlog = crypto_get_backlog(&cpu_queue->queue); > > - req = crypto_dequeue_request(&cpu_queue->queue); > > - spin_unlock_bh(&cpu_queue->q_lock); > > - > > - if (!req) { > > - mcryptd_opportunistic_flush(); > > - return; > > - } > > - > > - if (backlog) > > - backlog->complete(backlog, -EINPROGRESS); > > - req->complete(req, 0); > > - if (!cpu_queue->queue.qlen) > > - return; > > - ++i; > > - } > > - if (cpu_queue->queue.qlen) > > - queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); > > -} > > - > > -void mcryptd_flusher(struct work_struct *__work) > > -{ > > - struct mcryptd_alg_cstate *alg_cpu_state; > > - struct mcryptd_alg_state *alg_state; > > - struct mcryptd_flush_list *flist; > > - int cpu; > > - > > - cpu = smp_processor_id(); > > - alg_cpu_state = container_of(to_delayed_work(__work), > > - struct mcryptd_alg_cstate, flush); > > - alg_state = alg_cpu_state->alg_state; > > - if (alg_cpu_state->cpu != cpu) > > - pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n", > > - cpu, alg_cpu_state->cpu); > > - > > - if (alg_cpu_state->flusher_engaged) { > > - flist = per_cpu_ptr(mcryptd_flist, cpu); > > - mutex_lock(&flist->lock); > > - list_del(&alg_cpu_state->flush_list); > > - alg_cpu_state->flusher_engaged = false; > > - mutex_unlock(&flist->lock); > > - alg_state->flusher(alg_cpu_state); > > - } > > -} > > -EXPORT_SYMBOL_GPL(mcryptd_flusher); > > - > > -static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm) > > -{ > > - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); > > - struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst); > > - > > - return ictx->queue; > > -} > > - > > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, > > - unsigned int tail) > > -{ > > - char *p; > > - struct crypto_instance *inst; > > - int err; > > - > > - p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL); > > - if (!p) > > - return ERR_PTR(-ENOMEM); > > - > > - inst = (void *)(p + head); > > - > > - err = -ENAMETOOLONG; > > - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, > > - "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) > > - goto out_free_inst; > > - > > - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); > > - > > - inst->alg.cra_priority = alg->cra_priority + 50; > > - inst->alg.cra_blocksize = alg->cra_blocksize; > > - inst->alg.cra_alignmask = alg->cra_alignmask; > > - > > -out: > > - return p; > > - > > -out_free_inst: > > - kfree(p); > > - p = ERR_PTR(err); > > - goto out; > > -} > > - > > -static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type, > > - u32 *mask) > > -{ > > - struct crypto_attr_type *algt; > > - > > - algt = crypto_get_attr_type(tb); > > - if (IS_ERR(algt)) > > - return false; > > - > > - *type |= algt->type & CRYPTO_ALG_INTERNAL; > > - *mask |= algt->mask & CRYPTO_ALG_INTERNAL; > > - > > - if (*type & *mask & CRYPTO_ALG_INTERNAL) > > - return true; > > - else > > - return false; > > -} > > - > > -static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) > > -{ > > - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); > > - struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); > > - struct crypto_ahash_spawn *spawn = &ictx->spawn; > > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); > > - struct crypto_ahash *hash; > > - > > - hash = crypto_spawn_ahash(spawn); > > - if (IS_ERR(hash)) > > - return PTR_ERR(hash); > > - > > - ctx->child = hash; > > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > > - sizeof(struct mcryptd_hash_request_ctx) + > > - crypto_ahash_reqsize(hash)); > > - return 0; > > -} > > - > > -static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) > > -{ > > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); > > - > > - crypto_free_ahash(ctx->child); > > -} > > - > > -static int mcryptd_hash_setkey(struct crypto_ahash *parent, > > - const u8 *key, unsigned int keylen) > > -{ > > - struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); > > - struct crypto_ahash *child = ctx->child; > > - int err; > > - > > - crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); > > - crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) & > > - CRYPTO_TFM_REQ_MASK); > > - err = crypto_ahash_setkey(child, key, keylen); > > - crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) & > > - CRYPTO_TFM_RES_MASK); > > - return err; > > -} > > - > > -static int mcryptd_hash_enqueue(struct ahash_request *req, > > - crypto_completion_t complete) > > -{ > > - int ret; > > - > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > > - struct mcryptd_queue *queue = > > - mcryptd_get_queue(crypto_ahash_tfm(tfm)); > > - > > - rctx->complete = req->base.complete; > > - req->base.complete = complete; > > - > > - ret = mcryptd_enqueue_request(queue, &req->base, rctx); > > - > > - return ret; > > -} > > - > > -static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) > > -{ > > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); > > - struct crypto_ahash *child = ctx->child; > > - struct ahash_request *req = ahash_request_cast(req_async); > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - struct ahash_request *desc = &rctx->areq; > > - > > - if (unlikely(err == -EINPROGRESS)) > > - goto out; > > - > > - ahash_request_set_tfm(desc, child); > > - ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, > > - rctx->complete, req_async); > > - > > - rctx->out = req->result; > > - err = crypto_ahash_init(desc); > > - > > -out: > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > -} > > - > > -static int mcryptd_hash_init_enqueue(struct ahash_request *req) > > -{ > > - return mcryptd_hash_enqueue(req, mcryptd_hash_init); > > -} > > - > > -static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) > > -{ > > - struct ahash_request *req = ahash_request_cast(req_async); > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - > > - if (unlikely(err == -EINPROGRESS)) > > - goto out; > > - > > - rctx->out = req->result; > > - err = crypto_ahash_update(&rctx->areq); > > - if (err) { > > - req->base.complete = rctx->complete; > > - goto out; > > - } > > - > > - return; > > -out: > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > -} > > - > > -static int mcryptd_hash_update_enqueue(struct ahash_request *req) > > -{ > > - return mcryptd_hash_enqueue(req, mcryptd_hash_update); > > -} > > - > > -static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) > > -{ > > - struct ahash_request *req = ahash_request_cast(req_async); > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - > > - if (unlikely(err == -EINPROGRESS)) > > - goto out; > > - > > - rctx->out = req->result; > > - err = crypto_ahash_final(&rctx->areq); > > - if (err) { > > - req->base.complete = rctx->complete; > > - goto out; > > - } > > - > > - return; > > -out: > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > -} > > - > > -static int mcryptd_hash_final_enqueue(struct ahash_request *req) > > -{ > > - return mcryptd_hash_enqueue(req, mcryptd_hash_final); > > -} > > - > > -static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) > > -{ > > - struct ahash_request *req = ahash_request_cast(req_async); > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - > > - if (unlikely(err == -EINPROGRESS)) > > - goto out; > > - rctx->out = req->result; > > - err = crypto_ahash_finup(&rctx->areq); > > - > > - if (err) { > > - req->base.complete = rctx->complete; > > - goto out; > > - } > > - > > - return; > > -out: > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > -} > > - > > -static int mcryptd_hash_finup_enqueue(struct ahash_request *req) > > -{ > > - return mcryptd_hash_enqueue(req, mcryptd_hash_finup); > > -} > > - > > -static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) > > -{ > > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); > > - struct crypto_ahash *child = ctx->child; > > - struct ahash_request *req = ahash_request_cast(req_async); > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - struct ahash_request *desc = &rctx->areq; > > - > > - if (unlikely(err == -EINPROGRESS)) > > - goto out; > > - > > - ahash_request_set_tfm(desc, child); > > - ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, > > - rctx->complete, req_async); > > - > > - rctx->out = req->result; > > - err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc); > > - > > -out: > > - local_bh_disable(); > > - rctx->complete(&req->base, err); > > - local_bh_enable(); > > -} > > - > > -static int mcryptd_hash_digest_enqueue(struct ahash_request *req) > > -{ > > - return mcryptd_hash_enqueue(req, mcryptd_hash_digest); > > -} > > - > > -static int mcryptd_hash_export(struct ahash_request *req, void *out) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - > > - return crypto_ahash_export(&rctx->areq, out); > > -} > > - > > -static int mcryptd_hash_import(struct ahash_request *req, const void *in) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - > > - return crypto_ahash_import(&rctx->areq, in); > > -} > > - > > -static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, > > - struct mcryptd_queue *queue) > > -{ > > - struct hashd_instance_ctx *ctx; > > - struct ahash_instance *inst; > > - struct hash_alg_common *halg; > > - struct crypto_alg *alg; > > - u32 type = 0; > > - u32 mask = 0; > > - int err; > > - > > - if (!mcryptd_check_internal(tb, &type, &mask)) > > - return -EINVAL; > > - > > - halg = ahash_attr_alg(tb[1], type, mask); > > - if (IS_ERR(halg)) > > - return PTR_ERR(halg); > > - > > - alg = &halg->base; > > - pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); > > - inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), > > - sizeof(*ctx)); > > - err = PTR_ERR(inst); > > - if (IS_ERR(inst)) > > - goto out_put_alg; > > - > > - ctx = ahash_instance_ctx(inst); > > - ctx->queue = queue; > > - > > - err = crypto_init_ahash_spawn(&ctx->spawn, halg, > > - ahash_crypto_instance(inst)); > > - if (err) > > - goto out_free_inst; > > - > > - inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | > > - (alg->cra_flags & (CRYPTO_ALG_INTERNAL | > > - CRYPTO_ALG_OPTIONAL_KEY)); > > - > > - inst->alg.halg.digestsize = halg->digestsize; > > - inst->alg.halg.statesize = halg->statesize; > > - inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); > > - > > - inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; > > - inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm; > > - > > - inst->alg.init = mcryptd_hash_init_enqueue; > > - inst->alg.update = mcryptd_hash_update_enqueue; > > - inst->alg.final = mcryptd_hash_final_enqueue; > > - inst->alg.finup = mcryptd_hash_finup_enqueue; > > - inst->alg.export = mcryptd_hash_export; > > - inst->alg.import = mcryptd_hash_import; > > - if (crypto_hash_alg_has_setkey(halg)) > > - inst->alg.setkey = mcryptd_hash_setkey; > > - inst->alg.digest = mcryptd_hash_digest_enqueue; > > - > > - err = ahash_register_instance(tmpl, inst); > > - if (err) { > > - crypto_drop_ahash(&ctx->spawn); > > -out_free_inst: > > - kfree(inst); > > - } > > - > > -out_put_alg: > > - crypto_mod_put(alg); > > - return err; > > -} > > - > > -static struct mcryptd_queue mqueue; > > - > > -static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb) > > -{ > > - struct crypto_attr_type *algt; > > - > > - algt = crypto_get_attr_type(tb); > > - if (IS_ERR(algt)) > > - return PTR_ERR(algt); > > - > > - switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { > > - case CRYPTO_ALG_TYPE_DIGEST: > > - return mcryptd_create_hash(tmpl, tb, &mqueue); > > - break; > > - } > > - > > - return -EINVAL; > > -} > > - > > -static void mcryptd_free(struct crypto_instance *inst) > > -{ > > - struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst); > > - struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst); > > - > > - switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { > > - case CRYPTO_ALG_TYPE_AHASH: > > - crypto_drop_ahash(&hctx->spawn); > > - kfree(ahash_instance(inst)); > > - return; > > - default: > > - crypto_drop_spawn(&ctx->spawn); > > - kfree(inst); > > - } > > -} > > - > > -static struct crypto_template mcryptd_tmpl = { > > - .name = "mcryptd", > > - .create = mcryptd_create, > > - .free = mcryptd_free, > > - .module = THIS_MODULE, > > -}; > > - > > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, > > - u32 type, u32 mask) > > -{ > > - char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME]; > > - struct crypto_ahash *tfm; > > - > > - if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME, > > - "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) > > - return ERR_PTR(-EINVAL); > > - tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask); > > - if (IS_ERR(tfm)) > > - return ERR_CAST(tfm); > > - if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { > > - crypto_free_ahash(tfm); > > - return ERR_PTR(-EINVAL); > > - } > > - > > - return __mcryptd_ahash_cast(tfm); > > -} > > -EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); > > - > > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) > > -{ > > - struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); > > - > > - return ctx->child; > > -} > > -EXPORT_SYMBOL_GPL(mcryptd_ahash_child); > > - > > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req) > > -{ > > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > > - return &rctx->areq; > > -} > > -EXPORT_SYMBOL_GPL(mcryptd_ahash_desc); > > - > > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm) > > -{ > > - crypto_free_ahash(&tfm->base); > > -} > > -EXPORT_SYMBOL_GPL(mcryptd_free_ahash); > > - > > -static int __init mcryptd_init(void) > > -{ > > - int err, cpu; > > - struct mcryptd_flush_list *flist; > > - > > - mcryptd_flist = alloc_percpu(struct mcryptd_flush_list); > > - for_each_possible_cpu(cpu) { > > - flist = per_cpu_ptr(mcryptd_flist, cpu); > > - INIT_LIST_HEAD(&flist->list); > > - mutex_init(&flist->lock); > > - } > > - > > - err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN); > > - if (err) { > > - free_percpu(mcryptd_flist); > > - return err; > > - } > > - > > - err = crypto_register_template(&mcryptd_tmpl); > > - if (err) { > > - mcryptd_fini_queue(&mqueue); > > - free_percpu(mcryptd_flist); > > - } > > - > > - return err; > > -} > > - > > -static void __exit mcryptd_exit(void) > > -{ > > - mcryptd_fini_queue(&mqueue); > > - crypto_unregister_template(&mcryptd_tmpl); > > - free_percpu(mcryptd_flist); > > -} > > - > > -subsys_initcall(mcryptd_init); > > -module_exit(mcryptd_exit); > > - > > -MODULE_LICENSE("GPL"); > > -MODULE_DESCRIPTION("Software async multibuffer crypto daemon"); > > -MODULE_ALIAS_CRYPTO("mcryptd"); > > diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h > > deleted file mode 100644 > > index b67404fc4b34..000000000000 > > --- a/include/crypto/mcryptd.h > > +++ /dev/null > > @@ -1,114 +0,0 @@ > > -/* SPDX-License-Identifier: GPL-2.0 */ > > -/* > > - * Software async multibuffer crypto daemon headers > > - * > > - * Author: > > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > > - * > > - * Copyright (c) 2014, Intel Corporation. > > - */ > > - > > -#ifndef _CRYPTO_MCRYPT_H > > -#define _CRYPTO_MCRYPT_H > > - > > -#include <linux/crypto.h> > > -#include <linux/kernel.h> > > -#include <crypto/hash.h> > > - > > -struct mcryptd_ahash { > > - struct crypto_ahash base; > > -}; > > - > > -static inline struct mcryptd_ahash *__mcryptd_ahash_cast( > > - struct crypto_ahash *tfm) > > -{ > > - return (struct mcryptd_ahash *)tfm; > > -} > > - > > -struct mcryptd_cpu_queue { > > - struct crypto_queue queue; > > - spinlock_t q_lock; > > - struct work_struct work; > > -}; > > - > > -struct mcryptd_queue { > > - struct mcryptd_cpu_queue __percpu *cpu_queue; > > -}; > > - > > -struct mcryptd_instance_ctx { > > - struct crypto_spawn spawn; > > - struct mcryptd_queue *queue; > > -}; > > - > > -struct mcryptd_hash_ctx { > > - struct crypto_ahash *child; > > - struct mcryptd_alg_state *alg_state; > > -}; > > - > > -struct mcryptd_tag { > > - /* seq number of request */ > > - unsigned seq_num; > > - /* arrival time of request */ > > - unsigned long arrival; > > - unsigned long expire; > > - int cpu; > > -}; > > - > > -struct mcryptd_hash_request_ctx { > > - struct list_head waiter; > > - crypto_completion_t complete; > > - struct mcryptd_tag tag; > > - struct crypto_hash_walk walk; > > - u8 *out; > > - int flag; > > - struct ahash_request areq; > > -}; > > - > > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, > > - u32 type, u32 mask); > > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); > > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req); > > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm); > > -void mcryptd_flusher(struct work_struct *work); > > - > > -enum mcryptd_req_type { > > - MCRYPTD_NONE, > > - MCRYPTD_UPDATE, > > - MCRYPTD_FINUP, > > - MCRYPTD_DIGEST, > > - MCRYPTD_FINAL > > -}; > > - > > -struct mcryptd_alg_cstate { > > - unsigned long next_flush; > > - unsigned next_seq_num; > > - bool flusher_engaged; > > - struct delayed_work flush; > > - int cpu; > > - struct mcryptd_alg_state *alg_state; > > - void *mgr; > > - spinlock_t work_lock; > > - struct list_head work_list; > > - struct list_head flush_list; > > -}; > > - > > -struct mcryptd_alg_state { > > - struct mcryptd_alg_cstate __percpu *alg_cstate; > > - unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate); > > -}; > > - > > -/* return delay in jiffies from current time */ > > -static inline unsigned long get_delay(unsigned long t) > > -{ > > - long delay; > > - > > - delay = (long) t - (long) jiffies; > > - if (delay <= 0) > > - return 0; > > - else > > - return (unsigned long) delay; > > -} > > - > > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay); > > - > > -#endif > > >