On 08/22/2018 01:51 AM, Ard Biesheuvel wrote: > As it turns out, the AVX2 multibuffer SHA routines are currently > broken [0], in a way that would have likely been noticed if this > code were in wide use. Since the code is too complicated to be > maintained by anyone except the original authors, and since the > performance benefits for real-world use cases are debatable to > begin with, it is better to drop it entirely for the moment. > > [0] https://marc.info/?l=linux-crypto-vger&m=153476243825350&w=2 Sorry, I was out of the loop for a while and haven't been following the code too closely. Megha is maintaining the code now. Before the code is removed, please give us a chance to fix it first. Thanks. Tim > > Suggested-by: Eric Biggers <ebiggers@xxxxxxxxxx> > Cc: Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> > Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx> > Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> > --- > MAINTAINERS | 8 - > arch/m68k/configs/amiga_defconfig | 1 - > arch/m68k/configs/apollo_defconfig | 1 - > arch/m68k/configs/atari_defconfig | 1 - > arch/m68k/configs/bvme6000_defconfig | 1 - > arch/m68k/configs/hp300_defconfig | 1 - > arch/m68k/configs/mac_defconfig | 1 - > arch/m68k/configs/multi_defconfig | 1 - > arch/m68k/configs/mvme147_defconfig | 1 - > arch/m68k/configs/mvme16x_defconfig | 1 - > arch/m68k/configs/q40_defconfig | 1 - > arch/m68k/configs/sun3_defconfig | 1 - > arch/m68k/configs/sun3x_defconfig | 1 - > arch/s390/configs/debug_defconfig | 1 - > arch/s390/configs/performance_defconfig | 1 - > arch/x86/crypto/Makefile | 3 - > arch/x86/crypto/sha1-mb/Makefile | 14 - > arch/x86/crypto/sha1-mb/sha1_mb.c | 1011 ---------------- > arch/x86/crypto/sha1-mb/sha1_mb_ctx.h | 134 --- > arch/x86/crypto/sha1-mb/sha1_mb_mgr.h | 110 -- > 
.../crypto/sha1-mb/sha1_mb_mgr_datastruct.S | 287 ----- > .../crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S | 304 ----- > .../crypto/sha1-mb/sha1_mb_mgr_init_avx2.c | 64 - > .../crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S | 209 ---- > arch/x86/crypto/sha1-mb/sha1_x8_avx2.S | 492 -------- > arch/x86/crypto/sha256-mb/Makefile | 14 - > arch/x86/crypto/sha256-mb/sha256_mb.c | 1013 ---------------- > arch/x86/crypto/sha256-mb/sha256_mb_ctx.h | 134 --- > arch/x86/crypto/sha256-mb/sha256_mb_mgr.h | 108 -- > .../sha256-mb/sha256_mb_mgr_datastruct.S | 304 ----- > .../sha256-mb/sha256_mb_mgr_flush_avx2.S | 307 ----- > .../sha256-mb/sha256_mb_mgr_init_avx2.c | 65 - > .../sha256-mb/sha256_mb_mgr_submit_avx2.S | 214 ---- > arch/x86/crypto/sha256-mb/sha256_x8_avx2.S | 598 ---------- > arch/x86/crypto/sha512-mb/Makefile | 12 - > arch/x86/crypto/sha512-mb/sha512_mb.c | 1047 ----------------- > arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 128 -- > arch/x86/crypto/sha512-mb/sha512_mb_mgr.h | 104 -- > .../sha512-mb/sha512_mb_mgr_datastruct.S | 281 ----- > .../sha512-mb/sha512_mb_mgr_flush_avx2.S | 297 ----- > .../sha512-mb/sha512_mb_mgr_init_avx2.c | 69 -- > .../sha512-mb/sha512_mb_mgr_submit_avx2.S | 224 ---- > arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 531 --------- > crypto/Kconfig | 62 - > crypto/Makefile | 1 - > crypto/mcryptd.c | 675 ----------- > include/crypto/mcryptd.h | 114 -- > 47 files changed, 8952 deletions(-) > delete mode 100644 arch/x86/crypto/sha1-mb/Makefile > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb.c > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > delete mode 100644 arch/x86/crypto/sha1-mb/sha1_x8_avx2.S > delete 
mode 100644 arch/x86/crypto/sha256-mb/Makefile > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb.c > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > delete mode 100644 arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > delete mode 100644 arch/x86/crypto/sha512-mb/Makefile > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb.c > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > delete mode 100644 arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > delete mode 100644 crypto/mcryptd.c > delete mode 100644 include/crypto/mcryptd.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index 24b200d91b30..05747b8ac88e 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -7487,14 +7487,6 @@ S: Supported > F: drivers/infiniband/hw/i40iw/ > F: include/uapi/rdma/i40iw-abi.h > > -INTEL SHA MULTIBUFFER DRIVER > -M: Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > -R: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > -L: linux-crypto@xxxxxxxxxxxxxxx > -S: Supported > -F: arch/x86/crypto/sha*-mb/ > -F: crypto/mcryptd.c > - > INTEL TELEMETRY DRIVER > M: Souvik Kumar Chakravarty <souvik.k.chakravarty@xxxxxxxxx> > L: platform-driver-x86@xxxxxxxxxxxxxxx > diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig > index 1d5483f6e457..70b10d712624 100644 > 
--- a/arch/m68k/configs/amiga_defconfig > +++ b/arch/m68k/configs/amiga_defconfig > @@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig > index 52a0af127951..211eec5859e8 100644 > --- a/arch/m68k/configs/apollo_defconfig > +++ b/arch/m68k/configs/apollo_defconfig > @@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig > index b3103e51268a..0da45c6084f7 100644 > --- a/arch/m68k/configs/atari_defconfig > +++ b/arch/m68k/configs/atari_defconfig > @@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig > index fb7d651a4cab..c09ae7219416 100644 > --- a/arch/m68k/configs/bvme6000_defconfig > +++ b/arch/m68k/configs/bvme6000_defconfig > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig > index 6b37f5537c39..8c4775b30748 100644 > --- a/arch/m68k/configs/hp300_defconfig > +++ b/arch/m68k/configs/hp300_defconfig > @@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > 
-CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig > index c717bf879449..48ad520e2f2d 100644 > --- a/arch/m68k/configs/mac_defconfig > +++ b/arch/m68k/configs/mac_defconfig > @@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig > index 226c994ce794..3a3cccb9f625 100644 > --- a/arch/m68k/configs/multi_defconfig > +++ b/arch/m68k/configs/multi_defconfig > @@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig > index b383327fd77a..63dc311f94ff 100644 > --- a/arch/m68k/configs/mvme147_defconfig > +++ b/arch/m68k/configs/mvme147_defconfig > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig > index 9783d3deb9e9..1ae39d1f9bb5 100644 > --- a/arch/m68k/configs/mvme16x_defconfig > +++ b/arch/m68k/configs/mvme16x_defconfig > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig > index a35d10ee10cb..ba2f351811da 
100644 > --- a/arch/m68k/configs/q40_defconfig > +++ b/arch/m68k/configs/q40_defconfig > @@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig > index 573bf922d448..544b7475ff6a 100644 > --- a/arch/m68k/configs/sun3_defconfig > +++ b/arch/m68k/configs/sun3_defconfig > @@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig > index efb27a7fcc55..149edafbb9f9 100644 > --- a/arch/m68k/configs/sun3x_defconfig > +++ b/arch/m68k/configs/sun3x_defconfig > @@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m > CONFIG_CRYPTO_MANAGER=y > CONFIG_CRYPTO_USER=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_AEGIS128=m > diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig > index 941d8cc6c9f5..259d1698ac50 100644 > --- a/arch/s390/configs/debug_defconfig > +++ b/arch/s390/configs/debug_defconfig > @@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m > # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set > CONFIG_CRYPTO_PCRYPT=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_LRW=m > diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig > index eb6f75f24208..37fd60c20e22 100644 > --- a/arch/s390/configs/performance_defconfig > +++ b/arch/s390/configs/performance_defconfig > @@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m > # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set > 
CONFIG_CRYPTO_PCRYPT=m > CONFIG_CRYPTO_CRYPTD=m > -CONFIG_CRYPTO_MCRYPTD=m > CONFIG_CRYPTO_TEST=m > CONFIG_CRYPTO_CHACHA20POLY1305=m > CONFIG_CRYPTO_LRW=m > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile > index a450ad573dcb..9edfa5469f9f 100644 > --- a/arch/x86/crypto/Makefile > +++ b/arch/x86/crypto/Makefile > @@ -60,9 +60,6 @@ endif > ifeq ($(avx2_supported),yes) > obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o > obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o > - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ > - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ > - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ > > obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o > endif > diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile > deleted file mode 100644 > index 815ded3ba90e..000000000000 > --- a/arch/x86/crypto/sha1-mb/Makefile > +++ /dev/null > @@ -1,14 +0,0 @@ > -# SPDX-License-Identifier: GPL-2.0 > -# > -# Arch-specific CryptoAPI modules. > -# > - > -OBJECT_FILES_NON_STANDARD := y > - > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > - $(comma)4)$(comma)%ymm2,yes,no) > -ifeq ($(avx2_supported),yes) > - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o > - sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ > - sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o > -endif > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c > deleted file mode 100644 > index b93805664c1d..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb.c > +++ /dev/null > @@ -1,1011 +0,0 @@ > -/* > - * Multi buffer SHA1 algorithm Glue Code > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > - > -#include <crypto/internal/hash.h> > -#include <linux/init.h> > -#include <linux/module.h> > -#include <linux/mm.h> > -#include <linux/cryptohash.h> > -#include <linux/types.h> > -#include <linux/list.h> > -#include <crypto/scatterwalk.h> > -#include <crypto/sha.h> > -#include <crypto/mcryptd.h> > -#include <crypto/crypto_wq.h> > -#include <asm/byteorder.h> > -#include <linux/hardirq.h> > -#include <asm/fpu/api.h> > -#include "sha1_mb_ctx.h" > - > -#define FLUSH_INTERVAL 1000 /* in usec */ > - > -static struct mcryptd_alg_state sha1_mb_alg_state; > - > -struct sha1_mb_ctx { > - struct mcryptd_ahash *mcryptd_tfm; > -}; > - > -static inline struct mcryptd_hash_request_ctx > - *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) > -{ > - struct ahash_request *areq; > - > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > -} > - > -static inline struct ahash_request > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > -{ > - return container_of((void *) ctx, struct ahash_request, __ctx); > -} > - > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > - struct ahash_request *areq) > -{ > - rctx->flag = HASH_UPDATE; > -} > - > -static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); > -static asmlinkage struct job_sha1* 
(*sha1_job_mgr_submit) > - (struct sha1_mb_mgr *state, struct job_sha1 *job); > -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) > - (struct sha1_mb_mgr *state); > -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) > - (struct sha1_mb_mgr *state); > - > -static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], > - uint64_t total_len) > -{ > - uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); > - > - memset(&padblock[i], 0, SHA1_BLOCK_SIZE); > - padblock[i] = 0x80; > - > - i += ((SHA1_BLOCK_SIZE - 1) & > - (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) > - + 1 + SHA1_PADLENGTHFIELD_SIZE; > - > -#if SHA1_PADLENGTHFIELD_SIZE == 16 > - *((uint64_t *) &padblock[i - 16]) = 0; > -#endif > - > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > - > - /* Number of extra blocks to hash */ > - return i >> SHA1_LOG2_BLOCK_SIZE; > -} > - > -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, > - struct sha1_hash_ctx *ctx) > -{ > - while (ctx) { > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Clear PROCESSING bit */ > - ctx->status = HASH_CTX_STS_COMPLETE; > - return ctx; > - } > - > - /* > - * If the extra blocks are empty, begin hashing what remains > - * in the user's buffer. > - */ > - if (ctx->partial_block_buffer_length == 0 && > - ctx->incoming_buffer_length) { > - > - const void *buffer = ctx->incoming_buffer; > - uint32_t len = ctx->incoming_buffer_length; > - uint32_t copy_len; > - > - /* > - * Only entire blocks can be hashed. > - * Copy remainder to extra blocks buffer. 
> - */ > - copy_len = len & (SHA1_BLOCK_SIZE-1); > - > - if (copy_len) { > - len -= copy_len; > - memcpy(ctx->partial_block_buffer, > - ((const char *) buffer + len), > - copy_len); > - ctx->partial_block_buffer_length = copy_len; > - } > - > - ctx->incoming_buffer_length = 0; > - > - /* len should be a multiple of the block size now */ > - assert((len % SHA1_BLOCK_SIZE) == 0); > - > - /* Set len to the number of blocks to be hashed */ > - len >>= SHA1_LOG2_BLOCK_SIZE; > - > - if (len) { > - > - ctx->job.buffer = (uint8_t *) buffer; > - ctx->job.len = len; > - ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, > - &ctx->job); > - continue; > - } > - } > - > - /* > - * If the extra blocks are not empty, then we are > - * either on the last block(s) or we need more > - * user input before continuing. > - */ > - if (ctx->status & HASH_CTX_STS_LAST) { > - > - uint8_t *buf = ctx->partial_block_buffer; > - uint32_t n_extra_blocks = > - sha1_pad(buf, ctx->total_length); > - > - ctx->status = (HASH_CTX_STS_PROCESSING | > - HASH_CTX_STS_COMPLETE); > - ctx->job.buffer = buf; > - ctx->job.len = (uint32_t) n_extra_blocks; > - ctx = (struct sha1_hash_ctx *) > - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); > - continue; > - } > - > - ctx->status = HASH_CTX_STS_IDLE; > - return ctx; > - } > - > - return NULL; > -} > - > -static struct sha1_hash_ctx > - *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) > -{ > - /* > - * If get_comp_job returns NULL, there are no jobs complete. > - * If get_comp_job returns a job, verify that it is safe to return to > - * the user. > - * If it is not ready, resubmit the job to finish processing. > - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. > - * Otherwise, all jobs currently being managed by the hash_ctx_mgr > - * still need processing. 
> - */ > - struct sha1_hash_ctx *ctx; > - > - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); > - return sha1_ctx_mgr_resubmit(mgr, ctx); > -} > - > -static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) > -{ > - sha1_job_mgr_init(&mgr->mgr); > -} > - > -static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, > - struct sha1_hash_ctx *ctx, > - const void *buffer, > - uint32_t len, > - int flags) > -{ > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > - /* User should not pass anything other than UPDATE or LAST */ > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > - return ctx; > - } > - > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > - /* Cannot submit to a currently processing job. */ > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > - return ctx; > - } > - > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Cannot update a finished job. */ > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > - return ctx; > - } > - > - /* > - * If we made it here, there were no errors during this call to > - * submit > - */ > - ctx->error = HASH_CTX_ERROR_NONE; > - > - /* Store buffer ptr info from user */ > - ctx->incoming_buffer = buffer; > - ctx->incoming_buffer_length = len; > - > - /* > - * Store the user's request flags and mark this ctx as currently > - * being processed. > - */ > - ctx->status = (flags & HASH_LAST) ? > - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > - HASH_CTX_STS_PROCESSING; > - > - /* Advance byte counter */ > - ctx->total_length += len; > - > - /* > - * If there is anything currently buffered in the extra blocks, > - * append to it until it contains a whole block. > - * Or if the user's buffer contains less than a whole block, > - * append as much as possible to the extra block. 
> - */ > - if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { > - /* > - * Compute how many bytes to copy from user buffer into > - * extra block > - */ > - uint32_t copy_len = SHA1_BLOCK_SIZE - > - ctx->partial_block_buffer_length; > - if (len < copy_len) > - copy_len = len; > - > - if (copy_len) { > - /* Copy and update relevant pointers and counters */ > - memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], > - buffer, copy_len); > - > - ctx->partial_block_buffer_length += copy_len; > - ctx->incoming_buffer = (const void *) > - ((const char *)buffer + copy_len); > - ctx->incoming_buffer_length = len - copy_len; > - } > - > - /* > - * The extra block should never contain more than 1 block > - * here > - */ > - assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); > - > - /* > - * If the extra block buffer contains exactly 1 block, it can > - * be hashed. > - */ > - if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { > - ctx->partial_block_buffer_length = 0; > - > - ctx->job.buffer = ctx->partial_block_buffer; > - ctx->job.len = 1; > - ctx = (struct sha1_hash_ctx *) > - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); > - } > - } > - > - return sha1_ctx_mgr_resubmit(mgr, ctx); > -} > - > -static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) > -{ > - struct sha1_hash_ctx *ctx; > - > - while (1) { > - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); > - > - /* If flush returned 0, there are no more jobs in flight. */ > - if (!ctx) > - return NULL; > - > - /* > - * If flush returned a job, resubmit the job to finish > - * processing. > - */ > - ctx = sha1_ctx_mgr_resubmit(mgr, ctx); > - > - /* > - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be > - * returned. Otherwise, all jobs currently being managed by the > - * sha1_ctx_mgr still need processing. Loop. 
> - */ > - if (ctx) > - return ctx; > - } > -} > - > -static int sha1_mb_init(struct ahash_request *areq) > -{ > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > - > - hash_ctx_init(sctx); > - sctx->job.result_digest[0] = SHA1_H0; > - sctx->job.result_digest[1] = SHA1_H1; > - sctx->job.result_digest[2] = SHA1_H2; > - sctx->job.result_digest[3] = SHA1_H3; > - sctx->job.result_digest[4] = SHA1_H4; > - sctx->total_length = 0; > - sctx->partial_block_buffer_length = 0; > - sctx->status = HASH_CTX_STS_IDLE; > - > - return 0; > -} > - > -static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > -{ > - int i; > - struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > - __be32 *dst = (__be32 *) rctx->out; > - > - for (i = 0; i < 5; ++i) > - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); > - > - return 0; > -} > - > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > - struct mcryptd_alg_cstate *cstate, bool flush) > -{ > - int flag = HASH_UPDATE; > - int nbytes, err = 0; > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > - struct sha1_hash_ctx *sha_ctx; > - > - /* more work ? 
*/ > - while (!(rctx->flag & HASH_DONE)) { > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > - if (nbytes < 0) { > - err = nbytes; > - goto out; > - } > - /* check if the walk is done */ > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - if (rctx->flag & HASH_FINAL) > - flag |= HASH_LAST; > - > - } > - sha_ctx = (struct sha1_hash_ctx *) > - ahash_request_ctx(&rctx->areq); > - kernel_fpu_begin(); > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, > - rctx->walk.data, nbytes, flag); > - if (!sha_ctx) { > - if (flush) > - sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); > - } > - kernel_fpu_end(); > - if (sha_ctx) > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - else { > - rctx = NULL; > - goto out; > - } > - } > - > - /* copy the results */ > - if (rctx->flag & HASH_FINAL) > - sha1_mb_set_results(rctx); > - > -out: > - *ret_rctx = rctx; > - return err; > -} > - > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate, > - int err) > -{ > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha1_hash_ctx *sha_ctx; > - struct mcryptd_hash_request_ctx *req_ctx; > - int ret; > - > - /* remove from work list */ > - spin_lock(&cstate->work_lock); > - list_del(&rctx->waiter); > - spin_unlock(&cstate->work_lock); > - > - if (irqs_disabled()) > - rctx->complete(&req->base, err); > - else { > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > - } > - > - /* check to see if there are other jobs that are done */ > - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); > - while (sha_ctx) { > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&req_ctx, cstate, false); > - if (req_ctx) { > - spin_lock(&cstate->work_lock); > - list_del(&req_ctx->waiter); > - spin_unlock(&cstate->work_lock); > - > - req = cast_mcryptd_ctx_to_req(req_ctx); > - if (irqs_disabled()) > - req_ctx->complete(&req->base, ret); > - else { > - 
local_bh_disable(); > - req_ctx->complete(&req->base, ret); > - local_bh_enable(); > - } > - } > - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); > - } > - > - return 0; > -} > - > -static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate) > -{ > - unsigned long next_flush; > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > - > - /* initialize tag */ > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > - rctx->tag.seq_num = cstate->next_seq_num++; > - next_flush = rctx->tag.arrival + delay; > - rctx->tag.expire = next_flush; > - > - spin_lock(&cstate->work_lock); > - list_add_tail(&rctx->waiter, &cstate->work_list); > - spin_unlock(&cstate->work_lock); > - > - mcryptd_arm_flusher(cstate, delay); > -} > - > -static int sha1_mb_update(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha1_hash_ctx *sha_ctx; > - int ret = 0, nbytes; > - > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) > - rctx->flag |= HASH_DONE; > - > - /* submit */ > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > - sha1_mb_add_list(rctx, cstate); > - kernel_fpu_begin(); > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > - nbytes, HASH_UPDATE); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = 
cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha1_mb_finup(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha1_hash_ctx *sha_ctx; > - int ret = 0, flag = HASH_UPDATE, nbytes; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - flag = HASH_LAST; > - } > - > - /* submit */ > - rctx->flag |= HASH_FINAL; > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > - sha1_mb_add_list(rctx, cstate); > - > - kernel_fpu_begin(); > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > - nbytes, flag); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha1_mb_final(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > - struct mcryptd_alg_cstate *cstate = > - 
this_cpu_ptr(sha1_mb_alg_state.alg_cstate); > - > - struct sha1_hash_ctx *sha_ctx; > - int ret = 0; > - u8 data; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - rctx->flag |= HASH_DONE | HASH_FINAL; > - > - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); > - /* flag HASH_FINAL and 0 data size */ > - sha1_mb_add_list(rctx, cstate); > - kernel_fpu_begin(); > - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, > - HASH_LAST); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha1_mb_export(struct ahash_request *areq, void *out) > -{ > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(out, sctx, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha1_mb_import(struct ahash_request *areq, const void *in) > -{ > - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(sctx, in, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) > -{ > - struct mcryptd_ahash *mcryptd_tfm; > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - struct mcryptd_hash_ctx *mctx; > - > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", > - CRYPTO_ALG_INTERNAL, > - CRYPTO_ALG_INTERNAL); > - if (IS_ERR(mcryptd_tfm)) > - return PTR_ERR(mcryptd_tfm); > - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > - mctx->alg_state = &sha1_mb_alg_state; > - ctx->mcryptd_tfm = mcryptd_tfm; > - 
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > - > - return 0; > -} > - > -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm) > -{ > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - sizeof(struct sha1_hash_ctx)); > - > - return 0; > -} > - > -static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static struct ahash_alg sha1_mb_areq_alg = { > - .init = sha1_mb_init, > - .update = sha1_mb_update, > - .final = sha1_mb_final, > - .finup = sha1_mb_finup, > - .export = sha1_mb_export, > - .import = sha1_mb_import, > - .halg = { > - .digestsize = SHA1_DIGEST_SIZE, > - .statesize = sizeof(struct sha1_hash_ctx), > - .base = { > - .cra_name = "__sha1-mb", > - .cra_driver_name = "__intel_sha1-mb", > - .cra_priority = 100, > - /* > - * use ASYNC flag as some buffers in multi-buffer > - * algo may not have completed before hashing thread > - * sleep > - */ > - .cra_flags = CRYPTO_ALG_ASYNC | > - CRYPTO_ALG_INTERNAL, > - .cra_blocksize = SHA1_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT > - (sha1_mb_areq_alg.halg.base.cra_list), > - .cra_init = sha1_mb_areq_init_tfm, > - .cra_exit = sha1_mb_areq_exit_tfm, > - .cra_ctxsize = sizeof(struct sha1_hash_ctx), > - } > - } > -}; > - > -static int sha1_mb_async_init(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - 
ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_init(mcryptd_req); > -} > - > -static int sha1_mb_async_update(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_update(mcryptd_req); > -} > - > -static int sha1_mb_async_finup(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_finup(mcryptd_req); > -} > - > -static int sha1_mb_async_final(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_final(mcryptd_req); > -} > - > -static int sha1_mb_async_digest(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_digest(mcryptd_req); > -} > - > -static int 
sha1_mb_async_export(struct ahash_request *req, void *out) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_export(mcryptd_req, out); > -} > - > -static int sha1_mb_async_import(struct ahash_request *req, const void *in) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > - struct mcryptd_hash_request_ctx *rctx; > - struct ahash_request *areq; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - rctx = ahash_request_ctx(mcryptd_req); > - areq = &rctx->areq; > - > - ahash_request_set_tfm(areq, child); > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > - rctx->complete, req); > - > - return crypto_ahash_import(mcryptd_req, in); > -} > - > -static struct ahash_alg sha1_mb_async_alg = { > - .init = sha1_mb_async_init, > - .update = sha1_mb_async_update, > - .final = sha1_mb_async_final, > - .finup = sha1_mb_async_finup, > - .digest = sha1_mb_async_digest, > - .export = sha1_mb_async_export, > - .import = sha1_mb_async_import, > - .halg = { > - .digestsize = SHA1_DIGEST_SIZE, > - .statesize = sizeof(struct sha1_hash_ctx), > - .base = { > - .cra_name = "sha1", > - .cra_driver_name = "sha1_mb", > - /* > - * Low priority, since with few concurrent hash requests > - * this is extremely slow due to the flush delay. 
Users > - * whose workloads would benefit from this can request > - * it explicitly by driver name, or can increase its > - * priority at runtime using NETLINK_CRYPTO. > - */ > - .cra_priority = 50, > - .cra_flags = CRYPTO_ALG_ASYNC, > - .cra_blocksize = SHA1_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list), > - .cra_init = sha1_mb_async_init_tfm, > - .cra_exit = sha1_mb_async_exit_tfm, > - .cra_ctxsize = sizeof(struct sha1_mb_ctx), > - .cra_alignmask = 0, > - }, > - }, > -}; > - > -static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) > -{ > - struct mcryptd_hash_request_ctx *rctx; > - unsigned long cur_time; > - unsigned long next_flush = 0; > - struct sha1_hash_ctx *sha_ctx; > - > - > - cur_time = jiffies; > - > - while (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - if (time_before(cur_time, rctx->tag.expire)) > - break; > - kernel_fpu_begin(); > - sha_ctx = (struct sha1_hash_ctx *) > - sha1_ctx_mgr_flush(cstate->mgr); > - kernel_fpu_end(); > - if (!sha_ctx) { > - pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); > - break; > - } > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - sha_finish_walk(&rctx, cstate, true); > - sha_complete_job(rctx, cstate, 0); > - } > - > - if (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - /* get the hash context and then flush time */ > - next_flush = rctx->tag.expire; > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > - } > - return next_flush; > -} > - > -static int __init sha1_mb_mod_init(void) > -{ > - > - int cpu; > - int err; > - struct mcryptd_alg_cstate *cpu_state; > - > - /* check for dependent cpu features */ > - if (!boot_cpu_has(X86_FEATURE_AVX2) || > - !boot_cpu_has(X86_FEATURE_BMI2)) > - return -ENODEV; > - > - /* initialize multibuffer structures */ 
> - sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); > - > - sha1_job_mgr_init = sha1_mb_mgr_init_avx2; > - sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; > - sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; > - sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; > - > - if (!sha1_mb_alg_state.alg_cstate) > - return -ENOMEM; > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > - cpu_state->next_flush = 0; > - cpu_state->next_seq_num = 0; > - cpu_state->flusher_engaged = false; > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > - cpu_state->cpu = cpu; > - cpu_state->alg_state = &sha1_mb_alg_state; > - cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr), > - GFP_KERNEL); > - if (!cpu_state->mgr) > - goto err2; > - sha1_ctx_mgr_init(cpu_state->mgr); > - INIT_LIST_HEAD(&cpu_state->work_list); > - spin_lock_init(&cpu_state->work_lock); > - } > - sha1_mb_alg_state.flusher = &sha1_mb_flusher; > - > - err = crypto_register_ahash(&sha1_mb_areq_alg); > - if (err) > - goto err2; > - err = crypto_register_ahash(&sha1_mb_async_alg); > - if (err) > - goto err1; > - > - > - return 0; > -err1: > - crypto_unregister_ahash(&sha1_mb_areq_alg); > -err2: > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha1_mb_alg_state.alg_cstate); > - return -ENODEV; > -} > - > -static void __exit sha1_mb_mod_fini(void) > -{ > - int cpu; > - struct mcryptd_alg_cstate *cpu_state; > - > - crypto_unregister_ahash(&sha1_mb_async_alg); > - crypto_unregister_ahash(&sha1_mb_areq_alg); > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha1_mb_alg_state.alg_cstate); > -} > - > -module_init(sha1_mb_mod_init); > -module_exit(sha1_mb_mod_fini); > - > -MODULE_LICENSE("GPL"); > -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer 
accelerated"); > - > -MODULE_ALIAS_CRYPTO("sha1"); > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > deleted file mode 100644 > index 9454bd16f9f8..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h > +++ /dev/null > @@ -1,134 +0,0 @@ > -/* > - * Header file for multi buffer SHA context > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. 
> - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#ifndef _SHA_MB_CTX_INTERNAL_H > -#define _SHA_MB_CTX_INTERNAL_H > - > -#include "sha1_mb_mgr.h" > - > -#define HASH_UPDATE 0x00 > -#define HASH_LAST 0x01 > -#define HASH_DONE 0x02 > -#define HASH_FINAL 0x04 > - > -#define HASH_CTX_STS_IDLE 0x00 > -#define HASH_CTX_STS_PROCESSING 0x01 > -#define HASH_CTX_STS_LAST 0x02 > -#define HASH_CTX_STS_COMPLETE 0x04 > - > -enum hash_ctx_error { > - HASH_CTX_ERROR_NONE = 0, > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > - > -#ifdef HASH_CTX_DEBUG > - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, > -#endif > -}; > - > - > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > -#define hash_ctx_status(ctx) ((ctx)->status) > -#define hash_ctx_error(ctx) ((ctx)->error) > -#define hash_ctx_init(ctx) \ > - do { \ > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > - } while (0) > - > - > 
-/* Hash Constants and Typedefs */ > -#define SHA1_DIGEST_LENGTH 5 > -#define SHA1_LOG2_BLOCK_SIZE 6 > - > -#define SHA1_PADLENGTHFIELD_SIZE 8 > - > -#ifdef SHA_MB_DEBUG > -#define assert(expr) \ > -do { \ > - if (unlikely(!(expr))) { \ > - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ > - #expr, __FILE__, __func__, __LINE__); \ > - } \ > -} while (0) > -#else > -#define assert(expr) do {} while (0) > -#endif > - > -struct sha1_ctx_mgr { > - struct sha1_mb_mgr mgr; > -}; > - > -/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */ > - > -struct sha1_hash_ctx { > - /* Must be at struct offset 0 */ > - struct job_sha1 job; > - /* status flag */ > - int status; > - /* error flag */ > - int error; > - > - uint64_t total_length; > - const void *incoming_buffer; > - uint32_t incoming_buffer_length; > - uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; > - uint32_t partial_block_buffer_length; > - void *user_data; > -}; > - > -#endif > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > deleted file mode 100644 > index 08ad1a9acfd7..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h > +++ /dev/null > @@ -1,110 +0,0 @@ > -/* > - * Header file for multi buffer SHA1 algorithm manager > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. 
> - * > - * Contact Information: > - * James Guilford <james.guilford@xxxxxxxxx> > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > -#ifndef __SHA_MB_MGR_H > -#define __SHA_MB_MGR_H > - > - > -#include <linux/types.h> > - > -#define NUM_SHA1_DIGEST_WORDS 5 > - > -enum job_sts { STS_UNKNOWN = 0, > - STS_BEING_PROCESSED = 1, > - STS_COMPLETED = 2, > - STS_INTERNAL_ERROR = 3, > - STS_ERROR = 4 > -}; > - > -struct job_sha1 { > - u8 *buffer; > - u32 len; > - u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); > - enum job_sts status; > - void *user_data; > -}; > - > -/* SHA1 out-of-order scheduler */ > - > -/* typedef uint32_t sha1_digest_array[5][8]; */ > - > -struct sha1_args_x8 { > - uint32_t digest[5][8]; > - uint8_t *data_ptr[8]; > -}; > - > -struct sha1_lane_data { > - struct job_sha1 *job_in_lane; > -}; > - > -struct sha1_mb_mgr { > - struct sha1_args_x8 args; > - > - uint32_t lens[8]; > - > - /* each byte is index (0...7) of unused lanes */ > - uint64_t unused_lanes; > - /* byte 4 is set to FF as a flag */ > - struct sha1_lane_data ldata[8]; > -}; > - > - > -#define SHA1_MB_MGR_NUM_LANES_AVX2 8 > - > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state); > -struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state, > - struct job_sha1 *job); > -struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state); > -struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state); > - > -#endif > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > deleted file mode 100644 > index 86688c6e7a25..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S > +++ /dev/null > @@ -1,287 +0,0 @@ > -/* > - * Header file for multi buffer SHA1 algorithm data structure > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * James Guilford <james.guilford@xxxxxxxxx> > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -# Macros for defining data structures > - > -# Usage example > - > -#START_FIELDS # JOB_AES > -### name size align > -#FIELD _plaintext, 8, 8 # pointer to plaintext > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > -#FIELD _IV, 16, 8 # IV > -#FIELD _keys, 8, 8 # pointer to keys > -#FIELD _len, 4, 4 # length in bytes > -#FIELD _status, 4, 4 # status enumeration > -#FIELD _user_data, 8, 8 # pointer to user data > -#UNION _union, size1, align1, \ > -# size2, align2, \ > -# size3, align3, \ > -# ... > -#END_FIELDS > -#%assign _JOB_AES_size _FIELD_OFFSET > -#%assign _JOB_AES_align _STRUCT_ALIGN > - > -######################################################################### > - > -# Alternate "struc-like" syntax: > -# STRUCT job_aes2 > -# RES_Q .plaintext, 1 > -# RES_Q .ciphertext, 1 > -# RES_DQ .IV, 1 > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > -# RES_U .union, size1, align1, \ > -# size2, align2, \ > -# ... > -# ENDSTRUCT > -# # Following only needed if nesting > -# %assign job_aes2_size _FIELD_OFFSET > -# %assign job_aes2_align _STRUCT_ALIGN > -# > -# RES_* macros take a name, a count and an optional alignment. > -# The count in in terms of the base size of the macro, and the > -# default alignment is the base size. > -# The macros are: > -# Macro Base size > -# RES_B 1 > -# RES_W 2 > -# RES_D 4 > -# RES_Q 8 > -# RES_DQ 16 > -# RES_Y 32 > -# RES_Z 64 > -# > -# RES_U defines a union. 
It's arguments are a name and two or more > -# pairs of "size, alignment" > -# > -# The two assigns are only needed if this structure is being nested > -# within another. Even if the assigns are not done, one can still use > -# STRUCT_NAME_size as the size of the structure. > -# > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > -# > -# The differences between this and using "struc" directly are that each > -# type is implicitly aligned to its natural length (although this can be > -# over-ridden with an explicit third parameter), and that the structure > -# is padded at the end to its overall alignment. > -# > - > -######################################################################### > - > -#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ > -#define _SHA1_MB_MGR_DATASTRUCT_ASM_ > - > -## START_FIELDS > -.macro START_FIELDS > - _FIELD_OFFSET = 0 > - _STRUCT_ALIGN = 0 > -.endm > - > -## FIELD name size align > -.macro FIELD name size align > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > - \name = _FIELD_OFFSET > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > -.if (\align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = \align > -.endif > -.endm > - > -## END_FIELDS > -.macro END_FIELDS > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > -.endm > - > -######################################################################## > - > -.macro STRUCT p1 > -START_FIELDS > -.struc \p1 > -.endm > - > -.macro ENDSTRUCT > - tmp = _FIELD_OFFSET > - END_FIELDS > - tmp = (_FIELD_OFFSET - %%tmp) > -.if (tmp > 0) > - .lcomm tmp > -.endif > -.endstruc > -.endm > - > -## RES_int name size align > -.macro RES_int p1 p2 p3 > - name = \p1 > - size = \p2 > - align = .\p3 > - > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > -.align align > -.lcomm name size > - _FIELD_OFFSET = _FIELD_OFFSET + (size) > -.if (align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = align > -.endif > -.endm > - > - > - > -# macro RES_B name, size [, 
align] > -.macro RES_B _name, _size, _align=1 > -RES_int _name _size _align > -.endm > - > -# macro RES_W name, size [, align] > -.macro RES_W _name, _size, _align=2 > -RES_int _name 2*(_size) _align > -.endm > - > -# macro RES_D name, size [, align] > -.macro RES_D _name, _size, _align=4 > -RES_int _name 4*(_size) _align > -.endm > - > -# macro RES_Q name, size [, align] > -.macro RES_Q _name, _size, _align=8 > -RES_int _name 8*(_size) _align > -.endm > - > -# macro RES_DQ name, size [, align] > -.macro RES_DQ _name, _size, _align=16 > -RES_int _name 16*(_size) _align > -.endm > - > -# macro RES_Y name, size [, align] > -.macro RES_Y _name, _size, _align=32 > -RES_int _name 32*(_size) _align > -.endm > - > -# macro RES_Z name, size [, align] > -.macro RES_Z _name, _size, _align=64 > -RES_int _name 64*(_size) _align > -.endm > - > - > -#endif > - > -######################################################################## > -#### Define constants > -######################################################################## > - > -######################################################################## > -#### Define SHA1 Out Of Order Data Structures > -######################################################################## > - > -START_FIELDS # LANE_DATA > -### name size align > -FIELD _job_in_lane, 8, 8 # pointer to job object > -END_FIELDS > - > -_LANE_DATA_size = _FIELD_OFFSET > -_LANE_DATA_align = _STRUCT_ALIGN > - > -######################################################################## > - > -START_FIELDS # SHA1_ARGS_X8 > -### name size align > -FIELD _digest, 4*5*8, 16 # transposed digest > -FIELD _data_ptr, 8*8, 8 # array of pointers to data > -END_FIELDS > - > -_SHA1_ARGS_X4_size = _FIELD_OFFSET > -_SHA1_ARGS_X4_align = _STRUCT_ALIGN > -_SHA1_ARGS_X8_size = _FIELD_OFFSET > -_SHA1_ARGS_X8_align = _STRUCT_ALIGN > - > -######################################################################## > - > -START_FIELDS # MB_MGR > -### name size align > -FIELD _args, 
_SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align > -FIELD _lens, 4*8, 8 > -FIELD _unused_lanes, 8, 8 > -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align > -END_FIELDS > - > -_MB_MGR_size = _FIELD_OFFSET > -_MB_MGR_align = _STRUCT_ALIGN > - > -_args_digest = _args + _digest > -_args_data_ptr = _args + _data_ptr > - > - > -######################################################################## > -#### Define constants > -######################################################################## > - > -#define STS_UNKNOWN 0 > -#define STS_BEING_PROCESSED 1 > -#define STS_COMPLETED 2 > - > -######################################################################## > -#### Define JOB_SHA1 structure > -######################################################################## > - > -START_FIELDS # JOB_SHA1 > - > -### name size align > -FIELD _buffer, 8, 8 # pointer to buffer > -FIELD _len, 4, 4 # length in bytes > -FIELD _result_digest, 5*4, 32 # Digest (output) > -FIELD _status, 4, 4 > -FIELD _user_data, 8, 8 > -END_FIELDS > - > -_JOB_SHA1_size = _FIELD_OFFSET > -_JOB_SHA1_align = _STRUCT_ALIGN > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > deleted file mode 100644 > index 7cfba738f104..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S > +++ /dev/null > @@ -1,304 +0,0 @@ > -/* > - * Flush routine for SHA1 multibuffer > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. 
> - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * James Guilford <james.guilford@xxxxxxxxx> > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha1_mb_mgr_datastruct.S" > - > - > -.extern sha1_x8_avx2 > - > -# LINUX register definitions > -#define arg1 %rdi > -#define arg2 %rsi > - > -# Common definitions > -#define state arg1 > -#define job arg2 > -#define len2 arg2 > - > -# idx must be a register not clobbered by sha1_x8_avx2 > -#define idx %r8 > -#define DWORD_idx %r8d > - > -#define unused_lanes %rbx > -#define lane_data %rbx > -#define tmp2 %rbx > -#define tmp2_w %ebx > - > -#define job_rax %rax > -#define tmp1 %rax > -#define size_offset %rax > -#define tmp %rax > -#define start_offset %rax > - > -#define tmp3 %arg1 > - > -#define extra_blocks %arg2 > -#define p %arg2 > - > -.macro LABEL prefix n > -\prefix\n\(): > -.endm > - > -.macro JNE_SKIP i > -jne skip_\i > -.endm > - > -.altmacro > -.macro SET_OFFSET _offset > -offset = \_offset > -.endm > -.noaltmacro > - > -# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) > -# arg 1 : rcx : state > -ENTRY(sha1_mb_mgr_flush_avx2) > - FRAME_BEGIN > - push %rbx > - > - # If bit (32+3) is set, then all lanes are empty > - mov _unused_lanes(state), unused_lanes > - bt $32+3, unused_lanes > - jc return_null > - > - # find a lane with a non-null job > - xor idx, idx > - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne one(%rip), idx > - offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, 
offset(state) > - cmovne two(%rip), idx > - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne three(%rip), idx > - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne four(%rip), idx > - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne five(%rip), idx > - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne six(%rip), idx > - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne seven(%rip), idx > - > - # copy idx to empty lanes > -copy_lane_data: > - offset = (_args + _data_ptr) > - mov offset(state,idx,8), tmp > - > - I = 0 > -.rep 8 > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > -.altmacro > - JNE_SKIP %I > - offset = (_args + _data_ptr + 8*I) > - mov tmp, offset(state) > - offset = (_lens + 4*I) > - movl $0xFFFFFFFF, offset(state) > -LABEL skip_ %I > - I = (I+1) > -.noaltmacro > -.endr > - > - # Find min length > - vmovdqu _lens+0*16(state), %xmm0 > - vmovdqu _lens+1*16(state), %xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > - > - vmovd %xmm2, DWORD_idx > - mov idx, len2 > - and $0xF, idx > - shr $4, len2 > - jz len_is_0 > - > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > - vpshufd $0, %xmm2, %xmm2 > - > - vpsubd %xmm2, %xmm0, %xmm0 > - vpsubd %xmm2, %xmm1, %xmm1 > - > - vmovdqu %xmm0, _lens+0*16(state) > - vmovdqu %xmm1, _lens+1*16(state) > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha1_x8_avx2 > - # state and idx are intact > - > - > -len_is_0: > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea 
_ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $4, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens(state, idx, 4) > - > - vmovd _args_digest(state , idx, 4) , %xmm0 > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > - movl _args_digest+4*32(state, idx, 4), tmp2_w > - > - vmovdqu %xmm0, _result_digest(job_rax) > - offset = (_result_digest + 1*16) > - mov tmp2_w, offset(job_rax) > - > -return: > - pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor job_rax, job_rax > - jmp return > -ENDPROC(sha1_mb_mgr_flush_avx2) > - > - > -################################################################# > - > -.align 16 > -ENTRY(sha1_mb_mgr_get_comp_job_avx2) > - push %rbx > - > - ## if bit 32+3 is set, then all lanes are empty > - mov _unused_lanes(state), unused_lanes > - bt $(32+3), unused_lanes > - jc .return_null > - > - # Find min length > - vmovdqu _lens(state), %xmm0 > - vmovdqu _lens+1*16(state), %xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > - > - vmovd %xmm2, DWORD_idx > - test $~0xF, idx > - jnz .return_null > - > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $4, unused_lanes > - or idx, unused_lanes > - mov 
unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens(state, idx, 4) > - > - vmovd _args_digest(state, idx, 4), %xmm0 > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > - movl _args_digest+4*32(state, idx, 4), tmp2_w > - > - vmovdqu %xmm0, _result_digest(job_rax) > - movl tmp2_w, _result_digest+1*16(job_rax) > - > - pop %rbx > - > - ret > - > -.return_null: > - xor job_rax, job_rax > - pop %rbx > - ret > -ENDPROC(sha1_mb_mgr_get_comp_job_avx2) > - > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > -.align 16 > -clear_low_nibble: > -.octa 0x000000000000000000000000FFFFFFF0 > - > -.section .rodata.cst8, "aM", @progbits, 8 > -.align 8 > -one: > -.quad 1 > -two: > -.quad 2 > -three: > -.quad 3 > -four: > -.quad 4 > -five: > -.quad 5 > -six: > -.quad 6 > -seven: > -.quad 7 > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > deleted file mode 100644 > index d2add0d35f43..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c > +++ /dev/null > @@ -1,64 +0,0 @@ > -/* > - * Initialization code for multi buffer SHA1 algorithm for AVX2 > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. 
> - * > - * Contact Information: > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#include "sha1_mb_mgr.h" > - > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) > -{ > - unsigned int j; > - state->unused_lanes = 0xF76543210ULL; > - for (j = 0; j < 8; j++) { > - state->lens[j] = 0xFFFFFFFF; > - state->ldata[j].job_in_lane = NULL; > - } > -} > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > deleted file mode 100644 > index 7a93b1c0d69a..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S > +++ /dev/null > @@ -1,209 +0,0 @@ > -/* > - * Buffer submit code for multi buffer SHA1 algorithm > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * James Guilford <james.guilford@xxxxxxxxx> > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. 
> - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha1_mb_mgr_datastruct.S" > - > - > -.extern sha1_x8_avx > - > -# LINUX register definitions > -arg1 = %rdi > -arg2 = %rsi > -size_offset = %rcx > -tmp2 = %rcx > -extra_blocks = %rdx > - > -# Common definitions > -#define state arg1 > -#define job %rsi > -#define len2 arg2 > -#define p2 arg2 > - > -# idx must be a register not clobberred by sha1_x8_avx2 > -idx = %r8 > -DWORD_idx = %r8d > -last_len = %r8 > - > -p = %r11 > -start_offset = %r11 > - > -unused_lanes = %rbx > -BYTE_unused_lanes = %bl > - > -job_rax = %rax > -len = %rax > -DWORD_len = %eax > - > -lane = %r12 > -tmp3 = %r12 > - > -tmp = %r9 > -DWORD_tmp = %r9d > - > -lane_data = %r10 > - > -# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) > -# arg 1 : rcx : state > -# arg 2 : rdx : job > -ENTRY(sha1_mb_mgr_submit_avx2) > - FRAME_BEGIN > - push %rbx > - push %r12 > - > - mov _unused_lanes(state), unused_lanes > - mov unused_lanes, lane > - and $0xF, lane > - shr $4, unused_lanes > - imul $_LANE_DATA_size, lane, lane_data > - movl $STS_BEING_PROCESSED, _status(job) > - lea _ldata(state, lane_data), lane_data > - mov unused_lanes, _unused_lanes(state) > - movl _len(job), DWORD_len > - > - mov job, _job_in_lane(lane_data) > - shl $4, len > - or lane, len > - > - movl DWORD_len, _lens(state , lane, 4) > - > - # Load digest words from result_digest > - vmovdqu _result_digest(job), %xmm0 > - mov _result_digest+1*16(job), DWORD_tmp > - vmovd %xmm0, _args_digest(state, lane, 4) > - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) > - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) > - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) > - movl DWORD_tmp, _args_digest+4*32(state , lane, 4) > - > - mov _buffer(job), p > - mov p, _args_data_ptr(state, lane, 8) > - > - cmp $0xF, unused_lanes > - jne return_null > - > -start_loop: > - # Find min length > - vmovdqa _lens(state), %xmm0 > - vmovdqa _lens+1*16(state), 
%xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword > - > - vmovd %xmm2, DWORD_idx > - mov idx, len2 > - and $0xF, idx > - shr $4, len2 > - jz len_is_0 > - > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > - vpshufd $0, %xmm2, %xmm2 > - > - vpsubd %xmm2, %xmm0, %xmm0 > - vpsubd %xmm2, %xmm1, %xmm1 > - > - vmovdqa %xmm0, _lens + 0*16(state) > - vmovdqa %xmm1, _lens + 1*16(state) > - > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha1_x8_avx2 > - > - # state and idx are intact > - > -len_is_0: > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - mov _unused_lanes(state), unused_lanes > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - shl $4, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens(state, idx, 4) > - > - vmovd _args_digest(state, idx, 4), %xmm0 > - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 > - movl _args_digest+4*32(state, idx, 4), DWORD_tmp > - > - vmovdqu %xmm0, _result_digest(job_rax) > - movl DWORD_tmp, _result_digest+1*16(job_rax) > - > -return: > - pop %r12 > - pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor job_rax, job_rax > - jmp return > - > -ENDPROC(sha1_mb_mgr_submit_avx2) > - > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > -.align 16 > -clear_low_nibble: > - .octa 0x000000000000000000000000FFFFFFF0 > diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S 
> deleted file mode 100644 > index 20f77aa633de..000000000000 > --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S > +++ /dev/null > @@ -1,492 +0,0 @@ > -/* > - * Multi-buffer SHA1 algorithm hash compute routine > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * James Guilford <james.guilford@xxxxxxxxx> > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2014 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. 
> - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#include <linux/linkage.h> > -#include "sha1_mb_mgr_datastruct.S" > - > -## code to compute oct SHA1 using SSE-256 > -## outer calling routine takes care of save and restore of XMM registers > - > -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 > -## > -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 > -## Linux preserves: rdi rbp r8 > -## > -## clobbers ymm0-15 > - > - > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 > -# "transpose" data in {r0...r7} using temps {t0...t1} > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} > -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} > -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} > -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} > -# > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} > -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} > -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} > -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} > -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} > -# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} > -# r6 = {h6 g6 f6 e6 d6 
c6 b6 a6} > -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} > -# > - > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 > - # process top half (r0..r3) {a...d} > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} > - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} > - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} > - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} > - > - # use r2 in place of t0 > - # process bottom half (r4..r7) {e...h} > - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} > - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} > - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} > - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} > - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} > - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} > - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} > - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} > - > - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 > - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 > - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 > - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 > - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 > - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 > - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 > - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 > - > -.endm > -## > -## Magic functions defined in FIPS 180-1 > -## > -# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) > -.macro MAGIC_F0 regF regB regC regD regT > - vpxor \regD, \regC, \regF > - vpand \regB, \regF, \regF > - vpxor \regD, \regF, \regF > -.endm > - > -# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) > -.macro MAGIC_F1 regF regB 
regC regD regT > - vpxor \regC, \regD, \regF > - vpxor \regB, \regF, \regF > -.endm > - > -# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) > -.macro MAGIC_F2 regF regB regC regD regT > - vpor \regC, \regB, \regF > - vpand \regC, \regB, \regT > - vpand \regD, \regF, \regF > - vpor \regT, \regF, \regF > -.endm > - > -# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) > -.macro MAGIC_F3 regF regB regC regD regT > - MAGIC_F1 \regF,\regB,\regC,\regD,\regT > -.endm > - > -# PROLD reg, imm, tmp > -.macro PROLD reg imm tmp > - vpsrld $(32-\imm), \reg, \tmp > - vpslld $\imm, \reg, \reg > - vpor \tmp, \reg, \reg > -.endm > - > -.macro PROLD_nd reg imm tmp src > - vpsrld $(32-\imm), \src, \tmp > - vpslld $\imm, \src, \reg > - vpor \tmp, \reg, \reg > -.endm > - > -.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC > - vpaddd \immCNT, \regE, \regE > - vpaddd \memW*32(%rsp), \regE, \regE > - PROLD_nd \regT, 5, \regF, \regA > - vpaddd \regT, \regE, \regE > - \MAGIC \regF, \regB, \regC, \regD, \regT > - PROLD \regB, 30, \regT > - vpaddd \regF, \regE, \regE > -.endm > - > -.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC > - vpaddd \immCNT, \regE, \regE > - offset = ((\memW - 14) & 15) * 32 > - vmovdqu offset(%rsp), W14 > - vpxor W14, W16, W16 > - offset = ((\memW - 8) & 15) * 32 > - vpxor offset(%rsp), W16, W16 > - offset = ((\memW - 3) & 15) * 32 > - vpxor offset(%rsp), W16, W16 > - vpsrld $(32-1), W16, \regF > - vpslld $1, W16, W16 > - vpor W16, \regF, \regF > - > - ROTATE_W > - > - offset = ((\memW - 0) & 15) * 32 > - vmovdqu \regF, offset(%rsp) > - vpaddd \regF, \regE, \regE > - PROLD_nd \regT, 5, \regF, \regA > - vpaddd \regT, \regE, \regE > - \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) > - PROLD \regB,30, \regT > - vpaddd \regF, \regE, \regE > -.endm > - > -######################################################################## > 
-######################################################################## > -######################################################################## > - > -## FRAMESZ plus pushes must be an odd multiple of 8 > -YMM_SAVE = (15-15)*32 > -FRAMESZ = 32*16 + YMM_SAVE > -_YMM = FRAMESZ - YMM_SAVE > - > -#define VMOVPS vmovups > - > -IDX = %rax > -inp0 = %r9 > -inp1 = %r10 > -inp2 = %r11 > -inp3 = %r12 > -inp4 = %r13 > -inp5 = %r14 > -inp6 = %r15 > -inp7 = %rcx > -arg1 = %rdi > -arg2 = %rsi > -RSP_SAVE = %rdx > - > -# ymm0 A > -# ymm1 B > -# ymm2 C > -# ymm3 D > -# ymm4 E > -# ymm5 F AA > -# ymm6 T0 BB > -# ymm7 T1 CC > -# ymm8 T2 DD > -# ymm9 T3 EE > -# ymm10 T4 TMP > -# ymm11 T5 FUN > -# ymm12 T6 K > -# ymm13 T7 W14 > -# ymm14 T8 W15 > -# ymm15 T9 W16 > - > - > -A = %ymm0 > -B = %ymm1 > -C = %ymm2 > -D = %ymm3 > -E = %ymm4 > -F = %ymm5 > -T0 = %ymm6 > -T1 = %ymm7 > -T2 = %ymm8 > -T3 = %ymm9 > -T4 = %ymm10 > -T5 = %ymm11 > -T6 = %ymm12 > -T7 = %ymm13 > -T8 = %ymm14 > -T9 = %ymm15 > - > -AA = %ymm5 > -BB = %ymm6 > -CC = %ymm7 > -DD = %ymm8 > -EE = %ymm9 > -TMP = %ymm10 > -FUN = %ymm11 > -K = %ymm12 > -W14 = %ymm13 > -W15 = %ymm14 > -W16 = %ymm15 > - > -.macro ROTATE_ARGS > - TMP_ = E > - E = D > - D = C > - C = B > - B = A > - A = TMP_ > -.endm > - > -.macro ROTATE_W > -TMP_ = W16 > -W16 = W15 > -W15 = W14 > -W14 = TMP_ > -.endm > - > -# 8 streams x 5 32bit words per digest x 4 bytes per word > -#define DIGEST_SIZE (8*5*4) > - > -.align 32 > - > -# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) > -# arg 1 : pointer to array[4] of pointer to input data > -# arg 2 : size (in blocks) ;; assumed to be >= 1 > -# > -ENTRY(sha1_x8_avx2) > - > - # save callee-saved clobbered registers to comply with C function ABI > - push %r12 > - push %r13 > - push %r14 > - push %r15 > - > - #save rsp > - mov %rsp, RSP_SAVE > - sub $FRAMESZ, %rsp > - > - #align rsp to 32 Bytes > - and $~0x1F, %rsp > - > - ## Initialize digests > - vmovdqu 0*32(arg1), A > - vmovdqu 
1*32(arg1), B > - vmovdqu 2*32(arg1), C > - vmovdqu 3*32(arg1), D > - vmovdqu 4*32(arg1), E > - > - ## transpose input onto stack > - mov _data_ptr+0*8(arg1),inp0 > - mov _data_ptr+1*8(arg1),inp1 > - mov _data_ptr+2*8(arg1),inp2 > - mov _data_ptr+3*8(arg1),inp3 > - mov _data_ptr+4*8(arg1),inp4 > - mov _data_ptr+5*8(arg1),inp5 > - mov _data_ptr+6*8(arg1),inp6 > - mov _data_ptr+7*8(arg1),inp7 > - > - xor IDX, IDX > -lloop: > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F > - I=0 > -.rep 2 > - VMOVPS (inp0, IDX), T0 > - VMOVPS (inp1, IDX), T1 > - VMOVPS (inp2, IDX), T2 > - VMOVPS (inp3, IDX), T3 > - VMOVPS (inp4, IDX), T4 > - VMOVPS (inp5, IDX), T5 > - VMOVPS (inp6, IDX), T6 > - VMOVPS (inp7, IDX), T7 > - > - TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 > - vpshufb F, T0, T0 > - vmovdqu T0, (I*8)*32(%rsp) > - vpshufb F, T1, T1 > - vmovdqu T1, (I*8+1)*32(%rsp) > - vpshufb F, T2, T2 > - vmovdqu T2, (I*8+2)*32(%rsp) > - vpshufb F, T3, T3 > - vmovdqu T3, (I*8+3)*32(%rsp) > - vpshufb F, T4, T4 > - vmovdqu T4, (I*8+4)*32(%rsp) > - vpshufb F, T5, T5 > - vmovdqu T5, (I*8+5)*32(%rsp) > - vpshufb F, T6, T6 > - vmovdqu T6, (I*8+6)*32(%rsp) > - vpshufb F, T7, T7 > - vmovdqu T7, (I*8+7)*32(%rsp) > - add $32, IDX > - I = (I+1) > -.endr > - # save old digests > - vmovdqu A,AA > - vmovdqu B,BB > - vmovdqu C,CC > - vmovdqu D,DD > - vmovdqu E,EE > - > -## > -## perform 0-79 steps > -## > - vmovdqu K00_19(%rip), K > -## do rounds 0...15 > - I = 0 > -.rep 16 > - SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 > - ROTATE_ARGS > - I = (I+1) > -.endr > - > -## do rounds 16...19 > - vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 > - vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 > -.rep 4 > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 > - ROTATE_ARGS > - I = (I+1) > -.endr > - > -## do rounds 20...39 > - vmovdqu K20_39(%rip), K > -.rep 20 > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 > - ROTATE_ARGS > - I = (I+1) > -.endr > - > -## do rounds 40...59 > - vmovdqu K40_59(%rip), K > 
-.rep 20 > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 > - ROTATE_ARGS > - I = (I+1) > -.endr > - > -## do rounds 60...79 > - vmovdqu K60_79(%rip), K > -.rep 20 > - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 > - ROTATE_ARGS > - I = (I+1) > -.endr > - > - vpaddd AA,A,A > - vpaddd BB,B,B > - vpaddd CC,C,C > - vpaddd DD,D,D > - vpaddd EE,E,E > - > - sub $1, arg2 > - jne lloop > - > - # write out digests > - vmovdqu A, 0*32(arg1) > - vmovdqu B, 1*32(arg1) > - vmovdqu C, 2*32(arg1) > - vmovdqu D, 3*32(arg1) > - vmovdqu E, 4*32(arg1) > - > - # update input pointers > - add IDX, inp0 > - add IDX, inp1 > - add IDX, inp2 > - add IDX, inp3 > - add IDX, inp4 > - add IDX, inp5 > - add IDX, inp6 > - add IDX, inp7 > - mov inp0, _data_ptr (arg1) > - mov inp1, _data_ptr + 1*8(arg1) > - mov inp2, _data_ptr + 2*8(arg1) > - mov inp3, _data_ptr + 3*8(arg1) > - mov inp4, _data_ptr + 4*8(arg1) > - mov inp5, _data_ptr + 5*8(arg1) > - mov inp6, _data_ptr + 6*8(arg1) > - mov inp7, _data_ptr + 7*8(arg1) > - > - ################ > - ## Postamble > - > - mov RSP_SAVE, %rsp > - > - # restore callee-saved clobbered registers > - pop %r15 > - pop %r14 > - pop %r13 > - pop %r12 > - > - ret > -ENDPROC(sha1_x8_avx2) > - > - > -.section .rodata.cst32.K00_19, "aM", @progbits, 32 > -.align 32 > -K00_19: > -.octa 0x5A8279995A8279995A8279995A827999 > -.octa 0x5A8279995A8279995A8279995A827999 > - > -.section .rodata.cst32.K20_39, "aM", @progbits, 32 > -.align 32 > -K20_39: > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 > - > -.section .rodata.cst32.K40_59, "aM", @progbits, 32 > -.align 32 > -K40_59: > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC > - > -.section .rodata.cst32.K60_79, "aM", @progbits, 32 > -.align 32 > -K60_79: > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 > - > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > -.align 32 > 
-PSHUFFLE_BYTE_FLIP_MASK: > -.octa 0x0c0d0e0f08090a0b0405060700010203 > -.octa 0x0c0d0e0f08090a0b0405060700010203 > diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile > deleted file mode 100644 > index 53ad6e7db747..000000000000 > --- a/arch/x86/crypto/sha256-mb/Makefile > +++ /dev/null > @@ -1,14 +0,0 @@ > -# SPDX-License-Identifier: GPL-2.0 > -# > -# Arch-specific CryptoAPI modules. > -# > - > -OBJECT_FILES_NON_STANDARD := y > - > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > - $(comma)4)$(comma)%ymm2,yes,no) > -ifeq ($(avx2_supported),yes) > - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o > - sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ > - sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o > -endif > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c > deleted file mode 100644 > index 97c5fc43e115..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb.c > +++ /dev/null > @@ -1,1013 +0,0 @@ > -/* > - * Multi buffer SHA256 algorithm Glue Code > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > - > -#include <crypto/internal/hash.h> > -#include <linux/init.h> > -#include <linux/module.h> > -#include <linux/mm.h> > -#include <linux/cryptohash.h> > -#include <linux/types.h> > -#include <linux/list.h> > -#include <crypto/scatterwalk.h> > -#include <crypto/sha.h> > -#include <crypto/mcryptd.h> > -#include <crypto/crypto_wq.h> > -#include <asm/byteorder.h> > -#include <linux/hardirq.h> > -#include <asm/fpu/api.h> > -#include "sha256_mb_ctx.h" > - > -#define FLUSH_INTERVAL 1000 /* in usec */ > - > -static struct mcryptd_alg_state sha256_mb_alg_state; > - > -struct sha256_mb_ctx { > - struct mcryptd_ahash *mcryptd_tfm; > -}; > - > -static inline struct mcryptd_hash_request_ctx > - *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) > -{ > - struct ahash_request *areq; > - > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > -} > - > -static inline struct ahash_request > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > -{ > - return container_of((void *) ctx, struct ahash_request, __ctx); > -} > - > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > - struct ahash_request *areq) > -{ > - rctx->flag = HASH_UPDATE; > -} > - > -static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); > -static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) > - (struct sha256_mb_mgr *state, struct job_sha256 *job); > -static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) > - (struct sha256_mb_mgr *state); > -static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) > - (struct sha256_mb_mgr *state); > - > -inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], > - uint64_t total_len) > -{ > - uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); > - > - memset(&padblock[i], 0, SHA256_BLOCK_SIZE); > - padblock[i] = 0x80; > - > - i += 
((SHA256_BLOCK_SIZE - 1) & > - (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) > - + 1 + SHA256_PADLENGTHFIELD_SIZE; > - > -#if SHA256_PADLENGTHFIELD_SIZE == 16 > - *((uint64_t *) &padblock[i - 16]) = 0; > -#endif > - > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > - > - /* Number of extra blocks to hash */ > - return i >> SHA256_LOG2_BLOCK_SIZE; > -} > - > -static struct sha256_hash_ctx > - *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, > - struct sha256_hash_ctx *ctx) > -{ > - while (ctx) { > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Clear PROCESSING bit */ > - ctx->status = HASH_CTX_STS_COMPLETE; > - return ctx; > - } > - > - /* > - * If the extra blocks are empty, begin hashing what remains > - * in the user's buffer. > - */ > - if (ctx->partial_block_buffer_length == 0 && > - ctx->incoming_buffer_length) { > - > - const void *buffer = ctx->incoming_buffer; > - uint32_t len = ctx->incoming_buffer_length; > - uint32_t copy_len; > - > - /* > - * Only entire blocks can be hashed. > - * Copy remainder to extra blocks buffer. > - */ > - copy_len = len & (SHA256_BLOCK_SIZE-1); > - > - if (copy_len) { > - len -= copy_len; > - memcpy(ctx->partial_block_buffer, > - ((const char *) buffer + len), > - copy_len); > - ctx->partial_block_buffer_length = copy_len; > - } > - > - ctx->incoming_buffer_length = 0; > - > - /* len should be a multiple of the block size now */ > - assert((len % SHA256_BLOCK_SIZE) == 0); > - > - /* Set len to the number of blocks to be hashed */ > - len >>= SHA256_LOG2_BLOCK_SIZE; > - > - if (len) { > - > - ctx->job.buffer = (uint8_t *) buffer; > - ctx->job.len = len; > - ctx = (struct sha256_hash_ctx *) > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > - continue; > - } > - } > - > - /* > - * If the extra blocks are not empty, then we are > - * either on the last block(s) or we need more > - * user input before continuing. 
> - */ > - if (ctx->status & HASH_CTX_STS_LAST) { > - > - uint8_t *buf = ctx->partial_block_buffer; > - uint32_t n_extra_blocks = > - sha256_pad(buf, ctx->total_length); > - > - ctx->status = (HASH_CTX_STS_PROCESSING | > - HASH_CTX_STS_COMPLETE); > - ctx->job.buffer = buf; > - ctx->job.len = (uint32_t) n_extra_blocks; > - ctx = (struct sha256_hash_ctx *) > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > - continue; > - } > - > - ctx->status = HASH_CTX_STS_IDLE; > - return ctx; > - } > - > - return NULL; > -} > - > -static struct sha256_hash_ctx > - *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) > -{ > - /* > - * If get_comp_job returns NULL, there are no jobs complete. > - * If get_comp_job returns a job, verify that it is safe to return to > - * the user. If it is not ready, resubmit the job to finish processing. > - * If sha256_ctx_mgr_resubmit returned a job, it is ready to be > - * returned. Otherwise, all jobs currently being managed by the > - * hash_ctx_mgr still need processing. > - */ > - struct sha256_hash_ctx *ctx; > - > - ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); > - return sha256_ctx_mgr_resubmit(mgr, ctx); > -} > - > -static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) > -{ > - sha256_job_mgr_init(&mgr->mgr); > -} > - > -static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, > - struct sha256_hash_ctx *ctx, > - const void *buffer, > - uint32_t len, > - int flags) > -{ > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > - /* User should not pass anything other than UPDATE or LAST */ > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > - return ctx; > - } > - > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > - /* Cannot submit to a currently processing job. */ > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > - return ctx; > - } > - > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Cannot update a finished job. 
*/ > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > - return ctx; > - } > - > - /* If we made it here, there was no error during this call to submit */ > - ctx->error = HASH_CTX_ERROR_NONE; > - > - /* Store buffer ptr info from user */ > - ctx->incoming_buffer = buffer; > - ctx->incoming_buffer_length = len; > - > - /* > - * Store the user's request flags and mark this ctx as currently > - * being processed. > - */ > - ctx->status = (flags & HASH_LAST) ? > - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > - HASH_CTX_STS_PROCESSING; > - > - /* Advance byte counter */ > - ctx->total_length += len; > - > - /* > - * If there is anything currently buffered in the extra blocks, > - * append to it until it contains a whole block. > - * Or if the user's buffer contains less than a whole block, > - * append as much as possible to the extra block. > - */ > - if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { > - /* > - * Compute how many bytes to copy from user buffer into > - * extra block > - */ > - uint32_t copy_len = SHA256_BLOCK_SIZE - > - ctx->partial_block_buffer_length; > - if (len < copy_len) > - copy_len = len; > - > - if (copy_len) { > - /* Copy and update relevant pointers and counters */ > - memcpy( > - &ctx->partial_block_buffer[ctx->partial_block_buffer_length], > - buffer, copy_len); > - > - ctx->partial_block_buffer_length += copy_len; > - ctx->incoming_buffer = (const void *) > - ((const char *)buffer + copy_len); > - ctx->incoming_buffer_length = len - copy_len; > - } > - > - /* The extra block should never contain more than 1 block */ > - assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); > - > - /* > - * If the extra block buffer contains exactly 1 block, > - * it can be hashed. 
> - */ > - if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { > - ctx->partial_block_buffer_length = 0; > - > - ctx->job.buffer = ctx->partial_block_buffer; > - ctx->job.len = 1; > - ctx = (struct sha256_hash_ctx *) > - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); > - } > - } > - > - return sha256_ctx_mgr_resubmit(mgr, ctx); > -} > - > -static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) > -{ > - struct sha256_hash_ctx *ctx; > - > - while (1) { > - ctx = (struct sha256_hash_ctx *) > - sha256_job_mgr_flush(&mgr->mgr); > - > - /* If flush returned 0, there are no more jobs in flight. */ > - if (!ctx) > - return NULL; > - > - /* > - * If flush returned a job, resubmit the job to finish > - * processing. > - */ > - ctx = sha256_ctx_mgr_resubmit(mgr, ctx); > - > - /* > - * If sha256_ctx_mgr_resubmit returned a job, it is ready to > - * be returned. Otherwise, all jobs currently being managed by > - * the sha256_ctx_mgr still need processing. Loop. > - */ > - if (ctx) > - return ctx; > - } > -} > - > -static int sha256_mb_init(struct ahash_request *areq) > -{ > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > - > - hash_ctx_init(sctx); > - sctx->job.result_digest[0] = SHA256_H0; > - sctx->job.result_digest[1] = SHA256_H1; > - sctx->job.result_digest[2] = SHA256_H2; > - sctx->job.result_digest[3] = SHA256_H3; > - sctx->job.result_digest[4] = SHA256_H4; > - sctx->job.result_digest[5] = SHA256_H5; > - sctx->job.result_digest[6] = SHA256_H6; > - sctx->job.result_digest[7] = SHA256_H7; > - sctx->total_length = 0; > - sctx->partial_block_buffer_length = 0; > - sctx->status = HASH_CTX_STS_IDLE; > - > - return 0; > -} > - > -static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > -{ > - int i; > - struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > - __be32 *dst = (__be32 *) rctx->out; > - > - for (i = 0; i < 8; ++i) > - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); > - > - return 0; > -} > - > 
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > - struct mcryptd_alg_cstate *cstate, bool flush) > -{ > - int flag = HASH_UPDATE; > - int nbytes, err = 0; > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > - struct sha256_hash_ctx *sha_ctx; > - > - /* more work ? */ > - while (!(rctx->flag & HASH_DONE)) { > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > - if (nbytes < 0) { > - err = nbytes; > - goto out; > - } > - /* check if the walk is done */ > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - if (rctx->flag & HASH_FINAL) > - flag |= HASH_LAST; > - > - } > - sha_ctx = (struct sha256_hash_ctx *) > - ahash_request_ctx(&rctx->areq); > - kernel_fpu_begin(); > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, > - rctx->walk.data, nbytes, flag); > - if (!sha_ctx) { > - if (flush) > - sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); > - } > - kernel_fpu_end(); > - if (sha_ctx) > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - else { > - rctx = NULL; > - goto out; > - } > - } > - > - /* copy the results */ > - if (rctx->flag & HASH_FINAL) > - sha256_mb_set_results(rctx); > - > -out: > - *ret_rctx = rctx; > - return err; > -} > - > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate, > - int err) > -{ > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha256_hash_ctx *sha_ctx; > - struct mcryptd_hash_request_ctx *req_ctx; > - int ret; > - > - /* remove from work list */ > - spin_lock(&cstate->work_lock); > - list_del(&rctx->waiter); > - spin_unlock(&cstate->work_lock); > - > - if (irqs_disabled()) > - rctx->complete(&req->base, err); > - else { > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > - } > - > - /* check to see if there are other jobs that are done */ > - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); > - while (sha_ctx) { > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret 
= sha_finish_walk(&req_ctx, cstate, false); > - if (req_ctx) { > - spin_lock(&cstate->work_lock); > - list_del(&req_ctx->waiter); > - spin_unlock(&cstate->work_lock); > - > - req = cast_mcryptd_ctx_to_req(req_ctx); > - if (irqs_disabled()) > - req_ctx->complete(&req->base, ret); > - else { > - local_bh_disable(); > - req_ctx->complete(&req->base, ret); > - local_bh_enable(); > - } > - } > - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); > - } > - > - return 0; > -} > - > -static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate) > -{ > - unsigned long next_flush; > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > - > - /* initialize tag */ > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > - rctx->tag.seq_num = cstate->next_seq_num++; > - next_flush = rctx->tag.arrival + delay; > - rctx->tag.expire = next_flush; > - > - spin_lock(&cstate->work_lock); > - list_add_tail(&rctx->waiter, &cstate->work_list); > - spin_unlock(&cstate->work_lock); > - > - mcryptd_arm_flusher(cstate, delay); > -} > - > -static int sha256_mb_update(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha256_hash_ctx *sha_ctx; > - int ret = 0, nbytes; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) > - rctx->flag |= HASH_DONE; > - > - /* submit */ > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > - sha256_mb_add_list(rctx, 
cstate); > - kernel_fpu_begin(); > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > - nbytes, HASH_UPDATE); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha256_mb_finup(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha256_hash_ctx *sha_ctx; > - int ret = 0, flag = HASH_UPDATE, nbytes; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - flag = HASH_LAST; > - } > - > - /* submit */ > - rctx->flag |= HASH_FINAL; > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > - sha256_mb_add_list(rctx, cstate); > - > - kernel_fpu_begin(); > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, > - nbytes, flag); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if 
(!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha256_mb_final(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, > - areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); > - > - struct sha256_hash_ctx *sha_ctx; > - int ret = 0; > - u8 data; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - rctx->flag |= HASH_DONE | HASH_FINAL; > - > - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); > - /* flag HASH_FINAL and 0 data size */ > - sha256_mb_add_list(rctx, cstate); > - kernel_fpu_begin(); > - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, > - HASH_LAST); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha256_mb_export(struct ahash_request *areq, void *out) > -{ > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(out, sctx, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha256_mb_import(struct ahash_request *areq, const void *in) > -{ > - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(sctx, in, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) > -{ > - struct mcryptd_ahash *mcryptd_tfm; > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - struct mcryptd_hash_ctx 
*mctx; > - > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", > - CRYPTO_ALG_INTERNAL, > - CRYPTO_ALG_INTERNAL); > - if (IS_ERR(mcryptd_tfm)) > - return PTR_ERR(mcryptd_tfm); > - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > - mctx->alg_state = &sha256_mb_alg_state; > - ctx->mcryptd_tfm = mcryptd_tfm; > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > - > - return 0; > -} > - > -static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) > -{ > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - sizeof(struct sha256_hash_ctx)); > - > - return 0; > -} > - > -static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static struct ahash_alg sha256_mb_areq_alg = { > - .init = sha256_mb_init, > - .update = sha256_mb_update, > - .final = sha256_mb_final, > - .finup = sha256_mb_finup, > - .export = sha256_mb_export, > - .import = sha256_mb_import, > - .halg = { > - .digestsize = SHA256_DIGEST_SIZE, > - .statesize = sizeof(struct sha256_hash_ctx), > - .base = { > - .cra_name = "__sha256-mb", > - .cra_driver_name = "__intel_sha256-mb", > - .cra_priority = 100, > - /* > - * use ASYNC flag as some buffers in multi-buffer > - * algo may not have completed before hashing thread > - * sleep > - */ > - .cra_flags = CRYPTO_ALG_ASYNC | > - CRYPTO_ALG_INTERNAL, > - .cra_blocksize = SHA256_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT > - (sha256_mb_areq_alg.halg.base.cra_list), > - .cra_init = sha256_mb_areq_init_tfm, > - .cra_exit = sha256_mb_areq_exit_tfm, > - .cra_ctxsize = sizeof(struct sha256_hash_ctx), > - } > - } > -}; > - > 
-static int sha256_mb_async_init(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_init(mcryptd_req); > -} > - > -static int sha256_mb_async_update(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_update(mcryptd_req); > -} > - > -static int sha256_mb_async_finup(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_finup(mcryptd_req); > -} > - > -static int sha256_mb_async_final(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_final(mcryptd_req); > -} > - > -static int sha256_mb_async_digest(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct 
sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_digest(mcryptd_req); > -} > - > -static int sha256_mb_async_export(struct ahash_request *req, void *out) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_export(mcryptd_req, out); > -} > - > -static int sha256_mb_async_import(struct ahash_request *req, const void *in) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > - struct mcryptd_hash_request_ctx *rctx; > - struct ahash_request *areq; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - rctx = ahash_request_ctx(mcryptd_req); > - areq = &rctx->areq; > - > - ahash_request_set_tfm(areq, child); > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > - rctx->complete, req); > - > - return crypto_ahash_import(mcryptd_req, in); > -} > - > -static struct ahash_alg sha256_mb_async_alg = { > - .init = sha256_mb_async_init, > - .update = sha256_mb_async_update, > - .final = sha256_mb_async_final, > - .finup = sha256_mb_async_finup, > - .export = sha256_mb_async_export, > - .import = sha256_mb_async_import, > - .digest = sha256_mb_async_digest, > - .halg = { > - .digestsize = 
SHA256_DIGEST_SIZE, > - .statesize = sizeof(struct sha256_hash_ctx), > - .base = { > - .cra_name = "sha256", > - .cra_driver_name = "sha256_mb", > - /* > - * Low priority, since with few concurrent hash requests > - * this is extremely slow due to the flush delay. Users > - * whose workloads would benefit from this can request > - * it explicitly by driver name, or can increase its > - * priority at runtime using NETLINK_CRYPTO. > - */ > - .cra_priority = 50, > - .cra_flags = CRYPTO_ALG_ASYNC, > - .cra_blocksize = SHA256_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT > - (sha256_mb_async_alg.halg.base.cra_list), > - .cra_init = sha256_mb_async_init_tfm, > - .cra_exit = sha256_mb_async_exit_tfm, > - .cra_ctxsize = sizeof(struct sha256_mb_ctx), > - .cra_alignmask = 0, > - }, > - }, > -}; > - > -static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) > -{ > - struct mcryptd_hash_request_ctx *rctx; > - unsigned long cur_time; > - unsigned long next_flush = 0; > - struct sha256_hash_ctx *sha_ctx; > - > - > - cur_time = jiffies; > - > - while (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - if (time_before(cur_time, rctx->tag.expire)) > - break; > - kernel_fpu_begin(); > - sha_ctx = (struct sha256_hash_ctx *) > - sha256_ctx_mgr_flush(cstate->mgr); > - kernel_fpu_end(); > - if (!sha_ctx) { > - pr_err("sha256_mb error: nothing got" > - " flushed for non-empty list\n"); > - break; > - } > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - sha_finish_walk(&rctx, cstate, true); > - sha_complete_job(rctx, cstate, 0); > - } > - > - if (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - /* get the hash context and then flush time */ > - next_flush = rctx->tag.expire; > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > - } > - return next_flush; > -} > - > -static int 
__init sha256_mb_mod_init(void) > -{ > - > - int cpu; > - int err; > - struct mcryptd_alg_cstate *cpu_state; > - > - /* check for dependent cpu features */ > - if (!boot_cpu_has(X86_FEATURE_AVX2) || > - !boot_cpu_has(X86_FEATURE_BMI2)) > - return -ENODEV; > - > - /* initialize multibuffer structures */ > - sha256_mb_alg_state.alg_cstate = alloc_percpu > - (struct mcryptd_alg_cstate); > - > - sha256_job_mgr_init = sha256_mb_mgr_init_avx2; > - sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; > - sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; > - sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; > - > - if (!sha256_mb_alg_state.alg_cstate) > - return -ENOMEM; > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > - cpu_state->next_flush = 0; > - cpu_state->next_seq_num = 0; > - cpu_state->flusher_engaged = false; > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > - cpu_state->cpu = cpu; > - cpu_state->alg_state = &sha256_mb_alg_state; > - cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), > - GFP_KERNEL); > - if (!cpu_state->mgr) > - goto err2; > - sha256_ctx_mgr_init(cpu_state->mgr); > - INIT_LIST_HEAD(&cpu_state->work_list); > - spin_lock_init(&cpu_state->work_lock); > - } > - sha256_mb_alg_state.flusher = &sha256_mb_flusher; > - > - err = crypto_register_ahash(&sha256_mb_areq_alg); > - if (err) > - goto err2; > - err = crypto_register_ahash(&sha256_mb_async_alg); > - if (err) > - goto err1; > - > - > - return 0; > -err1: > - crypto_unregister_ahash(&sha256_mb_areq_alg); > -err2: > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha256_mb_alg_state.alg_cstate); > - return -ENODEV; > -} > - > -static void __exit sha256_mb_mod_fini(void) > -{ > - int cpu; > - struct mcryptd_alg_cstate *cpu_state; > - > - crypto_unregister_ahash(&sha256_mb_async_alg); > - 
crypto_unregister_ahash(&sha256_mb_areq_alg); > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha256_mb_alg_state.alg_cstate); > -} > - > -module_init(sha256_mb_mod_init); > -module_exit(sha256_mb_mod_fini); > - > -MODULE_LICENSE("GPL"); > -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); > - > -MODULE_ALIAS_CRYPTO("sha256"); > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > deleted file mode 100644 > index 7c432543dc7f..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h > +++ /dev/null > @@ -1,134 +0,0 @@ > -/* > - * Header file for multi buffer SHA256 context > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. 
> - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#ifndef _SHA_MB_CTX_INTERNAL_H > -#define _SHA_MB_CTX_INTERNAL_H > - > -#include "sha256_mb_mgr.h" > - > -#define HASH_UPDATE 0x00 > -#define HASH_LAST 0x01 > -#define HASH_DONE 0x02 > -#define HASH_FINAL 0x04 > - > -#define HASH_CTX_STS_IDLE 0x00 > -#define HASH_CTX_STS_PROCESSING 0x01 > -#define HASH_CTX_STS_LAST 0x02 > -#define HASH_CTX_STS_COMPLETE 0x04 > - > -enum hash_ctx_error { > - HASH_CTX_ERROR_NONE = 0, > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > - > -#ifdef HASH_CTX_DEBUG > - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, > -#endif > -}; > - > - > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > -#define hash_ctx_status(ctx) ((ctx)->status) > -#define hash_ctx_error(ctx) ((ctx)->error) > -#define hash_ctx_init(ctx) \ > - do { \ > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > - } while (0) > - > - > -/* Hash Constants and Typedefs */ > -#define SHA256_DIGEST_LENGTH 8 > -#define SHA256_LOG2_BLOCK_SIZE 6 > - > -#define SHA256_PADLENGTHFIELD_SIZE 8 > - > -#ifdef SHA_MB_DEBUG > -#define assert(expr) \ > -do { \ > - if (unlikely(!(expr))) { \ > - printk(KERN_ERR "Assertion failed! 
%s,%s,%s,line=%d\n", \ > - #expr, __FILE__, __func__, __LINE__); \ > - } \ > -} while (0) > -#else > -#define assert(expr) do {} while (0) > -#endif > - > -struct sha256_ctx_mgr { > - struct sha256_mb_mgr mgr; > -}; > - > -/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ > - > -struct sha256_hash_ctx { > - /* Must be at struct offset 0 */ > - struct job_sha256 job; > - /* status flag */ > - int status; > - /* error flag */ > - int error; > - > - uint64_t total_length; > - const void *incoming_buffer; > - uint32_t incoming_buffer_length; > - uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; > - uint32_t partial_block_buffer_length; > - void *user_data; > -}; > - > -#endif > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > deleted file mode 100644 > index b01ae408c56d..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h > +++ /dev/null > @@ -1,108 +0,0 @@ > -/* > - * Header file for multi buffer SHA256 algorithm manager > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > -#ifndef __SHA_MB_MGR_H > -#define __SHA_MB_MGR_H > - > -#include <linux/types.h> > - > -#define NUM_SHA256_DIGEST_WORDS 8 > - > -enum job_sts { STS_UNKNOWN = 0, > - STS_BEING_PROCESSED = 1, > - STS_COMPLETED = 2, > - STS_INTERNAL_ERROR = 3, > - STS_ERROR = 4 > -}; > - > -struct job_sha256 { > - u8 *buffer; > - u32 len; > - u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); > - enum job_sts status; > - void *user_data; > -}; > - > -/* SHA256 out-of-order scheduler */ > - > -/* typedef uint32_t sha8_digest_array[8][8]; */ > - > -struct sha256_args_x8 { > - uint32_t digest[8][8]; > - uint8_t *data_ptr[8]; > -}; > - > -struct sha256_lane_data { > - struct job_sha256 *job_in_lane; > -}; > - > -struct sha256_mb_mgr { > - struct sha256_args_x8 args; > - > - uint32_t lens[8]; > - > - /* each byte is index (0...7) of unused lanes */ > - uint64_t unused_lanes; > - /* byte 4 is set to FF as a flag */ > - struct sha256_lane_data ldata[8]; > -}; > - > - > -#define SHA256_MB_MGR_NUM_LANES_AVX2 8 > - > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); > -struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, > - struct job_sha256 *job); > -struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); > -struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); > - > -#endif > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > deleted file mode 100644 > index 5c377bac21d0..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S > +++ /dev/null > @@ -1,304 +0,0 @@ > -/* > - * Header file for multi buffer SHA256 algorithm data structure > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -# Macros for defining data structures > - > -# Usage example > - > -#START_FIELDS # JOB_AES > -### name size align > -#FIELD _plaintext, 8, 8 # pointer to plaintext > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > -#FIELD _IV, 16, 8 # IV > -#FIELD _keys, 8, 8 # pointer to keys > -#FIELD _len, 4, 4 # length in bytes > -#FIELD _status, 4, 4 # status enumeration > -#FIELD _user_data, 8, 8 # pointer to user data > -#UNION _union, size1, align1, \ > -# size2, align2, \ > -# size3, align3, \ > -# ... > -#END_FIELDS > -#%assign _JOB_AES_size _FIELD_OFFSET > -#%assign _JOB_AES_align _STRUCT_ALIGN > - > -######################################################################### > - > -# Alternate "struc-like" syntax: > -# STRUCT job_aes2 > -# RES_Q .plaintext, 1 > -# RES_Q .ciphertext, 1 > -# RES_DQ .IV, 1 > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > -# RES_U .union, size1, align1, \ > -# size2, align2, \ > -# ... > -# ENDSTRUCT > -# # Following only needed if nesting > -# %assign job_aes2_size _FIELD_OFFSET > -# %assign job_aes2_align _STRUCT_ALIGN > -# > -# RES_* macros take a name, a count and an optional alignment. > -# The count in in terms of the base size of the macro, and the > -# default alignment is the base size. > -# The macros are: > -# Macro Base size > -# RES_B 1 > -# RES_W 2 > -# RES_D 4 > -# RES_Q 8 > -# RES_DQ 16 > -# RES_Y 32 > -# RES_Z 64 > -# > -# RES_U defines a union. 
It's arguments are a name and two or more > -# pairs of "size, alignment" > -# > -# The two assigns are only needed if this structure is being nested > -# within another. Even if the assigns are not done, one can still use > -# STRUCT_NAME_size as the size of the structure. > -# > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > -# > -# The differences between this and using "struc" directly are that each > -# type is implicitly aligned to its natural length (although this can be > -# over-ridden with an explicit third parameter), and that the structure > -# is padded at the end to its overall alignment. > -# > - > -######################################################################### > - > -#ifndef _DATASTRUCT_ASM_ > -#define _DATASTRUCT_ASM_ > - > -#define SZ8 8*SHA256_DIGEST_WORD_SIZE > -#define ROUNDS 64*SZ8 > -#define PTR_SZ 8 > -#define SHA256_DIGEST_WORD_SIZE 4 > -#define MAX_SHA256_LANES 8 > -#define SHA256_DIGEST_WORDS 8 > -#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) > -#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) > -#define SHA256_BLK_SZ 64 > - > -# START_FIELDS > -.macro START_FIELDS > - _FIELD_OFFSET = 0 > - _STRUCT_ALIGN = 0 > -.endm > - > -# FIELD name size align > -.macro FIELD name size align > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > - \name = _FIELD_OFFSET > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > -.if (\align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = \align > -.endif > -.endm > - > -# END_FIELDS > -.macro END_FIELDS > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > -.endm > - > -######################################################################## > - > -.macro STRUCT p1 > -START_FIELDS > -.struc \p1 > -.endm > - > -.macro ENDSTRUCT > - tmp = _FIELD_OFFSET > - END_FIELDS > - tmp = (_FIELD_OFFSET - %%tmp) > -.if (tmp > 0) > - .lcomm tmp > -.endif > -.endstruc > -.endm > - > -## RES_int name size 
align > -.macro RES_int p1 p2 p3 > - name = \p1 > - size = \p2 > - align = .\p3 > - > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > -.align align > -.lcomm name size > - _FIELD_OFFSET = _FIELD_OFFSET + (size) > -.if (align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = align > -.endif > -.endm > - > -# macro RES_B name, size [, align] > -.macro RES_B _name, _size, _align=1 > -RES_int _name _size _align > -.endm > - > -# macro RES_W name, size [, align] > -.macro RES_W _name, _size, _align=2 > -RES_int _name 2*(_size) _align > -.endm > - > -# macro RES_D name, size [, align] > -.macro RES_D _name, _size, _align=4 > -RES_int _name 4*(_size) _align > -.endm > - > -# macro RES_Q name, size [, align] > -.macro RES_Q _name, _size, _align=8 > -RES_int _name 8*(_size) _align > -.endm > - > -# macro RES_DQ name, size [, align] > -.macro RES_DQ _name, _size, _align=16 > -RES_int _name 16*(_size) _align > -.endm > - > -# macro RES_Y name, size [, align] > -.macro RES_Y _name, _size, _align=32 > -RES_int _name 32*(_size) _align > -.endm > - > -# macro RES_Z name, size [, align] > -.macro RES_Z _name, _size, _align=64 > -RES_int _name 64*(_size) _align > -.endm > - > -#endif > - > - > -######################################################################## > -#### Define SHA256 Out Of Order Data Structures > -######################################################################## > - > -START_FIELDS # LANE_DATA > -### name size align > -FIELD _job_in_lane, 8, 8 # pointer to job object > -END_FIELDS > - > - _LANE_DATA_size = _FIELD_OFFSET > - _LANE_DATA_align = _STRUCT_ALIGN > - > -######################################################################## > - > -START_FIELDS # SHA256_ARGS_X4 > -### name size align > -FIELD _digest, 4*8*8, 4 # transposed digest > -FIELD _data_ptr, 8*8, 8 # array of pointers to data > -END_FIELDS > - > - _SHA256_ARGS_X4_size = _FIELD_OFFSET > - _SHA256_ARGS_X4_align = _STRUCT_ALIGN > - _SHA256_ARGS_X8_size = _FIELD_OFFSET > - 
_SHA256_ARGS_X8_align = _STRUCT_ALIGN > - > -####################################################################### > - > -START_FIELDS # MB_MGR > -### name size align > -FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align > -FIELD _lens, 4*8, 8 > -FIELD _unused_lanes, 8, 8 > -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align > -END_FIELDS > - > - _MB_MGR_size = _FIELD_OFFSET > - _MB_MGR_align = _STRUCT_ALIGN > - > -_args_digest = _args + _digest > -_args_data_ptr = _args + _data_ptr > - > -####################################################################### > - > -START_FIELDS #STACK_FRAME > -### name size align > -FIELD _data, 16*SZ8, 1 # transposed digest > -FIELD _digest, 8*SZ8, 1 # array of pointers to data > -FIELD _ytmp, 4*SZ8, 1 > -FIELD _rsp, 8, 1 > -END_FIELDS > - > - _STACK_FRAME_size = _FIELD_OFFSET > - _STACK_FRAME_align = _STRUCT_ALIGN > - > -####################################################################### > - > -######################################################################## > -#### Define constants > -######################################################################## > - > -#define STS_UNKNOWN 0 > -#define STS_BEING_PROCESSED 1 > -#define STS_COMPLETED 2 > - > -######################################################################## > -#### Define JOB_SHA256 structure > -######################################################################## > - > -START_FIELDS # JOB_SHA256 > - > -### name size align > -FIELD _buffer, 8, 8 # pointer to buffer > -FIELD _len, 8, 8 # length in bytes > -FIELD _result_digest, 8*4, 32 # Digest (output) > -FIELD _status, 4, 4 > -FIELD _user_data, 8, 8 > -END_FIELDS > - > - _JOB_SHA256_size = _FIELD_OFFSET > - _JOB_SHA256_align = _STRUCT_ALIGN > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > deleted file mode 100644 > index d2364c55bbde..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S > +++ 
/dev/null > @@ -1,307 +0,0 @@ > -/* > - * Flush routine for SHA256 multibuffer > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha256_mb_mgr_datastruct.S" > - > -.extern sha256_x8_avx2 > - > -#LINUX register definitions > -#define arg1 %rdi > -#define arg2 %rsi > - > -# Common register definitions > -#define state arg1 > -#define job arg2 > -#define len2 arg2 > - > -# idx must be a register not clobberred by sha1_mult > -#define idx %r8 > -#define DWORD_idx %r8d > - > -#define unused_lanes %rbx > -#define lane_data %rbx > -#define tmp2 %rbx > -#define tmp2_w %ebx > - > -#define job_rax %rax > -#define tmp1 %rax > -#define size_offset %rax > -#define tmp %rax > -#define start_offset %rax > - > -#define tmp3 %arg1 > - > -#define extra_blocks %arg2 > -#define p %arg2 > - > -.macro LABEL prefix n > -\prefix\n\(): > -.endm > - > -.macro JNE_SKIP i > -jne skip_\i > -.endm > - > -.altmacro > -.macro SET_OFFSET _offset > -offset = \_offset > -.endm > -.noaltmacro > - > -# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) > -# arg 1 : rcx : state > -ENTRY(sha256_mb_mgr_flush_avx2) > - FRAME_BEGIN > - push %rbx > - > - # If bit (32+3) is set, then all lanes are empty > - mov _unused_lanes(state), unused_lanes > - bt $32+3, unused_lanes > - jc return_null > - > - # find a lane with a non-null job > - xor idx, idx > - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne one(%rip), idx > - offset = (_ldata + 2 * _LANE_DATA_size + 
_job_in_lane) > - cmpq $0, offset(state) > - cmovne two(%rip), idx > - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne three(%rip), idx > - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne four(%rip), idx > - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne five(%rip), idx > - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne six(%rip), idx > - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne seven(%rip), idx > - > - # copy idx to empty lanes > -copy_lane_data: > - offset = (_args + _data_ptr) > - mov offset(state,idx,8), tmp > - > - I = 0 > -.rep 8 > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > -.altmacro > - JNE_SKIP %I > - offset = (_args + _data_ptr + 8*I) > - mov tmp, offset(state) > - offset = (_lens + 4*I) > - movl $0xFFFFFFFF, offset(state) > -LABEL skip_ %I > - I = (I+1) > -.noaltmacro > -.endr > - > - # Find min length > - vmovdqu _lens+0*16(state), %xmm0 > - vmovdqu _lens+1*16(state), %xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > - > - vmovd %xmm2, DWORD_idx > - mov idx, len2 > - and $0xF, idx > - shr $4, len2 > - jz len_is_0 > - > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > - vpshufd $0, %xmm2, %xmm2 > - > - vpsubd %xmm2, %xmm0, %xmm0 > - vpsubd %xmm2, %xmm1, %xmm1 > - > - vmovdqu %xmm0, _lens+0*16(state) > - vmovdqu %xmm1, _lens+1*16(state) > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha256_x8_avx2 > - # state and idx are intact > - > -len_is_0: > - # process completed job "idx" > - imul $_LANE_DATA_size, 
idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $4, unused_lanes > - or idx, unused_lanes > - > - mov unused_lanes, _unused_lanes(state) > - movl $0xFFFFFFFF, _lens(state,idx,4) > - > - vmovd _args_digest(state , idx, 4) , %xmm0 > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 > - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 > - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 > - > - vmovdqu %xmm0, _result_digest(job_rax) > - offset = (_result_digest + 1*16) > - vmovdqu %xmm1, offset(job_rax) > - > -return: > - pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor job_rax, job_rax > - jmp return > -ENDPROC(sha256_mb_mgr_flush_avx2) > - > -############################################################################## > - > -.align 16 > -ENTRY(sha256_mb_mgr_get_comp_job_avx2) > - push %rbx > - > - ## if bit 32+3 is set, then all lanes are empty > - mov _unused_lanes(state), unused_lanes > - bt $(32+3), unused_lanes > - jc .return_null > - > - # Find min length > - vmovdqu _lens(state), %xmm0 > - vmovdqu _lens+1*16(state), %xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > - > - vmovd %xmm2, DWORD_idx > - test $~0xF, idx > - jnz .return_null > - > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > 
- > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $4, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens(state, idx, 4) > - > - vmovd _args_digest(state, idx, 4), %xmm0 > - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 > - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 > - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 > - > - vmovdqu %xmm0, _result_digest(job_rax) > - offset = (_result_digest + 1*16) > - vmovdqu %xmm1, offset(job_rax) > - > - pop %rbx > - > - ret > - > -.return_null: > - xor job_rax, job_rax > - pop %rbx > - ret > -ENDPROC(sha256_mb_mgr_get_comp_job_avx2) > - > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > -.align 16 > -clear_low_nibble: > -.octa 0x000000000000000000000000FFFFFFF0 > - > -.section .rodata.cst8, "aM", @progbits, 8 > -.align 8 > -one: > -.quad 1 > -two: > -.quad 2 > -three: > -.quad 3 > -four: > -.quad 4 > -five: > -.quad 5 > -six: > -.quad 6 > -seven: > -.quad 7 > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > deleted file mode 100644 > index b0c498371e67..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c > +++ /dev/null > @@ -1,65 +0,0 @@ > -/* > - * Initialization code for multi buffer SHA256 algorithm for AVX2 > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#include "sha256_mb_mgr.h" > - > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) > -{ > - unsigned int j; > - > - state->unused_lanes = 0xF76543210ULL; > - for (j = 0; j < 8; j++) { > - state->lens[j] = 0xFFFFFFFF; > - state->ldata[j].job_in_lane = NULL; > - } > -} > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > deleted file mode 100644 > index b36ae7454084..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S > +++ /dev/null > @@ -1,214 +0,0 @@ > -/* > - * Buffer submit code for multi buffer SHA256 algorithm > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. 
> - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha256_mb_mgr_datastruct.S" > - > -.extern sha256_x8_avx2 > - > -# LINUX register definitions > -arg1 = %rdi > -arg2 = %rsi > -size_offset = %rcx > -tmp2 = %rcx > -extra_blocks = %rdx > - > -# Common definitions > -#define state arg1 > -#define job %rsi > -#define len2 arg2 > -#define p2 arg2 > - > -# idx must be a register not clobberred by sha1_x8_avx2 > -idx = %r8 > -DWORD_idx = %r8d > -last_len = %r8 > - > -p = %r11 > -start_offset = %r11 > - > -unused_lanes = %rbx > -BYTE_unused_lanes = %bl > - > -job_rax = %rax > -len = %rax > -DWORD_len = %eax > - > -lane = %r12 > -tmp3 = %r12 > - > -tmp = %r9 > -DWORD_tmp = %r9d > - > -lane_data = %r10 > - > -# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job) > -# arg 1 : rcx : state > -# arg 2 : rdx : job > -ENTRY(sha256_mb_mgr_submit_avx2) > - FRAME_BEGIN > - push %rbx > - push %r12 > - > - mov _unused_lanes(state), unused_lanes > - mov unused_lanes, lane > - and $0xF, lane > - shr $4, unused_lanes > - imul $_LANE_DATA_size, lane, lane_data > - movl $STS_BEING_PROCESSED, _status(job) > - lea _ldata(state, lane_data), lane_data > - mov unused_lanes, _unused_lanes(state) > - movl _len(job), DWORD_len > - > - mov job, _job_in_lane(lane_data) > - shl $4, len > - or lane, len > - > - movl DWORD_len, _lens(state , lane, 4) > - > - # Load digest words from result_digest > - vmovdqu _result_digest(job), %xmm0 > - vmovdqu _result_digest+1*16(job), %xmm1 > - vmovd %xmm0, _args_digest(state, lane, 4) > - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) > - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) > - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) > - vmovd %xmm1, _args_digest+4*32(state , lane, 4) > - > - vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4) > - vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4) > - vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4) > - > - mov _buffer(job), p > - mov p, 
_args_data_ptr(state, lane, 8) > - > - cmp $0xF, unused_lanes > - jne return_null > - > -start_loop: > - # Find min length > - vmovdqa _lens(state), %xmm0 > - vmovdqa _lens+1*16(state), %xmm1 > - > - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} > - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} > - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} > - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword > - > - vmovd %xmm2, DWORD_idx > - mov idx, len2 > - and $0xF, idx > - shr $4, len2 > - jz len_is_0 > - > - vpand clear_low_nibble(%rip), %xmm2, %xmm2 > - vpshufd $0, %xmm2, %xmm2 > - > - vpsubd %xmm2, %xmm0, %xmm0 > - vpsubd %xmm2, %xmm1, %xmm1 > - > - vmovdqa %xmm0, _lens + 0*16(state) > - vmovdqa %xmm1, _lens + 1*16(state) > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha256_x8_avx2 > - > - # state and idx are intact > - > -len_is_0: > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - mov _unused_lanes(state), unused_lanes > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - shl $4, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens(state,idx,4) > - > - vmovd _args_digest(state, idx, 4), %xmm0 > - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 > - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 > - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 > - vmovd _args_digest+4*32(state, idx, 4), %xmm1 > - > - vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 > - vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 > - vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 > - > - vmovdqu %xmm0, _result_digest(job_rax) > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > - > -return: > - pop %r12 > - 
pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor job_rax, job_rax > - jmp return > - > -ENDPROC(sha256_mb_mgr_submit_avx2) > - > -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 > -.align 16 > -clear_low_nibble: > - .octa 0x000000000000000000000000FFFFFFF0 > diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > deleted file mode 100644 > index 1687c80c5995..000000000000 > --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S > +++ /dev/null > @@ -1,598 +0,0 @@ > -/* > - * Multi-buffer SHA256 algorithm hash compute routine > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. 
> - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#include <linux/linkage.h> > -#include "sha256_mb_mgr_datastruct.S" > - > -## code to compute oct SHA256 using SSE-256 > -## outer calling routine takes care of save and restore of XMM registers > -## Logic designed/laid out by JDG > - > -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 > -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 > -## Linux preserves: rdi rbp r8 > -## > -## clobbers %ymm0-15 > - > -arg1 = %rdi > -arg2 = %rsi > -reg3 = %rcx > -reg4 = %rdx > - > -# Common definitions > -STATE = arg1 > -INP_SIZE = arg2 > - > -IDX = %rax > -ROUND = %rbx > -TBL = reg3 > - > -inp0 = %r9 > -inp1 = %r10 > -inp2 = %r11 > -inp3 = %r12 > -inp4 = %r13 > -inp5 = %r14 > -inp6 = %r15 > -inp7 = reg4 > - > -a = %ymm0 > -b = %ymm1 > -c = %ymm2 > -d = %ymm3 > -e = %ymm4 > -f = %ymm5 > -g = %ymm6 > -h = %ymm7 > - > -T1 = %ymm8 > - > -a0 = %ymm12 > -a1 = %ymm13 > -a2 = %ymm14 > -TMP = %ymm15 > -TMP0 = %ymm6 > -TMP1 = %ymm7 > - > -TT0 = %ymm8 > -TT1 = %ymm9 > -TT2 = %ymm10 > -TT3 = %ymm11 > -TT4 = %ymm12 > -TT5 = %ymm13 > -TT6 = %ymm14 > -TT7 = %ymm15 > - > -# Define stack usage > - > -# Assume stack aligned to 32 bytes before call > -# Therefore FRAMESZ mod 32 must be 32-8 = 24 > - > -#define FRAMESZ 0x388 > - > -#define VMOVPS vmovups > - > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 > -# "transpose" data in {r0...r7} using temps {t0...t1} > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} > -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} > -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} > -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} > -# > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} > -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} > -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} > -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} > -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} > -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} > -# r5 = {h5 g5 
f5 e5 d5 c5 b5 a5} > -# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} > -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} > -# > - > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 > - # process top half (r0..r3) {a...d} > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} > - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} > - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} > - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} > - > - # use r2 in place of t0 > - # process bottom half (r4..r7) {e...h} > - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} > - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} > - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} > - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} > - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} > - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} > - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} > - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} > - > - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 > - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 > - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 > - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 > - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 > - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 > - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 > - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 > - > -.endm > - > -.macro ROTATE_ARGS > -TMP_ = h > -h = g > -g = f > -f = e > -e = d > -d = c > -c = b > -b = a > -a = TMP_ > -.endm > - > -.macro _PRORD reg imm tmp > - vpslld $(32-\imm),\reg,\tmp > - vpsrld $\imm,\reg, \reg > - vpor \tmp,\reg, \reg > -.endm > - > -# PRORD_nd reg, imm, tmp, src > 
-.macro _PRORD_nd reg imm tmp src > - vpslld $(32-\imm), \src, \tmp > - vpsrld $\imm, \src, \reg > - vpor \tmp, \reg, \reg > -.endm > - > -# PRORD dst/src, amt > -.macro PRORD reg imm > - _PRORD \reg,\imm,TMP > -.endm > - > -# PRORD_nd dst, src, amt > -.macro PRORD_nd reg tmp imm > - _PRORD_nd \reg, \imm, TMP, \tmp > -.endm > - > -# arguments passed implicitly in preprocessor symbols i, a...h > -.macro ROUND_00_15 _T1 i > - PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) > - > - vpxor g, f, a2 # ch: a2 = f^g > - vpand e,a2, a2 # ch: a2 = (f^g)&e > - vpxor g, a2, a2 # a2 = ch > - > - PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) > - > - vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) > - vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K > - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) > - PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) > - vpaddd a2, h, h # h = h + ch > - PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) > - vpaddd \_T1,h, h # h = h + ch + W + K > - vpxor a1, a0, a0 # a0 = sigma1 > - PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) > - vpxor c, a, \_T1 # maj: T1 = a^c > - add $SZ8, ROUND # ROUND++ > - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b > - vpaddd a0, h, h > - vpaddd h, d, d > - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) > - PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) > - vpxor a1, a2, a2 # a2 = sig0 > - vpand c, a, a1 # maj: a1 = a&c > - vpor \_T1, a1, a1 # a1 = maj > - vpaddd a1, h, h # h = h + ch + W + K + maj > - vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 > - ROTATE_ARGS > -.endm > - > -# arguments passed implicitly in preprocessor symbols i, a...h > -.macro ROUND_16_XX _T1 i > - vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 > - vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 > - vmovdqu \_T1, a0 > - PRORD \_T1,11 > - vmovdqu a1, a2 > - PRORD a1,2 > - vpxor a0, \_T1, \_T1 > - PRORD \_T1, 7 > - vpxor a2, a1, a1 > - PRORD a1, 17 > - vpsrld $3, a0, a0 > - vpxor a0, \_T1, \_T1 > - vpsrld $10, a2, a2 > - vpxor a2, a1, a1 > - vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 > - vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, 
a1 > - vpaddd a1, \_T1, \_T1 > - > - ROUND_00_15 \_T1,\i > -.endm > - > -# SHA256_ARGS: > -# UINT128 digest[8]; // transposed digests > -# UINT8 *data_ptr[4]; > - > -# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); > -# arg 1 : STATE : pointer to array of pointers to input data > -# arg 2 : INP_SIZE : size of input in blocks > - # general registers preserved in outer calling routine > - # outer calling routine saves all the XMM registers > - # save rsp, allocate 32-byte aligned for local variables > -ENTRY(sha256_x8_avx2) > - > - # save callee-saved clobbered registers to comply with C function ABI > - push %r12 > - push %r13 > - push %r14 > - push %r15 > - > - mov %rsp, IDX > - sub $FRAMESZ, %rsp > - and $~0x1F, %rsp > - mov IDX, _rsp(%rsp) > - > - # Load the pre-transposed incoming digest. > - vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a > - vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b > - vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c > - vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d > - vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e > - vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f > - vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g > - vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h > - > - lea K256_8(%rip),TBL > - > - # load the address of each of the 4 message lanes > - # getting ready to transpose input onto stack > - mov _args_data_ptr+0*PTR_SZ(STATE),inp0 > - mov _args_data_ptr+1*PTR_SZ(STATE),inp1 > - mov _args_data_ptr+2*PTR_SZ(STATE),inp2 > - mov _args_data_ptr+3*PTR_SZ(STATE),inp3 > - mov _args_data_ptr+4*PTR_SZ(STATE),inp4 > - mov _args_data_ptr+5*PTR_SZ(STATE),inp5 > - mov _args_data_ptr+6*PTR_SZ(STATE),inp6 > - mov _args_data_ptr+7*PTR_SZ(STATE),inp7 > - > - xor IDX, IDX > -lloop: > - xor ROUND, ROUND > - > - # save old digest > - vmovdqu a, _digest(%rsp) > - vmovdqu b, _digest+1*SZ8(%rsp) > - vmovdqu c, _digest+2*SZ8(%rsp) > - vmovdqu d, _digest+3*SZ8(%rsp) > - vmovdqu e, _digest+4*SZ8(%rsp) > - vmovdqu f, _digest+5*SZ8(%rsp) > - vmovdqu g, _digest+6*SZ8(%rsp) > - vmovdqu h, 
_digest+7*SZ8(%rsp) > - i = 0 > -.rep 2 > - VMOVPS i*32(inp0, IDX), TT0 > - VMOVPS i*32(inp1, IDX), TT1 > - VMOVPS i*32(inp2, IDX), TT2 > - VMOVPS i*32(inp3, IDX), TT3 > - VMOVPS i*32(inp4, IDX), TT4 > - VMOVPS i*32(inp5, IDX), TT5 > - VMOVPS i*32(inp6, IDX), TT6 > - VMOVPS i*32(inp7, IDX), TT7 > - vmovdqu g, _ytmp(%rsp) > - vmovdqu h, _ytmp+1*SZ8(%rsp) > - TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 > - vmovdqu _ytmp(%rsp), g > - vpshufb TMP1, TT0, TT0 > - vpshufb TMP1, TT1, TT1 > - vpshufb TMP1, TT2, TT2 > - vpshufb TMP1, TT3, TT3 > - vpshufb TMP1, TT4, TT4 > - vpshufb TMP1, TT5, TT5 > - vpshufb TMP1, TT6, TT6 > - vpshufb TMP1, TT7, TT7 > - vmovdqu _ytmp+1*SZ8(%rsp), h > - vmovdqu TT4, _ytmp(%rsp) > - vmovdqu TT5, _ytmp+1*SZ8(%rsp) > - vmovdqu TT6, _ytmp+2*SZ8(%rsp) > - vmovdqu TT7, _ytmp+3*SZ8(%rsp) > - ROUND_00_15 TT0,(i*8+0) > - vmovdqu _ytmp(%rsp), TT0 > - ROUND_00_15 TT1,(i*8+1) > - vmovdqu _ytmp+1*SZ8(%rsp), TT1 > - ROUND_00_15 TT2,(i*8+2) > - vmovdqu _ytmp+2*SZ8(%rsp), TT2 > - ROUND_00_15 TT3,(i*8+3) > - vmovdqu _ytmp+3*SZ8(%rsp), TT3 > - ROUND_00_15 TT0,(i*8+4) > - ROUND_00_15 TT1,(i*8+5) > - ROUND_00_15 TT2,(i*8+6) > - ROUND_00_15 TT3,(i*8+7) > - i = (i+1) > -.endr > - add $64, IDX > - i = (i*8) > - > - jmp Lrounds_16_xx > -.align 16 > -Lrounds_16_xx: > -.rep 16 > - ROUND_16_XX T1, i > - i = (i+1) > -.endr > - > - cmp $ROUNDS,ROUND > - jb Lrounds_16_xx > - > - # add old digest > - vpaddd _digest+0*SZ8(%rsp), a, a > - vpaddd _digest+1*SZ8(%rsp), b, b > - vpaddd _digest+2*SZ8(%rsp), c, c > - vpaddd _digest+3*SZ8(%rsp), d, d > - vpaddd _digest+4*SZ8(%rsp), e, e > - vpaddd _digest+5*SZ8(%rsp), f, f > - vpaddd _digest+6*SZ8(%rsp), g, g > - vpaddd _digest+7*SZ8(%rsp), h, h > - > - sub $1, INP_SIZE # unit is blocks > - jne lloop > - > - # write back to memory (state object) the transposed digest > - vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) > - 
vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) > - vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) > - > - # update input pointers > - add IDX, inp0 > - mov inp0, _args_data_ptr+0*8(STATE) > - add IDX, inp1 > - mov inp1, _args_data_ptr+1*8(STATE) > - add IDX, inp2 > - mov inp2, _args_data_ptr+2*8(STATE) > - add IDX, inp3 > - mov inp3, _args_data_ptr+3*8(STATE) > - add IDX, inp4 > - mov inp4, _args_data_ptr+4*8(STATE) > - add IDX, inp5 > - mov inp5, _args_data_ptr+5*8(STATE) > - add IDX, inp6 > - mov inp6, _args_data_ptr+6*8(STATE) > - add IDX, inp7 > - mov inp7, _args_data_ptr+7*8(STATE) > - > - # Postamble > - mov _rsp(%rsp), %rsp > - > - # restore callee-saved clobbered registers > - pop %r15 > - pop %r14 > - pop %r13 > - pop %r12 > - > - ret > -ENDPROC(sha256_x8_avx2) > - > -.section .rodata.K256_8, "a", @progbits > -.align 64 > -K256_8: > - .octa 0x428a2f98428a2f98428a2f98428a2f98 > - .octa 0x428a2f98428a2f98428a2f98428a2f98 > - .octa 0x71374491713744917137449171374491 > - .octa 0x71374491713744917137449171374491 > - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf > - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf > - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 > - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 > - .octa 0x3956c25b3956c25b3956c25b3956c25b > - .octa 0x3956c25b3956c25b3956c25b3956c25b > - .octa 0x59f111f159f111f159f111f159f111f1 > - .octa 0x59f111f159f111f159f111f159f111f1 > - .octa 0x923f82a4923f82a4923f82a4923f82a4 > - .octa 0x923f82a4923f82a4923f82a4923f82a4 > - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 > - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 > - .octa 0xd807aa98d807aa98d807aa98d807aa98 > - .octa 0xd807aa98d807aa98d807aa98d807aa98 > - .octa 0x12835b0112835b0112835b0112835b01 > - .octa 0x12835b0112835b0112835b0112835b01 > - .octa 0x243185be243185be243185be243185be > - .octa 
0x243185be243185be243185be243185be > - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 > - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 > - .octa 0x72be5d7472be5d7472be5d7472be5d74 > - .octa 0x72be5d7472be5d7472be5d7472be5d74 > - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe > - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe > - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 > - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 > - .octa 0xc19bf174c19bf174c19bf174c19bf174 > - .octa 0xc19bf174c19bf174c19bf174c19bf174 > - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 > - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 > - .octa 0xefbe4786efbe4786efbe4786efbe4786 > - .octa 0xefbe4786efbe4786efbe4786efbe4786 > - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 > - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 > - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc > - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc > - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f > - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f > - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa > - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa > - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc > - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc > - .octa 0x76f988da76f988da76f988da76f988da > - .octa 0x76f988da76f988da76f988da76f988da > - .octa 0x983e5152983e5152983e5152983e5152 > - .octa 0x983e5152983e5152983e5152983e5152 > - .octa 0xa831c66da831c66da831c66da831c66d > - .octa 0xa831c66da831c66da831c66da831c66d > - .octa 0xb00327c8b00327c8b00327c8b00327c8 > - .octa 0xb00327c8b00327c8b00327c8b00327c8 > - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 > - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 > - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 > - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 > - .octa 0xd5a79147d5a79147d5a79147d5a79147 > - .octa 0xd5a79147d5a79147d5a79147d5a79147 > - .octa 0x06ca635106ca635106ca635106ca6351 > - .octa 0x06ca635106ca635106ca635106ca6351 > - .octa 0x14292967142929671429296714292967 > - .octa 0x14292967142929671429296714292967 > - .octa 0x27b70a8527b70a8527b70a8527b70a85 > - .octa 
0x27b70a8527b70a8527b70a8527b70a85 > - .octa 0x2e1b21382e1b21382e1b21382e1b2138 > - .octa 0x2e1b21382e1b21382e1b21382e1b2138 > - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc > - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc > - .octa 0x53380d1353380d1353380d1353380d13 > - .octa 0x53380d1353380d1353380d1353380d13 > - .octa 0x650a7354650a7354650a7354650a7354 > - .octa 0x650a7354650a7354650a7354650a7354 > - .octa 0x766a0abb766a0abb766a0abb766a0abb > - .octa 0x766a0abb766a0abb766a0abb766a0abb > - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e > - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e > - .octa 0x92722c8592722c8592722c8592722c85 > - .octa 0x92722c8592722c8592722c8592722c85 > - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 > - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 > - .octa 0xa81a664ba81a664ba81a664ba81a664b > - .octa 0xa81a664ba81a664ba81a664ba81a664b > - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 > - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 > - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 > - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 > - .octa 0xd192e819d192e819d192e819d192e819 > - .octa 0xd192e819d192e819d192e819d192e819 > - .octa 0xd6990624d6990624d6990624d6990624 > - .octa 0xd6990624d6990624d6990624d6990624 > - .octa 0xf40e3585f40e3585f40e3585f40e3585 > - .octa 0xf40e3585f40e3585f40e3585f40e3585 > - .octa 0x106aa070106aa070106aa070106aa070 > - .octa 0x106aa070106aa070106aa070106aa070 > - .octa 0x19a4c11619a4c11619a4c11619a4c116 > - .octa 0x19a4c11619a4c11619a4c11619a4c116 > - .octa 0x1e376c081e376c081e376c081e376c08 > - .octa 0x1e376c081e376c081e376c081e376c08 > - .octa 0x2748774c2748774c2748774c2748774c > - .octa 0x2748774c2748774c2748774c2748774c > - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 > - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 > - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 > - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 > - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a > - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a > - .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f > - .octa 
0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f > - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 > - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 > - .octa 0x748f82ee748f82ee748f82ee748f82ee > - .octa 0x748f82ee748f82ee748f82ee748f82ee > - .octa 0x78a5636f78a5636f78a5636f78a5636f > - .octa 0x78a5636f78a5636f78a5636f78a5636f > - .octa 0x84c8781484c8781484c8781484c87814 > - .octa 0x84c8781484c8781484c8781484c87814 > - .octa 0x8cc702088cc702088cc702088cc70208 > - .octa 0x8cc702088cc702088cc702088cc70208 > - .octa 0x90befffa90befffa90befffa90befffa > - .octa 0x90befffa90befffa90befffa90befffa > - .octa 0xa4506ceba4506ceba4506ceba4506ceb > - .octa 0xa4506ceba4506ceba4506ceba4506ceb > - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 > - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 > - .octa 0xc67178f2c67178f2c67178f2c67178f2 > - .octa 0xc67178f2c67178f2c67178f2c67178f2 > - > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > -.align 32 > -PSHUFFLE_BYTE_FLIP_MASK: > -.octa 0x0c0d0e0f08090a0b0405060700010203 > -.octa 0x0c0d0e0f08090a0b0405060700010203 > - > -.section .rodata.cst256.K256, "aM", @progbits, 256 > -.align 64 > -.global K256 > -K256: > - .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 > - .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 > - .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 > - .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 > - .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc > - .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da > - .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 > - .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 > - .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 > - .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 > - .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 > - .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 > - .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 > - .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 > - .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 > - .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 > diff 
--git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile > deleted file mode 100644 > index 90f1ef69152e..000000000000 > --- a/arch/x86/crypto/sha512-mb/Makefile > +++ /dev/null > @@ -1,12 +0,0 @@ > -# SPDX-License-Identifier: GPL-2.0 > -# > -# Arch-specific CryptoAPI modules. > -# > - > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ > - $(comma)4)$(comma)%ymm2,yes,no) > -ifeq ($(avx2_supported),yes) > - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o > - sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ > - sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o > -endif > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c > deleted file mode 100644 > index 26b85678012d..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb.c > +++ /dev/null > @@ -1,1047 +0,0 @@ > -/* > - * Multi buffer SHA512 algorithm Glue Code > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > - > -#include <crypto/internal/hash.h> > -#include <linux/init.h> > -#include <linux/module.h> > -#include <linux/mm.h> > -#include <linux/cryptohash.h> > -#include <linux/types.h> > -#include <linux/list.h> > -#include <crypto/scatterwalk.h> > -#include <crypto/sha.h> > -#include <crypto/mcryptd.h> > -#include <crypto/crypto_wq.h> > -#include <asm/byteorder.h> > -#include <linux/hardirq.h> > -#include <asm/fpu/api.h> > -#include "sha512_mb_ctx.h" > - > -#define FLUSH_INTERVAL 1000 /* in usec */ > - > -static struct mcryptd_alg_state sha512_mb_alg_state; > - > -struct sha512_mb_ctx { > - struct mcryptd_ahash *mcryptd_tfm; > -}; > - > -static inline struct mcryptd_hash_request_ctx > - *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) > -{ > - struct ahash_request *areq; > - > - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); > - return container_of(areq, struct mcryptd_hash_request_ctx, areq); > -} > - > -static inline struct ahash_request > - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) > -{ > - return container_of((void *) ctx, struct ahash_request, __ctx); > -} > - > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, > - struct ahash_request *areq) > -{ > - rctx->flag = HASH_UPDATE; > -} > - > -static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); > -static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) > - (struct sha512_mb_mgr *state, > - struct job_sha512 *job); > -static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) > - (struct sha512_mb_mgr *state); > -static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) > - (struct sha512_mb_mgr *state); > - > -inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], > - uint64_t total_len) > -{ > - uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); > - > - memset(&padblock[i], 0, SHA512_BLOCK_SIZE); > - padblock[i] = 0x80; > - > - i += 
((SHA512_BLOCK_SIZE - 1) & > - (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) > - + 1 + SHA512_PADLENGTHFIELD_SIZE; > - > -#if SHA512_PADLENGTHFIELD_SIZE == 16 > - *((uint64_t *) &padblock[i - 16]) = 0; > -#endif > - > - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); > - > - /* Number of extra blocks to hash */ > - return i >> SHA512_LOG2_BLOCK_SIZE; > -} > - > -static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit > - (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) > -{ > - while (ctx) { > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Clear PROCESSING bit */ > - ctx->status = HASH_CTX_STS_COMPLETE; > - return ctx; > - } > - > - /* > - * If the extra blocks are empty, begin hashing what remains > - * in the user's buffer. > - */ > - if (ctx->partial_block_buffer_length == 0 && > - ctx->incoming_buffer_length) { > - > - const void *buffer = ctx->incoming_buffer; > - uint32_t len = ctx->incoming_buffer_length; > - uint32_t copy_len; > - > - /* > - * Only entire blocks can be hashed. > - * Copy remainder to extra blocks buffer. > - */ > - copy_len = len & (SHA512_BLOCK_SIZE-1); > - > - if (copy_len) { > - len -= copy_len; > - memcpy(ctx->partial_block_buffer, > - ((const char *) buffer + len), > - copy_len); > - ctx->partial_block_buffer_length = copy_len; > - } > - > - ctx->incoming_buffer_length = 0; > - > - /* len should be a multiple of the block size now */ > - assert((len % SHA512_BLOCK_SIZE) == 0); > - > - /* Set len to the number of blocks to be hashed */ > - len >>= SHA512_LOG2_BLOCK_SIZE; > - > - if (len) { > - > - ctx->job.buffer = (uint8_t *) buffer; > - ctx->job.len = len; > - ctx = (struct sha512_hash_ctx *) > - sha512_job_mgr_submit(&mgr->mgr, > - &ctx->job); > - continue; > - } > - } > - > - /* > - * If the extra blocks are not empty, then we are > - * either on the last block(s) or we need more > - * user input before continuing. 
> - */ > - if (ctx->status & HASH_CTX_STS_LAST) { > - > - uint8_t *buf = ctx->partial_block_buffer; > - uint32_t n_extra_blocks = > - sha512_pad(buf, ctx->total_length); > - > - ctx->status = (HASH_CTX_STS_PROCESSING | > - HASH_CTX_STS_COMPLETE); > - ctx->job.buffer = buf; > - ctx->job.len = (uint32_t) n_extra_blocks; > - ctx = (struct sha512_hash_ctx *) > - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); > - continue; > - } > - > - if (ctx) > - ctx->status = HASH_CTX_STS_IDLE; > - return ctx; > - } > - > - return NULL; > -} > - > -static struct sha512_hash_ctx > - *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) > -{ > - /* > - * If get_comp_job returns NULL, there are no jobs complete. > - * If get_comp_job returns a job, verify that it is safe to return to > - * the user. > - * If it is not ready, resubmit the job to finish processing. > - * If sha512_ctx_mgr_resubmit returned a job, it is ready to be > - * returned. > - * Otherwise, all jobs currently being managed by the hash_ctx_mgr > - * still need processing. 
> - */ > - struct sha512_ctx_mgr *mgr; > - struct sha512_hash_ctx *ctx; > - unsigned long flags; > - > - mgr = cstate->mgr; > - spin_lock_irqsave(&cstate->work_lock, flags); > - ctx = (struct sha512_hash_ctx *) > - sha512_job_mgr_get_comp_job(&mgr->mgr); > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > - spin_unlock_irqrestore(&cstate->work_lock, flags); > - return ctx; > -} > - > -static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) > -{ > - sha512_job_mgr_init(&mgr->mgr); > -} > - > -static struct sha512_hash_ctx > - *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, > - struct sha512_hash_ctx *ctx, > - const void *buffer, > - uint32_t len, > - int flags) > -{ > - struct sha512_ctx_mgr *mgr; > - unsigned long irqflags; > - > - mgr = cstate->mgr; > - spin_lock_irqsave(&cstate->work_lock, irqflags); > - if (flags & ~(HASH_UPDATE | HASH_LAST)) { > - /* User should not pass anything other than UPDATE or LAST */ > - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; > - goto unlock; > - } > - > - if (ctx->status & HASH_CTX_STS_PROCESSING) { > - /* Cannot submit to a currently processing job. */ > - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; > - goto unlock; > - } > - > - if (ctx->status & HASH_CTX_STS_COMPLETE) { > - /* Cannot update a finished job. */ > - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; > - goto unlock; > - } > - > - /* > - * If we made it here, there were no errors during this call to > - * submit > - */ > - ctx->error = HASH_CTX_ERROR_NONE; > - > - /* Store buffer ptr info from user */ > - ctx->incoming_buffer = buffer; > - ctx->incoming_buffer_length = len; > - > - /* > - * Store the user's request flags and mark this ctx as currently being > - * processed. > - */ > - ctx->status = (flags & HASH_LAST) ? 
> - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : > - HASH_CTX_STS_PROCESSING; > - > - /* Advance byte counter */ > - ctx->total_length += len; > - > - /* > - * If there is anything currently buffered in the extra blocks, > - * append to it until it contains a whole block. > - * Or if the user's buffer contains less than a whole block, > - * append as much as possible to the extra block. > - */ > - if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { > - /* Compute how many bytes to copy from user buffer into extra > - * block > - */ > - uint32_t copy_len = SHA512_BLOCK_SIZE - > - ctx->partial_block_buffer_length; > - if (len < copy_len) > - copy_len = len; > - > - if (copy_len) { > - /* Copy and update relevant pointers and counters */ > - memcpy > - (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], > - buffer, copy_len); > - > - ctx->partial_block_buffer_length += copy_len; > - ctx->incoming_buffer = (const void *) > - ((const char *)buffer + copy_len); > - ctx->incoming_buffer_length = len - copy_len; > - } > - > - /* The extra block should never contain more than 1 block > - * here > - */ > - assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); > - > - /* If the extra block buffer contains exactly 1 block, it can > - * be hashed. 
> - */ > - if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { > - ctx->partial_block_buffer_length = 0; > - > - ctx->job.buffer = ctx->partial_block_buffer; > - ctx->job.len = 1; > - ctx = (struct sha512_hash_ctx *) > - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); > - } > - } > - > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > -unlock: > - spin_unlock_irqrestore(&cstate->work_lock, irqflags); > - return ctx; > -} > - > -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) > -{ > - struct sha512_ctx_mgr *mgr; > - struct sha512_hash_ctx *ctx; > - unsigned long flags; > - > - mgr = cstate->mgr; > - spin_lock_irqsave(&cstate->work_lock, flags); > - while (1) { > - ctx = (struct sha512_hash_ctx *) > - sha512_job_mgr_flush(&mgr->mgr); > - > - /* If flush returned 0, there are no more jobs in flight. */ > - if (!ctx) > - break; > - > - /* > - * If flush returned a job, resubmit the job to finish > - * processing. > - */ > - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); > - > - /* > - * If sha512_ctx_mgr_resubmit returned a job, it is ready to > - * be returned. Otherwise, all jobs currently being managed by > - * the sha512_ctx_mgr still need processing. Loop. 
> - */ > - if (ctx) > - break; > - } > - spin_unlock_irqrestore(&cstate->work_lock, flags); > - return ctx; > -} > - > -static int sha512_mb_init(struct ahash_request *areq) > -{ > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > - > - hash_ctx_init(sctx); > - sctx->job.result_digest[0] = SHA512_H0; > - sctx->job.result_digest[1] = SHA512_H1; > - sctx->job.result_digest[2] = SHA512_H2; > - sctx->job.result_digest[3] = SHA512_H3; > - sctx->job.result_digest[4] = SHA512_H4; > - sctx->job.result_digest[5] = SHA512_H5; > - sctx->job.result_digest[6] = SHA512_H6; > - sctx->job.result_digest[7] = SHA512_H7; > - sctx->total_length = 0; > - sctx->partial_block_buffer_length = 0; > - sctx->status = HASH_CTX_STS_IDLE; > - > - return 0; > -} > - > -static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) > -{ > - int i; > - struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); > - __be64 *dst = (__be64 *) rctx->out; > - > - for (i = 0; i < 8; ++i) > - dst[i] = cpu_to_be64(sctx->job.result_digest[i]); > - > - return 0; > -} > - > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, > - struct mcryptd_alg_cstate *cstate, bool flush) > -{ > - int flag = HASH_UPDATE; > - int nbytes, err = 0; > - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; > - struct sha512_hash_ctx *sha_ctx; > - > - /* more work ? 
*/ > - while (!(rctx->flag & HASH_DONE)) { > - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); > - if (nbytes < 0) { > - err = nbytes; > - goto out; > - } > - /* check if the walk is done */ > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - if (rctx->flag & HASH_FINAL) > - flag |= HASH_LAST; > - > - } > - sha_ctx = (struct sha512_hash_ctx *) > - ahash_request_ctx(&rctx->areq); > - kernel_fpu_begin(); > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, > - rctx->walk.data, nbytes, flag); > - if (!sha_ctx) { > - if (flush) > - sha_ctx = sha512_ctx_mgr_flush(cstate); > - } > - kernel_fpu_end(); > - if (sha_ctx) > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - else { > - rctx = NULL; > - goto out; > - } > - } > - > - /* copy the results */ > - if (rctx->flag & HASH_FINAL) > - sha512_mb_set_results(rctx); > - > -out: > - *ret_rctx = rctx; > - return err; > -} > - > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate, > - int err) > -{ > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha512_hash_ctx *sha_ctx; > - struct mcryptd_hash_request_ctx *req_ctx; > - int ret; > - unsigned long flags; > - > - /* remove from work list */ > - spin_lock_irqsave(&cstate->work_lock, flags); > - list_del(&rctx->waiter); > - spin_unlock_irqrestore(&cstate->work_lock, flags); > - > - if (irqs_disabled()) > - rctx->complete(&req->base, err); > - else { > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > - } > - > - /* check to see if there are other jobs that are done */ > - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); > - while (sha_ctx) { > - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&req_ctx, cstate, false); > - if (req_ctx) { > - spin_lock_irqsave(&cstate->work_lock, flags); > - list_del(&req_ctx->waiter); > - spin_unlock_irqrestore(&cstate->work_lock, flags); > - > - req = cast_mcryptd_ctx_to_req(req_ctx); > - 
if (irqs_disabled()) > - req_ctx->complete(&req->base, ret); > - else { > - local_bh_disable(); > - req_ctx->complete(&req->base, ret); > - local_bh_enable(); > - } > - } > - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); > - } > - > - return 0; > -} > - > -static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, > - struct mcryptd_alg_cstate *cstate) > -{ > - unsigned long next_flush; > - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); > - unsigned long flags; > - > - /* initialize tag */ > - rctx->tag.arrival = jiffies; /* tag the arrival time */ > - rctx->tag.seq_num = cstate->next_seq_num++; > - next_flush = rctx->tag.arrival + delay; > - rctx->tag.expire = next_flush; > - > - spin_lock_irqsave(&cstate->work_lock, flags); > - list_add_tail(&rctx->waiter, &cstate->work_list); > - spin_unlock_irqrestore(&cstate->work_lock, flags); > - > - mcryptd_arm_flusher(cstate, delay); > -} > - > -static int sha512_mb_update(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, > - areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha512_hash_ctx *sha_ctx; > - int ret = 0, nbytes; > - > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) > - rctx->flag |= HASH_DONE; > - > - /* submit */ > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > - sha512_mb_add_list(rctx, cstate); > - kernel_fpu_begin(); > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, > - nbytes, HASH_UPDATE); > - kernel_fpu_end(); > - > - /* 
check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha512_mb_finup(struct ahash_request *areq) > -{ > - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, > - areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > - > - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); > - struct sha512_hash_ctx *sha_ctx; > - int ret = 0, flag = HASH_UPDATE, nbytes; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - nbytes = crypto_ahash_walk_first(req, &rctx->walk); > - > - if (nbytes < 0) { > - ret = nbytes; > - goto done; > - } > - > - if (crypto_ahash_walk_last(&rctx->walk)) { > - rctx->flag |= HASH_DONE; > - flag = HASH_LAST; > - } > - > - /* submit */ > - rctx->flag |= HASH_FINAL; > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > - sha512_mb_add_list(rctx, cstate); > - > - kernel_fpu_begin(); > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, > - nbytes, flag); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha512_mb_final(struct ahash_request *areq) > -{ 
> - struct mcryptd_hash_request_ctx *rctx = > - container_of(areq, struct mcryptd_hash_request_ctx, > - areq); > - struct mcryptd_alg_cstate *cstate = > - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); > - > - struct sha512_hash_ctx *sha_ctx; > - int ret = 0; > - u8 data; > - > - /* sanity check */ > - if (rctx->tag.cpu != smp_processor_id()) { > - pr_err("mcryptd error: cpu clash\n"); > - goto done; > - } > - > - /* need to init context */ > - req_ctx_init(rctx, areq); > - > - rctx->flag |= HASH_DONE | HASH_FINAL; > - > - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); > - /* flag HASH_FINAL and 0 data size */ > - sha512_mb_add_list(rctx, cstate); > - kernel_fpu_begin(); > - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); > - kernel_fpu_end(); > - > - /* check if anything is returned */ > - if (!sha_ctx) > - return -EINPROGRESS; > - > - if (sha_ctx->error) { > - ret = sha_ctx->error; > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - goto done; > - } > - > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - ret = sha_finish_walk(&rctx, cstate, false); > - if (!rctx) > - return -EINPROGRESS; > -done: > - sha_complete_job(rctx, cstate, ret); > - return ret; > -} > - > -static int sha512_mb_export(struct ahash_request *areq, void *out) > -{ > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(out, sctx, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha512_mb_import(struct ahash_request *areq, const void *in) > -{ > - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); > - > - memcpy(sctx, in, sizeof(*sctx)); > - > - return 0; > -} > - > -static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) > -{ > - struct mcryptd_ahash *mcryptd_tfm; > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - struct mcryptd_hash_ctx *mctx; > - > - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", > - CRYPTO_ALG_INTERNAL, > - CRYPTO_ALG_INTERNAL); > - if (IS_ERR(mcryptd_tfm)) > - return PTR_ERR(mcryptd_tfm); > 
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base); > - mctx->alg_state = &sha512_mb_alg_state; > - ctx->mcryptd_tfm = mcryptd_tfm; > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - crypto_ahash_reqsize(&mcryptd_tfm->base)); > - > - return 0; > -} > - > -static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) > -{ > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct ahash_request) + > - sizeof(struct sha512_hash_ctx)); > - > - return 0; > -} > - > -static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); > - > - mcryptd_free_ahash(ctx->mcryptd_tfm); > -} > - > -static struct ahash_alg sha512_mb_areq_alg = { > - .init = sha512_mb_init, > - .update = sha512_mb_update, > - .final = sha512_mb_final, > - .finup = sha512_mb_finup, > - .export = sha512_mb_export, > - .import = sha512_mb_import, > - .halg = { > - .digestsize = SHA512_DIGEST_SIZE, > - .statesize = sizeof(struct sha512_hash_ctx), > - .base = { > - .cra_name = "__sha512-mb", > - .cra_driver_name = "__intel_sha512-mb", > - .cra_priority = 100, > - /* > - * use ASYNC flag as some buffers in multi-buffer > - * algo may not have completed before hashing thread > - * sleep > - */ > - .cra_flags = CRYPTO_ALG_ASYNC | > - CRYPTO_ALG_INTERNAL, > - .cra_blocksize = SHA512_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT > - (sha512_mb_areq_alg.halg.base.cra_list), > - .cra_init = sha512_mb_areq_init_tfm, > - .cra_exit = sha512_mb_areq_exit_tfm, > - .cra_ctxsize = sizeof(struct sha512_hash_ctx), > - } > - } > -}; > - > -static int sha512_mb_async_init(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - 
struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_init(mcryptd_req); > -} > - > -static int sha512_mb_async_update(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_update(mcryptd_req); > -} > - > -static int sha512_mb_async_finup(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_finup(mcryptd_req); > -} > - > -static int sha512_mb_async_final(struct ahash_request *req) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_final(mcryptd_req); > -} > - > -static int sha512_mb_async_digest(struct ahash_request *req) > -{ > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - 
memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_digest(mcryptd_req); > -} > - > -static int sha512_mb_async_export(struct ahash_request *req, void *out) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - return crypto_ahash_export(mcryptd_req, out); > -} > - > -static int sha512_mb_async_import(struct ahash_request *req, const void *in) > -{ > - struct ahash_request *mcryptd_req = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); > - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; > - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); > - struct mcryptd_hash_request_ctx *rctx; > - struct ahash_request *areq; > - > - memcpy(mcryptd_req, req, sizeof(*req)); > - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); > - rctx = ahash_request_ctx(mcryptd_req); > - > - areq = &rctx->areq; > - > - ahash_request_set_tfm(areq, child); > - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, > - rctx->complete, req); > - > - return crypto_ahash_import(mcryptd_req, in); > -} > - > -static struct ahash_alg sha512_mb_async_alg = { > - .init = sha512_mb_async_init, > - .update = sha512_mb_async_update, > - .final = sha512_mb_async_final, > - .finup = sha512_mb_async_finup, > - .digest = sha512_mb_async_digest, > - .export = sha512_mb_async_export, > - .import = sha512_mb_async_import, > - .halg = { > - .digestsize = SHA512_DIGEST_SIZE, > - .statesize = sizeof(struct sha512_hash_ctx), > - .base = { > - .cra_name = "sha512", > - .cra_driver_name = "sha512_mb", > - /* > - * Low priority, since with few 
concurrent hash requests > - * this is extremely slow due to the flush delay. Users > - * whose workloads would benefit from this can request > - * it explicitly by driver name, or can increase its > - * priority at runtime using NETLINK_CRYPTO. > - */ > - .cra_priority = 50, > - .cra_flags = CRYPTO_ALG_ASYNC, > - .cra_blocksize = SHA512_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - .cra_list = LIST_HEAD_INIT > - (sha512_mb_async_alg.halg.base.cra_list), > - .cra_init = sha512_mb_async_init_tfm, > - .cra_exit = sha512_mb_async_exit_tfm, > - .cra_ctxsize = sizeof(struct sha512_mb_ctx), > - .cra_alignmask = 0, > - }, > - }, > -}; > - > -static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) > -{ > - struct mcryptd_hash_request_ctx *rctx; > - unsigned long cur_time; > - unsigned long next_flush = 0; > - struct sha512_hash_ctx *sha_ctx; > - > - > - cur_time = jiffies; > - > - while (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - if time_before(cur_time, rctx->tag.expire) > - break; > - kernel_fpu_begin(); > - sha_ctx = (struct sha512_hash_ctx *) > - sha512_ctx_mgr_flush(cstate); > - kernel_fpu_end(); > - if (!sha_ctx) { > - pr_err("sha512_mb error: nothing got flushed for" > - " non-empty list\n"); > - break; > - } > - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); > - sha_finish_walk(&rctx, cstate, true); > - sha_complete_job(rctx, cstate, 0); > - } > - > - if (!list_empty(&cstate->work_list)) { > - rctx = list_entry(cstate->work_list.next, > - struct mcryptd_hash_request_ctx, waiter); > - /* get the hash context and then flush time */ > - next_flush = rctx->tag.expire; > - mcryptd_arm_flusher(cstate, get_delay(next_flush)); > - } > - return next_flush; > -} > - > -static int __init sha512_mb_mod_init(void) > -{ > - > - int cpu; > - int err; > - struct mcryptd_alg_cstate *cpu_state; > - > - /* check for dependent cpu features */ > - if (!boot_cpu_has(X86_FEATURE_AVX2) || 
> - !boot_cpu_has(X86_FEATURE_BMI2)) > - return -ENODEV; > - > - /* initialize multibuffer structures */ > - sha512_mb_alg_state.alg_cstate = > - alloc_percpu(struct mcryptd_alg_cstate); > - > - sha512_job_mgr_init = sha512_mb_mgr_init_avx2; > - sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; > - sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; > - sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; > - > - if (!sha512_mb_alg_state.alg_cstate) > - return -ENOMEM; > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > - cpu_state->next_flush = 0; > - cpu_state->next_seq_num = 0; > - cpu_state->flusher_engaged = false; > - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); > - cpu_state->cpu = cpu; > - cpu_state->alg_state = &sha512_mb_alg_state; > - cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), > - GFP_KERNEL); > - if (!cpu_state->mgr) > - goto err2; > - sha512_ctx_mgr_init(cpu_state->mgr); > - INIT_LIST_HEAD(&cpu_state->work_list); > - spin_lock_init(&cpu_state->work_lock); > - } > - sha512_mb_alg_state.flusher = &sha512_mb_flusher; > - > - err = crypto_register_ahash(&sha512_mb_areq_alg); > - if (err) > - goto err2; > - err = crypto_register_ahash(&sha512_mb_async_alg); > - if (err) > - goto err1; > - > - > - return 0; > -err1: > - crypto_unregister_ahash(&sha512_mb_areq_alg); > -err2: > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha512_mb_alg_state.alg_cstate); > - return -ENODEV; > -} > - > -static void __exit sha512_mb_mod_fini(void) > -{ > - int cpu; > - struct mcryptd_alg_cstate *cpu_state; > - > - crypto_unregister_ahash(&sha512_mb_async_alg); > - crypto_unregister_ahash(&sha512_mb_areq_alg); > - for_each_possible_cpu(cpu) { > - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); > - kfree(cpu_state->mgr); > - } > - free_percpu(sha512_mb_alg_state.alg_cstate); > -} > - > 
-module_init(sha512_mb_mod_init); > -module_exit(sha512_mb_mod_fini); > - > -MODULE_LICENSE("GPL"); > -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); > - > -MODULE_ALIAS("sha512"); > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > deleted file mode 100644 > index e5c465bd821e..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h > +++ /dev/null > @@ -1,128 +0,0 @@ > -/* > - * Header file for multi buffer SHA512 context > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. 
> - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#ifndef _SHA_MB_CTX_INTERNAL_H > -#define _SHA_MB_CTX_INTERNAL_H > - > -#include "sha512_mb_mgr.h" > - > -#define HASH_UPDATE 0x00 > -#define HASH_LAST 0x01 > -#define HASH_DONE 0x02 > -#define HASH_FINAL 0x04 > - > -#define HASH_CTX_STS_IDLE 0x00 > -#define HASH_CTX_STS_PROCESSING 0x01 > -#define HASH_CTX_STS_LAST 0x02 > -#define HASH_CTX_STS_COMPLETE 0x04 > - > -enum hash_ctx_error { > - HASH_CTX_ERROR_NONE = 0, > - HASH_CTX_ERROR_INVALID_FLAGS = -1, > - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, > - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, > -}; > - > -#define hash_ctx_user_data(ctx) ((ctx)->user_data) > -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) > -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) > -#define hash_ctx_status(ctx) ((ctx)->status) > -#define hash_ctx_error(ctx) ((ctx)->error) > -#define hash_ctx_init(ctx) \ > - do { \ > - (ctx)->error = HASH_CTX_ERROR_NONE; \ > - (ctx)->status = HASH_CTX_STS_COMPLETE; \ > - } while (0) > - > -/* Hash Constants and Typedefs */ > -#define SHA512_DIGEST_LENGTH 8 > -#define SHA512_LOG2_BLOCK_SIZE 7 > - > -#define SHA512_PADLENGTHFIELD_SIZE 16 > - > -#ifdef SHA_MB_DEBUG > -#define assert(expr) \ > -do { \ > - if (unlikely(!(expr))) { \ > - printk(KERN_ERR "Assertion failed! 
%s,%s,%s,line=%d\n", \ > - #expr, __FILE__, __func__, __LINE__); \ > - } \ > -} while (0) > -#else > -#define assert(expr) do {} while (0) > -#endif > - > -struct sha512_ctx_mgr { > - struct sha512_mb_mgr mgr; > -}; > - > -/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ > - > -struct sha512_hash_ctx { > - /* Must be at struct offset 0 */ > - struct job_sha512 job; > - /* status flag */ > - int status; > - /* error flag */ > - int error; > - > - uint64_t total_length; > - const void *incoming_buffer; > - uint32_t incoming_buffer_length; > - uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; > - uint32_t partial_block_buffer_length; > - void *user_data; > -}; > - > -#endif > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > deleted file mode 100644 > index 178f17eef382..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h > +++ /dev/null > @@ -1,104 +0,0 @@ > -/* > - * Header file for multi buffer SHA512 algorithm manager > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#ifndef __SHA_MB_MGR_H > -#define __SHA_MB_MGR_H > - > -#include <linux/types.h> > - > -#define NUM_SHA512_DIGEST_WORDS 8 > - > -enum job_sts {STS_UNKNOWN = 0, > - STS_BEING_PROCESSED = 1, > - STS_COMPLETED = 2, > - STS_INTERNAL_ERROR = 3, > - STS_ERROR = 4 > -}; > - > -struct job_sha512 { > - u8 *buffer; > - u64 len; > - u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); > - enum job_sts status; > - void *user_data; > -}; > - > -struct sha512_args_x4 { > - uint64_t digest[8][4]; > - uint8_t *data_ptr[4]; > -}; > - > -struct sha512_lane_data { > - struct job_sha512 *job_in_lane; > -}; > - > -struct sha512_mb_mgr { > - struct sha512_args_x4 args; > - > - uint64_t lens[4]; > - > - /* each byte is index (0...7) of unused lanes */ > - uint64_t unused_lanes; > - /* byte 4 is set to FF as a flag */ > - struct sha512_lane_data ldata[4]; > -}; > - > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 > - > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); > -struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, > - struct job_sha512 *job); > -struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); > -struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); > - > -#endif > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > deleted file mode 100644 > index cf2636d4c9ba..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S > +++ /dev/null > @@ -1,281 +0,0 @@ > -/* > - * Header file for multi buffer SHA256 algorithm data structure > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -# Macros for defining data structures > - > -# Usage example > - > -#START_FIELDS # JOB_AES > -### name size align > -#FIELD _plaintext, 8, 8 # pointer to plaintext > -#FIELD _ciphertext, 8, 8 # pointer to ciphertext > -#FIELD _IV, 16, 8 # IV > -#FIELD _keys, 8, 8 # pointer to keys > -#FIELD _len, 4, 4 # length in bytes > -#FIELD _status, 4, 4 # status enumeration > -#FIELD _user_data, 8, 8 # pointer to user data > -#UNION _union, size1, align1, \ > -# size2, align2, \ > -# size3, align3, \ > -# ... > -#END_FIELDS > -#%assign _JOB_AES_size _FIELD_OFFSET > -#%assign _JOB_AES_align _STRUCT_ALIGN > - > -######################################################################### > - > -# Alternate "struc-like" syntax: > -# STRUCT job_aes2 > -# RES_Q .plaintext, 1 > -# RES_Q .ciphertext, 1 > -# RES_DQ .IV, 1 > -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN > -# RES_U .union, size1, align1, \ > -# size2, align2, \ > -# ... > -# ENDSTRUCT > -# # Following only needed if nesting > -# %assign job_aes2_size _FIELD_OFFSET > -# %assign job_aes2_align _STRUCT_ALIGN > -# > -# RES_* macros take a name, a count and an optional alignment. > -# The count is in terms of the base size of the macro, and the > -# default alignment is the base size. > -# The macros are: > -# Macro Base size > -# RES_B 1 > -# RES_W 2 > -# RES_D 4 > -# RES_Q 8 > -# RES_DQ 16 > -# RES_Y 32 > -# RES_Z 64 > -# > -# RES_U defines a union.
It's arguments are a name and two or more > -# pairs of "size, alignment" > -# > -# The two assigns are only needed if this structure is being nested > -# within another. Even if the assigns are not done, one can still use > -# STRUCT_NAME_size as the size of the structure. > -# > -# Note that for nesting, you still need to assign to STRUCT_NAME_size. > -# > -# The differences between this and using "struc" directly are that each > -# type is implicitly aligned to its natural length (although this can be > -# over-ridden with an explicit third parameter), and that the structure > -# is padded at the end to its overall alignment. > -# > - > -######################################################################### > - > -#ifndef _DATASTRUCT_ASM_ > -#define _DATASTRUCT_ASM_ > - > -#define PTR_SZ 8 > -#define SHA512_DIGEST_WORD_SIZE 8 > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 > -#define NUM_SHA512_DIGEST_WORDS 8 > -#define SZ4 4*SHA512_DIGEST_WORD_SIZE > -#define ROUNDS 80*SZ4 > -#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) > - > -# START_FIELDS > -.macro START_FIELDS > - _FIELD_OFFSET = 0 > - _STRUCT_ALIGN = 0 > -.endm > - > -# FIELD name size align > -.macro FIELD name size align > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) > - \name = _FIELD_OFFSET > - _FIELD_OFFSET = _FIELD_OFFSET + (\size) > -.if (\align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = \align > -.endif > -.endm > - > -# END_FIELDS > -.macro END_FIELDS > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) > -.endm > - > -.macro STRUCT p1 > -START_FIELDS > -.struc \p1 > -.endm > - > -.macro ENDSTRUCT > - tmp = _FIELD_OFFSET > - END_FIELDS > - tmp = (_FIELD_OFFSET - ##tmp) > -.if (tmp > 0) > - .lcomm tmp > -.endm > - > -## RES_int name size align > -.macro RES_int p1 p2 p3 > - name = \p1 > - size = \p2 > - align = .\p3 > - > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) > -.align align > -.lcomm name size > - _FIELD_OFFSET 
= _FIELD_OFFSET + (size) > -.if (align > _STRUCT_ALIGN) > - _STRUCT_ALIGN = align > -.endif > -.endm > - > -# macro RES_B name, size [, align] > -.macro RES_B _name, _size, _align=1 > -RES_int _name _size _align > -.endm > - > -# macro RES_W name, size [, align] > -.macro RES_W _name, _size, _align=2 > -RES_int _name 2*(_size) _align > -.endm > - > -# macro RES_D name, size [, align] > -.macro RES_D _name, _size, _align=4 > -RES_int _name 4*(_size) _align > -.endm > - > -# macro RES_Q name, size [, align] > -.macro RES_Q _name, _size, _align=8 > -RES_int _name 8*(_size) _align > -.endm > - > -# macro RES_DQ name, size [, align] > -.macro RES_DQ _name, _size, _align=16 > -RES_int _name 16*(_size) _align > -.endm > - > -# macro RES_Y name, size [, align] > -.macro RES_Y _name, _size, _align=32 > -RES_int _name 32*(_size) _align > -.endm > - > -# macro RES_Z name, size [, align] > -.macro RES_Z _name, _size, _align=64 > -RES_int _name 64*(_size) _align > -.endm > - > -#endif > - > -################################################################### > -### Define SHA512 Out Of Order Data Structures > -################################################################### > - > -START_FIELDS # LANE_DATA > -### name size align > -FIELD _job_in_lane, 8, 8 # pointer to job object > -END_FIELDS > - > - _LANE_DATA_size = _FIELD_OFFSET > - _LANE_DATA_align = _STRUCT_ALIGN > - > -#################################################################### > - > -START_FIELDS # SHA512_ARGS_X4 > -### name size align > -FIELD _digest, 8*8*4, 4 # transposed digest > -FIELD _data_ptr, 8*4, 8 # array of pointers to data > -END_FIELDS > - > - _SHA512_ARGS_X4_size = _FIELD_OFFSET > - _SHA512_ARGS_X4_align = _STRUCT_ALIGN > - > -##################################################################### > - > -START_FIELDS # MB_MGR > -### name size align > -FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align > -FIELD _lens, 8*4, 8 > -FIELD _unused_lanes, 8, 8 > -FIELD _ldata, _LANE_DATA_size*4, 
_LANE_DATA_align > -END_FIELDS > - > - _MB_MGR_size = _FIELD_OFFSET > - _MB_MGR_align = _STRUCT_ALIGN > - > -_args_digest = _args + _digest > -_args_data_ptr = _args + _data_ptr > - > -####################################################################### > - > -####################################################################### > -#### Define constants > -####################################################################### > - > -#define STS_UNKNOWN 0 > -#define STS_BEING_PROCESSED 1 > -#define STS_COMPLETED 2 > - > -####################################################################### > -#### Define JOB_SHA512 structure > -####################################################################### > - > -START_FIELDS # JOB_SHA512 > -### name size align > -FIELD _buffer, 8, 8 # pointer to buffer > -FIELD _len, 8, 8 # length in bytes > -FIELD _result_digest, 8*8, 32 # Digest (output) > -FIELD _status, 4, 4 > -FIELD _user_data, 8, 8 > -END_FIELDS > - > - _JOB_SHA512_size = _FIELD_OFFSET > - _JOB_SHA512_align = _STRUCT_ALIGN > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > deleted file mode 100644 > index 7c629caebc05..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S > +++ /dev/null > @@ -1,297 +0,0 @@ > -/* > - * Flush routine for SHA512 multibuffer > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. 
> - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha512_mb_mgr_datastruct.S" > - > -.extern sha512_x4_avx2 > - > -# LINUX register definitions > -#define arg1 %rdi > -#define arg2 %rsi > - > -# idx needs to be other than arg1, arg2, rbx, r12 > -#define idx %rdx > - > -# Common definitions > -#define state arg1 > -#define job arg2 > -#define len2 arg2 > - > -#define unused_lanes %rbx > -#define lane_data %rbx > -#define tmp2 %rbx > - > -#define job_rax %rax > -#define tmp1 %rax > -#define size_offset %rax > -#define tmp %rax > -#define start_offset %rax > - > -#define tmp3 arg1 > - > -#define extra_blocks arg2 > -#define p arg2 > - > -#define tmp4 %r8 > -#define lens0 %r8 > - > -#define lens1 %r9 > -#define lens2 %r10 > -#define lens3 %r11 > - > -.macro LABEL prefix n > -\prefix\n\(): > -.endm > - > -.macro JNE_SKIP i > -jne skip_\i > -.endm > - > -.altmacro > -.macro SET_OFFSET _offset > -offset = \_offset > -.endm > -.noaltmacro > - > -# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) > -# arg 1 : rcx : state > -ENTRY(sha512_mb_mgr_flush_avx2) > - FRAME_BEGIN > - push %rbx > - > - # If bit (32+3) is set, then all lanes are empty > - mov _unused_lanes(state), unused_lanes > - bt $32+7, unused_lanes > - jc return_null > - > - # find a lane with a non-null job > - xor idx, idx > - offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne one(%rip), idx > - offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne two(%rip), idx > - offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > - cmovne three(%rip), idx > - > - # copy idx to empty lanes > -copy_lane_data: > - offset = (_args + _data_ptr) > - mov offset(state,idx,8), tmp > - > - I = 0 > -.rep 4 > - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) > - cmpq $0, offset(state) > -.altmacro > - JNE_SKIP %I > - offset = (_args + _data_ptr + 8*I) > - mov tmp, offset(state) > - offset = (_lens 
+ 8*I +4) > - movl $0xFFFFFFFF, offset(state) > -LABEL skip_ %I > - I = (I+1) > -.noaltmacro > -.endr > - > - # Find min length > - mov _lens + 0*8(state),lens0 > - mov lens0,idx > - mov _lens + 1*8(state),lens1 > - cmp idx,lens1 > - cmovb lens1,idx > - mov _lens + 2*8(state),lens2 > - cmp idx,lens2 > - cmovb lens2,idx > - mov _lens + 3*8(state),lens3 > - cmp idx,lens3 > - cmovb lens3,idx > - mov idx,len2 > - and $0xF,idx > - and $~0xFF,len2 > - jz len_is_0 > - > - sub len2, lens0 > - sub len2, lens1 > - sub len2, lens2 > - sub len2, lens3 > - shr $32,len2 > - mov lens0, _lens + 0*8(state) > - mov lens1, _lens + 1*8(state) > - mov lens2, _lens + 2*8(state) > - mov lens3, _lens + 3*8(state) > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha512_x4_avx2 > - # state and idx are intact > - > -len_is_0: > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $8, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens+4(state, idx, 8) > - > - vmovq _args_digest+0*32(state, idx, 8), %xmm0 > - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 > - vmovq _args_digest+2*32(state, idx, 8), %xmm1 > - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 > - vmovq _args_digest+4*32(state, idx, 8), %xmm2 > - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 > - vmovq _args_digest+6*32(state, idx, 8), %xmm3 > - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 > - > - vmovdqu %xmm0, _result_digest(job_rax) > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > - vmovdqu %xmm2, _result_digest+2*16(job_rax) > - vmovdqu %xmm3, _result_digest+3*16(job_rax) > - > -return: > - pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor 
job_rax, job_rax > - jmp return > -ENDPROC(sha512_mb_mgr_flush_avx2) > -.align 16 > - > -ENTRY(sha512_mb_mgr_get_comp_job_avx2) > - push %rbx > - > - mov _unused_lanes(state), unused_lanes > - bt $(32+7), unused_lanes > - jc .return_null > - > - # Find min length > - mov _lens(state),lens0 > - mov lens0,idx > - mov _lens+1*8(state),lens1 > - cmp idx,lens1 > - cmovb lens1,idx > - mov _lens+2*8(state),lens2 > - cmp idx,lens2 > - cmovb lens2,idx > - mov _lens+3*8(state),lens3 > - cmp idx,lens3 > - cmovb lens3,idx > - test $~0xF,idx > - jnz .return_null > - and $0xF,idx > - > - #process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - mov _unused_lanes(state), unused_lanes > - shl $8, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF, _lens+4(state, idx, 8) > - > - vmovq _args_digest(state, idx, 8), %xmm0 > - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 > - vmovq _args_digest+2*32(state, idx, 8), %xmm1 > - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 > - vmovq _args_digest+4*32(state, idx, 8), %xmm2 > - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 > - vmovq _args_digest+6*32(state, idx, 8), %xmm3 > - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 > - > - vmovdqu %xmm0, _result_digest+0*16(job_rax) > - vmovdqu %xmm1, _result_digest+1*16(job_rax) > - vmovdqu %xmm2, _result_digest+2*16(job_rax) > - vmovdqu %xmm3, _result_digest+3*16(job_rax) > - > - pop %rbx > - > - ret > - > -.return_null: > - xor job_rax, job_rax > - pop %rbx > - ret > -ENDPROC(sha512_mb_mgr_get_comp_job_avx2) > - > -.section .rodata.cst8.one, "aM", @progbits, 8 > -.align 8 > -one: > -.quad 1 > - > -.section .rodata.cst8.two, "aM", @progbits, 8 > -.align 8 > -two: > -.quad 2 > - > -.section .rodata.cst8.three, 
"aM", @progbits, 8 > -.align 8 > -three: > -.quad 3 > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > deleted file mode 100644 > index d08805032f01..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c > +++ /dev/null > @@ -1,69 +0,0 @@ > -/* > - * Initialization code for multi buffer SHA256 algorithm for AVX2 > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. 
> - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#include "sha512_mb_mgr.h" > - > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) > -{ > - unsigned int j; > - > - /* initially all lanes are unused */ > - state->lens[0] = 0xFFFFFFFF00000000; > - state->lens[1] = 0xFFFFFFFF00000001; > - state->lens[2] = 0xFFFFFFFF00000002; > - state->lens[3] = 0xFFFFFFFF00000003; > - > - state->unused_lanes = 0xFF03020100; > - for (j = 0; j < 4; j++) > - state->ldata[j].job_in_lane = NULL; > -} > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > deleted file mode 100644 > index 4ba709ba78e5..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S > +++ /dev/null > @@ -1,224 +0,0 @@ > -/* > - * Buffer submit code for multi buffer SHA512 algorithm > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. 
> - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. > - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > - */ > - > -#include <linux/linkage.h> > -#include <asm/frame.h> > -#include "sha512_mb_mgr_datastruct.S" > - > -.extern sha512_x4_avx2 > - > -#define arg1 %rdi > -#define arg2 %rsi > - > -#define idx %rdx > -#define last_len %rdx > - > -#define size_offset %rcx > -#define tmp2 %rcx > - > -# Common definitions > -#define state arg1 > -#define job arg2 > -#define len2 arg2 > -#define p2 arg2 > - > -#define p %r11 > -#define start_offset %r11 > - > -#define unused_lanes %rbx > - > -#define job_rax %rax > -#define len %rax > - > -#define lane %r12 > -#define tmp3 %r12 > -#define lens3 %r12 > - > -#define extra_blocks %r8 > -#define lens0 %r8 > - > -#define tmp %r9 > -#define lens1 %r9 > - > -#define lane_data %r10 > -#define lens2 %r10 > - > -#define DWORD_len %eax > - > -# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) > -# arg 1 : rcx : state > -# arg 2 : rdx : job > -ENTRY(sha512_mb_mgr_submit_avx2) > - FRAME_BEGIN > - push %rbx > - push %r12 > - > - mov _unused_lanes(state), unused_lanes > - movzb %bl,lane > - shr $8, unused_lanes > - imul $_LANE_DATA_size, lane,lane_data > - movl $STS_BEING_PROCESSED, _status(job) > - lea _ldata(state, lane_data), lane_data > - mov unused_lanes, _unused_lanes(state) > - movl _len(job), DWORD_len > - > - mov job, _job_in_lane(lane_data) > - movl DWORD_len,_lens+4(state , lane, 8) > - > - # Load digest words from result_digest > - vmovdqu _result_digest+0*16(job), %xmm0 > - 
vmovdqu _result_digest+1*16(job), %xmm1 > - vmovdqu _result_digest+2*16(job), %xmm2 > - vmovdqu _result_digest+3*16(job), %xmm3 > - > - vmovq %xmm0, _args_digest(state, lane, 8) > - vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) > - vmovq %xmm1, _args_digest+2*32(state , lane, 8) > - vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) > - vmovq %xmm2, _args_digest+4*32(state , lane, 8) > - vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) > - vmovq %xmm3, _args_digest+6*32(state , lane, 8) > - vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) > - > - mov _buffer(job), p > - mov p, _args_data_ptr(state, lane, 8) > - > - cmp $0xFF, unused_lanes > - jne return_null > - > -start_loop: > - > - # Find min length > - mov _lens+0*8(state),lens0 > - mov lens0,idx > - mov _lens+1*8(state),lens1 > - cmp idx,lens1 > - cmovb lens1, idx > - mov _lens+2*8(state),lens2 > - cmp idx,lens2 > - cmovb lens2,idx > - mov _lens+3*8(state),lens3 > - cmp idx,lens3 > - cmovb lens3,idx > - mov idx,len2 > - and $0xF,idx > - and $~0xFF,len2 > - jz len_is_0 > - > - sub len2,lens0 > - sub len2,lens1 > - sub len2,lens2 > - sub len2,lens3 > - shr $32,len2 > - mov lens0, _lens + 0*8(state) > - mov lens1, _lens + 1*8(state) > - mov lens2, _lens + 2*8(state) > - mov lens3, _lens + 3*8(state) > - > - # "state" and "args" are the same address, arg1 > - # len is arg2 > - call sha512_x4_avx2 > - # state and idx are intact > - > -len_is_0: > - > - # process completed job "idx" > - imul $_LANE_DATA_size, idx, lane_data > - lea _ldata(state, lane_data), lane_data > - > - mov _job_in_lane(lane_data), job_rax > - mov _unused_lanes(state), unused_lanes > - movq $0, _job_in_lane(lane_data) > - movl $STS_COMPLETED, _status(job_rax) > - shl $8, unused_lanes > - or idx, unused_lanes > - mov unused_lanes, _unused_lanes(state) > - > - movl $0xFFFFFFFF,_lens+4(state,idx,8) > - vmovq _args_digest+0*32(state , idx, 8), %xmm0 > - vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 > - vmovq 
_args_digest+2*32(state , idx, 8), %xmm1 > - vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 > - vmovq _args_digest+4*32(state , idx, 8), %xmm2 > - vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 > - vmovq _args_digest+6*32(state , idx, 8), %xmm3 > - vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 > - > - vmovdqu %xmm0, _result_digest + 0*16(job_rax) > - vmovdqu %xmm1, _result_digest + 1*16(job_rax) > - vmovdqu %xmm2, _result_digest + 2*16(job_rax) > - vmovdqu %xmm3, _result_digest + 3*16(job_rax) > - > -return: > - pop %r12 > - pop %rbx > - FRAME_END > - ret > - > -return_null: > - xor job_rax, job_rax > - jmp return > -ENDPROC(sha512_mb_mgr_submit_avx2) > - > -/* UNUSED? > -.section .rodata.cst16, "aM", @progbits, 16 > -.align 16 > -H0: .int 0x6a09e667 > -H1: .int 0xbb67ae85 > -H2: .int 0x3c6ef372 > -H3: .int 0xa54ff53a > -H4: .int 0x510e527f > -H5: .int 0x9b05688c > -H6: .int 0x1f83d9ab > -H7: .int 0x5be0cd19 > -*/ > diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > deleted file mode 100644 > index e22e907643a6..000000000000 > --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S > +++ /dev/null > @@ -1,531 +0,0 @@ > -/* > - * Multi-buffer SHA512 algorithm hash compute routine > - * > - * This file is provided under a dual BSD/GPLv2 license. When using or > - * redistributing this file, you may do so under either license. > - * > - * GPL LICENSE SUMMARY > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of version 2 of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, but > - * WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * General Public License for more details. 
> - * > - * Contact Information: > - * Megha Dey <megha.dey@xxxxxxxxxxxxxxx> > - * > - * BSD LICENSE > - * > - * Copyright(c) 2016 Intel Corporation. > - * > - * Redistribution and use in source and binary forms, with or without > - * modification, are permitted provided that the following conditions > - * are met: > - * > - * * Redistributions of source code must retain the above copyright > - * notice, this list of conditions and the following disclaimer. > - * * Redistributions in binary form must reproduce the above copyright > - * notice, this list of conditions and the following disclaimer in > - * the documentation and/or other materials provided with the > - * distribution. > - * * Neither the name of Intel Corporation nor the names of its > - * contributors may be used to endorse or promote products derived > - * from this software without specific prior written permission. > - * > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> - */ > - > -# code to compute quad SHA512 using AVX2 > -# use YMMs to tackle the larger digest size > -# outer calling routine takes care of save and restore of XMM registers > -# Logic designed/laid out by JDG > - > -# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 > -# Stack must be aligned to 32 bytes before call > -# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 > -# Linux preserves: rcx rdx rdi rbp r13 r14 r15 > -# clobbers ymm0-15 > - > -#include <linux/linkage.h> > -#include "sha512_mb_mgr_datastruct.S" > - > -arg1 = %rdi > -arg2 = %rsi > - > -# Common definitions > -STATE = arg1 > -INP_SIZE = arg2 > - > -IDX = %rax > -ROUND = %rbx > -TBL = %r8 > - > -inp0 = %r9 > -inp1 = %r10 > -inp2 = %r11 > -inp3 = %r12 > - > -a = %ymm0 > -b = %ymm1 > -c = %ymm2 > -d = %ymm3 > -e = %ymm4 > -f = %ymm5 > -g = %ymm6 > -h = %ymm7 > - > -a0 = %ymm8 > -a1 = %ymm9 > -a2 = %ymm10 > - > -TT0 = %ymm14 > -TT1 = %ymm13 > -TT2 = %ymm12 > -TT3 = %ymm11 > -TT4 = %ymm10 > -TT5 = %ymm9 > - > -T1 = %ymm14 > -TMP = %ymm15 > - > -# Define stack usage > -STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 > - > -#define VMOVPD vmovupd > -_digest = SZ4*16 > - > -# transpose r0, r1, r2, r3, t0, t1 > -# "transpose" data in {r0..r3} using temps {t0..t3} > -# Input looks like: {r0 r1 r2 r3} > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} > -# > -# output looks like: {t0 r1 r0 r3} > -# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} > -# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} > -# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} > -# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} > - > -.macro TRANSPOSE r0 r1 r2 r3 t0 t1 > - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} > - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} > - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} > - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} > - > - vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 > - 
vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 > - vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 > - vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 > -.endm > - > -.macro ROTATE_ARGS > -TMP_ = h > -h = g > -g = f > -f = e > -e = d > -d = c > -c = b > -b = a > -a = TMP_ > -.endm > - > -# PRORQ reg, imm, tmp > -# packed-rotate-right-double > -# does a rotate by doing two shifts and an or > -.macro _PRORQ reg imm tmp > - vpsllq $(64-\imm),\reg,\tmp > - vpsrlq $\imm,\reg, \reg > - vpor \tmp,\reg, \reg > -.endm > - > -# non-destructive > -# PRORQ_nd reg, imm, tmp, src > -.macro _PRORQ_nd reg imm tmp src > - vpsllq $(64-\imm), \src, \tmp > - vpsrlq $\imm, \src, \reg > - vpor \tmp, \reg, \reg > -.endm > - > -# PRORQ dst/src, amt > -.macro PRORQ reg imm > - _PRORQ \reg, \imm, TMP > -.endm > - > -# PRORQ_nd dst, src, amt > -.macro PRORQ_nd reg tmp imm > - _PRORQ_nd \reg, \imm, TMP, \tmp > -.endm > - > -#; arguments passed implicitly in preprocessor symbols i, a...h > -.macro ROUND_00_15 _T1 i > - PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) > - > - vpxor g, f, a2 # ch: a2 = f^g > - vpand e,a2, a2 # ch: a2 = (f^g)&e > - vpxor g, a2, a2 # a2 = ch > - > - PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) > - > - offset = SZ4*(\i & 0xf) > - vmovdqu \_T1,offset(%rsp) > - vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K > - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) > - PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) > - vpaddq a2, h, h # h = h + ch > - PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) > - vpaddq \_T1,h, h # h = h + ch + W + K > - vpxor a1, a0, a0 # a0 = sigma1 > - vmovdqu a,\_T1 > - PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) > - vpxor c, \_T1, \_T1 # maj: T1 = a^c > - add $SZ4, ROUND # ROUND++ > - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b > - vpaddq a0, h, h > - vpaddq h, d, d > - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) > - PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) > - vpxor a1, a2, a2 # a2 = sig0 > - vpand c, a, a1 # maj: a1 = a&c > - vpor \_T1, a1, a1 # a1 = maj > - vpaddq a1, h, h # h = h + ch + 
W + K + maj > - vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 > - ROTATE_ARGS > -.endm > - > - > -#; arguments passed implicitly in preprocessor symbols i, a...h > -.macro ROUND_16_XX _T1 i > - vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 > - vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 > - vmovdqu \_T1, a0 > - PRORQ \_T1,7 > - vmovdqu a1, a2 > - PRORQ a1,42 > - vpxor a0, \_T1, \_T1 > - PRORQ \_T1, 1 > - vpxor a2, a1, a1 > - PRORQ a1, 19 > - vpsrlq $7, a0, a0 > - vpxor a0, \_T1, \_T1 > - vpsrlq $6, a2, a2 > - vpxor a2, a1, a1 > - vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 > - vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 > - vpaddq a1, \_T1, \_T1 > - > - ROUND_00_15 \_T1,\i > -.endm > - > - > -# void sha512_x4_avx2(void *STATE, const int INP_SIZE) > -# arg 1 : STATE : pointer to input data > -# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) > -ENTRY(sha512_x4_avx2) > - # general registers preserved in outer calling routine > - # outer calling routine saves all the XMM registers > - # save callee-saved clobbered registers to comply with C function ABI > - push %r12 > - push %r13 > - push %r14 > - push %r15 > - > - sub $STACK_SPACE1, %rsp > - > - # Load the pre-transposed incoming digest. 
> - vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a > - vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b > - vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c > - vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d > - vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e > - vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f > - vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g > - vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h > - > - lea K512_4(%rip),TBL > - > - # load the address of each of the 4 message lanes > - # getting ready to transpose input onto stack > - mov _data_ptr+0*PTR_SZ(STATE),inp0 > - mov _data_ptr+1*PTR_SZ(STATE),inp1 > - mov _data_ptr+2*PTR_SZ(STATE),inp2 > - mov _data_ptr+3*PTR_SZ(STATE),inp3 > - > - xor IDX, IDX > -lloop: > - xor ROUND, ROUND > - > - # save old digest > - vmovdqu a, _digest(%rsp) > - vmovdqu b, _digest+1*SZ4(%rsp) > - vmovdqu c, _digest+2*SZ4(%rsp) > - vmovdqu d, _digest+3*SZ4(%rsp) > - vmovdqu e, _digest+4*SZ4(%rsp) > - vmovdqu f, _digest+5*SZ4(%rsp) > - vmovdqu g, _digest+6*SZ4(%rsp) > - vmovdqu h, _digest+7*SZ4(%rsp) > - i = 0 > -.rep 4 > - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP > - VMOVPD i*32(inp0, IDX), TT2 > - VMOVPD i*32(inp1, IDX), TT1 > - VMOVPD i*32(inp2, IDX), TT4 > - VMOVPD i*32(inp3, IDX), TT3 > - TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 > - vpshufb TMP, TT0, TT0 > - vpshufb TMP, TT1, TT1 > - vpshufb TMP, TT2, TT2 > - vpshufb TMP, TT3, TT3 > - ROUND_00_15 TT0,(i*4+0) > - ROUND_00_15 TT1,(i*4+1) > - ROUND_00_15 TT2,(i*4+2) > - ROUND_00_15 TT3,(i*4+3) > - i = (i+1) > -.endr > - add $128, IDX > - > - i = (i*4) > - > - jmp Lrounds_16_xx > -.align 16 > -Lrounds_16_xx: > -.rep 16 > - ROUND_16_XX T1, i > - i = (i+1) > -.endr > - cmp $0xa00,ROUND > - jb Lrounds_16_xx > - > - # add old digest > - vpaddq _digest(%rsp), a, a > - vpaddq _digest+1*SZ4(%rsp), b, b > - vpaddq _digest+2*SZ4(%rsp), c, c > - vpaddq _digest+3*SZ4(%rsp), d, d > - vpaddq _digest+4*SZ4(%rsp), e, e > - vpaddq _digest+5*SZ4(%rsp), f, f > - vpaddq _digest+6*SZ4(%rsp), g, g > - vpaddq _digest+7*SZ4(%rsp), h, h > - 
> - sub $1, INP_SIZE # unit is blocks > - jne lloop > - > - # write back to memory (state object) the transposed digest > - vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) > - vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) > - > - # update input data pointers > - add IDX, inp0 > - mov inp0, _data_ptr+0*PTR_SZ(STATE) > - add IDX, inp1 > - mov inp1, _data_ptr+1*PTR_SZ(STATE) > - add IDX, inp2 > - mov inp2, _data_ptr+2*PTR_SZ(STATE) > - add IDX, inp3 > - mov inp3, _data_ptr+3*PTR_SZ(STATE) > - > - #;;;;;;;;;;;;;;; > - #; Postamble > - add $STACK_SPACE1, %rsp > - # restore callee-saved clobbered registers > - > - pop %r15 > - pop %r14 > - pop %r13 > - pop %r12 > - > - # outer calling routine restores XMM and other GP registers > - ret > -ENDPROC(sha512_x4_avx2) > - > -.section .rodata.K512_4, "a", @progbits > -.align 64 > -K512_4: > - .octa 0x428a2f98d728ae22428a2f98d728ae22,\ > - 0x428a2f98d728ae22428a2f98d728ae22 > - .octa 0x7137449123ef65cd7137449123ef65cd,\ > - 0x7137449123ef65cd7137449123ef65cd > - .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ > - 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f > - .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ > - 0xe9b5dba58189dbbce9b5dba58189dbbc > - .octa 0x3956c25bf348b5383956c25bf348b538,\ > - 0x3956c25bf348b5383956c25bf348b538 > - .octa 0x59f111f1b605d01959f111f1b605d019,\ > - 0x59f111f1b605d01959f111f1b605d019 > - .octa 0x923f82a4af194f9b923f82a4af194f9b,\ > - 0x923f82a4af194f9b923f82a4af194f9b > - .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ > - 0xab1c5ed5da6d8118ab1c5ed5da6d8118 > - .octa 0xd807aa98a3030242d807aa98a3030242,\ > - 0xd807aa98a3030242d807aa98a3030242 > - .octa 0x12835b0145706fbe12835b0145706fbe,\ > - 0x12835b0145706fbe12835b0145706fbe > - .octa 
0x243185be4ee4b28c243185be4ee4b28c,\ > - 0x243185be4ee4b28c243185be4ee4b28c > - .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ > - 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 > - .octa 0x72be5d74f27b896f72be5d74f27b896f,\ > - 0x72be5d74f27b896f72be5d74f27b896f > - .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ > - 0x80deb1fe3b1696b180deb1fe3b1696b1 > - .octa 0x9bdc06a725c712359bdc06a725c71235,\ > - 0x9bdc06a725c712359bdc06a725c71235 > - .octa 0xc19bf174cf692694c19bf174cf692694,\ > - 0xc19bf174cf692694c19bf174cf692694 > - .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ > - 0xe49b69c19ef14ad2e49b69c19ef14ad2 > - .octa 0xefbe4786384f25e3efbe4786384f25e3,\ > - 0xefbe4786384f25e3efbe4786384f25e3 > - .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ > - 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 > - .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ > - 0x240ca1cc77ac9c65240ca1cc77ac9c65 > - .octa 0x2de92c6f592b02752de92c6f592b0275,\ > - 0x2de92c6f592b02752de92c6f592b0275 > - .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ > - 0x4a7484aa6ea6e4834a7484aa6ea6e483 > - .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ > - 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 > - .octa 0x76f988da831153b576f988da831153b5,\ > - 0x76f988da831153b576f988da831153b5 > - .octa 0x983e5152ee66dfab983e5152ee66dfab,\ > - 0x983e5152ee66dfab983e5152ee66dfab > - .octa 0xa831c66d2db43210a831c66d2db43210,\ > - 0xa831c66d2db43210a831c66d2db43210 > - .octa 0xb00327c898fb213fb00327c898fb213f,\ > - 0xb00327c898fb213fb00327c898fb213f > - .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ > - 0xbf597fc7beef0ee4bf597fc7beef0ee4 > - .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ > - 0xc6e00bf33da88fc2c6e00bf33da88fc2 > - .octa 0xd5a79147930aa725d5a79147930aa725,\ > - 0xd5a79147930aa725d5a79147930aa725 > - .octa 0x06ca6351e003826f06ca6351e003826f,\ > - 0x06ca6351e003826f06ca6351e003826f > - .octa 0x142929670a0e6e70142929670a0e6e70,\ > - 0x142929670a0e6e70142929670a0e6e70 > - .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ > - 0x27b70a8546d22ffc27b70a8546d22ffc > - .octa 
0x2e1b21385c26c9262e1b21385c26c926,\ > - 0x2e1b21385c26c9262e1b21385c26c926 > - .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ > - 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed > - .octa 0x53380d139d95b3df53380d139d95b3df,\ > - 0x53380d139d95b3df53380d139d95b3df > - .octa 0x650a73548baf63de650a73548baf63de,\ > - 0x650a73548baf63de650a73548baf63de > - .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ > - 0x766a0abb3c77b2a8766a0abb3c77b2a8 > - .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ > - 0x81c2c92e47edaee681c2c92e47edaee6 > - .octa 0x92722c851482353b92722c851482353b,\ > - 0x92722c851482353b92722c851482353b > - .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ > - 0xa2bfe8a14cf10364a2bfe8a14cf10364 > - .octa 0xa81a664bbc423001a81a664bbc423001,\ > - 0xa81a664bbc423001a81a664bbc423001 > - .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ > - 0xc24b8b70d0f89791c24b8b70d0f89791 > - .octa 0xc76c51a30654be30c76c51a30654be30,\ > - 0xc76c51a30654be30c76c51a30654be30 > - .octa 0xd192e819d6ef5218d192e819d6ef5218,\ > - 0xd192e819d6ef5218d192e819d6ef5218 > - .octa 0xd69906245565a910d69906245565a910,\ > - 0xd69906245565a910d69906245565a910 > - .octa 0xf40e35855771202af40e35855771202a,\ > - 0xf40e35855771202af40e35855771202a > - .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ > - 0x106aa07032bbd1b8106aa07032bbd1b8 > - .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ > - 0x19a4c116b8d2d0c819a4c116b8d2d0c8 > - .octa 0x1e376c085141ab531e376c085141ab53,\ > - 0x1e376c085141ab531e376c085141ab53 > - .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ > - 0x2748774cdf8eeb992748774cdf8eeb99 > - .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ > - 0x34b0bcb5e19b48a834b0bcb5e19b48a8 > - .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ > - 0x391c0cb3c5c95a63391c0cb3c5c95a63 > - .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ > - 0x4ed8aa4ae3418acb4ed8aa4ae3418acb > - .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ > - 0x5b9cca4f7763e3735b9cca4f7763e373 > - .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ > - 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 > - .octa 
0x748f82ee5defb2fc748f82ee5defb2fc,\ > - 0x748f82ee5defb2fc748f82ee5defb2fc > - .octa 0x78a5636f43172f6078a5636f43172f60,\ > - 0x78a5636f43172f6078a5636f43172f60 > - .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ > - 0x84c87814a1f0ab7284c87814a1f0ab72 > - .octa 0x8cc702081a6439ec8cc702081a6439ec,\ > - 0x8cc702081a6439ec8cc702081a6439ec > - .octa 0x90befffa23631e2890befffa23631e28,\ > - 0x90befffa23631e2890befffa23631e28 > - .octa 0xa4506cebde82bde9a4506cebde82bde9,\ > - 0xa4506cebde82bde9a4506cebde82bde9 > - .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ > - 0xbef9a3f7b2c67915bef9a3f7b2c67915 > - .octa 0xc67178f2e372532bc67178f2e372532b,\ > - 0xc67178f2e372532bc67178f2e372532b > - .octa 0xca273eceea26619cca273eceea26619c,\ > - 0xca273eceea26619cca273eceea26619c > - .octa 0xd186b8c721c0c207d186b8c721c0c207,\ > - 0xd186b8c721c0c207d186b8c721c0c207 > - .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ > - 0xeada7dd6cde0eb1eeada7dd6cde0eb1e > - .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ > - 0xf57d4f7fee6ed178f57d4f7fee6ed178 > - .octa 0x06f067aa72176fba06f067aa72176fba,\ > - 0x06f067aa72176fba06f067aa72176fba > - .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ > - 0x0a637dc5a2c898a60a637dc5a2c898a6 > - .octa 0x113f9804bef90dae113f9804bef90dae,\ > - 0x113f9804bef90dae113f9804bef90dae > - .octa 0x1b710b35131c471b1b710b35131c471b,\ > - 0x1b710b35131c471b1b710b35131c471b > - .octa 0x28db77f523047d8428db77f523047d84,\ > - 0x28db77f523047d8428db77f523047d84 > - .octa 0x32caab7b40c7249332caab7b40c72493,\ > - 0x32caab7b40c7249332caab7b40c72493 > - .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ > - 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc > - .octa 0x431d67c49c100d4c431d67c49c100d4c,\ > - 0x431d67c49c100d4c431d67c49c100d4c > - .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ > - 0x4cc5d4becb3e42b64cc5d4becb3e42b6 > - .octa 0x597f299cfc657e2a597f299cfc657e2a,\ > - 0x597f299cfc657e2a597f299cfc657e2a > - .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ > - 0x5fcb6fab3ad6faec5fcb6fab3ad6faec > - .octa 
0x6c44198c4a4758176c44198c4a475817,\ > - 0x6c44198c4a4758176c44198c4a475817 > - > -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 > -.align 32 > -PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 > - .octa 0x18191a1b1c1d1e1f1011121314151617 > diff --git a/crypto/Kconfig b/crypto/Kconfig > index f3e40ac56d93..4ee600bdefdb 100644 > --- a/crypto/Kconfig > +++ b/crypto/Kconfig > @@ -213,20 +213,6 @@ config CRYPTO_CRYPTD > converts an arbitrary synchronous software crypto algorithm > into an asynchronous algorithm that executes in a kernel thread. > > -config CRYPTO_MCRYPTD > - tristate "Software async multi-buffer crypto daemon" > - select CRYPTO_BLKCIPHER > - select CRYPTO_HASH > - select CRYPTO_MANAGER > - select CRYPTO_WORKQUEUE > - help > - This is a generic software asynchronous crypto daemon that > - provides the kernel thread to assist multi-buffer crypto > - algorithms for submitting jobs and flushing jobs in multi-buffer > - crypto algorithms. Multi-buffer crypto algorithms are executed > - in the context of this kernel thread and drivers can post > - their crypto request asynchronously to be processed by this daemon. > - > config CRYPTO_AUTHENC > tristate "Authenc support" > select CRYPTO_AEAD > @@ -848,54 +834,6 @@ config CRYPTO_SHA1_PPC_SPE > SHA-1 secure hash standard (DFIPS 180-4) implemented > using powerpc SPE SIMD instruction set. > > -config CRYPTO_SHA1_MB > - tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" > - depends on X86 && 64BIT > - select CRYPTO_SHA1 > - select CRYPTO_HASH > - select CRYPTO_MCRYPTD > - help > - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > - using multi-buffer technique. This algorithm computes on > - multiple data lanes concurrently with SIMD instructions for > - better throughput. It should not be enabled by default but > - used when there is significant amount of work to keep the keep > - the data lanes filled to get performance benefit. 
If the data > - lanes remain unfilled, a flush operation will be initiated to > - process the crypto jobs, adding a slight latency. > - > -config CRYPTO_SHA256_MB > - tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)" > - depends on X86 && 64BIT > - select CRYPTO_SHA256 > - select CRYPTO_HASH > - select CRYPTO_MCRYPTD > - help > - SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > - using multi-buffer technique. This algorithm computes on > - multiple data lanes concurrently with SIMD instructions for > - better throughput. It should not be enabled by default but > - used when there is significant amount of work to keep the keep > - the data lanes filled to get performance benefit. If the data > - lanes remain unfilled, a flush operation will be initiated to > - process the crypto jobs, adding a slight latency. > - > -config CRYPTO_SHA512_MB > - tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)" > - depends on X86 && 64BIT > - select CRYPTO_SHA512 > - select CRYPTO_HASH > - select CRYPTO_MCRYPTD > - help > - SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented > - using multi-buffer technique. This algorithm computes on > - multiple data lanes concurrently with SIMD instructions for > - better throughput. It should not be enabled by default but > - used when there is significant amount of work to keep the keep > - the data lanes filled to get performance benefit. If the data > - lanes remain unfilled, a flush operation will be initiated to > - process the crypto jobs, adding a slight latency. 
> - > config CRYPTO_SHA256 > tristate "SHA224 and SHA256 digest algorithm" > select CRYPTO_HASH > diff --git a/crypto/Makefile b/crypto/Makefile > index 6d1d40eeb964..80e3da755cbf 100644 > --- a/crypto/Makefile > +++ b/crypto/Makefile > @@ -93,7 +93,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o > obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o > obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o > obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o > -obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o > obj-$(CONFIG_CRYPTO_DES) += des_generic.o > obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o > obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o > diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c > deleted file mode 100644 > index f14152147ce8..000000000000 > --- a/crypto/mcryptd.c > +++ /dev/null > @@ -1,675 +0,0 @@ > -/* > - * Software multibuffer async crypto daemon. > - * > - * Copyright (c) 2014 Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * Adapted from crypto daemon. > - * > - * This program is free software; you can redistribute it and/or modify it > - * under the terms of the GNU General Public License as published by the Free > - * Software Foundation; either version 2 of the License, or (at your option) > - * any later version. 
> - * > - */ > - > -#include <crypto/algapi.h> > -#include <crypto/internal/hash.h> > -#include <crypto/internal/aead.h> > -#include <crypto/mcryptd.h> > -#include <crypto/crypto_wq.h> > -#include <linux/err.h> > -#include <linux/init.h> > -#include <linux/kernel.h> > -#include <linux/list.h> > -#include <linux/module.h> > -#include <linux/scatterlist.h> > -#include <linux/sched.h> > -#include <linux/sched/stat.h> > -#include <linux/slab.h> > - > -#define MCRYPTD_MAX_CPU_QLEN 100 > -#define MCRYPTD_BATCH 9 > - > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, > - unsigned int tail); > - > -struct mcryptd_flush_list { > - struct list_head list; > - struct mutex lock; > -}; > - > -static struct mcryptd_flush_list __percpu *mcryptd_flist; > - > -struct hashd_instance_ctx { > - struct crypto_ahash_spawn spawn; > - struct mcryptd_queue *queue; > -}; > - > -static void mcryptd_queue_worker(struct work_struct *work); > - > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay) > -{ > - struct mcryptd_flush_list *flist; > - > - if (!cstate->flusher_engaged) { > - /* put the flusher on the flush list */ > - flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); > - mutex_lock(&flist->lock); > - list_add_tail(&cstate->flush_list, &flist->list); > - cstate->flusher_engaged = true; > - cstate->next_flush = jiffies + delay; > - queue_delayed_work_on(smp_processor_id(), kcrypto_wq, > - &cstate->flush, delay); > - mutex_unlock(&flist->lock); > - } > -} > -EXPORT_SYMBOL(mcryptd_arm_flusher); > - > -static int mcryptd_init_queue(struct mcryptd_queue *queue, > - unsigned int max_cpu_qlen) > -{ > - int cpu; > - struct mcryptd_cpu_queue *cpu_queue; > - > - queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue); > - pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue); > - if (!queue->cpu_queue) > - return -ENOMEM; > - for_each_possible_cpu(cpu) { > - cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); > - 
pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); > - crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); > - INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); > - spin_lock_init(&cpu_queue->q_lock); > - } > - return 0; > -} > - > -static void mcryptd_fini_queue(struct mcryptd_queue *queue) > -{ > - int cpu; > - struct mcryptd_cpu_queue *cpu_queue; > - > - for_each_possible_cpu(cpu) { > - cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); > - BUG_ON(cpu_queue->queue.qlen); > - } > - free_percpu(queue->cpu_queue); > -} > - > -static int mcryptd_enqueue_request(struct mcryptd_queue *queue, > - struct crypto_async_request *request, > - struct mcryptd_hash_request_ctx *rctx) > -{ > - int cpu, err; > - struct mcryptd_cpu_queue *cpu_queue; > - > - cpu_queue = raw_cpu_ptr(queue->cpu_queue); > - spin_lock(&cpu_queue->q_lock); > - cpu = smp_processor_id(); > - rctx->tag.cpu = smp_processor_id(); > - > - err = crypto_enqueue_request(&cpu_queue->queue, request); > - pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", > - cpu, cpu_queue, request); > - spin_unlock(&cpu_queue->q_lock); > - queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); > - > - return err; > -} > - > -/* > - * Try to opportunisticlly flush the partially completed jobs if > - * crypto daemon is the only task running. 
> - */ > -static void mcryptd_opportunistic_flush(void) > -{ > - struct mcryptd_flush_list *flist; > - struct mcryptd_alg_cstate *cstate; > - > - flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); > - while (single_task_running()) { > - mutex_lock(&flist->lock); > - cstate = list_first_entry_or_null(&flist->list, > - struct mcryptd_alg_cstate, flush_list); > - if (!cstate || !cstate->flusher_engaged) { > - mutex_unlock(&flist->lock); > - return; > - } > - list_del(&cstate->flush_list); > - cstate->flusher_engaged = false; > - mutex_unlock(&flist->lock); > - cstate->alg_state->flusher(cstate); > - } > -} > - > -/* > - * Called in workqueue context, do one real cryption work (via > - * req->complete) and reschedule itself if there are more work to > - * do. > - */ > -static void mcryptd_queue_worker(struct work_struct *work) > -{ > - struct mcryptd_cpu_queue *cpu_queue; > - struct crypto_async_request *req, *backlog; > - int i; > - > - /* > - * Need to loop through more than once for multi-buffer to > - * be effective. 
> - */ > - > - cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); > - i = 0; > - while (i < MCRYPTD_BATCH || single_task_running()) { > - > - spin_lock_bh(&cpu_queue->q_lock); > - backlog = crypto_get_backlog(&cpu_queue->queue); > - req = crypto_dequeue_request(&cpu_queue->queue); > - spin_unlock_bh(&cpu_queue->q_lock); > - > - if (!req) { > - mcryptd_opportunistic_flush(); > - return; > - } > - > - if (backlog) > - backlog->complete(backlog, -EINPROGRESS); > - req->complete(req, 0); > - if (!cpu_queue->queue.qlen) > - return; > - ++i; > - } > - if (cpu_queue->queue.qlen) > - queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); > -} > - > -void mcryptd_flusher(struct work_struct *__work) > -{ > - struct mcryptd_alg_cstate *alg_cpu_state; > - struct mcryptd_alg_state *alg_state; > - struct mcryptd_flush_list *flist; > - int cpu; > - > - cpu = smp_processor_id(); > - alg_cpu_state = container_of(to_delayed_work(__work), > - struct mcryptd_alg_cstate, flush); > - alg_state = alg_cpu_state->alg_state; > - if (alg_cpu_state->cpu != cpu) > - pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n", > - cpu, alg_cpu_state->cpu); > - > - if (alg_cpu_state->flusher_engaged) { > - flist = per_cpu_ptr(mcryptd_flist, cpu); > - mutex_lock(&flist->lock); > - list_del(&alg_cpu_state->flush_list); > - alg_cpu_state->flusher_engaged = false; > - mutex_unlock(&flist->lock); > - alg_state->flusher(alg_cpu_state); > - } > -} > -EXPORT_SYMBOL_GPL(mcryptd_flusher); > - > -static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm) > -{ > - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); > - struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst); > - > - return ictx->queue; > -} > - > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, > - unsigned int tail) > -{ > - char *p; > - struct crypto_instance *inst; > - int err; > - > - p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL); > - if (!p) 
> - return ERR_PTR(-ENOMEM); > - > - inst = (void *)(p + head); > - > - err = -ENAMETOOLONG; > - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, > - "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) > - goto out_free_inst; > - > - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); > - > - inst->alg.cra_priority = alg->cra_priority + 50; > - inst->alg.cra_blocksize = alg->cra_blocksize; > - inst->alg.cra_alignmask = alg->cra_alignmask; > - > -out: > - return p; > - > -out_free_inst: > - kfree(p); > - p = ERR_PTR(err); > - goto out; > -} > - > -static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type, > - u32 *mask) > -{ > - struct crypto_attr_type *algt; > - > - algt = crypto_get_attr_type(tb); > - if (IS_ERR(algt)) > - return false; > - > - *type |= algt->type & CRYPTO_ALG_INTERNAL; > - *mask |= algt->mask & CRYPTO_ALG_INTERNAL; > - > - if (*type & *mask & CRYPTO_ALG_INTERNAL) > - return true; > - else > - return false; > -} > - > -static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) > -{ > - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); > - struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); > - struct crypto_ahash_spawn *spawn = &ictx->spawn; > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); > - struct crypto_ahash *hash; > - > - hash = crypto_spawn_ahash(spawn); > - if (IS_ERR(hash)) > - return PTR_ERR(hash); > - > - ctx->child = hash; > - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > - sizeof(struct mcryptd_hash_request_ctx) + > - crypto_ahash_reqsize(hash)); > - return 0; > -} > - > -static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) > -{ > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); > - > - crypto_free_ahash(ctx->child); > -} > - > -static int mcryptd_hash_setkey(struct crypto_ahash *parent, > - const u8 *key, unsigned int keylen) > -{ > - struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); > - struct crypto_ahash *child = ctx->child; > - int 
err; > - > - crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); > - crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) & > - CRYPTO_TFM_REQ_MASK); > - err = crypto_ahash_setkey(child, key, keylen); > - crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) & > - CRYPTO_TFM_RES_MASK); > - return err; > -} > - > -static int mcryptd_hash_enqueue(struct ahash_request *req, > - crypto_completion_t complete) > -{ > - int ret; > - > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); > - struct mcryptd_queue *queue = > - mcryptd_get_queue(crypto_ahash_tfm(tfm)); > - > - rctx->complete = req->base.complete; > - req->base.complete = complete; > - > - ret = mcryptd_enqueue_request(queue, &req->base, rctx); > - > - return ret; > -} > - > -static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) > -{ > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); > - struct crypto_ahash *child = ctx->child; > - struct ahash_request *req = ahash_request_cast(req_async); > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - struct ahash_request *desc = &rctx->areq; > - > - if (unlikely(err == -EINPROGRESS)) > - goto out; > - > - ahash_request_set_tfm(desc, child); > - ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, > - rctx->complete, req_async); > - > - rctx->out = req->result; > - err = crypto_ahash_init(desc); > - > -out: > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > -} > - > -static int mcryptd_hash_init_enqueue(struct ahash_request *req) > -{ > - return mcryptd_hash_enqueue(req, mcryptd_hash_init); > -} > - > -static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) > -{ > - struct ahash_request *req = ahash_request_cast(req_async); > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - > - if (unlikely(err == -EINPROGRESS)) > - goto out; > - > - 
rctx->out = req->result; > - err = crypto_ahash_update(&rctx->areq); > - if (err) { > - req->base.complete = rctx->complete; > - goto out; > - } > - > - return; > -out: > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > -} > - > -static int mcryptd_hash_update_enqueue(struct ahash_request *req) > -{ > - return mcryptd_hash_enqueue(req, mcryptd_hash_update); > -} > - > -static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) > -{ > - struct ahash_request *req = ahash_request_cast(req_async); > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - > - if (unlikely(err == -EINPROGRESS)) > - goto out; > - > - rctx->out = req->result; > - err = crypto_ahash_final(&rctx->areq); > - if (err) { > - req->base.complete = rctx->complete; > - goto out; > - } > - > - return; > -out: > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > -} > - > -static int mcryptd_hash_final_enqueue(struct ahash_request *req) > -{ > - return mcryptd_hash_enqueue(req, mcryptd_hash_final); > -} > - > -static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) > -{ > - struct ahash_request *req = ahash_request_cast(req_async); > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - > - if (unlikely(err == -EINPROGRESS)) > - goto out; > - rctx->out = req->result; > - err = crypto_ahash_finup(&rctx->areq); > - > - if (err) { > - req->base.complete = rctx->complete; > - goto out; > - } > - > - return; > -out: > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > -} > - > -static int mcryptd_hash_finup_enqueue(struct ahash_request *req) > -{ > - return mcryptd_hash_enqueue(req, mcryptd_hash_finup); > -} > - > -static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) > -{ > - struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); > - struct crypto_ahash *child = ctx->child; > - struct ahash_request 
*req = ahash_request_cast(req_async); > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - struct ahash_request *desc = &rctx->areq; > - > - if (unlikely(err == -EINPROGRESS)) > - goto out; > - > - ahash_request_set_tfm(desc, child); > - ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, > - rctx->complete, req_async); > - > - rctx->out = req->result; > - err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc); > - > -out: > - local_bh_disable(); > - rctx->complete(&req->base, err); > - local_bh_enable(); > -} > - > -static int mcryptd_hash_digest_enqueue(struct ahash_request *req) > -{ > - return mcryptd_hash_enqueue(req, mcryptd_hash_digest); > -} > - > -static int mcryptd_hash_export(struct ahash_request *req, void *out) > -{ > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - > - return crypto_ahash_export(&rctx->areq, out); > -} > - > -static int mcryptd_hash_import(struct ahash_request *req, const void *in) > -{ > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - > - return crypto_ahash_import(&rctx->areq, in); > -} > - > -static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, > - struct mcryptd_queue *queue) > -{ > - struct hashd_instance_ctx *ctx; > - struct ahash_instance *inst; > - struct hash_alg_common *halg; > - struct crypto_alg *alg; > - u32 type = 0; > - u32 mask = 0; > - int err; > - > - if (!mcryptd_check_internal(tb, &type, &mask)) > - return -EINVAL; > - > - halg = ahash_attr_alg(tb[1], type, mask); > - if (IS_ERR(halg)) > - return PTR_ERR(halg); > - > - alg = &halg->base; > - pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); > - inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), > - sizeof(*ctx)); > - err = PTR_ERR(inst); > - if (IS_ERR(inst)) > - goto out_put_alg; > - > - ctx = ahash_instance_ctx(inst); > - ctx->queue = queue; > - > - err = crypto_init_ahash_spawn(&ctx->spawn, halg, > - ahash_crypto_instance(inst)); > - if (err) > 
- goto out_free_inst; > - > - inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | > - (alg->cra_flags & (CRYPTO_ALG_INTERNAL | > - CRYPTO_ALG_OPTIONAL_KEY)); > - > - inst->alg.halg.digestsize = halg->digestsize; > - inst->alg.halg.statesize = halg->statesize; > - inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); > - > - inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; > - inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm; > - > - inst->alg.init = mcryptd_hash_init_enqueue; > - inst->alg.update = mcryptd_hash_update_enqueue; > - inst->alg.final = mcryptd_hash_final_enqueue; > - inst->alg.finup = mcryptd_hash_finup_enqueue; > - inst->alg.export = mcryptd_hash_export; > - inst->alg.import = mcryptd_hash_import; > - if (crypto_hash_alg_has_setkey(halg)) > - inst->alg.setkey = mcryptd_hash_setkey; > - inst->alg.digest = mcryptd_hash_digest_enqueue; > - > - err = ahash_register_instance(tmpl, inst); > - if (err) { > - crypto_drop_ahash(&ctx->spawn); > -out_free_inst: > - kfree(inst); > - } > - > -out_put_alg: > - crypto_mod_put(alg); > - return err; > -} > - > -static struct mcryptd_queue mqueue; > - > -static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb) > -{ > - struct crypto_attr_type *algt; > - > - algt = crypto_get_attr_type(tb); > - if (IS_ERR(algt)) > - return PTR_ERR(algt); > - > - switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { > - case CRYPTO_ALG_TYPE_DIGEST: > - return mcryptd_create_hash(tmpl, tb, &mqueue); > - break; > - } > - > - return -EINVAL; > -} > - > -static void mcryptd_free(struct crypto_instance *inst) > -{ > - struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst); > - struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst); > - > - switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { > - case CRYPTO_ALG_TYPE_AHASH: > - crypto_drop_ahash(&hctx->spawn); > - kfree(ahash_instance(inst)); > - return; > - default: > - crypto_drop_spawn(&ctx->spawn); > - kfree(inst); > - } > -} > - > 
-static struct crypto_template mcryptd_tmpl = { > - .name = "mcryptd", > - .create = mcryptd_create, > - .free = mcryptd_free, > - .module = THIS_MODULE, > -}; > - > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, > - u32 type, u32 mask) > -{ > - char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME]; > - struct crypto_ahash *tfm; > - > - if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME, > - "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) > - return ERR_PTR(-EINVAL); > - tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask); > - if (IS_ERR(tfm)) > - return ERR_CAST(tfm); > - if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { > - crypto_free_ahash(tfm); > - return ERR_PTR(-EINVAL); > - } > - > - return __mcryptd_ahash_cast(tfm); > -} > -EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); > - > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) > -{ > - struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); > - > - return ctx->child; > -} > -EXPORT_SYMBOL_GPL(mcryptd_ahash_child); > - > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req) > -{ > - struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); > - return &rctx->areq; > -} > -EXPORT_SYMBOL_GPL(mcryptd_ahash_desc); > - > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm) > -{ > - crypto_free_ahash(&tfm->base); > -} > -EXPORT_SYMBOL_GPL(mcryptd_free_ahash); > - > -static int __init mcryptd_init(void) > -{ > - int err, cpu; > - struct mcryptd_flush_list *flist; > - > - mcryptd_flist = alloc_percpu(struct mcryptd_flush_list); > - for_each_possible_cpu(cpu) { > - flist = per_cpu_ptr(mcryptd_flist, cpu); > - INIT_LIST_HEAD(&flist->list); > - mutex_init(&flist->lock); > - } > - > - err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN); > - if (err) { > - free_percpu(mcryptd_flist); > - return err; > - } > - > - err = crypto_register_template(&mcryptd_tmpl); > - if (err) { > - mcryptd_fini_queue(&mqueue); > - free_percpu(mcryptd_flist); > - } > - > - return err; > -} 
> - > -static void __exit mcryptd_exit(void) > -{ > - mcryptd_fini_queue(&mqueue); > - crypto_unregister_template(&mcryptd_tmpl); > - free_percpu(mcryptd_flist); > -} > - > -subsys_initcall(mcryptd_init); > -module_exit(mcryptd_exit); > - > -MODULE_LICENSE("GPL"); > -MODULE_DESCRIPTION("Software async multibuffer crypto daemon"); > -MODULE_ALIAS_CRYPTO("mcryptd"); > diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h > deleted file mode 100644 > index b67404fc4b34..000000000000 > --- a/include/crypto/mcryptd.h > +++ /dev/null > @@ -1,114 +0,0 @@ > -/* SPDX-License-Identifier: GPL-2.0 */ > -/* > - * Software async multibuffer crypto daemon headers > - * > - * Author: > - * Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> > - * > - * Copyright (c) 2014, Intel Corporation. > - */ > - > -#ifndef _CRYPTO_MCRYPT_H > -#define _CRYPTO_MCRYPT_H > - > -#include <linux/crypto.h> > -#include <linux/kernel.h> > -#include <crypto/hash.h> > - > -struct mcryptd_ahash { > - struct crypto_ahash base; > -}; > - > -static inline struct mcryptd_ahash *__mcryptd_ahash_cast( > - struct crypto_ahash *tfm) > -{ > - return (struct mcryptd_ahash *)tfm; > -} > - > -struct mcryptd_cpu_queue { > - struct crypto_queue queue; > - spinlock_t q_lock; > - struct work_struct work; > -}; > - > -struct mcryptd_queue { > - struct mcryptd_cpu_queue __percpu *cpu_queue; > -}; > - > -struct mcryptd_instance_ctx { > - struct crypto_spawn spawn; > - struct mcryptd_queue *queue; > -}; > - > -struct mcryptd_hash_ctx { > - struct crypto_ahash *child; > - struct mcryptd_alg_state *alg_state; > -}; > - > -struct mcryptd_tag { > - /* seq number of request */ > - unsigned seq_num; > - /* arrival time of request */ > - unsigned long arrival; > - unsigned long expire; > - int cpu; > -}; > - > -struct mcryptd_hash_request_ctx { > - struct list_head waiter; > - crypto_completion_t complete; > - struct mcryptd_tag tag; > - struct crypto_hash_walk walk; > - u8 *out; > - int flag; > - struct ahash_request areq; > -}; > 
- > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, > - u32 type, u32 mask); > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req); > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm); > -void mcryptd_flusher(struct work_struct *work); > - > -enum mcryptd_req_type { > - MCRYPTD_NONE, > - MCRYPTD_UPDATE, > - MCRYPTD_FINUP, > - MCRYPTD_DIGEST, > - MCRYPTD_FINAL > -}; > - > -struct mcryptd_alg_cstate { > - unsigned long next_flush; > - unsigned next_seq_num; > - bool flusher_engaged; > - struct delayed_work flush; > - int cpu; > - struct mcryptd_alg_state *alg_state; > - void *mgr; > - spinlock_t work_lock; > - struct list_head work_list; > - struct list_head flush_list; > -}; > - > -struct mcryptd_alg_state { > - struct mcryptd_alg_cstate __percpu *alg_cstate; > - unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate); > -}; > - > -/* return delay in jiffies from current time */ > -static inline unsigned long get_delay(unsigned long t) > -{ > - long delay; > - > - delay = (long) t - (long) jiffies; > - if (delay <= 0) > - return 0; > - else > - return (unsigned long) delay; > -} > - > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay); > - > -#endif >