Glue code that plugs the PCLMULQDQ accelerated CRC T10 DIF hash into the crypto framework. The config CRYPTO_CRCT10DIF_PCLMUL should be turned on to enable the feature. The crc_t10dif crypto library function will use this faster algorithm when crct10dif_pclmul module is loaded. Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/crct10dif-pclmul_glue.c | 153 ++++++++++++++++++++++++++++++++ crypto/Kconfig | 21 +++++ 3 files changed, 176 insertions(+) create mode 100644 arch/x86/crypto/crct10dif-pclmul_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 03cd731..d544a66 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -70,3 +71,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o +crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c new file mode 100644 index 0000000..e87f8d8 --- /dev/null +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -0,0 +1,153 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Xform using PCLMULQDQ Instructions + * + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/i387.h> +#include <asm/cpufeature.h> + +asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, + size_t len); + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + if (irq_fpu_usable()) { + kernel_fpu_begin(); + ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); + kernel_fpu_end(); + } else + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + if (irq_fpu_usable()) { + kernel_fpu_begin(); + *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + kernel_fpu_end(); + } else + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-pclmul", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_intel_mod_init(void) +{ + int ret; + + ret = 0; + if (cpu_has_pclmulqdq) { + ret = crypto_register_shash(&alg); + if (!ret) + crc_t10dif_update_lib(); + } + return ret; +} + +static void __exit crct10dif_intel_mod_fini(void) +{ + /* switch crc_t10dif library back to generic algorithm */ + if (cpu_has_pclmulqdq) { + crypto_unregister_shash(&alg); + crc_t10dif_update_lib(); + } +} + +module_init(crct10dif_intel_mod_init); +module_exit(crct10dif_intel_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crct10dif"); +MODULE_ALIAS("crct10dif-pclmul"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 0e7a237..02075ff 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -376,6 +376,27 @@ config CRYPTO_CRC32_PCLMUL which will enable any routine to use the CRC-32-IEEE 802.3 checksum and gain better performance as compared with the table implementation. +config CRYPTO_CRCT10DIF + bool "CRCT10DIF algorithm" + depends on CRC_T10DIF + select CRYPTO_HASH + help + CRC T10 Data Integrity Field computation is being cast as + a crypto transform. This allows for faster crc t10 diff + transforms to be used if they are available. + +config CRYPTO_CRCT10DIF_PCLMUL + tristate "CRCT10DIF PCLMULQDQ hardware acceleration" + depends on X86 && 64BIT && CRC_T10DIF + select CRYPTO_HASH + select CRYPTO_CRCT10DIF + help + For x86_64 processors with SSE4.2 and PCLMULQDQ supported, + CRC T10 DIF PCLMULQDQ computation can be hardware + accelerated PCLMULQDQ instruction. This option will create + 'crct10dif-plcmul' module, which is faster when computing the + crct10dif checksum compared to the generic table implementation. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html