This device exposes asymmetric crypto primitives rather than complete operations; it supports only memory-mapped I/O, and its memory is addressable only one 32-bit word at a time. As a result, some of this code necessarily differs from the usual patterns in other crypto accelerator drivers. Signed-off-by: Daniel Parks <danielrparks@xxxxxx> --- drivers/crypto/Kconfig | 2 + drivers/crypto/Makefile | 1 + drivers/crypto/sa2ul_pka/Kconfig | 26 + drivers/crypto/sa2ul_pka/Makefile | 3 + drivers/crypto/sa2ul_pka/sa2ul_pka.h | 135 ++++ drivers/crypto/sa2ul_pka/sa2ul_pka_base.c | 564 ++++++++++++++++++++++ drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c | 150 ++++ drivers/crypto/sa2ul_pka/sa2ul_pka_op.c | 205 ++++ drivers/crypto/sa2ul_pka/sa2ul_pka_op.h | 28 ++ drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c | 193 ++++ drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c | 316 ++++ 11 files changed, 1623 insertions(+) create mode 100644 drivers/crypto/sa2ul_pka/Kconfig create mode 100644 drivers/crypto/sa2ul_pka/Makefile create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka.h create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_base.c create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_op.c create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_op.h create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 3e6aa319920b..5238e3028ffd 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -816,7 +816,9 @@ config CRYPTO_DEV_SA2UL K3 devices include a security accelerator engine that may be used for crypto offload. Select this if you want to use hardware acceleration for cryptographic algorithms on these devices. +source "drivers/crypto/sa2ul_pka/Kconfig" + source "drivers/crypto/keembay/Kconfig" endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index f81703a86b98..3c66515ded16 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -37,8 +37,9 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ obj-$(CONFIG_CRYPTO_DEV_QCOM_RNG) += qcom-rng.o obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o +obj-$(CONFIG_CRYPTO_DEV_SA2UL_PKA) += sa2ul_pka/ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-$(CONFIG_ARCH_STM32) += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o diff --git a/drivers/crypto/sa2ul_pka/Kconfig b/drivers/crypto/sa2ul_pka/Kconfig new file mode 100644 index 000000000000..c4f87b14878e --- /dev/null +++ b/drivers/crypto/sa2ul_pka/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config CRYPTO_DEV_SA2UL_PKA + tristate "Support for TI security accelerator public-key module" + depends on CRYPTO_DEV_SA2UL || COMPILE_TEST + select ARM64_CRYPTO + select CRYPTO_AKCIPHER + select CRYPTO_RSA + select PACKING + select CRYPTO_DH + help + The K3 security accelerator engine contains a public-key + cryptography module. Select this if you want to use hardware + acceleration for asymmetric cryptography on these devices. This + engine is not available to the Linux cores on most devices; check + your device tree if unsure.
+ +config CRYPTO_DEV_SA2UL_PKA_DEBUG + bool "Debugging options for TI security accelerator public-key module" + depends on CRYPTO_DEV_SA2UL_PKA + default n + help + Enables the module options 'snapshot' and 'poison_mem' to assist + in debugging the PKA driver. Choosing "y" will allow the root user + to extract cryptographic keys from the driver, so do not enable in + a production build. diff --git a/drivers/crypto/sa2ul_pka/Makefile b/drivers/crypto/sa2ul_pka/Makefile new file mode 100644 index 000000000000..b5af4597f186 --- /dev/null +++ b/drivers/crypto/sa2ul_pka/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_SA2UL_PKA) += sa2ul_pka.o +sa2ul_pka-objs := sa2ul_pka_base.o sa2ul_pka_sg.o sa2ul_pka_op.o sa2ul_pka_rsa.o sa2ul_pka_dh.o diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka.h b/drivers/crypto/sa2ul_pka/sa2ul_pka.h new file mode 100644 index 000000000000..09e8929b2fa3 --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#ifndef _K3_SA2UL_PKA_ +#define _K3_SA2UL_PKA_ + +#include <crypto/akcipher.h> +#include <crypto/kpp.h> +#include <linux/scatterlist.h> + +/* registers */ +#define PKA_APTR 0x0000 +#define PKA_BPTR 0x0004 +#define PKA_CPTR 0x0008 +#define PKA_DPTR 0x000c +#define PKA_ALENGTH 0x0010 +#define PKA_BLENGTH 0x0014 +#define PKA_SHIFT 0x0018 +#define PKA_FUNCTION 0x001c +#define PKA_COMPARE 0x0020 +#define PKA_MSW 0x0024 +#define PKA_DIVMSW 0x0028 +#define PKA_STATUS 0x00c0 +#define PKA_SEQ_CTRL 0x00c8 +#define PKA_OPTIONS 0x00f4 +#define PKA_SW_REV 0x00f8 +#define PKA_REVISION 0x00fc + +#define PKA_REV 0x1fe0 +#define PKA_CLK_CTRL 0x1fe8 +#define PKA_SYSCONFIG 0x1ff0 +#define PKA_SYSSTATUS 0x1ff4 +#define PKA_IRQSTATUS 0x1ff8 +#define PKA_IRQENABLE 0x1ffc + +#define PKA_REGS_SIZE (PKA_REVISION + 4 - PKA_APTR \ + + PKA_IRQENABLE + 4 - PKA_REV) + +/* firmware info */ +#define PKA_FW_FMT "eip29t2_%u.%u.%u.bin" +#define PKA_FW_RETRIES 5 +/* memory map info */ +#define PKA_PROG_RAM_SIZE 0x4000 +#define PKA_RAM_SIZE 0x1000 + +/* register bits */ +#define PKA_FUNCTION_RUN BIT(15) +#define PKA_FUNCTION_OP_TOP_MASK 0x38 +#define PKA_FUNCTION_OP_BOTTOM_MASK 0x3 + +#define PKA_SEQ_CTRL_RST BIT(31) +#define PKA_SEQ_CTRL_START BIT(8) +#define PKA_SEQ_CTRL_STATUS_MASK 0x0000ff00 +#define PKA_SEQ_CTRL_STATUS_OFFSET 8 + +#define PKA_SYSCONFIG_SRST BIT(1) +#define PKA_SYSCONFIG_IDLE BIT(5) +#define PKA_SYSCONFIG_NOIDLE BIT(4) + +#define PKA_SYSSTATUS_RST_COMP BIT(0) + +#define PKA_RST_TIMEOUT_J 2 + +#define PKA_SW_REV_MASK 0x0fff0fff + +#define PKA_IRQ_MAIN BIT(0) + +#define PKA_MAX_VEC 520 + +/* device configuration */ +struct pka_config { + bool ecc : 1; + bool montgomery : 1; + bool prog_ram : 1; +}; + +struct pka_version { + u8 major; + u8 minor; + u8 patch; +}; + +struct pka_snapshot { + u32 *regs; + u32 *mem; +}; + +struct pka_data { + void __iomem *base; + void __iomem *mem; + struct device *dev; + struct pka_config config; + struct mutex mutex; /* locked while the PKA is executing an operation */ + bool running; + struct completion done; + int fw_tries; + char fw_name[64]; +#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG + struct pka_snapshot snapshot; +#endif +}; + +struct pka_alg_template { + struct pka_data *data; + union { + struct akcipher_alg akcipher; + struct kpp_alg kpp; + } alg; +}; + +#define 
PKA_TFM_DATA(tfm, type) \ + container_of((tfm)->base.__crt_alg, \ + struct pka_alg_template, \ + alg.type.base)->data + +int pka_op_wait(struct pka_data *data); +void pka_intr_enable(struct pka_data *dev_data); +void pka_intr_disable(struct pka_data *dev_data); + +#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG +void pka_take_snapshot(struct pka_data *data); +#else +static inline void pka_take_snapshot(struct pka_data *data) {} +#endif + +/* ciphers */ +extern struct pka_alg_template pka_rsa, pka_dh; + +#endif /* _K3_SA2UL_PKA_ */ diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c new file mode 100644 index 000000000000..46a0e6b6ea37 --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c @@ -0,0 +1,564 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/of_device.h> +#include <linux/err.h> +#include <linux/packing.h> +#include <linux/pm_runtime.h> +#include <linux/firmware.h> +#include <linux/interrupt.h> +#include <linux/sysfs.h> +#include <linux/fips.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/delay.h> +#include <crypto/internal/akcipher.h> +#include <crypto/internal/kpp.h> +#include <asm/unaligned.h> + +#include "sa2ul_pka.h" + +bool poison_mem; +bool snapshot; +#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG +module_param(poison_mem, bool, 0); +module_param(snapshot, bool, 0); +#endif + +static const struct pka_version pka_fws[] = { + {2, 1, 0}, + {} +}; + +static inline int pka_packing(void *pbuf, u64 *uval, int startbit, int endbit, + size_t pbuflen, enum packing_op op) +{ + return packing(pbuf, uval, startbit, endbit, pbuflen, op, + QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN); +} + +static void pka_parse_version(struct pka_version *out, u32 packed_version, + int startbit) +{ + u64 res; + + WARN_ON(startbit < 11); + pka_packing(&packed_version, &res, + startbit, startbit - 3, 4, UNPACK); + out->major = res; + pka_packing(&packed_version, &res, + startbit - 4, startbit - 7, 4, UNPACK); + out->minor = res; + pka_packing(&packed_version, &res, + startbit - 8, startbit - 11, 4, UNPACK); + out->patch = res; +} + +static void pka_set_idle(void __iomem *base, bool idle) +{ + writel_relaxed(idle ? 
PKA_SYSCONFIG_IDLE : PKA_SYSCONFIG_NOIDLE, + base + PKA_SYSCONFIG); +} + +static int pka_request_fw(struct pka_data *dev_data); + +/* enable/disable interrupts device-side */ +void pka_intr_enable(struct pka_data *dev_data) +{ + writel_relaxed(PKA_IRQ_MAIN, dev_data->base + PKA_IRQSTATUS); + writel_relaxed(PKA_IRQ_MAIN, dev_data->base + PKA_IRQENABLE); +} + +void pka_intr_disable(struct pka_data *dev_data) +{ + writel_relaxed(0, dev_data->base + PKA_IRQENABLE); +} + +#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG +/* debugging - allows taking snapshots of memory and register file just before + * dispatching an operation + */ +void pka_take_snapshot(struct pka_data *data) +{ + if (snapshot) { + u32 i, j = 0; + + for (i = PKA_APTR; i <= PKA_REVISION; i += 4, j++) + data->snapshot.regs[j] = readl_relaxed(data->base + i); + for (i = PKA_REV; i <= PKA_IRQENABLE; i += 4, j++) + data->snapshot.regs[j] = readl_relaxed(data->base + i); + for (i = 0, j = 0; i < PKA_RAM_SIZE; i += 4, j++) + data->snapshot.mem[j] = readl_relaxed(data->mem + i); + } +} + +static ssize_t read_snap_reg(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t offset, size_t count) +{ + struct pka_data *data = attr->private; + + memcpy(buf, (u8 *)data->snapshot.regs + offset, count); + return count; +} + +static ssize_t read_snap_mem(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t offset, size_t count) +{ + struct pka_data *data = attr->private; + + memcpy(buf, (u8 *)data->snapshot.mem + offset, count); + return count; +} + +static BIN_ATTR(regs, 0600, read_snap_reg, NULL, PKA_REGS_SIZE); +static BIN_ATTR(mem, 0600, read_snap_mem, NULL, PKA_RAM_SIZE); + +static inline int setup_debug(struct pka_data *dev_data) +{ + int ret; + struct device *dev = dev_data->dev; + size_t i; + + if (snapshot) { + dev_data->snapshot.regs = devm_kzalloc(dev, PKA_REGS_SIZE, + GFP_KERNEL); + dev_data->snapshot.mem = devm_kzalloc(dev, PKA_RAM_SIZE, + GFP_KERNEL); + if (!dev_data->snapshot.mem || !dev_data->snapshot.regs) + return -ENOMEM; + sysfs_bin_attr_init(&bin_attr_regs); + sysfs_bin_attr_init(&bin_attr_mem); + bin_attr_regs.private = dev_data; + bin_attr_mem.private = dev_data; + ret = sysfs_create_bin_file(&dev->kobj, &bin_attr_regs); + if (ret) { + dev_err(dev, "failed to create regs snapshot file\n"); + return ret; + } + ret = sysfs_create_bin_file(&dev->kobj, &bin_attr_mem); + if (ret) { + dev_err(dev, "failed to create mem snapshot file\n"); + return ret; + } + } + if (poison_mem) { + for (i = 0; i < PKA_RAM_SIZE; i += 4) + writel_relaxed(0xccccccccU, dev_data->mem + i); + } + return 0; +} + +static inline void cleanup_debug(struct platform_device *pdev) +{ + if (snapshot) { + sysfs_remove_bin_file(&pdev->dev.kobj, &bin_attr_regs); + sysfs_remove_bin_file(&pdev->dev.kobj, &bin_attr_mem); + } +} + +#else +static inline int setup_debug(struct pka_data *dev_data) +{ + return 0; +} + +static inline void cleanup_debug(struct platform_device *pdev) {} +#endif + +/* actually load the firmware */ +static inline int copy_fw(const struct firmware *fw, struct pka_data *dev_data) +{ + size_t i; + void __iomem *base = dev_data->base; + void __iomem *ram = dev_data->mem; + u32 reg; + int ret; + + writel_relaxed(PKA_SEQ_CTRL_RST, base + PKA_SEQ_CTRL); + for (i = 0; i < fw->size; i += 4) + writel_relaxed(le32_to_cpu(get_unaligned( + (u32 *)(fw->data + i))), ram + i); + writel_relaxed(0, base + PKA_SEQ_CTRL); + ret = readl_relaxed_poll_timeout(base + PKA_SEQ_CTRL, reg, (reg & PKA_SEQ_CTRL_START)
== PKA_SEQ_CTRL_START, 0, + PKA_RST_TIMEOUT_J); + if (ret) { + dev_warn(dev_data->dev, + "timed out waiting for sequencer to start 0x%08x", + reg); + dev_warn(dev_data->dev, "function: 0x%08x", + readl_relaxed(base + PKA_FUNCTION)); + return -ETIMEDOUT; + } + return 0; +} + +static inline int verify_fw(const struct firmware *fw, + struct pka_data *dev_data) +{ + size_t i; + void __iomem *base = dev_data->base; + void __iomem *ram = dev_data->mem; + u32 reg; + int ret; + + writel_relaxed(PKA_SEQ_CTRL_RST, base + PKA_SEQ_CTRL); + for (i = 0; i < fw->size; i += 4) { + if (readl_relaxed(ram + i) + != le32_to_cpu(get_unaligned((u32 *)(fw->data + i)))) { + dev_warn(dev_data->dev, + "firmware download corrupted at 0x%llx\n", + (unsigned long long)i); + return -EINVAL; + } + } + writel_relaxed(0, base + PKA_SEQ_CTRL); + ret = readl_relaxed_poll_timeout(base + PKA_SEQ_CTRL, reg, (reg & + PKA_SEQ_CTRL_START) == PKA_SEQ_CTRL_START, 0, + PKA_RST_TIMEOUT_J); + return ret; +} + +static inline void print_fw_rev(struct pka_data *dev_data) +{ + struct pka_version internal, custom; + u32 reg; + + reg = readl_relaxed(dev_data->base + PKA_SW_REV) & PKA_SW_REV_MASK; + pka_parse_version(&internal, reg, 27); + pka_parse_version(&custom, reg, 11); + dev_info(dev_data->dev, "loaded firmware internal version %u.%u.%u, custom version %u.%u.%u\n", + internal.major, internal.minor, internal.patch, + custom.major, custom.minor, custom.patch); +} + +/* load a firmware (requested from pka_request_next_fw) onto the device */ +static int pka_load_fw(const struct firmware *fw, struct pka_data *dev_data) +{ + void __iomem *base = dev_data->base; + u32 reg; + int ret = -ENOENT; + + if (!fw) { + goto fw_bad; + } else if (fw->size > PKA_PROG_RAM_SIZE) { + dev_warn(dev_data->dev, "firmware is larger than max firmware size 0x%x, skipping\n", + PKA_PROG_RAM_SIZE); + goto fw_bad; + } + ret = -EAGAIN; + if (copy_fw(fw, dev_data)) + goto rst_fail; + if (verify_fw(fw, dev_data)) + goto rst_fail; + print_fw_rev(dev_data); + release_firmware(fw); + reg = readl_relaxed(base + PKA_FUNCTION); + if (reg & PKA_FUNCTION_RUN) + dev_warn(dev_data->dev, + "run bit set after loading firmware! 0x%08x\n", reg); + + pka_intr_enable(dev_data); + return 0; + +rst_fail: + dev_warn(dev_data->dev, + "firmware download corrupt (check silicon?), retrying\n"); + dev_data->fw_tries++; +fw_bad: + release_firmware(fw); + return ret; +} + +/* Do some basic checks to make sure this device is similar enough to the + * development device to work with this driver. Also determine some information + * about the device configuration so we know which algorithms to enable. 
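+ * + * A sketch of the OPTIONS register fields as consumed below (assumed + * layout, inferred from the unpacking code in this function rather than + * from a datasheet): + * bits 1:0 PKCP configuration (must be 2); + * bits 4:2 LNME/Montgomery multiplier configuration (0 = absent); + * bits 6:5 sequencer configuration (must be 1); + * bit 7 program RAM present; + * bits 23:22 GF(2^m) (ECC) configuration (0 or 1).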
+ */ +static int pka_parse_options(struct pka_data *data, u32 options) +{ + int ret = 0; + u64 tmp; + + dev_info(data->dev, "options 0x%x\n", options); + // PKCP configuration + pka_packing(&options, &tmp, 1, 0, 4, UNPACK); + if (tmp != 2) { + ret = -EACCES; + goto out; + } + + pka_packing(&options, &tmp, 4, 2, 4, UNPACK); + if (tmp <= 2) { + data->config.montgomery = !!tmp; + } else { + ret = -EINVAL; + goto out; + } + // sequencer configuration + pka_packing(&options, &tmp, 6, 5, 4, UNPACK); + if (tmp != 1) { + ret = -EINVAL; + goto out; + } + + pka_packing(&options, &tmp, 7, 7, 4, UNPACK); + data->config.prog_ram = tmp; + // GF(2^m) configuration + pka_packing(&options, &tmp, 23, 22, 4, UNPACK); + if (tmp <= 1) { + data->config.ecc = tmp; + } else { + ret = -EINVAL; + goto out; + } + +out: + return ret; +} + +static void pka_print_hw_rev(struct pka_data *dev_data) +{ + struct pka_version ver; + u32 reg; + + reg = readl_relaxed(dev_data->base + PKA_REVISION); + pka_parse_version(&ver, reg, 27); + dev_info(dev_data->dev, "HW rev %u.%u.%u\n", + ver.major, ver.minor, ver.patch); +} + +/* try to load any available firmware into the device */ +static int pka_request_fw(struct pka_data *dev_data) +{ + int fw_next_idx = 0; + struct pka_version next_ver; + const struct firmware *fw; + int ret; + + next_ver = pka_fws[fw_next_idx++]; + while ((next_ver.major || next_ver.minor || next_ver.patch) && + dev_data->fw_tries < PKA_FW_RETRIES) { + snprintf(dev_data->fw_name, 64, PKA_FW_FMT, + next_ver.major, next_ver.minor, next_ver.patch); + + if (!request_firmware_direct(&fw, dev_data->fw_name, + dev_data->dev)) { + ret = pka_load_fw(fw, dev_data); + if (ret == 0) + return ret; + else if (ret == -EAGAIN) + fw_next_idx--; + } + next_ver = pka_fws[fw_next_idx++]; + } + + dev_warn(dev_data->dev, "unable to load firmware\n"); + return -ENOENT; +} + +/* interrupts support */ +irqreturn_t pka_intr(int irqnum, void *_data) +{ + struct pka_data *data = _data; + u32 reg; + + reg = readl_relaxed(data->base + PKA_IRQSTATUS); + if (reg & PKA_IRQ_MAIN) { + writel_relaxed(PKA_IRQ_MAIN, data->base + PKA_IRQSTATUS); + return IRQ_WAKE_THREAD; + } else { + return IRQ_NONE; + } +} + +/* data->running is not synchronized; this is just a best-effort check to aid + * in debugging. It is not intended to reliably filter bad interrupts from a + * malfunctioning device. + */ +irqreturn_t pka_intr_threaded(int irqnum, void *_data) +{ + struct pka_data *data = _data; + + if (unlikely(!data->running)) { + pr_warn("%s: spurious interrupt!\n", THIS_MODULE->name); + } else { + data->running = false; + complete(&data->done); + } + return IRQ_HANDLED; +} + +/** + * pka_op_wait - wait for operation to finish + * @data: the pka to wait for + * + * Description: + * Block waiting for the sequencer interrupt which indicates that the + * operation has finished. + * + * Return: + * * 0 - Success + * * -EINVAL - PKA returned unknown error code + * * -ETIMEDOUT - PKA died + */ +int pka_op_wait(struct pka_data *data) +{ + int ret; + u32 reg; + + ret = wait_for_completion_timeout(&data->done, HZ * 2); + reg = readl_relaxed(data->base + PKA_SEQ_CTRL); + if (!ret) { + /* if we get here, the pka is in a non-recoverable state and + * future operations will have undefined behavior, so taint the + * kernel + */ + WARN(true, "%s: pka stalled! 
function 0x%08x, seq 0x%08x\n", + THIS_MODULE->name, + readl_relaxed(data->base + PKA_FUNCTION), reg); + return -ETIMEDOUT; + } + reg = (reg & PKA_SEQ_CTRL_STATUS_MASK) >> PKA_SEQ_CTRL_STATUS_OFFSET; + if (reg == 1) + return 0; + dev_warn(data->dev, "pka op err 0x%02x\n", reg); + return -EINVAL; +} + +static int pka_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + void __iomem *pka_base; + void __iomem *pka_mem; + struct pka_data *dev_data; + int ret; + + if (fips_enabled) { + dev_err(dev, "FIPS mode is not supported\n"); + return -EACCES; + } + + dev_data = devm_kzalloc(dev, sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + pka_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pka_base)) + return PTR_ERR(pka_base); + pka_mem = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(pka_mem)) + return PTR_ERR(pka_mem); + dev_data->dev = dev; + dev_data->base = pka_base; + dev_data->mem = pka_mem; + dev_data->running = false; + init_completion(&dev_data->done); + mutex_init(&dev_data->mutex); + platform_set_drvdata(pdev, dev_data); + + pka_print_hw_rev(dev_data); + // disable smart idle mode because it isn't smart + pka_set_idle(pka_base, false); + + pm_runtime_enable(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + goto pm_disable; + + ret = pka_parse_options(dev_data, + readl_relaxed(pka_base + PKA_OPTIONS)); + if (ret) { + dev_err(dev, "failed to parse pka opts: %d\n", ret); + goto pm_put; + } + if (dev_data->config.prog_ram) { + dev_data->fw_tries = 0; + ret = pka_request_fw(dev_data); + if (ret) + goto pm_put; + } + ret = setup_debug(dev_data); + if (ret) + goto pm_put; + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto pm_put; + ret = devm_request_threaded_irq(dev, ret, pka_intr, pka_intr_threaded, + IRQF_TRIGGER_NONE, pdev->name, + dev_data); + if (ret) + goto pm_put; + + if (dev_data->config.montgomery) { + pka_rsa.data = dev_data; + ret = crypto_register_akcipher(&pka_rsa.alg.akcipher); + if (ret) { + dev_err(dev, + "failed to register crypto op pka_rsa: %d\n", + ret); + goto pm_put; + } + pka_dh.data = dev_data; + ret = crypto_register_kpp(&pka_dh.alg.kpp); + if (ret) { + dev_err(dev, + "failed to register crypto op pka_dh: %d\n", + ret); + crypto_unregister_akcipher(&pka_rsa.alg.akcipher); + goto pm_put; + } + } + + return 0; +pm_put: + pm_runtime_put_sync(dev); +pm_disable: + pm_runtime_disable(dev); + pka_set_idle(pka_base, true); + return ret; +} + +static int pka_remove(struct platform_device *pdev) +{ + struct pka_data *dev_data = platform_get_drvdata(pdev); + + pka_intr_disable(dev_data); + + if (dev_data->config.montgomery) { + crypto_unregister_akcipher(&pka_rsa.alg.akcipher); + crypto_unregister_kpp(&pka_dh.alg.kpp); + } + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pka_set_idle(dev_data->base, true); + + cleanup_debug(pdev); + + return 0; +} + +static const struct of_device_id of_match[] = { + { .compatible = "inside-secure,safexcel-eip29t2" }, + {} +}; +MODULE_DEVICE_TABLE(of, of_match); + +static struct platform_driver pka_driver = { + .probe = pka_probe, + .remove = pka_remove, + .driver = { + .name = "sa2ul-pka-crypto", + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .of_match_table = of_match, + }, +}; + +module_platform_driver(pka_driver); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Parks <danielrparks@xxxxxx>"); +MODULE_DESCRIPTION("SA2UL Public Key Accelerator Driver"); diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c new file mode 100644 index
000000000000..9edd0383b9b1 --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#include <linux/module.h> +#include <crypto/internal/kpp.h> +#include <crypto/kpp.h> +#include <crypto/dh.h> + +#include "sa2ul_pka.h" +#include "sa2ul_pka_op.h" + +/* these are needed per algorithm type because they tend to use different typing + * in their data structure + */ +static int pka_dh_copy_key_component(const void **dst, const void *src, + uint *newsz, uint sz, bool little) +{ + if (!src) + return 0; + // delete leading zeroes (check sz first so a zero-length buffer is never read) + while (sz > 0 && !*(u8 *)src) { + src++; + sz--; + } + + *newsz = ALIGN(sz, 4); + + if (*newsz > PKA_MAX_VEC) { + pr_err("key is longer than maximum supported by PKA\n"); + return -EINVAL; + } + + kfree(*dst); + *dst = kmalloc(*newsz, GFP_KERNEL); + if (!*dst) + return -ENOMEM; + + if (little) { + reverse_memcpy((u8 *)*dst, src, sz); + memset((u8 *)*dst + sz, 0, *newsz - sz); + } else { + memset((u8 *)*dst, 0, *newsz - sz); + memcpy((u8 *)*dst + *newsz - sz, src, sz); + } + + return 0; +} + +static int pka_dh_copy_params(struct dh *dst, struct dh *src) +{ + int ret; + + ret = pka_dh_copy_key_component(&dst->key, src->key, &dst->key_size, + src->key_size, true); + if (ret) + goto die; + ret = pka_dh_copy_key_component(&dst->p, src->p, &dst->p_size, + src->p_size, true); + if (ret) + goto die; + ret = pka_dh_copy_key_component(&dst->g, src->g, &dst->g_size, + src->g_size, false); + if (ret) + goto die; + return 0; +die: + kfree(dst->key); + kfree(dst->p); + kfree(dst->g); + /* clear the stale pointers so a later exit() cannot double-free */ + dst->key = NULL; + dst->p = NULL; + dst->g = NULL; + return ret; +} + +static int pka_dh_set_secret(struct crypto_kpp *tfm, const void *buffer, + uint len) +{ + struct dh *ctx = kpp_tfm_ctx(tfm); + struct dh new_params; + int ret; + + ret = crypto_dh_decode_key(buffer, len, &new_params); + if (ret < 0) + return ret; + ret = pka_dh_copy_params(ctx, &new_params); + if (ret) + return ret; + return 0; +} + +/* uses the same code-sharing trick as dh_compute_value() from crypto/dh.c */ +static int pka_dh_modexp(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct pka_data *data = PKA_TFM_DATA(tfm, kpp); + struct dh *ctx = kpp_tfm_ctx(tfm); + struct scatterlist sg; + struct scatterlist *base; + uint baselen; + bool check_base = false; + + if (req->src) { + base = req->src; + baselen = req->src_len; + } else { + sg_init_one(&sg, ctx->g, ctx->g_size); + base = &sg; + baselen = ctx->g_size; + check_base = true; + } + return pka_modexp(data, base, baselen, ctx->key, ctx->key_size, ctx->p, + ctx->p_size, req->dst, &req->dst_len, 1, check_base); +} + +static uint pka_dh_max_size(struct crypto_kpp *tfm) +{ + struct dh *ctx = kpp_tfm_ctx(tfm); + + return ctx->p_size; +} + +static void pka_dh_exit(struct crypto_kpp *tfm) +{ + struct dh *ctx = kpp_tfm_ctx(tfm); + + kfree(ctx->key); + kfree(ctx->p); + kfree(ctx->g); +} + +struct pka_alg_template pka_dh = { + .alg.kpp = { + .set_secret = pka_dh_set_secret, + .generate_public_key = pka_dh_modexp, + .compute_shared_secret = pka_dh_modexp, + .max_size = pka_dh_max_size, + .exit = pka_dh_exit, + .base = { + .cra_name = "dh", + .cra_driver_name = "dh-sa2ul-pka", + .cra_priority = 400, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct dh) + } + } +}; diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c new
file mode 100644 index 000000000000..0b7eb6342b0d --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#include <linux/iopoll.h> + +#include "sa2ul_pka.h" +#include "sa2ul_pka_op.h" + +/* opcodes */ +#define PKA_SEQ_OP_START_BIT PKA_FUNCTION_RUN +#define PKA_SEQ_OP(seqopcode) \ + (PKA_SEQ_OP_START_BIT | (((seqopcode) & 0x38) << 16) \ + | (((seqopcode) & 0x7) << 12)) +#define PKA_SEQ_OP_MODEXP PKA_SEQ_OP(0x06) +#define PKA_OP_COMPARE (BIT(10) | PKA_FUNCTION_RUN) + +#define PKA_CMP_A_LT_B BIT(1) + +/* + * The following macros (PKA_ARG_*, PKA_*_OP) can be used to describe a dynamic + * memory layout for a PKA operation. For example, the modexp operation + * requires the following layout: + * +--------+--------------------------+ + * | Vector | Contents | + * +--------+--------------------------+ + * | A | *Exponent | + * | B | *Modulus <pad> | + * | C | *Base <pad> | + * | D | *Result <pad> *workspace | + * +--------+--------------------------+ + * *64-bit aligned + * + * This layout can be created using the macros like this: + * PKA_SETUP_OP(data->base, data->mem, elen, mlen, + * PKA_ARG_START(PKA_APTR); PKA_ARG_IN_WORDS(exponent, elen); + * PKA_ARG_START(PKA_BPTR); PKA_ARG_IN_WORDS(modulus, mlen); + * PKA_ARG_PAD_WORDS(1); + * PKA_ARG_START(PKA_CPTR); PKA_ARG_IN_SCATTER_BE(base, mlen); + * PKA_ARG_PAD_WORDS(1); + * PKA_ARG_START(PKA_DPTR); PKA_ARG_GETLOC(outptr); + * PKA_ARG_PAD_WORDS(mlen); + * PKA_ARG_PAD_WORDS(1); + * PKA_ARG_WKSPC(...); + * ); + * + * The PKA_ARG_* macros must only be used as arguments to the + * PKA_SETUP_OP macro. 
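+ * + * Note that the PKA_ARG_* macros expand inside PKA_SETUP_OP and communicate + * through its local variables: _i is the running byte offset into PKA + * vector RAM, _j is a scratch counter, and _reg is the pointer register + * most recently programmed by PKA_ARG_START (which writes _i / 4, a word + * offset). The textual order of the macros therefore directly defines the + * memory layout.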
+ */ +#define PKA_ARG_LOC(loc) (_i = (loc)) +#define PKA_ARG_GETLOC(out) ((out) = _i) +#define PKA_ARG_ALIGN64() (_i = round_up((_i), 8)) +#define PKA_ARG_IN_WORDS(buf, len) do {\ + PKA_ARG_ALIGN64(); \ + for (_j = 0; _j < (len); _j++) { \ + writel_relaxed(le32_to_cpu((buf)[_j]), _mem + _i); \ + _i += 4; \ + } \ +} while (0) +#define _PKA_ARG_IN_SCATTER(sgl, len, be) do { \ + PKA_ARG_ALIGN64(); \ + _j = pka_sg_copy_in(_mem + _i, (sgl), (len), be); \ + _i += _j; \ +} while (0) +#define PKA_ARG_IN_SCATTER(sgl, len) _PKA_ARG_IN_SCATTER(sgl, len, false) +#define PKA_ARG_IN_SCATTER_BE(sgl, len) _PKA_ARG_IN_SCATTER(sgl, len, true) +#define PKA_ARG_PAD_WORDS(len) do { \ + for (_j = 0; _j < (len); _j++) { \ + writel_relaxed(0, _mem + _i); \ + _i += 4; \ + } \ +} while (0) +#define PKA_ARG_WKSPC(len) do { \ + PKA_ARG_ALIGN64(); \ + _i += 4 * (len); \ +} while (0) +#define PKA_ARG_START(reg) do { \ + PKA_ARG_ALIGN64(); \ + _reg = (reg); \ + writel_relaxed(_i / 4, _base + _reg); \ +} while (0) + +#define PKA_SETUP_OP(b, m, alen, blen, vecs) do {\ + u32 _reg, _j, _i = 0; \ + void __iomem *_base = b; \ + void __iomem *_mem = m; \ + writel_relaxed(alen, _base + PKA_ALENGTH); \ + writel_relaxed(blen, _base + PKA_BLENGTH); \ + vecs \ +} while (0) +#define PKA_SUBMIT_OP(_pka_data, opcode) do { \ + struct pka_data *pka_data = _pka_data; \ + (pka_data)->running = true; \ + pka_take_snapshot(pka_data); \ + writel_relaxed(opcode, (pka_data)->base + PKA_FUNCTION); \ +} while (0) + +void *reverse_memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest + count - 1; + const char *s = src; + + while (count--) + *tmp-- = *s++; + return dest; +} + +/** + * pka_modexp - perform a modular exponentiation + * @data: PKA device data + * @src: base, as a scatterlist + * @slen: length in bytes of base + * @exp: exponent + * @elen: length in bytes of @exp + * @mod: modulus + * @mlen: length in bytes of @mod + * @dst: result pointer, as a scatterlist + * @dlen: length available in @dst (updated to the required length if too + * small) + * @shift: number of odd primes to precompute (always 1 for public RSA, max 5 + * for 4096-bit private RSA) + * @check_base: true if (@src < @mod) should be checked first + * + * Description: + * Perform a modular exponentiation using the PKA. + * Blocks waiting for operation to finish. + * If @check_base is set, it will take advantage of the fact that it can + * reuse memory after a comparison operation to perform the check + * (@src < @mod) with almost no overhead (~10us).
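+ * + * Locking: the device mutex is held across the whole operation, including + * the optional compare and the blocking wait for the result, so this + * function may sleep and serializes all users of the PKA.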
+ * + * Return: + * * 0 - Success + * * -EINVAL - Problem with sizes of input buffers + * * -EINVAL - PKA returned error + * * -ETIMEDOUT - PKA died + */ +int pka_modexp(struct pka_data *data, struct scatterlist *src, uint slen, + const u8 *exp, size_t elen, const u8 *mod, size_t mlen, + struct scatterlist *dst, uint *dlen, u32 shift, + bool check_base) +{ + u32 alen, blen, m, diff, tmp = 0; + void __iomem *base = data->base; + void __iomem *mem = data->mem; + int ret; + + if (slen > mlen) { + return -EINVAL; + } else if (*dlen < mlen) { + *dlen = mlen; + return -EINVAL; + } + alen = elen / 4; + blen = mlen / 4; + mutex_lock(&data->mutex); + + PKA_SETUP_OP(base, mem, blen, 0, + PKA_ARG_ALIGN64(); PKA_ARG_IN_WORDS((u32 *)exp, alen); + PKA_ARG_START(PKA_BPTR); PKA_ARG_IN_WORDS((u32 *)mod, blen); + PKA_ARG_PAD_WORDS(1); + PKA_ARG_START(PKA_DPTR); + PKA_ARG_START(PKA_APTR); PKA_ARG_GETLOC(m); + PKA_ARG_IN_SCATTER_BE(src, slen); + PKA_ARG_PAD_WORDS((mlen - slen) / 4); + PKA_ARG_PAD_WORDS(1); + ); + if (check_base) { + pka_intr_disable(data); + PKA_SUBMIT_OP(data, PKA_OP_COMPARE); + ret = readl_relaxed_poll_timeout(base + PKA_FUNCTION, tmp, + (tmp & PKA_FUNCTION_RUN) == 0, + 0, 100); + pka_intr_enable(data); + /* the poll helper returns 0 or -ETIMEDOUT; test the return + * code itself, not a register bit + */ + if (ret) + goto err; + tmp = readl_relaxed(base + PKA_COMPARE); + if (!(tmp & PKA_CMP_A_LT_B)) { + ret = -EINVAL; + goto err; + } + } + writel_relaxed(0, base + PKA_APTR); + writel_relaxed(m / 4, base + PKA_CPTR); + writel_relaxed(shift, base + PKA_SHIFT); + writel_relaxed(alen, base + PKA_ALENGTH); + writel_relaxed(blen, base + PKA_BLENGTH); + + PKA_SUBMIT_OP(data, PKA_SEQ_OP_MODEXP); + diff = *dlen - blen * 4; + ret = pka_op_wait(data); + if (ret) + goto err; + ret = pka_sg_copy_out(mem + m, dst, diff, *dlen, true); + + mutex_unlock(&data->mutex); + return ret > 0 ?
0 : ret; +err: + mutex_unlock(&data->mutex); + return ret; +} diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h new file mode 100644 index 000000000000..6f7c6f07e8bb --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#ifndef PKA_OP_H +#define PKA_OP_H + +#include <linux/scatterlist.h> + +#include "sa2ul_pka.h" + +int pka_modexp(struct pka_data *data, struct scatterlist *src, uint slen, + const u8 *exponent, size_t elen, const u8 *modulus, size_t mlen, + struct scatterlist *dst, uint *dlen, u32 shift, + bool check_base); + +size_t pka_sg_copy_in(void __iomem *dst, struct scatterlist *src, size_t len, + bool big); +size_t pka_sg_copy_out(void __iomem *src, struct scatterlist *dst, size_t skip, + size_t len, bool big); +void *reverse_memcpy(void *dest, const void *src, size_t count); + +#endif /* PKA_OP_H */ diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c new file mode 100644 index 000000000000..48d88372236d --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#include <linux/module.h> +#include <crypto/internal/rsa.h> +#include <crypto/akcipher.h> +#include <crypto/internal/akcipher.h> + +#include "sa2ul_pka.h" +#include "sa2ul_pka_op.h" + +static int pka_rsa_copy_key_component(const u8 **dst, const u8 *src, + size_t *newsz, size_t sz) +{ + if (!src) + return 0; + // delete leading zeroes (check sz first so a zero-length buffer is never read) + while (sz > 0 && !*src) { + src++; + sz--; + } + + *newsz = ALIGN(sz, 4); + + if (*newsz > PKA_MAX_VEC) { + pr_err("key is longer than maximum supported by PKA\n"); + return -EINVAL; + } + + kfree(*dst); + *dst = kmalloc(*newsz, GFP_KERNEL); + if (!*dst) + return -ENOMEM; + + reverse_memcpy((u8 *)*dst, src, sz); + memset((u8 *)*dst + sz, 0, *newsz - sz); + + return 0; +} + +static int pka_rsa_copy_key(struct rsa_key *dst, struct rsa_key *src) +{ + int ret; + + ret = pka_rsa_copy_key_component(&dst->n, src->n, &dst->n_sz, + src->n_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->e, src->e, &dst->e_sz, + src->e_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->d, src->d, &dst->d_sz, + src->d_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->p, src->p, &dst->p_sz, + src->p_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->q, src->q, &dst->q_sz, + src->q_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->dp, src->dp, &dst->dp_sz, + src->dp_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->dq, src->dq, &dst->dq_sz, + src->dq_sz); + if (ret) + goto die; + ret = pka_rsa_copy_key_component(&dst->qinv, src->qinv, &dst->qinv_sz, + src->qinv_sz); + if (ret) + goto die; + return 0; +die: + kfree(dst->n); + kfree(dst->e); + kfree(dst->d); + kfree(dst->p); + kfree(dst->q); + kfree(dst->dp); + kfree(dst->dq); + kfree(dst->qinv); + /* clear stale pointers and sizes so a later exit() cannot double-free */ + memset(dst, 0, sizeof(*dst)); + return ret; +} + +static int pka_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct rsa_key *dst = akcipher_tfm_ctx(tfm); + struct rsa_key src = {0}; +
int ret; + + ret = rsa_parse_priv_key(&src, key, keylen); + if (ret) + return ret; + ret = pka_rsa_copy_key(dst, &src); + if (ret) + return ret; + return 0; +} + +static int pka_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct rsa_key *dst = akcipher_tfm_ctx(tfm); + struct rsa_key src = {0}; + int ret; + + ret = rsa_parse_pub_key(&src, key, keylen); + if (ret) + return ret; + ret = pka_rsa_copy_key(dst, &src); + return ret; +} + +static int pka_rsa_encrypt(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct pka_data *data = PKA_TFM_DATA(tfm, akcipher); + struct rsa_key *key = akcipher_tfm_ctx(tfm); + + // c ≡ m^e (mod n) + return pka_modexp(data, req->src, req->src_len, key->e, key->e_sz, + key->n, key->n_sz, req->dst, &req->dst_len, 1, true); +} + +static int pka_rsa_decrypt_standard(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct pka_data *data = PKA_TFM_DATA(tfm, akcipher); + struct rsa_key *key = akcipher_tfm_ctx(tfm); + + // m ≡ c^d (mod n) + return pka_modexp(data, req->src, req->src_len, key->d, key->d_sz, + key->n, key->n_sz, req->dst, &req->dst_len, 4, true); +} + +/* in the future, we might use modexp_crt instead of modexp here */ +static int pka_rsa_decrypt(struct akcipher_request *req) +{ + return pka_rsa_decrypt_standard(req); +} + +static unsigned int pka_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct rsa_key *key = akcipher_tfm_ctx(tfm); + + return key->n_sz; +} + +static void pka_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct rsa_key *key = akcipher_tfm_ctx(tfm); + + kfree(key->n); + kfree(key->e); + kfree(key->d); + kfree(key->p); + kfree(key->q); + kfree(key->dp); + kfree(key->dq); + kfree(key->qinv); +} + +struct pka_alg_template pka_rsa = { + .alg.akcipher = { + .encrypt = pka_rsa_encrypt, + .decrypt = pka_rsa_decrypt, + .sign = NULL, + .verify = NULL, + .set_priv_key = pka_rsa_set_priv_key, + .set_pub_key = pka_rsa_set_pub_key, + .max_size = pka_rsa_max_size, + .exit = pka_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-sa2ul-pka", + .cra_priority = 400, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct rsa_key), + }, + } +}; diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c new file mode 100644 index 000000000000..0b8996a1ca8d --- /dev/null +++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 SA2UL Public Key Accelerator driver + * + * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com + * + * Author: Daniel Parks <danielrparks@xxxxxx> + */ + +#include <asm/unaligned.h> + +#include "sa2ul_pka_op.h" + +/* align @a and advance @b by the same amount as @a + * mod must be a power of two + */ +#define ALIGN_ADVANCE(a, b, mod) do { \ + b += ALIGN(a, mod) - a; \ + a = ALIGN(a, mod); \ +} while (0) +/* down-align @a and advance @b by the amount @a would be aligned *upwards* + * mod must be a power of two + */ +#define ALIGN_DOWN_ADVANCE(a, b, mod) do { \ + b += ALIGN(a, mod) - a; \ + a = ALIGN_DOWN(a, mod); \ +} while (0) +/* reverse a word index @idx with respect to length @len */ +#define REV_IDX(idx, len) ((len) - (idx) - 4) + +struct temp_buffer { + u32 buf; + bool dirty; +}; + +/* if we are in the middle of a word in the destination, copy the remainder of + * the word from the source and write it out + */ +static inline void in_fragment_begin(size_t 
*dsti, size_t *miteri, + struct sg_mapping_iter *miter, + void __iomem *dst, size_t len, + struct temp_buffer *buf, bool big) +{ + if (*dsti % 4 && *miteri + 4 - *dsti % 4 <= miter->length && + *dsti < len) { + memcpy(((u8 *)&buf->buf) + *dsti % 4, miter->addr + *miteri, + 4 - *dsti % 4); + ALIGN_DOWN_ADVANCE(*dsti, *miteri, 4); + if (big) + writel_relaxed(be32_to_cpu(buf->buf), + dst + REV_IDX(*dsti, len)); + else + writel_relaxed(le32_to_cpu(buf->buf), dst + *dsti); + buf->dirty = false; + buf->buf = 0; + *dsti += 4; + } +} + +/* if the source sg entry ends in the middle of a word, save the partial word */ +static inline void in_fragment_end(size_t *dsti, size_t *miteri, + struct sg_mapping_iter *miter, size_t len, + struct temp_buffer *buf) +{ + u32 tmp; + + if (*miteri < miter->length && *dsti < len) { + tmp = miter->length - *miteri; + memcpy((u8 *)&buf->buf + *dsti % 4, miter->addr + *miteri, tmp); + buf->dirty = true; + *dsti += tmp; + } +} + +/* if we have a partial word at the end of the source, zero pad and write it out + */ +static inline void in_fragment_last(size_t *dsti, void __iomem *dst, size_t len, + struct temp_buffer *buf, bool big) +{ + if (buf->dirty) { + *dsti = ALIGN_DOWN(*dsti, 4); + if (big) + writel_relaxed(be32_to_cpu(buf->buf), + dst + REV_IDX(*dsti, len)); + else + writel_relaxed(le32_to_cpu(buf->buf), dst + *dsti); + buf->dirty = false; + buf->buf = 0; + } +} + +/* copy words into the destination */ +static inline void in_loop(size_t *dsti, size_t *miteri, + struct sg_mapping_iter *miter, void __iomem *dst, + size_t len, bool big) +{ + u32 tmp; + + while (*miteri + 4 <= miter->length && *dsti + 4 <= len) { + if (big) { + tmp = be32_to_cpu(*((u32 *)(miter->addr + *miteri))); + writel_relaxed(tmp, dst + REV_IDX(*dsti, len)); + } else { + tmp = le32_to_cpu(*((u32 *)(miter->addr + *miteri))); + writel_relaxed(tmp, dst + *dsti); + } + *dsti += 4; + *miteri += 4; + } +} + +/* copy words into the destination from an unaligned source */ +static inline void in_loop_ua(size_t *dsti, size_t *miteri, + struct sg_mapping_iter *miter, void __iomem *dst, + size_t len, bool big) +{ + u32 tmp; + + while (*miteri + 4 <= miter->length && *dsti + 4 <= len) { + if (big) { + tmp = be32_to_cpu(get_unaligned( + (u32 *)(miter->addr + *miteri))); + writel_relaxed(tmp, dst + REV_IDX(*dsti, len)); + } else { + tmp = le32_to_cpu(get_unaligned( + (u32 *)(miter->addr + *miteri))); + writel_relaxed(tmp, dst + *dsti); + } + *dsti += 4; + *miteri += 4; + } +} + +/** + * pka_sg_copy_in - copy data from a scatterlist into PKA memory + * @dst: destination (in PKA device memory) + * @src: source scatterlist + * @len: length of the data to copy + * @big: true if source data is big-endian + * + * Description: + * Intended for copying big integers used in crypto algorithms. In this case, + * the endianness of the memory will be defined by the storage format (e.g. + * ASN.1) rather than the host system. This function performs the copy without + * using costly bounce buffers. + * The PKA memory is always little-endian; if necessary, the endianness will + * be swapped on-the-fly during the copy to the PKA. + * If the scatterlist ends before @len bytes have been copied, this function + * will stop early. + * + * Return: + * The number of bytes copied. 
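+ * + * Worked example (a sketch, assuming @big with an 8-byte source + * 01 02 03 04 05 06 07 08): the 32-bit words are byte-swapped and their + * order reversed, so PKA memory ends up holding the little-endian words + * 0x05060708 at offset 0 and 0x01020304 at offset 4, i.e. the whole + * integer stored little-endian end to end.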
+ */ +size_t pka_sg_copy_in(void __iomem *dst, struct scatterlist *src, size_t len, + bool big) +{ + size_t dsti = 0, miteri; + struct sg_mapping_iter miter; + struct temp_buffer buf = {0, false}; + + if (big && len % 4) + ALIGN_ADVANCE(len, dsti, 4); + + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_FROM_SG); + while (sg_miter_next(&miter) && dsti < len) { + miteri = 0; + in_fragment_begin(&dsti, &miteri, &miter, dst, len, &buf, big); + if (IS_ALIGNED((unsigned long)miter.addr + miteri, 4)) + in_loop(&dsti, &miteri, &miter, dst, len, big); + else + in_loop_ua(&dsti, &miteri, &miter, dst, len, big); + in_fragment_end(&dsti, &miteri, &miter, len, &buf); + } + in_fragment_last(&dsti, dst, len, &buf, big); + sg_miter_stop(&miter); + return dsti; +} + +/* if we are in the middle of a word in the source, write the remainder of the + * word to the destination + */ +static inline void out_fragment_begin(size_t *srci, size_t *miteri, + struct sg_mapping_iter *miter, + void __iomem *src, size_t len, u32 *buf, + bool big) +{ + void __iomem *addr; + + if (*srci % 4 && *miteri + 4 - *srci % 4 <= miter->length && + *srci < len) { + if (big) { + addr = src + REV_IDX(ALIGN_DOWN(*srci, 4), len); + *buf = cpu_to_be32(readl_relaxed(addr)); + } + memcpy(miter->addr + *miteri, ((u8 *)buf) + *srci % 4, + 4 - *srci % 4); + ALIGN_ADVANCE(*srci, *miteri, 4); + } +} + +/* if the destination sg entry ends in the middle of a word, write what we can + * and save the rest + */ +static inline void out_fragment_end(size_t *srci, size_t *miteri, + struct sg_mapping_iter *miter, + void __iomem *src, size_t len, u32 *buf, + bool big) +{ + u32 tmp; + void __iomem *addr; + + if (*miteri < miter->length && *srci < len) { + tmp = min(miter->length - *miteri, len - *srci); + if (big) { + addr = src + REV_IDX(*srci, len); + *buf = cpu_to_be32(readl_relaxed(addr)); + } else { + *buf = cpu_to_le32(readl_relaxed(src + *srci)); + } + memcpy(miter->addr + *miteri, (u8 *)buf + *srci % 4, tmp); + *srci += tmp; + } +} + +/* copy words from the source to the destination */ +static inline void out_loop(size_t *srci, size_t *miteri, + struct sg_mapping_iter *miter, void __iomem *src, + size_t len, bool big) +{ + u32 tmp; + void __iomem *addr; + + while (*miteri + 4 <= miter->length && *srci + 4 <= len) { + if (big) { + addr = src + REV_IDX(*srci, len); + tmp = cpu_to_be32(readl_relaxed(addr)); + *((u32 *)(miter->addr + *miteri)) = tmp; + } else { + tmp = cpu_to_le32(readl_relaxed(src + *srci)); + *((u32 *)(miter->addr + *miteri)) = tmp; + } + *srci += 4; + *miteri += 4; + } +} + +/* copy words from the source to an unaligned destination */ +static inline void out_loop_ua(size_t *srci, size_t *miteri, + struct sg_mapping_iter *miter, void __iomem *src, + size_t len, bool big) +{ + u32 tmp; + void __iomem *addr; + + while (*miteri + 4 <= miter->length && *srci + 4 <= len) { + if (big) { + addr = src + REV_IDX(*srci, len); + tmp = cpu_to_be32(readl_relaxed(addr)); + put_unaligned(tmp, (u32 *)(miter->addr + *miteri)); + } else { + tmp = cpu_to_le32(readl_relaxed(src + *srci)); + put_unaligned(tmp, (u32 *)(miter->addr + *miteri)); + } + *srci += 4; + *miteri += 4; + } +} + +/** + * pka_sg_copy_out - copy data from PKA memory into a scatterlist + * @src: source (in PKA device memory) + * @dst: destination scatterlist + * @skip: number of bytes to skip in the destination before copying + * @len: length of the data to copy + * @big: true if destination data is big endian + * + * Description: + * Intended for copying big integers used in 
crypto algorithms. In this case, + * the endianness of the memory will be defined by the storage format (e.g. + * ASN.1) rather than the host system. This function performs the copy without + * using costly bounce buffers. + * The PKA memory is always little-endian; if necessary, the endianness will + * be swapped on-the-fly during the copy from the PKA. + * If the scatterlist ends before @len bytes have been copied, this function + * will stop early. + * + * Return: + * The number of bytes copied. + */ +size_t pka_sg_copy_out(void __iomem *src, struct scatterlist *dst, size_t skip, + size_t len, bool big) +{ + size_t srci = 0, miteri; + struct sg_mapping_iter miter; + u32 buf = 0; + + len -= skip; + sg_miter_start(&miter, dst, sg_nents(dst), SG_MITER_TO_SG); + sg_zero_buffer(dst, sg_nents(dst), skip, 0); + if (!sg_miter_skip(&miter, skip)) + return 0; + if (big && len % 4) + ALIGN_ADVANCE(len, srci, 4); + + while (sg_miter_next(&miter) && srci < len) { + miteri = 0; + out_fragment_begin(&srci, &miteri, &miter, src, len, &buf, big); + if (IS_ALIGNED((unsigned long)miter.addr + miteri, 4)) + out_loop(&srci, &miteri, &miter, src, len, big); + else + out_loop_ua(&srci, &miteri, &miter, src, len, big); + out_fragment_end(&srci, &miteri, &miter, src, len, &buf, big); + } + sg_miter_stop(&miter); + return srci; +} -- 2.17.1