There are SoCs like LS1043A where CAAM endianness (BE) does not match the endianness of the core (LE). Moreover, there are requirements for the driver to handle cases like CPU_BIG_ENDIAN=y on ARM-based SoCs. This requires a complete rewrite of the I/O accessors. PPC-specific accessors - {in,out}_{le,be}XX - are replaced with generic io{read,write}[be]XX where possible (no 64-bit generic I/O). Signed-off-by: Horia Geantă <horia.geanta@xxxxxxxxxxxxx> Signed-off-by: Alex Porosanu <alexandru.porosanu@xxxxxxxxxxxxx> --- While this patch takes into consideration the S/G format (struct sec4_sg_entry) for i.MX7, I don't have a board so I haven't tested this platform. drivers/crypto/caam/caamhash.c | 5 +-- drivers/crypto/caam/ctrl.c | 2 +- drivers/crypto/caam/desc.h | 9 ++++- drivers/crypto/caam/desc_constr.h | 42 +++++++++++++++-------- drivers/crypto/caam/jr.c | 8 ++--- drivers/crypto/caam/regs.h | 72 ++++++++++++++++++++++++++++----------- drivers/crypto/caam/sg_sw_sec4.h | 10 +++--- 7 files changed, 102 insertions(+), 46 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 72acf8e5ac2f..c39332171fa3 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -841,7 +841,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -942,7 +942,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 
8abb4bc548cc..e527d1e0b9ab 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -128,7 +128,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 983d663ef671..fc257b343d3b 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -23,16 +23,23 @@ #define SEC4_SG_OFFS_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_LE u8 rsvd2; u8 buf_pool_id; u16 offset; +#else + u16 offset; + u8 buf_pool_id; + u8 rsvd2; +#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */ }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24fc4..40baf4752fbc 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -32,7 +33,7 @@ static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +53,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -78,9 +79,10 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = 
ptr; + *offset = wr_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +101,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +120,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_LE)) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +177,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << 
(MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index f7e0d8d4c3da..5add26b2a2ff 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + rd_dma(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = wr_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index a8a79975682f..d3e93e066cd8 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h 
@@ -65,7 +65,7 @@ * */ -#ifdef CONFIG_ARM +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) /* These are common macros for Power, put here for ARM */ #define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) #define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) @@ -86,26 +86,62 @@ #define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) #endif -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE +#define caam16_to_cpu(value) le16_to_cpu(value) +#define cpu_to_caam16(value) cpu_to_le16(value) +#define caam32_to_cpu(value) le32_to_cpu(value) +#define cpu_to_caam32(value) cpu_to_le32(value) +#define caam64_to_cpu(value) le64_to_cpu(value) +#define cpu_to_caam64(value) cpu_to_le64(value) +#define wr_reg32(reg, data) iowrite32(data, reg) +#define rd_reg32(reg) ioread32(reg) +#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) +#else +#define caam16_to_cpu(value) be16_to_cpu(value) +#define cpu_to_caam16(value) cpu_to_be16(value) +#define caam32_to_cpu(value) be32_to_cpu(value) +#define cpu_to_caam32(value) cpu_to_be32(value) +#define caam64_to_cpu(value) be64_to_cpu(value) +#define cpu_to_caam64(value) cpu_to_be64(value) +#define wr_reg32(reg, data) iowrite32be(data, reg) +#define rd_reg32(reg) ioread32be(reg) #define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) #endif + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define wr_dma(value) (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(higher_32_bits(value))) +#define rd_dma(value) (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(higher_32_bits(value))) #else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) 
__raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif +#define wr_dma(value) cpu_to_caam64(value) +#define rd_dma(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define wr_dma(value) cpu_to_caam32(value) +#define rd_dma(value) caam32_to_cpu(value) #endif +#ifdef CONFIG_64BIT +#ifdef CONFIG_PPC +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE +#define wr_reg64(reg, data) out_le64(reg, data) +#define rd_reg64(reg) in_le64(reg) +#else +#define wr_reg64(reg, data) out_be64(reg, data) +#define rd_reg64(reg) in_be64(reg) +#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */ +#else /* CONFIG_PPC */ +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE +#define wr_reg64(reg, data) writeq(data, reg) +#define rd_reg64(reg) readq(reg) +#else +#define wr_reg64(reg, data) iowrite64be(data, reg) +#define rd_reg64(reg) ioread64be(reg) +#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */ +#endif /* CONFIG_PPC */ +#else /* CONFIG_64BIT */ /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). 
* The DMA address registers in the JR are handled differently depending on @@ -123,8 +159,6 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ - -#ifndef CONFIG_64BIT #if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) #define REG64_MS32(reg) ((u32 __iomem *)(reg)) @@ -145,7 +179,7 @@ static inline u64 rd_reg64(u64 __iomem *reg) return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | (u64)rd_reg32(REG64_LS32(reg))); } -#endif +#endif /* CONFIG_64BIT */ /* * jr_outentry diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 18cd6d1f5870..fb8e0084e3e5 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,6 +5,8 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* @@ -13,10 +15,10 @@ struct sec4_sg_entry; static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, dma_addr_t dma, u32 len, u32 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; + sec4_sg_ptr->ptr = wr_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->offset = cpu_to_caam16(offset & SEC4_SG_OFFS_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -51,7 +53,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, u32 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( -- 2.4.4 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html