Dear Dave, Thanks for your review. I will refactor this patch per your suggestions. On 8/2/23 22:20, Dave Hansen wrote: > This code looks pretty rough. > >> +static int zhaoxin_sha1_update(struct shash_desc *desc, >> + const u8 *data, unsigned int len) >> +{ >> + struct sha1_state *sctx = shash_desc_ctx(desc); >> + unsigned int partial, done; >> + const u8 *src; >> + /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/ >> + u8 buf[128 + ZHAOXIN_SHA_ALIGNMENT - STACK_ALIGN] __attribute__ >> + ((aligned(STACK_ALIGN))); >> + u8 *dst = PTR_ALIGN(&buf[0], ZHAOXIN_SHA_ALIGNMENT); > > All of the different alignments here are pretty dazzling. > >> + partial = sctx->count & 0x3f; > > "0x3f" is a random magic number. > >> + sctx->count += len; >> + done = 0; >> + src = data; >> + memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE); >> + >> + if ((partial + len) >= SHA1_BLOCK_SIZE) { >> + >> + /* Append the bytes in state's buffer to a block to handle */ >> + if (partial) { >> + done = -partial; >> + memcpy(sctx->buffer + partial, data, >> + done + SHA1_BLOCK_SIZE); >> + src = sctx->buffer; >> + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" >> + : "+S"(src), "+D"(dst) >> + : "a"((long)-1), "c"(1UL)); > > Please look around the codebase for examples on how to do this. We > usually try to use real instructions when binutils supports them and > also don't repeatedly open-code the ".byte ...". > >> + done += SHA1_BLOCK_SIZE; >> + src = data + done; >> + } >> + >> + /* Process the left bytes from the input data */ >> + if (len - done >= SHA1_BLOCK_SIZE) { >> + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" >> + : "+S"(src), "+D"(dst) >> + : "a"((long)-1), >> + "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); >> + done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); >> + src = data + done; >> + } >> + partial = 0; >> + } >> + memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE); > > What's the purpose of the cast? 
> >> + memcpy(sctx->buffer + partial, src, len - done); >> + >> + return 0; >> +} >> + >> +static int zhaoxin_sha1_final(struct shash_desc *desc, u8 *out) >> +{ >> + struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc); > > What's the purpose of *this* cast? > >> + unsigned int partial, padlen; >> + __be64 bits; >> + static const u8 padding[64] = { 0x80, }; >> + >> + bits = cpu_to_be64(state->count << 3); >> + >> + /* Pad out to 56 mod 64 */ >> + partial = state->count & 0x3f; >> + padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial); >> + zhaoxin_sha1_update(desc, padding, padlen); >> + >> + /* Append length field bytes */ >> + zhaoxin_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); >> + >> + /* Swap to output */ >> + zhaoxin_output_block((uint32_t *)(state->state), (uint32_t *)out, 5); >> + >> + return 0; >> +} >> + >> +static int zhaoxin_sha256_init(struct shash_desc *desc) >> +{ >> + struct sha256_state *sctx = shash_desc_ctx(desc); >> + >> + *sctx = (struct sha256_state){ >> + .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, >> + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}, >> + }; >> + >> + return 0; >> +} >> + >> +static int zhaoxin_sha256_update(struct shash_desc *desc, const u8 *data, >> + unsigned int len) >> +{ >> + struct sha256_state *sctx = shash_desc_ctx(desc); >> + unsigned int partial, done; >> + const u8 *src; >> + /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/ >> + u8 buf[128 + ZHAOXIN_SHA_ALIGNMENT - STACK_ALIGN] __attribute__ >> + ((aligned(STACK_ALIGN))); >> + u8 *dst = PTR_ALIGN(&buf[0], ZHAOXIN_SHA_ALIGNMENT); >> + >> + partial = sctx->count & 0x3f; >> + sctx->count += len; >> + done = 0; >> + src = data; >> + memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE); > > That looks familiar. > > This patch needs some serious cleanups and refactoring. It seems to be > missing even the basics like avoiding copy-and-pasting code. The > changelog is quite sparse. 
> > Could you spend some more time on this and give it another go, please?