> On 20 Mar 2025, at 5:44 AM, Ethan Carter Edwards <ethan@xxxxxxxxxxxxxxxxx> wrote: > > This library was originally developed by Apple and is BSD-3-Clause licensed. It > allows for the reading of LZFSE and LZVN compressed files > on APFS filesystems. Encode functionality is not included, as the > filesystem implementation does not yet support it, but could be easily > added later if needed. > > Signed-off-by: Ethan Carter Edwards <ethan@xxxxxxxxxxxxxxxxx> > --- > drivers/staging/apfs/lzfse/lzfse.h | 86 +++ > drivers/staging/apfs/lzfse/lzfse_decode.c | 66 +++ > drivers/staging/apfs/lzfse/lzfse_decode_base.c | 725 +++++++++++++++++++++++ > drivers/staging/apfs/lzfse/lzfse_encode_tables.h | 224 +++++++ > drivers/staging/apfs/lzfse/lzfse_fse.c | 214 +++++++ > drivers/staging/apfs/lzfse/lzfse_fse.h | 632 ++++++++++++++++++++ > drivers/staging/apfs/lzfse/lzfse_internal.h | 599 +++++++++++++++++++ > drivers/staging/apfs/lzfse/lzfse_tunables.h | 50 ++ > drivers/staging/apfs/lzfse/lzvn_decode_base.c | 721 ++++++++++++++++++++++ > drivers/staging/apfs/lzfse/lzvn_decode_base.h | 53 ++ > drivers/staging/apfs/lzfse/lzvn_encode_base.h | 105 ++++ > 11 files changed, 3475 insertions(+) > > diff --git a/drivers/staging/apfs/lzfse/lzfse.h b/drivers/staging/apfs/lzfse/lzfse.h > new file mode 100644 > index 0000000000000000000000000000000000000000..573c85b8c9a476291cd8ef89833ae13caf5eaeb6 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse.h > @@ -0,0 +1,86 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +#ifndef LZFSE_H > +#define LZFSE_H > + > +#include <linux/stddef.h> > +#include <linux/types.h> > + > +/*! @abstract Get the required scratch buffer size to compress using LZFSE. */ > +size_t lzfse_encode_scratch_size(void); > + > +/*! @abstract Compress a buffer using LZFSE. > + * > + * @param dst_buffer > + * Pointer to the first byte of the destination buffer. > + * > + * @param dst_size > + * Size of the destination buffer in bytes. > + * > + * @param src_buffer > + * Pointer to the first byte of the source buffer. > + * > + * @param src_size > + * Size of the source buffer in bytes. > + * > + * @param scratch_buffer > + * If non-NULL, a pointer to scratch space for the routine to use as workspace; > + * the routine may use up to lzfse_encode_scratch_size( ) bytes of workspace > + * during its operation, and will not perform any internal allocations. If > + * NULL, the routine may allocate its own memory to use during operation via > + * a single call to malloc( ), and will release it by calling free( ) prior > + * to returning. For most use, passing NULL is perfectly satisfactory, but if > + * you require strict control over allocation, you will want to pass an > + * explicit scratch buffer. > + * > + * @return > + * The number of bytes written to the destination buffer if the input is > + * successfully compressed. If the input cannot be compressed to fit into > + * the provided buffer, or an error occurs, zero is returned, and the > + * contents of dst_buffer are unspecified. */ > +size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, > + const uint8_t *__restrict src_buffer, > + size_t src_size, void *__restrict scratch_buffer); > + > +/*! @abstract Get the required scratch buffer size to decompress using LZFSE. */ > +size_t lzfse_decode_scratch_size(void); > + > +/*! @abstract Decompress a buffer using LZFSE.
> + * > + * @param dst_buffer > + * Pointer to the first byte of the destination buffer. > + * > + * @param dst_size > + * Size of the destination buffer in bytes. > + * > + * @param src_buffer > + * Pointer to the first byte of the source buffer. > + * > + * @param src_size > + * Size of the source buffer in bytes. > + * > + * @param scratch_buffer > + * If non-NULL, a pointer to scratch space for the routine to use as workspace; > + * the routine may use up to lzfse_decode_scratch_size( ) bytes of workspace > + * during its operation, and will not perform any internal allocations. If > + * NULL, the routine may allocate its own memory to use during operation via > + * a single call to malloc( ), and will release it by calling free( ) prior > + * to returning. For most use, passing NULL is perfectly satisfactory, but if > + * you require strict control over allocation, you will want to pass an > + * explicit scratch buffer. > + * > + * @return > + * The number of bytes written to the destination buffer if the input is > + * successfully decompressed. If there is not enough space in the destination > + * buffer to hold the entire expanded output, only the first dst_size bytes > + * will be written to the buffer and dst_size is returned. Note that this > + * behavior differs from that of lzfse_encode_buffer. */ > +size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, > + const uint8_t *__restrict src_buffer, > + size_t src_size, void *__restrict scratch_buffer); > + > +#endif /* LZFSE_H */ > diff --git a/drivers/staging/apfs/lzfse/lzfse_decode.c b/drivers/staging/apfs/lzfse/lzfse_decode.c > new file mode 100644 > index 0000000000000000000000000000000000000000..ef7d3487343af5387a9a5dacc8ed95212ec641c0 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_decode.c > @@ -0,0 +1,66 @@ > +// SPDX-License-Identifier: BSD-3-Clause > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +/* LZFSE decode API > + */ > + > +#include <linux/slab.h> > +#include "lzfse.h" > +#include "lzfse_internal.h" > + > +size_t lzfse_decode_scratch_size(void) > +{ > + return sizeof(lzfse_decoder_state); > +} > + > +static size_t lzfse_decode_buffer_with_scratch( > + uint8_t *__restrict dst_buffer, size_t dst_size, > + const uint8_t *__restrict src_buffer, size_t src_size, > + void *__restrict scratch_buffer) > +{ > + int status; > + lzfse_decoder_state *s = (lzfse_decoder_state *)scratch_buffer; > + memset(s, 0x00, sizeof(*s)); > + > + // Initialize state

This is not a Linux-style comment. It should be:

	/* Initialize state */

Are you sure you ran checkpatch? That is something very basic you should do before submitting another revision. I would suggest running sparse as well, and fixing any errors it reports.
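Converted, the start of this function would read like the following (same code as in the patch, nothing changed but the comment style):

	/* Initialize state */
	s->src = src_buffer;
	s->src_begin = src_buffer;
	s->src_end = s->src + src_size;
	s->dst = dst_buffer;
	s->dst_begin = dst_buffer;
	s->dst_end = dst_buffer + dst_size;

	/* Decode */
	status = lzfse_decode(s);

The same goes for every // comment in the series; checkpatch's C99_COMMENTS check should point at each of them.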
> + s->src = src_buffer; > + s->src_begin = src_buffer; > + s->src_end = s->src + src_size; > + s->dst = dst_buffer; > + s->dst_begin = dst_buffer; > + s->dst_end = dst_buffer + dst_size; > + > + // Decode > + status = lzfse_decode(s); > + if (status == LZFSE_STATUS_DST_FULL) > + return dst_size; > + if (status != LZFSE_STATUS_OK) > + return 0; // failed > + return (size_t)(s->dst - dst_buffer); // bytes written > +} > + > +size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, > + const uint8_t *__restrict src_buffer, > + size_t src_size, void *__restrict scratch_buffer) > +{ > + int has_malloc = 0; > + size_t ret = 0; > + > + // Deal with the possible NULL pointer > + if (scratch_buffer == NULL) { > + // +1 in case scratch size could be zero > + scratch_buffer = > + kmalloc(lzfse_decode_scratch_size() + 1, GFP_KERNEL); > + has_malloc = 1; > + } > + if (scratch_buffer == NULL) > + return 0; > + ret = lzfse_decode_buffer_with_scratch(dst_buffer, dst_size, src_buffer, > + src_size, scratch_buffer); > + if (has_malloc) > + kfree(scratch_buffer); > + return ret; > +} > diff --git a/drivers/staging/apfs/lzfse/lzfse_decode_base.c b/drivers/staging/apfs/lzfse/lzfse_decode_base.c > new file mode 100644 > index 0000000000000000000000000000000000000000..5fb9e374b40e7e7c11a27e2e3baf79886b3f0474 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_decode_base.c > @@ -0,0 +1,725 @@ > +// SPDX-License-Identifier: BSD-3-Clause > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +#include "lzfse_internal.h" > +#include "lzvn_decode_base.h" > + > +/*! @abstract Decode an entry value from next bits of stream. > + * Return \p value, and set \p *nbits to the number of bits to consume > + * (starting with LSB). */ > +static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) > +{ > + static const int8_t lzfse_freq_nbits_table[32] = { > + 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14, > + 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14 > + }; > + static const int8_t lzfse_freq_value_table[32] = { > + 0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1, > + 0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1 > + }; > + > + uint32_t b = bits & 31; // lower 5 bits > + int n = lzfse_freq_nbits_table[b]; > + *nbits = n; > + > + // Special cases for > 5 bits encoding > + if (n == 8) > + return 8 + ((bits >> 4) & 0xf); > + if (n == 14) > + return 24 + ((bits >> 4) & 0x3ff); > + > + // <= 5 bits encoding from table > + return lzfse_freq_value_table[b]; > +} > + > +/*! @abstract Extracts up to 32 bits from a 64-bit field beginning at > + * \p offset, and zero-extends them to a \p uint32_t. > + * > + * If we number the bits of \p v from 0 (least significant) to 63 (most > + * significant), the result is bits \p offset to \p offset+nbits-1. */ > +static inline uint32_t get_field(uint64_t v, int offset, int nbits) > +{ > + if (nbits == 32) > + return (uint32_t)(v >> offset); > + return (uint32_t)((v >> offset) & ((1 << nbits) - 1)); > +} > + > +/*! @abstract Return \c header_size field from a \c lzfse_compressed_block_header_v2. */ > +static inline uint32_t > +lzfse_decode_v2_header_size(const lzfse_compressed_block_header_v2 *in) > +{ > + return get_field(in->packed_fields[2], 0, 32); > +} > + > +/*! @abstract Decode all fields from a \c lzfse_compressed_block_header_v2 to a > + * \c lzfse_compressed_block_header_v1. > + * @return 0 on success. > + * @return -1 on failure. 
*/ > +static inline int lzfse_decode_v1(lzfse_compressed_block_header_v1 *out, > + const lzfse_compressed_block_header_v2 *in) > +{ > + uint64_t v0; > + uint64_t v1; > + uint64_t v2; > + uint16_t *dst = NULL; > + const uint8_t *src = NULL; > + const uint8_t *src_end = NULL; > + uint32_t accum = 0; > + int accum_nbits = 0; > + int nbits = 0; > + int i; > + > + // Clear all fields > + memset(out, 0x00, sizeof(lzfse_compressed_block_header_v1)); > + > + v0 = in->packed_fields[0]; > + v1 = in->packed_fields[1]; > + v2 = in->packed_fields[2]; > + > + out->magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; > + out->n_raw_bytes = in->n_raw_bytes; > + > + // Literal state > + out->n_literals = get_field(v0, 0, 20); > + out->n_literal_payload_bytes = get_field(v0, 20, 20); > + out->literal_bits = (int)get_field(v0, 60, 3) - 7; > + out->literal_state[0] = get_field(v1, 0, 10); > + out->literal_state[1] = get_field(v1, 10, 10); > + out->literal_state[2] = get_field(v1, 20, 10); > + out->literal_state[3] = get_field(v1, 30, 10); > + > + // L,M,D state > + out->n_matches = get_field(v0, 40, 20); > + out->n_lmd_payload_bytes = get_field(v1, 40, 20); > + out->lmd_bits = (int)get_field(v1, 60, 3) - 7; > + out->l_state = get_field(v2, 32, 10); > + out->m_state = get_field(v2, 42, 10); > + out->d_state = get_field(v2, 52, 10); > + > + // Total payload size > + out->n_payload_bytes = > + out->n_literal_payload_bytes + out->n_lmd_payload_bytes; > + > + // Freq tables > + dst = &(out->l_freq[0]); > + src = &(in->freq[0]); > + src_end = (const uint8_t *)in + > + get_field(v2, 0, 32); // first byte after header > + accum = 0; > + accum_nbits = 0; > + > + // No freq tables? > + if (src_end == src) > + return 0; // OK, freq tables were omitted > + > + for (i = 0; > + i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + > + LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; > + i++) { > + // Refill accum, one byte at a time, until we reach end of header, or accum > + // is full > + while (src < src_end && accum_nbits + 8 <= 32) { > + accum |= (uint32_t)(*src) << accum_nbits; > + accum_nbits += 8; > + src++; > + } > + > + // Decode and store value > + nbits = 0; > + dst[i] = lzfse_decode_v1_freq_value(accum, &nbits); > + > + if (nbits > accum_nbits) > + return -1; // failed > + > + // Consume nbits bits > + accum >>= nbits; > + accum_nbits -= nbits; > + } > + > + if (accum_nbits >= 8 || src != src_end) > + return -1; // we need to end up exactly at the end of header, with less than > + // 8 bits in accumulator > + > + return 0; > +} > + > +static inline void copy(uint8_t *dst, const uint8_t *src, size_t length) > +{ > + const uint8_t *dst_end = dst + length; > + do { > + copy8(dst, src); > + dst += 8; > + src += 8; > + } while (dst < dst_end); > +} > + > +static int lzfse_decode_lmd(lzfse_decoder_state *s) > +{ > + lzfse_compressed_block_decoder_state *bs = > + &(s->compressed_lzfse_block_state); > + fse_state l_state = bs->l_state; > + fse_state m_state = bs->m_state; > + fse_state d_state = bs->d_state; > + fse_in_stream in = bs->lmd_in_stream; > + const uint8_t *src_start = s->src_begin; > + const uint8_t *src = s->src + bs->lmd_in_buf; > + const uint8_t *lit = bs->current_literal; > + uint8_t *dst = s->dst; > + uint32_t symbols = bs->n_matches; > + int32_t L = bs->l_value; > + int32_t M = bs->m_value; > + int32_t D = bs->d_value; > + int32_t new_d; > + > + // Number of bytes remaining in the destination buffer, minus 32 to > + // provide a margin of safety for using overlarge copies on the fast path. 
> + // This is a signed quantity, and may go negative when we are close to the > + // end of the buffer. That's OK; we're careful about how we handle it > + // in the slow-and-careful match execution path. > + ptrdiff_t remaining_bytes = s->dst_end - dst - 32; > + > + // If L or M is non-zero, that means that we have already started decoding > + // this block, and that we needed to interrupt decoding to get more space > + // from the caller. There's a pending L, M, D triplet that we weren't > + // able to completely process. Jump ahead to finish executing that symbol > + // before decoding new values. > + if (L || M) > + goto ExecuteMatch; > + > + while (symbols > 0) { > + int res; > + // Decode the next L, M, D symbol from the input stream. > + res = fse_in_flush(&in, &src, src_start); > + if (res) { > + return LZFSE_STATUS_ERROR; > + } > + L = fse_value_decode(&l_state, bs->l_decoder, &in); > + if ((lit + L) >= > + (bs->literals + LZFSE_LITERALS_PER_BLOCK + 64)) { > + return LZFSE_STATUS_ERROR; > + } > + res = fse_in_flush2(&in, &src, src_start); > + if (res) { > + return LZFSE_STATUS_ERROR; > + } > + M = fse_value_decode(&m_state, bs->m_decoder, &in); > + res = fse_in_flush2(&in, &src, src_start); > + if (res) { > + return LZFSE_STATUS_ERROR; > + } > + new_d = fse_value_decode(&d_state, bs->d_decoder, &in); > + D = new_d ? new_d : D; > + symbols--; > + > +ExecuteMatch: > + // Error if D is out of range, so that we avoid passing through > + // uninitialized data or accessing memory out of the destination > + // buffer. > + if ((uint32_t)D > dst + L - s->dst_begin) > + return LZFSE_STATUS_ERROR; > + > + if (L + M <= remaining_bytes) { > + size_t i; > + // If we have plenty of space remaining, we can copy the literal > + // and match with 16- and 32-byte operations, without worrying > + // about writing off the end of the buffer. > + remaining_bytes -= L + M; > + copy(dst, lit, L); > + dst += L; > + lit += L; > + // For the match, we have two paths; a fast copy by 16-bytes if > + // the match distance is large enough to allow it, and a more > + // careful path that applies a permutation to account for the > + // possible overlap between source and destination if the distance > + // is small. > + if (D >= 8 || D >= M) > + copy(dst, dst - D, M); > + else > + for (i = 0; i < M; i++) > + dst[i] = dst[i - D]; > + dst += M; > + } > + > + else { > + // Otherwise, we are very close to the end of the destination > + // buffer, so we cannot use wide copies that slop off the end > + // of the region that we are copying to. First, we restore > + // the true length remaining, rather than the sham value we've > + // been using so far. > + remaining_bytes += 32; > + // Now, we process the literal. Either there's space for it > + // or there isn't; if there is, we copy the whole thing and > + // update all the pointers and lengths to reflect the copy. > + if (L <= remaining_bytes) { > + size_t i; > + for (i = 0; i < L; i++) > + dst[i] = lit[i]; > + dst += L; > + lit += L; > + remaining_bytes -= L; > + L = 0; > + } > + // There isn't enough space to fit the whole literal. Copy as > + // much of it as we can, update the pointers and the value of > + // L, and report that the destination buffer is full. Note that > + // we always write right up to the end of the destination buffer.
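Stepping back to the fast path above for a moment, since the 32-byte slack took me a while to verify: copy() always writes whole 8-byte chunks (and at least one), so it can write a few bytes past dst + length. The margin covers that; my back-of-the-envelope check, not from the patch:

	L + M <= remaining_bytes = (dst_end - dst) - 32
	copy(dst, src, len) writes no byte beyond dst + len + 7
	=> furthest write <= dst + L + M + 7 <= dst_end - 25

So the wide copies stay comfortably inside the buffer, and the careful path here restores the true count before switching to byte-by-byte copies. No action needed, just saving the next reviewer the derivation.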
> + else { > + size_t i; > + for (i = 0; i < remaining_bytes; i++) > + dst[i] = lit[i]; > + dst += remaining_bytes; > + lit += remaining_bytes; > + L -= remaining_bytes; > + goto DestinationBufferIsFull; > + } > + // The match goes just like the literal does. We copy as much as > + // we can byte-by-byte, and if we reach the end of the buffer > + // before finishing, we return to the caller indicating that > + // the buffer is full. > + if (M <= remaining_bytes) { > + size_t i; > + for (i = 0; i < M; i++) > + dst[i] = dst[i - D]; > + dst += M; > + remaining_bytes -= M; > + M = 0; > + (void)M; // no dead store warning > + // We don't need to update M = 0, because there's no partial > + // symbol to continue executing. Either we're at the end of > + // the block, in which case we will never need to resume with > + // this state, or we're going to decode another L, M, D set, > + // which will overwrite M anyway. > + // > + // But we still set M = 0, to maintain the post-condition. > + } else { > + size_t i; > + for (i = 0; i < remaining_bytes; i++) > + dst[i] = dst[i - D]; > + dst += remaining_bytes; > + M -= remaining_bytes; > +DestinationBufferIsFull: > + // Because we want to be able to resume decoding where we've left > + // off (even in the middle of a literal or match), we need to > + // update all of the block state fields with the current values > + // so that we can resume execution from this point once the > + // caller has given us more space to write into. > + bs->l_value = L; > + bs->m_value = M; > + bs->d_value = D; > + bs->l_state = l_state; > + bs->m_state = m_state; > + bs->d_state = d_state; > + bs->lmd_in_stream = in; > + bs->n_matches = symbols; > + bs->lmd_in_buf = (uint32_t)(src - s->src); > + bs->current_literal = lit; > + s->dst = dst; > + return LZFSE_STATUS_DST_FULL; > + } > + // Restore the "sham" decremented value of remaining_bytes and > + // continue to the next L, M, D triple. We'll just be back in > + // the careful path again, but this only happens at the very end > + // of the buffer, so a little minor inefficiency here is a good > + // tradeoff for simpler code. > + remaining_bytes -= 32; > + } > + } > + // Because we've finished with the whole block, we don't need to update > + // any of the blockstate fields; they will not be used again. We just > + // update the destination pointer in the state object and return. > + s->dst = dst; > + return LZFSE_STATUS_OK; > +} > + > +int lzfse_decode(lzfse_decoder_state *s) > +{ > + while (1) { > + // Are we inside a block? 
> + switch (s->block_magic) { > + case LZFSE_NO_BLOCK_MAGIC: { > + uint32_t magic; > + // We need at least 4 bytes of magic number to identify next block > + if (s->src + 4 > s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + magic = load4(s->src); > + > + if (magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC) { > + s->src += 4; > + s->end_of_stream = 1; > + return LZFSE_STATUS_OK; // done > + } > + > + if (magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC) { > + uncompressed_block_decoder_state *bs = NULL; > + if (s->src + sizeof(uncompressed_block_header) > > + s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + // Setup state for uncompressed block > + bs = &(s->uncompressed_block_state); > + bs->n_raw_bytes = load4( > + s->src + > + offsetof(uncompressed_block_header, > + n_raw_bytes)); > + s->src += sizeof(uncompressed_block_header); > + s->block_magic = magic; > + break; > + } > + > + if (magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC) { > + lzvn_compressed_block_decoder_state *bs = NULL; > + if (s->src + > + sizeof(lzvn_compressed_block_header) > > + s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + // Setup state for compressed LZVN block > + bs = &(s->compressed_lzvn_block_state); > + bs->n_raw_bytes = load4( > + s->src + > + offsetof(lzvn_compressed_block_header, > + n_raw_bytes)); > + bs->n_payload_bytes = load4( > + s->src + > + offsetof(lzvn_compressed_block_header, > + n_payload_bytes)); > + bs->d_prev = 0; > + s->src += sizeof(lzvn_compressed_block_header); > + s->block_magic = magic; > + break; > + } > + > + if (magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC || > + magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { > + lzfse_compressed_block_header_v1 header1; > + size_t header_size = 0; > + lzfse_compressed_block_decoder_state *bs = NULL; > + > + // Decode compressed headers > + if (magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { > + const lzfse_compressed_block_header_v2 > + *header2; > + int decodeStatus; > + // Check we have the fixed part of the structure > + if (s->src + > + offsetof( > + lzfse_compressed_block_header_v2, > + freq) > > + s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + > + // Get size, and check we have the entire structure > + header2 = > + (const lzfse_compressed_block_header_v2 > + *)s > + ->src; // not aligned, OK > + header_size = > + lzfse_decode_v2_header_size( > + header2); > + if (s->src + header_size > s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + decodeStatus = lzfse_decode_v1(&header1, > + header2); > + if (decodeStatus != 0) > + return LZFSE_STATUS_ERROR; // failed > + } else { > + if (s->src + > + sizeof(lzfse_compressed_block_header_v1) > > + s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated > + memcpy(&header1, s->src, > + sizeof(lzfse_compressed_block_header_v1)); > + header_size = sizeof( > + lzfse_compressed_block_header_v1); > + } > + > + // We require the header + entire encoded block to be present in SRC > + // during the entire block decoding. > + // This can be relaxed somehow, if it becomes a limiting factor, at the > + // price of a more complex state maintenance. > + // For DST, we can't easily require space for the entire decoded block, > + // because it may expand to something very very large. 
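One thing worth a second look before the next revision: header_size is read straight from the untrusted stream (lzfse_decode_v2_header_size() above) and is then used in pointer arithmetic, s->src + header_size, before the value itself is validated. On a crafted stream that addition can wrap, which is undefined behavior. Comparing sizes rather than pointers would be safer; an untested sketch, using only names already in the patch:

	if (header_size < offsetof(lzfse_compressed_block_header_v2, freq) ||
	    header_size > (size_t)(s->src_end - s->src))
		return LZFSE_STATUS_SRC_EMPTY;

The lzfse_check_block_header_v1() sanity checks further down don't help here, since they run after this arithmetic.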
> + if (s->src + header_size + > + header1.n_literal_payload_bytes + > + header1.n_lmd_payload_bytes > > + s->src_end) > + return LZFSE_STATUS_SRC_EMPTY; // need all encoded block > + > + // Sanity checks > + if (lzfse_check_block_header_v1(&header1) != > + 0) { > + return LZFSE_STATUS_ERROR; > + } > + > + // Skip header > + s->src += header_size; > + > + // Setup state for compressed V1 block from header > + bs = &(s->compressed_lzfse_block_state); > + bs->n_lmd_payload_bytes = > + header1.n_lmd_payload_bytes; > + bs->n_matches = header1.n_matches; > + fse_init_decoder_table( > + LZFSE_ENCODE_LITERAL_STATES, > + LZFSE_ENCODE_LITERAL_SYMBOLS, > + header1.literal_freq, > + bs->literal_decoder); > + fse_init_value_decoder_table( > + LZFSE_ENCODE_L_STATES, > + LZFSE_ENCODE_L_SYMBOLS, header1.l_freq, > + l_extra_bits, l_base_value, > + bs->l_decoder); > + fse_init_value_decoder_table( > + LZFSE_ENCODE_M_STATES, > + LZFSE_ENCODE_M_SYMBOLS, header1.m_freq, > + m_extra_bits, m_base_value, > + bs->m_decoder); > + fse_init_value_decoder_table( > + LZFSE_ENCODE_D_STATES, > + LZFSE_ENCODE_D_SYMBOLS, header1.d_freq, > + d_extra_bits, d_base_value, > + bs->d_decoder); > + > + // Decode literals > + { > + fse_in_stream in; > + const uint8_t *buf_start = s->src_begin; > + const uint8_t *buf; > + fse_state state0; > + fse_state state1; > + fse_state state2; > + fse_state state3; > + uint32_t i; > + > + s->src += > + header1.n_literal_payload_bytes; // skip literal payload > + buf = s->src; // read bits backwards from the end > + if (fse_in_init(&in, > + header1.literal_bits, > + &buf, buf_start) != 0) > + return LZFSE_STATUS_ERROR; > + > + state0 = header1.literal_state[0]; > + state1 = header1.literal_state[1]; > + state2 = header1.literal_state[2]; > + state3 = header1.literal_state[3]; > + > + for (i = 0; i < header1.n_literals; > + i += > + 4) // n_literals is multiple of 4 > + { > +#if FSE_IOSTREAM_64 > + if (fse_in_flush(&in, &buf, > + buf_start) != > + 0) > + return LZFSE_STATUS_ERROR; // [57, 64] bits > + bs->literals[i + 0] = fse_decode( > + &state0, > + bs->literal_decoder, > + &in); // 10b max > + bs->literals[i + 1] = fse_decode( > + &state1, > + bs->literal_decoder, > + &in); // 10b max > + bs->literals[i + 2] = fse_decode( > + &state2, > + bs->literal_decoder, > + &in); // 10b max > + bs->literals[i + 3] = fse_decode( > + &state3, > + bs->literal_decoder, > + &in); // 10b max > +#else > + if (fse_in_flush(&in, &buf, > + buf_start) != > + 0) > + return LZFSE_STATUS_ERROR; // [25, 23] bits > + bs->literals[i + 0] = fse_decode( > + &state0, > + bs->literal_decoder, > + &in); // 10b max > + bs->literals[i + 1] = fse_decode( > + &state1, > + bs->literal_decoder, > + &in); // 10b max > + if (fse_in_flush(&in, &buf, > + buf_start) != > + 0) > + return LZFSE_STATUS_ERROR; // [25, 23] bits > + bs->literals[i + 2] = fse_decode( > + &state2, > + bs->literal_decoder, > + &in); // 10b max > + bs->literals[i + 3] = fse_decode( > + &state3, > + bs->literal_decoder, > + &in); // 10b max > +#endif > + } > + > + bs->current_literal = bs->literals; > + } // literals > + > + // SRC is not incremented to skip the LMD payload, since we need it > + // during block decode. > + // We will increment SRC at the end of the block only after this point. 
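A side note on the literal loop just above, so nobody else has to re-derive it: the four-way interleaving is sound as far as I can tell. On the 64-bit path each fse_in_flush() leaves at least 57 bits in the accumulator and each fse_decode() consumes at most 10 (per the comments), so one refill per group of four literals suffices:

	4 symbols * 10 bits max = 40 bits <= 57 bits available

The 32-bit path refills every two symbols instead, 20 bits against the 25 or more it has after a flush. No change requested here.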
> + > + // Initialize the L,M,D decode stream, do not start decoding matches > + // yet, and store decoder state > + { > + fse_in_stream in; > + // read bits backwards from the end > + const uint8_t *buf = > + s->src + > + header1.n_lmd_payload_bytes; > + if (fse_in_init(&in, header1.lmd_bits, > + &buf, s->src) != 0) > + return LZFSE_STATUS_ERROR; > + > + bs->l_state = header1.l_state; > + bs->m_state = header1.m_state; > + bs->d_state = header1.d_state; > + bs->lmd_in_buf = > + (uint32_t)(buf - s->src); > + bs->l_value = bs->m_value = 0; > + // Initialize D to an illegal value so we can't erroneously use > + // an uninitialized "previous" value. > + bs->d_value = -1; > + bs->lmd_in_stream = in; > + } > + > + s->block_magic = magic; > + break; > + } > + > + // Here we have an invalid magic number > + return LZFSE_STATUS_ERROR; > + } // LZFSE_NO_BLOCK_MAGIC > + > + case LZFSE_UNCOMPRESSED_BLOCK_MAGIC: { > + uncompressed_block_decoder_state *bs = > + &(s->uncompressed_block_state); > + > + // Compute the size (in bytes) of the data that we will actually copy. > + // This size is minimum(bs->n_raw_bytes, space in src, space in dst). > + > + uint32_t copy_size = > + bs->n_raw_bytes; // bytes left to copy > + size_t src_space, dst_space; > + if (copy_size == 0) { > + s->block_magic = 0; > + break; > + } // end of block > + > + if (s->src_end <= s->src) > + return LZFSE_STATUS_SRC_EMPTY; // need more SRC data > + src_space = s->src_end - s->src; > + if (copy_size > src_space) > + copy_size = (uint32_t) > + src_space; // limit to SRC data (> 0) > + > + if (s->dst_end <= s->dst) > + return LZFSE_STATUS_DST_FULL; // need more DST capacity > + dst_space = s->dst_end - s->dst; > + if (copy_size > dst_space) > + copy_size = (uint32_t) > + dst_space; // limit to DST capacity (> 0) > + > + // Now that we know that the copy size is bounded to the source and > + // dest buffers, go ahead and copy the data. 
> + // We always have copy_size > 0 here > + memcpy(s->dst, s->src, copy_size); > + s->src += copy_size; > + s->dst += copy_size; > + bs->n_raw_bytes -= copy_size; > + > + break; > + } // LZFSE_UNCOMPRESSED_BLOCK_MAGIC > + > + case LZFSE_COMPRESSEDV1_BLOCK_MAGIC: > + case LZFSE_COMPRESSEDV2_BLOCK_MAGIC: { > + int status; > + lzfse_compressed_block_decoder_state *bs = > + &(s->compressed_lzfse_block_state); > + // Require the entire LMD payload to be in SRC > + if (s->src_end <= s->src || > + bs->n_lmd_payload_bytes > > + (size_t)(s->src_end - s->src)) > + return LZFSE_STATUS_SRC_EMPTY; > + > + status = lzfse_decode_lmd(s); > + if (status != LZFSE_STATUS_OK) > + return status; > + > + s->block_magic = LZFSE_NO_BLOCK_MAGIC; > + s->src += bs->n_lmd_payload_bytes; // to next block > + break; > + } // LZFSE_COMPRESSEDV1_BLOCK_MAGIC || LZFSE_COMPRESSEDV2_BLOCK_MAGIC > + > + case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC: { > + lzvn_compressed_block_decoder_state *bs = > + &(s->compressed_lzvn_block_state); > + lzvn_decoder_state dstate; > + size_t src_used, dst_used; > + if (bs->n_payload_bytes > 0 && s->src_end <= s->src) > + return LZFSE_STATUS_SRC_EMPTY; // need more SRC data > + > + // Init LZVN decoder state > + memset(&dstate, 0x00, sizeof(dstate)); > + dstate.src = s->src; > + dstate.src_end = s->src_end; > + if (dstate.src_end - s->src > bs->n_payload_bytes) > + dstate.src_end = > + s->src + > + bs->n_payload_bytes; // limit to payload bytes > + dstate.dst_begin = s->dst_begin; > + dstate.dst = s->dst; > + dstate.dst_end = s->dst_end; > + if (dstate.dst_end - s->dst > bs->n_raw_bytes) > + dstate.dst_end = > + s->dst + > + bs->n_raw_bytes; // limit to raw bytes > + dstate.d_prev = bs->d_prev; > + dstate.end_of_stream = 0; > + > + // Run LZVN decoder > + lzvn_decode(&dstate); > + > + // Update our state > + src_used = dstate.src - s->src; > + dst_used = dstate.dst - s->dst; > + if (src_used > bs->n_payload_bytes || > + dst_used > bs->n_raw_bytes) > + return LZFSE_STATUS_ERROR; // sanity check > + s->src = dstate.src; > + s->dst = dstate.dst; > + bs->n_payload_bytes -= (uint32_t)src_used; > + bs->n_raw_bytes -= (uint32_t)dst_used; > + bs->d_prev = (uint32_t)dstate.d_prev; > + > + // Test end of block > + if (bs->n_payload_bytes == 0 && bs->n_raw_bytes == 0 && > + dstate.end_of_stream) { > + s->block_magic = 0; > + break; > + } // block done > + > + // Check for invalid state > + if (bs->n_payload_bytes == 0 || bs->n_raw_bytes == 0 || > + dstate.end_of_stream) > + return LZFSE_STATUS_ERROR; > + > + // Here, block is not done and state is valid, so we need more space in dst. > + return LZFSE_STATUS_DST_FULL; > + } > + > + default: > + return LZFSE_STATUS_ERROR; // invalid magic > + > + } // switch magic > + > + } // block loop > + > + return LZFSE_STATUS_OK; > +} > diff --git a/drivers/staging/apfs/lzfse/lzfse_encode_tables.h b/drivers/staging/apfs/lzfse/lzfse_encode_tables.h > new file mode 100644 > index 0000000000000000000000000000000000000000..da8daac3c8be161a284ca570d60c7903c3703d75 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_encode_tables.h > @@ -0,0 +1,224 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
> + * > + */ > + > +#ifndef LZFSE_ENCODE_TABLES_H > +#define LZFSE_ENCODE_TABLES_H > + > +static inline uint8_t l_base_from_value(int32_t value) > +{ > + static const uint8_t sym[LZFSE_ENCODE_MAX_L_VALUE + 1] = { > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, > + 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 > + }; > + return sym[value]; > +} > +static inline uint8_t m_base_from_value(int32_t value) > +{ > + static const uint8_t sym[LZFSE_ENCODE_MAX_M_VALUE + 1] = { > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, > + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, > + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, > + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, > + 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, > + 19, 19, 19, 19, 19, 19, 19, 19 > + }; > + return sym[value]; > +} > + > +static inline uint8_t d_base_from_value(int32_t value) > +{ > + static const uint8_t sym[64 * 4] = { > + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, > + 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, > + 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, > + 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, > + 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 24, 24, > + 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, > + 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, > + 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, > + 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 40, > + 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, > + 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, > + 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, > + 48, 49, 50, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56, 56, 56, 56, > + 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, > + 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, > + 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 0, 0, 0, 0 > + }; > + int index = 0; > + int in_range_k; > + in_range_k = (value >= 0 && value < 60); > + index |= (((value - 0) >> 0) + 0) & -in_range_k; > + in_range_k = (value >= 60 && value < 1020); > + index |= (((value - 60) >> 4) + 64) & -in_range_k; > + in_range_k = (value >= 1020 && value < 16380); > + index |= (((value - 1020) >> 8) + 128) & -in_range_k; > + in_range_k = (value >= 16380 && value < 262140); > + index |= (((value - 16380) >> 12) + 192) & -in_range_k; > + return sym[index & 255]; > +} > + > +#endif // LZFSE_ENCODE_TABLES_H > diff --git a/drivers/staging/apfs/lzfse/lzfse_fse.c b/drivers/staging/apfs/lzfse/lzfse_fse.c > new file mode 100644 > index 0000000000000000000000000000000000000000..50329ccfae48d150b7b3c2322cc76bc8235fedd4 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_fse.c > @@ -0,0 +1,214 @@ > +// SPDX-License-Identifier: BSD-3-Clause > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +#include "lzfse_internal.h" > + > +/* Initialize encoder table T[NSYMBOLS]. > + * NSTATES = sum FREQ[i] is the number of states (a power of 2) > + * NSYMBOLS is the number of symbols. > + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] > + * >= 0. 
> + * Some symbols may have a 0 frequency. In that case, they should not be > + * present in the data. > + */ > +void fse_init_encoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + fse_encoder_entry *__restrict t) > +{ > + int offset = 0; // current offset > + int n_clz = __builtin_clz(nstates); > + int i; > + for (i = 0; i < nsymbols; i++) { > + int f = (int)freq[i]; > + int k; > + if (f == 0) > + continue; // skip this symbol, no occurrences > + k = __builtin_clz(f) - > + n_clz; // shift needed to ensure N <= (F<<K) < 2*N > + t[i].s0 = (int16_t)((f << k) - nstates); > + t[i].k = (int16_t)k; > + t[i].delta0 = (int16_t)(offset - f + (nstates >> k)); > + t[i].delta1 = (int16_t)(offset - f + (nstates >> (k - 1))); > + offset += f; > + } > +} > + > +/* Initialize decoder table T[NSTATES]. > + * NSTATES = sum FREQ[i] is the number of states (a power of 2) > + * NSYMBOLS is the number of symbols. > + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] > + * >= 0. > + * Some symbols may have a 0 frequency. In that case, they should not be > + * present in the data. > + */ > +int fse_init_decoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + int32_t *__restrict t) > +{ > + int n_clz = __builtin_clz(nstates); > + int sum_of_freq = 0; > + int i, j0, j; > + for (i = 0; i < nsymbols; i++) { > + int f = (int)freq[i]; > + int k; > + if (f == 0) > + continue; // skip this symbol, no occurrences > + > + sum_of_freq += f; > + > + if (sum_of_freq > nstates) { > + return -1; > + } > + > + k = __builtin_clz(f) - > + n_clz; // shift needed to ensure N <= (F<<K) < 2*N > + j0 = ((2 * nstates) >> k) - f; > + > + // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F > + for (j = 0; j < f; j++) { > + fse_decoder_entry e; > + > + e.symbol = (uint8_t)i; > + if (j < j0) { > + e.k = (int8_t)k; > + e.delta = (int16_t)(((f + j) << k) - nstates); > + } else { > + e.k = (int8_t)(k - 1); > + e.delta = (int16_t)((j - j0) << (k - 1)); > + } > + > + memcpy(t, &e, sizeof(e)); > + t++; > + } > + } > + > + return 0; // OK > +} > + > +/* Initialize value decoder table T[NSTATES]. > + * NSTATES = sum FREQ[i] is the number of states (a power of 2) > + * NSYMBOLS is the number of symbols. > + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] > + * >= 0. > + * SYMBOL_VBITS[NSYMBOLS] and SYMBOLS_VBASE[NSYMBOLS] are the number of value > + * bits to read and the base value for each symbol. > + * Some symbols may have a 0 frequency. In that case, they should not be > + * present in the data. 
> + */ > +void fse_init_value_decoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + const uint8_t *__restrict symbol_vbits, > + const int32_t *__restrict symbol_vbase, > + fse_value_decoder_entry *__restrict t) > +{ > + int n_clz = __builtin_clz(nstates); > + int i; > + for (i = 0; i < nsymbols; i++) { > + fse_value_decoder_entry ei = { 0 }; > + int f = (int)freq[i]; > + int k, j0, j; > + if (f == 0) > + continue; // skip this symbol, no occurrences > + > + k = __builtin_clz(f) - > + n_clz; // shift needed to ensure N <= (F<<K) < 2*N > + j0 = ((2 * nstates) >> k) - f; > + > + ei.value_bits = symbol_vbits[i]; > + ei.vbase = symbol_vbase[i]; > + > + // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F > + for (j = 0; j < f; j++) { > + fse_value_decoder_entry e = ei; > + > + if (j < j0) { > + e.total_bits = (uint8_t)k + e.value_bits; > + e.delta = (int16_t)(((f + j) << k) - nstates); > + } else { > + e.total_bits = (uint8_t)(k - 1) + e.value_bits; > + e.delta = (int16_t)((j - j0) << (k - 1)); > + } > + > + memcpy(t, &e, 8); > + t++; > + } > + } > +} > + > +/* Remove states from symbols until the correct number of states is used. > + */ > +static void fse_adjust_freqs(uint16_t *freq, int overrun, int nsymbols) > +{ > + int shift; > + for (shift = 3; overrun != 0; shift--) { > + int sym; > + for (sym = 0; sym < nsymbols; sym++) { > + if (freq[sym] > 1) { > + int n = (freq[sym] - 1) >> shift; > + if (n > overrun) > + n = overrun; > + freq[sym] -= n; > + overrun -= n; > + if (overrun == 0) > + break; > + } > + } > + } > +} > + > +/* Normalize a table T[NSYMBOLS] of occurrences to FREQ[NSYMBOLS]. > + */ > +void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, > + uint16_t *__restrict freq) > +{ > + uint32_t s_count = 0; > + int remaining = nstates; // must be signed; this may become < 0 > + int max_freq = 0; > + int max_freq_sym = 0; > + int shift = __builtin_clz(nstates) - 1; > + uint32_t highprec_step; > + int i; > + > + // Compute the total number of symbol occurrences > + for (i = 0; i < nsymbols; i++) > + s_count += t[i]; > + > + if (s_count == 0) > + highprec_step = 0; // no symbols used > + else > + highprec_step = ((uint32_t)1 << 31) / s_count; > + > + for (i = 0; i < nsymbols; i++) { > + // Rescale the occurrence count to get the normalized frequency. > + // Round up if the fractional part is >= 0.5; otherwise round down. > + // For efficiency, we do this calculation using integer arithmetic. > + int f = (((t[i] * highprec_step) >> shift) + 1) >> 1; > + > + // If a symbol was used, it must be given a nonzero normalized frequency. > + if (f == 0 && t[i] != 0) > + f = 1; > + > + freq[i] = f; > + remaining -= f; > + > + // Remember the maximum frequency and which symbol had it. > + if (f > max_freq) { > + max_freq = f; > + max_freq_sym = i; > + } > + } > + > + // If there remain states to be assigned, then just assign them to the most > + // frequent symbol. Alternatively, if we assigned more states than were > + // actually available, then either remove states from the most frequent symbol > + // (for minor overruns) or use the slower adjustment algorithm (for major > + // overruns). 
> + if (-remaining < (max_freq >> 2)) { > + freq[max_freq_sym] += remaining; > + } else { > + fse_adjust_freqs(freq, -remaining, nsymbols); > + } > +} > diff --git a/drivers/staging/apfs/lzfse/lzfse_fse.h b/drivers/staging/apfs/lzfse/lzfse_fse.h > new file mode 100644 > index 0000000000000000000000000000000000000000..c6a5e4342cba554e391d3b54d5983c659c4e06bb > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_fse.h > @@ -0,0 +1,632 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +/* Finite state entropy coding (FSE) > + * This is an implementation of the tANS algorithm described by Jarek Duda, > + * we use the more descriptive name "Finite State Entropy". > + */ > + > +#ifndef LZFSE_FSE_H > +#define LZFSE_FSE_H > + > +#include <linux/types.h> > +#include <linux/string.h> > + > +/* Select between 32/64-bit I/O streams for FSE. Note that the FSE stream > + * size need not match the word size of the machine, but in practice you > + * want to use 64b streams on 64b systems for better performance. > + */ > +#define FSE_IOSTREAM_64 0 > + > +#define FSE_INLINE static inline __attribute__((__always_inline__)) > + > +/* MARK: - Bit utils > + */ > + > +/*! @abstract Signed type used to represent bit count. */ > +typedef int32_t fse_bit_count; > + > +/*! @abstract Unsigned type used to represent FSE state. */ > +typedef uint16_t fse_state; > + > +/* Mask the NBITS lsb of X. 0 <= NBITS < 64 > + */ > +static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) > +{ > + static const uint64_t mtable[65] = { > + 0x0000000000000000LLU, 0x0000000000000001LLU, > + 0x0000000000000003LLU, 0x0000000000000007LLU, > + 0x000000000000000fLLU, 0x000000000000001fLLU, > + 0x000000000000003fLLU, 0x000000000000007fLLU, > + 0x00000000000000ffLLU, 0x00000000000001ffLLU, > + 0x00000000000003ffLLU, 0x00000000000007ffLLU, > + 0x0000000000000fffLLU, 0x0000000000001fffLLU, > + 0x0000000000003fffLLU, 0x0000000000007fffLLU, > + 0x000000000000ffffLLU, 0x000000000001ffffLLU, > + 0x000000000003ffffLLU, 0x000000000007ffffLLU, > + 0x00000000000fffffLLU, 0x00000000001fffffLLU, > + 0x00000000003fffffLLU, 0x00000000007fffffLLU, > + 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, > + 0x0000000003ffffffLLU, 0x0000000007ffffffLLU, > + 0x000000000fffffffLLU, 0x000000001fffffffLLU, > + 0x000000003fffffffLLU, 0x000000007fffffffLLU, > + 0x00000000ffffffffLLU, 0x00000001ffffffffLLU, > + 0x00000003ffffffffLLU, 0x00000007ffffffffLLU, > + 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, > + 0x0000003fffffffffLLU, 0x0000007fffffffffLLU, > + 0x000000ffffffffffLLU, 0x000001ffffffffffLLU, > + 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, > + 0x00000fffffffffffLLU, 0x00001fffffffffffLLU, > + 0x00003fffffffffffLLU, 0x00007fffffffffffLLU, > + 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, > + 0x0003ffffffffffffLLU, 0x0007ffffffffffffLLU, > + 0x000fffffffffffffLLU, 0x001fffffffffffffLLU, > + 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, > + 0x00ffffffffffffffLLU, 0x01ffffffffffffffLLU, > + 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU, > + 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, > + 0x3fffffffffffffffLLU, 0x7fffffffffffffffLLU, > + 0xffffffffffffffffLLU, > + }; > + return x & mtable[nbits]; > +} > + > +/* Mask the NBITS lsb of X. 
0 <= NBITS < 32 > + */ > +static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) > +{ > + static const uint32_t mtable[33] = { > + 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U, > + 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU, > + 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU, > + 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU, > + 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU, > + 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU, > + 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU, > + 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU, > + 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU, > + 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU, > + 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU, > + }; > + return x & mtable[nbits]; > +} > + > +/*! @abstract Select \c nbits at index \c start from \c x. > + * 0 <= start <= start+nbits <= 64 */ > +FSE_INLINE uint64_t fse_extract_bits64(uint64_t x, fse_bit_count start, > + fse_bit_count nbits) > +{ > +#if defined(__GNUC__) > + // If START and NBITS are constants, map to bit-field extraction instructions > + if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) > + return (x >> start) & ((1LLU << nbits) - 1LLU); > +#endif > + > + // Otherwise, shift and mask > + return fse_mask_lsb64(x >> start, nbits); > +} > + > +/*! @abstract Select \c nbits at index \c start from \c x. > + * 0 <= start <= start+nbits <= 32 */ > +FSE_INLINE uint32_t fse_extract_bits32(uint32_t x, fse_bit_count start, > + fse_bit_count nbits) > +{ > +#if defined(__GNUC__) > + // If START and NBITS are constants, map to bit-field extraction instructions > + if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) > + return (x >> start) & ((1U << nbits) - 1U); > +#endif > + > + // Otherwise, shift and mask > + return fse_mask_lsb32(x >> start, nbits); > +} > + > +/* MARK: - Bit stream > + */ > + > +/* I/O streams > + * The streams can be shared between several FSE encoders/decoders, which is why > + * they are not in the state struct > + */ > + > +/*! @abstract Output stream, 64-bit accum. */ > +typedef struct { > + uint64_t accum; // Output bits > + fse_bit_count > + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 > +} fse_out_stream64; > + > +/*! @abstract Output stream, 32-bit accum. */ > +typedef struct { > + uint32_t accum; // Output bits > + fse_bit_count > + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 > +} fse_out_stream32; > + > +/*! @abstract Object representing an input stream. */ > +typedef struct { > + uint64_t accum; // Input bits > + fse_bit_count > + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 > +} fse_in_stream64; > + > +/*! @abstract Object representing an input stream. */ > +typedef struct { > + uint32_t accum; // Input bits > + fse_bit_count > + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 > +} fse_in_stream32; > + > +/*! @abstract Initialize an output stream object. */ > +FSE_INLINE void fse_out_init64(fse_out_stream64 *s) > +{ > + s->accum = 0; > + s->accum_nbits = 0; > +} > + > +/*! @abstract Initialize an output stream object. */ > +FSE_INLINE void fse_out_init32(fse_out_stream32 *s) > +{ > + s->accum = 0; > + s->accum_nbits = 0; > +} > + > +/*! @abstract Write full bytes from the accumulator to output buffer, ensuring > + * accum_nbits is in [0, 7]. 
> + * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all > + * cases. > + * @note *pbuf is incremented by the number of written bytes. */ > +FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) > +{ > + fse_bit_count nbits = s->accum_nbits & > + -8; // number of bits written, multiple of 8 > + > + // Write 8 bytes of current accumulator > + memcpy(*pbuf, &(s->accum), 8); > + *pbuf += (nbits >> 3); // bytes > + > + // Update state > + s->accum >>= nbits; // remove nbits > + s->accum_nbits -= nbits; > +} > + > +/*! @abstract Write full bytes from the accumulator to output buffer, ensuring > + * accum_nbits is in [0, 7]. > + * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all > + * cases. > + * @note *pbuf is incremented by the number of written bytes. */ > +FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) > +{ > + fse_bit_count nbits = s->accum_nbits & > + -8; // number of bits written, multiple of 8 > + > + // Write 4 bytes of current accumulator > + memcpy(*pbuf, &(s->accum), 4); > + *pbuf += (nbits >> 3); // bytes > + > + // Update state > + s->accum >>= nbits; // remove nbits > + s->accum_nbits -= nbits; > +} > + > +/*! @abstract Write the last bytes from the accumulator to output buffer, > + * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. > + * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all > + * cases. > + * @note *pbuf is incremented by the number of written bytes. */ > +FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) > +{ > + fse_bit_count nbits = (s->accum_nbits + 7) & > + -8; // number of bits written, multiple of 8 > + > + // Write 8 bytes of current accumulator > + memcpy(*pbuf, &(s->accum), 8); > + *pbuf += (nbits >> 3); // bytes > + > + // Update state > + s->accum = 0; // remove nbits > + s->accum_nbits -= nbits; > +} > + > +/*! @abstract Write the last bytes from the accumulator to output buffer, > + * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. > + * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all > + * cases. > + * @note *pbuf is incremented by the number of written bytes. */ > +FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) > +{ > + fse_bit_count nbits = (s->accum_nbits + 7) & > + -8; // number of bits written, multiple of 8 > + > + // Write 4 bytes of current accumulator > + memcpy(*pbuf, &(s->accum), 4); > + *pbuf += (nbits >> 3); // bytes > + > + // Update state > + s->accum = 0; // remove nbits > + s->accum_nbits -= nbits; > +} > + > +/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: > + * 0 <= b < 2^n, and N + s->accum_nbits <= 64. > + * @note The caller must ensure out_flush is called \b before the accumulator > + * overflows to more than 64 bits. */ > +FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n, uint64_t b) > +{ > + s->accum |= b << s->accum_nbits; > + s->accum_nbits += n; > +} > + > +/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: > + * 0 <= b < 2^n, and n + s->accum_nbits <= 32. > + * @note The caller must ensure out_flush is called \b before the accumulator > + * overflows to more than 32 bits. */ > +FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, uint32_t b) > +{ > + s->accum |= b << s->accum_nbits; > + s->accum_nbits += n; > +} > + > +#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS > + > +/*! 
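By the way, the fse_out_finish32() body comment said "Write 8 bytes" while the memcpy() writes 4, and the fse_out_push32() precondition said "0 <= n < 2^n" where the 64-bit variant correctly says "0 <= b < 2^n"; fixed above, please fold into the next revision. Also note that DEBUG_CHECK_INPUT_STREAM_PARAMETERS expands to nothing, so the "verify its validity (in DEBUG)" comments further down are currently aspirational; upstream presumably had an assert behind this. A one-line comment saying the macro is intentionally empty in the kernel build would save the next reader a grep.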
@abstract Initialize the fse input stream so that accum holds between 56 > + * and 63 bits. We never want to have 64 bits in the stream, because that allows > + * us to avoid a special case in the fse_in_pull function (eliminating an > + * unpredictable branch), while not requiring any additional fse_flush > + * operations. This is why we have the special case for n == 0 (in which case > + * we want to load only 7 bytes instead of 8). */ > +FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n, > + const uint8_t **pbuf, > + const uint8_t *buf_start) > +{ > + if (n) { > + if (*pbuf < buf_start + 8) > + return -1; // out of range > + *pbuf -= 8; > + memcpy(&(s->accum), *pbuf, 8); > + s->accum_nbits = n + 64; > + } else { > + if (*pbuf < buf_start + 7) > + return -1; // out of range > + *pbuf -= 7; > + memcpy(&(s->accum), *pbuf, 7); > + s->accum &= 0xffffffffffffff; > + s->accum_nbits = n + 56; > + } > + > + if ((s->accum_nbits < 56 || s->accum_nbits >= 64) || > + ((s->accum >> s->accum_nbits) != 0)) { > + return -1; // the incoming input is wrong (encoder should have zeroed the > + // upper bits) > + } > + > + return 0; // OK > +} > + > +/*! @abstract Identical to previous function, but for 32-bit operation > + * (resulting bit count is between 24 and 31 bits). */ > +FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n, > + const uint8_t **pbuf, > + const uint8_t *buf_start) > +{ > + if (n) { > + if (*pbuf < buf_start + 4) > + return -1; // out of range > + *pbuf -= 4; > + memcpy(&(s->accum), *pbuf, 4); > + s->accum_nbits = n + 32; > + } else { > + if (*pbuf < buf_start + 3) > + return -1; // out of range > + *pbuf -= 3; > + memcpy(&(s->accum), *pbuf, 3); > + s->accum &= 0xffffff; > + s->accum_nbits = n + 24; > + } > + > + if ((s->accum_nbits < 24 || s->accum_nbits >= 32) || > + ((s->accum >> s->accum_nbits) != 0)) { > + return -1; // the incoming input is wrong (encoder should have zeroed the > + // upper bits) > + } > + > + return 0; // OK > +} > + > +/*! @abstract Read in new bytes from buffer to ensure that we have a full > + * complement of bits in the stream object (again, between 56 and 63 bits). > + * checking the new value of \c *pbuf remains >= \c buf_start. > + * @return 0 if OK. > + * @return -1 on failure. */ > +FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 *s, const uint8_t **pbuf, > + const uint8_t *buf_start) > +{ > + // Get number of bits to add to bring us into the desired range. > + fse_bit_count nbits = (63 - s->accum_nbits) & -8; > + // Convert bits to bytes and decrement buffer address, then load new data. > + const uint8_t *buf = (*pbuf) - (nbits >> 3); > + uint64_t incoming; > + if (buf < buf_start) { > + return -1; // out of range > + } > + *pbuf = buf; > + memcpy(&incoming, buf, 8); > + // Update the state object and verify its validity (in DEBUG). > + s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits); > + s->accum_nbits += nbits; > + DEBUG_CHECK_INPUT_STREAM_PARAMETERS > + return 0; // OK > +} > + > +/*! @abstract Identical to previous function (but again, we're only filling > + * a 32-bit field with between 24 and 31 bits). */ > +FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 *s, const uint8_t **pbuf, > + const uint8_t *buf_start) > +{ > + // Get number of bits to add to bring us into the desired range. > + fse_bit_count nbits = (31 - s->accum_nbits) & -8; > + > + if (nbits > 0) { > + // Convert bits to bytes and decrement buffer address, then load new data. 
> + const uint8_t *buf = (*pbuf) - (nbits >> 3); > + uint32_t incoming; > + if (buf < buf_start) { > + return -1; // out of range > + } > + > + *pbuf = buf; > + > + incoming = *((uint32_t *)buf); > + > + // Update the state object and verify its validity (in DEBUG). > + s->accum = (s->accum << nbits) | > + fse_mask_lsb32(incoming, nbits); > + s->accum_nbits += nbits; > + } > + DEBUG_CHECK_INPUT_STREAM_PARAMETERS > + return 0; // OK > +} > + > +/*! @abstract Pull n bits out of the fse stream object. */ > +FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) > +{ > + uint64_t result; > + s->accum_nbits -= n; > + result = s->accum >> s->accum_nbits; > + s->accum = fse_mask_lsb64(s->accum, s->accum_nbits); > + return result; > +} > + > +/*! @abstract Pull n bits out of the fse stream object. */ > +FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) > +{ > + uint32_t result; > + s->accum_nbits -= n; > + result = s->accum >> s->accum_nbits; > + s->accum = fse_mask_lsb32(s->accum, s->accum_nbits); > + return result; > +} > + > +/* MARK: - Encode/Decode > + */ > + > +/* Map to 32/64-bit implementations and types for I/O > + */ > +#if FSE_IOSTREAM_64 > + > +typedef uint64_t fse_bits; > +typedef fse_out_stream64 fse_out_stream; > +typedef fse_in_stream64 fse_in_stream; > +#define fse_mask_lsb fse_mask_lsb64 > +#define fse_extract_bits fse_extract_bits64 > +#define fse_out_init fse_out_init64 > +#define fse_out_flush fse_out_flush64 > +#define fse_out_finish fse_out_finish64 > +#define fse_out_push fse_out_push64 > +#define fse_in_init fse_in_checked_init64 > +#define fse_in_checked_init fse_in_checked_init64 > +#define fse_in_flush fse_in_checked_flush64 > +#define fse_in_checked_flush fse_in_checked_flush64 > +#define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */ > +#define fse_in_checked_flush2(_unused, _parameters) /* nothing */ > +#define fse_in_pull fse_in_pull64 > + > +#else > + > +typedef uint32_t fse_bits; > +typedef fse_out_stream32 fse_out_stream; > +typedef fse_in_stream32 fse_in_stream; > +#define fse_mask_lsb fse_mask_lsb32 > +#define fse_extract_bits fse_extract_bits32 > +#define fse_out_init fse_out_init32 > +#define fse_out_flush fse_out_flush32 > +#define fse_out_finish fse_out_finish32 > +#define fse_out_push fse_out_push32 > +#define fse_in_init fse_in_checked_init32 > +#define fse_in_checked_init fse_in_checked_init32 > +#define fse_in_flush fse_in_checked_flush32 > +#define fse_in_checked_flush fse_in_checked_flush32 > +#define fse_in_flush2 fse_in_checked_flush32 > +#define fse_in_checked_flush2 fse_in_checked_flush32 > +#define fse_in_pull fse_in_pull32 > + > +#endif > + > +/*! @abstract Entry for one symbol in the encoder table (64b). */ > +typedef struct { > + int16_t s0; // First state requiring a K-bit shift > + int16_t k; // States S >= S0 are shifted K bits. States S < S0 are > + // shifted K-1 bits > + int16_t delta0; // Relative increment used to compute next state if S >= S0 > + int16_t delta1; // Relative increment used to compute next state if S < S0 > +} fse_encoder_entry; > + > +/*! @abstract Entry for one state in the decoder table (32b). */ > +typedef struct { // DO NOT REORDER THE FIELDS > + int8_t k; // Number of bits to read > + uint8_t symbol; // Emitted symbol > + int16_t delta; // Signed increment used to compute next state (+bias) > +} fse_decoder_entry; > + > +/*! @abstract Entry for one state in the value decoder table (64b). 
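In fse_in_checked_flush32() above:

	incoming = *((uint32_t *)buf);

This quietly casts away const and dereferences a pointer with no alignment guarantee, while fse_in_checked_flush64() does the same job with memcpy(). Unless there is a reason for the difference, I would mirror the 64-bit version:

	memcpy(&incoming, buf, 4);

which also keeps the const qualifier intact.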
*/ > +typedef struct { // DO NOT REORDER THE FIELDS > + uint8_t total_bits; // state bits + extra value bits = shift for next decode > + uint8_t value_bits; // extra value bits > + int16_t delta; // state base (delta) > + int32_t vbase; // value base > +} fse_value_decoder_entry; > + > +/*! @abstract Encode SYMBOL using the encoder table, and update \c *pstate, > + * \c out. > + * @note The caller must ensure we have enough bits available in the output > + * stream accumulator. */ > +FSE_INLINE void fse_encode(fse_state *__restrict pstate, > + const fse_encoder_entry *__restrict encoder_table, > + fse_out_stream *__restrict out, uint8_t symbol) > +{ > + int s = *pstate; > + fse_encoder_entry e = encoder_table[symbol]; > + int s0 = e.s0; > + int k = e.k; > + int delta0 = e.delta0; > + int delta1 = e.delta1; > + > + // Number of bits to write > + int hi = s >= s0; > + fse_bit_count nbits = hi ? k : (k - 1); > + fse_state delta = hi ? delta0 : delta1; > + > + // Write lower NBITS of state > + fse_bits b = fse_mask_lsb(s, nbits); > + fse_out_push(out, nbits, b); > + > + // Update state with remaining bits and delta > + *pstate = delta + (s >> nbits); > +} > + > +/*! @abstract Decode and return symbol using the decoder table, and update > + * \c *pstate, \c in. > + * @note The caller must ensure we have enough bits available in the input > + * stream accumulator. */ > +FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate, > + const int32_t *__restrict decoder_table, > + fse_in_stream *__restrict in) > +{ > + int32_t e = decoder_table[*pstate]; > + > + // Update state from K bits of input + DELTA > + *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff); > + > + // Return the symbol for this state > + return fse_extract_bits(e, 8, 8); // symbol > +} > + > +/*! @abstract Decode and return value using the decoder table, and update \c > + * *pstate, \c in. > + * \c value_decoder_table[nstates] > + * @note The caller must ensure we have enough bits available in the input > + * stream accumulator. */ > +FSE_INLINE int32_t > +fse_value_decode(fse_state *__restrict pstate, > + const fse_value_decoder_entry *value_decoder_table, > + fse_in_stream *__restrict in) > +{ > + fse_value_decoder_entry entry = value_decoder_table[*pstate]; > + uint32_t state_and_value_bits = > + (uint32_t)fse_in_pull(in, entry.total_bits); > + *pstate = (fse_state)(entry.delta + > + (state_and_value_bits >> entry.value_bits)); > + return (int32_t)(entry.vbase + > + fse_mask_lsb(state_and_value_bits, entry.value_bits)); > +} > + > +/* MARK: - Tables > + */ > + > +/* IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder > + * tables shall be initialized with the same parameters, including the > + * FREQ[NSYMBOL] array. > + * > + */ > + > +/*! @abstract Sanity check on frequency table, verify sum of \c freq > + * is <= \c number_of_states. */ > +FSE_INLINE int fse_check_freq(const uint16_t *freq_table, > + const size_t table_size, > + const size_t number_of_states) > +{ > + size_t sum_of_freq = 0; > + int i; > + for (i = 0; i < table_size; i++) { > + sum_of_freq += freq_table[i]; > + } > + return (sum_of_freq > number_of_states) ? -1 : 0; > +} > + > +/*! @abstract Initialize encoder table \c t[nsymbols]. > + * > + * @param nstates > + * sum \c freq[i]; the number of states (a power of 2). > + * > + * @param nsymbols > + * the number of symbols. > + * > + * @param freq[nsymbols] > + * is a normalized histogram of symbol frequencies, with \c freq[i] >= 0. 
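Minor: in fse_check_freq(), i is an int but table_size is a size_t, so the comparison can trip -Wsign-compare on a higher-W build. Easiest fix is to declare the index as size_t:

	size_t i;

	for (i = 0; i < table_size; i++)
		sum_of_freq += freq_table[i];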
> + * Some symbols may have a 0 frequency. In that case they should not be > + * present in the data. > + */ > +void fse_init_encoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + fse_encoder_entry *__restrict t); > + > +/*! @abstract Initialize decoder table \c t[nstates]. > + * > + * @param nstates > + * sum \c freq[i]; the number of states (a power of 2). > + * > + * @param nsymbols > + * the number of symbols. > + * > + * @param freq[nsymbols] > + * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. > + * Some symbols may have a 0 frequency. In that case they should not be > + * present in the data. > + * > + * @return 0 if OK. > + * @return -1 on failure. > + */ > +int fse_init_decoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + int32_t *__restrict t); > + > +/*! @abstract Initialize value decoder table \c t[nstates]. > + * > + * @param nstates > + * sum \c freq[i]; the number of states (a power of 2). > + * > + * @param nsymbols > + * the number of symbols. > + * > + * @param freq[nsymbols] > + * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. > + * \c symbol_vbits[nsymbols] and \c symbol_vbase[nsymbols] are the number of > + * value bits to read and the base value for each symbol. > + * Some symbols may have a 0 frequency. In that case they should not be > + * present in the data. > + */ > +void fse_init_value_decoder_table(int nstates, int nsymbols, > + const uint16_t *__restrict freq, > + const uint8_t *__restrict symbol_vbits, > + const int32_t *__restrict symbol_vbase, > + fse_value_decoder_entry *__restrict t); > + > +/*! @abstract Normalize a table \c t[nsymbols] of occurrences to > + * \c freq[nsymbols]. */ > +void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, > + uint16_t *__restrict freq); > + > +#endif /* LZFSE_FSE_H */ > diff --git a/drivers/staging/apfs/lzfse/lzfse_internal.h b/drivers/staging/apfs/lzfse/lzfse_internal.h > new file mode 100644 > index 0000000000000000000000000000000000000000..afca0b3eae410dc0ac4b93b319186a16c3441a4d > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_internal.h > @@ -0,0 +1,599 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +#ifndef LZFSE_INTERNAL_H > +#define LZFSE_INTERNAL_H > + > +/* Unlike the tunable parameters defined in lzfse_tunables.h, you probably > + * should not modify the values defined in this header. Doing so will either > + * break the compressor, or result in a compressed data format that is > + * incompatible. > + */ > + > +#define LZFSE_INLINE static inline __attribute__((__always_inline__)) > + > +#include "lzfse_fse.h" > +#include "lzfse_tunables.h" > +#include <linux/limits.h> > +#include <linux/stddef.h> > + > +/* Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as > + * a triplet, and represent a "usual" LZ-style literal and match pair. "L" > + * is the number of literal bytes, "M" is the number of match bytes, and "D" > + * is the match "distance"; the distance in bytes between the current pointer > + * and the start of the match. 
> + */ > +#define LZFSE_ENCODE_HASH_VALUES (1 << LZFSE_ENCODE_HASH_BITS) > +#define LZFSE_ENCODE_L_SYMBOLS 20 > +#define LZFSE_ENCODE_M_SYMBOLS 20 > +#define LZFSE_ENCODE_D_SYMBOLS 64 > +#define LZFSE_ENCODE_LITERAL_SYMBOLS 256 > +#define LZFSE_ENCODE_L_STATES 64 > +#define LZFSE_ENCODE_M_STATES 64 > +#define LZFSE_ENCODE_D_STATES 256 > +#define LZFSE_ENCODE_LITERAL_STATES 1024 > +#define LZFSE_MATCHES_PER_BLOCK 10000 > +#define LZFSE_LITERALS_PER_BLOCK (4 * LZFSE_MATCHES_PER_BLOCK) > +#define LZFSE_DECODE_LITERALS_PER_BLOCK (4 * LZFSE_DECODE_MATCHES_PER_BLOCK) > + > +/* LZFSE internal status. These values are used by internal LZFSE routines > + * as return codes. There should not be any good reason to change their > + * values; it is plausible that additional codes might be added in the > + * future. > + */ > +#define LZFSE_STATUS_OK 0 > +#define LZFSE_STATUS_SRC_EMPTY -1 > +#define LZFSE_STATUS_DST_FULL -2 > +#define LZFSE_STATUS_ERROR -3 > + > +/* Type representing an offset between elements in a buffer. On 64-bit > + * systems, this is stored in a 64-bit container to avoid extra sign- > + * extension operations in addressing arithmetic, but the value is always > + * representable as a 32-bit signed value in LZFSE's usage. > + */ > +#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) > +typedef int64_t lzfse_offset; > +#else > +typedef int32_t lzfse_offset; > +#endif > + > +typedef uint64_t uintmax_t; > + > +/*! @abstract History table set. Each line of the history table represents a set > + * of candidate match locations, each of which begins with four bytes with the > + * same hash. The table contains not only the positions, but also the first > + * four bytes at each position. This doubles the memory footprint of the > + * table, but allows us to quickly eliminate false-positive matches without > + * doing any pointer chasing and without pulling in any additional cachelines. > + * This provides a large performance win in practice. */ > +typedef struct { > + int32_t pos[LZFSE_ENCODE_HASH_WIDTH]; > + uint32_t value[LZFSE_ENCODE_HASH_WIDTH]; > +} lzfse_history_set; > + > +/*! @abstract An lzfse match is a sequence of bytes in the source buffer that > + * exactly matches an earlier (but possibly overlapping) sequence of bytes in > + * the same buffer. > + * @code > + * exeMPLARYexaMPLE > + * | | | ||-|--- lzfse_match2.length=3 > + * | | | ||----- lzfse_match2.pos > + * | | |-|------ lzfse_match1.length=3 > + * | | |-------- lzfse_match1.pos > + * | |-------------- lzfse_match2.ref > + * |----------------- lzfse_match1.ref > + * @endcode > + */ > +typedef struct { > + // Offset of the first byte in the match. > + lzfse_offset pos; > + // First byte of the source -- the earlier location in the buffer with the > + // same contents. > + lzfse_offset ref; > + // Length of the match. > + uint32_t length; > +} lzfse_match; > + > +/* MARK: - Encoder and Decoder state objects > + */ > + > +/*! @abstract Encoder state object. */ > +typedef struct { > + // Pointer to first byte of the source buffer. > + const uint8_t *src; > + // Length of the source buffer in bytes. Note that this is not a size_t, > + // but rather lzfse_offset, which is a signed type. The largest > + // representable buffer is 2GB, but arbitrarily large buffers may be > + // handled by repeatedly calling the encoder function and "translating" > + // the state between calls. When doing this, it is beneficial to use > + // blocks smaller than 2GB in order to maintain residency in the last-level > + // cache. 
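Two things in this stretch of lzfse_internal.h. First, "typedef uint64_t uintmax_t;" redefines a standard C type name; the kernel happens not to provide uintmax_t, but squatting on that identifier in a header is fragile. A library-private alias (or plain u64 in the two helpers that use it) would be safer. Second, given the cover letter says encode support is not included, does lzfse_encoder_state need to come along at all? With the default tunables the history table alone is 16384 entries * 32 bytes = 512 KiB, which is a lot of structure to carry for an encoder that cannot be reached.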
Consult the implementation of lzfse_encode_buffer for details. > + lzfse_offset src_end; > + // Offset of the first byte of the next literal to encode in the source > + // buffer. > + lzfse_offset src_literal; > + // Offset of the byte currently being checked for a match. > + lzfse_offset src_encode_i; > + // The last byte offset to consider for a match. In some uses it makes > + // sense to use a smaller offset than src_end. > + lzfse_offset src_encode_end; > + // Pointer to the next byte to be written in the destination buffer. > + uint8_t *dst; > + // Pointer to the first byte of the destination buffer. > + uint8_t *dst_begin; > + // Pointer to one byte past the end of the destination buffer. > + uint8_t *dst_end; > + // Pending match; will be emitted unless a better match is found. > + lzfse_match pending; > + // The number of matches written so far. Note that there is no problem in > + // using a 32-bit field for this quantity, because the state already limits > + // us to at most 2GB of data; there cannot possibly be more matches than > + // there are bytes in the input. > + uint32_t n_matches; > + // The number of literals written so far. > + uint32_t n_literals; > + // Lengths of found literals. > + uint32_t l_values[LZFSE_MATCHES_PER_BLOCK]; > + // Lengths of found matches. > + uint32_t m_values[LZFSE_MATCHES_PER_BLOCK]; > + // Distances of found matches. > + uint32_t d_values[LZFSE_MATCHES_PER_BLOCK]; > + // Concatenated literal bytes. > + uint8_t literals[LZFSE_LITERALS_PER_BLOCK]; > + // History table used to search for matches. Each entry of the table > + // corresponds to a group of four byte sequences in the input stream > + // that hash to the same value. > + lzfse_history_set history_table[LZFSE_ENCODE_HASH_VALUES]; > +} lzfse_encoder_state; > + > +/*! @abstract Decoder state object for lzfse compressed blocks. */ > +typedef struct { > + // Number of matches remaining in the block. > + uint32_t n_matches; > + // Number of bytes used to encode L, M, D triplets for the block. > + uint32_t n_lmd_payload_bytes; > + // Pointer to the next literal to emit. > + const uint8_t *current_literal; > + // L, M, D triplet for the match currently being emitted. This is used only > + // if we need to restart after reaching the end of the destination buffer in > + // the middle of a literal or match. > + int32_t l_value, m_value, d_value; > + // FSE stream object. > + fse_in_stream lmd_in_stream; > + // Offset of L,M,D encoding in the input buffer. Because we read through an > + // FSE stream *backwards* while decoding, this is decremented as we move > + // through a block. > + uint32_t lmd_in_buf; > + // The current state of the L, M, and D FSE decoders. > + uint16_t l_state, m_state, d_state; > + // Internal FSE decoder tables for the current block. These have > + // alignment forced to 8 bytes to guarantee that a single state's > + // entry cannot span two cachelines. > + fse_value_decoder_entry l_decoder[LZFSE_ENCODE_L_STATES] > + __attribute__((__aligned__(8))); > + fse_value_decoder_entry m_decoder[LZFSE_ENCODE_M_STATES] > + __attribute__((__aligned__(8))); > + fse_value_decoder_entry d_decoder[LZFSE_ENCODE_D_STATES] > + __attribute__((__aligned__(8))); > + int32_t literal_decoder[LZFSE_ENCODE_LITERAL_STATES]; > + // The literal stream for the block, plus padding to allow for faster copy > + // operations. > + uint8_t literals[LZFSE_LITERALS_PER_BLOCK + 64]; > +} lzfse_compressed_block_decoder_state; > + > +/* Decoder state object for uncompressed blocks. 
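checkpatch should also be flagging the attribute spellings here: __aligned(8) is preferred over __attribute__((__aligned__(8))) for these decoder tables, and likewise __always_inline over the open-coded __attribute__((__always_inline__)) in the FSE_INLINE/LZFSE_INLINE definitions. Both helpers come from <linux/compiler_attributes.h>, so the conversion is mechanical.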
> + */ > +typedef struct { > + uint32_t n_raw_bytes; > +} uncompressed_block_decoder_state; > + > +/*! @abstract Decoder state object for lzvn-compressed blocks. */ > +typedef struct { > + uint32_t n_raw_bytes; > + uint32_t n_payload_bytes; > + uint32_t d_prev; > +} lzvn_compressed_block_decoder_state; > + > +/*! @abstract Decoder state object. */ > +typedef struct { > + // Pointer to next byte to read from source buffer (this is advanced as we > + // decode; src_begin describe the buffer and do not change). > + const uint8_t *src; > + // Pointer to first byte of source buffer. > + const uint8_t *src_begin; > + // Pointer to one byte past the end of the source buffer. > + const uint8_t *src_end; > + // Pointer to the next byte to write to destination buffer (this is advanced > + // as we decode; dst_begin and dst_end describe the buffer and do not change). > + uint8_t *dst; > + // Pointer to first byte of destination buffer. > + uint8_t *dst_begin; > + // Pointer to one byte past the end of the destination buffer. > + uint8_t *dst_end; > + // 1 if we have reached the end of the stream, 0 otherwise. > + int end_of_stream; > + // magic number of the current block if we are within a block, > + // LZFSE_NO_BLOCK_MAGIC otherwise. > + uint32_t block_magic; > + lzfse_compressed_block_decoder_state compressed_lzfse_block_state; > + lzvn_compressed_block_decoder_state compressed_lzvn_block_state; > + uncompressed_block_decoder_state uncompressed_block_state; > +} lzfse_decoder_state; > + > +/* MARK: - Block header objects > + */ > + > +#define LZFSE_NO_BLOCK_MAGIC 0x00000000 // 0 (invalid) > +#define LZFSE_ENDOFSTREAM_BLOCK_MAGIC 0x24787662 // bvx$ (end of stream) > +#define LZFSE_UNCOMPRESSED_BLOCK_MAGIC 0x2d787662 // bvx- (raw data) > +#define LZFSE_COMPRESSEDV1_BLOCK_MAGIC \ > + 0x31787662 // bvx1 (lzfse compressed, uncompressed tables) > +#define LZFSE_COMPRESSEDV2_BLOCK_MAGIC \ > + 0x32787662 // bvx2 (lzfse compressed, compressed tables) > +#define LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC 0x6e787662 // bvxn (lzvn compressed) > + > +/*! @abstract Uncompressed block header in encoder stream. */ > +typedef struct { > + // Magic number, always LZFSE_UNCOMPRESSED_BLOCK_MAGIC. > + uint32_t magic; > + // Number of raw bytes in block. > + uint32_t n_raw_bytes; > +} uncompressed_block_header; > + > +/*! @abstract Compressed block header with uncompressed tables. */ > +typedef struct { > + // Magic number, always LZFSE_COMPRESSEDV1_BLOCK_MAGIC. > + uint32_t magic; > + // Number of decoded (output) bytes in block. > + uint32_t n_raw_bytes; > + // Number of encoded (source) bytes in block. > + uint32_t n_payload_bytes; > + // Number of literal bytes output by block (*not* the number of literals). > + uint32_t n_literals; > + // Number of matches in block (which is also the number of literals). > + uint32_t n_matches; > + // Number of bytes used to encode literals. > + uint32_t n_literal_payload_bytes; > + // Number of bytes used to encode matches. > + uint32_t n_lmd_payload_bytes; > + > + // Final encoder states for the block, which will be the initial states for > + // the decoder: > + // Final accum_nbits for literals stream. > + int32_t literal_bits; > + // There are four interleaved streams of literals, so there are four final > + // states. > + uint16_t literal_state[4]; > + // accum_nbits for the l, m, d stream. > + int32_t lmd_bits; > + // Final L (literal length) state. > + uint16_t l_state; > + // Final M (match length) state. > + uint16_t m_state; > + // Final D (match distance) state. 
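A portability question on these magics: they only equal the on-disk bytes when read little-endian ('bvx2' on disk is 62 76 78 32, which is 0x32787662 only as an LE load), and the load2/load4/load8 helpers further down are native-endian. On a big-endian kernel the whole decoder would read the stream wrong, magics included. If BE is out of scope that deserves a Kconfig dependency or at least a comment; otherwise the helpers could go through the unaligned LE accessors, roughly (my sketch, untested):

	#include <linux/unaligned.h>

	LZFSE_INLINE uint32_t load4(const void *ptr)
	{
		return get_unaligned_le32(ptr);
	}

with the same treatment for the other load/store helpers.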
> + uint16_t d_state; > + > + // Normalized frequency tables for each stream. Sum of values in each > + // array is the number of states. > + uint16_t l_freq[LZFSE_ENCODE_L_SYMBOLS]; > + uint16_t m_freq[LZFSE_ENCODE_M_SYMBOLS]; > + uint16_t d_freq[LZFSE_ENCODE_D_SYMBOLS]; > + uint16_t literal_freq[LZFSE_ENCODE_LITERAL_SYMBOLS]; > +} lzfse_compressed_block_header_v1; > + > +/*! @abstract Compressed block header with compressed tables. Note that because > + * freq[] is compressed, the structure-as-stored-in-the-stream is *truncated*; > + * we only store the used bytes of freq[]. This means that some extra care must > + * be taken when reading one of these headers from the stream. */ > +typedef struct { > + // Magic number, always LZFSE_COMPRESSEDV2_BLOCK_MAGIC. > + uint32_t magic; > + // Number of decoded (output) bytes in block. > + uint32_t n_raw_bytes; > + // The fields n_payload_bytes ... d_state from the > + // lzfse_compressed_block_header_v1 object are packed into three 64-bit > + // fields in the compressed header, as follows: > + // > + // offset bits value > + // 0 20 n_literals > + // 20 20 n_literal_payload_bytes > + // 40 20 n_matches > + // 60 3 literal_bits > + // 63 1 --- unused --- > + // > + // 0 10 literal_state[0] > + // 10 10 literal_state[1] > + // 20 10 literal_state[2] > + // 30 10 literal_state[3] > + // 40 20 n_lmd_payload_bytes > + // 60 3 lmd_bits > + // 63 1 --- unused --- > + // > + // 0 32 header_size (total header size in bytes; this does not > + // correspond to a field in the uncompressed header version, > + // but is required; we wouldn't know the size of the > + // compresssed header otherwise. > + // 32 10 l_state > + // 42 10 m_state > + // 52 10 d_state > + // 62 2 --- unused --- > + uint64_t packed_fields[3]; > + // Variable size freq tables, using a Huffman-style fixed encoding. > + // Size allocated here is an upper bound (all values stored on 16 bits). > + uint8_t freq[2 * > + (LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + > + LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS)]; > +} __attribute__((__packed__, __aligned__(1))) lzfse_compressed_block_header_v2; > + > +/*! @abstract LZVN compressed block header. */ > +typedef struct { > + // Magic number, always LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC. > + uint32_t magic; > + // Number of decoded (output) bytes. > + uint32_t n_raw_bytes; > + // Number of encoded (source) bytes. > + uint32_t n_payload_bytes; > +} lzvn_compressed_block_header; > + > +/* MARK: - LZFSE encode/decode interfaces > + */ > + > +int lzfse_encode_init(lzfse_encoder_state *s); > +int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta); > +int lzfse_encode_base(lzfse_encoder_state *s); > +int lzfse_encode_finish(lzfse_encoder_state *s); > +int lzfse_decode(lzfse_decoder_state *s); > + > +/* MARK: - LZVN encode/decode interfaces > + */ > + > +/* Minimum source buffer size for compression. Smaller buffers will not be > + * compressed; the lzvn encoder will simply return. > + */ > +#define LZVN_ENCODE_MIN_SRC_SIZE ((size_t)8) > + > +/* Maximum source buffer size for compression. Larger buffers will be > + * compressed partially. > + */ > +#define LZVN_ENCODE_MAX_SRC_SIZE ((size_t)0xffffffffU) > + > +/* Minimum destination buffer size for compression. No compression will take > + * place if smaller. 
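Typo in the packed_fields layout comment above: "compresssed". And since I had to decode that table to review it, here is my reading of it in code form, using the extract() helper defined later in this header (my sketch, untested, variable names invented):

	/* hdr is a const lzfse_compressed_block_header_v2 * */
	uint32_t n_literals    = (uint32_t)extract(hdr->packed_fields[0], 0, 20);
	uint32_t n_lit_payload = (uint32_t)extract(hdr->packed_fields[0], 20, 20);
	uint32_t n_matches     = (uint32_t)extract(hdr->packed_fields[0], 40, 20);
	uint32_t n_lmd_payload = (uint32_t)extract(hdr->packed_fields[1], 40, 20);
	uint32_t header_size   = (uint32_t)extract(hdr->packed_fields[2], 0, 32);
	uint16_t l_state       = (uint16_t)extract(hdr->packed_fields[2], 32, 10);

If the decoder in lzfse_decode_base.c does not match that, the comment and the code disagree and one of them needs fixing.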
> + */ > +#define LZVN_ENCODE_MIN_DST_SIZE ((size_t)8) > + > +size_t lzvn_decode_scratch_size(void); > +size_t lzvn_encode_scratch_size(void); > +size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size, > + const void *__restrict src, size_t src_size, > + void *__restrict work); > +size_t lzvn_decode_buffer(void *__restrict dst, size_t dst_size, > + const void *__restrict src, size_t src_size); > + > +/*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */ > +#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) > +typedef int64_t lzvn_offset; > +#else > +typedef int32_t lzvn_offset; > +#endif > + > +/* MARK: - LZFSE utility functions > + */ > + > +/*! @abstract Load bytes from memory location SRC. */ > +LZFSE_INLINE uint16_t load2(const void *ptr) > +{ > + uint16_t data; > + memcpy(&data, ptr, sizeof data); > + return data; > +} > + > +LZFSE_INLINE uint32_t load4(const void *ptr) > +{ > + uint32_t data; > + memcpy(&data, ptr, sizeof data); > + return data; > +} > + > +LZFSE_INLINE uint64_t load8(const void *ptr) > +{ > + uint64_t data; > + memcpy(&data, ptr, sizeof data); > + return data; > +} > + > +/*! @abstract Store bytes to memory location DST. */ > +LZFSE_INLINE void store2(void *ptr, uint16_t data) > +{ > + memcpy(ptr, &data, sizeof data); > +} > + > +LZFSE_INLINE void store4(void *ptr, uint32_t data) > +{ > + memcpy(ptr, &data, sizeof data); > +} > + > +LZFSE_INLINE void store8(void *ptr, uint64_t data) > +{ > + memcpy(ptr, &data, sizeof data); > +} > + > +/*! @abstract Load+store bytes from locations SRC to DST. Not intended for use > + * with overlapping buffers. Note that for LZ-style compression, you need > + * copies to behave like naive memcpy( ) implementations do, splatting the > + * leading sequence if the buffers overlap. This copy does not do that, so > + * should not be used with overlapping buffers. */ > +LZFSE_INLINE void copy8(void *dst, const void *src) > +{ > + store8(dst, load8(src)); > +} > +LZFSE_INLINE void copy16(void *dst, const void *src) > +{ > + uint64_t m0 = load8(src); > + uint64_t m1 = load8((const unsigned char *)src + 8); > + store8(dst, m0); > + store8((unsigned char *)dst + 8, m1); > +} > + > +/* =============================================================== > + * Bitfield Operations > + */ > + > +/*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if > + * we view \p container as a bit array, we extract \c container[lsb:lsb+width]. */ > +LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb, > + unsigned width) > +{ > + static const size_t container_width = sizeof container * 8; > + if (width == container_width) > + return container; > + return (container >> lsb) & (((uintmax_t)1 << width) - 1); > +} > + > +/*! @abstract Inserts \p width bits from \p data into \p container, starting with \p lsb. > + * Viewed as bit arrays, the operations is: > + * @code > + * container[:lsb] is unchanged > + * container[lsb:lsb+width] <-- data[0:width] > + * container[lsb+width:] is unchanged > + * @endcode > + */ > +LZFSE_INLINE uintmax_t insert(uintmax_t container, uintmax_t data, unsigned lsb, > + unsigned width) > +{ > + static const size_t container_width = sizeof container * 8; > + uintmax_t mask; > + if (width == container_width) > + return container; > + mask = ((uintmax_t)1 << width) - 1; > + return (container & ~(mask << lsb)) | (data & mask) << lsb; > +} > + > +/*! @abstract Perform sanity checks on the values of lzfse_compressed_block_header_v1. 
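For anyone else decoding extract()/insert() for the first time, a worked example (my numbers, not from the patch): extract(0x00abcdef, 8, 12) == 0xbcd, i.e. (0xabcdef >> 8) & 0xfff, and insert(0x00abcdef, 0x5, 8, 4) == 0x00abc5ef. One question, though: the width == container_width early-out in insert() returns container, but a full-width insert ought to return data. That case looks unreachable from the current callers, but if it is kept it deserves a comment, and if it is a typo it deserves a fix.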
> + * Test that the field values are in the allowed limits, verify that the > + * frequency tables sum to value less than total number of states. > + * @return 0 if all tests passed. > + * @return negative error code with 1 bit set for each failed test. */ > +LZFSE_INLINE int > +lzfse_check_block_header_v1(const lzfse_compressed_block_header_v1 *header) > +{ > + int tests_results = 0; > + uint16_t literal_state[4]; > + int res; > + tests_results = tests_results | > + ((header->magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC) ? > + 0 : > + (1 << 0)); > + tests_results = tests_results | > + ((header->n_literals <= LZFSE_LITERALS_PER_BLOCK) ? > + 0 : > + (1 << 1)); > + tests_results = > + tests_results | > + ((header->n_matches <= LZFSE_MATCHES_PER_BLOCK) ? 0 : (1 << 2)); > + > + memcpy(literal_state, header->literal_state, sizeof(uint16_t) * 4); > + > + tests_results = tests_results | > + ((literal_state[0] < LZFSE_ENCODE_LITERAL_STATES) ? > + 0 : > + (1 << 3)); > + tests_results = tests_results | > + ((literal_state[1] < LZFSE_ENCODE_LITERAL_STATES) ? > + 0 : > + (1 << 4)); > + tests_results = tests_results | > + ((literal_state[2] < LZFSE_ENCODE_LITERAL_STATES) ? > + 0 : > + (1 << 5)); > + tests_results = tests_results | > + ((literal_state[3] < LZFSE_ENCODE_LITERAL_STATES) ? > + 0 : > + (1 << 6)); > + > + tests_results = > + tests_results | > + ((header->l_state < LZFSE_ENCODE_L_STATES) ? 0 : (1 << 7)); > + tests_results = > + tests_results | > + ((header->m_state < LZFSE_ENCODE_M_STATES) ? 0 : (1 << 8)); > + tests_results = > + tests_results | > + ((header->d_state < LZFSE_ENCODE_D_STATES) ? 0 : (1 << 9)); > + > + res = fse_check_freq(header->l_freq, LZFSE_ENCODE_L_SYMBOLS, > + LZFSE_ENCODE_L_STATES); > + tests_results = tests_results | ((res == 0) ? 0 : (1 << 10)); > + res = fse_check_freq(header->m_freq, LZFSE_ENCODE_M_SYMBOLS, > + LZFSE_ENCODE_M_STATES); > + tests_results = tests_results | ((res == 0) ? 0 : (1 << 11)); > + res = fse_check_freq(header->d_freq, LZFSE_ENCODE_D_SYMBOLS, > + LZFSE_ENCODE_D_STATES); > + tests_results = tests_results | ((res == 0) ? 0 : (1 << 12)); > + res = fse_check_freq(header->literal_freq, LZFSE_ENCODE_LITERAL_SYMBOLS, > + LZFSE_ENCODE_LITERAL_STATES); > + tests_results = tests_results | ((res == 0) ? 0 : (1 << 13)); > + > + if (tests_results) { > + return tests_results | > + 0x80000000; // each 1 bit is a test that failed > + // (except for the sign bit) > + } > + > + return 0; // OK > +} > + > +/* MARK: - L, M, D encoding constants for LZFSE > + */ > + > +/* Largest encodable L (literal length), M (match length) and D (match > + * distance) values. > + */ > +#define LZFSE_ENCODE_MAX_L_VALUE 315 > +#define LZFSE_ENCODE_MAX_M_VALUE 2359 > +#define LZFSE_ENCODE_MAX_D_VALUE 262139 > + > +/*! @abstract The L, M, D data streams are all encoded as a "base" value, which is > + * FSE-encoded, and an "extra bits" value, which is the difference between > + * value and base, and is simply represented as a raw bit value (because it > + * is the low-order bits of a larger number, not much entropy can be > + * extracted from these bits by more complex encoding schemes). The following > + * tables represent the number of low-order bits to encode separately and the > + * base values for each of L, M, and D. > + * > + * @note The inverse tables for mapping the other way are significantly larger. > + * Those tables have been split out to lzfse_encode_tables.h in order to keep > + * this file relatively small. 
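In lzfse_check_block_header_v1(), returning tests_results | 0x80000000 through an int relies on implementation-defined conversion for the sign bit, and it is the only thing making the "@return negative error code" comment true. Making tests_results unsigned and documenting the encoding (or returning a plain -EINVAL and exposing the bitmap some other way) would be more conventional for the kernel.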
*/ > +static const uint8_t l_extra_bits[LZFSE_ENCODE_L_SYMBOLS] = { > + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8 > +}; > +static const int32_t l_base_value[LZFSE_ENCODE_L_SYMBOLS] = { > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60 > +}; > +static const uint8_t m_extra_bits[LZFSE_ENCODE_M_SYMBOLS] = { > + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11 > +}; > +static const int32_t m_base_value[LZFSE_ENCODE_M_SYMBOLS] = { > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312 > +}; > +static const uint8_t d_extra_bits[LZFSE_ENCODE_D_SYMBOLS] = { > + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, > + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, > + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, > + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15 > +}; > +static const int32_t d_base_value[LZFSE_ENCODE_D_SYMBOLS] = { > + 0, 1, 2, 3, 4, 6, 8, 10, > + 12, 16, 20, 24, 28, 36, 44, 52, > + 60, 76, 92, 108, 124, 156, 188, 220, > + 252, 316, 380, 444, 508, 636, 764, 892, > + 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580, > + 4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, > + 16380, 20476, 24572, 28668, 32764, 40956, 49148, 57340, > + 65532, 81916, 98300, 114684, 131068, 163836, 196604, 229372 > +}; > + > +#endif // LZFSE_INTERNAL_H > diff --git a/drivers/staging/apfs/lzfse/lzfse_tunables.h b/drivers/staging/apfs/lzfse/lzfse_tunables.h > new file mode 100644 > index 0000000000000000000000000000000000000000..cb699bba5bf175a4ad525403f958d0c29aaee4e4 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzfse_tunables.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +#ifndef LZFSE_TUNABLES_H > +#define LZFSE_TUNABLES_H > + > +/* Parameters controlling details of the LZ-style match search. These values > + * may be modified to fine tune compression ratio vs. encoding speed, while > + * keeping the compressed format compatible with LZFSE. Note that > + * modifying them will also change the amount of work space required by > + * the encoder. The values here are those used in the compression library > + * on iOS and OS X. > + */ > + > +/* Number of bits for hash function to produce. Should be in the range > + * [10, 16]. Larger values reduce the number of false-positive found during > + * the match search, and expand the history table, which may allow additional > + * matches to be found, generally improving the achieved compression ratio. > + * Larger values also increase the workspace size, and make it less likely > + * that the history table will be present in cache, which reduces performance. > + */ > +#define LZFSE_ENCODE_HASH_BITS 14 > + > +/* Number of positions to store for each line in the history table. May > + * be either 4 or 8. Using 8 doubles the size of the history table, which > + * increases the chance of finding matches (thus improving compression ratio), > + * but also increases the workspace size. > + */ > +#define LZFSE_ENCODE_HASH_WIDTH 4 > + > +/* Match length in bytes to cause immediate emission. Generally speaking, > + * LZFSE maintains multiple candidate matches and waits to decide which match > + * to emit until more information is available. When a match exceeds this > + * threshold, it is emitted immediately. Thus, smaller values may give > + * somewhat better performance, and larger values may give somewhat better > + * compression ratios. 
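The extra-bits and base-value tables above check out against the LZFSE_ENCODE_MAX_*_VALUE constants, which is a nice consistency test for anyone touching them later: last L symbol, base 60 + (2^8 - 1) = 315; last M symbol, base 312 + (2^11 - 1) = 2359; last D symbol, base 229372 + (2^15 - 1) = 262139. All three match the #defines, so the tables survived the import intact.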
> + */ > +#define LZFSE_ENCODE_GOOD_MATCH 40 > + > +/* When the source buffer is very small, LZFSE doesn't compress as well as > + * some simpler algorithms. To maintain reasonable compression for these > + * cases, we transition to use LZVN instead if the size of the source buffer > + * is below this threshold. > + */ > +#define LZFSE_ENCODE_LZVN_THRESHOLD 4096 > + > +#endif // LZFSE_TUNABLES_H > diff --git a/drivers/staging/apfs/lzfse/lzvn_decode_base.c b/drivers/staging/apfs/lzfse/lzvn_decode_base.c > new file mode 100644 > index 0000000000000000000000000000000000000000..632bdd6e4da3c6e1db4a0e9fe45609b6d68a408e > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzvn_decode_base.c > @@ -0,0 +1,721 @@ > +// SPDX-License-Identifier: BSD-3-Clause > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +/* LZVN low-level decoder > + */ > + > +#include "lzvn_decode_base.h" > + > +#if !defined(HAVE_LABELS_AS_VALUES) > +# if defined(__GNUC__) || defined(__clang__) > +# define HAVE_LABELS_AS_VALUES 1 > +# else > +# define HAVE_LABELS_AS_VALUES 0 > +# endif > +#endif > + > +/* Both the source and destination buffers are represented by a pointer and > + * a length; they are *always* updated in concert using this macro; however > + * many bytes the pointer is advanced, the length is decremented by the same > + * amount. Thus, pointer + length always points to the byte one past the end > + * of the buffer. > + */ > +#define PTR_LEN_INC(_pointer, _length, _increment) \ > + (_pointer += _increment, _length -= _increment) > + > +/* Update state with current positions and distance, corresponding to the > + * beginning of an instruction in both streams > + */ > +#define UPDATE_GOOD \ > + (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D) > + > +void lzvn_decode(lzvn_decoder_state *state) > +{ > +#if HAVE_LABELS_AS_VALUES > + // Jump table for all instructions > + static const void *opc_tbl[256] = { > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, > + &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, > + &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, > + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, > + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, > + &&udef, &&udef, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&med_d, > + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, > + 
&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, > + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, > + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, > + &&med_d, &&med_d, &&med_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, > + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&udef, &&udef, > + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, > + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, > + &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, > + &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, > + &&sml_l, &&sml_l, &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, > + &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, > + &&sml_m, &&sml_m, &&sml_m, &&sml_m > + }; > +#endif > + size_t src_len = state->src_end - state->src; > + size_t dst_len = state->dst_end - state->dst; > + const unsigned char *src_ptr = state->src; > + unsigned char *dst_ptr = state->dst; > + size_t D = state->d_prev; > + size_t M; > + size_t L; > + size_t opc_len; > + unsigned char opc; > + uint16_t opc23; > + > + if (src_len == 0 || dst_len == 0) > + return; // empty buffer > + > + // Do we have a partially expanded match saved in state? > + if (state->L != 0 || state->M != 0) { > + L = state->L; > + M = state->M; > + D = state->D; > + opc_len = 0; // we already skipped the op > + state->L = state->M = state->D = 0; > + if (M == 0) > + goto copy_literal; > + if (L == 0) > + goto copy_match; > + goto copy_literal_and_match; > + } > + > + opc = src_ptr[0]; > + > +#if HAVE_LABELS_AS_VALUES > + goto *opc_tbl[opc]; > +#else > + for (;;) { > + switch (opc) { > +#endif > +/* =============================================================== > + * These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a > + * literal and a match. The bulk of their implementations are shared; > + * each label here only does the work of setting the opcode length (not > + * including any literal bytes), and extracting the literal length, match > + * length, and match distance (except in pre_d). They then jump into the > + * shared implementation to actually output the literal and match bytes. > + * > + * No error checking happens in the first stage, except for ensuring that > + * the source has enough length to represent the full opcode before > + * reading past the first byte. 
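Since GCC and Clang both support labels-as-values, HAVE_LABELS_AS_VALUES will always be 1 for kernel builds, and the entire #if !HAVE_LABELS_AS_VALUES switch skeleton in this file is dead code here. I can see the argument for keeping it to stay diffable against upstream lzfse, but if you are already carrying local changes, dropping it would shrink this file considerably.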
> + */ > +sml_d: > +#if !HAVE_LABELS_AS_VALUES > +case 0: > +case 1: > +case 2: > +case 3: > +case 4: > +case 5: > +case 8: > +case 9: > +case 10: > +case 11: > +case 12: > +case 13: > +case 16: > +case 17: > +case 18: > +case 19: > +case 20: > +case 21: > +case 24: > +case 25: > +case 26: > +case 27: > +case 28: > +case 29: > +case 32: > +case 33: > +case 34: > +case 35: > +case 36: > +case 37: > +case 40: > +case 41: > +case 42: > +case 43: > +case 44: > +case 45: > +case 48: > +case 49: > +case 50: > +case 51: > +case 52: > +case 53: > +case 56: > +case 57: > +case 58: > +case 59: > +case 60: > +case 61: > +case 64: > +case 65: > +case 66: > +case 67: > +case 68: > +case 69: > +case 72: > +case 73: > +case 74: > +case 75: > +case 76: > +case 77: > +case 80: > +case 81: > +case 82: > +case 83: > +case 84: > +case 85: > +case 88: > +case 89: > +case 90: > +case 91: > +case 92: > +case 93: > +case 96: > +case 97: > +case 98: > +case 99: > +case 100: > +case 101: > +case 104: > +case 105: > +case 106: > +case 107: > +case 108: > +case 109: > +case 128: > +case 129: > +case 130: > +case 131: > +case 132: > +case 133: > +case 136: > +case 137: > +case 138: > +case 139: > +case 140: > +case 141: > +case 144: > +case 145: > +case 146: > +case 147: > +case 148: > +case 149: > +case 152: > +case 153: > +case 154: > +case 155: > +case 156: > +case 157: > +case 192: > +case 193: > +case 194: > +case 195: > +case 196: > +case 197: > +case 200: > +case 201: > +case 202: > +case 203: > +case 204: > +case 205: > +#endif > + UPDATE_GOOD; > + // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL > + // where the length of literal (0-3 bytes) is encoded by the high 2 bits of > + // the first byte. We first extract the literal length so we know how long > + // the opcode is, then check that the source can hold both this opcode and > + // at least one byte of the next (because any valid input stream must be > + // terminated with an eos token). > + opc_len = 2; > + L = (size_t)extract(opc, 6, 2); > + M = (size_t)extract(opc, 3, 3) + 3; > + // We need to ensure that the source buffer is long enough that we can > + // safely read this entire opcode, the literal that follows, and the first > + // byte of the next opcode. Once we satisfy this requirement, we can > + // safely unpack the match distance. A check similar to this one is > + // present in all the opcode implementations. > + if (src_len <= opc_len + L) > + return; // source truncated > + D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; > + goto copy_literal_and_match; > + > +med_d: > +#if !HAVE_LABELS_AS_VALUES > +case 160: > +case 161: > +case 162: > +case 163: > +case 164: > +case 165: > +case 166: > +case 167: > +case 168: > +case 169: > +case 170: > +case 171: > +case 172: > +case 173: > +case 174: > +case 175: > +case 176: > +case 177: > +case 178: > +case 179: > +case 180: > +case 181: > +case 182: > +case 183: > +case 184: > +case 185: > +case 186: > +case 187: > +case 188: > +case 189: > +case 190: > +case 191: > +#endif > + UPDATE_GOOD; > + // "medium distance": This is a minor variant of the "small distance" > + // encoding, where we will now use two extra bytes instead of one to encode > + // the restof the match length and distance. This allows an extra two bits > + // for the match length, and an extra three bits for the match distance. The > + // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. 
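Typo in the med_d comment just above: "the restof the match length". Also, the same checkpatch complaint I made on lzfse_decode.c applies to all of the // comments in this file; if you are intentionally keeping the imported code close to upstream style, say so in the cover letter so reviewers stop tripping over it.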
> + opc_len = 3; > + L = (size_t)extract(opc, 3, 2); > + if (src_len <= opc_len + L) > + return; // source truncated > + opc23 = load2(&src_ptr[1]); > + M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); > + D = (size_t)extract(opc23, 2, 14); > + goto copy_literal_and_match; > + > +lrg_d: > +#if !HAVE_LABELS_AS_VALUES > +case 7: > +case 15: > +case 23: > +case 31: > +case 39: > +case 47: > +case 55: > +case 63: > +case 71: > +case 79: > +case 87: > +case 95: > +case 103: > +case 111: > +case 135: > +case 143: > +case 151: > +case 159: > +case 199: > +case 207: > +#endif > + UPDATE_GOOD; > + // "large distance": This is another variant of the "small distance" > + // encoding, where we will now use two extra bytes to encode the match > + // distance, which allows distances up to 65535 to be represented. The full > + // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. > + opc_len = 3; > + L = (size_t)extract(opc, 6, 2); > + M = (size_t)extract(opc, 3, 3) + 3; > + if (src_len <= opc_len + L) > + return; // source truncated > + D = load2(&src_ptr[1]); > + goto copy_literal_and_match; > + > +pre_d: > +#if !HAVE_LABELS_AS_VALUES > +case 70: > +case 78: > +case 86: > +case 94: > +case 102: > +case 110: > +case 134: > +case 142: > +case 150: > +case 158: > +case 198: > +case 206: > +#endif > + UPDATE_GOOD; > + // "previous distance": This opcode has the structure LLMMM110, where the > + // length of the literal (0-3 bytes) is encoded by the high 2 bits of the > + // first byte. We first extract the literal length so we know how long > + // the opcode is, then check that the source can hold both this opcode and > + // at least one byte of the next (because any valid input stream must be > + // terminated with an eos token). > + opc_len = 1; > + L = (size_t)extract(opc, 6, 2); > + M = (size_t)extract(opc, 3, 3) + 3; > + if (src_len <= opc_len + L) > + return; // source truncated > + goto copy_literal_and_match; > + > +copy_literal_and_match: > + // Common implementation of writing data for opcodes that have both a > + // literal and a match. We begin by advancing the source pointer past > + // the opcode, so that it points at the first literal byte (if L > + // is non-zero; otherwise it points at the next opcode). > + PTR_LEN_INC(src_ptr, src_len, opc_len); > + // Now we copy the literal from the source pointer to the destination. > + if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { > + // The literal is 0-3 bytes; if we are not near the end of the buffer, > + // we can safely just do a 4 byte copy (which is guaranteed to cover > + // the complete literal, and may include some other bytes as well). > + store4(dst_ptr, load4(src_ptr)); > + } else if (L <= dst_len) { > + // We are too close to the end of either the input or output stream > + // to be able to safely use a four-byte copy, but we will not exhaust > + // either stream (we already know that the source will not be > + // exhausted from checks in the individual opcode implementations, > + // and we just tested that dst_len > L). Thus, we need to do a > + // byte-by-byte copy of the literal. This is slow, but it can only ever > + // happen near the very end of a buffer, so it is not an important case to > + // optimize. 
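To confirm I am reading the med_d layout (101LLMMM DDDDDDMM DDDDDDDD) correctly, a worked example: for the bytes a9 34 12, opc = 0xa9 gives L = 1 and high match bits 001; opc23 = load2() of 34 12 = 0x1234, so M = ((1 << 2) | 0) + 3 = 7 and D = 0x1234 >> 2 = 0x48d = 1165 -- one literal byte, then a seven-byte match 1165 bytes back. If that is not the intent, the comment is misleading, because that is what it told me. One more nit in copy_literal_and_match above: the comment says "we just tested that dst_len > L", but the branch condition is L <= dst_len, so equality is allowed.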
> + size_t i; > + for (i = 0; i < L; ++i) > + dst_ptr[i] = src_ptr[i]; > + } else { > + // Destination truncated: fill DST, and store partial match > + > + // Copy partial literal > + size_t i; > + for (i = 0; i < dst_len; ++i) > + dst_ptr[i] = src_ptr[i]; > + // Save state > + state->src = src_ptr + dst_len; > + state->dst = dst_ptr + dst_len; > + state->L = L - dst_len; > + state->M = M; > + state->D = D; > + return; // destination truncated > + } > + // Having completed the copy of the literal, we advance both the source > + // and destination pointers by the number of literal bytes. > + PTR_LEN_INC(dst_ptr, dst_len, L); > + PTR_LEN_INC(src_ptr, src_len, L); > + // Check if the match distance is valid; matches must not reference > + // bytes that preceed the start of the output buffer, nor can the match > + // distance be zero. > + if (D > dst_ptr - state->dst_begin || D == 0) > + goto invalid_match_distance; > +copy_match: > + // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep > + // in mind that we may have D < M, in which case the source and destination > + // windows overlap in the copy. The semantics of the match copy are *not* > + // those of memmove( ); if the buffers overlap it needs to behave as though > + // we were copying byte-by-byte in increasing address order. If, for example, > + // D is 1, the copy operation is equivalent to: > + // > + // memset(dst_ptr, dst_ptr[-1], M); > + // > + // i.e. it splats the previous byte. This means that we need to be very > + // careful about using wide loads or stores to perform the copy operation. > + if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { > + // We are not near the end of the buffer, and the match distance > + // is at least eight. Thus, we can safely loop using eight byte > + // copies. The last of these may slop over the intended end of > + // the match, but this is OK because we know we have a safety bound > + // away from the end of the destination buffer. > + size_t i; > + for (i = 0; i < M; i += 8) > + store8(&dst_ptr[i], load8(&dst_ptr[i - D])); > + } else if (M <= dst_len) { > + // Either the match distance is too small, or we are too close to > + // the end of the buffer to safely use eight byte copies. Fall back > + // on a simple byte-by-byte implementation. > + size_t i; > + for (i = 0; i < M; ++i) > + dst_ptr[i] = dst_ptr[i - D]; > + } else { > + // Destination truncated: fill DST, and store partial match > + > + // Copy partial match > + size_t i; > + for (i = 0; i < dst_len; ++i) > + dst_ptr[i] = dst_ptr[i - D]; > + // Save state > + state->src = src_ptr; > + state->dst = dst_ptr + dst_len; > + state->L = 0; > + state->M = M - dst_len; > + state->D = D; > + return; // destination truncated > + } > + // Update the destination pointer and length to account for the bytes > + // written by the match, then load the next opcode byte and branch to > + // the appropriate implementation. > + PTR_LEN_INC(dst_ptr, dst_len, M); > + opc = src_ptr[0]; > +#if HAVE_LABELS_AS_VALUES > + goto *opc_tbl[opc]; > +#else > + break; > +#endif > + > +/* =============================================================== > + * Opcodes representing only a match (no literal). > + * These two opcodes (lrg_m and sml_m) encode only a match. The match > + * distance is carried over from the previous opcode, so all they need > + * to encode is the match length. We are able to reuse the match copy > + * sequence from the literal and match opcodes to perform the actual > + * copy implementation. 
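Typo above the distance check: "preceed" should be "precede". And since the D >= 8 guard is the subtle part of this function, a concrete illustration worth keeping somewhere: with D = 1, M = 4 and dst_ptr[-1] == 'a', the byte-by-byte loop writes "aaaa", each store feeding the next load; a single wide load from dst_ptr - 1 would instead pick up seven bytes that have not been written yet. Requiring D >= 8 guarantees every eight-byte load reads only bytes stored before the current iteration, preserving the splat semantics the comment describes.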
> +/* ===============================================================
> + * Opcodes representing only a match (no literal).
> + * These two opcodes (lrg_m and sml_m) encode only a match. The match
> + * distance is carried over from the previous opcode, so all they need
> + * to encode is the match length. We are able to reuse the match copy
> + * sequence from the literal and match opcodes to perform the actual
> + * copy implementation.
> + */
> +sml_m:
> +#if !HAVE_LABELS_AS_VALUES
> +case 241:
> +case 242:
> +case 243:
> +case 244:
> +case 245:
> +case 246:
> +case 247:
> +case 248:
> +case 249:
> +case 250:
> +case 251:
> +case 252:
> +case 253:
> +case 254:
> +case 255:
> +#endif
> + UPDATE_GOOD;
> + // "small match": This opcode has no literal, and uses the previous match
> + // distance (i.e. it encodes only the match length), in a single byte as
> + // 1111MMMM.
> + opc_len = 1;
> + if (src_len <= opc_len)
> + return; // source truncated
> + M = (size_t)extract(opc, 0, 4);
> + PTR_LEN_INC(src_ptr, src_len, opc_len);
> + goto copy_match;
> +
> +lrg_m:
> +#if !HAVE_LABELS_AS_VALUES
> +case 240:
> +#endif
> + UPDATE_GOOD;
> + // "large match": This opcode has no literal, and uses the previous match
> + // distance (i.e. it encodes only the match length). It is encoded in two
> + // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
> + // be represented by sml_m, there is an implicit bias of 16 on the match
> + // length; the representable values are [16,271].
> + opc_len = 2;
> + if (src_len <= opc_len)
> + return; // source truncated
> + M = src_ptr[1] + 16;
> + PTR_LEN_INC(src_ptr, src_len, opc_len);
> + goto copy_match;
> +
> +/* ===============================================================
> + * Opcodes representing only a literal (no match).
> + * These two opcodes (lrg_l and sml_l) encode only a literal. There is no
> + * match length or match distance to worry about (but we need to *not*
> + * touch D, as it must be preserved between opcodes).
> + */
> +sml_l:
> +#if !HAVE_LABELS_AS_VALUES
> +case 225:
> +case 226:
> +case 227:
> +case 228:
> +case 229:
> +case 230:
> +case 231:
> +case 232:
> +case 233:
> +case 234:
> +case 235:
> +case 236:
> +case 237:
> +case 238:
> +case 239:
> +#endif
> + UPDATE_GOOD;
> + // "small literal": This opcode has no match, and encodes only a literal
> + // of length up to 15 bytes. The format is 1110LLLL LITERAL.
> + opc_len = 1;
> + L = (size_t)extract(opc, 0, 4);
> + goto copy_literal;
> +
> +lrg_l:
> +#if !HAVE_LABELS_AS_VALUES
> +case 224:
> +#endif
> + UPDATE_GOOD;
> + // "large literal": This opcode has no match, and encodes only a literal
> + // of length up to 271 bytes. The format is 11100000 LLLLLLLL LITERAL.
> + // Because literals smaller than 16 bytes can be represented by sml_l,
> + // there is an implicit bias of 16 on the literal length; the
> + // representable values are [16,271].
> + opc_len = 2;
> + if (src_len <= 2)
> + return; // source truncated
> + L = src_ptr[1] + 16;
> + goto copy_literal;
> +
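One more review aid: the implicit +16 bias on the lrg_m/lrg_l lengths
checks out. A toy decoder for just the match-length byte
(decode_match_len() is a hypothetical helper for illustration; the real
code dispatches through the opcode table instead):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical illustration of the two match-only encodings above. */
static size_t decode_match_len(const uint8_t *p)
{
        if (p[0] == 0xf0)               /* lrg_m: 11110000 MMMMMMMM */
                return (size_t)p[1] + 16;
        return (size_t)(p[0] & 0x0f);   /* sml_m: 1111MMMM */
}

int main(void)
{
        const uint8_t sml[] = { 0xf7 };       /* M = 7 */
        const uint8_t lrg[] = { 0xf0, 0xff }; /* M = 255 + 16 = 271 */

        printf("%zu %zu\n", decode_match_len(sml),
               decode_match_len(lrg)); /* prints 7 271 */
        return 0;
}

So sml_m covers the short lengths in one byte and lrg_m covers exactly
[16,271] in two, with no overlap between the two encodings.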
> +copy_literal:
> + // Check that the source buffer is large enough to hold the complete
> + // literal and at least the first byte of the next opcode. If so, advance
> + // the source pointer to point to the first byte of the literal and adjust
> + // the source length accordingly.
> + if (src_len <= opc_len + L)
> + return; // source truncated
> + PTR_LEN_INC(src_ptr, src_len, opc_len);
> + // Now we copy the literal from the source pointer to the destination.
> + if (dst_len >= L + 7 && src_len >= L + 7) {
> + // We are not near the end of the source or destination buffers; thus
> + // we can safely copy the literal using wide copies, without worrying
> + // about reading or writing past the end of either buffer.
> + size_t i;
> + for (i = 0; i < L; i += 8)
> + store8(&dst_ptr[i], load8(&src_ptr[i]));
> + } else if (L <= dst_len) {
> + // We are too close to the end of either the input or output stream
> + // to be able to safely use an eight-byte copy. Instead we copy the
> + // literal byte-by-byte.
> + size_t i;
> + for (i = 0; i < L; ++i)
> + dst_ptr[i] = src_ptr[i];
> + } else {
> + // Destination truncated: fill DST, and store partial literal
> +
> + // Copy partial literal
> + size_t i;
> + for (i = 0; i < dst_len; ++i)
> + dst_ptr[i] = src_ptr[i];
> + // Save state
> + state->src = src_ptr + dst_len;
> + state->dst = dst_ptr + dst_len;
> + state->L = L - dst_len;
> + state->M = 0;
> + state->D = D;
> + return; // destination truncated
> + }
> + // Having completed the copy of the literal, we advance both the source
> + // and destination pointers by the number of literal bytes.
> + PTR_LEN_INC(dst_ptr, dst_len, L);
> + PTR_LEN_INC(src_ptr, src_len, L);
> + // Load the first byte of the next opcode, and jump to its implementation.
> + opc = src_ptr[0];
> +#if HAVE_LABELS_AS_VALUES
> + goto *opc_tbl[opc];
> +#else
> + break;
> +#endif
> +
> +/* ===============================================================
> + * Other opcodes
> + */
> +nop:
> +#if !HAVE_LABELS_AS_VALUES
> +case 14:
> +case 22:
> +#endif
> + UPDATE_GOOD;
> + // "nop": This opcode does nothing; we simply skip over it and load
> + // the next opcode.
> + opc_len = 1;
> + if (src_len <= opc_len)
> + return; // source truncated
> + PTR_LEN_INC(src_ptr, src_len, opc_len);
> + opc = src_ptr[0];
> +#if HAVE_LABELS_AS_VALUES
> + goto *opc_tbl[opc];
> +#else
> + break;
> +#endif
> +
> +eos:
> +#if !HAVE_LABELS_AS_VALUES
> +case 6:
> +#endif
> + opc_len = 8;
> + if (src_len < opc_len)
> + return; // source truncated (here we don't need an extra byte for the
> + // next opcode, since eos terminates the stream)
> + PTR_LEN_INC(src_ptr, src_len, opc_len);
> + state->end_of_stream = 1;
> + UPDATE_GOOD;
> + return; // end-of-stream
> +
> +/* ===============================================================
> + * Return on error
> + */
> +udef:
> +#if !HAVE_LABELS_AS_VALUES
> +case 30:
> +case 38:
> +case 46:
> +case 54:
> +case 62:
> +case 112:
> +case 113:
> +case 114:
> +case 115:
> +case 116:
> +case 117:
> +case 118:
> +case 119:
> +case 120:
> +case 121:
> +case 122:
> +case 123:
> +case 124:
> +case 125:
> +case 126:
> +case 127:
> +case 208:
> +case 209:
> +case 210:
> +case 211:
> +case 212:
> +case 213:
> +case 214:
> +case 215:
> +case 216:
> +case 217:
> +case 218:
> +case 219:
> +case 220:
> +case 221:
> +case 222:
> +case 223:
> +#endif
> +invalid_match_distance:
> +
> + return; // we already updated state
> +#if !HAVE_LABELS_AS_VALUES
> +}
> +}
> +#endif
> +}
> diff --git a/drivers/staging/apfs/lzfse/lzvn_decode_base.h b/drivers/staging/apfs/lzfse/lzvn_decode_base.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..ef6b0244cbf8adb15a77a68c920753adce7d93b4
> --- /dev/null
> +++ b/drivers/staging/apfs/lzfse/lzvn_decode_base.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: BSD-3-Clause */
> +/*
> + * Copyright (c) 2015-2016, Apple Inc. All rights reserved.
> + *
> + */
> +
> +/* LZVN low-level decoder (v2)
> + * Functions in the low-level API should switch to these at some point.
> + * Apr 2014
> + */
> +
> +#ifndef LZVN_DECODE_BASE_H
> +#define LZVN_DECODE_BASE_H
> +
> +#include "lzfse_internal.h"
> +
> +/*! @abstract Base decoder state. 
*/ > +typedef struct { > + // Decoder I/O > + > + // Next byte to read in source buffer > + const unsigned char *src; > + // Next byte after source buffer > + const unsigned char *src_end; > + > + // Next byte to write in destination buffer (by decoder) > + unsigned char *dst; > + // Valid range for destination buffer is [dst_begin, dst_end - 1] > + unsigned char *dst_begin; > + unsigned char *dst_end; > + // Next byte to read in destination buffer (modified by caller) > + unsigned char *dst_current; > + > + // Decoder state > + > + // Partially expanded match, or 0,0,0. > + // In that case, src points to the next literal to copy, or the next op-code > + // if L==0. > + size_t L, M, D; > + > + // Distance for last emitted match, or 0 > + lzvn_offset d_prev; > + > + // Did we decode end-of-stream? > + int end_of_stream; > + > +} lzvn_decoder_state; > + > +/*! @abstract Decode source to destination. > + * Updates \p state (src,dst,d_prev). */ > +void lzvn_decode(lzvn_decoder_state *state); > + > +#endif // LZVN_DECODE_BASE_H > diff --git a/drivers/staging/apfs/lzfse/lzvn_encode_base.h b/drivers/staging/apfs/lzfse/lzvn_encode_base.h > new file mode 100644 > index 0000000000000000000000000000000000000000..c65457143bb5d330c6172d690993250e3a6aab77 > --- /dev/null > +++ b/drivers/staging/apfs/lzfse/lzvn_encode_base.h > @@ -0,0 +1,105 @@ > +/* SPDX-License-Identifier: BSD-3-Clause */ > +/* > + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. > + * > + */ > + > +/* LZVN low-level encoder > + */ > + > +#ifndef LZVN_ENCODE_BASE_H > +#define LZVN_ENCODE_BASE_H > + > +#include "lzfse_internal.h" > + > +/* =============================================================== > + * Types and Constants > + */ > + > +#define LZVN_ENCODE_HASH_BITS \ > + 14 // number of bits returned by the hash function [10, 16] > +#define LZVN_ENCODE_OFFSETS_PER_HASH \ > + 4 // stored offsets stack for each hash value, MUST be 4 > +#define LZVN_ENCODE_HASH_VALUES \ > + (1 << LZVN_ENCODE_HASH_BITS) // number of entries in hash table > +#define LZVN_ENCODE_MAX_DISTANCE \ > + 0xffff // max match distance we can represent with LZVN encoding, MUST be \ > + // 0xFFFF > +#define LZVN_ENCODE_MIN_MARGIN \ > + 8 // min number of bytes required between current and end during encoding, \ > + // MUST be >= 8 > +#define LZVN_ENCODE_MAX_LITERAL_BACKLOG \ > + 400 // if the number of pending literals exceeds this size, emit a long \ > + // literal, MUST be >= 271 > + > +/*! @abstract Type of table entry. */ > +typedef struct { > + int32_t indices[4]; // signed indices in source buffer > + uint32_t values[4]; // corresponding 32-bit values > +} lzvn_encode_entry_type; > + > +/* Work size > + */ > +#define LZVN_ENCODE_WORK_SIZE \ > + (LZVN_ENCODE_HASH_VALUES * sizeof(lzvn_encode_entry_type)) > + > +/*! @abstract Match */ > +typedef struct { > + lzvn_offset m_begin; // beginning of match, current position > + lzvn_offset > + m_end; // end of match, this is where the next literal would begin > + // if we emit the entire match > + lzvn_offset M; // match length M: m_end - m_begin > + lzvn_offset D; // match distance D > + lzvn_offset K; // match gain: M - distance storage (L not included) > +} lzvn_match_info; > + > +/* =============================================================== > + * Internal encoder state > + */ > + > +/*! @abstract Base encoder state and I/O. 
*/ > +typedef struct { > + // Encoder I/O > + > + // Source buffer > + const unsigned char *src; > + // Valid range in source buffer: we can access src[i] for src_begin <= i < > + // src_end. src_begin may be negative. > + lzvn_offset src_begin; > + lzvn_offset src_end; > + // Next byte to process in source buffer > + lzvn_offset src_current; > + // Next byte after the last byte to process in source buffer. We MUST have: > + // src_current_end + 8 <= src_end. > + lzvn_offset src_current_end; > + // Next byte to encode in source buffer, may be before or after src_current. > + lzvn_offset src_literal; > + > + // Next byte to write in destination buffer > + unsigned char *dst; > + // Valid range in destination buffer: [dst_begin, dst_end - 1] > + unsigned char *dst_begin; > + unsigned char *dst_end; > + > + // Encoder state > + > + // Pending match > + lzvn_match_info pending; > + > + // Distance for last emitted match, or 0 > + lzvn_offset d_prev; > + > + // Hash table used to find matches. Stores LZVN_ENCODE_OFFSETS_PER_HASH 32-bit > + // signed indices in the source buffer, and the corresponding 4-byte values. > + // The number of entries in the table is LZVN_ENCODE_HASH_VALUES. > + lzvn_encode_entry_type *table; > + > +} lzvn_encoder_state; > + > +/*! @abstract Encode source to destination. > + * Update \p state. > + * The call ensures \c src_literal is never left too far behind \c src_current. */ > +void lzvn_encode(lzvn_encoder_state *state); > + > +#endif // LZVN_ENCODE_BASE_H > > -- > 2.48.1 >
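For anyone trying to review lzvn_decode() without the rest of the
series: the decoder state quoted above is driven roughly like this. A
sketch only, it builds against the header above but the buffer handling
is illustrative, not the actual call site in this series:

#include "lzvn_decode_base.h"

/* Sketch: decode one LZVN-compressed block. Illustrative only. */
static size_t lzvn_decode_once(unsigned char *dst, size_t dst_size,
                               const unsigned char *src, size_t src_size)
{
        lzvn_decoder_state s = { 0 };

        s.src = src;
        s.src_end = src + src_size;
        s.dst = dst;
        s.dst_begin = dst;
        s.dst_end = dst + dst_size;

        lzvn_decode(&s);

        /* If end_of_stream is still 0, either the input was truncated
         * or dst filled up; s.L/s.M/s.D then hold the partial op. */
        return (size_t)(s.dst - dst);
}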
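One last observation on the encoder header, even though the encode path
is unused for now: assuming no padding in lzvn_encode_entry_type, the
scratch table works out to

        LZVN_ENCODE_WORK_SIZE = (1 << 14) * (4 * 4 + 4 * 4) bytes
                              = 16384 * 32
                              = 524288 bytes = 512 KiB

That is a lot of contiguous scratch for kernel context, so if encode
support is ever wired up, it will presumably want vmalloc() rather than
kmalloc() or anything on the stack. Worth a comment in the header when
that happens.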