Hi, I modified my code as suggested by Stephan and Eric. Moving the code from the header files into *.c source files slowed down the compression and decompression speed by a factor of up to 20. I made no changes to the code itself, that would explain, why it is so much slower. Signed-off-by: Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> --- crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/zbewalgo.c | 208 ++++++++++++++++ include/linux/zbewalgo.h | 59 +++++ lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c | 111 +++++++++ lib/zbewalgo/BWT.h | 34 +++ lib/zbewalgo/JBE.c | 131 ++++++++++ lib/zbewalgo/JBE.h | 26 ++ lib/zbewalgo/JBE2.c | 135 +++++++++++ lib/zbewalgo/JBE2.h | 26 ++ lib/zbewalgo/MTF.c | 98 ++++++++ lib/zbewalgo/MTF.h | 26 ++ lib/zbewalgo/Makefile | 4 + lib/zbewalgo/RLE.c | 130 ++++++++++ lib/zbewalgo/RLE.h | 26 ++ lib/zbewalgo/bewalgo.c | 369 +++++++++++++++++++++++++++++ lib/zbewalgo/bewalgo.h | 26 ++ lib/zbewalgo/bewalgo2.c | 385 ++++++++++++++++++++++++++++++ lib/zbewalgo/bewalgo2.h | 26 ++ lib/zbewalgo/bitshuffle.c | 101 ++++++++ lib/zbewalgo/bitshuffle.h | 26 ++ lib/zbewalgo/huffman.c | 227 ++++++++++++++++++ lib/zbewalgo/huffman.h | 26 ++ lib/zbewalgo/include.h | 106 +++++++++ lib/zbewalgo/zbewalgo.c | 590 ++++++++++++++++++++++++++++++++++++++++++++++ 27 files changed, 2909 insertions(+) create mode 100644 crypto/zbewalgo.c create mode 100644 include/linux/zbewalgo.h create mode 100644 lib/zbewalgo/BWT.c create mode 100644 lib/zbewalgo/BWT.h create mode 100644 lib/zbewalgo/JBE.c create mode 100644 lib/zbewalgo/JBE.h create mode 100644 lib/zbewalgo/JBE2.c create mode 100644 lib/zbewalgo/JBE2.h create mode 100644 lib/zbewalgo/MTF.c create mode 100644 lib/zbewalgo/MTF.h create mode 100644 lib/zbewalgo/Makefile create mode 100644 lib/zbewalgo/RLE.c create mode 100644 lib/zbewalgo/RLE.h create mode 100644 lib/zbewalgo/bewalgo.c create mode 100644 lib/zbewalgo/bewalgo.h create mode 100644 lib/zbewalgo/bewalgo2.c create mode 100644 lib/zbewalgo/bewalgo2.h create mode 100644 lib/zbewalgo/bitshuffle.c create mode 100644 lib/zbewalgo/bitshuffle.h create mode 100644 lib/zbewalgo/huffman.c create mode 100644 lib/zbewalgo/huffman.h create mode 100644 lib/zbewalgo/include.h create mode 100644 lib/zbewalgo/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b44c0ae04..f63f543e9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1667,6 +1667,14 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "ZBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the ZBeWalgo algorithm. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c new file mode 100644 index 000000000..3d12f9cdf --- /dev/null +++ b/crypto/zbewalgo.c @@ -0,0 +1,208 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <crypto/internal/scompress.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include "linux/zbewalgo.h" + + +struct zbewalgo_ctx { + void *zbewalgo_comp_mem; +}; + +static void *zbewalgo_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = vmalloc(zbewalgo_get_wrkmem_size()); + if (!ctx) + return ERR_PTR(-ENOMEM); + return ctx; +} + +static int zbewalgo_init(struct crypto_tfm *tfm) +{ + struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->zbewalgo_comp_mem = zbewalgo_alloc_ctx(NULL); + if (IS_ERR(ctx->zbewalgo_comp_mem)) + return -ENOMEM; + return 0; +} + +static void zbewalgo_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + vfree(ctx); +} + +static void zbewalgo_exit(struct crypto_tfm *tfm) +{ + struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm); + + zbewalgo_free_ctx(NULL, ctx->zbewalgo_comp_mem); +} + +static int __zbewalgo_compress_crypto( + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen, + void *ctx) +{ + int out_len; + + if (slen > 4096) + return -EINVAL; + out_len = zbewalgo_compress(src, dst, ctx, slen); + if (!out_len) + return -EINVAL; + *dlen = out_len; + return 0; +} + +static int zbewalgo_scompress( + struct crypto_scomp *tfm, + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen, + void *ctx) +{ + return __zbewalgo_compress_crypto(src, slen, dst, dlen, ctx); +} + +static int zbewalgo_compress_crypto( + struct crypto_tfm *tfm, + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen) +{ + struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm); + + return __zbewalgo_compress_crypto( + src, + slen, + dst, + dlen, + ctx->zbewalgo_comp_mem); +} + +static int __zbewalgo_decompress_crypto( + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen, + void *ctx) +{ + int out_len; + + out_len = zbewalgo_decompress(src, dst, ctx); + if (out_len < 0) + return -EINVAL; + return 0; +} + +static int zbewalgo_sdecompress( + struct crypto_scomp *tfm, + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen, + void *ctx) +{ + return __zbewalgo_decompress_crypto(src, slen, dst, dlen, ctx); +} + +static int zbewalgo_decompress_crypto( + struct crypto_tfm *tfm, + const u8 *src, + unsigned int slen, + u8 *dst, + unsigned int *dlen) +{ + struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm); + + return __zbewalgo_decompress_crypto( + src, + slen, + dst, + dlen, + ctx->zbewalgo_comp_mem); +} + +static struct crypto_alg crypto_alg_zbewalgo = { + .cra_name = "zbewalgo", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct zbewalgo_ctx), + .cra_module = THIS_MODULE, + .cra_init = zbewalgo_init, + .cra_exit = zbewalgo_exit, + .cra_u = { + .compress = { + .coa_compress = zbewalgo_compress_crypto, + .coa_decompress = zbewalgo_decompress_crypto + } + } +}; + +static struct scomp_alg scomp = { + .alloc_ctx = zbewalgo_alloc_ctx, + .free_ctx = zbewalgo_free_ctx, + .compress = zbewalgo_scompress, + .decompress = zbewalgo_sdecompress, + .base = { + .cra_name = "zbewalgo", + .cra_driver_name = "zbewalgo-scomp", + .cra_module = THIS_MODULE, + } +}; + +static int __init zbewalgo_mod_init(void) +{ + int ret; + + ret = crypto_register_alg(&crypto_alg_zbewalgo); + if (ret) + return ret; + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&crypto_alg_zbewalgo); + return ret; + } + return ret; +} + +static void __exit zbewalgo_mod_fini(void) +{ + crypto_unregister_alg(&crypto_alg_zbewalgo); + crypto_unregister_scomp(&scomp); +} + +module_init(zbewalgo_mod_init); +module_exit(zbewalgo_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ZBeWalgo Compression Algorithm"); +MODULE_ALIAS_CRYPTO("zbewalgo"); diff --git a/include/linux/zbewalgo.h b/include/linux/zbewalgo.h new file mode 100644 index 000000000..6b0de177b --- /dev/null +++ b/include/linux/zbewalgo.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_INCLUDE_H +#define ZBEWALGO_INCLUDE_H + +/* + * zbewalgo_get_wrkmem_size must be used to determine the size which is + * required for allocating the working memory for the compression and + * decompression algorithm + */ +int zbewalgo_get_wrkmem_size(void); + +/* + * this function compresses the data given by 'source' into the + * preallocated memory given by 'dest'. + * The maximum allowed source_length is 4096 Bytes. If larger values are + * given, the resulting data might be corrupt. + * If the data is not compressible the function returns -1. Otherwise the + * size of the compressed data is returned. + * wrkmem must point to a memory region of at least the size returned by + * 'zbewalgo_get_wrkmem_size'. + */ +int zbewalgo_compress( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length); + +/* + * this function decompresses data which was already successfully compressed + * with 'zbewalgo_compress'. + * the function decompresses the data given by 'source' into the preallocated + * buffer 'dest'. + * wrkmem must point to a memory region of at least the size returned by + * 'zbewalgo_get_wrkmem_size'. + * the wrkmem for compression and decompression dont need to be the same + */ +int zbewalgo_decompress( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem); + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb..ad72aeacc 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -239,6 +239,9 @@ config LZO_DECOMPRESS config LZ4_COMPRESS tristate +config ZBEWALGO_COMPRESS + tristate + config LZ4HC_COMPRESS tristate diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8..a6a65c183 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -120,6 +120,7 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ +obj-$(CONFIG_ZBEWALGO_COMPRESS) += zbewalgo/ obj-$(CONFIG_ZSTD_COMPRESS) += zstd/ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/ obj-$(CONFIG_XZ_DEC) += xz/ diff --git a/lib/zbewalgo/BWT.c b/lib/zbewalgo/BWT.c new file mode 100644 index 000000000..56920a9d1 --- /dev/null +++ b/lib/zbewalgo/BWT.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "BWT.h" + +/* + * implementation of the Burrows-Wheeler transformation. Optimized for speed + * and small input sizes + */ +static __always_inline int compress_bwt( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + uint16_t * const C = (uint16_t *) wrkmem; + uint16_t i; + uint16_t alphabet; + uint8_t * const op = dest + 3; + *dest = source[source_length - 1]; + *((uint16_t *) (dest + 1)) = source_length; + memset(C, 0, 512); + for (i = 0; i < source_length; i++) + C[source[i]]++; + alphabet = (C[0] > 0); + for (i = 1; i < 256; i++) { + alphabet += (C[i] > 0); + C[i] += C[i - 1]; + } + if (alphabet > zbewalgo_bwt_max_alphabet) + return -1; + i = source_length - 1; + C[source[i]]--; + op[C[source[i]]] = source[i - 1]; + i--; + while (i > 0) { + C[source[i]]--; + op[C[source[i]]] = source[i - 1]; + i--; + } + C[source[0]]--; + op[C[source[0]]] = source[source_length - 1]; + return source_length + 3; +} + +/* + * reverses the transformation + */ +static __always_inline int decompress_bwt( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + const uint16_t dest_length = *((uint16_t *) (source + 1)); + uint16_t * const C = (uint16_t *) wrkmem; + uint8_t * const L = wrkmem + 512; + uint8_t key = *source; + uint8_t *dest_end = dest + dest_length; + const uint8_t *ip = source + 3; + int i, j; + + memset(C, 0, 512); + for (i = 0; i < dest_length; i++) + C[ip[i]]++; + for (i = 1; i < 256; i++) + C[i] += C[i - 1]; + j = 0; + for (i = 0; i < 256; i++) + while (j < C[i]) + L[j++] = i; + do { + C[key]--; + *--dest_end = L[C[key]]; + key = ip[C[key]]; + } while (dest < dest_end); + return dest_length; +} + +static int init_bwt(void) +{ + return 0; +} + +static void exit_bwt(void) +{ +} + +struct zbewalgo_alg alg_bwt = { + .name = "bwt", + .flags = ZBEWALGO_ALG_FLAG_TRANSFORM, + .wrkmem_size = PAGE_SIZE << 1, + .init = init_bwt, + .exit = exit_bwt, + .compress = compress_bwt, + .decompress = decompress_bwt +}; diff --git a/lib/zbewalgo/BWT.h b/lib/zbewalgo/BWT.h new file mode 100644 index 000000000..76262e239 --- /dev/null +++ b/lib/zbewalgo/BWT.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_BWT_H +#define ZBEWALGO_BWT_H + +#include "include.h" + +extern struct zbewalgo_alg alg_bwt; + +/* + * If more than the specified number of chars are to be transformed, + * it is unlikely that the compression will achieve high ratios. + * As a consequence the Burrows-Wheeler Transformation will abort if the number + * of different bytes exeeds this value. + */ +static unsigned long zbewalgo_bwt_max_alphabet = 90; + +#endif diff --git a/lib/zbewalgo/JBE.c b/lib/zbewalgo/JBE.c new file mode 100644 index 000000000..4bf4de609 --- /dev/null +++ b/lib/zbewalgo/JBE.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "JBE.h" + +/* + * Implementation of the J-Bit encoding as published by + * 'I Made Agus Dwi Suarjaya' 2012 + */ +static __always_inline int compress_jbe( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + uint64_t source_tmp; + uint8_t tmp; + const uint64_t *source_end = (uint64_t *) (source + source_length); + const uint64_t *ip = (uint64_t *) source; + uint8_t *dataI = dest + 2; + uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(source_length); + uint8_t * const source_tmp_ptr = (uint8_t *) (&source_tmp); + *(uint16_t *) dest = source_length; + do { + source_tmp = *ip++; + tmp = source_tmp_ptr[0] > 0; + *dataII = source_tmp_ptr[0]; + *dataI = tmp << 7; + dataII += tmp; + tmp = source_tmp_ptr[1] > 0; + *dataII = source_tmp_ptr[1]; + *dataI |= tmp << 6; + dataII += tmp; + tmp = source_tmp_ptr[2] > 0; + *dataII = source_tmp_ptr[2]; + *dataI |= tmp << 5; + dataII += tmp; + tmp = source_tmp_ptr[3] > 0; + *dataII = source_tmp_ptr[3]; + *dataI |= tmp << 4; + dataII += tmp; + tmp = source_tmp_ptr[4] > 0; + *dataII = source_tmp_ptr[4]; + *dataI |= tmp << 3; + dataII += tmp; + tmp = source_tmp_ptr[5] > 0; + *dataII = source_tmp_ptr[5]; + *dataI |= tmp << 2; + dataII += tmp; + tmp = source_tmp_ptr[6] > 0; + *dataII = source_tmp_ptr[6]; + *dataI |= tmp << 1; + dataII += tmp; + tmp = source_tmp_ptr[7] > 0; + *dataII = source_tmp_ptr[7]; + *dataI |= tmp; + dataII += tmp; + dataI++; + } while (ip < source_end); + return dataII - dest; +} + +static __always_inline int decompress_jbe( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + uint64_t dest_tmp; + const uint16_t dest_length = *(uint16_t *) source; + const uint8_t *dataI = source + 2; + const uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(dest_length); + const uint64_t *dest_end = (uint64_t *) (dest + dest_length); + uint8_t * const dest_tmp_ptr = (uint8_t *) (&dest_tmp); + uint64_t *op = (uint64_t *) dest; + + do { + dest_tmp_ptr[0] = ((*dataI & 0x80) ? *dataII : 0); + dataII += (*dataI & 0x80) > 0; + dest_tmp_ptr[1] = ((*dataI & 0x40) ? *dataII : 0); + dataII += (*dataI & 0x40) > 0; + dest_tmp_ptr[2] = ((*dataI & 0x20) ? *dataII : 0); + dataII += (*dataI & 0x20) > 0; + dest_tmp_ptr[3] = ((*dataI & 0x10) ? *dataII : 0); + dataII += (*dataI & 0x10) > 0; + dest_tmp_ptr[4] = ((*dataI & 0x08) ? *dataII : 0); + dataII += (*dataI & 0x08) > 0; + dest_tmp_ptr[5] = ((*dataI & 0x04) ? *dataII : 0); + dataII += (*dataI & 0x04) > 0; + dest_tmp_ptr[6] = ((*dataI & 0x02) ? *dataII : 0); + dataII += (*dataI & 0x02) > 0; + dest_tmp_ptr[7] = ((*dataI & 0x01) ? *dataII : 0); + dataII += (*dataI & 0x01) > 0; + dataI++; + *op++ = dest_tmp; + } while (op < dest_end); + return dest_length; +} + +static int init_jbe(void) +{ + return 0; +} + +static void exit_jbe(void) +{ +} + +struct zbewalgo_alg alg_jbe = { + .name = "jbe", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = 0, + .init = init_jbe, + .exit = exit_jbe, + .compress = compress_jbe, + .decompress = decompress_jbe +}; diff --git a/lib/zbewalgo/JBE.h b/lib/zbewalgo/JBE.h new file mode 100644 index 000000000..5bc766ac5 --- /dev/null +++ b/lib/zbewalgo/JBE.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_JBE_H +#define ZBEWALGO_JBE_H + +#include "include.h" + +extern struct zbewalgo_alg alg_jbe; + +#endif diff --git a/lib/zbewalgo/JBE2.c b/lib/zbewalgo/JBE2.c new file mode 100644 index 000000000..51f828c46 --- /dev/null +++ b/lib/zbewalgo/JBE2.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "JBE2.h" + +/* + * Variant of the J-Bit encoding published by 'I Made Agus Dwi Suarjaya' 2012 + */ +static __always_inline int compress_jbe2( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + uint64_t source_tmp; + uint8_t tmp; + const uint64_t *source_end = (uint64_t *) (source + source_length); + const uint64_t *ip = (uint64_t *) source; + uint8_t *dataI = dest + 2; + uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(source_length); + uint8_t * const source_tmp_ptr = (uint8_t *) (&source_tmp); + *(uint16_t *) dest = source_length; + do { + source_tmp = *ip++; + source_tmp = (source_tmp & 0xF0F0F0F00F0F0F0F) + | ((source_tmp & 0x0F0F0F0F00000000) >> 28) + | ((source_tmp & 0x00000000F0F0F0F0) << 28); + tmp = source_tmp_ptr[0] > 0; + *dataII = source_tmp_ptr[0]; + *dataI = tmp << 7; + dataII += tmp; + tmp = source_tmp_ptr[1] > 0; + *dataII = source_tmp_ptr[1]; + *dataI |= tmp << 6; + dataII += tmp; + tmp = source_tmp_ptr[2] > 0; + *dataII = source_tmp_ptr[2]; + *dataI |= tmp << 5; + dataII += tmp; + tmp = source_tmp_ptr[3] > 0; + *dataII = source_tmp_ptr[3]; + *dataI |= tmp << 4; + dataII += tmp; + tmp = source_tmp_ptr[4] > 0; + *dataII = source_tmp_ptr[4]; + *dataI |= tmp << 3; + dataII += tmp; + tmp = source_tmp_ptr[5] > 0; + *dataII = source_tmp_ptr[5]; + *dataI |= tmp << 2; + dataII += tmp; + tmp = source_tmp_ptr[6] > 0; + *dataII = source_tmp_ptr[6]; + *dataI |= tmp << 1; + dataII += tmp; + tmp = source_tmp_ptr[7] > 0; + *dataII = source_tmp_ptr[7]; + *dataI |= tmp; + dataII += tmp; + dataI++; + } while (ip < source_end); + return dataII - dest; +} + +static __always_inline int decompress_jbe2( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + uint64_t dest_tmp; + const uint16_t dest_length = *(uint16_t *) source; + const uint8_t *dataI = source + 2; + const uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(dest_length); + const uint64_t *dest_end = (uint64_t *) (dest + dest_length); + uint8_t * const dest_tmp_ptr = (uint8_t *) (&dest_tmp); + uint64_t *op = (uint64_t *) dest; + + do { + dest_tmp_ptr[0] = ((*dataI & 0x80) ? *dataII : 0); + dataII += (*dataI & 0x80) > 0; + dest_tmp_ptr[1] = ((*dataI & 0x40) ? *dataII : 0); + dataII += (*dataI & 0x40) > 0; + dest_tmp_ptr[2] = ((*dataI & 0x20) ? *dataII : 0); + dataII += (*dataI & 0x20) > 0; + dest_tmp_ptr[3] = ((*dataI & 0x10) ? *dataII : 0); + dataII += (*dataI & 0x10) > 0; + dest_tmp_ptr[4] = ((*dataI & 0x08) ? *dataII : 0); + dataII += (*dataI & 0x08) > 0; + dest_tmp_ptr[5] = ((*dataI & 0x04) ? *dataII : 0); + dataII += (*dataI & 0x04) > 0; + dest_tmp_ptr[6] = ((*dataI & 0x02) ? *dataII : 0); + dataII += (*dataI & 0x02) > 0; + dest_tmp_ptr[7] = ((*dataI & 0x01) ? *dataII : 0); + dataII += (*dataI & 0x01) > 0; + dataI++; + dest_tmp = (dest_tmp & 0xF0F0F0F00F0F0F0F) + | ((dest_tmp & 0x0F0F0F0F00000000) >> 28) + | ((dest_tmp & 0x00000000F0F0F0F0) << 28); + *op++ = dest_tmp; + } while (op < dest_end); + return dest_length; +} +static int init_jbe2(void) +{ + return 0; +} + +static void exit_jbe2(void) +{ +} + +struct zbewalgo_alg alg_jbe2 = { + .name = "jbe2", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = 0, + .init = init_jbe2, + .exit = exit_jbe2, + .compress = compress_jbe2, + .decompress = decompress_jbe2 +}; diff --git a/lib/zbewalgo/JBE2.h b/lib/zbewalgo/JBE2.h new file mode 100644 index 000000000..54da22f2a --- /dev/null +++ b/lib/zbewalgo/JBE2.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_JBE2_H +#define ZBEWALGO_JBE2_H + +#include "include.h" + +extern struct zbewalgo_alg alg_jbe2; + +#endif diff --git a/lib/zbewalgo/MTF.c b/lib/zbewalgo/MTF.c new file mode 100644 index 000000000..a40b63575 --- /dev/null +++ b/lib/zbewalgo/MTF.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "MTF.h" + +/* + * Move To Front encoding + */ +static __always_inline int compress_mtf( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + const uint8_t *source_end = source + source_length; + const uint8_t *ip = source; + uint8_t *op = dest + 2; + uint16_t i; + uint8_t tmp; + *(uint16_t *) dest = source_length; + for (i = 0; i < 256; i++) + wrkmem[i] = i; + do { + i = 0; + while (wrkmem[i] != *ip) + i++; + ip++; + *op++ = i; + tmp = wrkmem[i]; + while (i > 0) { + wrkmem[i] = wrkmem[i - 1]; + i--; + } + wrkmem[0] = tmp; + } while (ip < source_end); + return source_length + 2; +} + +static __always_inline int decompress_mtf( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + const uint16_t dest_length = *(uint16_t *) source; + uint8_t *dest_end = dest + dest_length; + uint16_t i; + uint8_t tmp; + const uint8_t *ip = source + 2; + uint8_t *op = dest; + + for (i = 0; i < 256; i++) + wrkmem[i] = i; + do { + i = *ip++; + *op++ = wrkmem[i]; + tmp = wrkmem[i]; + while (i > 0) { + wrkmem[i] = wrkmem[i - 1]; + i--; + } + wrkmem[0] = tmp; + } while (op < dest_end); + return dest_length; +} + +static int init_mtf(void) +{ + return 0; +} + +static void exit_mtf(void) +{ +} + +struct zbewalgo_alg alg_mtf = { + .name = "mtf", + .flags = ZBEWALGO_ALG_FLAG_TRANSFORM, + .wrkmem_size = 1 << 8, + .init = init_mtf, + .exit = exit_mtf, + .compress = compress_mtf, + .decompress = decompress_mtf +}; diff --git a/lib/zbewalgo/MTF.h b/lib/zbewalgo/MTF.h new file mode 100644 index 000000000..cc2f2a612 --- /dev/null +++ b/lib/zbewalgo/MTF.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_MTF_H +#define ZBEWALGO_MTF_H + +#include "include.h" + +extern struct zbewalgo_alg alg_mtf; + +#endif diff --git a/lib/zbewalgo/Makefile b/lib/zbewalgo/Makefile new file mode 100644 index 000000000..dc015a01b --- /dev/null +++ b/lib/zbewalgo/Makefile @@ -0,0 +1,4 @@ +ccflags-y += -O3 -fno-tree-vrp -Werror + +obj-$(CONFIG_ZBEWALGO_COMPRESS) += zbewalgo_compress.o +zbewalgo_compress-objs := zbewalgo.o bewalgo.o bewalgo2.o bitshuffle.o BWT.o huffman.o JBE.o JBE2.o MTF.o RLE.o diff --git a/lib/zbewalgo/RLE.c b/lib/zbewalgo/RLE.c new file mode 100644 index 000000000..b2d35b434 --- /dev/null +++ b/lib/zbewalgo/RLE.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "RLE.h" + +#define RLE_REPEAT 0x80 +#define RLE_SIMPLE 0x00 +#define RLE_MAX_LENGTH ((1 << 7) - 1) + + +/* + * Run Length Encoder + */ +static __always_inline int compress_rle( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + const uint8_t *anchor = source; + const uint8_t *source_end = source + source_length; + unsigned int count; + uint8_t lastval; + uint8_t *op = dest + 2; + const uint8_t *ip = source; + *((uint16_t *) dest) = source_length; + while (1) { + // RLE_SIMPLE + do { + lastval = *ip++; + } while ((ip < source_end) && (lastval != *ip)); + count = ip - anchor - (ip < source_end); + if (count > 0) { + while (count > RLE_MAX_LENGTH) { + *op++ = RLE_SIMPLE | RLE_MAX_LENGTH; + memcpy(op, anchor, RLE_MAX_LENGTH + 1); + anchor += RLE_MAX_LENGTH + 1; + op += RLE_MAX_LENGTH + 1; + count -= RLE_MAX_LENGTH + 1; + } + if (count > 0) { + *op++ = RLE_SIMPLE | (count - 1); + memcpy(op, anchor, count); + anchor += count; + op += count; + } + } + if (ip == source_end) + return op - dest; + // RLE_REPEAT + do { + lastval = *ip++; + } while ((ip < source_end) && (lastval == *ip)); + count = ip - anchor; + if (count > 0) { + anchor += count; + while (count > RLE_MAX_LENGTH) { + *op++ = RLE_REPEAT | RLE_MAX_LENGTH; + *op++ = lastval; + count -= RLE_MAX_LENGTH + 1; + } + if (count > 0) { + *op++ = RLE_REPEAT | (count - 1); + *op++ = lastval; + } + } + if (ip == source_end) + return op - dest; + } +} + +static __always_inline int decompress_rle( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + const uint16_t source_length = *((uint16_t *) source); + const uint8_t *dest_end = dest + source_length; + unsigned int length; + const uint8_t *ip = source + 2; + uint8_t *op = dest; + + do { + if ((*ip & RLE_REPEAT) == RLE_REPEAT) { + length = *ip++ - RLE_REPEAT + 1; + memset(op, *ip++, length); + op += length; + } else { + length = *ip++ - RLE_SIMPLE + 1; + memcpy(op, ip, length); + ip += length; + op += length; + } + } while (op < dest_end); + return op - dest; +} + +static int init_rle(void) +{ + return 0; +} + +static void exit_rle(void) +{ +} + +struct zbewalgo_alg alg_rle = { + .name = "rle", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = 0, + .init = init_rle, + .exit = exit_rle, + .compress = compress_rle, + .decompress = decompress_rle +}; diff --git a/lib/zbewalgo/RLE.h b/lib/zbewalgo/RLE.h new file mode 100644 index 000000000..09a806ab4 --- /dev/null +++ b/lib/zbewalgo/RLE.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_RLE_H +#define ZBEWALGO_RLE_H + +#include "include.h" + +extern struct zbewalgo_alg alg_rle; + +#endif diff --git a/lib/zbewalgo/bewalgo.c b/lib/zbewalgo/bewalgo.c new file mode 100644 index 000000000..34833a2f7 --- /dev/null +++ b/lib/zbewalgo/bewalgo.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bewalgo.h" + +#define BEWALGO_ACCELERATION_DEFAULT 1 + +#define BEWALGO_MEMORY_USAGE 14 + +#define BEWALGO_SKIPTRIGGER 6 + +#define BEWALGO_LENGTH_BITS 8 +#define BEWALGO_LENGTH_MAX ((1 << BEWALGO_LENGTH_BITS) - 1) +#define BEWALGO_OFFSET_BITS 16 +#define BEWALGO_OFFSET_MAX ((1 << BEWALGO_OFFSET_BITS) - 1) + +#define BEWALGO_HASHLOG (BEWALGO_MEMORY_USAGE - 2) +#define BEWALGO_HASH_SIZE_uint32_t (1 << BEWALGO_HASHLOG) +struct bewalgo_compress_internal { + uint32_t table[BEWALGO_HASH_SIZE_uint32_t]; +}; + +#define bewalgo_copy_helper_src(dest, src, target) \ +do { \ + while ((src) < (target) - 3) { \ + (dest)[0] = (src)[0]; \ + (dest)[1] = (src)[1]; \ + (dest)[2] = (src)[2]; \ + (dest)[3] = (src)[3]; \ + (dest) += 4; \ + (src) += 4; \ + } \ + if ((src) < (target) - 1) { \ + (dest)[0] = (src)[0]; \ + (dest)[1] = (src)[1]; \ + (dest) += 2; \ + (src) += 2; \ + } \ + if ((src) < (target)) \ + *(dest)++ = *(src)++; \ +} while (0) + +static __always_inline int decompress_bewalgo( + const uint8_t * const source_, + uint8_t * const dest_, + uint8_t * const wrkmem) +{ + const uint64_t * const source = (const uint64_t *) source_; + uint64_t * const dest = (uint64_t *) dest_; + const uint64_t *ip; + uint64_t *match; + uint64_t *op; + const uint64_t *dest_end_ptr; + const uint8_t *controll_block_ptr; + const uint64_t *target; + uint32_t to_read; + uint32_t avail; + const uint16_t dest_length = *(uint16_t *) source; + const uint32_t dest_end = DIV_BY_8_ROUND_UP(dest_length); + + ip = (uint64_t *) (((uint8_t *) source) + 2); + controll_block_ptr = (uint8_t *) ip; + match = op = dest; + dest_end_ptr = dest_end + dest; + do { + if (unlikely(dest_end_ptr <= op)) + goto _last_control_block; + controll_block_ptr = (uint8_t *) ip; + ip++; + target = ip + controll_block_ptr[0]; + bewalgo_copy_helper_src(op, ip, target); + match = op - ((uint16_t *) controll_block_ptr)[1]; + target = match + controll_block_ptr[1]; + bewalgo_copy_helper_src(op, match, target); + target = ip + controll_block_ptr[4]; + bewalgo_copy_helper_src(op, ip, target); + match = op - ((uint16_t *) controll_block_ptr)[3]; + target = match + controll_block_ptr[5]; + bewalgo_copy_helper_src(op, match, target); + } while (1); +_last_control_block: + to_read = controll_block_ptr[0]; + avail = dest_end_ptr - op; + target = ip + min_t(uint32_t, to_read, avail); + bewalgo_copy_helper_src(op, ip, target); + match = op - ((uint16_t *) controll_block_ptr)[1]; + to_read = controll_block_ptr[1]; + avail = dest_end_ptr - op; + target = match + min_t(uint32_t, to_read, avail); + bewalgo_copy_helper_src(op, match, target); + to_read = controll_block_ptr[4]; + avail = dest_end_ptr - op; + target = ip + min_t(uint32_t, to_read, avail); + bewalgo_copy_helper_src(op, ip, target); + match = op - ((uint16_t *) controll_block_ptr)[3]; + to_read = controll_block_ptr[5]; + avail = dest_end_ptr - op; + target = match + min_t(uint32_t, to_read, avail); + bewalgo_copy_helper_src(op, match, target); + return (op - dest) << 3; +} + +static __always_inline uint32_t bewalgo_compress_hash(uint64_t sequence) +{ + return (uint32_t) ( + ((sequence >> 24) * 11400714785074694791ULL) + >> (64 - BEWALGO_HASHLOG)); +} + +static __always_inline int compress_bewalgo_( + struct bewalgo_compress_internal *wrkmem, + const uint64_t * const source, + uint64_t * const dest, + const uint16_t source_length, + uint8_t acceleration) +{ + const int acceleration_start = + (acceleration < 4 ? 32 >> acceleration : 4); + const uint64_t * const dest_end_ptr = + dest + zbewalgo_max_output_size; + const uint32_t source_end = + (source_length >> 3) + ((source_length & 7) > 0); + const uint64_t * const source_end_ptr = source + source_end; + const uint64_t * const source_end_ptr_1 = source_end_ptr - 1; + const uint64_t *match = source; + const uint64_t *anchor = source; + const uint64_t *ip = source; + uint64_t *op = (uint64_t *) (((uint8_t *) dest) + 2); + uint8_t *op_control = NULL; + uint32_t op_control_available = 0; + const uint64_t *target; + int length; + uint16_t offset; + uint32_t h; + int j; + int tmp_literal_length; + int match_nodd; + int match_nzero_nodd; + *(uint16_t *) dest = source_length; + memset(wrkmem, 0, sizeof(struct bewalgo_compress_internal)); + h = bewalgo_compress_hash(*ip); + wrkmem->table[h] = 0; + do { + j = acceleration_start; + length = source_end_ptr_1 - ip; + j = j < length ? j : length; + for (length = 1; length <= j; length++) { + ip++; + h = bewalgo_compress_hash(*ip); + match = source + wrkmem->table[h]; + wrkmem->table[h] = ip - source; + if (*(uint64_t *) match == *(uint64_t *) ip) + goto _find_match_left; + } + length = acceleration_start + + (acceleration << BEWALGO_SKIPTRIGGER); + ip++; + do { + if (unlikely(ip >= source_end_ptr)) + goto _encode_last_literal; + h = bewalgo_compress_hash(*ip); + match = source + wrkmem->table[h]; + wrkmem->table[h] = ip - source; + if (*(uint64_t *) match == *(uint64_t *) ip) + goto _find_match_left; + ip += (length++ >> BEWALGO_SKIPTRIGGER); + } while (1); +_find_match_left: + while ((match != source) && (match[-1] == ip[-1])) { + ip--; + match--; + if (ip == anchor) + goto _find_match_right; + } + length = ip - anchor; + tmp_literal_length = length + - (op_control_available ? BEWALGO_LENGTH_MAX : 0); + if (unlikely(op + + ((tmp_literal_length / (BEWALGO_LENGTH_MAX * 2))) + + ((tmp_literal_length % (BEWALGO_LENGTH_MAX * 2)) > 0) + + length > dest_end_ptr)) { + goto _error; + } + while (length > BEWALGO_LENGTH_MAX) { + if (op_control_available == 0) { + op_control = (uint8_t *) op; + *op++ = 0; + } + op_control_available = !op_control_available; + *op_control = BEWALGO_LENGTH_MAX; + op_control += 4; + target = anchor + BEWALGO_LENGTH_MAX; + bewalgo_copy_helper_src(op, anchor, target); + length -= BEWALGO_LENGTH_MAX; + } + if (likely(length > 0)) { + if (op_control_available == 0) { + op_control = (uint8_t *) op; + *op++ = 0; + } + op_control_available = !op_control_available; + *op_control = length; + op_control += 4; + bewalgo_copy_helper_src(op, anchor, ip); + } +_find_match_right: + do { + ip++; + match++; + } while ((ip < source_end_ptr) && (*match == *ip)); + length = ip - anchor; + offset = ip - match; + anchor = ip; + if (length > BEWALGO_LENGTH_MAX) { + uint32_t control_match_value = + (BEWALGO_LENGTH_MAX << 8) | (offset << 16); + size_t match_length_div_255 = length / 255; + size_t match_length_mod_255 = length % 255; + uint32_t match_zero = match_length_mod_255 == 0; + uint32_t match_nzero = !match_zero; + int control_blocks_needed = match_length_div_255 + + match_nzero + - op_control_available; + if (unlikely((op + + (control_blocks_needed >> 1) + + (control_blocks_needed & 1) + ) > dest_end_ptr)) + goto _error; + op_control = op_control_available > 0 + ? op_control + : (uint8_t *) op; + *((uint32_t *) op_control) = control_match_value; + match_length_div_255 -= op_control_available > 0; + match_nodd = !(match_length_div_255 & 1); + match_nzero_nodd = match_nzero && match_nodd; + if (match_length_div_255 > 0) { + uint64_t control_match_value_long = + control_match_value + | (((uint64_t) control_match_value) + << 32); + target = op + + (match_length_div_255 >> 1) + + (match_length_div_255 & 1); + do { + *op++ = control_match_value_long; + } while (op < target); + } + op_control = ((uint8_t *) op) - 4; + *(uint32_t *) (op_control + + (match_nzero_nodd << 3)) = 0; + *(uint32_t *) (op_control + + (match_nzero_nodd << 2)) = 0; + *(op_control + (match_nzero_nodd << 2) + 1) = + (match_zero & match_nodd) + ? BEWALGO_LENGTH_MAX + : match_length_mod_255; + *(uint16_t *) (op_control + + (match_nzero_nodd << 2) + 2) = offset; + op_control += match_nzero_nodd << 3; + op += match_nzero_nodd; + op_control_available = + (match_length_div_255 & 1) == match_zero; + } else { + if (unlikely((op_control_available == 0) + && (op >= dest_end_ptr) + && (op_control[-3] != 0))) + goto _error; + if (op_control[-3] != 0) { + if (op_control_available == 0) { + op_control = (uint8_t *) op; + *op++ = 0; + } + op_control_available = !op_control_available; + op_control += 4; + } + op_control[-3] = length; + ((uint16_t *) op_control)[-1] = offset; + } + if (unlikely(ip == source_end_ptr)) + goto _finish; + h = bewalgo_compress_hash(*ip); + match = source + wrkmem->table[h]; + wrkmem->table[h] = ip - source; + if (*(uint64_t *) match == *(uint64_t *) ip) + goto _find_match_right; + } while (1); +_encode_last_literal: + length = source_end_ptr - anchor; + tmp_literal_length = length + - (op_control_available ? BEWALGO_LENGTH_MAX : 0); + if (op + + ((tmp_literal_length / (BEWALGO_LENGTH_MAX * 2))) + + ((tmp_literal_length % (BEWALGO_LENGTH_MAX * 2)) > 0) + + length > dest_end_ptr) + goto _error; + while (length > BEWALGO_LENGTH_MAX) { + if (op_control_available == 0) { + op_control = (uint8_t *) op; + *op++ = 0; + } + op_control_available = !op_control_available; + *op_control = BEWALGO_LENGTH_MAX; + op_control += 4; + target = anchor + BEWALGO_LENGTH_MAX; + bewalgo_copy_helper_src(op, anchor, target); + length -= BEWALGO_LENGTH_MAX; + } + if (length > 0) { + if (op_control_available == 0) { + op_control = (uint8_t *) op; + *op++ = 0; + } + op_control_available = !op_control_available; + *op_control = length; + op_control += 4; + bewalgo_copy_helper_src(op, anchor, source_end_ptr); + } +_finish: + return ((uint8_t *) op) - ((uint8_t *) dest); +_error: + return -1; +} + +static __always_inline int compress_bewalgo( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + return compress_bewalgo_( + (struct bewalgo_compress_internal *) wrkmem, + (const uint64_t *) source, + (uint64_t *) dest, + source_length, 1); +} + +static int init_bewalgo(void) +{ + return 0; +} + +static void exit_bewalgo(void) +{ +} + +struct zbewalgo_alg alg_bewalgo = { + .name = "bewalgo", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = sizeof(struct bewalgo_compress_internal), + .init = init_bewalgo, + .exit = exit_bewalgo, + .compress = compress_bewalgo, + .decompress = decompress_bewalgo +}; diff --git a/lib/zbewalgo/bewalgo.h b/lib/zbewalgo/bewalgo.h new file mode 100644 index 000000000..c8e0f163b --- /dev/null +++ b/lib/zbewalgo/bewalgo.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_BEWALGO_H +#define ZBEWALGO_BEWALGO_H + +#include "include.h" + +extern struct zbewalgo_alg alg_bewalgo; + +#endif diff --git a/lib/zbewalgo/bewalgo2.c b/lib/zbewalgo/bewalgo2.c new file mode 100644 index 000000000..12027c623 --- /dev/null +++ b/lib/zbewalgo/bewalgo2.c @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bewalgo2.h" + +#define MAX_LITERALS ((zbewalgo_max_output_size >> 3) - 4) + +#define OFFSET_BITS 9 +#define OFFSET_SHIFT (16 - OFFSET_BITS) +#define MATCH_BIT_SHIFT 6 +#define MATCH_BIT_MASK (1 << MATCH_BIT_SHIFT) +#define LENGTH_BITS 6 +#define LENGTH_MASK ((1 << LENGTH_BITS) - 1) +#define LEFT 0 +#define RIGHT 1 +#define NEITHER 2 +#define TREE_NODE_NULL 0xffff + + +/* + * insert first node into an empty avl-tree + * the function returns the index of the node in the preallocated array + */ +static __always_inline int avl_insert_first( + uint64_t *wrk_literal, + uint16_t *wrk_tree, + uint16_t *treep, + uint64_t target, + uint16_t *treesize) +{ + uint16_t g_a_tree, g_o_tree2; + + g_a_tree = *treesize; + g_o_tree2 = g_a_tree << 2; + *treesize = *treesize + 1; + wrk_tree[g_o_tree2 + 0] = TREE_NODE_NULL; + wrk_tree[g_o_tree2 + 1] = TREE_NODE_NULL; + wrk_tree[g_o_tree2 + 2] = NEITHER; + wrk_literal[g_a_tree] = target; + *treep = g_a_tree; + return g_a_tree; +} + +/* + * insert a node into a non-empty avl-tree + * for speed, the nodes are saved in preallocated arrays + * the function returns the index of the node in the preallocated array + */ +static __always_inline int avl_insert( + uint64_t *wrk_literal, + uint16_t *wrk_tree, + uint16_t *treep, + uint64_t target, + uint16_t *treesize) +{ + uint16_t *path_top[2]; + uint16_t g_a_tree, g_o_tree2, g_o_next_step; + uint16_t r_1_arr[10]; + uint16_t path, path2, first[2], second; + + if (unlikely(target == wrk_literal[*treep])) + return *treep; + path_top[0] = treep; + g_o_next_step = target > wrk_literal[*treep]; + g_o_tree2 = *treep << 2; + path_top[wrk_tree[g_o_tree2 + 2] == NEITHER] = treep; + treep = &wrk_tree[g_o_tree2 + g_o_next_step]; + if (unlikely(*treep == TREE_NODE_NULL)) + goto _insert_new_node; +_search_node: + if (target == wrk_literal[*treep]) + return *treep; + g_o_next_step = target > wrk_literal[*treep]; + g_o_tree2 = *treep << 2; + path_top[wrk_tree[g_o_tree2 + 2] == NEITHER] = treep; + treep = &wrk_tree[g_o_tree2 + g_o_next_step]; + if (likely(*treep != TREE_NODE_NULL)) + goto _search_node; +_insert_new_node: + g_a_tree = *treesize; + g_o_tree2 = g_a_tree << 2; + *treesize = *treesize + 1; + wrk_tree[g_o_tree2 + 0] = TREE_NODE_NULL; + wrk_tree[g_o_tree2 + 1] = TREE_NODE_NULL; + wrk_tree[g_o_tree2 + 2] = NEITHER; + wrk_literal[g_a_tree] = target; + *treep = g_a_tree; + path = *path_top[0]; + path2 = path << 2; + if (wrk_tree[path2 + 2] == NEITHER) { + while (1) { + r_1_arr[0] = target > wrk_literal[path]; + wrk_tree[path2 + 2] = r_1_arr[0]; + path = wrk_tree[path2 + r_1_arr[0]]; + if (target == wrk_literal[path]) + return g_a_tree; + path2 = path << 2; + } + } + first[0] = target > wrk_literal[path]; + if (wrk_tree[path2 + 2] != first[0]) { + wrk_tree[path2 + 2] = NEITHER; + r_1_arr[2] = wrk_tree[path2 + first[0]]; + if (target == wrk_literal[r_1_arr[2]]) + return g_a_tree; + while (1) { + r_1_arr[0] = target > wrk_literal[r_1_arr[2]]; + r_1_arr[1] = r_1_arr[2] << 2; + r_1_arr[2] = wrk_tree[r_1_arr[1] + r_1_arr[0]]; + wrk_tree[r_1_arr[1] + 2] = r_1_arr[0]; + if (target == wrk_literal[r_1_arr[2]]) + return g_a_tree; + } + } + second = target > wrk_literal[wrk_tree[path2 + first[0]]]; + first[1] = 1 - first[0]; + if (first[0] == second) { + r_1_arr[2] = *path_top[0]; + r_1_arr[3] = r_1_arr[2] << 2; + r_1_arr[4] = wrk_tree[r_1_arr[3] + first[0]]; + r_1_arr[5] = r_1_arr[4] << 2; + wrk_tree[r_1_arr[3] + first[0]] = + wrk_tree[r_1_arr[5] + first[1]]; + path = wrk_tree[r_1_arr[5] + first[0]]; + *path_top[0] = r_1_arr[4]; + wrk_tree[r_1_arr[5] + first[1]] = r_1_arr[2]; + wrk_tree[r_1_arr[3] + 2] = NEITHER; + wrk_tree[r_1_arr[5] + 2] = NEITHER; + if (target == wrk_literal[path]) + return g_a_tree; + while (1) { + r_1_arr[0] = target > wrk_literal[path]; + r_1_arr[1] = path << 2; + wrk_tree[r_1_arr[1] + 2] = r_1_arr[0]; + path = wrk_tree[r_1_arr[1] + r_1_arr[0]]; + if (target == wrk_literal[path]) + return g_a_tree; + } + } + path = wrk_tree[(wrk_tree[path2 + first[0]] << 2) + second]; + r_1_arr[5] = *path_top[0]; + r_1_arr[1] = r_1_arr[5] << 2; + r_1_arr[8] = wrk_tree[r_1_arr[1] + first[0]]; + r_1_arr[0] = r_1_arr[8] << 2; + r_1_arr[6] = wrk_tree[r_1_arr[0] + first[1]]; + r_1_arr[7] = r_1_arr[6] << 2; + r_1_arr[2] = wrk_tree[r_1_arr[7] + first[1]]; + r_1_arr[3] = wrk_tree[r_1_arr[7] + first[0]]; + *path_top[0] = r_1_arr[6]; + wrk_tree[r_1_arr[7] + first[1]] = r_1_arr[5]; + wrk_tree[r_1_arr[7] + first[0]] = r_1_arr[8]; + wrk_tree[r_1_arr[1] + first[0]] = r_1_arr[2]; + wrk_tree[r_1_arr[0] + first[1]] = r_1_arr[3]; + wrk_tree[r_1_arr[7] + 2] = NEITHER; + wrk_tree[r_1_arr[1] + 2] = NEITHER; + wrk_tree[r_1_arr[0] + 2] = NEITHER; + if (target == wrk_literal[path]) + return g_a_tree; + r_1_arr[9] = (target > wrk_literal[path]) == first[0]; + wrk_tree[r_1_arr[r_1_arr[9]] + 2] = first[r_1_arr[9]]; + path = r_1_arr[r_1_arr[9] + 2]; + if (target == wrk_literal[path]) + return g_a_tree; + while (1) { + path2 = path << 2; + r_1_arr[4] = target > wrk_literal[path]; + wrk_tree[path2 + 2] = r_1_arr[4]; + path = wrk_tree[path2 + r_1_arr[4]]; + if (target == wrk_literal[path]) + return g_a_tree; + } +} + +/* + * compress the given data using a binary tree holding all the previously + * seen 64-bit values + * returns the length of the compressed data + */ +static __always_inline int compress_bewalgo2( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + const uint64_t * const source_begin = (const uint64_t *) source; + uint64_t *wrk_literal = (uint64_t *) wrkmem; + uint16_t *wrk_tree = (uint16_t *) (wrkmem + PAGE_SIZE); + uint16_t *op_match = (uint16_t *) (dest + 4); + int count; + uint16_t i; + uint16_t j; + uint16_t tree_root = TREE_NODE_NULL; + uint16_t tree_size = 0; + const uint64_t *ip = ((const uint64_t *) source) + + DIV_BY_8_ROUND_UP(source_length) - 1; + + /* + * add first value into the tree + */ + i = avl_insert_first( + wrk_literal, + wrk_tree, + &tree_root, + *ip, + &tree_size); + /* + * to gain performance abort if the data does not seem to be + * compressible + */ + if (source_length > 512) { + /* + * verify that at there are at most 5 different values + * at the first 10 positions + */ + for (i = 2; i < 11; i++) + avl_insert( + wrk_literal, + wrk_tree, + &tree_root, + ip[-i], + &tree_size); + if (tree_size < 6) + goto _start; + /* + * verify that there are at most 12 different values + * at the first 10 and last 10 positions + */ + for (i = 0; i < 10; i++) + avl_insert( + wrk_literal, + wrk_tree, + &tree_root, + source_begin[i], + &tree_size); + if (tree_size < 13) + goto _start; + /* + * if the previous conditions do not pass, check some bytes + * in the middle for matches. + * if not enough matches found abort + */ + for (i = 0; i < 10; i++) + avl_insert( + wrk_literal, + wrk_tree, + &tree_root, + source_begin[256 + i], + &tree_size); + if (tree_size >= 21) + return -1; + } +_start: + i = 0; +_loop1_after_insert: + count = 0; + do { + ip--; + count++; + } while (likely(ip > source_begin) && (*ip == wrk_literal[i])); + if (count == 1) { + count = 0; + ip++; + j = i + count; + do { + ip--; + count++; + j++; + } while (likely(ip > source_begin) + && (j < tree_size) + && (*ip == wrk_literal[j])); + ip++; + count--; + if (likely(ip > source_begin)) { + do { + ip--; + count++; + j = avl_insert( + wrk_literal, + wrk_tree, + &tree_root, + *ip, + &tree_size); + if (unlikely(tree_size > MAX_LITERALS)) + return -1; + } while ((j == i + count) + && likely(ip > source_begin)); + } + while (unlikely(count > LENGTH_MASK)) { + *op_match++ = (i << OFFSET_SHIFT) + + MATCH_BIT_MASK + + LENGTH_MASK; + count -= LENGTH_MASK; + i += LENGTH_MASK; + } + *op_match++ = (i << OFFSET_SHIFT) + MATCH_BIT_MASK + count; + if (unlikely(ip <= source_begin)) + goto _finish; + i = j; + goto _loop1_after_insert; + } else { + while (unlikely(count > LENGTH_MASK)) { + *op_match++ = (i << OFFSET_SHIFT) + LENGTH_MASK; + count -= LENGTH_MASK; + } + *op_match++ = (i << OFFSET_SHIFT) + count; + } + if (unlikely(ip <= source_begin)) + goto _finish; + i = avl_insert(wrk_literal, wrk_tree, &tree_root, *ip, &tree_size); + goto _loop1_after_insert; +_finish: + j = avl_insert(wrk_literal, wrk_tree, &tree_root, *ip, &tree_size); + *op_match++ = (j << OFFSET_SHIFT) + 1; + ((uint16_t *) dest)[0] = op_match - ((uint16_t *) dest); + ((uint16_t *) dest)[1] = source_length; + count = sizeof(uint64_t) * (tree_size); + memcpy(op_match, wrk_literal, count); + return ((op_match - ((uint16_t *) dest)) << 1) + count; +} + +/* + * decompress the data and return the uncompressed length + */ +static __always_inline int decompress_bewalgo2( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + uint64_t *dest_anchor = (uint64_t *) dest; + const uint16_t *ip_match = ((const uint16_t *) (source + 4)); + const uint16_t *ip_match_end = + ((const uint16_t *) source) + ((const uint16_t *) source)[0]; + const uint64_t *ip_literal = (uint64_t *) ip_match_end; + uint16_t i; + uint16_t count; + uint64_t *op = dest_anchor + + DIV_BY_8_ROUND_UP(((uint16_t *) source)[1]) - 1; + + while (ip_match < ip_match_end) { + i = (*ip_match) >> OFFSET_SHIFT; + count = (*ip_match) & LENGTH_MASK; + if ((*ip_match) & MATCH_BIT_MASK) + while (count-- > 0) + *op-- = ip_literal[i++]; + else + while (count-- > 0) + *op-- = ip_literal[i]; + ip_match++; + } + return ((const uint16_t *) source)[1]; +} + +static int init_bewalgo2(void) +{ + return 0; +} + +static void exit_bewalgo2(void) +{ +} + +struct zbewalgo_alg alg_bewalgo2 = { + .name = "bewalgo2", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = 4096 << 1, + .init = init_bewalgo2, + .exit = exit_bewalgo2, + .compress = compress_bewalgo2, + .decompress = decompress_bewalgo2 +}; diff --git a/lib/zbewalgo/bewalgo2.h b/lib/zbewalgo/bewalgo2.h new file mode 100644 index 000000000..b4438ff47 --- /dev/null +++ b/lib/zbewalgo/bewalgo2.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_BEWALGO2_H +#define ZBEWALGO_BEWALGO2_H + +#include "include.h" + +extern struct zbewalgo_alg alg_bewalgo2; + +#endif diff --git a/lib/zbewalgo/bitshuffle.c b/lib/zbewalgo/bitshuffle.c new file mode 100644 index 000000000..5661cbfd4 --- /dev/null +++ b/lib/zbewalgo/bitshuffle.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bitshuffle.h" + +/* + * performs a static transformation on the data. Moves every eighth byte into + * a consecutive range + */ +static __always_inline int compress_bitshuffle( + const uint8_t *source, uint8_t *dest, + uint8_t * const wrkmem, + uint16_t source_length) +{ + uint16_t i; + *((uint16_t *) dest) = source_length; + dest += 2; + source_length = (source_length + 7) & (~7); + for (i = 0; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 1; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 2; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 3; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 4; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 5; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 6; i < source_length; i += 8) + *dest++ = source[i]; + for (i = 7; i < source_length; i += 8) + *dest++ = source[i]; + return source_length + 2; +} + +/* + * reverses the transformation step + */ +static __always_inline int decompress_bitshuffle( + const uint8_t *source, + uint8_t *dest, + uint8_t * const wrkmem) +{ + uint16_t i; + uint16_t dest_length = *((uint16_t *) source); + uint16_t dest_length2 = (dest_length + 7) & (~7); + + source += 2; + for (i = 0; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 1; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 2; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 3; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 4; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 5; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 6; i < dest_length2; i += 8) + dest[i] = *source++; + for (i = 7; i < dest_length2; i += 8) + dest[i] = *source++; + return dest_length; +} +static int init_bitshuffle(void) +{ + return 0; +} + +static void exit_bitshuffle(void) +{ +} + +struct zbewalgo_alg alg_bitshuffle = { + .name = "bitshuffle", + .flags = ZBEWALGO_ALG_FLAG_TRANSFORM, + .wrkmem_size = 0, + .init = init_bitshuffle, + .exit = exit_bitshuffle, + .compress = compress_bitshuffle, + .decompress = decompress_bitshuffle +}; diff --git a/lib/zbewalgo/bitshuffle.h b/lib/zbewalgo/bitshuffle.h new file mode 100644 index 000000000..0f1783cb5 --- /dev/null +++ b/lib/zbewalgo/bitshuffle.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_BITSHUFFLE_H +#define ZBEWALGO_BITSHUFFLE_H + +#include "include.h" + +extern struct zbewalgo_alg alg_bitshuffle; + +#endif diff --git a/lib/zbewalgo/huffman.c b/lib/zbewalgo/huffman.c new file mode 100644 index 000000000..3eb814b5e --- /dev/null +++ b/lib/zbewalgo/huffman.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "huffman.h" + +/* + * compression using the bare huffman encoding. Optimized for speed and small + * input buffers + */ +static __always_inline int compress_huffman( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + const uint8_t * const source_end = source + source_length; + const uint8_t * const dest_end = dest + zbewalgo_max_output_size; + short * const nodes_index = (short *) (wrkmem); + uint16_t * const leaf_parent_index = (uint16_t *) (wrkmem + 1536); + uint16_t * const nodes_weight = (uint16_t *) (wrkmem + 2048); + uint16_t * const frequency = (uint16_t *) (wrkmem + 3072); + uint16_t * const code_lengths = (uint16_t *) (wrkmem + 3584); + uint32_t * const code_words = (uint32_t *) (wrkmem + 4096); + short i; + uint16_t node_index; + int i2; + short max_codeword_length; + uint32_t weight2; + short num_nodes; + uint32_t bits_in_buffer; + uint32_t bits_in_stack; + uint16_t free_index; + const uint8_t *ip; + uint8_t *op; + uint32_t stack; +#ifdef __LITTLE_ENDIAN + uint8_t * const stack_ptr = (uint8_t *) &stack; +#endif + memset(dest, 0, zbewalgo_max_output_size); + memset(wrkmem, 0, 5120); + op = dest; + ip = source; + *(uint16_t *) dest = source_length; + do { + frequency[*ip++]++; + } while (ip < source_end); + ip = source; + num_nodes = 0; + for (i = 0; i < 256; i++) { + if (frequency[i] > 0) { + i2 = num_nodes++; + while ((i2 > 0) && (nodes_weight[i2] > frequency[i])) { + nodes_weight[i2 + 1] = nodes_weight[i2]; + nodes_index[i2 + 1] = nodes_index[i2]; + leaf_parent_index[nodes_index[i2]]++; + i2--; + } + i2++; + nodes_index[i2] = -(i + 1); + leaf_parent_index[-(i + 1)] = i2; + nodes_weight[i2] = frequency[i]; + } + } + dest[2] = num_nodes; + op = dest + 3; + for (i = 1; i <= num_nodes; ++i) { + *op++ = -(nodes_index[i] + 1); + *(uint16_t *) op = nodes_weight[i]; + op += 2; + } + free_index = 2; + while (free_index <= num_nodes) { + weight2 = nodes_weight[free_index - 1] + + nodes_weight[free_index]; + i2 = num_nodes++; + while ((i2 > 0) && (nodes_weight[i2] > weight2)) { + nodes_weight[i2 + 1] = nodes_weight[i2]; + nodes_index[i2 + 1] = nodes_index[i2]; + leaf_parent_index[nodes_index[i2]]++; + i2--; + } + i2++; + nodes_index[i2] = free_index >> 1; + leaf_parent_index[free_index >> 1] = i2; + nodes_weight[i2] = weight2; + free_index += 2; + } + if (num_nodes > 400) + return -1; + for (i = 0; i < 256; i++) { + if (frequency[i] == 0) + continue; + bits_in_stack = 0; + stack = 0; + node_index = leaf_parent_index[-(i + 1)]; + while (node_index < num_nodes) { + stack |= (node_index & 0x1) << bits_in_stack; + bits_in_stack++; + node_index = leaf_parent_index[(node_index + 1) >> 1]; + } + code_lengths[i] = bits_in_stack; + code_words[i] = stack; + } + max_codeword_length = 0; + node_index = leaf_parent_index[nodes_index[1]]; + while (node_index < num_nodes) { + node_index = leaf_parent_index[(node_index + 1) >> 1]; + max_codeword_length++; + } + if (max_codeword_length > 24) + return -1; + bits_in_buffer = 0; + do { + bits_in_stack = code_lengths[*ip]; + stack = code_words[*ip]; + ip++; + bits_in_buffer += bits_in_stack; + stack = stack << (32 - bits_in_buffer); +#ifdef __LITTLE_ENDIAN + op[0] |= stack_ptr[3]; + op[1] |= stack_ptr[2]; + op[2] |= stack_ptr[1]; + op[3] |= stack_ptr[0]; +#else + *(uint32_t *) op |= stack; +#endif + op += bits_in_buffer >> 3; + bits_in_buffer = bits_in_buffer & 0x7; + if (unlikely(op >= dest_end)) + return -1; + } while (likely(ip < source_end)); + return op - dest + (bits_in_buffer > 0); +} + +/* + * reverses the huffman compression + */ +static __always_inline int decompress_huffman( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + const uint32_t dest_length = *(uint16_t *) source; + const uint8_t * const dest_end = dest + dest_length; + short * const nodes_index = (short *) (wrkmem); + uint16_t * const nodes_weight = (uint16_t *) (wrkmem + 1024); + uint32_t i; + int node_index; + uint32_t i2; + uint32_t weight2; + uint32_t num_nodes; + uint32_t current_bit; + uint32_t free_index; + const uint8_t *ip; + uint8_t *op; + + memset(wrkmem, 0, 2048); + num_nodes = source[2]; + ip = source + 3; + op = dest; + for (i = 1; i <= num_nodes; ++i) { + nodes_index[i] = -(*ip++ + 1); + nodes_weight[i] = *(uint16_t *) ip; + ip += 2; + } + free_index = 2; + while (free_index <= num_nodes) { + weight2 = nodes_weight[free_index - 1] + + nodes_weight[free_index]; + i2 = num_nodes++; + while (i2 > 0 && nodes_weight[i2] > weight2) { + nodes_weight[i2 + 1] = nodes_weight[i2]; + nodes_index[i2 + 1] = nodes_index[i2]; + i2--; + } + i2++; + nodes_index[i2] = free_index >> 1; + nodes_weight[i2] = weight2; + free_index += 2; + } + current_bit = 0; + do { + node_index = nodes_index[num_nodes]; + while (node_index > 0) { + ip += current_bit == 8; + current_bit = ((current_bit) & 0x7) + 1; + node_index = nodes_index[(node_index << 1) + - ((*ip >> (8 - current_bit)) & 0x1)]; + } + *op++ = -(node_index + 1); + } while (op < dest_end); + return dest_length; +} + +static int init_huffman(void) +{ + return 0; +} + +static void exit_huffman(void) +{ +} + +struct zbewalgo_alg alg_huffman = { + .name = "huffman", + .flags = ZBEWALGO_ALG_FLAG_COMPRESS, + .wrkmem_size = 5120, + .init = init_huffman, + .exit = exit_huffman, + .compress = compress_huffman, + .decompress = decompress_huffman +}; diff --git a/lib/zbewalgo/huffman.h b/lib/zbewalgo/huffman.h new file mode 100644 index 000000000..cd1ae8100 --- /dev/null +++ b/lib/zbewalgo/huffman.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_HUFFMAN_H +#define ZBEWALGO_HUFFMAN_H + +#include "include.h" + +extern struct zbewalgo_alg alg_huffman; + +#endif diff --git a/lib/zbewalgo/include.h b/lib/zbewalgo/include.h new file mode 100644 index 000000000..f6b9475b5 --- /dev/null +++ b/lib/zbewalgo/include.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZBEWALGO_INCLUDE_H +#define ZBEWALGO_INCLUDE_H + +#include "linux/module.h" + +#define ZBEWALGO_ALG_MAX_NAME 128 +#define ZBEWALGO_ALG_FLAG_COMPRESS 1 +#define ZBEWALGO_ALG_FLAG_TRANSFORM 2 +#define ZBEWALGO_COMBINATION_MAX_IDS 7 +#define ZBEWALGO_MAX_BASE_ALGORITHMS 16 +#define ZBEWALGO_COMBINATION_MAX_ACTIVE 256 + +#define DIV_BY_8_ROUND_UP(val) ((val >> 3) + ((val & 0x7) > 0)) + +struct zbewalgo_alg { + char name[ZBEWALGO_ALG_MAX_NAME]; + /* flag wheether this algorithm compresses the data or + * transforms the data only + */ + uint8_t flags; + /* the wrkmem required for this algorithm */ + uint32_t wrkmem_size; + int (*init)(void); + void (*exit)(void); + /* the compression function must store the size of + * input/output data itself + */ + int (*compress)( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length); + int (*decompress)( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem); + uint8_t id; +}; + +/* + * to gain speed the compression starts with the algorithm wich was good for + * the last compressed page. + */ +struct zbewalgo_combination { + uint8_t count; + uint8_t ids[ZBEWALGO_COMBINATION_MAX_IDS]; +}; + +struct zbewalgo_main_data { + /* + * best_id contains the id of the best combination for the last page + */ + uint8_t best_id; + + /* + * if zero search again for best_id - must be unsigned - underflow of + * values is intended + */ + uint8_t counter_search; + + /* + * a bit larger than original compressed size to be still accepted + * immediately + */ + uint16_t best_id_accepted_size; +}; + +/* + * compression aborts automatically if the compressed data is too large. + */ +extern unsigned long zbewalgo_max_output_size; + +/* + * add an combination to the current active ones. All combinations are the + * same for all instances of this algorithm + */ +int zbewalgo_add_combination( + const char * const string, + const int string_length); + +/* + * replace ALL combinations with the one specified. + */ +int zbewalgo_set_combination( + const char * const string, + const int string_length); + +#endif diff --git a/lib/zbewalgo/zbewalgo.c b/lib/zbewalgo/zbewalgo.c new file mode 100644 index 000000000..a039b6841 --- /dev/null +++ b/lib/zbewalgo/zbewalgo.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <linux/init.h> + +#include "include.h" + +#include "BWT.h" +#include "JBE.h" +#include "JBE2.h" +#include "MTF.h" +#include "RLE.h" +#include "bewalgo.h" +#include "bewalgo2.h" +#include "bitshuffle.h" +#include "huffman.h" + +static atomic64_t zbewalgo_stat_combination[256]; +static atomic64_t zbewalgo_stat_count[256]; + + +unsigned long zbewalgo_max_output_size; + +/* + * all currently available combination sequences of algorithms + */ +static struct zbewalgo_combination + zbewalgo_combinations[ZBEWALGO_COMBINATION_MAX_ACTIVE]; +static uint8_t zbewalgo_combinations_count; + +/* + * maximum required wrkmem for compression and decompression for each instance + * of the algorithm + */ +static uint32_t zbewalgo_wrkmem_size; + +/* + * compression can be aborted if the data is smaller than this theshold to + * speed up the algorithm. + */ +static unsigned long zbewalgo_early_abort_size; + +/* + * each cpu has its own independent compression history to avoid locks + */ +static struct zbewalgo_main_data __percpu *zbewalgo_main_data_ptr; + +/* + * all available algorithms + */ +static struct zbewalgo_alg + zbewalgo_base_algorithms[ZBEWALGO_MAX_BASE_ALGORITHMS]; +static uint8_t zbewalgo_base_algorithms_count; + +/* + * returns the required size of wrkmem for compression and decompression + */ +__always_inline int zbewalgo_get_wrkmem_size(void) +{ + return zbewalgo_wrkmem_size; +} +EXPORT_SYMBOL(zbewalgo_get_wrkmem_size); + +/* + * this function adds a combination to the set of enabled combinations or + * if 'flag' is set, replaces all combinations with the new supplied one. + * this function is called from the sysfs context, and therefore accepts + * a string as input. + */ +static __always_inline int add_set_combination( + const char * const string, + const int string_length, + int flag) +{ + /* points behind the string for fast looping over the entire string */ + const char * const string_end = string + string_length; + /* the string up to 'anchor' is parsed */ + const char *anchor = string; + const char *pos = string; + struct zbewalgo_combination combination; + uint8_t i; + + memset(&combination, 0, sizeof(struct zbewalgo_combination)); + /* loop over entire string */ + while ((pos < string_end) && (*pos != 0)) { + while ((pos < string_end) && (*pos != 0) && (*pos != '-')) + pos++; + if (pos - anchor <= 0) { + /* skipp leading or consecutive '-' chars */ + pos++; + anchor = pos; + continue; + } + /* find the algorithm by name in the list of all algorithms */ + for (i = 0; i < zbewalgo_base_algorithms_count; i++) { + if (((unsigned int) (pos - anchor) + == strlen(zbewalgo_base_algorithms[i].name)) + && (memcmp(anchor, + zbewalgo_base_algorithms[i].name, + pos - anchor) + == 0)) { + /* found algorithm */ + combination.ids[combination.count++] = + zbewalgo_base_algorithms[i].id; + break; + } + } + /* + * abort parsing if maximum of algorithms reached or + * if string is parsed completely + */ + if ((combination.count >= ZBEWALGO_COMBINATION_MAX_IDS) + || (*pos != '-')) + goto _finalize; + if (i == zbewalgo_base_algorithms_count) + /* misstyped arguments */ + return -1; + pos++; + anchor = pos; + } +_finalize: + if (combination.count) { + /* if combination has at least 1 algorithm */ + if (flag == 1) + zbewalgo_combinations_count = 0; + /* don't store the same combination twice */ + for (i = 0; i < zbewalgo_combinations_count; i++) + if (memcmp( + &zbewalgo_combinations[i], + &combination, + sizeof(struct zbewalgo_combination)) == 0) { + return 0; + } + /* store the new combination */ + memcpy( + &zbewalgo_combinations[zbewalgo_combinations_count], + &combination, + sizeof(struct zbewalgo_combination)); + zbewalgo_combinations_count++; + return 0; + } + return -1; /* empty algorithm is not allowed */ +} + +int zbewalgo_add_combination( + const char * const string, + const int string_length) +{ + return add_set_combination(string, string_length, 0); +} +EXPORT_SYMBOL(zbewalgo_add_combination); + +int zbewalgo_set_combination( + const char * const string, + const int string_length) +{ + return add_set_combination(string, string_length, 1); +} +EXPORT_SYMBOL(zbewalgo_set_combination); + +#define compress(i, j, s, d, w, l) \ + (zbewalgo_base_algorithms[zbewalgo_combinations[i].ids[j]] \ + .compress(s, d, w, l)) + + +__always_inline int zbewalgo_compress( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem, + const uint16_t source_length) +{ + struct zbewalgo_main_data * const main_data = + get_cpu_ptr(zbewalgo_main_data_ptr); + uint8_t *dest_best = wrkmem; + uint8_t *dest1 = dest_best + (4096 << 1); + uint8_t *dest2 = dest1 + (4096 << 1); + uint8_t * const wrk = dest2 + (4096 << 1); + uint8_t *dest_tmp; + const uint8_t *current_source; + uint8_t i, j; + uint16_t dest_best_size = source_length << 1; + int dest_current_size; + uint8_t dest_best_id = 0; + uint8_t i_from = main_data->best_id + + (main_data->counter_search-- == 0); + uint8_t i_to = zbewalgo_combinations_count; + uint8_t looped = 0; + uint16_t local_abort_size = max_t(uint16_t, + main_data->best_id_accepted_size, + zbewalgo_early_abort_size); + uint16_t counter = 0; + struct zbewalgo_combination * const dest_best_combination = + (struct zbewalgo_combination *) dest; +_begin: + /* + * loop from 0 to zbewalgo_combinations_count starting with the last + * successfull combination + */ + i = i_from; + while (i < i_to) { + current_source = source; + dest_current_size = source_length; + counter++; + for (j = 0; j < zbewalgo_combinations[i].count; j++) { + dest_current_size = compress(i, j, + current_source, + dest2, + wrk, + dest_current_size); + if (dest_current_size <= 0) + goto _next_algorithm; + current_source = dest2; + dest_tmp = dest2; + dest2 = dest1; + dest1 = dest_tmp; + if (dest_current_size < dest_best_size) { + /* if a higher compression ratio is found, update to the best */ + dest_best_id = i; + dest_best_size = dest_current_size; + dest_tmp = dest_best; + dest_best = dest1; + dest1 = dest_tmp; + memcpy( + dest_best_combination, + &zbewalgo_combinations[i], + sizeof(struct zbewalgo_combination)); + /* partial combination is allowed, if its compression ratio is high */ + dest_best_combination->count = j + 1; + } + } + if (dest_best_size < local_abort_size) + goto _early_abort; +_next_algorithm: + local_abort_size = zbewalgo_early_abort_size; + i++; + } + if (!(looped++) && (i_from > 0)) { + i_to = min_t(uint8_t, i_from, zbewalgo_combinations_count); + i_from = 0; + goto _begin; + } + if (dest_best_size > zbewalgo_max_output_size) + return -1; +_early_abort: + atomic64_inc(&zbewalgo_stat_combination[dest_best_id]); + atomic64_inc(&zbewalgo_stat_count[counter]); + main_data->best_id = dest_best_id; + main_data->best_id_accepted_size = + max_t(uint8_t, + dest_best_size + (dest_best_size >> 3), + zbewalgo_early_abort_size); + memcpy( + dest + sizeof(struct zbewalgo_combination), + dest_best, + dest_best_size); + return sizeof(struct zbewalgo_combination) + dest_best_size; +} +EXPORT_SYMBOL(zbewalgo_compress); + +#define decompress(i, s, d, w) \ + (zbewalgo_base_algorithms[combination->ids[i]] \ + .decompress(s, d, w)) + +__always_inline int zbewalgo_decompress( + const uint8_t * const source, + uint8_t * const dest, + uint8_t * const wrkmem) +{ + uint8_t *dest1 = wrkmem; + uint8_t *dest2 = dest1 + (4096 << 1); + uint8_t *wrk = dest2 + (4096 << 1); + const struct zbewalgo_combination * const combination = + (struct zbewalgo_combination *) source; + uint8_t j; + int res; + + if (combination->count == 1) { + res = decompress(0, + (source + sizeof(struct zbewalgo_combination)), + dest, + wrk); + return res; + } + res = decompress(combination->count - 1, + (source + sizeof(struct zbewalgo_combination)), + dest1, + wrk); + for (j = combination->count - 2; j > 1; j -= 2) { + res = decompress(j, dest1, dest2, wrk); + res = decompress(j - 1, dest2, dest1, wrk); + } + if (j == 1) { + res = decompress(1, dest1, dest2, wrk); + res = decompress(0, dest2, dest, wrk); + return res; + } + res = decompress(0, dest1, dest, wrk); + return res; +} +EXPORT_SYMBOL(zbewalgo_decompress); + +#define add_combination_compile_time(name) \ + zbewalgo_add_combination(name, sizeof(name)) + +static ssize_t zbewalgo_combinations_show( + struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + ssize_t res = 0; + ssize_t tmp; + uint8_t i, j; + struct zbewalgo_combination *com; + + res = tmp = scnprintf(buf, PAGE_SIZE - res, "combinations={\n"); + buf += tmp; + for (i = 0; i < zbewalgo_combinations_count; i++) { + com = &zbewalgo_combinations[i]; + res += tmp = scnprintf( + buf, + PAGE_SIZE - res, + "\tcombination[%d]=", + i); + buf += tmp; + for (j = 0; j < com->count - 1; j++) { + res += tmp = scnprintf(buf, PAGE_SIZE - res, "%s-", + zbewalgo_base_algorithms[com->ids[j]].name); + buf += tmp; + } + res += tmp = scnprintf(buf, PAGE_SIZE - res, "%s\n", + zbewalgo_base_algorithms[com->ids[j]].name); + buf += tmp; + } + res += tmp = scnprintf(buf, PAGE_SIZE - res, "}\n"); + return res; +} + +static ssize_t zbewalgo_combinations_store( + struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + ssize_t res; + + count--; + if (count < 5) + return -1; + if (memcmp(buf, "add ", 4) == 0) { + res = zbewalgo_add_combination(buf + 4, count - 4); + return res < 0 ? res : count+1; + } + if (memcmp(buf, "set ", 4) == 0) { + res = zbewalgo_set_combination(buf + 4, count - 4); + return res < 0 ? res : count+1; + } + if (memcmp(buf, "reset", 5) == 0) { + zbewalgo_combinations_count = 0; + add_combination_compile_time( + "bewalgo2-bitshuffle-jbe-rle"); + add_combination_compile_time( + "bwt-mtf-bewalgo-huffman"); + add_combination_compile_time( + "bitshuffle-bewalgo2-mtf-bewalgo-jbe"); + add_combination_compile_time( + "bitshuffle-rle-bitshuffle-rle"); + return count; + } + return -1; +} + +static ssize_t zbewalgo_max_output_size_show( + struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_max_output_size); +} + +static ssize_t zbewalgo_max_output_size_store( + struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + char localbuf[10]; + ssize_t res; + + memcpy(&localbuf[0], buf, count < 10 ? count : 10); + localbuf[count < 9 ? count : 9] = 0; + res = kstrtoul(localbuf, 10, &zbewalgo_max_output_size); + return res < 0 ? res : count; +} + +static ssize_t zbewalgo_early_abort_size_show( + struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_early_abort_size); +} + +static ssize_t zbewalgo_early_abort_size_store( + struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + char localbuf[10]; + ssize_t res; + + memcpy(&localbuf[0], buf, count < 10 ? count : 10); + localbuf[count < 9 ? count : 9] = 0; + res = kstrtoul(localbuf, 10, &zbewalgo_early_abort_size); + return res < 0 ? res : count; +} + +static ssize_t zbewalgo_bwt_max_alphabet_show( + struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_bwt_max_alphabet); +} + +static ssize_t zbewalgo_bwt_max_alphabet_store( + struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + char localbuf[10]; + ssize_t res; + + memcpy(&localbuf[0], buf, count < 10 ? count : 10); + localbuf[count < 9 ? count : 9] = 0; + res = kstrtoul(localbuf, 10, &zbewalgo_bwt_max_alphabet); + return res < 0 ? res : count; +} + +static struct kobj_attribute zbewalgo_combinations_attribute = __ATTR( + combinations, + 0664, + zbewalgo_combinations_show, + zbewalgo_combinations_store); +static struct kobj_attribute zbewalgo_max_output_size_attribute = __ATTR( + max_output_size, + 0664, + zbewalgo_max_output_size_show, + zbewalgo_max_output_size_store); +static struct kobj_attribute zbewalgo_early_abort_size_attribute = __ATTR( + early_abort_size, + 0664, + zbewalgo_early_abort_size_show, + zbewalgo_early_abort_size_store); +static struct kobj_attribute zbewalgo_bwt_max_alphabet_attribute = __ATTR( + bwt_max_alphabet, + 0664, + zbewalgo_bwt_max_alphabet_show, + zbewalgo_bwt_max_alphabet_store); +static struct attribute *attrs[] = { + &zbewalgo_combinations_attribute.attr, + &zbewalgo_max_output_size_attribute.attr, + &zbewalgo_early_abort_size_attribute.attr, + &zbewalgo_bwt_max_alphabet_attribute.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +static struct kobject *zbewalgo_kobj; + +static int __init zbewalgo_mod_init(void) +{ + uint8_t i; + int j; + int res = 0; + + zbewalgo_early_abort_size = 400; + /* + * this algorithm is intended to be used for zram with zsmalloc. + * Therefore zbewalgo_max_output_size equals zsmalloc max size class + */ + i = 0; + zbewalgo_max_output_size = 3264; + zbewalgo_base_algorithms[i++] = alg_bewalgo; + zbewalgo_base_algorithms[i++] = alg_bewalgo2; + zbewalgo_base_algorithms[i++] = alg_bitshuffle; + zbewalgo_base_algorithms[i++] = alg_bwt; + zbewalgo_base_algorithms[i++] = alg_jbe; + zbewalgo_base_algorithms[i++] = alg_jbe2; + zbewalgo_base_algorithms[i++] = alg_mtf; + zbewalgo_base_algorithms[i++] = alg_rle; + zbewalgo_base_algorithms[i++] = alg_huffman; + zbewalgo_base_algorithms_count = i; + /* + * the wrkmem size is the largest wrkmem required by any callable + * algorithm + */ + zbewalgo_wrkmem_size = 0; + for (i = 0; i < zbewalgo_base_algorithms_count; i++) { + res = zbewalgo_base_algorithms[i].init(); + if (res) { + if (i > 0) + zbewalgo_base_algorithms[0].exit(); + i--; + while (i > 0) + zbewalgo_base_algorithms[i].exit(); + return res; + } + zbewalgo_base_algorithms[i].id = i; + zbewalgo_wrkmem_size = max_t(uint32_t, + zbewalgo_wrkmem_size, + zbewalgo_base_algorithms[i].wrkmem_size); + } + /* adding some pages for temporary compression results */ + zbewalgo_wrkmem_size += 4096 * 6; + zbewalgo_main_data_ptr = alloc_percpu(struct zbewalgo_main_data); + for_each_possible_cpu(j) { + memset( + per_cpu_ptr(zbewalgo_main_data_ptr, j), + 0, + sizeof(struct zbewalgo_main_data)); + } + + memset(&zbewalgo_stat_combination[0], 0, 256 * sizeof(atomic64_t)); + memset(&zbewalgo_stat_count[0], 0, 256 * sizeof(atomic64_t)); + + zbewalgo_kobj = kobject_create_and_add("zbewalgo", kernel_kobj); + if (!zbewalgo_kobj) + return -ENOMEM; + res = sysfs_create_group(zbewalgo_kobj, &attr_group); + if (res) { + kobject_put(zbewalgo_kobj); + zbewalgo_combinations_store( + zbewalgo_kobj, + &zbewalgo_combinations_attribute, + "reset", + sizeof("reset")); + } + return res; +} + +static void __exit zbewalgo_mod_fini(void) +{ + uint16_t i; + uint64_t tmp; + + kobject_put(zbewalgo_kobj); + for (i = 0; i < zbewalgo_base_algorithms_count; i++) + zbewalgo_base_algorithms[i].exit(); + free_percpu(zbewalgo_main_data_ptr); + /* log statistics via printk for debugging purpose */ + for (i = 0; i < 256; i++) { + tmp = atomic64_read(&zbewalgo_stat_combination[i]); + if (tmp > 0) + printk(KERN_INFO "%s %4d -> %llu combination\n", + __func__, i, tmp); + } + for (i = 0; i < 256; i++) { + tmp = atomic64_read(&zbewalgo_stat_count[i]); + if (tmp > 0) + printk(KERN_INFO "%s %4d -> %llu counter\n", + __func__, i, tmp); + } +} + +module_init(zbewalgo_mod_init); +module_exit(zbewalgo_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ZBeWalgo Compression Algorithm"); + -- 2.11.0