Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently ZRAM uses the compression-algorithms from the crypto-api.
None of the current compression-algorithms in the crypto-api is designed
to compress 4KiB chunks of data efficiently.
This patch adds a new compression-algorithm especially designed for ZRAM,
to compress small pieces of data more efficiently.

Benchmarks:

To obtain reproduceable benchmarks, the datasets were first loaded into
a userspace-program. Than the data is written directly to a clean
zram-partition without any filesystem. Between writing and reading
'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time
measurements are wall clock times, and the benchmarks are using only
one cpu-core at a time. The new algorithm is compared to all available
compression algorithms from the crypto-api.


- '/dev/zero' (8 GiB) This dataset is used to measure the speed
  limitations for ZRAM. ZRAM filters zero-data internally and does not
  even call the specified compression algorithm.

algorithm | compression   | decompression
 --zram-- | 2913.92 MiB/s | 3116.38 MiB/s


 - 'ecoham' (100 MiB) This dataset is one of the input files for the
  scientific application ECOHAM which runs an ocean simulation. This
  dataset contains a lot of zeros. Where the data is not zero there are
  arrays of floating point values, neighboring float values are likely
  to be similar to each other, allowing high compression ratios.

algorithm | ratio | compression   | decompression
zbewalgo  | 12.77 |  417.35 MiB/s | 1306.21 MiB/s
deflate   | 12.54 |   73.51 MiB/s |  788.85 MiB/s
842       | 12.26 |  199.22 MiB/s |  675.74 MiB/s
lz4hc     | 12.00 |   49.99 MiB/s | 1787.79 MiB/s
lz4       | 10.68 | 1356.77 MiB/s | 1690.34 MiB/s
lzo       |  9.79 | 1363.05 MiB/s | 1640.70 MiB/s


 - 'source-code' (800 MiB) This dataset is a tarball of the source-code
  from a linux-kernel.

algorithm | ratio | compression   | decompression
deflate   |  3.27 |   42.36 MiB/s |  256.22 MiB/s
lz4hc     |  2.40 |   99.62 MiB/s | 1187.15 MiB/s
lzo       |  2.27 |  432.95 MiB/s |  944.74 MiB/s
lz4       |  2.18 |  444.07 MiB/s | 1159.49 MiB/s
zbewalgo  |  1.89 |   43.44 MiB/s |   35.73 MiB/s
842       |  1.65 |   62.73 MiB/s |  149.71 MiB/s


 - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the
  running hpcg-benchmark. At the time of the snapshot, that application
  performed a sparse matrix - vector multiplication.

algorithm | ratio | compression   | decompression
zbewalgo  | 15.88 |  174.49 MiB/s |  372.03 MiB/s
deflate   |  9.52 |   64.47 MiB/s |  705.95 MiB/s
lz4hc     |  4.96 |  190.97 MiB/s | 1727.96 MiB/s
842       |  4.20 |  144.33 MiB/s |  284.99 MiB/s
lzo       |  4.14 |  910.92 MiB/s | 1272.87 MiB/s
lz4       |  3.79 |  868.92 MiB/s | 1500.85 MiB/s


 - 'partdiff' (8 GiB) Array of double values. Neighbored values are
  similar, but not equal. This array is produced by an partial
  differential equation solver using a Jakobi-implementation.

algorithm | ratio | compression   | decompression
zbewalgo  |  1.17 |  220.36 MiB/s |  678.05 MiB/s
deflate   |  1.02 |   36.87 MiB/s | 1191.34 MiB/s
lzo       |  1.00 | 1727.67 MiB/s | 2103.98 MiB/s
lz4       |  1.00 | 1417.10 MiB/s | 2127.88 MiB/s
lz4hc     |  1.00 |  150.14 MiB/s | 2130.08 MiB/s
842       |  1.00 |   63.99 MiB/s | 2131.95 MiB/s


In the category 'compression ratio', the new algorithm zbewalgo is often
the best choice. The faster algorithms with lower compression ratio are
going to fill the zram-partition faster and finally writing their data
to a backing device - which in turn is much slower than imediately using the
zbewalgo algorithm.

Signed-off-by: Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>

---
 crypto/Kconfig                   |   8 +
 crypto/Makefile                  |   1 +
 crypto/zbewalgo.c                | 207 +++++++++++++++++++++
 include/linux/zbewalgo.h         |  59 ++++++
 lib/Kconfig                      |   3 +
 lib/Makefile                     |   1 +
 lib/zbewalgo/BWT.h               | 116 ++++++++++++
 lib/zbewalgo/JBE.h               | 135 ++++++++++++++
 lib/zbewalgo/JBE2.h              | 139 ++++++++++++++
 lib/zbewalgo/MTF.h               | 102 ++++++++++
 lib/zbewalgo/Makefile            |   3 +
 lib/zbewalgo/RLE.h               | 134 ++++++++++++++
 lib/zbewalgo/bewalgo.h           | 378 ++++++++++++++++++++++++++++++++++++++
 lib/zbewalgo/bewalgo2.h          | 388 +++++++++++++++++++++++++++++++++++++++
 lib/zbewalgo/bitshuffle.h        | 105 +++++++++++
 lib/zbewalgo/huffman.h           | 232 +++++++++++++++++++++++
 lib/zbewalgo/include.h           | 151 +++++++++++++++
 lib/zbewalgo/settings.h          | 220 ++++++++++++++++++++++
 lib/zbewalgo/zbewalgo_compress.c | 372 +++++++++++++++++++++++++++++++++++++
 19 files changed, 2754 insertions(+)
 create mode 100644 crypto/zbewalgo.c
 create mode 100644 include/linux/zbewalgo.h
 create mode 100644 lib/zbewalgo/BWT.h
 create mode 100644 lib/zbewalgo/JBE.h
 create mode 100644 lib/zbewalgo/JBE2.h
 create mode 100644 lib/zbewalgo/MTF.h
 create mode 100644 lib/zbewalgo/Makefile
 create mode 100644 lib/zbewalgo/RLE.h
 create mode 100644 lib/zbewalgo/bewalgo.h
 create mode 100644 lib/zbewalgo/bewalgo2.h
 create mode 100644 lib/zbewalgo/bitshuffle.h
 create mode 100644 lib/zbewalgo/huffman.h
 create mode 100644 lib/zbewalgo/include.h
 create mode 100644 lib/zbewalgo/settings.h
 create mode 100644 lib/zbewalgo/zbewalgo_compress.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b44c0ae04..f63f543e9 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1667,6 +1667,14 @@ config CRYPTO_LZ4
 	help
 	  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+	tristate "ZBeWalgo compression algorithm"
+	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
+	select ZBEWALGO_COMPRESS
+	help
+	  This is the ZBeWalgo algorithm.
+
 config CRYPTO_LZ4HC
 	tristate "LZ4HC compression algorithm"
 	select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
new file mode 100644
index 000000000..3578b9b54
--- /dev/null
+++ b/crypto/zbewalgo.c
@@ -0,0 +1,207 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <crypto/internal/scompress.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include "linux/zbewalgo.h"
+
+
+struct zbewalgo_ctx {
+	void *zbewalgo_comp_mem;
+};
+
+static void *zbewalgo_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = vmalloc(zbewalgo_get_wrkmem_size());
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+	return ctx;
+}
+
+static int zbewalgo_init(struct crypto_tfm *tfm)
+{
+	struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->zbewalgo_comp_mem = zbewalgo_alloc_ctx(NULL);
+	if (IS_ERR(ctx->zbewalgo_comp_mem))
+		return -ENOMEM;
+	return 0;
+}
+
+static void zbewalgo_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	vfree(ctx);
+}
+
+static void zbewalgo_exit(struct crypto_tfm *tfm)
+{
+	struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	zbewalgo_free_ctx(NULL, ctx->zbewalgo_comp_mem);
+}
+
+static int __zbewalgo_compress_crypto(
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen,
+	void *ctx)
+{
+	int out_len;
+	if(slen > 4096)
+		return -EINVAL;
+	out_len = zbewalgo_compress(src, dst, ctx, slen);
+	if (!out_len)
+		return -EINVAL;
+	*dlen = out_len;
+	return 0;
+}
+
+static int zbewalgo_scompress(
+	struct crypto_scomp *tfm,
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen,
+	void *ctx)
+{
+	return __zbewalgo_compress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int zbewalgo_compress_crypto(
+	struct crypto_tfm *tfm,
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen)
+{
+	struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zbewalgo_compress_crypto(
+		src,
+		slen,
+		dst,
+		dlen,
+		ctx->zbewalgo_comp_mem);
+}
+
+static int __zbewalgo_decompress_crypto(
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen,
+	void *ctx)
+{
+	int out_len;
+
+	out_len = zbewalgo_decompress(src, dst, ctx);
+	if (out_len < 0)
+		return -EINVAL;
+	return 0;
+}
+
+static int zbewalgo_sdecompress(
+	struct crypto_scomp *tfm,
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen,
+	void *ctx)
+{
+	return __zbewalgo_decompress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int zbewalgo_decompress_crypto(
+	struct crypto_tfm *tfm,
+	const u8 *src,
+	unsigned int slen,
+	u8 *dst,
+	unsigned int *dlen)
+{
+	struct zbewalgo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zbewalgo_decompress_crypto(
+		src,
+		slen,
+		dst,
+		dlen,
+		ctx->zbewalgo_comp_mem);
+}
+
+static struct crypto_alg crypto_alg_zbewalgo = {
+	.cra_name = "zbewalgo",
+	.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize = sizeof(struct zbewalgo_ctx),
+	.cra_module = THIS_MODULE,
+	.cra_init = zbewalgo_init,
+	.cra_exit = zbewalgo_exit,
+	.cra_u = {
+		.compress = {
+			.coa_compress = zbewalgo_compress_crypto,
+			.coa_decompress = zbewalgo_decompress_crypto
+		}
+	}
+};
+
+static struct scomp_alg scomp = {
+	.alloc_ctx = zbewalgo_alloc_ctx,
+	 .free_ctx = zbewalgo_free_ctx,
+	 .compress = zbewalgo_scompress,
+	 .decompress = zbewalgo_sdecompress,
+	 .base = {
+		 .cra_name = "zbewalgo",
+		 .cra_driver_name = "zbewalgo-scomp",
+		 .cra_module = THIS_MODULE,
+	}
+};
+
+static int __init zbewalgo_mod_init(void)
+{
+	int ret;
+
+	ret = crypto_register_alg(&crypto_alg_zbewalgo);
+	if (ret)
+		return ret;
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&crypto_alg_zbewalgo);
+		return ret;
+	}
+	return ret;
+}
+
+static void __exit zbewalgo_mod_fini(void)
+{
+	crypto_unregister_alg(&crypto_alg_zbewalgo);
+	crypto_unregister_scomp(&scomp);
+}
+
+module_init(zbewalgo_mod_init);
+module_exit(zbewalgo_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ZBeWalgo Compression Algorithm");
+MODULE_ALIAS_CRYPTO("zbewalgo");
diff --git a/include/linux/zbewalgo.h b/include/linux/zbewalgo.h
new file mode 100644
index 000000000..0dd40816e
--- /dev/null
+++ b/include/linux/zbewalgo.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_INCLUDE_H
+#define ZBEWALGO_INCLUDE_H
+
+/*
+ * zbewalgo_get_wrkmem_size must be used to determine the size which is
+ * required for allocating the working memory for the compression and
+ * decompression algorithm
+ */
+int zbewalgo_get_wrkmem_size(void);
+
+/*
+ * this function compresses the data given by 'source' into the
+ * preallocated memory given by 'dest'.
+ * The maximum allowed source_length is 4096 Bytes. If larger values are
+ * given, the resulting data might be corrupt.
+ * If the data is not compressible the function returns -1. Otherwise the
+ * size of the compressed data is returned.
+ * wrkmem must point to a memory region of at least the size returned by
+ * 'zbewalgo_get_wrkmem_size'.
+ */
+int zbewalgo_compress(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length);
+
+/*
+ * this function decompresses data which was already successfully compressed with
+ * 'zbewalgo_compress'.
+ * the function decompresses the data given by 'source' into the preallocated
+ * buffer 'dest'.
+ * wrkmem must point to a memory region of at least the size returned by
+ * 'zbewalgo_get_wrkmem_size'.
+ * the wrkmem for compression and decompression dont need to be the same
+ */
+int zbewalgo_decompress(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index c5e84fbcb..ad72aeacc 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -239,6 +239,9 @@ config LZO_DECOMPRESS
 config LZ4_COMPRESS
 	tristate
 
+config ZBEWALGO_COMPRESS
+	tristate
+
 config LZ4HC_COMPRESS
 	tristate
 
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8..a6a65c183 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -120,6 +120,7 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 obj-$(CONFIG_LZ4_COMPRESS) += lz4/
 obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
 obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
+obj-$(CONFIG_ZBEWALGO_COMPRESS) += zbewalgo/
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd/
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/
 obj-$(CONFIG_XZ_DEC) += xz/
diff --git a/lib/zbewalgo/BWT.h b/lib/zbewalgo/BWT.h
new file mode 100644
index 000000000..e04a60a61
--- /dev/null
+++ b/lib/zbewalgo/BWT.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_BWT_H
+#define ZBEWALGO_BWT_H
+
+#include "include.h"
+
+/*
+ * implementation of the Burrows-Wheeler transformation. Optimized for speed
+ * and small input sizes
+ */
+static __always_inline int compress_bwt(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	uint16_t * const C = (uint16_t *) wrkmem;
+	uint16_t i;
+	uint16_t alphabet;
+	uint8_t * const op = dest + 3;
+	*dest = source[source_length - 1];
+	*((uint16_t *) (dest + 1)) = source_length;
+	memset(C, 0, 512);
+	for (i = 0; i < source_length; i++)
+		C[source[i]]++;
+	alphabet = (C[0] > 0);
+	for (i = 1; i < 256; i++) {
+		alphabet += (C[i] > 0);
+		C[i] += C[i - 1];
+	}
+	if (alphabet > zbewalgo_bwt_max_alphabet)
+		return -1;
+	i = source_length - 1;
+	C[source[i]]--;
+	op[C[source[i]]] = source[i - 1];
+	i--;
+	while (i > 0) {
+		C[source[i]]--;
+		op[C[source[i]]] = source[i - 1];
+		i--;
+	}
+	C[source[0]]--;
+	op[C[source[0]]] = source[source_length - 1];
+	return source_length + 3;
+}
+
+/*
+ * reverses the transformation
+ */
+static __always_inline int decompress_bwt(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem)
+{
+	const uint16_t dest_length = *((uint16_t *) (source + 1));
+	uint16_t * const C = (uint16_t *) wrkmem;
+	uint8_t * const L = wrkmem + 512;
+	uint8_t key = *source;
+	uint8_t *dest_end = dest + dest_length;
+	const uint8_t *ip = source + 3;
+	int i, j;
+
+	memset(C, 0, 512);
+	for (i = 0; i < dest_length; i++)
+		C[ip[i]]++;
+	for (i = 1; i < 256; i++)
+		C[i] += C[i - 1];
+	j = 0;
+	for (i = 0; i < 256; i++)
+		while (j < C[i])
+			L[j++] = i;
+	do {
+		C[key]--;
+		*--dest_end = L[C[key]];
+		key = ip[C[key]];
+	} while (dest < dest_end);
+	return dest_length;
+}
+
+static int init_bwt(void)
+{
+	return 0;
+}
+
+static void exit_bwt(void)
+{
+}
+
+static struct zbewalgo_alg alg_bwt = {
+	.name = "bwt", //
+	.flags = ZBEWALGO_ALG_FLAG_TRANSFORM, //
+	.wrkmem_size = PAGE_SIZE << 1, //
+	.init = init_bwt, //
+	.exit = exit_bwt, //
+	.compress = compress_bwt, //
+	.decompress = decompress_bwt //
+};
+
+#endif
diff --git a/lib/zbewalgo/JBE.h b/lib/zbewalgo/JBE.h
new file mode 100644
index 000000000..38fc76469
--- /dev/null
+++ b/lib/zbewalgo/JBE.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_JBE_H
+#define ZBEWALGO_JBE_H
+
+#include "include.h"
+
+/*
+ * Implementation of the J-Bit encoding as published by
+ * 'I Made Agus Dwi Suarjaya' 2012
+ */
+static __always_inline int compress_jbe(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)),
+	const uint16_t source_length)
+{
+	uint64_t source_tmp;
+	uint8_t tmp;
+	const uint64_t *source_end = (uint64_t *) (source + source_length);
+	const uint64_t *ip = (uint64_t *) source;
+	uint8_t *dataI = dest + 2;
+	uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(source_length);
+	uint8_t * const source_tmp_ptr = (uint8_t *) (&source_tmp);
+	*(uint16_t *) dest = source_length;
+	do {
+		source_tmp = *ip++;
+		tmp = source_tmp_ptr[0] > 0;
+		*dataII = source_tmp_ptr[0];
+		*dataI = tmp << 7;
+		dataII += tmp;
+		tmp = source_tmp_ptr[1] > 0;
+		*dataII = source_tmp_ptr[1];
+		*dataI |= tmp << 6;
+		dataII += tmp;
+		tmp = source_tmp_ptr[2] > 0;
+		*dataII = source_tmp_ptr[2];
+		*dataI |= tmp << 5;
+		dataII += tmp;
+		tmp = source_tmp_ptr[3] > 0;
+		*dataII = source_tmp_ptr[3];
+		*dataI |= tmp << 4;
+		dataII += tmp;
+		tmp = source_tmp_ptr[4] > 0;
+		*dataII = source_tmp_ptr[4];
+		*dataI |= tmp << 3;
+		dataII += tmp;
+		tmp = source_tmp_ptr[5] > 0;
+		*dataII = source_tmp_ptr[5];
+		*dataI |= tmp << 2;
+		dataII += tmp;
+		tmp = source_tmp_ptr[6] > 0;
+		*dataII = source_tmp_ptr[6];
+		*dataI |= tmp << 1;
+		dataII += tmp;
+		tmp = source_tmp_ptr[7] > 0;
+		*dataII = source_tmp_ptr[7];
+		*dataI |= tmp;
+		dataII += tmp;
+		dataI++;
+	} while (ip < source_end);
+	return dataII - dest;
+}
+
+static __always_inline int decompress_jbe(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	uint64_t dest_tmp;
+	const uint16_t dest_length = *(uint16_t *) source;
+	const uint8_t *dataI = source + 2;
+	const uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(dest_length);
+	const uint64_t *dest_end = (uint64_t *) (dest + dest_length);
+	uint8_t * const dest_tmp_ptr = (uint8_t *) (&dest_tmp);
+	uint64_t *op = (uint64_t *) dest;
+
+	do {
+		dest_tmp_ptr[0] = ((*dataI & 0x80) ? *dataII : 0);
+		dataII += (*dataI & 0x80) > 0;
+		dest_tmp_ptr[1] = ((*dataI & 0x40) ? *dataII : 0);
+		dataII += (*dataI & 0x40) > 0;
+		dest_tmp_ptr[2] = ((*dataI & 0x20) ? *dataII : 0);
+		dataII += (*dataI & 0x20) > 0;
+		dest_tmp_ptr[3] = ((*dataI & 0x10) ? *dataII : 0);
+		dataII += (*dataI & 0x10) > 0;
+		dest_tmp_ptr[4] = ((*dataI & 0x08) ? *dataII : 0);
+		dataII += (*dataI & 0x08) > 0;
+		dest_tmp_ptr[5] = ((*dataI & 0x04) ? *dataII : 0);
+		dataII += (*dataI & 0x04) > 0;
+		dest_tmp_ptr[6] = ((*dataI & 0x02) ? *dataII : 0);
+		dataII += (*dataI & 0x02) > 0;
+		dest_tmp_ptr[7] = ((*dataI & 0x01) ? *dataII : 0);
+		dataII += (*dataI & 0x01) > 0;
+		dataI++;
+		*op++ = dest_tmp;
+	} while (op < dest_end);
+	return dest_length;
+}
+
+static int init_jbe(void)
+{
+	return 0;
+}
+
+static void exit_jbe(void)
+{
+}
+
+static struct zbewalgo_alg alg_jbe = {
+	.name = "jbe",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = 0,
+	.init = init_jbe,
+	.exit = exit_jbe,
+	.compress = compress_jbe,
+	.decompress = decompress_jbe
+};
+#endif
diff --git a/lib/zbewalgo/JBE2.h b/lib/zbewalgo/JBE2.h
new file mode 100644
index 000000000..346a470bd
--- /dev/null
+++ b/lib/zbewalgo/JBE2.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_JBE2_H
+#define ZBEWALGO_JBE2_H
+
+#include "include.h"
+
+/*
+ * Variant of the J-Bit encoding published by 'I Made Agus Dwi Suarjaya' 2012
+ */
+static __always_inline int compress_jbe2(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)),
+	const uint16_t source_length)
+{
+	uint64_t source_tmp;
+	uint8_t tmp;
+	const uint64_t *source_end = (uint64_t *) (source + source_length);
+	const uint64_t *ip = (uint64_t *) source;
+	uint8_t *dataI = dest + 2;
+	uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(source_length);
+	uint8_t * const source_tmp_ptr = (uint8_t *) (&source_tmp);
+	*(uint16_t *) dest = source_length;
+	do {
+		source_tmp = *ip++;
+		source_tmp = (source_tmp & 0xF0F0F0F00F0F0F0F)
+			  | ((source_tmp & 0x0F0F0F0F00000000) >> 28)
+			  | ((source_tmp & 0x00000000F0F0F0F0) << 28);
+		tmp = source_tmp_ptr[0] > 0;
+		*dataII = source_tmp_ptr[0];
+		*dataI = tmp << 7;
+		dataII += tmp;
+		tmp = source_tmp_ptr[1] > 0;
+		*dataII = source_tmp_ptr[1];
+		*dataI |= tmp << 6;
+		dataII += tmp;
+		tmp = source_tmp_ptr[2] > 0;
+		*dataII = source_tmp_ptr[2];
+		*dataI |= tmp << 5;
+		dataII += tmp;
+		tmp = source_tmp_ptr[3] > 0;
+		*dataII = source_tmp_ptr[3];
+		*dataI |= tmp << 4;
+		dataII += tmp;
+		tmp = source_tmp_ptr[4] > 0;
+		*dataII = source_tmp_ptr[4];
+		*dataI |= tmp << 3;
+		dataII += tmp;
+		tmp = source_tmp_ptr[5] > 0;
+		*dataII = source_tmp_ptr[5];
+		*dataI |= tmp << 2;
+		dataII += tmp;
+		tmp = source_tmp_ptr[6] > 0;
+		*dataII = source_tmp_ptr[6];
+		*dataI |= tmp << 1;
+		dataII += tmp;
+		tmp = source_tmp_ptr[7] > 0;
+		*dataII = source_tmp_ptr[7];
+		*dataI |= tmp;
+		dataII += tmp;
+		dataI++;
+	} while (ip < source_end);
+	return dataII - dest;
+}
+
+static __always_inline int decompress_jbe2(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	uint64_t dest_tmp;
+	const uint16_t dest_length = *(uint16_t *) source;
+	const uint8_t *dataI = source + 2;
+	const uint8_t *dataII = dataI + DIV_BY_8_ROUND_UP(dest_length);
+	const uint64_t *dest_end = (uint64_t *) (dest + dest_length);
+	uint8_t * const dest_tmp_ptr = (uint8_t *) (&dest_tmp);
+	uint64_t *op = (uint64_t *) dest;
+
+	do {
+		dest_tmp_ptr[0] = ((*dataI & 0x80) ? *dataII : 0);
+		dataII += (*dataI & 0x80) > 0;
+		dest_tmp_ptr[1] = ((*dataI & 0x40) ? *dataII : 0);
+		dataII += (*dataI & 0x40) > 0;
+		dest_tmp_ptr[2] = ((*dataI & 0x20) ? *dataII : 0);
+		dataII += (*dataI & 0x20) > 0;
+		dest_tmp_ptr[3] = ((*dataI & 0x10) ? *dataII : 0);
+		dataII += (*dataI & 0x10) > 0;
+		dest_tmp_ptr[4] = ((*dataI & 0x08) ? *dataII : 0);
+		dataII += (*dataI & 0x08) > 0;
+		dest_tmp_ptr[5] = ((*dataI & 0x04) ? *dataII : 0);
+		dataII += (*dataI & 0x04) > 0;
+		dest_tmp_ptr[6] = ((*dataI & 0x02) ? *dataII : 0);
+		dataII += (*dataI & 0x02) > 0;
+		dest_tmp_ptr[7] = ((*dataI & 0x01) ? *dataII : 0);
+		dataII += (*dataI & 0x01) > 0;
+		dataI++;
+		dest_tmp = (dest_tmp & 0xF0F0F0F00F0F0F0F)
+			| ((dest_tmp & 0x0F0F0F0F00000000) >> 28)
+			| ((dest_tmp & 0x00000000F0F0F0F0) << 28);
+		*op++ = dest_tmp;
+	} while (op < dest_end);
+	return dest_length;
+}
+static int init_jbe2(void)
+{
+	return 0;
+}
+
+static void exit_jbe2(void)
+{
+}
+
+static struct zbewalgo_alg alg_jbe2 = {
+	.name = "jbe2",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = 0,
+	.init = init_jbe2,
+	.exit = exit_jbe2,
+	.compress = compress_jbe2,
+	.decompress = decompress_jbe2
+};
+#endif
diff --git a/lib/zbewalgo/MTF.h b/lib/zbewalgo/MTF.h
new file mode 100644
index 000000000..bd8af364c
--- /dev/null
+++ b/lib/zbewalgo/MTF.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_MTF_H
+#define ZBEWALGO_MTF_H
+
+#include "include.h"
+
+/*
+ * Move To Front encoding
+ */
+static __always_inline int compress_mtf(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	const uint8_t *source_end = source + source_length;
+	const uint8_t *ip = source;
+	uint8_t *op = dest + 2;
+	uint16_t i;
+	uint8_t tmp;
+	*(uint16_t *) dest = source_length;
+	for (i = 0; i < 256; i++)
+		wrkmem[i] = i;
+	do {
+		i = 0;
+		while (wrkmem[i] != *ip)
+			i++;
+		ip++;
+		*op++ = i;
+		tmp = wrkmem[i];
+		while (i > 0) {
+			wrkmem[i] = wrkmem[i - 1];
+			i--;
+		}
+		wrkmem[0] = tmp;
+	} while (ip < source_end);
+	return source_length + 2;
+}
+
+static __always_inline int decompress_mtf(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem)
+{
+	const uint16_t dest_length = *(uint16_t *) source;
+	uint8_t *dest_end = dest + dest_length;
+	uint16_t i;
+	uint8_t tmp;
+	const uint8_t *ip = source + 2;
+	uint8_t *op = dest;
+
+	for (i = 0; i < 256; i++)
+		wrkmem[i] = i;
+	do {
+		i = *ip++;
+		*op++ = wrkmem[i];
+		tmp = wrkmem[i];
+		while (i > 0) {
+			wrkmem[i] = wrkmem[i - 1];
+			i--;
+		}
+		wrkmem[0] = tmp;
+	} while (op < dest_end);
+	return dest_length;
+}
+
+static int init_mtf(void)
+{
+	return 0;
+}
+
+static void exit_mtf(void)
+{
+}
+
+static struct zbewalgo_alg alg_mtf = {
+	.name = "mtf", //
+	.flags = ZBEWALGO_ALG_FLAG_TRANSFORM, //
+	.wrkmem_size = 1 << 8, //
+	.init = init_mtf, //
+	.exit = exit_mtf, //
+	.compress = compress_mtf, //
+	.decompress = decompress_mtf //
+};
+#endif
diff --git a/lib/zbewalgo/Makefile b/lib/zbewalgo/Makefile
new file mode 100644
index 000000000..0ab2159fc
--- /dev/null
+++ b/lib/zbewalgo/Makefile
@@ -0,0 +1,3 @@
+ccflags-y += -O3 -fno-tree-vrp
+
+obj-$(CONFIG_ZBEWALGO_COMPRESS) += zbewalgo_compress.o
diff --git a/lib/zbewalgo/RLE.h b/lib/zbewalgo/RLE.h
new file mode 100644
index 000000000..7ecd284bf
--- /dev/null
+++ b/lib/zbewalgo/RLE.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_RLE_H
+#define ZBEWALGO_RLE_H
+
+#include "include.h"
+
+#define RLE_REPEAT 0x80
+#define RLE_SIMPLE 0x00
+#define RLE_MAX_LENGTH ((1 << 7) - 1)
+
+
+/*
+ * Run Length Encoder
+ */
+static __always_inline int compress_rle(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)),
+	const uint16_t source_length)
+{
+	const uint8_t *anchor = source;
+	const uint8_t *source_end = source + source_length;
+	unsigned int count;
+	uint8_t lastval;
+	uint8_t *op = dest + 2;
+	const uint8_t *ip = source;
+	*((uint16_t *) dest) = source_length;
+	while (1) {
+		// RLE_SIMPLE
+		do {
+			lastval = *ip++;
+		} while ((ip < source_end) && (lastval != *ip));
+		count = ip - anchor - (ip < source_end);
+		if (count > 0) {
+			while (count > RLE_MAX_LENGTH) {
+				*op++ = RLE_SIMPLE | RLE_MAX_LENGTH;
+				memcpy(op, anchor, RLE_MAX_LENGTH + 1);
+				anchor += RLE_MAX_LENGTH + 1;
+				op += RLE_MAX_LENGTH + 1;
+				count -= RLE_MAX_LENGTH + 1;
+			}
+			if (count > 0) {
+				*op++ = RLE_SIMPLE | (count - 1);
+				memcpy(op, anchor, count);
+				anchor += count;
+				op += count;
+			}
+		}
+		if (ip == source_end)
+			return op - dest;
+		// RLE_REPEAT
+		do {
+			lastval = *ip++;
+		} while ((ip < source_end) && (lastval == *ip));
+		count = ip - anchor;
+		if (count > 0) {
+			anchor += count;
+			while (count > RLE_MAX_LENGTH) {
+				*op++ = RLE_REPEAT | RLE_MAX_LENGTH;
+				*op++ = lastval;
+				count -= RLE_MAX_LENGTH + 1;
+			}
+			if (count > 0) {
+				*op++ = RLE_REPEAT | (count - 1);
+				*op++ = lastval;
+			}
+		}
+		if (ip == source_end)
+			return op - dest;
+	}
+}
+
+static __always_inline int decompress_rle(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	const uint16_t source_length = *((uint16_t *) source);
+	const uint8_t *dest_end = dest + source_length;
+	unsigned int length;
+	const uint8_t *ip = source + 2;
+	uint8_t *op = dest;
+
+	do {
+		if ((*ip & RLE_REPEAT) == RLE_REPEAT) {
+			length = *ip++ - RLE_REPEAT + 1;
+			memset(op, *ip++, length);
+			op += length;
+		} else {
+			length = *ip++ - RLE_SIMPLE + 1;
+			memcpy(op, ip, length);
+			ip += length;
+			op += length;
+		}
+	} while (op < dest_end);
+	return op - dest;
+}
+
+static int init_rle(void)
+{
+	return 0;
+}
+
+static void exit_rle(void)
+{
+}
+
+static struct zbewalgo_alg alg_rle = {
+	.name = "rle",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = 0,
+	.init = init_rle,
+	.exit = exit_rle,
+	.compress = compress_rle,
+	.decompress = decompress_rle
+};
+#endif
diff --git a/lib/zbewalgo/bewalgo.h b/lib/zbewalgo/bewalgo.h
new file mode 100644
index 000000000..80b6e9da9
--- /dev/null
+++ b/lib/zbewalgo/bewalgo.h
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_BEWALGO_H
+#define ZBEWALGO_BEWALGO_H
+
+#include "include.h"
+
+#define BEWALGO_ACCELERATION_DEFAULT 1
+
+#define BEWALGO_MEMORY_USAGE 14
+
+#define BEWALGO_SKIPTRIGGER 6
+
+#define BEWALGO_LENGTH_BITS 8
+#define BEWALGO_LENGTH_MAX ((1 << BEWALGO_LENGTH_BITS) - 1)
+#define BEWALGO_OFFSET_BITS 16
+#define BEWALGO_OFFSET_MAX ((1 << BEWALGO_OFFSET_BITS) - 1)
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#define BEWALGO_HASHLOG (BEWALGO_MEMORY_USAGE - 2)
+#define BEWALGO_HASH_SIZE_uint32_t (1 << BEWALGO_HASHLOG)
+struct bewalgo_compress_internal {
+	uint32_t table[BEWALGO_HASH_SIZE_uint32_t];
+};
+
+#define bewalgo_copy_helper_src(dest, src, target) \
+do { \
+	while ((src) < (target) - 3) { \
+		(dest)[0] = (src)[0]; \
+		(dest)[1] = (src)[1]; \
+		(dest)[2] = (src)[2]; \
+		(dest)[3] = (src)[3]; \
+		(dest) += 4; \
+		(src) += 4; \
+	} \
+	if ((src) < (target) - 1) { \
+		(dest)[0] = (src)[0]; \
+		(dest)[1] = (src)[1]; \
+		(dest) += 2; \
+		(src) += 2; \
+	} \
+	if ((src) < (target)) \
+		*(dest)++ = *(src)++; \
+} while (0)
+
+static __always_inline int decompress_bewalgo(
+	const uint8_t * const source_,
+	uint8_t * const dest_,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	const uint64_t * const source = (const uint64_t *) source_;
+	uint64_t * const dest = (uint64_t *) dest_;
+	const uint64_t *ip;
+	uint64_t *match;
+	uint64_t *op;
+	const uint64_t *dest_end_ptr;
+	const uint8_t *controll_block_ptr;
+	const uint64_t *target;
+	uint32_t to_read;
+	uint32_t avail;
+	const uint16_t dest_length = *(uint16_t *) source;
+	const uint32_t dest_end = DIV_BY_8_ROUND_UP(dest_length);
+
+	ip = (uint64_t *) (((uint8_t *) source) + 2);
+	controll_block_ptr = (uint8_t *) ip;
+	match = op = dest;
+	dest_end_ptr = dest_end + dest;
+	do {
+		if (unlikely(dest_end_ptr <= op))
+			goto _last_control_block;
+		controll_block_ptr = (uint8_t *) ip;
+		ip++;
+		target = ip + controll_block_ptr[0];
+		bewalgo_copy_helper_src(op, ip, target);
+		match = op - ((uint16_t *) controll_block_ptr)[1];
+		target = match + controll_block_ptr[1];
+		bewalgo_copy_helper_src(op, match, target);
+		target = ip + controll_block_ptr[4];
+		bewalgo_copy_helper_src(op, ip, target);
+		match = op - ((uint16_t *) controll_block_ptr)[3];
+		target = match + controll_block_ptr[5];
+		bewalgo_copy_helper_src(op, match, target);
+	} while (1);
+_last_control_block:
+	to_read = controll_block_ptr[0];
+	avail = dest_end_ptr - op;
+	target = ip + MIN(to_read, avail);
+	bewalgo_copy_helper_src(op, ip, target);
+	match = op - ((uint16_t *) controll_block_ptr)[1];
+	to_read = controll_block_ptr[1];
+	avail = dest_end_ptr - op;
+	target = match + MIN(to_read, avail);
+	bewalgo_copy_helper_src(op, match, target);
+	to_read = controll_block_ptr[4];
+	avail = dest_end_ptr - op;
+	target = ip + MIN(to_read, avail);
+	bewalgo_copy_helper_src(op, ip, target);
+	match = op - ((uint16_t *) controll_block_ptr)[3];
+	to_read = controll_block_ptr[5];
+	avail = dest_end_ptr - op;
+	target = match + MIN(to_read, avail);
+	bewalgo_copy_helper_src(op, match, target);
+	return (op - dest) << 3;
+}
+
+static __always_inline uint32_t bewalgo_compress_hash(uint64_t sequence)
+{
+	return (uint32_t) (
+		((sequence >> 24) * 11400714785074694791ULL)
+		 >> (64 - BEWALGO_HASHLOG));
+}
+
+static __always_inline int compress_bewalgo_(
+	struct bewalgo_compress_internal *wrkmem,
+	const uint64_t * const source,
+	uint64_t * const dest,
+	const uint16_t source_length,
+	uint8_t acceleration)
+{
+	const int acceleration_start =
+		(acceleration < 4 ? 32 >> acceleration : 4);
+	const uint64_t * const dest_end_ptr =
+		dest + zbewalgo_max_output_size;
+	const uint32_t source_end =
+		(source_length >> 3) + ((source_length & 7) > 0);
+	const uint64_t * const source_end_ptr = source + source_end;
+	const uint64_t * const source_end_ptr_1 = source_end_ptr - 1;
+	const uint64_t *match = source;
+	const uint64_t *anchor = source;
+	const uint64_t *ip = source;
+	uint64_t *op = (uint64_t *) (((uint8_t *) dest) + 2);
+	uint8_t *op_control = NULL;
+	uint32_t op_control_available = 0;
+	const uint64_t *target;
+	int length;
+	uint16_t offset;
+	uint32_t h;
+	int j;
+	int tmp_literal_length;
+	int match_nodd;
+	int match_nzero_nodd;
+	*(uint16_t *) dest = source_length;
+	memset(wrkmem, 0, sizeof(struct bewalgo_compress_internal));
+	h = bewalgo_compress_hash(*ip);
+	wrkmem->table[h] = 0;
+	do {
+		j = acceleration_start;
+		length = source_end_ptr_1 - ip;
+		j = j < length ? j : length;
+		for (length = 1; length <= j; length++) {
+			ip++;
+			h = bewalgo_compress_hash(*ip);
+			match = source + wrkmem->table[h];
+			wrkmem->table[h] = ip - source;
+			if (*(uint64_t *) match == *(uint64_t *) ip)
+				goto _find_match_left;
+		}
+		length = acceleration_start
+			+ (acceleration << BEWALGO_SKIPTRIGGER);
+		ip++;
+		do {
+			if (unlikely(ip >= source_end_ptr))
+				goto _encode_last_literal;
+			h = bewalgo_compress_hash(*ip);
+			match = source + wrkmem->table[h];
+			wrkmem->table[h] = ip - source;
+			if (*(uint64_t *) match == *(uint64_t *) ip)
+				goto _find_match_left;
+			ip += (length++ >> BEWALGO_SKIPTRIGGER);
+		} while (1);
+_find_match_left:
+		while ((match != source) && (match[-1] == ip[-1])) {
+			ip--;
+			match--;
+			if (ip == anchor)
+				goto _find_match_right;
+		}
+		length = ip - anchor;
+		tmp_literal_length = length
+			- (op_control_available ? BEWALGO_LENGTH_MAX : 0);
+		if (unlikely(op
+			 + ((tmp_literal_length / (BEWALGO_LENGTH_MAX * 2)))
+			 + ((tmp_literal_length % (BEWALGO_LENGTH_MAX * 2)) > 0)
+			 + length > dest_end_ptr)) {
+			goto _error;
+		}
+		while (length > BEWALGO_LENGTH_MAX) {
+			if (op_control_available == 0) {
+				op_control = (uint8_t *) op;
+				*op++ = 0;
+			}
+			op_control_available = !op_control_available;
+			*op_control = BEWALGO_LENGTH_MAX;
+			op_control += 4;
+			target = anchor + BEWALGO_LENGTH_MAX;
+			bewalgo_copy_helper_src(op, anchor, target);
+			length -= BEWALGO_LENGTH_MAX;
+		}
+		if (likely(length > 0)) {
+			if (op_control_available == 0) {
+				op_control = (uint8_t *) op;
+				*op++ = 0;
+			}
+			op_control_available = !op_control_available;
+			*op_control = length;
+			op_control += 4;
+			bewalgo_copy_helper_src(op, anchor, ip);
+		}
+_find_match_right:
+		do {
+			ip++;
+			match++;
+		} while ((ip < source_end_ptr) && (*match == *ip));
+		length = ip - anchor;
+		offset = ip - match;
+		anchor = ip;
+		if (length > BEWALGO_LENGTH_MAX) {
+			uint32_t control_match_value =
+				(BEWALGO_LENGTH_MAX << 8) | (offset << 16);
+			size_t match_length_div_255 = length / 255;
+			size_t match_length_mod_255 = length % 255;
+			uint32_t match_zero = match_length_mod_255 == 0;
+			uint32_t match_nzero = !match_zero;
+			int control_blocks_needed = match_length_div_255
+				+ match_nzero
+				- op_control_available;
+			if (unlikely((op
+					+ (control_blocks_needed >> 1)
+					+ (control_blocks_needed & 1)
+					) > dest_end_ptr))
+				goto _error;
+			op_control = op_control_available > 0
+				? op_control
+				: (uint8_t *) op;
+			*((uint32_t *) op_control) = control_match_value;
+			match_length_div_255 -= op_control_available > 0;
+			match_nodd = !(match_length_div_255 & 1);
+			match_nzero_nodd = match_nzero && match_nodd;
+			if (match_length_div_255 > 0) {
+				uint64_t control_match_value_long =
+					control_match_value
+					| (((uint64_t) control_match_value)
+						<< 32);
+				target = op
+					+ (match_length_div_255 >> 1)
+					+ (match_length_div_255 & 1);
+				do {
+					*op++ = control_match_value_long;
+				} while (op < target);
+			}
+			op_control = ((uint8_t *) op) - 4;
+			*(uint32_t *) (op_control
+				+ (match_nzero_nodd << 3)) = 0;
+			*(uint32_t *) (op_control
+				+ (match_nzero_nodd << 2)) = 0;
+			*(op_control + (match_nzero_nodd << 2) + 1) =
+				(match_zero & match_nodd)
+				 ? BEWALGO_LENGTH_MAX
+				 : match_length_mod_255;
+			*(uint16_t *) (op_control
+				+ (match_nzero_nodd << 2) + 2) = offset;
+			op_control += match_nzero_nodd << 3;
+			op += match_nzero_nodd;
+			op_control_available =
+				(match_length_div_255 & 1) == match_zero;
+		} else {
+			if (unlikely((op_control_available == 0)
+				&& (op >= dest_end_ptr)
+				&& (op_control[-3] != 0)))
+				goto _error;
+			if (op_control[-3] != 0) {
+				if (op_control_available == 0) {
+					op_control = (uint8_t *) op;
+					*op++ = 0;
+				}
+				op_control_available = !op_control_available;
+				op_control += 4;
+			}
+			op_control[-3] = length;
+			((uint16_t *) op_control)[-1] = offset;
+		}
+		if (unlikely(ip == source_end_ptr))
+			goto _finish;
+		h = bewalgo_compress_hash(*ip);
+		match = source + wrkmem->table[h];
+		wrkmem->table[h] = ip - source;
+		if (*(uint64_t *) match == *(uint64_t *) ip)
+			goto _find_match_right;
+	} while (1);
+_encode_last_literal:
+	length = source_end_ptr - anchor;
+	tmp_literal_length = length
+		- (op_control_available ? BEWALGO_LENGTH_MAX : 0);
+	if (op
+		+ ((tmp_literal_length / (BEWALGO_LENGTH_MAX * 2)))
+		+ ((tmp_literal_length % (BEWALGO_LENGTH_MAX * 2)) > 0)
+		+ length > dest_end_ptr)
+		goto _error;
+	while (length > BEWALGO_LENGTH_MAX) {
+		if (op_control_available == 0) {
+			op_control = (uint8_t *) op;
+			*op++ = 0;
+		}
+		op_control_available = !op_control_available;
+		*op_control = BEWALGO_LENGTH_MAX;
+		op_control += 4;
+		target = anchor + BEWALGO_LENGTH_MAX;
+		bewalgo_copy_helper_src(op, anchor, target);
+		length -= BEWALGO_LENGTH_MAX;
+	}
+	if (length > 0) {
+		if (op_control_available == 0) {
+			op_control = (uint8_t *) op;
+			*op++ = 0;
+		}
+		op_control_available = !op_control_available;
+		*op_control = length;
+		op_control += 4;
+		bewalgo_copy_helper_src(op, anchor, source_end_ptr);
+	}
+_finish:
+	return ((uint8_t *) op) - ((uint8_t *) dest);
+_error:
+	return -1;
+}
+
+static __always_inline int compress_bewalgo(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	return compress_bewalgo_(
+		(struct bewalgo_compress_internal *) wrkmem,
+		(const uint64_t *) source,
+		(uint64_t *) dest,
+		source_length, 1);
+}
+
+static int init_bewalgo(void)
+{
+	return 0;
+}
+
+static void exit_bewalgo(void)
+{
+}
+
+static struct zbewalgo_alg alg_bewalgo = {
+	.name = "bewalgo",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = sizeof(struct bewalgo_compress_internal),
+	.init = init_bewalgo,
+	.exit = exit_bewalgo,
+	.compress = compress_bewalgo,
+	.decompress = decompress_bewalgo
+};
+
+#endif
diff --git a/lib/zbewalgo/bewalgo2.h b/lib/zbewalgo/bewalgo2.h
new file mode 100644
index 000000000..64367f24e
--- /dev/null
+++ b/lib/zbewalgo/bewalgo2.h
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef BEWALGO2_BEWALGO2_H
+#define BEWALGO2_BEWALGO2_H
+
+#include "include.h"
+#define MAX_LITERALS ((zbewalgo_max_output_size >> 3) - 4)
+
+#define OFFSET_BITS 9
+#define OFFSET_SHIFT (16 - OFFSET_BITS)
+#define MATCH_BIT_SHIFT 6
+#define MATCH_BIT_MASK (1 << MATCH_BIT_SHIFT)
+#define LENGTH_BITS 6
+#define LENGTH_MASK ((1 << LENGTH_BITS) - 1)
+#define LEFT 0
+#define RIGHT 1
+#define NEITHER 2
+#define TREE_NODE_NULL 0xffff
+
+
+/*
+ * insert first node into an empty avl-tree
+ * the function returns the index of the node in the preallocated array
+ */
+static __always_inline int avl_insert_first(
+	uint64_t *wrk_literal,
+	uint16_t *wrk_tree,
+	uint16_t *treep,
+	uint64_t target,
+	uint16_t *treesize)
+{
+	uint16_t g_a_tree, g_o_tree2;
+
+	g_a_tree = *treesize;
+	g_o_tree2 = g_a_tree << 2;
+	*treesize = *treesize + 1;
+	wrk_tree[g_o_tree2 + 0] = TREE_NODE_NULL;
+	wrk_tree[g_o_tree2 + 1] = TREE_NODE_NULL;
+	wrk_tree[g_o_tree2 + 2] = NEITHER;
+	wrk_literal[g_a_tree] = target;
+	*treep = g_a_tree;
+	return g_a_tree;
+}
+
+/*
+ * insert a node into a non-empty avl-tree
+ * for speed, the nodes are saved in preallocated arrays
+ * the function returns the index of the node in the preallocated array
+ */
+static __always_inline int avl_insert(
+	uint64_t *wrk_literal,
+	uint16_t *wrk_tree,
+	uint16_t *treep,
+	uint64_t target,
+	uint16_t *treesize)
+{
+	uint16_t *path_top[2];
+	uint16_t g_a_tree, g_o_tree2, g_o_next_step;
+	uint16_t r_1_arr[10];
+	uint16_t path, path2, first[2], second;
+
+	if (unlikely(target == wrk_literal[*treep]))
+		return *treep;
+	path_top[0] = treep;
+	g_o_next_step = target > wrk_literal[*treep];
+	g_o_tree2 = *treep << 2;
+	path_top[wrk_tree[g_o_tree2 + 2] == NEITHER] = treep;
+	treep = &wrk_tree[g_o_tree2 + g_o_next_step];
+	if (unlikely(*treep == TREE_NODE_NULL))
+		goto _insert_new_node;
+_search_node:
+	if (target == wrk_literal[*treep])
+		return *treep;
+	g_o_next_step = target > wrk_literal[*treep];
+	g_o_tree2 = *treep << 2;
+	path_top[wrk_tree[g_o_tree2 + 2] == NEITHER] = treep;
+	treep = &wrk_tree[g_o_tree2 + g_o_next_step];
+	if (likely(*treep != TREE_NODE_NULL))
+		goto _search_node;
+_insert_new_node:
+	g_a_tree = *treesize;
+	g_o_tree2 = g_a_tree << 2;
+	*treesize = *treesize + 1;
+	wrk_tree[g_o_tree2 + 0] = TREE_NODE_NULL;
+	wrk_tree[g_o_tree2 + 1] = TREE_NODE_NULL;
+	wrk_tree[g_o_tree2 + 2] = NEITHER;
+	wrk_literal[g_a_tree] = target;
+	*treep = g_a_tree;
+	path = *path_top[0];
+	path2 = path << 2;
+	if (wrk_tree[path2 + 2] == NEITHER) {
+		while (1) {
+			r_1_arr[0] = target > wrk_literal[path];
+			wrk_tree[path2 + 2] = r_1_arr[0];
+			path = wrk_tree[path2 + r_1_arr[0]];
+			if (target == wrk_literal[path])
+				return g_a_tree;
+			path2 = path << 2;
+		}
+	}
+	first[0] = target > wrk_literal[path];
+	if (wrk_tree[path2 + 2] != first[0]) {
+		wrk_tree[path2 + 2] = NEITHER;
+		r_1_arr[2] = wrk_tree[path2 + first[0]];
+		if (target == wrk_literal[r_1_arr[2]])
+			return g_a_tree;
+		while (1) {
+			r_1_arr[0] = target > wrk_literal[r_1_arr[2]];
+			r_1_arr[1] = r_1_arr[2] << 2;
+			r_1_arr[2] = wrk_tree[r_1_arr[1] + r_1_arr[0]];
+			wrk_tree[r_1_arr[1] + 2] = r_1_arr[0];
+			if (target == wrk_literal[r_1_arr[2]])
+				return g_a_tree;
+		}
+	}
+	second = target > wrk_literal[wrk_tree[path2 + first[0]]];
+	first[1] = 1 - first[0];
+	if (first[0] == second) {
+		r_1_arr[2] = *path_top[0];
+		r_1_arr[3] = r_1_arr[2] << 2;
+		r_1_arr[4] = wrk_tree[r_1_arr[3] + first[0]];
+		r_1_arr[5] = r_1_arr[4] << 2;
+		wrk_tree[r_1_arr[3] + first[0]] =
+			wrk_tree[r_1_arr[5] + first[1]];
+		path = wrk_tree[r_1_arr[5] + first[0]];
+		*path_top[0] = r_1_arr[4];
+		wrk_tree[r_1_arr[5] + first[1]] = r_1_arr[2];
+		wrk_tree[r_1_arr[3] + 2] = NEITHER;
+		wrk_tree[r_1_arr[5] + 2] = NEITHER;
+		if (target == wrk_literal[path])
+			return g_a_tree;
+		while (1) {
+			r_1_arr[0] = target > wrk_literal[path];
+			r_1_arr[1] = path << 2;
+			wrk_tree[r_1_arr[1] + 2] = r_1_arr[0];
+			path = wrk_tree[r_1_arr[1] + r_1_arr[0]];
+			if (target == wrk_literal[path])
+				return g_a_tree;
+		}
+	}
+	path = wrk_tree[(wrk_tree[path2 + first[0]] << 2) + second];
+	r_1_arr[5] = *path_top[0];
+	r_1_arr[1] = r_1_arr[5] << 2;
+	r_1_arr[8] = wrk_tree[r_1_arr[1] + first[0]];
+	r_1_arr[0] = r_1_arr[8] << 2;
+	r_1_arr[6] = wrk_tree[r_1_arr[0] + first[1]];
+	r_1_arr[7] = r_1_arr[6] << 2;
+	r_1_arr[2] = wrk_tree[r_1_arr[7] + first[1]];
+	r_1_arr[3] = wrk_tree[r_1_arr[7] + first[0]];
+	*path_top[0] = r_1_arr[6];
+	wrk_tree[r_1_arr[7] + first[1]] = r_1_arr[5];
+	wrk_tree[r_1_arr[7] + first[0]] = r_1_arr[8];
+	wrk_tree[r_1_arr[1] + first[0]] = r_1_arr[2];
+	wrk_tree[r_1_arr[0] + first[1]] = r_1_arr[3];
+	wrk_tree[r_1_arr[7] + 2] = NEITHER;
+	wrk_tree[r_1_arr[1] + 2] = NEITHER;
+	wrk_tree[r_1_arr[0] + 2] = NEITHER;
+	if (target == wrk_literal[path])
+		return g_a_tree;
+	r_1_arr[9] = (target > wrk_literal[path]) == first[0];
+	wrk_tree[r_1_arr[r_1_arr[9]] + 2] = first[r_1_arr[9]];
+	path = r_1_arr[r_1_arr[9] + 2];
+	if (target == wrk_literal[path])
+		return g_a_tree;
+	while (1) {
+		path2 = path << 2;
+		r_1_arr[4] = target > wrk_literal[path];
+		wrk_tree[path2 + 2] = r_1_arr[4];
+		path = wrk_tree[path2 + r_1_arr[4]];
+		if (target == wrk_literal[path])
+			return g_a_tree;
+	}
+}
+
+/*
+ * compress the given data using a binary tree holding all the previously
+ * seen 64-bit values
+ * returns the length of the compressed data
+ */
+static __always_inline int compress_bewalgo2(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	const uint64_t * const source_begin = (const uint64_t *) source;
+	uint64_t *wrk_literal = (uint64_t *) wrkmem;
+	uint16_t *wrk_tree = (uint16_t *) (wrkmem + PAGE_SIZE);
+	uint16_t *op_match = (uint16_t *) (dest + 4);
+	int count;
+	uint16_t i;
+	uint16_t j;
+	uint16_t tree_root = TREE_NODE_NULL;
+	uint16_t tree_size = 0;
+	const uint64_t *ip = ((const uint64_t *) source)
+		+ DIV_BY_8_ROUND_UP(source_length) - 1;
+
+	/*
+	 * add first value into the tree
+	 */
+	i = avl_insert_first(
+		wrk_literal,
+		wrk_tree,
+		&tree_root,
+		*ip,
+		&tree_size);
+	/*
+	 * to gain performance abort if the data does not seem to be
+	 * compressible
+	 */
+	if (source_length > 512) {
+		/*
+		 * verify that at there are at most 5 different values
+		 * at the first 10 positions
+		 */
+		for (i = 2; i < 11; i++)
+			avl_insert(
+				wrk_literal,
+				wrk_tree,
+				&tree_root,
+				ip[-i],
+				&tree_size);
+		if (tree_size < 6)
+			goto _start;
+		/*
+		 * verify that there are at most 12 different values
+		 * at the first 10 and last 10 positions
+		 */
+		for (i = 0; i < 10; i++)
+			avl_insert(
+				wrk_literal,
+				wrk_tree,
+				&tree_root,
+				source_begin[i],
+				&tree_size);
+		if (tree_size < 13)
+			goto _start;
+		/*
+		 * if the previous conditions do not pass, check some bytes
+		 * in the middle for matches.
+		 * if not enough matches found abort
+		 */
+		for (i = 0; i < 10; i++)
+			avl_insert(
+				wrk_literal,
+				wrk_tree,
+				&tree_root,
+				source_begin[256 + i],
+				&tree_size);
+		if (tree_size >= 21)
+			return -1;
+	}
+_start:
+	i = 0;
+_loop1_after_insert:
+	count = 0;
+	do {
+		ip--;
+		count++;
+	} while (likely(ip > source_begin) && (*ip == wrk_literal[i]));
+	if (count == 1) {
+		count = 0;
+		ip++;
+		j = i + count;
+		do {
+			ip--;
+			count++;
+			j++;
+		} while (likely(ip > source_begin)
+			&& (j < tree_size)
+			&& (*ip == wrk_literal[j]));
+		ip++;
+		count--;
+		if (likely(ip > source_begin)) {
+			do {
+				ip--;
+				count++;
+				j = avl_insert(
+					wrk_literal,
+					wrk_tree,
+					&tree_root,
+					*ip,
+					&tree_size);
+				if (unlikely(tree_size > MAX_LITERALS))
+					return -1;
+			} while ((j == i + count)
+				&& likely(ip > source_begin));
+		}
+		while (unlikely(count > LENGTH_MASK)) {
+			*op_match++ = (i << OFFSET_SHIFT)
+				+ MATCH_BIT_MASK
+				+ LENGTH_MASK;
+			count -= LENGTH_MASK;
+			i += LENGTH_MASK;
+		}
+		*op_match++ = (i << OFFSET_SHIFT) + MATCH_BIT_MASK + count;
+		if (unlikely(ip <= source_begin))
+			goto _finish;
+		i = j;
+		goto _loop1_after_insert;
+	} else {
+		while (unlikely(count > LENGTH_MASK)) {
+			*op_match++ = (i << OFFSET_SHIFT) + LENGTH_MASK;
+			count -= LENGTH_MASK;
+		}
+		*op_match++ = (i << OFFSET_SHIFT) + count;
+	}
+	if (unlikely(ip <= source_begin))
+		goto _finish;
+	i = avl_insert(wrk_literal, wrk_tree, &tree_root, *ip, &tree_size);
+	goto _loop1_after_insert;
+_finish:
+	j = avl_insert(wrk_literal, wrk_tree, &tree_root, *ip, &tree_size);
+	*op_match++ = (j << OFFSET_SHIFT) + 1;
+	((uint16_t *) dest)[0] = op_match - ((uint16_t *) dest);
+	((uint16_t *) dest)[1] = source_length;
+	count = sizeof(uint64_t) * (tree_size);
+	memcpy(op_match, wrk_literal, count);
+	return ((op_match - ((uint16_t *) dest)) << 1) + count;
+}
+
+/*
+ * decompress the data and return the uncompressed length
+ */
+static __always_inline int decompress_bewalgo2(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	uint64_t *dest_anchor = (uint64_t *) dest;
+	const uint16_t *ip_match = ((const uint16_t *) (source + 4));
+	const uint16_t *ip_match_end =
+		((const uint16_t *) source) + ((const uint16_t *) source)[0];
+	const uint64_t *ip_literal = (uint64_t *) ip_match_end;
+	uint16_t i;
+	uint16_t count;
+	uint64_t *op = dest_anchor
+		+ DIV_BY_8_ROUND_UP(((uint16_t *) source)[1]) - 1;
+
+	while (ip_match < ip_match_end) {
+		i = (*ip_match) >> OFFSET_SHIFT;
+		count = (*ip_match) & LENGTH_MASK;
+		if ((*ip_match) & MATCH_BIT_MASK)
+			while (count-- > 0)
+				*op-- = ip_literal[i++];
+		else
+			while (count-- > 0)
+				*op-- = ip_literal[i];
+		ip_match++;
+	}
+	return ((const uint16_t *) source)[1];
+}
+
+static int init_bewalgo2(void)
+{
+	return 0;
+}
+
+static void exit_bewalgo2(void)
+{
+}
+
+static struct zbewalgo_alg alg_bewalgo2 = {
+	.name = "bewalgo2",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = 4096 << 1,
+	.init = init_bewalgo2,
+	.exit = exit_bewalgo2,
+	.compress = compress_bewalgo2,
+	.decompress = decompress_bewalgo2
+};
+#endif
diff --git a/lib/zbewalgo/bitshuffle.h b/lib/zbewalgo/bitshuffle.h
new file mode 100644
index 000000000..e6e01174a
--- /dev/null
+++ b/lib/zbewalgo/bitshuffle.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_BITSHUFFLE_H
+#define ZBEWALGO_BITSHUFFLE_H
+
+#include "include.h"
+
+/*
+ * performs a static transformation on the data. Moves every eighth byte into
+ * a consecutive range
+ */
+static __always_inline int compress_bitshuffle(
+	const uint8_t *source, uint8_t *dest,
+	uint8_t * const wrkmem __attribute__ ((unused)),
+	uint16_t source_length)
+{
+	uint16_t i;
+	*((uint16_t *) dest) = source_length;
+	dest += 2;
+	source_length = (source_length + 7) & (~7);
+	for (i = 0; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 1; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 2; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 3; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 4; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 5; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 6; i < source_length; i += 8)
+		*dest++ = source[i];
+	for (i = 7; i < source_length; i += 8)
+		*dest++ = source[i];
+	return source_length + 2;
+}
+
+/*
+ * reverses the transformation step
+ */
+static __always_inline int decompress_bitshuffle(
+	const uint8_t *source,
+	uint8_t *dest,
+	uint8_t * const wrkmem __attribute__ ((unused)))
+{
+	uint16_t i;
+	uint16_t dest_length = *((uint16_t *) source);
+	uint16_t dest_length2 = (dest_length + 7) & (~7);
+
+	source += 2;
+	for (i = 0; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 1; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 2; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 3; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 4; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 5; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 6; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	for (i = 7; i < dest_length2; i += 8)
+		dest[i] = *source++;
+	return dest_length;
+}
+static int init_bitshuffle(void)
+{
+	return 0;
+}
+
+static void exit_bitshuffle(void)
+{
+}
+
+static struct zbewalgo_alg alg_bitshuffle = {
+	.name = "bitshuffle",
+	.flags = ZBEWALGO_ALG_FLAG_TRANSFORM,
+	.wrkmem_size = 0,
+	.init = init_bitshuffle,
+	.exit = exit_bitshuffle,
+	.compress = compress_bitshuffle,
+	.decompress = decompress_bitshuffle
+};
+#endif
diff --git a/lib/zbewalgo/huffman.h b/lib/zbewalgo/huffman.h
new file mode 100644
index 000000000..ad71504fd
--- /dev/null
+++ b/lib/zbewalgo/huffman.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_HUFFMAN_H
+#define ZBEWALGO_HUFFMAN_H
+
+#include "include.h"
+
+/*
+ * compression using the bare huffman encoding. Optimized for speed and small
+ * input buffers
+ */
+static __always_inline int compress_huffman(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	const uint8_t * const source_end = source + source_length;
+	const uint8_t * const dest_end = dest + zbewalgo_max_output_size;
+	short * const nodes_index = (short *) (wrkmem);
+	uint16_t * const leaf_parent_index = (uint16_t *) (wrkmem + 1536);
+	uint16_t * const nodes_weight = (uint16_t *) (wrkmem + 2048);
+	uint16_t * const frequency = (uint16_t *) (wrkmem + 3072);
+	uint16_t * const code_lengths = (uint16_t *) (wrkmem + 3584);
+	uint32_t * const code_words = (uint32_t *) (wrkmem + 4096);
+	short i;
+	uint16_t node_index;
+	int i2;
+	short max_codeword_length;
+	uint32_t weight2;
+	short num_nodes;
+	uint32_t bits_in_buffer;
+	uint32_t bits_in_stack;
+	uint16_t free_index;
+	const uint8_t *ip;
+	uint8_t *op;
+	uint32_t stack;
+#ifdef __LITTLE_ENDIAN
+	uint8_t * const stack_ptr = (uint8_t *) &stack;
+#endif
+	memset(dest, 0, zbewalgo_max_output_size);
+	memset(wrkmem, 0, 5120);
+	op = dest;
+	ip = source;
+	*(uint16_t *) dest = source_length;
+	do {
+		frequency[*ip++]++;
+	} while (ip < source_end);
+	ip = source;
+	num_nodes = 0;
+	for (i = 0; i < 256; i++) {
+		if (frequency[i] > 0) {
+			i2 = num_nodes++;
+			while ((i2 > 0) && (nodes_weight[i2] > frequency[i])) {
+				nodes_weight[i2 + 1] = nodes_weight[i2];
+				nodes_index[i2 + 1] = nodes_index[i2];
+				leaf_parent_index[nodes_index[i2]]++;
+				i2--;
+			}
+			i2++;
+			nodes_index[i2] = -(i + 1);
+			leaf_parent_index[-(i + 1)] = i2;
+			nodes_weight[i2] = frequency[i];
+		}
+	}
+	dest[2] = num_nodes;
+	op = dest + 3;
+	for (i = 1; i <= num_nodes; ++i) {
+		*op++ = -(nodes_index[i] + 1);
+		*(uint16_t *) op = nodes_weight[i];
+		op += 2;
+	}
+	free_index = 2;
+	while (free_index <= num_nodes) {
+		weight2 = nodes_weight[free_index - 1]
+			+ nodes_weight[free_index];
+		i2 = num_nodes++;
+		while ((i2 > 0) && (nodes_weight[i2] > weight2)) {
+			nodes_weight[i2 + 1] = nodes_weight[i2];
+			nodes_index[i2 + 1] = nodes_index[i2];
+			leaf_parent_index[nodes_index[i2]]++;
+			i2--;
+		}
+		i2++;
+		nodes_index[i2] = free_index >> 1;
+		leaf_parent_index[free_index >> 1] = i2;
+		nodes_weight[i2] = weight2;
+		free_index += 2;
+	}
+	if (num_nodes > 400)
+		return -1;
+	for (i = 0; i < 256; i++) {
+		if (frequency[i] == 0)
+			continue;
+		bits_in_stack = 0;
+		stack = 0;
+		node_index = leaf_parent_index[-(i + 1)];
+		while (node_index < num_nodes) {
+			stack |= (node_index & 0x1) << bits_in_stack;
+			bits_in_stack++;
+			node_index = leaf_parent_index[(node_index + 1) >> 1];
+		}
+		code_lengths[i] = bits_in_stack;
+		code_words[i] = stack;
+	}
+	max_codeword_length = 0;
+	node_index = leaf_parent_index[nodes_index[1]];
+	while (node_index < num_nodes) {
+		node_index = leaf_parent_index[(node_index + 1) >> 1];
+		max_codeword_length++;
+	}
+	if (max_codeword_length > 24)
+		return -1;
+	bits_in_buffer = 0;
+	do {
+		bits_in_stack = code_lengths[*ip];
+		stack = code_words[*ip];
+		ip++;
+		bits_in_buffer += bits_in_stack;
+		stack = stack << (32 - bits_in_buffer);
+#ifdef __LITTLE_ENDIAN
+		op[0] |= stack_ptr[3];
+		op[1] |= stack_ptr[2];
+		op[2] |= stack_ptr[1];
+		op[3] |= stack_ptr[0];
+#else
+		*(uint32_t *) op |= stack;
+#endif
+		op += bits_in_buffer >> 3;
+		bits_in_buffer = bits_in_buffer & 0x7;
+		if (unlikely(op >= dest_end))
+			return -1;
+	} while (likely(ip < source_end));
+	return op - dest + (bits_in_buffer > 0);
+}
+
+/*
+ * reverses the huffman compression
+ */
+static __always_inline int decompress_huffman(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem)
+{
+	const uint32_t dest_length = *(uint16_t *) source;
+	const uint8_t * const dest_end = dest + dest_length;
+	short * const nodes_index = (short *) (wrkmem);
+	uint16_t * const nodes_weight = (uint16_t *) (wrkmem + 1024);
+	uint32_t i;
+	int node_index;
+	uint32_t i2;
+	uint32_t weight2;
+	uint32_t num_nodes;
+	uint32_t current_bit;
+	uint32_t free_index;
+	const uint8_t *ip;
+	uint8_t *op;
+
+	memset(wrkmem, 0, 2048);
+	num_nodes = source[2];
+	ip = source + 3;
+	op = dest;
+	for (i = 1; i <= num_nodes; ++i) {
+		nodes_index[i] = -(*ip++ + 1);
+		nodes_weight[i] = *(uint16_t *) ip;
+		ip += 2;
+	}
+	free_index = 2;
+	while (free_index <= num_nodes) {
+		weight2 = nodes_weight[free_index - 1]
+			+ nodes_weight[free_index];
+		i2 = num_nodes++;
+		while (i2 > 0 && nodes_weight[i2] > weight2) {
+			nodes_weight[i2 + 1] = nodes_weight[i2];
+			nodes_index[i2 + 1] = nodes_index[i2];
+			i2--;
+		}
+		i2++;
+		nodes_index[i2] = free_index >> 1;
+		nodes_weight[i2] = weight2;
+		free_index += 2;
+	}
+	current_bit = 0;
+	do {
+		node_index = nodes_index[num_nodes];
+		while (node_index > 0) {
+			ip += current_bit == 8;
+			current_bit = ((current_bit) & 0x7) + 1;
+			node_index = nodes_index[(node_index << 1)
+				- ((*ip >> (8 - current_bit)) & 0x1)];
+		}
+		*op++ = -(node_index + 1);
+	} while (op < dest_end);
+	return dest_length;
+}
+
+static int init_huffman(void)
+{
+	return 0;
+}
+
+static void exit_huffman(void)
+{
+}
+
+static struct zbewalgo_alg alg_huffman = {
+	.name = "huffman",
+	.flags = ZBEWALGO_ALG_FLAG_COMPRESS,
+	.wrkmem_size = 5120,
+	.init = init_huffman,
+	.exit = exit_huffman,
+	.compress = compress_huffman,
+	.decompress = decompress_huffman
+};
+
+#endif
diff --git a/lib/zbewalgo/include.h b/lib/zbewalgo/include.h
new file mode 100644
index 000000000..9a910d227
--- /dev/null
+++ b/lib/zbewalgo/include.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_INCLUDE_H
+#define ZBEWALGO_INCLUDE_H
+
+#define ZBEWALGO_ALG_MAX_NAME 128
+#define ZBEWALGO_ALG_FLAG_COMPRESS 1
+#define ZBEWALGO_ALG_FLAG_TRANSFORM 2
+#define ZBEWALGO_COMBINATION_MAX_IDS 7
+#define ZBEWALGO_MAX_BASE_ALGORITHMS 16
+#define ZBEWALGO_COMBINATION_MAX_ACTIVE 256
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#define DIV_BY_8_ROUND_UP(val) ((val >> 3) + ((val & 0x7) > 0))
+
+struct zbewalgo_alg {
+	char name[ZBEWALGO_ALG_MAX_NAME];
+	/* flag wheether this algorithm compresses the data or
+	 * transforms the data only
+	 */
+	uint8_t flags;
+	/* the wrkmem required for this algorithm */
+	uint32_t wrkmem_size;
+	int (*init)(void);
+	void (*exit)(void);
+	/* the compression function must store the size of
+	 * input/output data itself
+	 */
+	int (*compress)(
+		const uint8_t * const source,
+		uint8_t * const dest,
+		uint8_t * const wrkmem,
+		const uint16_t source_length);
+	int (*decompress)(
+		const uint8_t * const source,
+		uint8_t * const dest,
+		uint8_t * const wrkmem);
+	uint8_t id;
+};
+
+/*
+ * to gain speed the compression starts with the algorithm wich was good for
+ * the last compressed page.
+ */
+struct zbewalgo_combination {
+	uint8_t count;
+	uint8_t ids[ZBEWALGO_COMBINATION_MAX_IDS];
+};
+
+struct zbewalgo_main_data {
+	/*
+	 * best_id contains the id of the best combination for the last page
+	 */
+	uint8_t best_id;
+
+	/*
+	 * if zero search again for best_id - must be unsigned - underflow of
+	 * values is intended
+	 */
+	uint8_t counter_search;
+
+	/*
+	 * a bit larger than original compressed size to be still accepted
+	 * immediately
+	 */
+	uint16_t best_id_accepted_size;
+};
+
+/*
+ * each cpu has its own independent compression history to avoid locks
+ */
+static struct zbewalgo_main_data __percpu *zbewalgo_main_data_ptr;
+
+/*
+ * all available algorithms
+ */
+static struct zbewalgo_alg
+	zbewalgo_base_algorithms[ZBEWALGO_MAX_BASE_ALGORITHMS];
+static uint8_t zbewalgo_base_algorithms_count;
+
+/*
+ * all currently available combination sequences of algorithms
+ */
+static struct zbewalgo_combination
+	zbewalgo_combinations[ZBEWALGO_COMBINATION_MAX_ACTIVE];
+static uint8_t zbewalgo_combinations_count;
+
+/*
+ * maximum required wrkmem for compression and decompression for each instance
+ * of the algorithm
+ */
+static uint32_t zbewalgo_wrkmem_size;
+
+/*
+ * compression aborts automatically if the compressed data is too large.
+ */
+static unsigned long zbewalgo_max_output_size;
+
+/*
+ * compression can be aborted if the data is smaller than this theshold to
+ * speed up the algorithm.
+ */
+static unsigned long zbewalgo_early_abort_size;
+
+/*
+ * If more than the specified number of chars are to be transformed,
+ * it is unlikely that the compression will achieve high ratios.
+ * As a consequence the Burrows-Wheeler Transformation will abort if the number
+ * of different bytes exeeds this value.
+ */
+static unsigned long zbewalgo_bwt_max_alphabet = 90;
+
+/*
+ * add an combination to the current active ones. All combinations are the
+ * same for all instances of this algorithm
+ */
+int zbewalgo_add_combination(
+	const char * const string,
+	const int string_length);
+
+/*
+ * replace ALL combinations with the one specified.
+ */
+int zbewalgo_set_combination(
+	const char * const string,
+	const int string_length);
+
+#endif
diff --git a/lib/zbewalgo/settings.h b/lib/zbewalgo/settings.h
new file mode 100644
index 000000000..e6e28dccd
--- /dev/null
+++ b/lib/zbewalgo/settings.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef ZBEWALGO_SETTINGS_H
+#define ZBEWALGO_SETTINGS_H
+
+#include "include.h"
+
+#define add_combination_compile_time(name) \
+	zbewalgo_add_combination(name, sizeof(name))
+
+static ssize_t zbewalgo_combinations_show(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	char *buf)
+{
+	ssize_t res = 0;
+	ssize_t tmp;
+	uint8_t i, j;
+	struct zbewalgo_combination *com;
+
+	res = tmp = scnprintf(buf, PAGE_SIZE - res, "combinations={\n");
+	buf += tmp;
+	for (i = 0; i < zbewalgo_combinations_count; i++) {
+		com = &zbewalgo_combinations[i];
+		res += tmp = scnprintf(
+			buf,
+			PAGE_SIZE - res,
+			"\tcombination[%d]=",
+			i);
+		buf += tmp;
+		for (j = 0; j < com->count - 1; j++) {
+			res += tmp = scnprintf(buf, PAGE_SIZE - res, "%s-",
+				zbewalgo_base_algorithms[com->ids[j]].name);
+			buf += tmp;
+		}
+		res += tmp = scnprintf(buf, PAGE_SIZE - res, "%s\n",
+			zbewalgo_base_algorithms[com->ids[j]].name);
+		buf += tmp;
+	}
+	res += tmp = scnprintf(buf, PAGE_SIZE - res, "}\n");
+	return res;
+}
+
+static ssize_t zbewalgo_combinations_store(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	const char *buf,
+	size_t count)
+{
+	ssize_t res;
+
+	count--;
+	if (count < 5)
+		return -1;
+	if (memcmp(buf, "add ", 4) == 0) {
+		res = zbewalgo_add_combination(buf + 4, count - 4);
+		return res < 0 ? res : count+1;
+	}
+	if (memcmp(buf, "set ", 4) == 0) {
+		res = zbewalgo_set_combination(buf + 4, count - 4);
+		return res < 0 ? res : count+1;
+	}
+	if (memcmp(buf, "reset", 5) == 0) {
+		zbewalgo_combinations_count = 0;
+		add_combination_compile_time(
+			"bewalgo2-bitshuffle-jbe-rle");
+		add_combination_compile_time(
+			"bwt-mtf-bewalgo-huffman");
+		add_combination_compile_time(
+			"bitshuffle-bewalgo2-mtf-bewalgo-jbe");
+		add_combination_compile_time(
+			"bitshuffle-rle-bitshuffle-rle");
+		return count;
+	}
+	return -1;
+}
+
+static ssize_t zbewalgo_max_output_size_show(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_max_output_size);
+}
+
+static ssize_t zbewalgo_max_output_size_store(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	const char *buf,
+	size_t count)
+{
+	char localbuf[10];
+	ssize_t res;
+
+	memcpy(&localbuf[0], buf, count < 10 ? count : 10);
+	localbuf[count < 9 ? count : 9] = 0;
+	res = kstrtoul(localbuf, 10, &zbewalgo_max_output_size);
+	return res < 0 ? res : count;
+}
+
+static ssize_t zbewalgo_early_abort_size_show(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_early_abort_size);
+}
+
+static ssize_t zbewalgo_early_abort_size_store(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	const char *buf,
+	size_t count)
+{
+	char localbuf[10];
+	ssize_t res;
+
+	memcpy(&localbuf[0], buf, count < 10 ? count : 10);
+	localbuf[count < 9 ? count : 9] = 0;
+	res = kstrtoul(localbuf, 10, &zbewalgo_early_abort_size);
+	return res < 0 ? res : count;
+}
+
+static ssize_t zbewalgo_bwt_max_alphabet_show(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%lu", zbewalgo_bwt_max_alphabet);
+}
+
+static ssize_t zbewalgo_bwt_max_alphabet_store(
+	struct kobject *kobj,
+	struct kobj_attribute *attr,
+	const char *buf,
+	size_t count)
+{
+	char localbuf[10];
+	ssize_t res;
+
+	memcpy(&localbuf[0], buf, count < 10 ? count : 10);
+	localbuf[count < 9 ? count : 9] = 0;
+	res = kstrtoul(localbuf, 10, &zbewalgo_bwt_max_alphabet);
+	return res < 0 ? res : count;
+}
+
+static struct kobj_attribute zbewalgo_combinations_attribute = __ATTR(
+	combinations,
+	0664,
+	zbewalgo_combinations_show,
+	zbewalgo_combinations_store);
+static struct kobj_attribute zbewalgo_max_output_size_attribute = __ATTR(
+	max_output_size,
+	0664,
+	zbewalgo_max_output_size_show,
+	zbewalgo_max_output_size_store);
+static struct kobj_attribute zbewalgo_early_abort_size_attribute = __ATTR(
+	early_abort_size,
+	0664,
+	zbewalgo_early_abort_size_show,
+	zbewalgo_early_abort_size_store);
+static struct kobj_attribute zbewalgo_bwt_max_alphabet_attribute = __ATTR(
+	bwt_max_alphabet,
+	0664,
+	zbewalgo_bwt_max_alphabet_show,
+	zbewalgo_bwt_max_alphabet_store);
+static struct attribute *attrs[] = {
+	&zbewalgo_combinations_attribute.attr,
+	&zbewalgo_max_output_size_attribute.attr,
+	&zbewalgo_early_abort_size_attribute.attr,
+	&zbewalgo_bwt_max_alphabet_attribute.attr,
+	NULL,
+};
+
+static struct attribute_group attr_group = {
+	.attrs = attrs,
+};
+
+static struct kobject *zbewalgo_kobj;
+
+static int init_settings(void)
+{
+	int resval;
+
+	zbewalgo_kobj = kobject_create_and_add("zbewalgo", kernel_kobj);
+	if (!zbewalgo_kobj)
+		return -ENOMEM;
+	resval = sysfs_create_group(zbewalgo_kobj, &attr_group);
+	if (resval) {
+		kobject_put(zbewalgo_kobj);
+		zbewalgo_combinations_store(
+			zbewalgo_kobj,
+			&zbewalgo_combinations_attribute,
+			"reset",
+			sizeof("reset"));
+	}
+	return resval;
+}
+
+static void exit_settings(void)
+{
+	kobject_put(zbewalgo_kobj);
+}
+#endif
diff --git a/lib/zbewalgo/zbewalgo_compress.c b/lib/zbewalgo/zbewalgo_compress.c
new file mode 100644
index 000000000..4873043d7
--- /dev/null
+++ b/lib/zbewalgo/zbewalgo_compress.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2018 Benjamin Warnke <4bwarnke@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "include.h"
+
+#include "BWT.h"
+#include "JBE.h"
+#include "JBE2.h"
+#include "MTF.h"
+#include "RLE.h"
+#include "bewalgo.h"
+#include "bewalgo2.h"
+#include "bitshuffle.h"
+#include "huffman.h"
+#include "settings.h"
+
+static atomic64_t zbewalgo_stat_combination[256];
+static atomic64_t zbewalgo_stat_count[256];
+
+/*
+ * returns the required size of wrkmem for compression and decompression
+ */
+__always_inline int zbewalgo_get_wrkmem_size(void)
+{
+	return zbewalgo_wrkmem_size;
+}
+EXPORT_SYMBOL(zbewalgo_get_wrkmem_size);
+
+/*
+ * this function adds a combination to the set of enabled combinations or
+ * if 'flag' is set, replaces all combinations with the new supplied one.
+ * this function is called from the sysfs context, and therefore accepts
+ * a string as input.
+ */
+static __always_inline int add_set_combination(
+	const char * const string,
+	const int string_length,
+	int flag)
+{
+	/* points behind the string for fast looping over the entire string */
+	const char * const string_end = string + string_length;
+	/* the string up to 'anchor' is parsed */
+	const char *anchor = string;
+	const char *pos = string;
+	struct zbewalgo_combination combination;
+	uint8_t i;
+
+	memset(&combination, 0, sizeof(struct zbewalgo_combination));
+	/* loop over entire string */
+	while ((pos < string_end) && (*pos != 0)) {
+		while ((pos < string_end) && (*pos != 0) && (*pos != '-'))
+			pos++;
+		if (pos - anchor <= 0) {
+			/* skipp leading or consecutive '-' chars */
+			pos++;
+			anchor = pos;
+			continue;
+		}
+		/* find the algorithm by name in the list of all algorithms */
+		for (i = 0; i < zbewalgo_base_algorithms_count; i++) {
+			if (((unsigned int) (pos - anchor)
+				== strlen(zbewalgo_base_algorithms[i].name))
+				&& (memcmp(anchor,
+					zbewalgo_base_algorithms[i].name,
+					pos - anchor)
+					 == 0)) {
+				/* found algorithm */
+				combination.ids[combination.count++] =
+					zbewalgo_base_algorithms[i].id;
+				break;
+			}
+		}
+		/*
+		 * abort parsing if maximum of algorithms reached or
+		 * if string is parsed completely
+		 */
+		if ((combination.count >= ZBEWALGO_COMBINATION_MAX_IDS)
+			|| (*pos != '-'))
+			goto _finalize;
+		if (i == zbewalgo_base_algorithms_count)
+			/* misstyped arguments */
+			return -1;
+		pos++;
+		anchor = pos;
+	}
+_finalize:
+	if (combination.count) {
+		/* if combination has at least 1 algorithm */
+		if (flag == 1)
+			zbewalgo_combinations_count = 0;
+		/* don't store the same combination twice */
+		for (i = 0; i < zbewalgo_combinations_count; i++)
+			if (memcmp(
+				&zbewalgo_combinations[i],
+				&combination,
+				sizeof(struct zbewalgo_combination)) == 0) {
+				return 0;
+			}
+		/* store the new combination */
+		memcpy(
+			&zbewalgo_combinations[zbewalgo_combinations_count],
+			&combination,
+			sizeof(struct zbewalgo_combination));
+		zbewalgo_combinations_count++;
+		return 0;
+	}
+	return -1; /* empty algorithm is not allowed */
+}
+
+int zbewalgo_add_combination(
+	const char * const string,
+	const int string_length)
+{
+	return add_set_combination(string, string_length, 0);
+}
+EXPORT_SYMBOL(zbewalgo_add_combination);
+
+int zbewalgo_set_combination(
+	const char * const string,
+	const int string_length)
+{
+	return add_set_combination(string, string_length, 1);
+}
+EXPORT_SYMBOL(zbewalgo_set_combination);
+
+static int __init zbewalgo_mod_init(void)
+{
+	uint8_t i;
+	int j;
+	int res = 0;
+
+	zbewalgo_early_abort_size = 400;
+	/*
+	 * this algorithm is intended to be used for zram with zsmalloc.
+	 * Therefore zbewalgo_max_output_size equals zsmalloc max size class
+	 */
+	i = 0;
+	zbewalgo_max_output_size = 3264;
+	zbewalgo_base_algorithms[i++] = alg_bewalgo;
+	zbewalgo_base_algorithms[i++] = alg_bewalgo2;
+	zbewalgo_base_algorithms[i++] = alg_bitshuffle;
+	zbewalgo_base_algorithms[i++] = alg_bwt;
+	zbewalgo_base_algorithms[i++] = alg_jbe;
+	zbewalgo_base_algorithms[i++] = alg_jbe2;
+	zbewalgo_base_algorithms[i++] = alg_mtf;
+	zbewalgo_base_algorithms[i++] = alg_rle;
+	zbewalgo_base_algorithms[i++] = alg_huffman;
+	zbewalgo_base_algorithms_count = i;
+	/*
+	 * the wrkmem size is the largest wrkmem required by any callable
+	 * algorithm
+	 */
+	zbewalgo_wrkmem_size = 0;
+	for (i = 0; i < zbewalgo_base_algorithms_count; i++) {
+		res = zbewalgo_base_algorithms[i].init();
+		if (res) {
+			if (i > 0)
+				zbewalgo_base_algorithms[0].exit();
+			i--;
+			while (i > 0)
+				zbewalgo_base_algorithms[i].exit();
+			return res;
+		}
+		zbewalgo_base_algorithms[i].id = i;
+		zbewalgo_wrkmem_size = MAX(
+			zbewalgo_wrkmem_size,
+			zbewalgo_base_algorithms[i].wrkmem_size);
+	}
+	/* adding some pages for temporary compression results */
+	zbewalgo_wrkmem_size += 4096 * 6;
+	zbewalgo_main_data_ptr = alloc_percpu(struct zbewalgo_main_data);
+	for_each_possible_cpu(j) {
+		memset(
+			per_cpu_ptr(zbewalgo_main_data_ptr, j),
+			0,
+			sizeof(struct zbewalgo_main_data));
+	}
+
+	memset(&zbewalgo_stat_combination[0], 0, 256 * sizeof(atomic64_t));
+	memset(&zbewalgo_stat_count[0], 0, 256 * sizeof(atomic64_t));
+	init_settings();
+	return res;
+}
+
+static void __exit zbewalgo_mod_fini(void)
+{
+	uint16_t i;
+	uint64_t tmp;
+
+	exit_settings();
+	for (i = 0; i < zbewalgo_base_algorithms_count; i++)
+		zbewalgo_base_algorithms[i].exit();
+	free_percpu(zbewalgo_main_data_ptr);
+	/* log statistics via printk for debugging purpose */
+	for (i = 0; i < 256; i++) {
+		tmp = atomic64_read(&zbewalgo_stat_combination[i]);
+		if (tmp > 0)
+			printk(KERN_INFO "%s %4d -> %llu combination\n",
+				__func__, i, tmp);
+	}
+	for (i = 0; i < 256; i++) {
+		tmp = atomic64_read(&zbewalgo_stat_count[i]);
+		if (tmp > 0)
+			printk(KERN_INFO "%s %4d -> %llu counter\n",
+				__func__, i, tmp);
+	}
+}
+
+#define compress(i, j, s, d, w, l) \
+	(zbewalgo_base_algorithms[zbewalgo_combinations[i].ids[j]] \
+		.compress(s, d, w, l))
+
+
+__always_inline int zbewalgo_compress(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem,
+	const uint16_t source_length)
+{
+	struct zbewalgo_main_data * const main_data =
+		get_cpu_ptr(zbewalgo_main_data_ptr);
+	uint8_t *dest_best = wrkmem;
+	uint8_t *dest1 = dest_best + (4096 << 1);
+	uint8_t *dest2 = dest1 + (4096 << 1);
+	uint8_t * const wrk = dest2 + (4096 << 1);
+	uint8_t *dest_tmp;
+	const uint8_t *current_source;
+	uint8_t i, j;
+	uint16_t dest_best_size = source_length << 1;
+	int dest_current_size;
+	uint8_t dest_best_id = 0;
+	uint8_t i_from = main_data->best_id
+		+ (main_data->counter_search-- == 0);
+	uint8_t i_to = zbewalgo_combinations_count;
+	uint8_t looped = 0;
+	uint16_t local_abort_size = MAX(
+		main_data->best_id_accepted_size,
+		zbewalgo_early_abort_size);
+	uint16_t counter = 0;
+	struct zbewalgo_combination * const dest_best_combination =
+		(struct zbewalgo_combination *) dest;
+_begin:
+	/*
+	 * loop from 0 to zbewalgo_combinations_count starting with the last
+	 * successfull combination
+	 */
+	i = i_from;
+	while (i < i_to) {
+		current_source	  = source;
+		dest_current_size = source_length;
+		counter++;
+		for (j = 0; j < zbewalgo_combinations[i].count; j++) {
+			dest_current_size = compress(i, j,
+				current_source,
+				dest2,
+				wrk,
+				dest_current_size);
+			if (dest_current_size <= 0)
+				goto _next_algorithm;
+			current_source = dest2;
+			dest_tmp = dest2;
+			dest2 = dest1;
+			dest1 = dest_tmp;
+			if (dest_current_size < dest_best_size) {
+	/* if a higher compression ratio is found, update to the best */
+				dest_best_id = i;
+				dest_best_size = dest_current_size;
+				dest_tmp = dest_best;
+				dest_best = dest1;
+				dest1 = dest_tmp;
+				memcpy(
+					dest_best_combination,
+					&zbewalgo_combinations[i],
+					sizeof(struct zbewalgo_combination));
+	/* partial combination is allowed, if its compression ratio is high */
+				dest_best_combination->count = j + 1;
+			}
+		}
+		if (dest_best_size < local_abort_size)
+			goto _early_abort;
+_next_algorithm:
+		local_abort_size = zbewalgo_early_abort_size;
+		i++;
+	}
+	if (!(looped++) && (i_from > 0)) {
+		i_to = MIN(i_from, zbewalgo_combinations_count);
+		i_from = 0;
+		goto _begin;
+	}
+	if (dest_best_size > zbewalgo_max_output_size)
+		return -1;
+_early_abort:
+	atomic64_inc(&zbewalgo_stat_combination[dest_best_id]);
+	atomic64_inc(&zbewalgo_stat_count[counter]);
+	main_data->best_id = dest_best_id;
+	main_data->best_id_accepted_size =
+		MAX(
+			dest_best_size + (dest_best_size >> 3),
+			zbewalgo_early_abort_size);
+	memcpy(
+		dest + sizeof(struct zbewalgo_combination),
+		dest_best,
+		dest_best_size);
+	return sizeof(struct zbewalgo_combination) + dest_best_size;
+}
+EXPORT_SYMBOL(zbewalgo_compress);
+
+#define decompress(i, s, d, w) \
+	(zbewalgo_base_algorithms[combination->ids[i]] \
+		.decompress(s, d, w))
+
+__always_inline int zbewalgo_decompress(
+	const uint8_t * const source,
+	uint8_t * const dest,
+	uint8_t * const wrkmem)
+{
+	uint8_t *dest1 = wrkmem;
+	uint8_t *dest2 = dest1 + (4096 << 1);
+	uint8_t *wrk = dest2 + (4096 << 1);
+	const struct zbewalgo_combination * const combination =
+		(struct zbewalgo_combination *) source;
+	uint8_t j;
+	int res;
+
+	if (combination->count == 1) {
+		res = decompress(0,
+			(source + sizeof(struct zbewalgo_combination)),
+			dest,
+			wrk);
+		return res;
+	}
+	res = decompress(combination->count - 1,
+		(source + sizeof(struct zbewalgo_combination)),
+		dest1,
+		wrk);
+	for (j = combination->count - 2; j > 1; j -= 2) {
+		res = decompress(j, dest1, dest2, wrk);
+		res = decompress(j - 1, dest2, dest1, wrk);
+	}
+	if (j == 1) {
+		res = decompress(1, dest1, dest2, wrk);
+		res = decompress(0, dest2, dest, wrk);
+		return res;
+	}
+	res = decompress(0, dest1, dest, wrk);
+	return res;
+}
+EXPORT_SYMBOL(zbewalgo_decompress);
+
+module_init(zbewalgo_mod_init);
+module_exit(zbewalgo_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ZBeWalgo Compression Algorithm");
-- 
2.11.0





[Index of Archives]     [Kernel]     [Gnu Classpath]     [Gnu Crypto]     [DM Crypt]     [Netfilter]     [Bugtraq]

  Powered by Linux