5.15-stable review patch. If anyone has any objections, please let me know. ------------------ From: Gao Xiang <hsiangkao@xxxxxxxxxxxxxxxxx> commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, which was used to handle the cases where some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data to be arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, this was hardly ever observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as a useful improvement, we could try to tie up these two buffers together in the correct order. 
Reported-and-tested-by: Juhyung Park <qkrwngud825@xxxxxxxxx> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@xxxxxxxxxxxxxx Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@xxxxxxxxxxxxxxx> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@xxxxxxxxxxx> Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxxxxxxxxx> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@xxxxxxxxxxxxxxxxx Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- fs/erofs/decompressor.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -124,11 +124,11 @@ static int z_erofs_lz4_prepare_destpages } static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq, - void *inpage, unsigned int *inputmargin, int *maptype, - bool support_0padding) + void *inpage, void *out, unsigned int *inputmargin, int *maptype, + bool support_0padding) { unsigned int nrpages_in, nrpages_out; - unsigned int ofull, oend, inputsize, total, i, j; + unsigned int ofull, oend, inputsize, total, i; struct page **in; void *src, *tmp; @@ -143,12 +143,13 @@ static void *z_erofs_handle_inplace_io(s ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize)) goto docopy; - for (i = 0; i < nrpages_in; ++i) { - DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < nrpages_out - nrpages_in + i; ++j) - if (rq->out[j] == rq->in[i]) - goto docopy; - } + for (i = 0; i < nrpages_in; ++i) + if (rq->out[nrpages_out - nrpages_in + i] != + rq->in[i]) + goto docopy; + kunmap_atomic(inpage); + *maptype = 3; + return out + ((nrpages_out - nrpages_in) << PAGE_SHIFT); } if (nrpages_in <= 1) { @@ -156,7 +157,6 @@ static void *z_erofs_handle_inplace_io(s return inpage; } kunmap_atomic(inpage); - might_sleep(); src = erofs_vm_map_ram(rq->in, nrpages_in); if (!src) return 
ERR_PTR(-ENOMEM); @@ -193,10 +193,10 @@ docopy: return src; } -static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) +static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *dst) { unsigned int inputmargin; - u8 *headpage, *src; + u8 *out, *headpage, *src; bool support_0padding; int ret, maptype; @@ -220,11 +220,12 @@ static int z_erofs_lz4_decompress(struct } rq->inputsize -= inputmargin; - src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype, - support_0padding); + src = z_erofs_handle_inplace_io(rq, headpage, dst, &inputmargin, + &maptype, support_0padding); if (IS_ERR(src)) return PTR_ERR(src); + out = dst + rq->pageofs_out; /* legacy format could compress extra data in a pcluster. */ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, @@ -253,7 +254,7 @@ static int z_erofs_lz4_decompress(struct vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT); } else if (maptype == 2) { erofs_put_pcpubuf(src); - } else { + } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; } @@ -354,8 +355,7 @@ static int z_erofs_decompress_generic(st dst_maptype = 2; dstmap_out: - ret = alg->decompress(rq, dst + rq->pageofs_out); - + ret = alg->decompress(rq, dst); if (!dst_maptype) kunmap_atomic(dst); else if (dst_maptype == 2)