[PATCH 4.1 073/202] Btrfs: update fix for read corruption of compressed and shared extents

Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> · Sat, 17 Oct 2015 18:57:36 -0700

4.1-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Filipe Manana <fdmanana@xxxxxxxx>

commit 808f80b46790f27e145c72112189d6a3be2bc884 upstream.

My previous fix in commit 005efedf2c7d ("Btrfs: fix read corruption of
compressed and shared extents") was effective only if the compressed
extents cover a file range with a length that is not a multiple of 16
pages. That's because the detection of when we reached a different range
of the file that shares the same compressed extent as the previously
processed range was done at extent_io.c:__do_contiguous_readpages(),
which covers subranges with a length up to 16 pages, because
extent_readpages() groups the pages in clusters no larger than 16 pages.
So fix this by tracking the start of the previously processed file
range's extent map at extent_readpages().

The following test case for fstests reproduces the issue:

  seq=`basename $0`
  seqres=$RESULT_DIR/$seq
  echo "QA output created by $seq"
  tmp=/tmp/$$
  status=1	# failure is the default!
  trap "_cleanup; exit \$status" 0 1 2 3 15

  _cleanup()
  {
      rm -f $tmp.*
  }

  # get standard environment, filters and checks
  . ./common/rc
  . ./common/filter

  # real QA test starts here
  _need_to_be_root
  _supported_fs btrfs
  _supported_os Linux
  _require_scratch
  _require_cloner

  rm -f $seqres.full

  test_clone_and_read_compressed_extent()
  {
      local mount_opts=$1

      _scratch_mkfs >>$seqres.full 2>&1
      _scratch_mount $mount_opts

      # Create our test file with a single extent of 64Kb that is going to
      # be compressed no matter which compression algo is used (zlib/lzo).
      $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 64K" \
          $SCRATCH_MNT/foo | _filter_xfs_io

      # Now clone the compressed extent into an adjacent file offset.
      $CLONER_PROG -s 0 -d $((64 * 1024)) -l $((64 * 1024)) \
          $SCRATCH_MNT/foo $SCRATCH_MNT/foo

      echo "File digest before unmount:"
      md5sum $SCRATCH_MNT/foo | _filter_scratch

      # Remount the fs or clear the page cache to trigger the bug in
      # btrfs. Because the extent has an uncompressed length that is a
      # multiple of 16 pages, all the pages belonging to the second range
      # of the file (64K to 128K), which points to the same extent as the
      # first range (0K to 64K), had their contents full of zeroes instead
      # of the byte 0xaa. This was a bug exclusively in the read path of
      # compressed extents, the correct data was stored on disk, btrfs
      # just failed to fill in the pages correctly.
      _scratch_remount

      echo "File digest after remount:"
      # Must match the digest we got before.
      md5sum $SCRATCH_MNT/foo | _filter_scratch
  }

  echo -e "\nTesting with zlib compression..."
  test_clone_and_read_compressed_extent "-o compress=zlib"

  _scratch_unmount

  echo -e "\nTesting with lzo compression..."
  test_clone_and_read_compressed_extent "-o compress=lzo"

  status=0
  exit

Signed-off-by: Filipe Manana <fdmanana@xxxxxxxx>
Tested-by: Timofey Titovets <nefelim4ag@xxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
 fs/btrfs/extent_io.c |   19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3128,12 +3128,12 @@ static inline void __do_contiguous_readp
 					     get_extent_t *get_extent,
 					     struct extent_map **em_cached,
 					     struct bio **bio, int mirror_num,
-					     unsigned long *bio_flags, int rw)
+					     unsigned long *bio_flags, int rw,
+					     u64 *prev_em_start)
 {
 	struct inode *inode;
 	struct btrfs_ordered_extent *ordered;
 	int index;
-	u64 prev_em_start = (u64)-1;
 
 	inode = pages[0]->mapping->host;
 	while (1) {
@@ -3149,7 +3149,7 @@ static inline void __do_contiguous_readp
 
 	for (index = 0; index < nr_pages; index++) {
 		__do_readpage(tree, pages[index], get_extent, em_cached, bio,
-			      mirror_num, bio_flags, rw, &prev_em_start);
+			      mirror_num, bio_flags, rw, prev_em_start);
 		page_cache_release(pages[index]);
 	}
 }
@@ -3159,7 +3159,8 @@ static void __extent_readpages(struct ex
 			       int nr_pages, get_extent_t *get_extent,
 			       struct extent_map **em_cached,
 			       struct bio **bio, int mirror_num,
-			       unsigned long *bio_flags, int rw)
+			       unsigned long *bio_flags, int rw,
+			       u64 *prev_em_start)
 {
 	u64 start = 0;
 	u64 end = 0;
@@ -3180,7 +3181,7 @@ static void __extent_readpages(struct ex
 						  index - first_index, start,
 						  end, get_extent, em_cached,
 						  bio, mirror_num, bio_flags,
-						  rw);
+						  rw, prev_em_start);
 			start = page_start;
 			end = start + PAGE_CACHE_SIZE - 1;
 			first_index = index;
@@ -3191,7 +3192,8 @@ static void __extent_readpages(struct ex
 		__do_contiguous_readpages(tree, &pages[first_index],
 					  index - first_index, start,
 					  end, get_extent, em_cached, bio,
-					  mirror_num, bio_flags, rw);
+					  mirror_num, bio_flags, rw,
+					  prev_em_start);
 }
 
 static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -4202,6 +4204,7 @@ int extent_readpages(struct extent_io_tr
 	struct page *page;
 	struct extent_map *em_cached = NULL;
 	int nr = 0;
+	u64 prev_em_start = (u64)-1;
 
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		page = list_entry(pages->prev, struct page, lru);
@@ -4218,12 +4221,12 @@ int extent_readpages(struct extent_io_tr
 		if (nr < ARRAY_SIZE(pagepool))
 			continue;
 		__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-				   &bio, 0, &bio_flags, READ);
+				   &bio, 0, &bio_flags, READ, &prev_em_start);
 		nr = 0;
 	}
 	if (nr)
 		__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-				   &bio, 0, &bio_flags, READ);
+				   &bio, 0, &bio_flags, READ, &prev_em_start);
 
 	if (em_cached)
 		free_extent_map(em_cached);


--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html