[PATCH 1/2] reiser4: Auto-punching holes: basic stuff

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



. Auto-punching holes: basic stuff;
. Handle empty nodes that appear after node conversion at flush time.
  Cache the locked right neighbor in the conversion info (struct
  convert_info) attached to struct flush_pos, so that it can still be
  accessed when the current node becomes empty and hence is removed
  from the tree.
Signed-off-by: Edward Shishkin <edward.shishkin@xxxxxxxxx>

---
 fs/reiser4/flush.c                     |  107 ++++++++++++++++++++++++---------
 fs/reiser4/flush.h                     |    4 -
 fs/reiser4/init_super.c                |    2 
 fs/reiser4/plugin/file/cryptcompress.c |   32 +++++++++
 fs/reiser4/plugin/file/cryptcompress.h |    1 
 fs/reiser4/plugin/item/ctail.c         |   75 +++++++++++++++++------
 fs/reiser4/super.h                     |    4 -
 7 files changed, 175 insertions(+), 50 deletions(-)

--- a/fs/reiser4/plugin/file/cryptcompress.c
+++ b/fs/reiser4/plugin/file/cryptcompress.c
@@ -921,12 +921,34 @@ static unsigned deflate_overrun(struct i
 	return coa_overrun(inode_compression_plugin(inode), ilen);
 }
 
+static bool is_all_zero(char const* mem, size_t size)
+{
+	while (size-- > 0)
+		if (*mem++)
+			return false;
+	return true;
+}
+
+static inline bool should_punch_hole(struct tfm_cluster *tc)
+{
+	if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_PUNCH_HOLES)
+	    && is_all_zero(tfm_stream_data(tc, INPUT_STREAM), tc->lsize)) {
+		/*
+		 * the logical cluster is filled with zeros,
+		 * so we'll punch a hole
+		 */
+		tc->all_zero = 1;
+		return true;
+	}
+	return false;
+}
+
 /* Estimating compressibility of a logical cluster by various
    policies represented by compression mode plugin.
    If this returns false, then compressor won't be called for
    the cluster of index @index.
 */
-static int should_compress(struct tfm_cluster * tc, cloff_t index,
+static int should_compress(struct tfm_cluster *tc, cloff_t index,
 			   struct inode *inode)
 {
 	compression_plugin *cplug = inode_compression_plugin(inode);
@@ -936,6 +958,12 @@ static int should_compress(struct tfm_cl
 	assert("edward-1322", cplug != NULL);
 	assert("edward-1323", mplug != NULL);
 
+	if (should_punch_hole(tc))
+		/*
+		 * we are about to punch a hole,
+		 * so don't compress data
+		 */
+		return 0;
 	return /* estimate by size */
 		(cplug->min_size_deflate ?
 		 tc->len >= cplug->min_size_deflate() :
@@ -3368,7 +3396,7 @@ static int prune_cryptcompress(struct in
 	       clust.dstat == UNPR_DISK_CLUSTER);
 
 	assert("edward-1191", inode->i_size == new_size);
-	assert("edward-1206", body_truncate_ok(inode, ridx));
+
  truncate_fake:
 	/* drop all the pages that don't have jnodes (i.e. pages
 	   which can not be truncated by cut_file_items() because
--- a/fs/reiser4/plugin/file/cryptcompress.h
+++ b/fs/reiser4/plugin/file/cryptcompress.h
@@ -159,6 +159,7 @@ struct tfm_cluster {
 	int uptodate;
 	int lsize;        /* number of bytes in logical cluster */
 	int len;          /* length of the transform stream */
+	int all_zero;     /* logical cluster is filled with zeros */
 };
 
 static inline coa_t get_coa(struct tfm_cluster * tc, reiser4_compression_id id,
--- a/fs/reiser4/plugin/item/ctail.c
+++ b/fs/reiser4/plugin/item/ctail.c
@@ -1177,6 +1177,8 @@ static int alloc_item_convert_data(struc
 	sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
 	if (sq->itm == NULL)
 		return RETERR(-ENOMEM);
+	init_lh(&sq->right_lock);
+	sq->right_locked = 0;
 	return 0;
 }
 
@@ -1186,22 +1188,28 @@ static void free_item_convert_data(struc
 	assert("edward-819", sq->itm != NULL);
 	assert("edward-820", sq->iplug != NULL);
 
+	done_lh(&sq->right_lock);
+	sq->right_locked = 0;
 	kfree(sq->itm);
 	sq->itm = NULL;
 	return;
 }
 
-static int alloc_convert_data(flush_pos_t * pos)
+static struct convert_info *alloc_convert_data(void)
 {
-	assert("edward-821", pos != NULL);
-	assert("edward-822", pos->sq == NULL);
+	struct convert_info *info;
 
-	pos->sq = kmalloc(sizeof(*pos->sq), reiser4_ctx_gfp_mask_get());
-	if (!pos->sq)
-		return RETERR(-ENOMEM);
-	memset(pos->sq, 0, sizeof(*pos->sq));
-	cluster_init_write(&pos->sq->clust, NULL);
-	return 0;
+	info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
+	if (info != NULL) {
+		memset(info, 0, sizeof(*info));
+		cluster_init_write(&info->clust, NULL);
+	}
+	return info;
+}
+
+static void reset_convert_data(struct convert_info *info)
+{
+	info->clust.tc.all_zero = 0;
 }
 
 void free_convert_data(flush_pos_t * pos)
@@ -1230,7 +1238,6 @@ static int init_item_convert_data(flush_
 	assert("edward-828", inode != NULL);
 
 	sq = pos->sq;
-
 	memset(sq->itm, 0, sizeof(*sq->itm));
 
 	/* iplug->init_convert_data() */
@@ -1258,10 +1265,13 @@ static int attach_convert_idata(flush_po
 	       item_plugin_by_id(CTAIL_ID));
 
 	if (!pos->sq) {
-		ret = alloc_convert_data(pos);
-		if (ret)
-			return ret;
+		pos->sq = alloc_convert_data();
+		if (!pos->sq)
+			return RETERR(-ENOMEM);
 	}
+	else
+		reset_convert_data(pos->sq);
+
 	clust = &pos->sq->clust;
 	ret = grab_coa(&clust->tc, cplug);
 	if (ret)
@@ -1300,6 +1310,9 @@ static int attach_convert_idata(flush_po
 			     clust->tc.len,
 			     clust_to_off(clust->index, inode),
 			     WRITE_OP, &info->flow);
+	if (clust->tc.all_zero)
+		info->flow.length = 0;
+
 	jput(pos->child);
 	return 0;
       err:
@@ -1420,6 +1433,7 @@ static int pre_convert_ctail(flush_pos_t
 		coord_init_before_first_item(&coord, slider);
 
 		if (node_is_empty(slider)) {
+			warning("edward-1641", "Found empty right neighbor");
 			znode_make_dirty(slider);
 			znode_set_convertible(slider);
 			/*
@@ -1450,14 +1464,25 @@ static int pre_convert_ctail(flush_pos_t
 				znode_set_convertible(slider);
 			}
 			stop = 1;
+			convert_data(pos)->right_locked = 1;
 		} else {
 			item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
 			stop = 1;
+			convert_data(pos)->right_locked = 1;
 		}
 		zrelse(slider);
 		done_lh(&slider_lh);
 		move_lh(&slider_lh, &right_lh);
 	}
+	if (convert_data(pos)->right_locked)
+		/*
+		 * Store locked right neighbor in
+		 * the conversion info. Otherwise,
+		 * we won't be able to access it,
+		 * if the current node gets deleted
+		 * during conversion
+		 */
+		move_lh(&convert_data(pos)->right_lock, &slider_lh);
 	done_lh(&slider_lh);
 	done_lh(&right_lh);
 
@@ -1566,11 +1591,25 @@ static int assign_conversion_mode(flush_
 			}
 			if (ret)
 				goto dont_convert;
-			/*
-			 * this is the first ctail in the cluster,
-			 * so it should be overwritten
-			 */
-			*mode = CTAIL_OVERWRITE_ITEM;
+
+			if (pos->sq->clust.tc.all_zero) {
+				assert("edward-1634",
+				      item_convert_data(pos)->flow.length == 0);
+				/*
+				 * new content is filled with zeros -
+				 * we punch a hole using cut (not kill)
+				 * primitive, so attached pages won't
+				 * be truncated
+				 */
+				*mode = CTAIL_CUT_ITEM;
+			}
+			else
+				/*
+				 * this is the first ctail in the cluster,
+				 * so it (possibly only its head) should be
+				 * overwritten
+				 */
+				*mode = CTAIL_OVERWRITE_ITEM;
 		} else
 			/*
 			 * non-convertible item
--- a/fs/reiser4/flush.h
+++ b/fs/reiser4/flush.h
@@ -74,6 +74,8 @@ struct convert_info {
 	item_plugin *iplug;	/* current item plugin */
 	struct convert_item_info *itm;	/* current item info */
 	struct cluster_handle clust;	/* transform cluster */
+	lock_handle right_lock; /* lock handle of the right neighbor */
+	int right_locked;
 };
 
 typedef enum flush_position_state {
@@ -231,7 +233,7 @@ static inline int should_terminate_squal
 	    item_convert_count(pos) >= SQUALLOC_THRESHOLD;
 }
 
-#if 1
+#if REISER4_DEBUG
 #define check_convert_info(pos)						\
 do {							        	\
 	if (unlikely(should_convert_right_neighbor(pos))) {		\
--- a/fs/reiser4/flush.c
+++ b/fs/reiser4/flush.c
@@ -1915,8 +1915,12 @@ out:
 }
 
 /*
- * Process nodes on leaf level until unformatted node or
- * rightmost node in the slum reached
+ * Process nodes on the leaf level until unformatted node or
+ * rightmost node in the slum reached.
+ *
+ * This function is a complicated beast, because it calls a
+ * static machine ->convert_node() for every node, which, in
+ * turn, scans node's items and does something for each of them.
  */
 static int handle_pos_on_formatted(flush_pos_t *pos)
 {
@@ -1933,19 +1937,39 @@ static int handle_pos_on_formatted(flush
 			return ret;
 	}
 	while (1) {
-		int expected;
-		expected = should_convert_right_neighbor(pos);
-		ret = neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE,
-				       ZNODE_WRITE_LOCK, !expected, expected);
-		if (ret) {
-			if (expected)
-				warning("edward-1495",
-		        "Right neighbor is expected but not found (%d). Fsck?",
-					ret);
-			break;
+		assert("edward-1635",
+		       ergo(node_is_empty(pos->lock.node),
+			    ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE)));
+		/*
+		 * First of all, grab a right neighbor
+		 */
+		if (convert_data(pos) && convert_data(pos)->right_locked) {
+			/*
+			 * the right neighbor was locked by convert_node();
+			 * transfer the lock from the "cache".
+			 */
+			move_lh(&right_lock, &convert_data(pos)->right_lock);
+			done_lh(&convert_data(pos)->right_lock);
+			convert_data(pos)->right_locked = 0;
+		}
+		else {
+			ret = neighbor_in_slum(pos->lock.node, &right_lock,
+					       RIGHT_SIDE, ZNODE_WRITE_LOCK,
+					       1, 0);
+			if (ret) {
+				/*
+				 * There is no right neighbor for some reason,
+				 * so finish with this level.
+				 */
+				assert("edward-1636",
+				       !should_convert_right_neighbor(pos));
+				break;
+			}
 		}
 		/*
-		 * we don't prep(allocate) nodes for flushing twice. This can be
+		 * Check "flushprepped" status of the right neighbor.
+		 *
+		 * We don't prep(allocate) nodes for flushing twice. This can be
 		 * suboptimal, or it can be optimal. For now we choose to live
 		 * with the risk that it will be suboptimal because it would be
 		 * quite complex to code it to be smarter.
@@ -1957,38 +1981,65 @@ static int handle_pos_on_formatted(flush
 			pos_stop(pos);
 			break;
 		}
-
 		ret = incr_load_count_znode(&right_load, right_lock.node);
 		if (ret)
 			break;
 		if (znode_convertible(right_lock.node)) {
+			assert("edward-xxxx",
+			       ergo(convert_data(pos),
+				    convert_data(pos)->right_locked == 0));
+
 			ret = convert_node(pos, right_lock.node);
 			if (ret)
 				break;
-			if (unlikely(node_is_empty(right_lock.node))) {
-				/*
-				 * node became empty after convertion,
-				 * skip this
-				 */
-				done_load_count(&right_load);
-				done_lh(&right_lock);
-				continue;
-			}
+		}
+		else
+			assert("edward-1637",
+			       !should_convert_right_neighbor(pos));
+
+		if (node_is_empty(pos->lock.node)) {
+			/*
+			 * Current node became empty after conversion
+			 * and, hence, was removed from the tree;
+			 * Advance the current position to the right neighbor.
+			 */
+			assert("edward-1638",
+			       ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE));
+			move_flush_pos(pos, &right_lock, &right_load, NULL);
+			continue;
+		}
+		if (node_is_empty(right_lock.node)) {
+			assert("edward-1639",
+			       ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
+			/*
+			 * The right neighbor became empty after
+			 * conversion, and hence it was deleted
+			 * from the tree - skip this.
+			 * Since current node is not empty,
+			 * we'll obtain a correct pointer to
+			 * the next right neighbor
+			 */
+			done_load_count(&right_load);
+			done_lh(&right_lock);
+			continue;
 		}
 		/*
-		 * Current node and its right neighbor are converted.
+		 * At this point both the current node and its right
+		 * neighbor are converted and not empty.
 		 * Squeeze them _before_ going upward.
 		 */
 		ret = squeeze_right_neighbor(pos, pos->lock.node,
 					     right_lock.node);
 		if (ret < 0)
 			break;
-
 		if (node_is_empty(right_lock.node)) {
+			assert("edward-1640",
+			       ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
 			/*
-                         * right node was squeezed completely,
-                         * skip this
-                         */
+			 * right neighbor was squeezed completely,
+			 * and hence has been deleted from the tree.
+			 * Skip this.
+			 */
                         done_load_count(&right_load);
                         done_lh(&right_lock);
                         continue;
--- a/fs/reiser4/init_super.c
+++ b/fs/reiser4/init_super.c
@@ -496,6 +496,8 @@ int reiser4_init_super_data(struct super
 	PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER);
 	/* enable issuing of discard requests */
 	PUSH_BIT_OPT("discard", REISER4_DISCARD);
+	/* disable hole punching at flush time */
+	PUSH_BIT_OPT("dont_punch_holes", REISER4_DONT_PUNCH_HOLES);
 
 	PUSH_OPT(p, opts,
 	{
--- a/fs/reiser4/super.h
+++ b/fs/reiser4/super.h
@@ -53,7 +53,9 @@ typedef enum {
 	/* don't use write barriers in the log writer code. */
 	REISER4_NO_WRITE_BARRIER = 7,
 	/* enable issuing of discard requests */
-	REISER4_DISCARD = 8
+	REISER4_DISCARD = 8,
+	/* disable hole punching at flush time */
+	REISER4_DONT_PUNCH_HOLES = 9
 } reiser4_fs_flag;
 
 /*

[Index of Archives]     [Linux File System Development]     [Linux BTRFS]     [Linux NFS]     [Linux Filesystems]     [Ext4 Filesystem]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Resources]

  Powered by Linux