[RFC][PATCH 1/3] ext4 online defrag (ver 0.7)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Akira Fujita <a-fujita@xxxxxxxxxxxxx>

Interchange the data blocks of the target and temporary files
in an atomic manner.

Signed-off-by: Akira Fujita <a-fujita@xxxxxxxxxxxxx>
Signed-off-by: Takashi Sato <t-sato@xxxxxxxxxxxxx>
---
 fs/ext4/defrag.c |   55 ++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index d22bec9..d9e01ea 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -392,7 +392,7 @@ static int
 ext4_ext_defrag_reserve(struct inode *inode, ext4_fsblk_t goal, int len)
 {
 	struct super_block *sb = NULL;
-	handle_t *handle = NULL;
+	handle_t *handle;
 	struct buffer_head *bitmap_bh = NULL;
 	struct ext4_block_alloc_info *block_i;
 	struct ext4_reserve_window_node *my_rsv = NULL;
@@ -1301,11 +1301,10 @@ ext4_ext_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
  * Replace extents for blocks from "from" to "from + count - 1".
  */
 static int
-ext4_ext_replace_branches(struct inode *org_inode, struct inode *dest_inode,
-			pgoff_t from_page,  pgoff_t dest_from_page,
-			pgoff_t count_page, int flag)
+ext4_ext_replace_branches(handle_t *handle, struct inode *org_inode,
+			struct inode *dest_inode, pgoff_t from_page,
+			pgoff_t dest_from_page, pgoff_t count_page, int flag)
 {
-	handle_t *handle = NULL;
 	struct ext4_ext_path *org_path = NULL;
 	struct ext4_ext_path *dest_path = NULL;
 	struct ext4_extent *oext, *dext, *swap_ext;
@@ -1314,7 +1313,6 @@ ext4_ext_replace_branches(struct inode *org_inode, struct inode *dest_inode,
 	int err = 0;
 	int depth;
 	int replaced_count = 0;
-	unsigned jnum;
 
 	from = (ext4_lblk_t)from_page <<
 			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
@@ -1322,12 +1320,6 @@ ext4_ext_replace_branches(struct inode *org_inode, struct inode *dest_inode,
 			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
 	dest_off = (ext4_lblk_t)dest_from_page <<
 			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
-	jnum = ext4_ext_writepage_trans_blocks(org_inode, count) + 3;
-	handle = ext4_journal_start(org_inode, jnum);
-	if (IS_ERR(handle)) {
-		err = PTR_ERR(handle);
-		goto out;
-	}
 
 	/* Get the original extent for the block "from" */
 	org_path = ext4_ext_find_extent(org_inode, from, NULL);
@@ -1455,8 +1447,6 @@ ext4_ext_replace_branches(struct inode *org_inode, struct inode *dest_inode,
 	}
 
 out:
-	if (handle)
-		ext4_journal_stop(handle);
 	if (org_path) {
 		ext4_ext_drop_refs(org_path);
 		kfree(org_path);
@@ -1686,9 +1676,22 @@ ext4_ext_defrag_partial(struct inode *tmp_inode, struct file *filp,
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
+	handle_t *handle;
 	pgoff_t offset_in_page = PAGE_SIZE;
+	int jblocks;
 	int ret = 0;
 
+	/*
+	 * Reserve twice the usual number of journal buffers because inode
+	 * and tmp_inode may each modify different metadata blocks.
+	 */
+	jblocks = ext4_writepage_trans_blocks(inode) * 2;
+	handle = ext4_journal_start(inode, jblocks);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		return ret;
+	}
+
 	up_write(&EXT4_I(inode)->i_data_sem);
 	page = read_cache_page(inode->i_mapping, org_offset,
 		(filler_t *)inode->i_mapping->a_ops->readpage, NULL);
@@ -1713,8 +1716,8 @@ ext4_ext_defrag_partial(struct inode *tmp_inode, struct file *filp,
 
 	/* release old bh and drop refs */
 	try_to_release_page(page, 0);
-	ret = ext4_ext_replace_branches(inode, tmp_inode, org_offset,
-			dest_offset, 1, flag);
+	ret = ext4_ext_replace_branches(handle, inode, tmp_inode,
+					org_offset, dest_offset, 1, flag);
 	if (ret < 0)
 		goto ERR;
 
@@ -1744,6 +1747,7 @@ ext4_ext_defrag_partial(struct inode *tmp_inode, struct file *filp,
 ERR:
 	unlock_page(page);
 	page_cache_release(page);
+	ext4_journal_stop(handle);
 
 	return (ret < 0 ? ret : 0);
 }
@@ -1766,7 +1770,9 @@ ext4_ext_defrag_partial2(struct inode *tmp_inode, struct file *filp,
 	struct buffer_head *bh;
 	struct page *page;
 	const struct address_space_operations *a_ops = mapping->a_ops;
+	handle_t *handle;
 	pgoff_t offset_in_page = PAGE_SIZE;
+	int jblocks;
 	int ret = 0;
 	int blocksize = inode->i_sb->s_blocksize;
 	int blocks_per_page = 0;
@@ -1776,6 +1782,17 @@ ext4_ext_defrag_partial2(struct inode *tmp_inode, struct file *filp,
 	unsigned int w_flags = 0;
 	void *fsdata;
 
+	/*
+	 * Reserve twice the usual number of journal buffers because inode
+	 * and tmp_inode may each modify different metadata blocks.
+	 */
+	jblocks = ext4_writepage_trans_blocks(inode) * 2;
+	handle = ext4_journal_start(inode, jblocks);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		return ret;
+	}
+
 	if (segment_eq(get_fs(), KERNEL_DS))
 		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
 
@@ -1815,8 +1832,8 @@ ext4_ext_defrag_partial2(struct inode *tmp_inode, struct file *filp,
 
 	/* release old bh and drop refs */
 	try_to_release_page(page, 0);
-	ret = ext4_ext_replace_branches(inode, tmp_inode, org_offset,
-			dest_offset, 1, flag);
+	ret = ext4_ext_replace_branches(handle, inode, tmp_inode,
+					org_offset, dest_offset, 1, flag);
 
 	if (ret < 0)
 		goto ERR;
@@ -1849,6 +1866,8 @@ ext4_ext_defrag_partial2(struct inode *tmp_inode, struct file *filp,
 	if (unlikely(ret < 0))
 		goto ERR;
 ERR:
+	ext4_journal_stop(handle);
+
 	return (ret < 0 ? ret : 0);
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux