Re: [PATCH v2] f2fs: rewrite the f2fs_gc flow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Change log from v1:
 o Fix a deadlock bug

From 4dfd08780bc3e648de5470b53956256957e0885f Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@xxxxxxxxxxx>
Date: Mon, 17 Dec 2012 13:37:08 +0900
Subject: [PATCH 1/2] f2fs: rewrite the f2fs_gc flow

This patch rewrites the f2fs_gc flow based on the following policies.

- background gc
 1. set victim pages dirty
 2. triggers no additional GC

- on-demand gc
 1. freeze all the fs operations through block_operations()
 2. move victim pages explicitly
 3. do checkpoint to reclaim freed sections
 4. If there is not enough free sections, trigger GC again

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@xxxxxxxxxxx>
---
 fs/f2fs/checkpoint.c |  23 ++++--
 fs/f2fs/data.c       |  24 ++----
 fs/f2fs/f2fs.h       |   4 +-
 fs/f2fs/gc.c         | 218
+++++++++++++++++----------------------------------
 fs/f2fs/gc.h         |   9 ---
 fs/f2fs/inode.c      |  16 +---
 fs/f2fs/segment.c    |   2 +-
 fs/f2fs/super.c      |   1 -
 8 files changed, 101 insertions(+), 196 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c3..81e04e4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -546,14 +546,9 @@ retry:
 /*
  * Freeze all the FS-operations for checkpoint.
  */
-void block_operations(struct f2fs_sb_info *sbi)
+void block_data_writes(struct f2fs_sb_info *sbi)
 {
 	int t;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.for_reclaim = 0,
-	};
 
 	/* Stop renaming operation */
 	mutex_lock_op(sbi, RENAME);
@@ -572,8 +567,15 @@ retry_dents:
 	/* block all the operations */
 	for (t = DATA_NEW; t <= NODE_TRUNC; t++)
 		mutex_lock_op(sbi, t);
+}
 
-	mutex_lock(&sbi->write_inode);
+void block_node_writes(struct f2fs_sb_info *sbi)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.for_reclaim = 0,
+	};
 
 	/*
 	 * POR: we should ensure that there is no dirty node pages
@@ -588,7 +590,12 @@ retry:
 		mutex_unlock_op(sbi, NODE_WRITE);
 		goto retry;
 	}
-	mutex_unlock(&sbi->write_inode);
+}
+
+void block_operations(struct f2fs_sb_info *sbi)
+{
+	block_data_writes(sbi);
+	block_node_writes(sbi);
 }
 
 static void unblock_operations(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 655aeab..3754104 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -589,33 +589,25 @@ static int f2fs_write_begin(struct file *file,
struct address_space *mapping,
 
 	f2fs_balance_fs(sbi);
 
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-	*pagep = page;
-
 	mutex_lock_op(sbi, DATA_NEW);
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, index, 0);
 	if (err) {
 		mutex_unlock_op(sbi, DATA_NEW);
-		f2fs_put_page(page, 1);
 		return err;
 	}
-
-	if (dn.data_blkaddr == NULL_ADDR) {
+	if (dn.data_blkaddr == NULL_ADDR)
 		err = reserve_new_block(&dn);
-		if (err) {
-			f2fs_put_dnode(&dn);
-			mutex_unlock_op(sbi, DATA_NEW);
-			f2fs_put_page(page, 1);
-			return err;
-		}
-	}
 	f2fs_put_dnode(&dn);
-
 	mutex_unlock_op(sbi, DATA_NEW);
+	if (err)
+		return err;
+
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
 
 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
 		return 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a18d63d..1277f0f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -962,6 +962,8 @@ void set_dirty_dir_page(struct inode *, struct page
*);
 void remove_dirty_dir_inode(struct inode *);
 void sync_dirty_dir_inodes(struct f2fs_sb_info *);
 void block_operations(struct f2fs_sb_info *);
+void block_data_writes(struct f2fs_sb_info *);
+void block_node_writes(struct f2fs_sb_info *);
 void write_checkpoint(struct f2fs_sb_info *, bool, bool);
 void init_orphan_info(struct f2fs_sb_info *);
 int create_checkpoint_caches(void);
@@ -984,7 +986,7 @@ int do_write_data_page(struct page *);
 int start_gc_thread(struct f2fs_sb_info *);
 void stop_gc_thread(struct f2fs_sb_info *);
 block_t start_bidx_of_node(unsigned int);
-int f2fs_gc(struct f2fs_sb_info *, int);
+int f2fs_gc(struct f2fs_sb_info *);
 void build_gc_manager(struct f2fs_sb_info *);
 int create_gc_caches(void);
 void destroy_gc_caches(void);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 644aa38..b72e5e6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -78,7 +78,7 @@ static int gc_thread_func(void *data)
 
 		sbi->bg_gc++;
 
-		if (f2fs_gc(sbi, 1) == GC_NONE)
+		if (f2fs_gc(sbi) == 0)
 			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
 		else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
 			wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -300,31 +300,23 @@ static const struct victim_selection default_v_ops
= {
 	.get_victim = get_victim_by_default,
 };
 
-static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
+static struct inode *add_gc_inode(struct super_block *sb,
+				struct list_head *ilist, nid_t ino)
 {
 	struct list_head *this;
-	struct inode_entry *ie;
+	struct inode_entry *new_ie, *ie;
+	struct inode *inode;
 
 	list_for_each(this, ilist) {
 		ie = list_entry(this, struct inode_entry, list);
 		if (ie->inode->i_ino == ino)
 			return ie->inode;
 	}
-	return NULL;
-}
-
-static void add_gc_inode(struct inode *inode, struct list_head *ilist)
-{
-	struct list_head *this;
-	struct inode_entry *new_ie, *ie;
 
-	list_for_each(this, ilist) {
-		ie = list_entry(this, struct inode_entry, list);
-		if (ie->inode == inode) {
-			iput(inode);
-			return;
-		}
-	}
+	/* there is no inode in the list */
+	inode = f2fs_iget_nowait(sb, ino);
+	if (IS_ERR(inode))
+		return NULL;
 repeat:
 	new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
 	if (!new_ie) {
@@ -333,6 +325,7 @@ repeat:
 	}
 	new_ie->inode = inode;
 	list_add_tail(&new_ie->list, ilist);
+	return inode;
 }
 
 static void put_gc_inode(struct list_head *ilist)
@@ -356,7 +349,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
 	sentry = get_seg_entry(sbi, segno);
 	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
 	mutex_unlock(&sit_i->sentry_lock);
-	return ret ? GC_OK : GC_NEXT;
+	return ret;
 }
 
 /*
@@ -364,8 +357,8 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
  * On validity, copy that node with cold status, otherwise (invalid
node)
  * ignore that.
  */
-static int gc_node_segment(struct f2fs_sb_info *sbi,
-		struct f2fs_summary *sum, unsigned int segno, int gc_type)
+static void gc_node_segment(struct f2fs_sb_info *sbi,
+		struct f2fs_summary *sum, unsigned int segno)
 {
 	bool initial = true;
 	struct f2fs_summary *entry;
@@ -376,23 +369,8 @@ next_step:
 	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
 		nid_t nid = le32_to_cpu(entry->nid);
 		struct page *node_page;
-		int err;
-
-		/*
-		 * It makes sure that free segments are able to write
-		 * all the dirty node pages before CP after this CP.
-		 * So let's check the space of dirty node pages.
-		 */
-		if (should_do_checkpoint(sbi)) {
-			mutex_lock(&sbi->cp_mutex);
-			block_operations(sbi);
-			return GC_BLOCKED;
-		}
 
-		err = check_valid_map(sbi, segno, off);
-		if (err == GC_ERROR)
-			return err;
-		else if (err == GC_NEXT)
+		if (!check_valid_map(sbi, segno, off))
 			continue;
 
 		if (initial) {
@@ -413,16 +391,6 @@ next_step:
 		initial = false;
 		goto next_step;
 	}
-
-	if (gc_type == FG_GC) {
-		struct writeback_control wbc = {
-			.sync_mode = WB_SYNC_ALL,
-			.nr_to_write = LONG_MAX,
-			.for_reclaim = 0,
-		};
-		sync_node_pages(sbi, 0, &wbc);
-	}
-	return GC_DONE;
 }
 
 /*
@@ -467,13 +435,13 @@ static int check_dnode(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum,
 
 	node_page = get_node_page(sbi, nid);
 	if (IS_ERR(node_page))
-		return GC_NEXT;
+		return PTR_ERR(node_page);
 
 	get_node_info(sbi, nid, dni);
 
 	if (sum->version != dni->version) {
 		f2fs_put_page(node_page, 1);
-		return GC_NEXT;
+		return -EINVAL;
 	}
 
 	*nofs = ofs_of_node(node_page);
@@ -481,8 +449,8 @@ static int check_dnode(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum,
 	f2fs_put_page(node_page, 1);
 
 	if (source_blkaddr != blkaddr)
-		return GC_NEXT;
-	return GC_OK;
+		return -EINVAL;
+	return 0;
 }
 
 static void move_data_page(struct inode *inode, struct page *page, int
gc_type)
@@ -501,7 +469,6 @@ static void move_data_page(struct inode *inode,
struct page *page, int gc_type)
 		set_cold_data(page);
 	} else {
 		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-		mutex_lock_op(sbi, DATA_WRITE);
 		if (clear_page_dirty_for_io(page) &&
 			S_ISDIR(inode->i_mode)) {
 			dec_page_count(sbi, F2FS_DIRTY_DENTS);
@@ -509,7 +476,6 @@ static void move_data_page(struct inode *inode,
struct page *page, int gc_type)
 		}
 		set_cold_data(page);
 		do_write_data_page(page);
-		mutex_unlock_op(sbi, DATA_WRITE);
 		clear_cold_data(page);
 	}
 out:
@@ -523,13 +489,12 @@ out:
  * If the parent node is not valid or the data block address is
different,
  * the victim data block is ignored.
  */
-static int gc_data_segment(struct f2fs_sb_info *sbi, struct
f2fs_summary *sum,
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct
f2fs_summary *sum,
 		struct list_head *ilist, unsigned int segno, int gc_type)
 {
-	struct super_block *sb = sbi->sb;
 	struct f2fs_summary *entry;
 	block_t start_addr;
-	int err, off;
+	int off;
 	int phase = 0;
 
 	start_addr = START_BLOCK(sbi, segno);
@@ -540,25 +505,11 @@ next_step:
 		struct page *data_page;
 		struct inode *inode;
 		struct node_info dni; /* dnode info for the data */
-		unsigned int ofs_in_node, nofs;
+		unsigned int ofs_in_node;
+		unsigned int nofs = -1;
 		block_t start_bidx;
 
-		/*
-		 * It makes sure that free segments are able to write
-		 * all the dirty node pages before CP after this CP.
-		 * So let's check the space of dirty node pages.
-		 */
-		if (should_do_checkpoint(sbi)) {
-			mutex_lock(&sbi->cp_mutex);
-			block_operations(sbi);
-			err = GC_BLOCKED;
-			goto stop;
-		}
-
-		err = check_valid_map(sbi, segno, off);
-		if (err == GC_ERROR)
-			goto stop;
-		else if (err == GC_NEXT)
+		if (!check_valid_map(sbi, segno, off))
 			continue;
 
 		if (phase == 0) {
@@ -567,10 +518,7 @@ next_step:
 		}
 
 		/* Get an inode by ino with checking validity */
-		err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
-		if (err == GC_ERROR)
-			goto stop;
-		else if (err == GC_NEXT)
+		if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs))
 			continue;
 
 		if (phase == 1) {
@@ -582,39 +530,25 @@ next_step:
 		ofs_in_node = le16_to_cpu(entry->ofs_in_node);
 
 		if (phase == 2) {
-			inode = f2fs_iget_nowait(sb, dni.ino);
-			if (IS_ERR(inode))
+			inode = add_gc_inode(sbi->sb, ilist, dni.ino);
+			if (!inode)
 				continue;
 
 			data_page = find_data_page(inode,
 					start_bidx + ofs_in_node);
 			if (IS_ERR(data_page))
-				goto next_iput;
-
-			f2fs_put_page(data_page, 0);
-			add_gc_inode(inode, ilist);
-		} else {
-			inode = find_gc_inode(dni.ino, ilist);
-			if (inode) {
-				data_page = get_lock_data_page(inode,
-						start_bidx + ofs_in_node);
-				if (IS_ERR(data_page))
-					continue;
+				continue;
+
+			if (trylock_page(data_page)) {
 				move_data_page(inode, data_page, gc_type);
 				stat_inc_data_blk_count(sbi, 1);
+			} else {
+				f2fs_put_page(data_page, 0);
 			}
 		}
-		continue;
-next_iput:
-		iput(inode);
 	}
-	if (++phase < 4)
+	if (++phase < 3)
 		goto next_step;
-	err = GC_DONE;
-stop:
-	if (gc_type == FG_GC)
-		f2fs_submit_bio(sbi, DATA, true);
-	return err;
 }
 
 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -633,12 +567,11 @@ static int do_garbage_collect(struct f2fs_sb_info
*sbi, unsigned int segno,
 {
 	struct page *sum_page;
 	struct f2fs_summary_block *sum;
-	int ret = GC_DONE;
 
 	/* read segment summary of victim */
 	sum_page = get_sum_page(sbi, segno);
 	if (IS_ERR(sum_page))
-		return GC_ERROR;
+		return PTR_ERR(sum_page);
 
 	/*
 	 * CP needs to lock sum_page. In this time, we don't need
@@ -650,76 +583,67 @@ static int do_garbage_collect(struct f2fs_sb_info
*sbi, unsigned int segno,
 
 	switch (GET_SUM_TYPE((&sum->footer))) {
 	case SUM_TYPE_NODE:
-		ret = gc_node_segment(sbi, sum->entries, segno, gc_type);
+		gc_node_segment(sbi, sum->entries, segno);
 		break;
 	case SUM_TYPE_DATA:
-		ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
+		gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
 		break;
 	}
 	stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
 	stat_inc_call_count(sbi->stat_info);
 
 	f2fs_put_page(sum_page, 0);
-	return ret;
+	return 0;
 }
 
-int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
+int f2fs_gc(struct f2fs_sb_info *sbi)
 {
-	unsigned int segno;
-	int old_free_secs, cur_free_secs;
-	int gc_status, nfree;
+	unsigned int i, segno;
 	struct list_head ilist;
-	int gc_type = BG_GC;
+	int nfree = 0, gc_type = BG_GC;
 
-	INIT_LIST_HEAD(&ilist);
-gc_more:
-	nfree = 0;
-	gc_status = GC_NONE;
+	if (sbi->sb->s_flags & MS_RDONLY)
+		goto out;
 
-	if (has_not_enough_free_secs(sbi))
-		old_free_secs = reserved_sections(sbi);
-	else
-		old_free_secs = free_sections(sbi);
+	INIT_LIST_HEAD(&ilist);
+do_more:
+	/* Phase 1: initialize conditions */
+	if (has_not_enough_free_secs(sbi)) {
+		gc_type = FG_GC;
 
-	while (sbi->sb->s_flags & MS_ACTIVE) {
-		int i;
-		if (has_not_enough_free_secs(sbi))
-			gc_type = FG_GC;
+		/*
+		 * It makes sure that free segments are able to write
+		 * all the dirty node pages before CP after this CP.
+		 * So let's check the space of dirty node pages.
+		 */
+		mutex_lock(&sbi->cp_mutex);
+		block_data_writes(sbi);
+	}
 
-		cur_free_secs = free_sections(sbi) + nfree;
+	/* Phase 2: do GC */
+	if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+		goto stop;
 
-		/* We got free space successfully. */
-		if (nGC < cur_free_secs - old_free_secs)
-			break;
+	for (i = 0; i < sbi->segs_per_sec; i++)
+		if (do_garbage_collect(sbi, segno + i, &ilist, gc_type))
+			goto stop;
 
-		if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
-			break;
+	if (gc_type == FG_GC)
+		block_node_writes(sbi);
 
-		for (i = 0; i < sbi->segs_per_sec; i++) {
-			/*
-			 * do_garbage_collect will give us three gc_status:
-			 * GC_ERROR, GC_DONE, and GC_BLOCKED.
-			 * If GC is finished uncleanly, we have to return
-			 * the victim to dirty segment list.
-			 */
-			gc_status = do_garbage_collect(sbi, segno + i,
-					&ilist, gc_type);
-			if (gc_status != GC_DONE)
-				goto stop;
-			nfree++;
-		}
-	}
+	nfree++;
 stop:
-	if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
-		write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
-		if (nfree)
-			goto gc_more;
+	/* Phase 3: finalize GC */
+	if (gc_type == FG_GC) {
+		write_checkpoint(sbi, true, false);
+		if (has_not_enough_free_secs(sbi))
+			goto do_more;
 	}
-	mutex_unlock(&sbi->gc_mutex);
 
 	put_gc_inode(&ilist);
-	BUG_ON(!list_empty(&ilist));
-	return gc_status;
+out:
+	mutex_unlock(&sbi->gc_mutex);
+	return nfree;
 }
 
 void build_gc_manager(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b026d93..0513adf 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -23,15 +23,6 @@
 /* Search max. number of dirty segments to select a victim segment */
 #define MAX_VICTIM_SEARCH	20
 
-enum {
-	GC_NONE = 0,
-	GC_ERROR,
-	GC_OK,
-	GC_NEXT,
-	GC_BLOCKED,
-	GC_DONE,
-};
-
 struct f2fs_gc_kthread {
 	struct task_struct *f2fs_gc_task;
 	wait_queue_head_t gc_wait_queue_head;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index bf20b4d..3dac587 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -211,30 +211,20 @@ int f2fs_write_inode(struct inode *inode, struct
writeback_control *wbc)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct page *node_page;
-	bool need_lock = false;
 
 	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
 			inode->i_ino == F2FS_META_INO(sbi))
 		return 0;
 
+	if (wbc)
+		f2fs_balance_fs(sbi);
+
 	node_page = get_node_page(sbi, inode->i_ino);
 	if (IS_ERR(node_page))
 		return PTR_ERR(node_page);
 
-	if (!PageDirty(node_page)) {
-		need_lock = true;
-		f2fs_put_page(node_page, 1);
-		mutex_lock(&sbi->write_inode);
-		node_page = get_node_page(sbi, inode->i_ino);
-		if (IS_ERR(node_page)) {
-			mutex_unlock(&sbi->write_inode);
-			return PTR_ERR(node_page);
-		}
-	}
 	update_inode(inode, node_page);
 	f2fs_put_page(node_page, 1);
-	if (need_lock)
-		mutex_unlock(&sbi->write_inode);
 	return 0;
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8bc1b6f..3286bc1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -62,7 +62,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
 
 	if (has_not_enough_free_secs(sbi)) {
 		mutex_lock(&sbi->gc_mutex);
-		f2fs_gc(sbi, 1);
+		f2fs_gc(sbi);
 	}
 }
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 50240d2..6e3f57d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -465,7 +465,6 @@ static int f2fs_fill_super(struct super_block *sb,
void *data, int silent)
 	sbi->raw_super = raw_super;
 	sbi->raw_super_buf = raw_super_buf;
 	mutex_init(&sbi->gc_mutex);
-	mutex_init(&sbi->write_inode);
 	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	for (i = 0; i < NR_LOCK_TYPE; i++)
-- 
1.8.0.1.250.gb7973fb




-- 
Jaegeuk Kim
Samsung

Attachment: signature.asc
Description: This is a digitally signed message part


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux