Change log from v1: o Fix a deadlock bug From 4dfd08780bc3e648de5470b53956256957e0885f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim <jaegeuk.kim@xxxxxxxxxxx> Date: Mon, 17 Dec 2012 13:37:08 +0900 Subject: [PATCH 1/2] f2fs: rewrite the f2fs_gc flow This patch rewrites the f2fs_gc flow based on the following policies. - background gc 1. set victim pages dirty 2. triggers no additional GC - on-demand gc 1. freeze all the fs operations through block_operations() 2. move victim pages explicitly 3. do checkpoint to reclaim freed sections 4. If there is not enough free sections, trigger GC again Signed-off-by: Jaegeuk Kim <jaegeuk.kim@xxxxxxxxxxx> --- fs/f2fs/checkpoint.c | 23 ++++-- fs/f2fs/data.c | 24 ++---- fs/f2fs/f2fs.h | 4 +- fs/f2fs/gc.c | 218 +++++++++++++++++---------------------------------- fs/f2fs/gc.h | 9 --- fs/f2fs/inode.c | 16 +--- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 1 - 8 files changed, 101 insertions(+), 196 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6ef36c3..81e04e4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -546,14 +546,9 @@ retry: /* * Freeze all the FS-operations for checkpoint. */ -void block_operations(struct f2fs_sb_info *sbi) +void block_data_writes(struct f2fs_sb_info *sbi) { int t; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; /* Stop renaming operation */ mutex_lock_op(sbi, RENAME); @@ -572,8 +567,15 @@ retry_dents: /* block all the operations */ for (t = DATA_NEW; t <= NODE_TRUNC; t++) mutex_lock_op(sbi, t); +} - mutex_lock(&sbi->write_inode); +void block_node_writes(struct f2fs_sb_info *sbi) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; /* * POR: we should ensure that there is no dirty node pages @@ -588,7 +590,12 @@ retry: mutex_unlock_op(sbi, NODE_WRITE); goto retry; } - mutex_unlock(&sbi->write_inode); +} + +void block_operations(struct f2fs_sb_info *sbi) +{ + block_data_writes(sbi); + block_node_writes(sbi); } static void unblock_operations(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 655aeab..3754104 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -589,33 +589,25 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - *pagep = page; - mutex_lock_op(sbi, DATA_NEW); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, 0); if (err) { mutex_unlock_op(sbi, DATA_NEW); - f2fs_put_page(page, 1); return err; } - - if (dn.data_blkaddr == NULL_ADDR) { + if (dn.data_blkaddr == NULL_ADDR) err = reserve_new_block(&dn); - if (err) { - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_NEW); - f2fs_put_page(page, 1); - return err; - } - } f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_NEW); + if (err) + return err; + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a18d63d..1277f0f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -962,6 +962,8 @@ void set_dirty_dir_page(struct inode *, struct page *); void remove_dirty_dir_inode(struct inode *); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void block_operations(struct f2fs_sb_info *); +void block_data_writes(struct f2fs_sb_info *); +void block_node_writes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool, bool); void init_orphan_info(struct f2fs_sb_info *); int create_checkpoint_caches(void); @@ -984,7 +986,7 @@ int do_write_data_page(struct page *); int start_gc_thread(struct f2fs_sb_info *); void stop_gc_thread(struct f2fs_sb_info *); block_t start_bidx_of_node(unsigned int); -int f2fs_gc(struct f2fs_sb_info *, int); +int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); int create_gc_caches(void); void destroy_gc_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 644aa38..b72e5e6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -78,7 +78,7 @@ static int gc_thread_func(void *data) sbi->bg_gc++; - if (f2fs_gc(sbi, 1) == GC_NONE) + if (f2fs_gc(sbi) == 0) wait_ms = GC_THREAD_NOGC_SLEEP_TIME; else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) wait_ms = GC_THREAD_MAX_SLEEP_TIME; @@ -300,31 +300,23 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *add_gc_inode(struct super_block *sb, + struct list_head *ilist, nid_t ino) { struct list_head *this; - struct inode_entry *ie; + struct inode_entry *new_ie, *ie; + struct inode *inode; list_for_each(this, ilist) { ie = list_entry(this, struct inode_entry, list); if (ie->inode->i_ino == ino) return ie->inode; } - return NULL; -} - -static void add_gc_inode(struct inode *inode, struct list_head *ilist) -{ - struct list_head *this; - struct inode_entry *new_ie, *ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); - if (ie->inode == inode) { - iput(inode); - return; - } - } + /* there is no inode in the list */ + inode = f2fs_iget_nowait(sb, ino); + if (IS_ERR(inode)) + return NULL; repeat: new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); if (!new_ie) { @@ -333,6 +325,7 @@ repeat: } new_ie->inode = inode; list_add_tail(&new_ie->list, ilist); + return inode; } static void put_gc_inode(struct list_head *ilist) @@ -356,7 +349,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); ret = f2fs_test_bit(offset, sentry->cur_valid_map); mutex_unlock(&sit_i->sentry_lock); - return ret ? GC_OK : GC_NEXT; + return ret; } /* @@ -364,8 +357,8 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static int gc_node_segment(struct f2fs_sb_info *sbi, - struct f2fs_summary *sum, unsigned int segno, int gc_type) +static void gc_node_segment(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, unsigned int segno) { bool initial = true; struct f2fs_summary *entry; @@ -376,23 +369,8 @@ next_step: for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; - int err; - - /* - * It makes sure that free segments are able to write - * all the dirty node pages before CP after this CP. - * So let's check the space of dirty node pages. - */ - if (should_do_checkpoint(sbi)) { - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - return GC_BLOCKED; - } - err = check_valid_map(sbi, segno, off); - if (err == GC_ERROR) - return err; - else if (err == GC_NEXT) + if (!check_valid_map(sbi, segno, off)) continue; if (initial) { @@ -413,16 +391,6 @@ next_step: initial = false; goto next_step; } - - if (gc_type == FG_GC) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - sync_node_pages(sbi, 0, &wbc); - } - return GC_DONE; } /* @@ -467,13 +435,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, node_page = get_node_page(sbi, nid); if (IS_ERR(node_page)) - return GC_NEXT; + return PTR_ERR(node_page); get_node_info(sbi, nid, dni); if (sum->version != dni->version) { f2fs_put_page(node_page, 1); - return GC_NEXT; + return -EINVAL; } *nofs = ofs_of_node(node_page); @@ -481,8 +449,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) - return GC_NEXT; - return GC_OK; + return -EINVAL; + return 0; } static void move_data_page(struct inode *inode, struct page *page, int gc_type) @@ -501,7 +469,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) set_cold_data(page); } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - mutex_lock_op(sbi, DATA_WRITE); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { dec_page_count(sbi, F2FS_DIRTY_DENTS); @@ -509,7 +476,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) } set_cold_data(page); do_write_data_page(page); - mutex_unlock_op(sbi, DATA_WRITE); clear_cold_data(page); } out: @@ -523,13 +489,12 @@ out: * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct list_head *ilist, unsigned int segno, int gc_type) { - struct super_block *sb = sbi->sb; struct f2fs_summary *entry; block_t start_addr; - int err, off; + int off; int phase = 0; start_addr = START_BLOCK(sbi, segno); @@ -540,25 +505,11 @@ next_step: struct page *data_page; struct inode *inode; struct node_info dni; /* dnode info for the data */ - unsigned int ofs_in_node, nofs; + unsigned int ofs_in_node; + unsigned int nofs = -1; block_t start_bidx; - /* - * It makes sure that free segments are able to write - * all the dirty node pages before CP after this CP. - * So let's check the space of dirty node pages. - */ - if (should_do_checkpoint(sbi)) { - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - err = GC_BLOCKED; - goto stop; - } - - err = check_valid_map(sbi, segno, off); - if (err == GC_ERROR) - goto stop; - else if (err == GC_NEXT) + if (!check_valid_map(sbi, segno, off)) continue; if (phase == 0) { @@ -567,10 +518,7 @@ next_step: } /* Get an inode by ino with checking validity */ - err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); - if (err == GC_ERROR) - goto stop; - else if (err == GC_NEXT) + if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs)) continue; if (phase == 1) { @@ -582,39 +530,25 @@ next_step: ofs_in_node = le16_to_cpu(entry->ofs_in_node); if (phase == 2) { - inode = f2fs_iget_nowait(sb, dni.ino); - if (IS_ERR(inode)) + inode = add_gc_inode(sbi->sb, ilist, dni.ino); + if (!inode) continue; data_page = find_data_page(inode, start_bidx + ofs_in_node); if (IS_ERR(data_page)) - goto next_iput; - - f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - data_page = get_lock_data_page(inode, - start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; + continue; + + if (trylock_page(data_page)) { move_data_page(inode, data_page, gc_type); stat_inc_data_blk_count(sbi, 1); + } else { + f2fs_put_page(data_page, 0); } } - continue; -next_iput: - iput(inode); } - if (++phase < 4) + if (++phase < 3) goto next_step; - err = GC_DONE; -stop: - if (gc_type == FG_GC) - f2fs_submit_bio(sbi, DATA, true); - return err; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -633,12 +567,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, { struct page *sum_page; struct f2fs_summary_block *sum; - int ret = GC_DONE; /* read segment summary of victim */ sum_page = get_sum_page(sbi, segno); if (IS_ERR(sum_page)) - return GC_ERROR; + return PTR_ERR(sum_page); /* * CP needs to lock sum_page. In this time, we don't need @@ -650,76 +583,67 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, switch (GET_SUM_TYPE((&sum->footer))) { case SUM_TYPE_NODE: - ret = gc_node_segment(sbi, sum->entries, segno, gc_type); + gc_node_segment(sbi, sum->entries, segno); break; case SUM_TYPE_DATA: - ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); break; } stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); stat_inc_call_count(sbi->stat_info); f2fs_put_page(sum_page, 0); - return ret; + return 0; } -int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) +int f2fs_gc(struct f2fs_sb_info *sbi) { - unsigned int segno; - int old_free_secs, cur_free_secs; - int gc_status, nfree; + unsigned int i, segno; struct list_head ilist; - int gc_type = BG_GC; + int nfree = 0, gc_type = BG_GC; - INIT_LIST_HEAD(&ilist); -gc_more: - nfree = 0; - gc_status = GC_NONE; + if (sbi->sb->s_flags & MS_RDONLY) + goto out; - if (has_not_enough_free_secs(sbi)) - old_free_secs = reserved_sections(sbi); - else - old_free_secs = free_sections(sbi); + INIT_LIST_HEAD(&ilist); +do_more: + /* Phase 1: initialize conditions */ + if (has_not_enough_free_secs(sbi)) { + gc_type = FG_GC; - while (sbi->sb->s_flags & MS_ACTIVE) { - int i; - if (has_not_enough_free_secs(sbi)) - gc_type = FG_GC; + /* + * It makes sure that free segments are able to write + * all the dirty node pages before CP after this CP. + * So let's check the space of dirty node pages. + */ + mutex_lock(&sbi->cp_mutex); + block_data_writes(sbi); + } - cur_free_secs = free_sections(sbi) + nfree; + /* Phase 2: do GC */ + if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + goto stop; - /* We got free space successfully. */ - if (nGC < cur_free_secs - old_free_secs) - break; + for (i = 0; i < sbi->segs_per_sec; i++) + if (do_garbage_collect(sbi, segno + i, &ilist, gc_type)) + goto stop; - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) - break; + if (gc_type == FG_GC) + block_node_writes(sbi); - for (i = 0; i < sbi->segs_per_sec; i++) { - /* - * do_garbage_collect will give us three gc_status: - * GC_ERROR, GC_DONE, and GC_BLOCKED. - * If GC is finished uncleanly, we have to return - * the victim to dirty segment list. - */ - gc_status = do_garbage_collect(sbi, segno + i, - &ilist, gc_type); - if (gc_status != GC_DONE) - goto stop; - nfree++; - } - } + nfree++; stop: - if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) { - write_checkpoint(sbi, (gc_status == GC_BLOCKED), false); - if (nfree) - goto gc_more; + /* Phase 3: finalize GC */ + if (gc_type == FG_GC) { + write_checkpoint(sbi, true, false); + if (has_not_enough_free_secs(sbi)) + goto do_more; } - mutex_unlock(&sbi->gc_mutex); put_gc_inode(&ilist); - BUG_ON(!list_empty(&ilist)); - return gc_status; +out: + mutex_unlock(&sbi->gc_mutex); + return nfree; } void build_gc_manager(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index b026d93..0513adf 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -23,15 +23,6 @@ /* Search max. number of dirty segments to select a victim segment */ #define MAX_VICTIM_SEARCH 20 -enum { - GC_NONE = 0, - GC_ERROR, - GC_OK, - GC_NEXT, - GC_BLOCKED, - GC_DONE, -}; - struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index bf20b4d..3dac587 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -211,30 +211,20 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *node_page; - bool need_lock = false; if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) return 0; + if (wbc) + f2fs_balance_fs(sbi); + node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) return PTR_ERR(node_page); - if (!PageDirty(node_page)) { - need_lock = true; - f2fs_put_page(node_page, 1); - mutex_lock(&sbi->write_inode); - node_page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) { - mutex_unlock(&sbi->write_inode); - return PTR_ERR(node_page); - } - } update_inode(inode, node_page); f2fs_put_page(node_page, 1); - if (need_lock) - mutex_unlock(&sbi->write_inode); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8bc1b6f..3286bc1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -62,7 +62,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) if (has_not_enough_free_secs(sbi)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, 1); + f2fs_gc(sbi); } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 50240d2..6e3f57d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -465,7 +465,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->write_inode); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); for (i = 0; i < NR_LOCK_TYPE; i++) -- 1.8.0.1.250.gb7973fb -- Jaegeuk Kim Samsung
Attachment:
signature.asc
Description: This is a digitally signed message part