Since the f2fs mount process must scan all valid SIT entries and keep that information in memory for subsequent operations during mount, the mount performance is worse than ext4 on embedded devices. We found a way to improve the mount performance based on the current f2fs design strategy. In tests on a Galaxy SIII, the mount performance improved by 20% ~ 30%. Consider the following items: 1. The maximum count of SIT journal entries reserved in the current CURSEG_COLD_DATA segment information is 6 (SIT_JOURNAL_ENTRIES); that means the actual journal entries are no more than 6. 2. Each block in the SIT area can contain 55 entries (SIT_ENTRY_PER_BLOCK). Because there are no more than 6 journal entries in the checkpoint area, most SIT entries are fetched from SIT#0 or SIT#1, and all the valid SIT pages are read out to organize all SIT entries in memory. 3. Mostly, the valid SIT blocks exist in SIT#0 or SIT#1 contiguously. 4. Reading multiple contiguous pages within one bio is faster than reading pages one by one in multiple bios. Considering the items above, we tried to read multiple contiguous pages within one bio to build the SIT entries in memory. The following is the current design of the mount function build_sit_entries: 1. Cycle from the first segment to the final segment; 2. Scan all checkpoint journal entries; if the segment number is the same as the current cycle's segment number, read the SIT entry, keep it in memory, and go to step 1; otherwise, continue with step 3; 3. Read one meta page from SIT#0 or SIT#1 considering the current valid meta page bitmap, keep the SIT information in memory, and go to step 1. We change the design of build_sit_entries as follows: 1. Create a page_array with maximum size max_hw_blocks(sbi) (one page array can contain that maximum number of pages). 2. Cycle from the first SIT entry block to the final SIT entry block. 3. ra_sit_pages: read multiple contiguous SIT pages. 
If a) reached maximum size of page_array or b) sit blocks are converted from SIT#0 to SIT#1 or from SIT#1 to SIT#0, return to build_sit_entries; (that means, try to read continuous pages in SIT#0 or SIT#1 within one bio) 4. get pages that is read previously one by one, and reserve sit entry information in memory; go to step 2; 5. After all valid sit entries in SIT#0 or SIT#1 are reserved in memory, free page_array, scanning all journal sit entries in checkpoint area and cover the information to memory sit entries (sit_i->sentries). One more optimization is, considering most sit entries contain totally valid blocks or totally invalid blocks in one page because of f2fs allocation and garbage collection strategy, we changed the check function check_block_count for sit entry: Here is our temp patch base on f2fs of linux-next: Signed-off-by: Tan Shu <shu.tan@xxxxxxxxxxx> Reviewed-by: Li Fan < fanofcode.li@xxxxxxxxxxx> Reviewed-by: Yu Chao <chao2.yu@xxxxxxxxxxx> --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 211 +++++++++++++++++++++++++++++++++++++++++++++-------- fs/f2fs/segment.h | 18 +++++ 4 files changed, 202 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c old mode 100644 new mode 100755 index 2c02ec8..7d8e9f6 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -355,7 +355,7 @@ repeat: return page; } -static void read_end_io(struct bio *bio, int err) +void read_end_io(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h old mode 100644 new mode 100755 index 7fd99d8..9f3a784 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1117,6 +1117,7 @@ struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); +void 
read_end_io(struct bio *bio, int err); /* * gc.c diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c old mode 100644 new mode 100755 index bd79bbe..971838d --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -14,6 +14,9 @@ #include <linux/blkdev.h> #include <linux/prefetch.h> #include <linux/vmalloc.h> +#include <linux/mpage.h> +#include <linux/pagevec.h> +#include <linux/swap.h> #include "f2fs.h" #include "segment.h" @@ -1210,21 +1213,108 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, } return -1; } - -static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, - unsigned int segno) +static void ra_sit_pages(struct f2fs_sb_info *sbi, + struct page** page_array, + int array_size, + unsigned int start, + unsigned int* next, + unsigned int* base) { + struct address_space *mapping = sbi->meta_inode->i_mapping; struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); + block_t blk_addr = sit_i->sit_base_addr; + unsigned int sit_blk_cnt = (TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1)/SIT_ENTRY_PER_BLOCK; + unsigned int end = sit_blk_cnt; + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio = NULL; + struct blk_plug plug; + int writecnt = 0; + int sit_blkaddr; + int i; + unsigned int start_idx = 0, end_idx; - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + if (f2fs_test_bit(start, sit_i->sit_bitmap)) { blk_addr += sit_i->sit_blocks; + for (i = start + 1; i < sit_blk_cnt; i++) { + if (((i - start) == array_size) || (!f2fs_test_bit(i, sit_i->sit_bitmap))){ + end = i; + break; + } + } + } + else { + for (i = start + 1; i < sit_blk_cnt; i++){ + if (((i - start) == array_size) || (f2fs_test_bit(i, sit_i->sit_bitmap))){ + end = i; + break; + } + } + } + + *next = end; + *base = blk_addr; + + blk_start_plug(&plug); + down_read(&sbi->bio_sem); + + for (i = 0; i < end - start; i++) { 
+ sit_blkaddr = blk_addr + start + i; +repeat: + page_array[i] = grab_cache_page(mapping, sit_blkaddr); + if (!page_array[i]) { + cond_resched(); + goto repeat; + } - return get_meta_page(sbi, blk_addr); + if (PageUptodate(page_array[i])) { + /*Actually, this should not happen. But we add codes for confirmation*/ + f2fs_put_page(page_array[i], 1); + page_array[i] = NULL; + if (writecnt != 0){ + submit_bio(READ_SYNC, bio); + writecnt = 0; + } + continue; + } + + if (writecnt == 0){ + /* If writecnt is zero, we should allocate a new bio for submit */ + bio = f2fs_bio_alloc(bdev, array_size); + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, sit_blkaddr); + bio->bi_end_io = read_end_io; + start_idx = i; + } + + if (bio_add_page(bio, page_array[i], PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + kfree(bio->bi_private); + bio_put(bio); + up_read(&sbi->bio_sem); + blk_finish_plug(&plug); + /*Here we should put page from start_idx to end_idx. + For the pages in previous submitted bio, we can ignore them.*/ + end_idx = i; + goto exit; + } + writecnt++; + } + + if (writecnt) + submit_bio(READ_SYNC, bio); + + up_read(&sbi->bio_sem); + blk_finish_plug(&plug); + + return; + +exit: /*ATTENTIONS: If failed, build_sit_entries will check and call get_meta_page later*/ + for (i = start_idx; i <= end_idx; i++){ + if (page_array[i]){ + f2fs_put_page(page_array[i], 1); + page_array[i] = NULL; + } + } + return; } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1481,36 +1571,99 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned int start; - - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; struct f2fs_sit_block *sit_blk; struct f2fs_sit_entry sit; - struct page 
*page; - int i; + struct seg_entry *se; + unsigned int i, j; + struct page** page_array = NULL; + int array_size; + unsigned int sit_blk_cnt = (TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1)/SIT_ENTRY_PER_BLOCK; + struct address_space *mapping = sbi->meta_inode->i_mapping; + + array_size = max_hw_blocks(sbi); + page_array = (struct page**)vzalloc(sizeof(struct page*) * array_size); + + while (1){ + memset(page_array, 0x00, sizeof(struct page*) * array_size); + ra_sit_pages(sbi, page_array, array_size, start_sitblk, &next_sitblk, &base_sitblk); + + for (i = 0; i < next_sitblk - start_sitblk; i++) { + unsigned int sit_blkno; + + if (page_array[i]) + { + /* Get page from ra_sit_pages previously */ + lock_page(page_array[i]); + + if ((page_array[i]->mapping == mapping) + && PageUptodate(page_array[i])) + { + mark_page_accessed(page_array[i]); + } + else + { + /* Read single page, actually , this branch should not be entered. + We add this branch just for ensurance */ + f2fs_put_page(page_array[i], 1); + page_array[i] = NULL; + } + } + + if (!page_array[i]) + { + /* Read single page, actually , this branch should not be entered. 
+ We add this branch just for ensurance */ + page_array[i] = get_meta_page(sbi, base_sitblk + start_sitblk + i); + } + + sit_blkno = i + start_sitblk; + sit_blk = (struct f2fs_sit_block *)page_address(page_array[i]); + for (j = 0; j < sit_i->sents_per_block; j++){ + unsigned int segno; + segno = sit_blkno * sit_i->sents_per_block + j; + if (segno >= TOTAL_SEGS(sbi)){ + break; + } + + se = &sit_i->sentries[segno]; + sit = sit_blk->entries[j]; + check_block_count(sbi, segno, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, segno); + e->valid_blocks += se->valid_blocks; + } + } + + f2fs_put_page(page_array[i], 1); + } + + if (next_sitblk >= sit_blk_cnt){ + break; + } + + start_sitblk = next_sitblk; + } + + vfree(page_array); mutex_lock(&curseg->curseg_mutex); for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + j = le32_to_cpu(segno_in_journal(sum, i)); sit = sit_in_journal(sum, i); - mutex_unlock(&curseg->curseg_mutex); - goto got_it; - } - } - mutex_unlock(&curseg->curseg_mutex); - page = get_current_sit_page(sbi, start); - sit_blk = (struct f2fs_sit_block *)page_address(page); - sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); -got_it: - check_block_count(sbi, start, &sit); + se = &sit_i->sentries[j]; + check_block_count(sbi, j, &sit); seg_info_from_raw_sit(se, &sit); if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); + struct sec_entry *e = get_sec_entry(sbi, j); e->valid_blocks += se->valid_blocks; } } + mutex_unlock(&curseg->curseg_mutex); + } static void init_free_segmap(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h old mode 100644 new mode 100755 index 7f94d78..fabcb25 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -552,6 +552,24 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, /* check boundary of a given segment number */ 
BUG_ON(segno > end_segno); + if (GET_SIT_VBLOCKS(raw_sit) == 0) { + for (i = 0; i < SIT_VBLOCK_MAP_SIZE; i++) { + if (raw_sit->valid_map[i] != 0) { + goto mismatch_check; + } + } + return; + } + else if (GET_SIT_VBLOCKS(raw_sit) == sbi->blocks_per_seg) { + for (i = 0; i < SIT_VBLOCK_MAP_SIZE; i++) { + if (raw_sit->valid_map[i] != 0xFF) { + goto mismatch_check; + } + } + return; + } + +mismatch_check: /* check bitmap with valid block count */ for (i = 0; i < sbi->blocks_per_seg; i++) if (f2fs_test_bit(i, raw_sit->valid_map)) --- -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html