Re: [f2fs-dev] [PATCH RFC] f2fs: flush cp pack except cp page2 at first

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2018/1/25 0:26, Gao Xiang wrote:
> Hi Chao,
> 
> 
> On 2018/1/24 23:57, Chao Yu wrote:
>> On 2018/1/24 14:53, Gaoxiang (OS) wrote:
>>> Previously, we attempted to flush the whole cp pack in a single bio.
>>> However, if power is suddenly lost at this time, we could hit an
>>> extreme scenario in which cp page1 and cp page2 are both up to date,
>>> but the payload or current summaries are still outdated.
>>> (see reliable write in UFS spec)
>>>
>>> This patch writes the whole cp pack except cp page2 with FLUSH
>>> first, and then writes cp page2 in an extra independent
>>> bio with FLUSH.
>>>
>>> Signed-off-by: Gao Xiang <gaoxiang25@xxxxxxxxxx>
>>> ---
>>>   fs/f2fs/checkpoint.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
>>>   fs/f2fs/f2fs.h       |  3 ++-
>>>   fs/f2fs/segment.c    | 11 +++++++++--
>>>   3 files changed, 52 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 14d2fed..e7f5e85 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -300,6 +300,35 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
>>>   	return 0;
>>>   }
>>>   
>>> +static int sync_meta_page_locked(struct f2fs_sb_info *sbi,
>>> +	struct page *page,
>>> +	enum page_type type, enum iostat_type io_type)
>>> +{
>>> +	struct writeback_control wbc = {
>>> +		.for_reclaim = 0,
>>> +	};
>>> +	int err;
>>> +
>>> +	BUG_ON(page->mapping != META_MAPPING(sbi));
>>> +	BUG_ON(!PageDirty(page));
>>> +
>>> +	f2fs_wait_on_page_writeback(page, META, true);
>>> +
>>> +	BUG_ON(PageWriteback(page));
>>> +	if (unlikely(!clear_page_dirty_for_io(page)))
>>> +		BUG();
>>> +
>>> +	err = __f2fs_write_meta_page(page, &wbc, io_type);
>>> +	if (err) {
>>> +		f2fs_put_page(page, 1);
>>> +		return err;
>>> +	}
>>> +	f2fs_put_page(page, 0);
>>> +
>>> +	f2fs_submit_merged_write(sbi, type);
>>> +	return err;
>>> +}
>>> +
>>>   long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
>>>   				long nr_to_write, enum iostat_type io_type)
>>>   {
>>> @@ -1172,6 +1201,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>   	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
>>>   	u64 kbytes_written;
>>>   	int err;
>>> +	struct page *cp_page2;
>>>   
>>>   	/* Flush all the NAT/SIT pages */
>>>   	while (get_pages(sbi, F2FS_DIRTY_META)) {
>>> @@ -1250,7 +1280,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>   		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
>>>   		for (i = 0; i < nm_i->nat_bits_blocks; i++)
>>>   			update_meta_page(sbi, nm_i->nat_bits +
>>> -					(i << F2FS_BLKSIZE_BITS), blk + i);
>>> +					(i << F2FS_BLKSIZE_BITS), blk + i, NULL);
>>>   
>>>   		/* Flush all the NAT BITS pages */
>>>   		while (get_pages(sbi, F2FS_DIRTY_META)) {
>>> @@ -1271,11 +1301,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>   		return err;
>>>   
>>>   	/* write out checkpoint buffer at block 0 */
>>> -	update_meta_page(sbi, ckpt, start_blk++);
>>> +	update_meta_page(sbi, ckpt, start_blk++, NULL);
>>>   
>>>   	for (i = 1; i < 1 + cp_payload_blks; i++)
>>>   		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
>>> -							start_blk++);
>>> +							start_blk++, NULL);
>>>   
>>>   	if (orphan_num) {
>>>   		write_orphan_inodes(sbi, start_blk);
>>> @@ -1297,9 +1327,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>   		start_blk += NR_CURSEG_NODE_TYPE;
>>>   	}
>>>   
>>> -	/* writeout checkpoint block */
>>> -	update_meta_page(sbi, ckpt, start_blk);
>>> -
>>>   	/* wait for previous submitted node/meta pages writeback */
>>>   	wait_on_all_pages_writeback(sbi);
>>>   
>>> @@ -1313,12 +1340,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>   	sbi->last_valid_block_count = sbi->total_valid_block_count;
>>>   	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
>>>   
>>> -	/* Here, we only have one bio having CP pack */
>>> +	/* Here, we only have one bio having CP pack except cp page 2 */
>>>   	sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
>> We don't need to use META_FLUSH here.
> 
> Hmm... I think that we need to write to the device medium rather than the device cache, or am I missing something?
> Could you give me some hints about that? PREFLUSH or something else? I cannot see any code related to that yet...

I mean sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO), because
META_FLUSH will add PREFLUSH & FUA to the last bio; we don't need that before
the bio submission of the last cp pack.

Thanks,

> 
>>
>>>   
>>>   	/* wait for previous submitted meta pages writeback */
>>>   	wait_on_all_pages_writeback(sbi);
>>>   
>>> +	/* write and flush checkpoint cp page 2 */
>>> +	update_meta_page(sbi, ckpt, start_blk, &cp_page2);
>>> +	sync_meta_page_locked(sbi, cp_page2, META_FLUSH, FS_CP_META_IO);
>> How about
>>
>> sync_checkpoint()
>> {
>> 	page = grab_meta_page()
>> 	memcpy()
>> 	set_page_dirty()
>>
>> 	...
>> 	__f2fs_write_meta_page()
>> 	f2fs_put_page()
>> 	f2fs_submit_merged_write()
>> }
> OK, I will fix it tomorrow because I need to recheck some f2fs code :(
>>
>> BTW, could you give some numbers with this patch?
> They will be added, but I think the performance depends on the specific flash
> device tested.
> Separating cp page2 from the others will make the checkpoint more reliable; I
> think it is good for file system stability.
> 
> 
> Thanks,
>> Thanks,
>>
>>
>>> +
>>> +	/* wait for previous submitted meta pages writeback */
>>> +	wait_on_all_pages_writeback(sbi);
>>> +
>>>   	release_ino_entry(sbi, false);
>>>   
>>>   	if (unlikely(f2fs_cp_error(sbi)))
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index a4fb89d..7877ea3 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -2680,7 +2680,8 @@ void allocate_new_segments(struct f2fs_sb_info *sbi);
>>>   int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
>>>   bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
>>>   struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
>>> -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
>>> +void update_meta_page(struct f2fs_sb_info *sbi,
>>> +	void *src, block_t blk_addr, struct page **metapage);
>>>   void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
>>>   						enum iostat_type io_type);
>>>   void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 40e1d20..f48a536 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1988,19 +1988,26 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
>>>   	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
>>>   }
>>>   
>>> -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
>>> +void update_meta_page(struct f2fs_sb_info *sbi,
>>> +	void *src, block_t blk_addr, struct page **metapage)
>>>   {
>>>   	struct page *page = grab_meta_page(sbi, blk_addr);
>>>   
>>>   	memcpy(page_address(page), src, PAGE_SIZE);
>>>   	set_page_dirty(page);
>>> +
>>> +	if (unlikely(metapage)) {
>>> +		*metapage = page;
>>> +		return;
>>> +	}
>>> +
>>>   	f2fs_put_page(page, 1);
>>>   }
>>>   
>>>   static void write_sum_page(struct f2fs_sb_info *sbi,
>>>   			struct f2fs_summary_block *sum_blk, block_t blk_addr)
>>>   {
>>> -	update_meta_page(sbi, (void *)sum_blk, blk_addr);
>>> +	update_meta_page(sbi, (void *)sum_blk, blk_addr, NULL);
>>>   }
>>>   
>>>   static void write_current_sum_page(struct f2fs_sb_info *sbi,
>>>
> 
> 
> .
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux