RE: [PATCH v2 1/1] f2fs: support zone capacity less than zone size

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> -----Original Message-----
> From: Chao Yu <yuchao0@xxxxxxxxxx>
> Sent: Wednesday, July 15, 2020 7:32 AM
> To: Aravind Ramesh <Aravind.Ramesh@xxxxxxx>; jaegeuk@xxxxxxxxxx; linux-
> fsdevel@xxxxxxxxxxxxxxx; linux-f2fs-devel@xxxxxxxxxxxxxxxxxxxxx; hch@xxxxxx
> Cc: Damien Le Moal <Damien.LeMoal@xxxxxxx>; Niklas Cassel
> <Niklas.Cassel@xxxxxxx>; Matias Bjorling <Matias.Bjorling@xxxxxxx>
> Subject: Re: [PATCH v2 1/1] f2fs: support zone capacity less than zone size
> 
> On 2020/7/15 2:40, Aravind Ramesh wrote:
> > Thanks for the valuable feedback.
> > My comments are inline.
> > Will send the V3 with the feedback incorporated.
> >
> > Regards,
> > Aravind
> >
> >> -----Original Message-----
> >> From: Chao Yu <yuchao0@xxxxxxxxxx>
> >> Sent: Tuesday, July 14, 2020 5:28 PM
> >> To: Aravind Ramesh <Aravind.Ramesh@xxxxxxx>; jaegeuk@xxxxxxxxxx;
> >> linux- fsdevel@xxxxxxxxxxxxxxx;
> >> linux-f2fs-devel@xxxxxxxxxxxxxxxxxxxxx; hch@xxxxxx
> >> Cc: Damien Le Moal <Damien.LeMoal@xxxxxxx>; Niklas Cassel
> >> <Niklas.Cassel@xxxxxxx>; Matias Bjorling <Matias.Bjorling@xxxxxxx>
> >> Subject: Re: [PATCH v2 1/1] f2fs: support zone capacity less than
> >> zone size
> >>
[snip..]
       /*
> >>> +	* zone-capacity can be less than zone-size in zoned devices,
> >>> +	* resulting in less than expected usable segments in the zone,
> >>> +	* calculate the end segno in the zone which can be garbage collected
> >>> +	*/
> >>> +	if (f2fs_sb_has_blkzoned(sbi))
> >>> +		end_segno -= sbi->segs_per_sec -
> >>> +					f2fs_usable_segs_in_sec(sbi, segno);
> >>> +
> >>> +	else if (__is_large_section(sbi))
> >>>  		end_segno = rounddown(end_segno, sbi->segs_per_sec);
> >>
> >> end_segno could be beyond end of section, so below calculation could
> >> adjust it to the end of section first, then adjust to zone-capacity boundary.
> >>
> >> 	if (__is_large_section(sbi))
> >> 		end_segno = rounddown(end_segno, sbi->segs_per_sec);
> >>
> >> 	if (f2fs_sb_has_blkzoned(sbi))
> >> 		end_segno -= sbi->segs_per_sec -
> >> 				f2fs_usable_segs_in_sec(sbi, segno);
> > Ok, will change it.
> >>
> >>>
> >>>  	/* readahead multi ssa blocks those have contiguous address */ @@
> >>> -1356,7 +1368,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> >>>  		goto stop;
> >>>
> >>>  	seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
> >>> -	if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
> >>> +	if (gc_type == FG_GC &&
> >>> +		seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
> >>>  		sec_freed++;
> >>>  	total_freed += seg_freed;
> >>>
> >>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index
> >>> db3c61046aa4..463b4e38b864 100644
> >>> --- a/fs/f2fs/gc.h
> >>> +++ b/fs/f2fs/gc.h
> >>> @@ -44,13 +44,47 @@ struct gc_inode_list {
> >>>  /*
> >>>   * inline functions
> >>>   */
> >>> +
> >>> +/*
> >>> + * On a Zoned device zone-capacity can be less than zone-size and
> >>> +if
> >>> + * zone-capacity is not aligned to f2fs segment size(2MB), then the
> >>> +segment
> >>> + * starting just before zone-capacity has some blocks spanning
> >>> +across the
> >>> + * zone-capacity, these blocks are not usable.
> >>> + * Such spanning segments can be in free list so calculate the sum
> >>> +of usable
> >>> + * blocks in currently free segments including normal and spanning segments.
> >>> + */
> >>> +static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info
> >>> +*sbi) {
> >>> +	block_t free_seg_blks = 0;
> >>> +	struct free_segmap_info *free_i = FREE_I(sbi);
> >>> +	int j;
> >>> +
> >>
> >> spin_lock(&free_i->segmap_lock);
> > Ok
> >>
> >>> +	for (j = 0; j < MAIN_SEGS(sbi); j++)
> >>> +		if (!test_bit(j, free_i->free_segmap))
> >>> +			free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
> >>
> >> spin_unlock(&free_i->segmap_lock);
> > Ok
> >>
> >>> +
> >>> +	return free_seg_blks;
> >>> +}
> >>> +
> >>> +static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi) {
> >>> +	if (f2fs_sb_has_blkzoned(sbi))
> >>> +		return free_segs_blk_count_zoned(sbi);
> >>> +
> >>> +	return free_segments(sbi) << sbi->log_blocks_per_seg; }
> >>> +
> >>>  static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)  {
> >>> -	if (free_segments(sbi) < overprovision_segments(sbi))
> >>> +	block_t free_blks, ovp_blks;
> >>> +
> >>> +	free_blks = free_segs_blk_count(sbi);
> >>> +	ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
> >>> +
> >>> +	if (free_blks < ovp_blks)
> >>>  		return 0;
> >>> -	else
> >>> -		return (free_segments(sbi) - overprovision_segments(sbi))
> >>> -			<< sbi->log_blocks_per_seg;
> >>> +
> >>> +	return free_blks - ovp_blks;
> >>>  }
> >>>
> >>>  static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info
> >>> *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index
> >>> c35614d255e1..d75c1849dc83 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -869,10 +869,10 @@ static void locate_dirty_segment(struct
> >>> f2fs_sb_info
> >> *sbi, unsigned int segno)
> >>>  	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
> >>
> >> unsigned int usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
> >>
> >>>
> >>>  	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
> >>> -				ckpt_valid_blocks == sbi->blocks_per_seg)) {
> >>
> >> ckpt_valid_blocks == usable_blocks
> >>
> >>> +		ckpt_valid_blocks == f2fs_usable_blks_in_seg(sbi, segno))) {
> >>>  		__locate_dirty_segment(sbi, segno, PRE);
> >>>  		__remove_dirty_segment(sbi, segno, DIRTY);
> >>> -	} else if (valid_blocks < sbi->blocks_per_seg) {
> >>> +	} else if (valid_blocks < f2fs_usable_blks_in_seg(sbi, segno)) {
> >>
> >> } else if (valid_blocks < usable_blocks) {
> >>
> > Ok.
> >>
> >> BTW, would you consider to add a field as below to avoid calculation
> >> whenever we want to get usable_blocks in segment:
> >
> > The reason to do it like this is that f2fs supports using multiple devices to create a
> volume.
> > So, if 2 ZNS devices are used where both have same zone-size but
> > different zone capacity, then having this single field to indicate
> > usable blocks will not work, as usable blocks of zones
> 
> The field @usable_blocks in 'struct seg_entry' is per-segment, that's not single, it can
> be initialized to 0 or blocks_per_seg or 'sec_cap_blkaddr - seg_start' based on its
> locatation in which zone of device during build_sit_entries().

I see that it is per segment entry, sorry about that.

> 
> > from different devices are not same. In such a scenario, we would need
> > to maintain an array, which stores the usable blocks of each zone based on its
> zone-capacity.
> > This approach can eliminate the calculation.
> >
> > So we actually implemented this approach with a buffer and at mount
> > time this buffer would be allocated and populated with all the zone's
> > usable_blocks information and would be read from this buffer
> > subsequently. But we did not see any performance difference when
> > compared to current approach and concluded that the overhead of
> > calculation was negligible and also current approach does not need to modify core
> data structure, as this field will be used only when using a ZNS drive and if that drive
> has zone capacity less than zone size.
> > And for all other cases, the function f2fs_usable_blks_in_seg() just
> > returns sbi->blocks_per_seg without any calculation.
> >
> > If you think, like the calculation overhead is more, then we need to
> > add a pointer and allocate memory to it to accommodate all zone's
> > zone-capacity info. In my opinion, the gain is very less because of the reasons
> stated above. However, I am open to change it based on your feedback.
> 
> I just thought those fixed size could be recorded in advance to avoid redundant
> calculation, but the change I suggested is not critical, and I do agree that it will not
> help performance a bit.
> 
> Anyway, I'm not against with keeping it as it is, let's go ahead. :)

Ok, thanks :)

> 
> >
> >>
> >> struct seg_entry {
> >> 	unsigned long long type:6;		/* segment type like

[snip...]

> >>> @@ -546,8 +548,8 @@ static inline bool
> >>> has_curseg_enough_space(struct
> >> f2fs_sb_info *sbi)
> >>>  	/* check current node segment */
> >>>  	for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
> >>>  		segno = CURSEG_I(sbi, i)->segno;
> >>> -		left_blocks = sbi->blocks_per_seg -
> >>> -			get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> >>> +		left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
> >>> +				get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> >>>
> >>>  		if (node_blocks > left_blocks)
> >>>  			return false;
> >>> @@ -555,7 +557,7 @@ static inline bool
> >>> has_curseg_enough_space(struct f2fs_sb_info *sbi)
> >>>
> >>>  	/* check current data segment */
> >>>  	segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
> >>> -	left_blocks = sbi->blocks_per_seg -
> >>> +	left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
> >>>  			get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> >>>  	if (dent_blocks > left_blocks)
> >>>  		return false;
> >>> @@ -677,21 +679,22 @@ static inline int check_block_count(struct
> >>> f2fs_sb_info
> >> *sbi,
> >>>  	bool is_valid  = test_bit_le(0, raw_sit->valid_map) ? true : false;
> >>>  	int valid_blocks = 0;
> >>>  	int cur_pos = 0, next_pos;
> >>> +	unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi,
> >>> +segno);
> >>>
> >>>  	/* check bitmap with valid block count */
> >>>  	do {
> >>>  		if (is_valid) {
> >>>  			next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
> >>> -					sbi->blocks_per_seg,
> >>> +					usable_blks_per_seg,
> >>>  					cur_pos);
> >>>  			valid_blocks += next_pos - cur_pos;
> >>>  		} else
> >>>  			next_pos = find_next_bit_le(&raw_sit->valid_map,
> >>> -					sbi->blocks_per_seg,
> >>> +					usable_blks_per_seg,
> >>>  					cur_pos);
> >>>  		cur_pos = next_pos;
> >>>  		is_valid = !is_valid;
> >>> -	} while (cur_pos < sbi->blocks_per_seg);
> >>> +	} while (cur_pos < usable_blks_per_seg);
> >>
> >> I meant we need to check that there should be no valid slot locates
> >> beyond zone- capacity in bitmap:
> > Ahh, ok. Got it. Will change.

I have retained the other checks also and added the condition to
check for set bits after the zone capacity, as you suggested.
Let me know if you feel the other checks are not necessary.

 [snip...]

> >>> @@ -3078,12 +3093,26 @@ static int init_blkz_info(struct
> >>> f2fs_sb_info *sbi, int
> >> devi)
> >>>  	if (!FDEV(devi).blkz_seq)
> >>>  		return -ENOMEM;
> >>>
> >>> -	/* Get block zones type */
> >>> +	/* Get block zones type and zone-capacity */
> >>> +	FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
> >>> +					FDEV(devi).nr_blkz * sizeof(block_t),
> >>> +					GFP_KERNEL);
> >>> +	if (!FDEV(devi).zone_capacity_blocks)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	rep_zone_arg.dev = &FDEV(devi);
> >>> +	rep_zone_arg.zone_cap_mismatch = false;
> >>> +
> >>>  	ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
> >>> -				  &FDEV(devi));
> >>> +				  &rep_zone_arg);
> >>>  	if (ret < 0)
> >>
> >> kfree(FDEV(devi).zone_capacity_blocks);
> > Actually, we do not need to free this here, because if error (ret < 0)
> > is returned, then the control goes back to
> > destroy_device_list() and deallocates the buffer.
> 
> Confirmed.
> 
> Thanks,
> 
Thanks again for the review feedback.
Have sent v3 with all the recommended changes.

Thanks,
Aravind

> >
> >>
> >> Thanks,
> >>
> >>>  		return ret;
> >>>
> >>> +	if (!rep_zone_arg.zone_cap_mismatch) {
> >>> +		kfree(FDEV(devi).zone_capacity_blocks);
> >>> +		FDEV(devi).zone_capacity_blocks = NULL;
> >>> +	}
> >>> +
> >>>  	return 0;
> >>>  }
> >>>  #endif
> >>>
> > .
> >




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux