Re: [PATCH v2 2/9] nilfs2: extend SUFILE on-disk format to enable tracking of live blocks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2015-05-09 04:24, Ryusuke Konishi wrote:
> On Sun,  3 May 2015 12:05:15 +0200, Andreas Rohner wrote:
>> This patch extends the nilfs_segment_usage structure with two extra
>> fields. This changes the on-disk format of the SUFILE, but the NILFS2
>> metadata files are flexible enough, so that there are no compatibility
>> issues. The extension is fully backwards compatible. Nevertheless a
>> feature compatibility flag was added to indicate the on-disk format
>> change.
>>
>> The new field su_nlive_blks is used to track the number of live blocks
>> in the corresponding segment. Its value should always be smaller than
>> su_nblocks, which contains the total number of blocks in the segment.
>>
>> The field su_nlive_lastmod is necessary because of the protection period
>> used by the GC. It is a timestamp, which contains the last time
>> su_nlive_blks was modified. For example if a file is deleted, its
>> blocks are subtracted from su_nlive_blks and are therefore considered to
>> be reclaimable by the kernel. But the GC additionally protects them with
>> the protection period. So while su_nilve_blks contains the number of
>> potentially reclaimable blocks, the actual number depends on the
>> protection period. To enable GC policies to effectively choose or prefer
>> segments with unprotected blocks, the timestamp in su_nlive_lastmod is
>> necessary.
>>
>> The new field su_nsnapshot_blks contains the number of blocks in a
>> segment that are protected by a snapshot. The value is meant to be a
>> heuristic for the GC and is not necessarily always accurate.
>>
>> Signed-off-by: Andreas Rohner <andreas.rohner@xxxxxxx>
>> ---
>>  fs/nilfs2/ioctl.c         |  4 +--
>>  fs/nilfs2/sufile.c        | 45 +++++++++++++++++++++++++++++++--
>>  fs/nilfs2/sufile.h        |  6 +++++
>>  include/linux/nilfs2_fs.h | 63 +++++++++++++++++++++++++++++++++++++++++------
>>  4 files changed, 106 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
>> index 9a20e51..f6ee54e 100644
>> --- a/fs/nilfs2/ioctl.c
>> +++ b/fs/nilfs2/ioctl.c
>> @@ -1250,7 +1250,7 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp,
>>  		goto out;
>>  
>>  	ret = -EINVAL;
>> -	if (argv.v_size < sizeof(struct nilfs_suinfo_update))
>> +	if (argv.v_size < NILFS_MIN_SUINFO_UPDATE_SIZE)
>>  		goto out;
>>  
>>  	if (argv.v_nmembs > nilfs->ns_nsegments)
>> @@ -1316,7 +1316,7 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>  		return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp);
>>  	case NILFS_IOCTL_GET_SUINFO:
>>  		return nilfs_ioctl_get_info(inode, filp, cmd, argp,
>> -					    sizeof(struct nilfs_suinfo),
>> +					    NILFS_MIN_SEGMENT_USAGE_SIZE,
>>  					    nilfs_ioctl_do_get_suinfo);
>>  	case NILFS_IOCTL_SET_SUINFO:
>>  		return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp);
>> diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
>> index 2a869c3..1cce358 100644
>> --- a/fs/nilfs2/sufile.c
>> +++ b/fs/nilfs2/sufile.c
>> @@ -453,6 +453,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
>>  	su->su_lastmod = cpu_to_le64(0);
>>  	su->su_nblocks = cpu_to_le32(0);
>>  	su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
>> +	if (nilfs_sufile_live_blks_ext_supported(sufile)) {
>> +		su->su_nlive_blks = cpu_to_le32(0);
>> +		su->su_nsnapshot_blks = cpu_to_le32(0);
>> +		su->su_nlive_lastmod = cpu_to_le64(0);
>> +	}
>>  	kunmap_atomic(kaddr);
>>  
>>  	nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
>> @@ -482,7 +487,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
>>  	WARN_ON(!nilfs_segment_usage_dirty(su));
>>  
>>  	sudirty = nilfs_segment_usage_dirty(su);
>> -	nilfs_segment_usage_set_clean(su);
>> +	nilfs_segment_usage_set_clean(su, NILFS_MDT(sufile)->mi_entry_size);
>>  	kunmap_atomic(kaddr);
>>  	mark_buffer_dirty(su_bh);
>>  
>> @@ -698,7 +703,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
>>  		nc = 0;
>>  		for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
>>  			if (nilfs_segment_usage_error(su)) {
>> -				nilfs_segment_usage_set_clean(su);
>> +				nilfs_segment_usage_set_clean(su, susz);
>>  				nc++;
>>  			}
>>  		}
>> @@ -821,6 +826,8 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
>>  	struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
>>  	void *kaddr;
>>  	unsigned long nsegs, segusages_per_block;
>> +	__u64 lm = 0;
>> +	__u32 nlb = 0, nsb = 0;
>>  	ssize_t n;
>>  	int ret, i, j;
>>  
>> @@ -858,6 +865,18 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
>>  			if (nilfs_segment_is_active(nilfs, segnum + j))
>>  				si->sui_flags |=
>>  					(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
>> +
>> +			if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) {
>> +				nlb = le32_to_cpu(su->su_nlive_blks);
>> +				nsb = le32_to_cpu(su->su_nsnapshot_blks);
>> +				lm = le64_to_cpu(su->su_nlive_lastmod);
>> +			}
>> +
>> +			if (sisz >= NILFS_LIVE_BLKS_EXT_SUINFO_SIZE) {
>> +				si->sui_nlive_blks = nlb;
>> +				si->sui_nsnapshot_blks = nsb;
>> +				si->sui_nlive_lastmod = lm;
>> +			}
>>  		}
>>  		kunmap_atomic(kaddr);
>>  		brelse(su_bh);
>> @@ -901,6 +920,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
>>  	int cleansi, cleansu, dirtysi, dirtysu;
>>  	long ncleaned = 0, ndirtied = 0;
>>  	int ret = 0;
>> +	bool sup_ext = (supsz >= NILFS_LIVE_BLKS_EXT_SUINFO_UPDATE_SIZE);
>> +	bool su_ext = nilfs_sufile_live_blks_ext_supported(sufile);
>> +	bool supsu_ext = sup_ext && su_ext;
> 
> These boolean variables determine the control follow.  For these, more
> intuitive names are preferable.  For instance:
> 
>   - sup_ext -> suinfo_extended
>   - su_ext -> su_extended
>   - supsu_ext -> both_extended

I agree.

>>  
>>  	if (unlikely(nsup == 0))
>>  		return ret;
>> @@ -911,6 +933,13 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
>>  				(~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS))
>>  			|| (nilfs_suinfo_update_nblocks(sup) &&
>>  				sup->sup_sui.sui_nblocks >
>> +				nilfs->ns_blocks_per_segment)
>> +			|| (nilfs_suinfo_update_nlive_blks(sup) && sup_ext &&
>> +				sup->sup_sui.sui_nlive_blks >
>> +				nilfs->ns_blocks_per_segment)
>> +			|| (nilfs_suinfo_update_nsnapshot_blks(sup) &&
>> +				sup_ext &&
>> +				sup->sup_sui.sui_nsnapshot_blks >
>>  				nilfs->ns_blocks_per_segment))
>>  			return -EINVAL;
>>  	}
> 
> Testing sup_ext repeatedly is pointless since it increases branches.
> Consider moving it forward as follows:
> 
>         for (sup = buf; sup < supend; sup = (void *)sup + supsz) {
>                 if (sup->sup_segnum >= nilfs->ns_nsegments ||
>                    || (sup->sup_flags &
>                        (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS))
>                    || (nilfs_suinfo_update_nblocks(sup) &&
>                        sup->sup_sui.sui_nblocks > nilfs->ns_blocks_per_segment)\
> )
>                         return -EINVAL;
>                 if (!sup_extended)
>                         continue;
>                 if (nilfs_suinfo_update_nlive_blks(sup) &&
>                     (sup->sup_sui.sui_nlive_blks >
>                      nilfs->ns_blocks_per_segment)
>                     || (nilfs_suinfo_update_nsnapshot_blks(sup) &&
>                         sup->sup_sui.sui_nsnapshot_blks >
>                         nilfs->ns_blocks_per_segment))
>                         return -EINVAL;
>         }

Good suggestion.

>> @@ -938,6 +967,18 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
>>  		if (nilfs_suinfo_update_nblocks(sup))
>>  			su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks);
>>  
>> +		if (nilfs_suinfo_update_nlive_blks(sup) && supsu_ext)
>> +			su->su_nlive_blks =
>> +				cpu_to_le32(sup->sup_sui.sui_nlive_blks);
>> +
>> +		if (nilfs_suinfo_update_nsnapshot_blks(sup) && supsu_ext)
>> +			su->su_nsnapshot_blks =
>> +				cpu_to_le32(sup->sup_sui.sui_nsnapshot_blks);
>> +
>> +		if (nilfs_suinfo_update_nlive_lastmod(sup) && supsu_ext)
>> +			su->su_nlive_lastmod =
>> +				cpu_to_le64(sup->sup_sui.sui_nlive_lastmod);
>> +
> 
> Ditto.
> 
> Consider defining pointer to suinfo structure
> 
>         for (;;) {
>                 struct nilfs_suinfo *sui = &sup->sup_sui;
> 
> and simplifying the above part as follows:
> 
>                 if (both_extended) {
>                         if (nilfs_suinfo_update_nlive_blks(sup))
>                                 su->su_nlive_blks =
>                                         cpu_to_le32(sui->sui_nlive_blks);
>                         if (nilfs_suinfo_update_nsnapshot_blks(sup))
>                                 su->su_nsnapshot_blks =
>                                         cpu_to_le32(sui->sui_nsnapshot_blks);
>                         if (nilfs_suinfo_update_nlive_lastmod(sup))
>                                 su->su_nlive_lastmod =
>                                         cpu_to_le64(sui->sui_nlive_lastmod);
>                 }
> 

I agree this is much nicer.

>>  		if (nilfs_suinfo_update_flags(sup)) {
>>  			/*
>>  			 * Active flag is a virtual flag projected by running
>> diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
>> index b8afd72..da78edf 100644
>> --- a/fs/nilfs2/sufile.h
>> +++ b/fs/nilfs2/sufile.h
>> @@ -28,6 +28,12 @@
>>  #include <linux/nilfs2_fs.h>
>>  #include "mdt.h"
>>  
>> +static inline int
>> +nilfs_sufile_live_blks_ext_supported(const struct inode *sufile)
>> +{
>> +	return NILFS_MDT(sufile)->mi_entry_size >=
>> +			NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE;
>> +}
>>  
>>  static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
>>  {
>> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
>> index ff3fea3..4800daa 100644
>> --- a/include/linux/nilfs2_fs.h
>> +++ b/include/linux/nilfs2_fs.h
>> @@ -220,9 +220,12 @@ struct nilfs_super_block {
>>   * If there is a bit set in the incompatible feature set that the kernel
>>   * doesn't know about, it should refuse to mount the filesystem.
>>   */
>> -#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT	0x00000001ULL
> 
>> +#define NILFS_FEATURE_COMPAT_SUFILE_LIVE_BLKS_EXT	BIT(0)
> 
> You should not use BIT() macro and its variants for now because they
> are only enabled in kernel space (__KERNEL__ macro is required).
> 
> "nilfs2_fs.h" should be defined both for kernel space and user space.
> Consider defining it like "(1ULL << 0)".

Ok.

>>  
>> -#define NILFS_FEATURE_COMPAT_SUPP	0ULL
> 
>> +#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT		BIT(0)
> 
> Ditto.
> 
>> +
>> +#define NILFS_FEATURE_COMPAT_SUPP					\
>> +			(NILFS_FEATURE_COMPAT_SUFILE_LIVE_BLKS_EXT)
>>  #define NILFS_FEATURE_COMPAT_RO_SUPP	NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT
>>  #define NILFS_FEATURE_INCOMPAT_SUPP	0ULL
>>  
>> @@ -609,19 +612,34 @@ struct nilfs_cpfile_header {
>>  	  sizeof(struct nilfs_checkpoint) - 1) /			\
>>  			sizeof(struct nilfs_checkpoint))
>>  
>> +#ifndef offsetofend
>> +#define offsetofend(TYPE, MEMBER) \
>> +		(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
>> +#endif
>> +
>>  /**
>>   * struct nilfs_segment_usage - segment usage
>>   * @su_lastmod: last modified timestamp
>>   * @su_nblocks: number of blocks in segment
>>   * @su_flags: flags
>> + * @su_nlive_blks: number of live blocks in the segment
>> + * @su_nsnapshot_blks: number of blocks belonging to a snapshot in the segment
>> + * @su_nlive_lastmod: timestamp nlive_blks was last modified
>>   */
>>  struct nilfs_segment_usage {
>>  	__le64 su_lastmod;
>>  	__le32 su_nblocks;
>>  	__le32 su_flags;
>> +	__le32 su_nlive_blks;
>> +	__le32 su_nsnapshot_blks;
>> +	__le64 su_nlive_lastmod;
>>  };
>>  
>> -#define NILFS_MIN_SEGMENT_USAGE_SIZE	16
>> +#define NILFS_MIN_SEGMENT_USAGE_SIZE	\
>> +	offsetofend(struct nilfs_segment_usage, su_flags)
>> +
>> +#define NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE	\
>> +	offsetofend(struct nilfs_segment_usage, su_nlive_lastmod)
>>  
>>  /* segment usage flag */
>>  enum {
>> @@ -658,11 +676,16 @@ NILFS_SEGMENT_USAGE_FNS(DIRTY, dirty)
>>  NILFS_SEGMENT_USAGE_FNS(ERROR, error)
>>  
> 
>>  static inline void
>> -nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su)
>> +nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su, size_t susz)
>>  {
>>  	su->su_lastmod = cpu_to_le64(0);
>>  	su->su_nblocks = cpu_to_le32(0);
>>  	su->su_flags = cpu_to_le32(0);
>> +	if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) {
>> +		su->su_nlive_blks = cpu_to_le32(0);
>> +		su->su_nsnapshot_blks = cpu_to_le32(0);
>> +		su->su_nlive_lastmod = cpu_to_le64(0);
>> +	}
>>  }
> 
> nilfs_sufile_do_scrap() function does almost the same thing.
> Consider defining common inline function and using it for
> nilfs_segment_usage_set_clean() and nilfs_sufile_do_scrap():
> 
> static inline void
> nilfs_segment_usage_format(struct nilfs_segment_usage *su, size_t susz,
> 			   __u32 flags)
> {
> 	su->su_lastmod = cpu_to_le64(0);
> 	su->su_nblocks = cpu_to_le32(0);
> 	su->su_flags = cpu_to_le32(flags);
> 	if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) {
> 		su->su_nlive_blks = cpu_to_le32(0);
> 		su->su_nsnapshot_blks = cpu_to_le32(0);
> 		su->su_nlive_lastmod = cpu_to_le64(0);
> 	}
> }
> 
> static inline void
> nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su, size_t susz)
> {
> 	nilfs_segment_usage_format(su, susz, 0);
> }

Good idea. I will start with the modifications right away. I will of
course wait with version 3 of the patch set until you finished your review.

Regards,
Andreas Rohner

> Regards,
> Ryusuke Konishi
> 
>>  
>>  static inline int
>> @@ -684,23 +707,33 @@ struct nilfs_sufile_header {
>>  	/* ... */
>>  };
>>  
>> -#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET	\
>> -	((sizeof(struct nilfs_sufile_header) +				\
>> -	  sizeof(struct nilfs_segment_usage) - 1) /			\
>> -			 sizeof(struct nilfs_segment_usage))
>> +#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET(susz)	\
>> +	((sizeof(struct nilfs_sufile_header) + (susz) - 1) / (susz))
>>  
>>  /**
>>   * nilfs_suinfo - segment usage information
>>   * @sui_lastmod: timestamp of last modification
>>   * @sui_nblocks: number of written blocks in segment
>>   * @sui_flags: segment usage flags
>> + * @sui_nlive_blks: number of live blocks in the segment
>> + * @sui_nsnapshot_blks: number of blocks belonging to a snapshot in the segment
>> + * @sui_nlive_lastmod: timestamp nlive_blks was last modified
>>   */
>>  struct nilfs_suinfo {
>>  	__u64 sui_lastmod;
>>  	__u32 sui_nblocks;
>>  	__u32 sui_flags;
>> +	__u32 sui_nlive_blks;
>> +	__u32 sui_nsnapshot_blks;
>> +	__u64 sui_nlive_lastmod;
>>  };
>>  
>> +#define NILFS_MIN_SUINFO_SIZE	\
>> +	offsetofend(struct nilfs_suinfo, sui_flags)
>> +
>> +#define NILFS_LIVE_BLKS_EXT_SUINFO_SIZE	\
>> +	offsetofend(struct nilfs_suinfo, sui_nlive_lastmod)
>> +
>>  #define NILFS_SUINFO_FNS(flag, name)					\
>>  static inline int							\
>>  nilfs_suinfo_##name(const struct nilfs_suinfo *si)			\
>> @@ -736,6 +769,9 @@ enum {
>>  	NILFS_SUINFO_UPDATE_LASTMOD,
>>  	NILFS_SUINFO_UPDATE_NBLOCKS,
>>  	NILFS_SUINFO_UPDATE_FLAGS,
>> +	NILFS_SUINFO_UPDATE_NLIVE_BLKS,
>> +	NILFS_SUINFO_UPDATE_NLIVE_LASTMOD,
>> +	NILFS_SUINFO_UPDATE_NSNAPSHOT_BLKS,
>>  	__NR_NILFS_SUINFO_UPDATE_FIELDS,
>>  };
>>  
>> @@ -759,6 +795,17 @@ nilfs_suinfo_update_##name(const struct nilfs_suinfo_update *sup)	\
>>  NILFS_SUINFO_UPDATE_FNS(LASTMOD, lastmod)
>>  NILFS_SUINFO_UPDATE_FNS(NBLOCKS, nblocks)
>>  NILFS_SUINFO_UPDATE_FNS(FLAGS, flags)
>> +NILFS_SUINFO_UPDATE_FNS(NLIVE_BLKS, nlive_blks)
>> +NILFS_SUINFO_UPDATE_FNS(NSNAPSHOT_BLKS, nsnapshot_blks)
>> +NILFS_SUINFO_UPDATE_FNS(NLIVE_LASTMOD, nlive_lastmod)
>> +
>> +#define NILFS_MIN_SUINFO_UPDATE_SIZE	\
>> +	(offsetofend(struct nilfs_suinfo_update, sup_reserved) + \
>> +	NILFS_MIN_SUINFO_SIZE)
>> +
>> +#define NILFS_LIVE_BLKS_EXT_SUINFO_UPDATE_SIZE	\
>> +	(offsetofend(struct nilfs_suinfo_update, sup_reserved) + \
>> +	NILFS_LIVE_BLKS_EXT_SUINFO_SIZE)
>>  
>>  enum {
>>  	NILFS_CHECKPOINT,
>> -- 
>> 2.3.7
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux BTRFS]     [Linux CIFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux