On 2015-05-09 04:24, Ryusuke Konishi wrote: > On Sun, 3 May 2015 12:05:15 +0200, Andreas Rohner wrote: >> This patch extends the nilfs_segment_usage structure with two extra >> fields. This changes the on-disk format of the SUFILE, but the NILFS2 >> metadata files are flexible enough, so that there are no compatibility >> issues. The extension is fully backwards compatible. Nevertheless a >> feature compatibility flag was added to indicate the on-disk format >> change. >> >> The new field su_nlive_blks is used to track the number of live blocks >> in the corresponding segment. Its value should always be smaller than >> su_nblocks, which contains the total number of blocks in the segment. >> >> The field su_nlive_lastmod is necessary because of the protection period >> used by the GC. It is a timestamp, which contains the last time >> su_nlive_blks was modified. For example if a file is deleted, its >> blocks are subtracted from su_nlive_blks and are therefore considered to >> be reclaimable by the kernel. But the GC additionally protects them with >> the protection period. So while su_nilve_blks contains the number of >> potentially reclaimable blocks, the actual number depends on the >> protection period. To enable GC policies to effectively choose or prefer >> segments with unprotected blocks, the timestamp in su_nlive_lastmod is >> necessary. >> >> The new field su_nsnapshot_blks contains the number of blocks in a >> segment that are protected by a snapshot. The value is meant to be a >> heuristic for the GC and is not necessarily always accurate. >> >> Signed-off-by: Andreas Rohner <andreas.rohner@xxxxxxx> >> --- >> fs/nilfs2/ioctl.c | 4 +-- >> fs/nilfs2/sufile.c | 45 +++++++++++++++++++++++++++++++-- >> fs/nilfs2/sufile.h | 6 +++++ >> include/linux/nilfs2_fs.h | 63 +++++++++++++++++++++++++++++++++++++++++------ >> 4 files changed, 106 insertions(+), 12 deletions(-) >> >> diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c >> index 9a20e51..f6ee54e 100644 >> --- a/fs/nilfs2/ioctl.c >> +++ b/fs/nilfs2/ioctl.c >> @@ -1250,7 +1250,7 @@ static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, >> goto out; >> >> ret = -EINVAL; >> - if (argv.v_size < sizeof(struct nilfs_suinfo_update)) >> + if (argv.v_size < NILFS_MIN_SUINFO_UPDATE_SIZE) >> goto out; >> >> if (argv.v_nmembs > nilfs->ns_nsegments) >> @@ -1316,7 +1316,7 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) >> return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); >> case NILFS_IOCTL_GET_SUINFO: >> return nilfs_ioctl_get_info(inode, filp, cmd, argp, >> - sizeof(struct nilfs_suinfo), >> + NILFS_MIN_SEGMENT_USAGE_SIZE, >> nilfs_ioctl_do_get_suinfo); >> case NILFS_IOCTL_SET_SUINFO: >> return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); >> diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c >> index 2a869c3..1cce358 100644 >> --- a/fs/nilfs2/sufile.c >> +++ b/fs/nilfs2/sufile.c >> @@ -453,6 +453,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, >> su->su_lastmod = cpu_to_le64(0); >> su->su_nblocks = cpu_to_le32(0); >> su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); >> + if (nilfs_sufile_live_blks_ext_supported(sufile)) { >> + su->su_nlive_blks = cpu_to_le32(0); >> + su->su_nsnapshot_blks = cpu_to_le32(0); >> + su->su_nlive_lastmod = cpu_to_le64(0); >> + } >> kunmap_atomic(kaddr); >> >> nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); >> @@ -482,7 +487,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, >> WARN_ON(!nilfs_segment_usage_dirty(su)); >> >> sudirty = nilfs_segment_usage_dirty(su); >> - nilfs_segment_usage_set_clean(su); >> + nilfs_segment_usage_set_clean(su, NILFS_MDT(sufile)->mi_entry_size); >> kunmap_atomic(kaddr); >> mark_buffer_dirty(su_bh); >> >> @@ -698,7 +703,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, >> nc = 0; >> for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { >> if (nilfs_segment_usage_error(su)) { >> - nilfs_segment_usage_set_clean(su); >> + nilfs_segment_usage_set_clean(su, susz); >> nc++; >> } >> } >> @@ -821,6 +826,8 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, >> struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; >> void *kaddr; >> unsigned long nsegs, segusages_per_block; >> + __u64 lm = 0; >> + __u32 nlb = 0, nsb = 0; >> ssize_t n; >> int ret, i, j; >> >> @@ -858,6 +865,18 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, >> if (nilfs_segment_is_active(nilfs, segnum + j)) >> si->sui_flags |= >> (1UL << NILFS_SEGMENT_USAGE_ACTIVE); >> + >> + if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) { >> + nlb = le32_to_cpu(su->su_nlive_blks); >> + nsb = le32_to_cpu(su->su_nsnapshot_blks); >> + lm = le64_to_cpu(su->su_nlive_lastmod); >> + } >> + >> + if (sisz >= NILFS_LIVE_BLKS_EXT_SUINFO_SIZE) { >> + si->sui_nlive_blks = nlb; >> + si->sui_nsnapshot_blks = nsb; >> + si->sui_nlive_lastmod = lm; >> + } >> } >> kunmap_atomic(kaddr); >> brelse(su_bh); >> @@ -901,6 +920,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, >> int cleansi, cleansu, dirtysi, dirtysu; >> long ncleaned = 0, ndirtied = 0; >> int ret = 0; >> + bool sup_ext = (supsz >= NILFS_LIVE_BLKS_EXT_SUINFO_UPDATE_SIZE); >> + bool su_ext = nilfs_sufile_live_blks_ext_supported(sufile); >> + bool supsu_ext = sup_ext && su_ext; > > These boolean variables determine the control follow. For these, more > intuitive names are preferable. For instance: > > - sup_ext -> suinfo_extended > - su_ext -> su_extended > - supsu_ext -> both_extended I agree. >> >> if (unlikely(nsup == 0)) >> return ret; >> @@ -911,6 +933,13 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, >> (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) >> || (nilfs_suinfo_update_nblocks(sup) && >> sup->sup_sui.sui_nblocks > >> + nilfs->ns_blocks_per_segment) >> + || (nilfs_suinfo_update_nlive_blks(sup) && sup_ext && >> + sup->sup_sui.sui_nlive_blks > >> + nilfs->ns_blocks_per_segment) >> + || (nilfs_suinfo_update_nsnapshot_blks(sup) && >> + sup_ext && >> + sup->sup_sui.sui_nsnapshot_blks > >> nilfs->ns_blocks_per_segment)) >> return -EINVAL; >> } > > Testing sup_ext repeatedly is pointless since it increases branches. > Consider moving it forward as follows: > > for (sup = buf; sup < supend; sup = (void *)sup + supsz) { > if (sup->sup_segnum >= nilfs->ns_nsegments || > || (sup->sup_flags & > (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) > || (nilfs_suinfo_update_nblocks(sup) && > sup->sup_sui.sui_nblocks > nilfs->ns_blocks_per_segment)\ > ) > return -EINVAL; > if (!sup_extended) > continue; > if (nilfs_suinfo_update_nlive_blks(sup) && > (sup->sup_sui.sui_nlive_blks > > nilfs->ns_blocks_per_segment) > || (nilfs_suinfo_update_nsnapshot_blks(sup) && > sup->sup_sui.sui_nsnapshot_blks > > nilfs->ns_blocks_per_segment)) > return -EINVAL; > } Good suggestion. >> @@ -938,6 +967,18 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, >> if (nilfs_suinfo_update_nblocks(sup)) >> su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); >> >> + if (nilfs_suinfo_update_nlive_blks(sup) && supsu_ext) >> + su->su_nlive_blks = >> + cpu_to_le32(sup->sup_sui.sui_nlive_blks); >> + >> + if (nilfs_suinfo_update_nsnapshot_blks(sup) && supsu_ext) >> + su->su_nsnapshot_blks = >> + cpu_to_le32(sup->sup_sui.sui_nsnapshot_blks); >> + >> + if (nilfs_suinfo_update_nlive_lastmod(sup) && supsu_ext) >> + su->su_nlive_lastmod = >> + cpu_to_le64(sup->sup_sui.sui_nlive_lastmod); >> + > > Ditto. > > Consider defining pointer to suinfo structure > > for (;;) { > struct nilfs_suinfo *sui = &sup->sup_sui; > > and simplifying the above part as follows: > > if (both_extended) { > if (nilfs_suinfo_update_nlive_blks(sup)) > su->su_nlive_blks = > cpu_to_le32(sui->sui_nlive_blks); > if (nilfs_suinfo_update_nsnapshot_blks(sup)) > su->su_nsnapshot_blks = > cpu_to_le32(sui->sui_nsnapshot_blks); > if (nilfs_suinfo_update_nlive_lastmod(sup)) > su->su_nlive_lastmod = > cpu_to_le64(sui->sui_nlive_lastmod); > } > I agree this is much nicer. >> if (nilfs_suinfo_update_flags(sup)) { >> /* >> * Active flag is a virtual flag projected by running >> diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h >> index b8afd72..da78edf 100644 >> --- a/fs/nilfs2/sufile.h >> +++ b/fs/nilfs2/sufile.h >> @@ -28,6 +28,12 @@ >> #include <linux/nilfs2_fs.h> >> #include "mdt.h" >> >> +static inline int >> +nilfs_sufile_live_blks_ext_supported(const struct inode *sufile) >> +{ >> + return NILFS_MDT(sufile)->mi_entry_size >= >> + NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE; >> +} >> >> static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) >> { >> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h >> index ff3fea3..4800daa 100644 >> --- a/include/linux/nilfs2_fs.h >> +++ b/include/linux/nilfs2_fs.h >> @@ -220,9 +220,12 @@ struct nilfs_super_block { >> * If there is a bit set in the incompatible feature set that the kernel >> * doesn't know about, it should refuse to mount the filesystem. >> */ >> -#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT 0x00000001ULL > >> +#define NILFS_FEATURE_COMPAT_SUFILE_LIVE_BLKS_EXT BIT(0) > > You should not use BIT() macro and its variants for now because they > are only enabled in kernel space (__KERNEL__ macro is required). > > "nilfs2_fs.h" should be defined both for kernel space and user space. > Consider defining it like "(1ULL << 0)". Ok. >> >> -#define NILFS_FEATURE_COMPAT_SUPP 0ULL > >> +#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT BIT(0) > > Ditto. > >> + >> +#define NILFS_FEATURE_COMPAT_SUPP \ >> + (NILFS_FEATURE_COMPAT_SUFILE_LIVE_BLKS_EXT) >> #define NILFS_FEATURE_COMPAT_RO_SUPP NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT >> #define NILFS_FEATURE_INCOMPAT_SUPP 0ULL >> >> @@ -609,19 +612,34 @@ struct nilfs_cpfile_header { >> sizeof(struct nilfs_checkpoint) - 1) / \ >> sizeof(struct nilfs_checkpoint)) >> >> +#ifndef offsetofend >> +#define offsetofend(TYPE, MEMBER) \ >> + (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) >> +#endif >> + >> /** >> * struct nilfs_segment_usage - segment usage >> * @su_lastmod: last modified timestamp >> * @su_nblocks: number of blocks in segment >> * @su_flags: flags >> + * @su_nlive_blks: number of live blocks in the segment >> + * @su_nsnapshot_blks: number of blocks belonging to a snapshot in the segment >> + * @su_nlive_lastmod: timestamp nlive_blks was last modified >> */ >> struct nilfs_segment_usage { >> __le64 su_lastmod; >> __le32 su_nblocks; >> __le32 su_flags; >> + __le32 su_nlive_blks; >> + __le32 su_nsnapshot_blks; >> + __le64 su_nlive_lastmod; >> }; >> >> -#define NILFS_MIN_SEGMENT_USAGE_SIZE 16 >> +#define NILFS_MIN_SEGMENT_USAGE_SIZE \ >> + offsetofend(struct nilfs_segment_usage, su_flags) >> + >> +#define NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE \ >> + offsetofend(struct nilfs_segment_usage, su_nlive_lastmod) >> >> /* segment usage flag */ >> enum { >> @@ -658,11 +676,16 @@ NILFS_SEGMENT_USAGE_FNS(DIRTY, dirty) >> NILFS_SEGMENT_USAGE_FNS(ERROR, error) >> > >> static inline void >> -nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su) >> +nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su, size_t susz) >> { >> su->su_lastmod = cpu_to_le64(0); >> su->su_nblocks = cpu_to_le32(0); >> su->su_flags = cpu_to_le32(0); >> + if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) { >> + su->su_nlive_blks = cpu_to_le32(0); >> + su->su_nsnapshot_blks = cpu_to_le32(0); >> + su->su_nlive_lastmod = cpu_to_le64(0); >> + } >> } > > nilfs_sufile_do_scrap() function does almost the same thing. > Consider defining common inline function and using it for > nilfs_segment_usage_set_clean() and nilfs_sufile_do_scrap(): > > static inline void > nilfs_segment_usage_format(struct nilfs_segment_usage *su, size_t susz, > __u32 flags) > { > su->su_lastmod = cpu_to_le64(0); > su->su_nblocks = cpu_to_le32(0); > su->su_flags = cpu_to_le32(flags); > if (susz >= NILFS_LIVE_BLKS_EXT_SEGMENT_USAGE_SIZE) { > su->su_nlive_blks = cpu_to_le32(0); > su->su_nsnapshot_blks = cpu_to_le32(0); > su->su_nlive_lastmod = cpu_to_le64(0); > } > } > > static inline void > nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su, size_t susz) > { > nilfs_segment_usage_format(su, susz, 0); > } Good idea. I will start with the modifications right away. I will of course wait with version 3 of the patch set until you finished your review. Regards, Andreas Rohner > Regards, > Ryusuke Konishi > >> >> static inline int >> @@ -684,23 +707,33 @@ struct nilfs_sufile_header { >> /* ... */ >> }; >> >> -#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ >> - ((sizeof(struct nilfs_sufile_header) + \ >> - sizeof(struct nilfs_segment_usage) - 1) / \ >> - sizeof(struct nilfs_segment_usage)) >> +#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET(susz) \ >> + ((sizeof(struct nilfs_sufile_header) + (susz) - 1) / (susz)) >> >> /** >> * nilfs_suinfo - segment usage information >> * @sui_lastmod: timestamp of last modification >> * @sui_nblocks: number of written blocks in segment >> * @sui_flags: segment usage flags >> + * @sui_nlive_blks: number of live blocks in the segment >> + * @sui_nsnapshot_blks: number of blocks belonging to a snapshot in the segment >> + * @sui_nlive_lastmod: timestamp nlive_blks was last modified >> */ >> struct nilfs_suinfo { >> __u64 sui_lastmod; >> __u32 sui_nblocks; >> __u32 sui_flags; >> + __u32 sui_nlive_blks; >> + __u32 sui_nsnapshot_blks; >> + __u64 sui_nlive_lastmod; >> }; >> >> +#define NILFS_MIN_SUINFO_SIZE \ >> + offsetofend(struct nilfs_suinfo, sui_flags) >> + >> +#define NILFS_LIVE_BLKS_EXT_SUINFO_SIZE \ >> + offsetofend(struct nilfs_suinfo, sui_nlive_lastmod) >> + >> #define NILFS_SUINFO_FNS(flag, name) \ >> static inline int \ >> nilfs_suinfo_##name(const struct nilfs_suinfo *si) \ >> @@ -736,6 +769,9 @@ enum { >> NILFS_SUINFO_UPDATE_LASTMOD, >> NILFS_SUINFO_UPDATE_NBLOCKS, >> NILFS_SUINFO_UPDATE_FLAGS, >> + NILFS_SUINFO_UPDATE_NLIVE_BLKS, >> + NILFS_SUINFO_UPDATE_NLIVE_LASTMOD, >> + NILFS_SUINFO_UPDATE_NSNAPSHOT_BLKS, >> __NR_NILFS_SUINFO_UPDATE_FIELDS, >> }; >> >> @@ -759,6 +795,17 @@ nilfs_suinfo_update_##name(const struct nilfs_suinfo_update *sup) \ >> NILFS_SUINFO_UPDATE_FNS(LASTMOD, lastmod) >> NILFS_SUINFO_UPDATE_FNS(NBLOCKS, nblocks) >> NILFS_SUINFO_UPDATE_FNS(FLAGS, flags) >> +NILFS_SUINFO_UPDATE_FNS(NLIVE_BLKS, nlive_blks) >> +NILFS_SUINFO_UPDATE_FNS(NSNAPSHOT_BLKS, nsnapshot_blks) >> +NILFS_SUINFO_UPDATE_FNS(NLIVE_LASTMOD, nlive_lastmod) >> + >> +#define NILFS_MIN_SUINFO_UPDATE_SIZE \ >> + (offsetofend(struct nilfs_suinfo_update, sup_reserved) + \ >> + NILFS_MIN_SUINFO_SIZE) >> + >> +#define NILFS_LIVE_BLKS_EXT_SUINFO_UPDATE_SIZE \ >> + (offsetofend(struct nilfs_suinfo_update, sup_reserved) + \ >> + NILFS_LIVE_BLKS_EXT_SUINFO_SIZE) >> >> enum { >> NILFS_CHECKPOINT, >> -- >> 2.3.7 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html