On Sun, 2014-03-16 at 11:47 +0100, Andreas Rohner wrote: > To accurately count the number of live blocks in a segment, it is > important to take snapshots into account, because snapshots can protect > reclaimable blocks from being cleaned. > > This patch uses the previously reserved de_rsv field of the > nilfs_dat_entry struct to store one of the snapshots the corresponding > block belongs to. One block can belong to many snapshots, but because > the snapshots are stored in a sorted linked list, it is easy to check if > a block belongs to any other snapshot given the previous and the next > snapshot. For example if the current snapshot (in de_ss) is being > removed and neither the previous nor the next snapshot is in the range > of de_start to de_end, then it is guaranteed that the block doesn't > belong to any other snapshot and is reclaimable. On the other hand if > lets say the previous snapshot is in the range of de_start to de_end, we > simply set de_ss to the previous snapshot and the block is not > reclaimable. > > To implement this every DAT entry is scanned at snapshot > creation/deletion time and updated if needed. It is well known problem of NILFS2 that deletion is very slow operation for big files because of necessity to update DAT file (de_end: end checkpoint number). So, how your addition does affect this disadvantage? > To avoid too many update > operations only potentially reclaimable blocks are ever updated. For > example if there are some deleted files and the checkpoint to which > these files belong is turned into a snapshot, then su_nblocks is > incremented for these blocks, which reverses the decrement that happened > when the files were deleted. If after some time this snapshot is > deleted, su_nblocks is decremented again to reverse the increment at > creation time. > > Signed-off-by: Andreas Rohner <andreas.rohner@xxxxxxx> > --- > fs/nilfs2/cpfile.c | 7 ++++ > fs/nilfs2/dat.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ > fs/nilfs2/dat.h | 26 ++++++++++++++ > include/linux/nilfs2_fs.h | 4 +-- > 4 files changed, 121 insertions(+), 2 deletions(-) > > diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c > index 0d58075..29952f5 100644 > --- a/fs/nilfs2/cpfile.c > +++ b/fs/nilfs2/cpfile.c > @@ -28,6 +28,7 @@ > #include <linux/nilfs2_fs.h> > #include "mdt.h" > #include "cpfile.h" > +#include "sufile.h" > > > static inline unsigned long > @@ -584,6 +585,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) > struct nilfs_cpfile_header *header; > struct nilfs_checkpoint *cp; > struct nilfs_snapshot_list *list; > + struct the_nilfs *nilfs = cpfile->i_sb->s_fs_info; > __u64 curr, prev; > unsigned long curr_blkoff, prev_blkoff; > void *kaddr; > @@ -681,6 +683,8 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) > mark_buffer_dirty(header_bh); > nilfs_mdt_mark_dirty(cpfile); > > + nilfs_dat_scan_inc_ss(nilfs->ns_dat, cno); > + > brelse(prev_bh); > > out_curr: > @@ -703,6 +707,7 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) > struct nilfs_cpfile_header *header; > struct nilfs_checkpoint *cp; > struct nilfs_snapshot_list *list; > + struct the_nilfs *nilfs = cpfile->i_sb->s_fs_info; > __u64 next, prev; > void *kaddr; > int ret; > @@ -784,6 +789,8 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) > mark_buffer_dirty(header_bh); > nilfs_mdt_mark_dirty(cpfile); > > + nilfs_dat_scan_dec_ss(nilfs->ns_dat, cno, prev, next); > + > brelse(prev_bh); > > out_next: > diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c > index 0d5fada..89a4a5f 100644 > --- a/fs/nilfs2/dat.c > +++ b/fs/nilfs2/dat.c > @@ -28,6 +28,7 @@ > #include "mdt.h" > #include "alloc.h" > #include "dat.h" > +#include "sufile.h" > > > #define NILFS_CNO_MIN ((__u64)1) > @@ -97,6 +98,7 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) > entry->de_start = cpu_to_le64(NILFS_CNO_MIN); > entry->de_end = cpu_to_le64(NILFS_CNO_MAX); > entry->de_blocknr = cpu_to_le64(0); > + entry->de_ss = cpu_to_le64(0); > kunmap_atomic(kaddr); > > nilfs_palloc_commit_alloc_entry(dat, req); > @@ -121,6 +123,7 @@ static void nilfs_dat_commit_free(struct inode *dat, > entry->de_start = cpu_to_le64(NILFS_CNO_MIN); > entry->de_end = cpu_to_le64(NILFS_CNO_MIN); > entry->de_blocknr = cpu_to_le64(0); > + entry->de_ss = cpu_to_le64(0); > kunmap_atomic(kaddr); > > nilfs_dat_commit_entry(dat, req); > @@ -201,6 +204,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, > WARN_ON(start > end); > } > entry->de_end = cpu_to_le64(end); > + entry->de_ss = cpu_to_le64(NILFS_CNO_MAX); > blocknr = le64_to_cpu(entry->de_blocknr); > kunmap_atomic(kaddr); > > @@ -365,6 +369,8 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) > } > WARN_ON(blocknr == 0); > entry->de_blocknr = cpu_to_le64(blocknr); > + if (entry->de_ss == cpu_to_le64(NILFS_CNO_MAX)) > + entry->de_ss = cpu_to_le64(0); > kunmap_atomic(kaddr); > > mark_buffer_dirty(entry_bh); > @@ -430,6 +436,86 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) > return ret; > } > > +void nilfs_dat_do_scan_dec(struct inode *dat, struct nilfs_palloc_req *req, > + void *data) > +{ > + struct nilfs_dat_entry *entry; > + __u64 start, end, prev_ss; > + __u64 *ssp = data, ss = ssp[0], prev = ssp[1], next = ssp[2]; > + sector_t blocknr; > + void *kaddr; > + struct the_nilfs *nilfs; > + > + kaddr = kmap_atomic(req->pr_entry_bh->b_page); > + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, > + req->pr_entry_bh, kaddr); > + start = le64_to_cpu(entry->de_start); > + end = le64_to_cpu(entry->de_end); > + blocknr = le64_to_cpu(entry->de_blocknr); > + prev_ss = le64_to_cpu(entry->de_ss); > + > + if (blocknr != 0 && end != NILFS_CNO_MAX && ss >= start && ss < end) { I think that it makes sense to use small functions with clear names about what we check. > + if (prev_ss == ss || prev_ss == NILFS_CNO_MAX) { > + if (prev && prev >= start && prev < end) > + entry->de_ss = cpu_to_le64(prev); > + else if (next && next >= start && next < end) > + entry->de_ss = cpu_to_le64(next); > + else > + entry->de_ss = cpu_to_le64(0); Ditto. > + > + if (prev_ss != NILFS_CNO_MAX) > + prev_ss = le64_to_cpu(entry->de_ss); > + kunmap_atomic(kaddr); > + mark_buffer_dirty(req->pr_entry_bh); > + nilfs_mdt_mark_dirty(dat); > + } else > + kunmap_atomic(kaddr); > + > + if (prev_ss == 0) { > + nilfs = dat->i_sb->s_fs_info; > + nilfs_sufile_add_segment_usage(nilfs->ns_sufile, > + nilfs_get_segnum_of_block(nilfs, blocknr), > + -1, 0); > + } > + } else > + kunmap_atomic(kaddr); > +} > + > +void nilfs_dat_do_scan_inc(struct inode *dat, struct nilfs_palloc_req *req, > + void *data) > +{ > + struct nilfs_dat_entry *entry; > + __u64 start, end, prev_ss; > + __u64 *ssp = data, ss = *ssp; > + sector_t blocknr; > + void *kaddr; > + struct the_nilfs *nilfs; > + > + kaddr = kmap_atomic(req->pr_entry_bh->b_page); > + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, > + req->pr_entry_bh, kaddr); > + start = le64_to_cpu(entry->de_start); > + end = le64_to_cpu(entry->de_end); > + blocknr = le64_to_cpu(entry->de_blocknr); > + prev_ss = le64_to_cpu(entry->de_ss); > + > + if (blocknr != 0 && end != NILFS_CNO_MAX && ss >= start && ss < end && > + (prev_ss == 0 || prev_ss == NILFS_CNO_MAX)) { Ditto. Moreover, you repeat this check. > + > + entry->de_ss = cpu_to_le64(ss); > + > + kunmap_atomic(kaddr); > + mark_buffer_dirty(req->pr_entry_bh); > + nilfs_mdt_mark_dirty(dat); > + > + nilfs = dat->i_sb->s_fs_info; > + nilfs_sufile_add_segment_usage(nilfs->ns_sufile, > + nilfs_get_segnum_of_block(nilfs, blocknr), Looks weird. Maybe, variable? Thanks, Vyacheslav Dubeyko. > + 1, 0); > + } else > + kunmap_atomic(kaddr); > +} > + > ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, > size_t nvi) > { > diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h > index cbd8e97..92a187e 100644 > --- a/fs/nilfs2/dat.h > +++ b/fs/nilfs2/dat.h > @@ -55,5 +55,31 @@ ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); > > int nilfs_dat_read(struct super_block *sb, size_t entry_size, > struct nilfs_inode *raw_inode, struct inode **inodep); > +void nilfs_dat_do_scan_dec(struct inode *, struct nilfs_palloc_req *, void *); > +void nilfs_dat_do_scan_inc(struct inode *, struct nilfs_palloc_req *, void *); > + > +/** > + * nilfs_dat_scan_dec_ss - scan all dat entries for a checkpoint dec suinfo > + * @dat: inode of dat file > + * @cno: snapshot number > + * @prev: previous snapshot number > + * @next: next snapshot number > + */ > +static inline int nilfs_dat_scan_dec_ss(struct inode *dat, __u64 cno, > + __u64 prev, __u64 next) > +{ > + __u64 data[3] = { cno, prev, next }; > + return nilfs_palloc_scan_entries(dat, nilfs_dat_do_scan_dec, data); > +} > + > +/** > + * nilfs_dat_scan_dec_ss - scan all dat entries for a checkpoint inc suinfo > + * @dat: inode of dat file > + * @cno: snapshot number > + */ > +static inline int nilfs_dat_scan_inc_ss(struct inode *dat, __u64 cno) > +{ > + return nilfs_palloc_scan_entries(dat, nilfs_dat_do_scan_inc, &cno); > +} > > #endif /* _NILFS_DAT_H */ > diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h > index ca269ad..ba9ebe02 100644 > --- a/include/linux/nilfs2_fs.h > +++ b/include/linux/nilfs2_fs.h > @@ -475,13 +475,13 @@ struct nilfs_palloc_group_desc { > * @de_blocknr: block number > * @de_start: start checkpoint number > * @de_end: end checkpoint number > - * @de_rsv: reserved for future use > + * @de_ss: one of the snapshots the block belongs to > */ > struct nilfs_dat_entry { > __le64 de_blocknr; > __le64 de_start; > __le64 de_end; > - __le64 de_rsv; > + __le64 de_ss; > }; > > #define NILFS_MIN_DAT_ENTRY_SIZE 32 -- To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html