Re: [PATCH v2 8/9] nilfs2: correct live block tracking for GC protection period

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2015-05-10 20:15, Ryusuke Konishi wrote:
> On Sun,  3 May 2015 12:05:21 +0200, Andreas Rohner wrote:
>> The userspace GC uses the concept of a so-called protection period,
>> which is a period of time, where actually reclaimable blocks are
>> protected. If a segment is cleaned and there are blocks in it that are
>> protected by this, they have to be treated as if they were live blocks.
>>
>> This is a problem for the live block tracking on the kernel side,
>> because the kernel knows nothing about the protection period. This patch
>> introduces new flags for the nilfs_vdesc data structure, to mark blocks
>> that need to be treated as if they were alive, but must be counted as if
>> they were reclaimable. There are two reasons for this to happen.
>> Either a block was deleted within the protection period, or it is
>> part of a snapshot.
>>
>> After the blocks described by the nilfs_vdesc structures are read in,
>> the flags are passed on to the buffer_heads to get the information to
>> the segment construction phase. During segment construction, the live
>> block tracking is adjusted accordingly.
>>
>> Additionally the blocks are rechecked if they are reclaimable, since the
>> last check was in userspace without the proper locking.
>>
>> Signed-off-by: Andreas Rohner <andreas.rohner@xxxxxxx>
>> ---
>>  fs/nilfs2/dat.c           | 66 +++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nilfs2/dat.h           |  1 +
>>  fs/nilfs2/ioctl.c         | 15 +++++++++++
>>  fs/nilfs2/page.h          |  6 +++++
>>  fs/nilfs2/segment.c       | 41 ++++++++++++++++++++++++++++-
>>  include/linux/nilfs2_fs.h | 38 +++++++++++++++++++++++++--
>>  6 files changed, 164 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
>> index 9c2fc32..80a1905 100644
>> --- a/fs/nilfs2/dat.c
>> +++ b/fs/nilfs2/dat.c
>> @@ -35,6 +35,17 @@
>>  #define NILFS_CNO_MAX	(~(__u64)0)
>>  
>>  /**
>> + * nilfs_dat_entry_is_live - check if @entry is alive
>> + * @entry: DAT-Entry
>> + *
>> + * Description: Simple check if @entry is alive in the current checkpoint.
>> + */
>> +static int nilfs_dat_entry_is_live(struct nilfs_dat_entry *entry)
>> +{
>> +	return entry->de_end == cpu_to_le64(NILFS_CNO_MAX);
>> +}
>> +
>> +/**
>>   * struct nilfs_dat_info - on-memory private data of DAT file
>>   * @mi: on-memory private data of metadata file
>>   * @palloc_cache: persistent object allocator cache of DAT file
>> @@ -387,6 +398,61 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
>>  }
>>  
>>  /**
>> + * nilfs_dat_is_live - checks if the virtual block number is alive
>> + * @dat: DAT file inode
>> + * @vblocknr: virtual block number
>> + *
>> + * Description: nilfs_dat_is_live() looks up the DAT-Entry for
>> + * @vblocknr and determines if the corresponding block is alive in the current
>> + * checkpoint or not. This check ignores snapshots and protection periods.
>> + *
>> + * Return Value: 1 if vblocknr is alive and 0 otherwise. On error one of the
>> + * following negative error codes is returned
>> + *
>> + * %-EIO - I/O error.
>> + *
>> + * %-ENOMEM - Insufficient amount of memory available.
>> + *
>> + * %-ENOENT - A block number associated with @vblocknr does not exist.
>> + */
>> +int nilfs_dat_is_live(struct inode *dat, __u64 vblocknr)
>> +{
>> +	struct buffer_head *entry_bh, *bh;
>> +	struct nilfs_dat_entry *entry;
>> +	sector_t blocknr;
>> +	void *kaddr;
>> +	int ret;
>> +
>> +	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) {
>> +		bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh);
>> +		if (bh) {
>> +			WARN_ON(!buffer_uptodate(bh));
>> +			put_bh(entry_bh);
>> +			entry_bh = bh;
>> +		}
>> +	}
>> +
>> +	kaddr = kmap_atomic(entry_bh->b_page);
>> +	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
>> +	blocknr = le64_to_cpu(entry->de_blocknr);
>> +	if (blocknr == 0) {
>> +		ret = -ENOENT;
>> +		goto out_unmap;
>> +	}
>> +
>> +	ret = nilfs_dat_entry_is_live(entry);
>> +
>> +out_unmap:
>> +	kunmap_atomic(kaddr);
>> +	put_bh(entry_bh);
>> +	return ret;
>> +}
>> +
>> +/**
>>   * nilfs_dat_translate - translate a virtual block number to a block number
>>   * @dat: DAT file inode
>>   * @vblocknr: virtual block number
>> diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
>> index cbd8e97..a95547c 100644
>> --- a/fs/nilfs2/dat.h
>> +++ b/fs/nilfs2/dat.h
>> @@ -47,6 +47,7 @@ void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
>>  			     struct nilfs_palloc_req *, int);
>>  void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
>>  			    struct nilfs_palloc_req *);
>> +int nilfs_dat_is_live(struct inode *dat, __u64 vblocknr);
>>  
>>  int nilfs_dat_mark_dirty(struct inode *, __u64);
>>  int nilfs_dat_freev(struct inode *, __u64 *, size_t);
>> diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
>> index f6ee54e..40bf74a 100644
>> --- a/fs/nilfs2/ioctl.c
>> +++ b/fs/nilfs2/ioctl.c
>> @@ -612,6 +612,12 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
>>  		brelse(bh);
>>  		return -EEXIST;
>>  	}
>> +
>> +	if (nilfs_vdesc_snapshot_protected(vdesc))
>> +		set_buffer_nilfs_snapshot_protected(bh);
>> +	if (nilfs_vdesc_period_protected(vdesc))
>> +		set_buffer_nilfs_period_protected(bh);
>> +
>>  	list_add_tail(&bh->b_assoc_buffers, buffers);
>>  	return 0;
>>  }
>> @@ -662,6 +668,15 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
>>  		}
>>  
>>  		do {
>> +			/*
>> +			 * old user space tools to not initialize vd_blk_flags
>> +			 * if vd_period.p_start > 0 then vd_blk_flags was
>> +			 * not initialized properly and may contain invalid
>> +			 * flags
>> +			 */
>> +			if (vdesc->vd_period.p_start > 0)
>> +				vdesc->vd_blk_flags = 0;
>> +
>>  			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
>>  							   &buffers);
>>  			if (unlikely(ret < 0)) {
>> diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
>> index 4e35814..4835e37 100644
>> --- a/fs/nilfs2/page.h
>> +++ b/fs/nilfs2/page.h
>> @@ -36,6 +36,8 @@ enum {
>>  	BH_NILFS_Volatile,
>>  	BH_NILFS_Checked,
>>  	BH_NILFS_Redirected,
>> +	BH_NILFS_Snapshot_Protected,
>> +	BH_NILFS_Period_Protected,
>>  	BH_NILFS_Counted,
>>  };
>>  
>> @@ -43,6 +45,10 @@ BUFFER_FNS(NILFS_Node, nilfs_node)		/* nilfs node buffers */
>>  BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
>>  BUFFER_FNS(NILFS_Checked, nilfs_checked)	/* buffer is verified */
>>  BUFFER_FNS(NILFS_Redirected, nilfs_redirected)	/* redirected to a copy */
>> +/* buffer belongs to a snapshot and is protected by it */
>> +BUFFER_FNS(NILFS_Snapshot_Protected, nilfs_snapshot_protected)
>> +/* protected by protection period */
>> +BUFFER_FNS(NILFS_Period_Protected, nilfs_period_protected)
>>  /* counted by propagate_p for segment usage */
>>  BUFFER_FNS(NILFS_Counted, nilfs_counted)
>>  
>> diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
>> index ab8df33..b476ce7 100644
>> --- a/fs/nilfs2/segment.c
>> +++ b/fs/nilfs2/segment.c
>> @@ -1564,12 +1564,41 @@ static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
>>  	/* The caller must release old_bh */
>>  }
>>  
>> +/**
>> + * nilfs_segctor_dec_nlive_blks_gc - dec. nlive_blks for blocks of GC-Inodes
>> + * @dat: dat inode
>> + * @segbuf: currtent segment buffer
>> + * @bh: current buffer head
>> + *
>> + * Description: nilfs_segctor_dec_nlive_blks_gc() is called if the inode to
>> + * which @bh belongs is a GC-Inode. In that case it is not necessary to
>> + * decrement the previous segment, because at the end of the GC process it
>> + * will be freed anyway. It is however necessary to check again if the blocks
>> + * are alive here, because the last check was in userspace without the proper
>> + * locking. Additionally the blocks protected by the protection period should
>> + * be considered reclaimable. It is assumed, that @bh->b_blocknr contains
>> + * a virtual block number, which is only true if @bh is part of a GC-Inode.
>> + */
> 
>> +static void nilfs_segctor_dec_nlive_blks_gc(struct inode *dat,
>> +					    struct nilfs_segment_buffer *segbuf,
>> +					    struct buffer_head *bh) {
>> +	bool isreclaimable = buffer_nilfs_period_protected(bh) ||
>> +				nilfs_dat_is_live(dat, bh->b_blocknr) <= 0;
>> +
>> +	if (!buffer_nilfs_snapshot_protected(bh) && isreclaimable)
>> +		segbuf->sb_nlive_blks--;
>> +	if (buffer_nilfs_snapshot_protected(bh))
>> +		segbuf->sb_nsnapshot_blks++;
>> +}
> 
> I have some comments on this function:
> 
>  - The position of the brace "{" violates a CodingStyle rule of function.

I agree. Sorry for that stupid mistake.

>  - buffer_nilfs_snapshot_protected() is tested twice, but this can be
>    reduced as follows:
> 
> 	if (buffer_nilfs_snapshot_protected(bh))
> 		segbuf->sb_nsnapshot_blks++;
> 	else if (isreclaimable)
> 		segbuf->sb_nlive_blks--;

I agree.

>  - Additionally, I prefer "reclaimable" to "isreclaimable" since it's
>    simpler and still trivial.

Ok.

>  - The logic of isreclaimable is counterintuitive.  
> 
>> +	bool isreclaimable = buffer_nilfs_period_protected(bh) ||
>> +				nilfs_dat_is_live(dat, bh->b_blocknr) <= 0;
> 
>    It looks like buffer_nilfs_period_protected(bh) here implies that
>    the block is deleted.  But it's independent from the buffer is
>    protected by protection_period or not.

It is not independent. buffer_nilfs_period_protected() is only set for
deleted/reclaimable blocks that have to be copied because of the
protection period. So if the flag is set, then the block is always
reclaimable.

>    Why not just adding "still alive" or "deleted" flag and its
>    corresponding vdesc flag instead of adding the period protected
>    flag ?
> 
>    If we add the "still alive" flag, which means that the block is
>    not yet deleted from the latest checkpoint, then this function
>    can be simplified as follows:

I think buffer_nilfs_period_protected(bh) is a better name.

It does not mark all blocks within the protection period. Live blocks
within the protection period do not have this flag set. It marks exactly
those blocks that are dead and reclaimable but protected from being
discarded by the protection period. The protection period is key.
Without the protection period those blocks would not have been copied.
That is the exact meaning of the flag, and I think the name fits quite well.

> static void nilfs_segctor_dec_nlive_blks_gc(struct inode *dat,
> 					    struct nilfs_segment_buffer *segbuf,
> 					    struct buffer_head *bh)
> {
> 	if (buffer_nilfs_snapshot_protected(bh))
> 		segbuf->sb_nsnapshot_blks++;
> 	else if (!buffer_nilfs_still_alive(bh) ||
> 		 nilfs_dat_is_live(dat, bh->b_blocknr) <= 0)
> 		segbuf->sb_nlive_blks--;
> }

I can still simplify it like that:

	if (buffer_nilfs_snapshot_protected(bh))
		segbuf->sb_nsnapshot_blks++;
	else if (buffer_nilfs_period_protected(bh) ||
			nilfs_dat_is_live(dat, bh->b_blocknr) <= 0)
		segbuf->sb_nlive_blks--;

>  - The last comment: we usually expect that the first argument is a
>    pointer to nilfs_sc_info struct for function nilfs_segctor_xxxx(),
>    but this doesn't.  How about the following name ?
> 
> static void nilfs_segbuf_dec_nlive_blks_gc(struct nilfs_segment_buffer *segbuf,
> 					   struct buffer_head *bh,
> 				           struct inode *dat)

Yes I agree that looks better.

Regards,
Andreas Rohner

> Regards,
> Ryusuke Konishi
> 
>> +
>>  static int
>>  nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>>  				     struct nilfs_segment_buffer *segbuf,
>>  				     int mode)
>>  {
>> +	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
>>  	struct inode *inode = NULL;
>> +	struct nilfs_inode_info *ii;
>>  	sector_t blocknr;
>>  	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
>>  	unsigned long nblocks = 0, ndatablk = 0;
>> @@ -1579,7 +1608,9 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>>  	union nilfs_binfo binfo;
>>  	struct buffer_head *bh, *bh_org;
>>  	ino_t ino = 0;
>> -	int err = 0;
>> +	int err = 0, gc_inode = 0, track_live_blks;
>> +
>> +	track_live_blks = nilfs_feature_track_live_blks(nilfs);
>>  
>>  	if (!nfinfo)
>>  		goto out;
>> @@ -1601,6 +1632,9 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>>  
>>  			inode = bh->b_page->mapping->host;
>>  
>> +			ii = NILFS_I(inode);
>> +			gc_inode = test_bit(NILFS_I_GCINODE, &ii->i_state);
>> +
>>  			if (mode == SC_LSEG_DSYNC)
>>  				sc_op = &nilfs_sc_dsync_ops;
>>  			else if (ino == NILFS_DAT_INO)
>> @@ -1608,6 +1642,11 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>>  			else /* file blocks */
>>  				sc_op = &nilfs_sc_file_ops;
>>  		}
>> +
>> +		if (track_live_blks && gc_inode)
>> +			nilfs_segctor_dec_nlive_blks_gc(nilfs->ns_dat,
>> +							segbuf, bh);
>> +
>>  		bh_org = bh;
>>  		get_bh(bh_org);
>>  		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
>> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
>> index 5f05bbf..ddc98e8 100644
>> --- a/include/linux/nilfs2_fs.h
>> +++ b/include/linux/nilfs2_fs.h
>> @@ -905,7 +905,7 @@ struct nilfs_vinfo {
>>   * @vd_blocknr: disk block number
>>   * @vd_offset: logical block offset inside a file
>>   * @vd_flags: flags (data or node block)
>> - * @vd_pad: padding
>> + * @vd_blk_flags: additional flags
>>   */
>>  struct nilfs_vdesc {
>>  	__u64 vd_ino;
>> @@ -915,9 +915,43 @@ struct nilfs_vdesc {
>>  	__u64 vd_blocknr;
>>  	__u64 vd_offset;
>>  	__u32 vd_flags;
>> -	__u32 vd_pad;
>> +	/*
>> +	 * vd_blk_flags needed because vd_flags doesn't support
>> +	 * bit-flags because of backwards compatibility
>> +	 */
>> +	__u32 vd_blk_flags;
>>  };
>>  
>> +/* vdesc flags */
>> +enum {
>> +	NILFS_VDESC_SNAPSHOT_PROTECTED,
>> +	NILFS_VDESC_PERIOD_PROTECTED,
>> +
>> +	/* ... */
>> +
>> +	__NR_NILFS_VDESC_FIELDS,
>> +};
>> +
>> +#define NILFS_VDESC_FNS(flag, name)					\
>> +static inline void							\
>> +nilfs_vdesc_set_##name(struct nilfs_vdesc *vdesc)			\
>> +{									\
>> +	vdesc->vd_blk_flags |= (1UL << NILFS_VDESC_##flag);		\
>> +}									\
>> +static inline void							\
>> +nilfs_vdesc_clear_##name(struct nilfs_vdesc *vdesc)			\
>> +{									\
>> +	vdesc->vd_blk_flags &= ~(1UL << NILFS_VDESC_##flag);		\
>> +}									\
>> +static inline int							\
>> +nilfs_vdesc_##name(const struct nilfs_vdesc *vdesc)			\
>> +{									\
>> +	return !!(vdesc->vd_blk_flags & (1UL << NILFS_VDESC_##flag));	\
>> +}
>> +
>> +NILFS_VDESC_FNS(SNAPSHOT_PROTECTED, snapshot_protected)
>> +NILFS_VDESC_FNS(PERIOD_PROTECTED, period_protected)
>> +
>>  /**
>>   * struct nilfs_bdesc - descriptor of disk block number
>>   * @bd_ino: inode number
>> -- 
>> 2.3.7
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux BTRFS]     [Linux CIFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux