Re: [PATCH v2 8/9] nilfs2: correct live block tracking for GC protection period

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun,  3 May 2015 12:05:21 +0200, Andreas Rohner wrote:
> The userspace GC uses the concept of a so-called protection period,
> which is a period of time, where actually reclaimable blocks are
> protected. If a segment is cleaned and there are blocks in it that are
> protected by this, they have to be treated as if they were live blocks.
> 
> This is a problem for the live block tracking on the kernel side,
> because the kernel knows nothing about the protection period. This patch
> introduces new flags for the nilfs_vdesc data structure, to mark blocks
> that need to be treated as if they were alive, but must be counted as if
> they were reclaimable. There are two reasons for this to happen.
> Either a block was deleted within the protection period, or it is
> part of a snapshot.
> 
> After the blocks described by the nilfs_vdesc structures are read in,
> the flags are passed on to the buffer_heads to get the information to
> the segment construction phase. During segment construction, the live
> block tracking is adjusted accordingly.
> 
> Additionally the blocks are rechecked if they are reclaimable, since the
> last check was in userspace without the proper locking.
> 
> Signed-off-by: Andreas Rohner <andreas.rohner@xxxxxxx>
> ---
>  fs/nilfs2/dat.c           | 66 +++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nilfs2/dat.h           |  1 +
>  fs/nilfs2/ioctl.c         | 15 +++++++++++
>  fs/nilfs2/page.h          |  6 +++++
>  fs/nilfs2/segment.c       | 41 ++++++++++++++++++++++++++++-
>  include/linux/nilfs2_fs.h | 38 +++++++++++++++++++++++++--
>  6 files changed, 164 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
> index 9c2fc32..80a1905 100644
> --- a/fs/nilfs2/dat.c
> +++ b/fs/nilfs2/dat.c
> @@ -35,6 +35,17 @@
>  #define NILFS_CNO_MAX	(~(__u64)0)
>  
>  /**
> + * nilfs_dat_entry_is_live - check if @entry is alive
> + * @entry: DAT-Entry
> + *
> + * Description: Simple check if @entry is alive in the current checkpoint.
> + */
> +static int nilfs_dat_entry_is_live(struct nilfs_dat_entry *entry)
> +{
> +	return entry->de_end == cpu_to_le64(NILFS_CNO_MAX);
> +}
> +
> +/**
>   * struct nilfs_dat_info - on-memory private data of DAT file
>   * @mi: on-memory private data of metadata file
>   * @palloc_cache: persistent object allocator cache of DAT file
> @@ -387,6 +398,61 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
>  }
>  
>  /**
> + * nilfs_dat_is_live - checks if the virtual block number is alive
> + * @dat: DAT file inode
> + * @vblocknr: virtual block number
> + *
> + * Description: nilfs_dat_is_live() looks up the DAT-Entry for
> + * @vblocknr and determines if the corresponding block is alive in the current
> + * checkpoint or not. This check ignores snapshots and protection periods.
> + *
> + * Return Value: 1 if vblocknr is alive and 0 otherwise. On error one of the
> + * following negative error codes is returned
> + *
> + * %-EIO - I/O error.
> + *
> + * %-ENOMEM - Insufficient amount of memory available.
> + *
> + * %-ENOENT - A block number associated with @vblocknr does not exist.
> + */
> +int nilfs_dat_is_live(struct inode *dat, __u64 vblocknr)
> +{
> +	struct buffer_head *entry_bh, *bh;
> +	struct nilfs_dat_entry *entry;
> +	sector_t blocknr;
> +	void *kaddr;
> +	int ret;
> +
> +	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) {
> +		bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh);
> +		if (bh) {
> +			WARN_ON(!buffer_uptodate(bh));
> +			put_bh(entry_bh);
> +			entry_bh = bh;
> +		}
> +	}
> +
> +	kaddr = kmap_atomic(entry_bh->b_page);
> +	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
> +	blocknr = le64_to_cpu(entry->de_blocknr);
> +	if (blocknr == 0) {
> +		ret = -ENOENT;
> +		goto out_unmap;
> +	}
> +
> +	ret = nilfs_dat_entry_is_live(entry);
> +
> +out_unmap:
> +	kunmap_atomic(kaddr);
> +	put_bh(entry_bh);
> +	return ret;
> +}
> +
> +/**
>   * nilfs_dat_translate - translate a virtual block number to a block number
>   * @dat: DAT file inode
>   * @vblocknr: virtual block number
> diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
> index cbd8e97..a95547c 100644
> --- a/fs/nilfs2/dat.h
> +++ b/fs/nilfs2/dat.h
> @@ -47,6 +47,7 @@ void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
>  			     struct nilfs_palloc_req *, int);
>  void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
>  			    struct nilfs_palloc_req *);
> +int nilfs_dat_is_live(struct inode *dat, __u64 vblocknr);
>  
>  int nilfs_dat_mark_dirty(struct inode *, __u64);
>  int nilfs_dat_freev(struct inode *, __u64 *, size_t);
> diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
> index f6ee54e..40bf74a 100644
> --- a/fs/nilfs2/ioctl.c
> +++ b/fs/nilfs2/ioctl.c
> @@ -612,6 +612,12 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
>  		brelse(bh);
>  		return -EEXIST;
>  	}
> +
> +	if (nilfs_vdesc_snapshot_protected(vdesc))
> +		set_buffer_nilfs_snapshot_protected(bh);
> +	if (nilfs_vdesc_period_protected(vdesc))
> +		set_buffer_nilfs_period_protected(bh);
> +
>  	list_add_tail(&bh->b_assoc_buffers, buffers);
>  	return 0;
>  }
> @@ -662,6 +668,15 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
>  		}
>  
>  		do {
> +			/*
> +			 * old user space tools to not initialize vd_blk_flags
> +			 * if vd_period.p_start > 0 then vd_blk_flags was
> +			 * not initialized properly and may contain invalid
> +			 * flags
> +			 */
> +			if (vdesc->vd_period.p_start > 0)
> +				vdesc->vd_blk_flags = 0;
> +
>  			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
>  							   &buffers);
>  			if (unlikely(ret < 0)) {
> diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
> index 4e35814..4835e37 100644
> --- a/fs/nilfs2/page.h
> +++ b/fs/nilfs2/page.h
> @@ -36,6 +36,8 @@ enum {
>  	BH_NILFS_Volatile,
>  	BH_NILFS_Checked,
>  	BH_NILFS_Redirected,
> +	BH_NILFS_Snapshot_Protected,
> +	BH_NILFS_Period_Protected,
>  	BH_NILFS_Counted,
>  };
>  
> @@ -43,6 +45,10 @@ BUFFER_FNS(NILFS_Node, nilfs_node)		/* nilfs node buffers */
>  BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
>  BUFFER_FNS(NILFS_Checked, nilfs_checked)	/* buffer is verified */
>  BUFFER_FNS(NILFS_Redirected, nilfs_redirected)	/* redirected to a copy */
> +/* buffer belongs to a snapshot and is protected by it */
> +BUFFER_FNS(NILFS_Snapshot_Protected, nilfs_snapshot_protected)
> +/* protected by protection period */
> +BUFFER_FNS(NILFS_Period_Protected, nilfs_period_protected)
>  /* counted by propagate_p for segment usage */
>  BUFFER_FNS(NILFS_Counted, nilfs_counted)
>  
> diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
> index ab8df33..b476ce7 100644
> --- a/fs/nilfs2/segment.c
> +++ b/fs/nilfs2/segment.c
> @@ -1564,12 +1564,41 @@ static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
>  	/* The caller must release old_bh */
>  }
>  
> +/**
> + * nilfs_segctor_dec_nlive_blks_gc - dec. nlive_blks for blocks of GC-Inodes
> + * @dat: dat inode
> + * @segbuf: currtent segment buffer
> + * @bh: current buffer head
> + *
> + * Description: nilfs_segctor_dec_nlive_blks_gc() is called if the inode to
> + * which @bh belongs is a GC-Inode. In that case it is not necessary to
> + * decrement the previous segment, because at the end of the GC process it
> + * will be freed anyway. It is however necessary to check again if the blocks
> + * are alive here, because the last check was in userspace without the proper
> + * locking. Additionally the blocks protected by the protection period should
> + * be considered reclaimable. It is assumed, that @bh->b_blocknr contains
> + * a virtual block number, which is only true if @bh is part of a GC-Inode.
> + */

> +static void nilfs_segctor_dec_nlive_blks_gc(struct inode *dat,
> +					    struct nilfs_segment_buffer *segbuf,
> +					    struct buffer_head *bh) {
> +	bool isreclaimable = buffer_nilfs_period_protected(bh) ||
> +				nilfs_dat_is_live(dat, bh->b_blocknr) <= 0;
> +
> +	if (!buffer_nilfs_snapshot_protected(bh) && isreclaimable)
> +		segbuf->sb_nlive_blks--;
> +	if (buffer_nilfs_snapshot_protected(bh))
> +		segbuf->sb_nsnapshot_blks++;
> +}

I have some comments on this function:

 - The position of the brace "{" violates a CodingStyle rule of function.
 - buffer_nilfs_snapshot_protected() is tested twice, but this can be
   reduced as follows:

	if (buffer_nilfs_snapshot_protected(bh))
		segbuf->sb_nsnapshot_blks++;
	else if (isreclaimable)
		segbuf->sb_nlive_blks--;

 - Additionally, I prefer "reclaimable" to "isreclaimable" since it's
   simpler and still trivial.

 - The logic of isreclaimable is counterintuitive.  

> +	bool isreclaimable = buffer_nilfs_period_protected(bh) ||
> +				nilfs_dat_is_live(dat, bh->b_blocknr) <= 0;

   It looks like buffer_nilfs_period_protected(bh) here implies that
   the block is deleted.  But it's independent from the buffer is
   protected by protection_period or not.

   Why not just adding "still alive" or "deleted" flag and its
   corresponding vdesc flag instead of adding the period protected
   flag ?

   If we add the "still alive" flag, which means that the block is
   not yet deleted from the latest checkpoint, then this function
   can be simplified as follows:

static void nilfs_segctor_dec_nlive_blks_gc(struct inode *dat,
					    struct nilfs_segment_buffer *segbuf,
					    struct buffer_head *bh)
{
	if (buffer_nilfs_snapshot_protected(bh))
		segbuf->sb_nsnapshot_blks++;
	else if (!buffer_nilfs_still_alive(bh) ||
		 nilfs_dat_is_live(dat, bh->b_blocknr) <= 0)
		segbuf->sb_nlive_blks--;
}

 - The last comment: we usually expect that the first argument is a
   pointer to nilfs_sc_info struct for function nilfs_segctor_xxxx(),
   but this doesn't.  How about the following name ?

static void nilfs_segbuf_dec_nlive_blks_gc(struct nilfs_segment_buffer *segbuf,
					   struct buffer_head *bh,
				           struct inode *dat)


Regards,
Ryusuke Konishi

> +
>  static int
>  nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>  				     struct nilfs_segment_buffer *segbuf,
>  				     int mode)
>  {
> +	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
>  	struct inode *inode = NULL;
> +	struct nilfs_inode_info *ii;
>  	sector_t blocknr;
>  	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
>  	unsigned long nblocks = 0, ndatablk = 0;
> @@ -1579,7 +1608,9 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>  	union nilfs_binfo binfo;
>  	struct buffer_head *bh, *bh_org;
>  	ino_t ino = 0;
> -	int err = 0;
> +	int err = 0, gc_inode = 0, track_live_blks;
> +
> +	track_live_blks = nilfs_feature_track_live_blks(nilfs);
>  
>  	if (!nfinfo)
>  		goto out;
> @@ -1601,6 +1632,9 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>  
>  			inode = bh->b_page->mapping->host;
>  
> +			ii = NILFS_I(inode);
> +			gc_inode = test_bit(NILFS_I_GCINODE, &ii->i_state);
> +
>  			if (mode == SC_LSEG_DSYNC)
>  				sc_op = &nilfs_sc_dsync_ops;
>  			else if (ino == NILFS_DAT_INO)
> @@ -1608,6 +1642,11 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>  			else /* file blocks */
>  				sc_op = &nilfs_sc_file_ops;
>  		}
> +
> +		if (track_live_blks && gc_inode)
> +			nilfs_segctor_dec_nlive_blks_gc(nilfs->ns_dat,
> +							segbuf, bh);
> +
>  		bh_org = bh;
>  		get_bh(bh_org);
>  		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
> index 5f05bbf..ddc98e8 100644
> --- a/include/linux/nilfs2_fs.h
> +++ b/include/linux/nilfs2_fs.h
> @@ -905,7 +905,7 @@ struct nilfs_vinfo {
>   * @vd_blocknr: disk block number
>   * @vd_offset: logical block offset inside a file
>   * @vd_flags: flags (data or node block)
> - * @vd_pad: padding
> + * @vd_blk_flags: additional flags
>   */
>  struct nilfs_vdesc {
>  	__u64 vd_ino;
> @@ -915,9 +915,43 @@ struct nilfs_vdesc {
>  	__u64 vd_blocknr;
>  	__u64 vd_offset;
>  	__u32 vd_flags;
> -	__u32 vd_pad;
> +	/*
> +	 * vd_blk_flags needed because vd_flags doesn't support
> +	 * bit-flags because of backwards compatibility
> +	 */
> +	__u32 vd_blk_flags;
>  };
>  
> +/* vdesc flags */
> +enum {
> +	NILFS_VDESC_SNAPSHOT_PROTECTED,
> +	NILFS_VDESC_PERIOD_PROTECTED,
> +
> +	/* ... */
> +
> +	__NR_NILFS_VDESC_FIELDS,
> +};
> +
> +#define NILFS_VDESC_FNS(flag, name)					\
> +static inline void							\
> +nilfs_vdesc_set_##name(struct nilfs_vdesc *vdesc)			\
> +{									\
> +	vdesc->vd_blk_flags |= (1UL << NILFS_VDESC_##flag);		\
> +}									\
> +static inline void							\
> +nilfs_vdesc_clear_##name(struct nilfs_vdesc *vdesc)			\
> +{									\
> +	vdesc->vd_blk_flags &= ~(1UL << NILFS_VDESC_##flag);		\
> +}									\
> +static inline int							\
> +nilfs_vdesc_##name(const struct nilfs_vdesc *vdesc)			\
> +{									\
> +	return !!(vdesc->vd_blk_flags & (1UL << NILFS_VDESC_##flag));	\
> +}
> +
> +NILFS_VDESC_FNS(SNAPSHOT_PROTECTED, snapshot_protected)
> +NILFS_VDESC_FNS(PERIOD_PROTECTED, period_protected)
> +
>  /**
>   * struct nilfs_bdesc - descriptor of disk block number
>   * @bd_ino: inode number
> -- 
> 2.3.7
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux BTRFS]     [Linux CIFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux