Re: [PATCH 07/16] xfs: reserve space and initialise xlog_op_header in item formatting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 19 Nov 2021 at 04:43, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
>
> Current xlog_write() adds op headers to the log manually for every
> log item region that is in the vector passed to it. While
> xlog_write() needs to stamp the transaction ID into the ophdr, we
> already know its length, flags, clientid, etc. at CIL commit time.
>
> This means the only time that xlog_write() really needs to format and
> reserve space for a new ophdr is when a region is split across two
> iclogs. Adding the opheader and accounting for it as part of the
> normal formatted item region means we simplify the accounting
> of space used by a transaction and we don't have to special case
> reserving of space for the ophdrs in xlog_write(). It also means
> we can largely initialise the ophdr in transaction commit instead
> of xlog_write, making the xlog_write formatting inner loop much
> tighter.
>
> xlog_prepare_iovec() is now too large to stay as an inline function,
> so we move it out of line and into xfs_log.c.
>
> Object sizes:
> text	   data	    bss	    dec	    hex	filename
> 1125934	 305951	    484	1432369	 15db31 fs/xfs/built-in.a.before
> 1123360	 305951	    484	1429795	 15d123 fs/xfs/built-in.a.after
>
> So the code is roughly 2.5kB smaller with xlog_prepare_iovec() now
> out of line, even though it grew in size itself.
>

I verified the following:
1. For embedded op headers, xlog_cil_alloc_shadow_bufs() correctly computes
   the space required to hold a region.
2. The space actually used by each iovec (plus space required
   by an op header) is correctly updated by xlog_finish_iovec().
3. op header's oh_len is correctly updated in both partial and non-partial
   writes of a region to an iclog.

Reviewed-by: Chandan Babu R <chandan.babu@xxxxxxxxxx>
   
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/xfs/xfs_log.c     | 115 +++++++++++++++++++++++++++++--------------
>  fs/xfs/xfs_log.h     |  42 +++-------------
>  fs/xfs/xfs_log_cil.c |  25 +++++-----
>  3 files changed, 99 insertions(+), 83 deletions(-)
>
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index f09663d3664b..0923bee8d4e2 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -90,6 +90,62 @@ xlog_iclogs_empty(
>  static int
>  xfs_log_cover(struct xfs_mount *);
>  
> +/*
> + * We need to make sure the buffer pointer returned is naturally aligned for the
> + * biggest basic data type we put into it. We have already accounted for this
> + * padding when sizing the buffer.
> + *
> + * However, this padding does not get written into the log, and hence we have to
> + * track the space used by the log vectors separately to prevent log space hangs
> + * due to inaccurate accounting (i.e. a leak) of the used log space through the
> + * CIL context ticket.
> + *
> + * We also add space for the xlog_op_header that describes this region in the
> + * log. This prepends the data region we return to the caller to copy their data
> + * into, so do all the static initialisation of the ophdr now. Because the ophdr
> + * is not 8 byte aligned, we have to be careful to ensure that we align the
> + * start of the buffer such that the region we return to the caller is 8 byte
> + * aligned and packed against the tail of the ophdr.
> + */
> +void *
> +xlog_prepare_iovec(
> +	struct xfs_log_vec	*lv,
> +	struct xfs_log_iovec	**vecp,
> +	uint			type)
> +{
> +	struct xfs_log_iovec	*vec = *vecp;
> +	struct xlog_op_header	*oph;
> +	uint32_t		len;
> +	void			*buf;
> +
> +	if (vec) {
> +		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
> +		vec++;
> +	} else {
> +		vec = &lv->lv_iovecp[0];
> +	}
> +
> +	len = lv->lv_buf_len + sizeof(struct xlog_op_header);
> +	if (!IS_ALIGNED(len, sizeof(uint64_t))) {
> +		lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
> +					sizeof(struct xlog_op_header);
> +	}
> +
> +	vec->i_type = type;
> +	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
> +
> +	oph = vec->i_addr;
> +	oph->oh_clientid = XFS_TRANSACTION;
> +	oph->oh_res2 = 0;
> +	oph->oh_flags = 0;
> +
> +	buf = vec->i_addr + sizeof(struct xlog_op_header);
> +	ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
> +
> +	*vecp = vec;
> +	return buf;
> +}
> +
>  static void
>  xlog_grant_sub_space(
>  	struct xlog		*log,
> @@ -2246,9 +2302,9 @@ xlog_print_trans(
>  }
>  
>  /*
> - * Calculate the potential space needed by the log vector. If this is a start
> - * transaction, the caller has already accounted for both opheaders in the start
> - * transaction, so we don't need to account for them here.
> + * Calculate the potential space needed by the log vector. All regions contain
> + * their own opheaders and they are accounted for in region space so we don't
> + * need to add them to the vector length here.
>   */
>  static int
>  xlog_write_calc_vec_length(
> @@ -2275,18 +2331,7 @@ xlog_write_calc_vec_length(
>  			xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
>  		}
>  	}
> -
> -	/* Don't account for regions with embedded ophdrs */
> -	if (optype && headers > 0) {
> -		headers--;
> -		if (optype & XLOG_START_TRANS) {
> -			ASSERT(headers >= 1);
> -			headers--;
> -		}
> -	}
> -
>  	ticket->t_res_num_ophdrs += headers;
> -	len += headers * sizeof(struct xlog_op_header);
>  
>  	return len;
>  }
> @@ -2296,7 +2341,6 @@ xlog_write_setup_ophdr(
>  	struct xlog_op_header	*ophdr,
>  	struct xlog_ticket	*ticket)
>  {
> -	ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
>  	ophdr->oh_clientid = XFS_TRANSACTION;
>  	ophdr->oh_res2 = 0;
>  	ophdr->oh_flags = 0;
> @@ -2514,21 +2558,25 @@ xlog_write(
>  			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
>  
>  			/*
> -			 * The XLOG_START_TRANS has embedded ophdrs for the
> -			 * start record and transaction header. They will always
> -			 * be the first two regions in the lv chain. Commit and
> -			 * unmount records also have embedded ophdrs.
> +			 * Regions always have their ophdr at the start of the
> +			 * region, except for:
> +			 * - a transaction start which has a start record ophdr
> +			 *   before the first region ophdr; and
> +			 * - the previous region didn't fully fit into an iclog
> +			 *   so needs a continuation ophdr to prepend the region
> +			 *   in this new iclog.
>  			 */
> -			if (optype) {
> -				ophdr = reg->i_addr;
> -				if (index)
> -					optype &= ~XLOG_START_TRANS;
> -			} else {
> +			ophdr = reg->i_addr;
> +			if (optype && index) {
> +				optype &= ~XLOG_START_TRANS;
> +			} else if (partial_copy) {
>                                  ophdr = xlog_write_setup_ophdr(ptr, ticket);
>  				xlog_write_adv_cnt(&ptr, &len, &log_offset,
>  					   sizeof(struct xlog_op_header));
>  				added_ophdr = true;
>  			}
> +			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
> +
>  			len += xlog_write_setup_copy(ticket, ophdr,
>  						     iclog->ic_size-log_offset,
>  						     reg->i_len,
> @@ -2546,20 +2594,11 @@ xlog_write(
>  				ophdr->oh_len = cpu_to_be32(copy_len -
>  						sizeof(struct xlog_op_header));
>  			}
> -			/*
> -			 * Copy region.
> -			 *
> -			 * Commit records just log an opheader, so
> -			 * we can have empty payloads with no data region to
> -			 * copy.  Hence we only copy the payload if the vector
> -			 * says it has data to copy.
> -			 */
> -			ASSERT(copy_len >= 0);
> -			if (copy_len > 0) {
> -				memcpy(ptr, reg->i_addr + copy_off, copy_len);
> -				xlog_write_adv_cnt(&ptr, &len, &log_offset,
> -						   copy_len);
> -			}
> +
> +			ASSERT(copy_len > 0);
> +			memcpy(ptr, reg->i_addr + copy_off, copy_len);
> +			xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
> +
>  			if (added_ophdr)
>  				copy_len += sizeof(struct xlog_op_header);
>  			record_cnt++;
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index d1fc43476166..816f44d7dc81 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -21,44 +21,18 @@ struct xfs_log_vec {
>  
>  #define XFS_LOG_VEC_ORDERED	(-1)
>  
> -/*
> - * We need to make sure the buffer pointer returned is naturally aligned for the
> - * biggest basic data type we put into it. We have already accounted for this
> - * padding when sizing the buffer.
> - *
> - * However, this padding does not get written into the log, and hence we have to
> - * track the space used by the log vectors separately to prevent log space hangs
> - * due to inaccurate accounting (i.e. a leak) of the used log space through the
> - * CIL context ticket.
> - */
> -static inline void *
> -xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
> -		uint type)
> -{
> -	struct xfs_log_iovec *vec = *vecp;
> -
> -	if (vec) {
> -		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
> -		vec++;
> -	} else {
> -		vec = &lv->lv_iovecp[0];
> -	}
> -
> -	if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t)))
> -		lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t));
> -
> -	vec->i_type = type;
> -	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
> -
> -	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
> -
> -	*vecp = vec;
> -	return vec->i_addr;
> -}
> +void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
> +		uint type);
>  
>  static inline void
>  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
>  {
> +	struct xlog_op_header	*oph = vec->i_addr;
> +
> +	/* opheader tracks payload length, logvec tracks region length */
> +	oph->oh_len = cpu_to_be32(len);
> +
> +	len += sizeof(struct xlog_op_header);
>  	lv->lv_buf_len += len;
>  	lv->lv_bytes += len;
>  	vec->i_len = len;
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index 29cf2d5d0b96..90a0e9b9d3e0 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -181,13 +181,20 @@ xlog_cil_alloc_shadow_bufs(
>  		}
>  
>  		/*
> -		 * We 64-bit align the length of each iovec so that the start
> -		 * of the next one is naturally aligned.  We'll need to
> -		 * account for that slack space here. Then round nbytes up
> -		 * to 64-bit alignment so that the initial buffer alignment is
> -		 * easy to calculate and verify.
> +		 * We 64-bit align the length of each iovec so that the start of
> +		 * the next one is naturally aligned.  We'll need to account for
> +		 * that slack space here.
> +		 *
> +		 * We also add the xlog_op_header to each region when
> +		 * formatting, but that's not accounted to the size of the item
> +		 * at this point. Hence we'll need an additional number of bytes
> +		 * for each vector to hold an opheader.
> +		 *
> +		 * Then round nbytes up to 64-bit alignment so that the initial
> +		 * buffer alignment is easy to calculate and verify.
>  		 */
> -		nbytes += niovecs * sizeof(uint64_t);
> +		nbytes += niovecs *
> +			(sizeof(uint64_t) + sizeof(struct xlog_op_header));
>  		nbytes = round_up(nbytes, sizeof(uint64_t));
>  
>  		/*
> @@ -441,11 +448,6 @@ xlog_cil_insert_items(
>  
>  	spin_lock(&cil->xc_cil_lock);
>  
> -	/* account for space used by new iovec headers  */
> -	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
> -	len += iovhdr_res;
> -	ctx->nvecs += diff_iovecs;
> -
>  	/* attach the transaction to the CIL if it has any busy extents */
>  	if (!list_empty(&tp->t_busy))
>  		list_splice_init(&tp->t_busy, &ctx->busy_extents);
> @@ -477,6 +479,7 @@ xlog_cil_insert_items(
>  	}
>  	tp->t_ticket->t_curr_res -= len;
>  	ctx->space_used += len;
> +	ctx->nvecs += diff_iovecs;
>  
>  	/*
>  	 * If we've overrun the reservation, dump the tx details before we move


-- 
chandan



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux