On 19 Nov 2021 at 04:43, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > Current xlog_write() adds op headers to the log manually for every > log item region that is in the vector passed to it. While > xlog_write() needs to stamp the transaction ID into the ophdr, we > already know it's length, flags, clientid, etc at CIL commit time. > > This means the only time that xlog write really needs to format and > reserve space for a new ophdr is when a region is split across two > iclogs. Adding the opheader and accounting for it as part of the > normal formatted item region means we simplify the accounting > of space used by a transaction and we don't have to special case > reserving of space in for the ophdrs in xlog_write(). It also means > we can largely initialise the ophdr in transaction commit instead > of xlog_write, making the xlog_write formatting inner loop much > tighter. > > xlog_prepare_iovec() is now too large to stay as an inline function, > so we move it out of line and into xfs_log.c. > > Object sizes: > text data bss dec hex filename > 1125934 305951 484 1432369 15db31 fs/xfs/built-in.a.before > 1123360 305951 484 1429795 15d123 fs/xfs/built-in.a.after > > So the code is a roughly 2.5kB smaller with xlog_prepare_iovec() now > out of line, even though it grew in size itself. > I verified the following, 1. For embedded op headers, xlog_cil_alloc_shadow_bufs() correctly computes the space required to hold a region. 2. The space actually used by each iovec (plus space required by an op header) is correctly updated by xlog_finish_iovec(). 3. op header's oh_len is correctly updated in both partial and non-partial writes of a region to an iclog. Reviewed-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> > Reviewed-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/xfs_log.c | 115 +++++++++++++++++++++++++++++-------------- > fs/xfs/xfs_log.h | 42 +++------------- > fs/xfs/xfs_log_cil.c | 25 +++++----- > 3 files changed, 99 insertions(+), 83 deletions(-) > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > index f09663d3664b..0923bee8d4e2 100644 > --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c > @@ -90,6 +90,62 @@ xlog_iclogs_empty( > static int > xfs_log_cover(struct xfs_mount *); > > +/* > + * We need to make sure the buffer pointer returned is naturally aligned for the > + * biggest basic data type we put into it. We have already accounted for this > + * padding when sizing the buffer. > + * > + * However, this padding does not get written into the log, and hence we have to > + * track the space used by the log vectors separately to prevent log space hangs > + * due to inaccurate accounting (i.e. a leak) of the used log space through the > + * CIL context ticket. > + * > + * We also add space for the xlog_op_header that describes this region in the > + * log. This prepends the data region we return to the caller to copy their data > + * into, so do all the static initialisation of the ophdr now. Because the ophdr > + * is not 8 byte aligned, we have to be careful to ensure that we align the > + * start of the buffer such that the region we return to the call is 8 byte > + * aligned and packed against the tail of the ophdr. > + */ > +void * > +xlog_prepare_iovec( > + struct xfs_log_vec *lv, > + struct xfs_log_iovec **vecp, > + uint type) > +{ > + struct xfs_log_iovec *vec = *vecp; > + struct xlog_op_header *oph; > + uint32_t len; > + void *buf; > + > + if (vec) { > + ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); > + vec++; > + } else { > + vec = &lv->lv_iovecp[0]; > + } > + > + len = lv->lv_buf_len + sizeof(struct xlog_op_header); > + if (!IS_ALIGNED(len, sizeof(uint64_t))) { > + lv->lv_buf_len = round_up(len, sizeof(uint64_t)) - > + sizeof(struct xlog_op_header); > + } > + > + vec->i_type = type; > + vec->i_addr = lv->lv_buf + lv->lv_buf_len; > + > + oph = vec->i_addr; > + oph->oh_clientid = XFS_TRANSACTION; > + oph->oh_res2 = 0; > + oph->oh_flags = 0; > + > + buf = vec->i_addr + sizeof(struct xlog_op_header); > + ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); > + > + *vecp = vec; > + return buf; > +} > + > static void > xlog_grant_sub_space( > struct xlog *log, > @@ -2246,9 +2302,9 @@ xlog_print_trans( > } > > /* > - * Calculate the potential space needed by the log vector. If this is a start > - * transaction, the caller has already accounted for both opheaders in the start > - * transaction, so we don't need to account for them here. > + * Calculate the potential space needed by the log vector. All regions contain > + * their own opheaders and they are accounted for in region space so we don't > + * need to add them to the vector length here. > */ > static int > xlog_write_calc_vec_length( > @@ -2275,18 +2331,7 @@ xlog_write_calc_vec_length( > xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type); > } > } > - > - /* Don't account for regions with embedded ophdrs */ > - if (optype && headers > 0) { > - headers--; > - if (optype & XLOG_START_TRANS) { > - ASSERT(headers >= 1); > - headers--; > - } > - } > - > ticket->t_res_num_ophdrs += headers; > - len += headers * sizeof(struct xlog_op_header); > > return len; > } > @@ -2296,7 +2341,6 @@ xlog_write_setup_ophdr( > struct xlog_op_header *ophdr, > struct xlog_ticket *ticket) > { > - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); > ophdr->oh_clientid = XFS_TRANSACTION; > ophdr->oh_res2 = 0; > ophdr->oh_flags = 0; > @@ -2514,21 +2558,25 @@ xlog_write( > ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); > > /* > - * The XLOG_START_TRANS has embedded ophdrs for the > - * start record and transaction header. They will always > - * be the first two regions in the lv chain. Commit and > - * unmount records also have embedded ophdrs. > + * Regions always have their ophdr at the start of the > + * region, except for: > + * - a transaction start which has a start record ophdr > + * before the first region ophdr; and > + * - the previous region didn't fully fit into an iclog > + * so needs a continuation ophdr to prepend the region > + * in this new iclog. > */ > - if (optype) { > - ophdr = reg->i_addr; > - if (index) > - optype &= ~XLOG_START_TRANS; > - } else { > + ophdr = reg->i_addr; > + if (optype && index) { > + optype &= ~XLOG_START_TRANS; > + } else if (partial_copy) { > ophdr = xlog_write_setup_ophdr(ptr, ticket); > xlog_write_adv_cnt(&ptr, &len, &log_offset, > sizeof(struct xlog_op_header)); > added_ophdr = true; > } > + ophdr->oh_tid = cpu_to_be32(ticket->t_tid); > + > len += xlog_write_setup_copy(ticket, ophdr, > iclog->ic_size-log_offset, > reg->i_len, > @@ -2546,20 +2594,11 @@ xlog_write( > ophdr->oh_len = cpu_to_be32(copy_len - > sizeof(struct xlog_op_header)); > } > - /* > - * Copy region. > - * > - * Commit records just log an opheader, so > - * we can have empty payloads with no data region to > - * copy. Hence we only copy the payload if the vector > - * says it has data to copy. > - */ > - ASSERT(copy_len >= 0); > - if (copy_len > 0) { > - memcpy(ptr, reg->i_addr + copy_off, copy_len); > - xlog_write_adv_cnt(&ptr, &len, &log_offset, > - copy_len); > - } > + > + ASSERT(copy_len > 0); > + memcpy(ptr, reg->i_addr + copy_off, copy_len); > + xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); > + > if (added_ophdr) > copy_len += sizeof(struct xlog_op_header); > record_cnt++; > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h > index d1fc43476166..816f44d7dc81 100644 > --- a/fs/xfs/xfs_log.h > +++ b/fs/xfs/xfs_log.h > @@ -21,44 +21,18 @@ struct xfs_log_vec { > > #define XFS_LOG_VEC_ORDERED (-1) > > -/* > - * We need to make sure the buffer pointer returned is naturally aligned for the > - * biggest basic data type we put into it. We have already accounted for this > - * padding when sizing the buffer. > - * > - * However, this padding does not get written into the log, and hence we have to > - * track the space used by the log vectors separately to prevent log space hangs > - * due to inaccurate accounting (i.e. a leak) of the used log space through the > - * CIL context ticket. > - */ > -static inline void * > -xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, > - uint type) > -{ > - struct xfs_log_iovec *vec = *vecp; > - > - if (vec) { > - ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); > - vec++; > - } else { > - vec = &lv->lv_iovecp[0]; > - } > - > - if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t))) > - lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t)); > - > - vec->i_type = type; > - vec->i_addr = lv->lv_buf + lv->lv_buf_len; > - > - ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); > - > - *vecp = vec; > - return vec->i_addr; > -} > +void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, > + uint type); > > static inline void > xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) > { > + struct xlog_op_header *oph = vec->i_addr; > + > + /* opheader tracks payload length, logvec tracks region length */ > + oph->oh_len = cpu_to_be32(len); > + > + len += sizeof(struct xlog_op_header); > lv->lv_buf_len += len; > lv->lv_bytes += len; > vec->i_len = len; > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > index 29cf2d5d0b96..90a0e9b9d3e0 100644 > --- a/fs/xfs/xfs_log_cil.c > +++ b/fs/xfs/xfs_log_cil.c > @@ -181,13 +181,20 @@ xlog_cil_alloc_shadow_bufs( > } > > /* > - * We 64-bit align the length of each iovec so that the start > - * of the next one is naturally aligned. We'll need to > - * account for that slack space here. Then round nbytes up > - * to 64-bit alignment so that the initial buffer alignment is > - * easy to calculate and verify. > + * We 64-bit align the length of each iovec so that the start of > + * the next one is naturally aligned. We'll need to account for > + * that slack space here. > + * > + * We also add the xlog_op_header to each region when > + * formatting, but that's not accounted to the size of the item > + * at this point. Hence we'll need an addition number of bytes > + * for each vector to hold an opheader. > + * > + * Then round nbytes up to 64-bit alignment so that the initial > + * buffer alignment is easy to calculate and verify. > */ > - nbytes += niovecs * sizeof(uint64_t); > + nbytes += niovecs * > + (sizeof(uint64_t) + sizeof(struct xlog_op_header)); > nbytes = round_up(nbytes, sizeof(uint64_t)); > > /* > @@ -441,11 +448,6 @@ xlog_cil_insert_items( > > spin_lock(&cil->xc_cil_lock); > > - /* account for space used by new iovec headers */ > - iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t); > - len += iovhdr_res; > - ctx->nvecs += diff_iovecs; > - > /* attach the transaction to the CIL if it has any busy extents */ > if (!list_empty(&tp->t_busy)) > list_splice_init(&tp->t_busy, &ctx->busy_extents); > @@ -477,6 +479,7 @@ xlog_cil_insert_items( > } > tp->t_ticket->t_curr_res -= len; > ctx->space_used += len; > + ctx->nvecs += diff_iovecs; > > /* > * If we've overrun the reservation, dump the tx details before we move -- chandan