From: Dave Chinner <dchinner@xxxxxxxxxx> Before we can start to remove the grant lock from grant head accounting operations, we need to be able to do atomic operations on the grant heads themselves. Currently they are made up of two 32 bit integers - the cycle number and the byte offset into the cycle - and as such cannot be updated atomically. To solve this problem, encode the grant heads into a single 64 bit variable as though they were an LSN, where the cycle number is held in the upper 32 bits and the byte offset in the lower 32 bits. By using this encoding, we can use the existing lsn manipulation macros to encode and extract the relevant details for calculations. Once we have the grant heads encoded into a 64 bit variable it will be trivial to convert them to atomic variables for lockless manipulation later on. Further, rework xlog_space_left() so that it is passed the new combined value rather than a split cycle/bytes pair, which simplifies the caller code by hiding the encoding inside xlog_space_left(). Also just pass the tail lsn in preparation for atomic variable conversion. Also, factor out the common write head vs tail lsn debug code into its own function to hide the encoding and remove some ifdef DEBUG mess. Finally, redo the space calculations to work on a pointer to a grant head so that the same calculation code is not duplicated for each grant head. This will further simplify the conversion to atomic grant head manipulation. 
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> --- fs/xfs/linux-2.6/xfs_trace.h | 20 ++--- fs/xfs/xfs_log.c | 225 ++++++++++++++++++++++++------------------ fs/xfs/xfs_log_priv.h | 6 +- fs/xfs/xfs_log_recover.c | 8 +- 4 files changed, 144 insertions(+), 115 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 0884f93..05a4bb9 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -768,10 +768,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __field(unsigned int, flags) __field(void *, reserveq) __field(void *, writeq) - __field(int, grant_reserve_cycle) - __field(int, grant_reserve_bytes) - __field(int, grant_write_cycle) - __field(int, grant_write_bytes) + __field(xfs_lsn_t, grant_reserve_lsn) + __field(xfs_lsn_t, grant_write_lsn) __field(int, curr_cycle) __field(int, curr_block) __field(xfs_lsn_t, tail_lsn) @@ -786,10 +784,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __entry->flags = tic->t_flags; __entry->reserveq = log->l_reserveq.next; __entry->writeq = log->l_writeq.next; - __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; - __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; - __entry->grant_write_cycle = log->l_grant_write_cycle; - __entry->grant_write_bytes = log->l_grant_write_bytes; + __entry->grant_reserve_lsn = log->l_grant_reserve_lsn; + __entry->grant_write_lsn = log->l_grant_write_lsn; __entry->curr_cycle = log->l_curr_cycle; __entry->curr_block = log->l_curr_block; __entry->tail_lsn = log->l_tail_lsn; @@ -809,10 +805,10 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), __entry->reserveq, __entry->writeq, - __entry->grant_reserve_cycle, - __entry->grant_reserve_bytes, - __entry->grant_write_cycle, - __entry->grant_write_bytes, + CYCLE_LSN(__entry->grant_reserve_lsn), + BLOCK_LSN(__entry->grant_reserve_lsn), + CYCLE_LSN(__entry->grant_write_lsn), + BLOCK_LSN(__entry->grant_write_lsn), __entry->curr_cycle, __entry->curr_block, 
CYCLE_LSN(__entry->tail_lsn), diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 399e559..69a9563 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -47,7 +47,8 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, xfs_buftarg_t *log_target, xfs_daddr_t blk_offset, int num_bblks); -STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); +STATIC int xlog_space_left(int logsize, xfs_lsn_t tail_lsn, + xfs_lsn_t head); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_dealloc_log(xlog_t *log); @@ -81,7 +82,8 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void xlog_verify_grant_head(xlog_t *log, int equals); +STATIC void xlog_verify_grant_head(struct log *log, int equals); +STATIC void xlog_verify_grant_tail(struct log *log); STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, int count, boolean_t syncing); STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, @@ -89,51 +91,82 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_head(a,b) +#define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c,d) #define xlog_verify_tail_lsn(a,b,c) #endif STATIC int xlog_iclogs_empty(xlog_t *log); +/* + * Grant space calculations use 64 bit atomic variables to store the current reserve + * and write grant markers. However, these are really two 32 bit numbers which + * need to be cracked out of the 64 bit variable, modified, recombined and then + * written back into the 64 bit atomic variable. And it has to be done + * atomically (i.e. without locks). + * + * The upper 32 bits is the log cycle, just like a xfs_lsn_t. The lower 32 bits + * is the byte offset into the log for the marker. Unlike the xfs_lsn_t, this + * is held in bytes rather than basic blocks, even though it uses the + * BLOCK_LSN() macro to extract it. 
+ */ static void -xlog_grant_sub_space(struct log *log, int bytes) +__xlog_grant_sub_space( + xfs_lsn_t *head, + int bytes, + int logsize) { - log->l_grant_write_bytes -= bytes; - if (log->l_grant_write_bytes < 0) { - log->l_grant_write_bytes += log->l_logsize; - log->l_grant_write_cycle--; - } + int cycle, space; - log->l_grant_reserve_bytes -= bytes; - if ((log)->l_grant_reserve_bytes < 0) { - log->l_grant_reserve_bytes += log->l_logsize; - log->l_grant_reserve_cycle--; + cycle = CYCLE_LSN(*head); + space = BLOCK_LSN(*head); + space -= bytes; + if (space < 0) { + cycle--; + space += logsize; } - + *head = xlog_assign_lsn(cycle, space); } static void -xlog_grant_add_space_write(struct log *log, int bytes) +__xlog_grant_add_space( + xfs_lsn_t *head, + int bytes, + int logsize) { - int tmp = log->l_logsize - log->l_grant_write_bytes; + int cycle, space, tmp; + + cycle = CYCLE_LSN(*head); + space = BLOCK_LSN(*head); + tmp = logsize - space; if (tmp > bytes) - log->l_grant_write_bytes += bytes; + space += bytes; else { - log->l_grant_write_cycle++; - log->l_grant_write_bytes = bytes - tmp; + cycle++; + space = bytes - tmp; } + *head = xlog_assign_lsn(cycle, space); +} + +static inline void +xlog_grant_sub_space(struct log *log, int bytes) +{ + __xlog_grant_sub_space(&log->l_grant_write_lsn, bytes, log->l_logsize); + __xlog_grant_sub_space(&log->l_grant_reserve_lsn, bytes, + log->l_logsize); +} + +static inline void +xlog_grant_add_space_write(struct log *log, int bytes) +{ + __xlog_grant_add_space(&log->l_grant_write_lsn, bytes, log->l_logsize); } static void xlog_grant_add_space_reserve(struct log *log, int bytes) { - int tmp = log->l_logsize - log->l_grant_reserve_bytes; - if (tmp > bytes) - log->l_grant_reserve_bytes += bytes; - else { - log->l_grant_reserve_cycle++; - log->l_grant_reserve_bytes = bytes - tmp; - } + __xlog_grant_add_space(&log->l_grant_reserve_lsn, bytes, + log->l_logsize); } static inline void @@ -671,7 +704,7 @@ xfs_log_move_tail(xfs_mount_t *mp, 
{ xlog_ticket_t *tic; xlog_t *log = mp->m_log; - int need_bytes, free_bytes, cycle, bytes; + int need_bytes, free_bytes; if (XLOG_FORCED_SHUTDOWN(log)) return; @@ -697,9 +730,8 @@ xfs_log_move_tail(xfs_mount_t *mp, if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - cycle = log->l_grant_write_cycle; - bytes = log->l_grant_write_bytes; - free_bytes = xlog_space_left(log, cycle, bytes); + free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn, + log->l_grant_write_lsn); list_for_each_entry(tic, &log->l_writeq, t_queue) { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); @@ -716,9 +748,8 @@ xfs_log_move_tail(xfs_mount_t *mp, if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - cycle = log->l_grant_reserve_cycle; - bytes = log->l_grant_reserve_bytes; - free_bytes = xlog_space_left(log, cycle, bytes); + free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn, + log->l_grant_reserve_lsn); list_for_each_entry(tic, &log->l_reserveq, t_queue) { if (tic->t_flags & XLOG_TIC_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_cnt; @@ -831,37 +862,40 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) * result is that we return the size of the log as the amount of space left. 
*/ STATIC int -xlog_space_left(xlog_t *log, int cycle, int bytes) +xlog_space_left( + int logsize, + xfs_lsn_t tail_lsn, + xfs_lsn_t head) { - int free_bytes; - int tail_bytes; - int tail_cycle; - - tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); - tail_cycle = CYCLE_LSN(log->l_tail_lsn); - if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { - free_bytes = log->l_logsize - (bytes - tail_bytes); - } else if ((tail_cycle + 1) < cycle) { + int free_bytes; + int tail_bytes = BBTOB(BLOCK_LSN(tail_lsn)); + int tail_cycle = CYCLE_LSN(tail_lsn); + int head_cycle = CYCLE_LSN(head); + int head_bytes = BLOCK_LSN(head); + + if ((tail_cycle == head_cycle) && (head_bytes >= tail_bytes)) { + free_bytes = logsize - (head_bytes - tail_bytes); + } else if ((tail_cycle + 1) < head_cycle) { return 0; - } else if (tail_cycle < cycle) { - ASSERT(tail_cycle == (cycle - 1)); - free_bytes = tail_bytes - bytes; + } else if (tail_cycle < head_cycle) { + ASSERT(tail_cycle == (head_cycle - 1)); + free_bytes = tail_bytes - head_bytes; } else { /* * The reservation head is behind the tail. * In this case we just want to return the size of the * log as the amount of space left. 
*/ - xfs_fs_cmn_err(CE_ALERT, log->l_mp, + cmn_err(CE_ALERT, "xlog_space_left: head behind tail\n" " tail_cycle = %d, tail_bytes = %d\n" " GH cycle = %d, GH bytes = %d", - tail_cycle, tail_bytes, cycle, bytes); + tail_cycle, tail_bytes, head_cycle, head_bytes); ASSERT(0); - free_bytes = log->l_logsize; + free_bytes = logsize; } return free_bytes; -} /* xlog_space_left */ +} /* @@ -1018,8 +1052,8 @@ xlog_alloc_log(xfs_mount_t *mp, /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - log->l_grant_reserve_cycle = 1; - log->l_grant_write_cycle = 1; + log->l_grant_reserve_lsn = xlog_assign_lsn(1, 0); + log->l_grant_write_lsn = xlog_assign_lsn(1, 0); INIT_LIST_HEAD(&log->l_reserveq); INIT_LIST_HEAD(&log->l_writeq); @@ -1207,9 +1241,8 @@ xlog_grant_push_ail(xfs_mount_t *mp, ASSERT(BTOBB(need_bytes) < log->l_logBBsize); spin_lock(&log->l_grant_lock); - free_bytes = xlog_space_left(log, - log->l_grant_reserve_cycle, - log->l_grant_reserve_bytes); + free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn, + log->l_grant_reserve_lsn); tail_lsn = log->l_tail_lsn; free_blocks = BTOBBT(free_bytes); @@ -2504,9 +2537,8 @@ xlog_grant_log_space( { int free_bytes; int need_bytes; -#ifdef DEBUG - xfs_lsn_t tail_lsn; +#ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("grant Recovery problem"); #endif @@ -2524,8 +2556,8 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, - log->l_grant_reserve_bytes); + free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn, + log->l_grant_reserve_lsn); /* * If there is not enough space or there is queued waiter and we * are not already on the queue, we need to wait. 
@@ -2552,20 +2584,9 @@ redo: /* we've got enough space */ xlog_grant_add_space(log, need_bytes); -#ifdef DEBUG - tail_lsn = log->l_tail_lsn; - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count. - */ - if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { - ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); - ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); - } -#endif + trace_xfs_log_grant_exit(log, tic); + xlog_verify_grant_tail(log); xlog_verify_grant_head(log, 1); spin_unlock(&log->l_grant_lock); return 0; @@ -2596,9 +2617,6 @@ xlog_regrant_write_log_space( { int free_bytes; int need_bytes; -#ifdef DEBUG - xfs_lsn_t tail_lsn; -#endif tic->t_curr_res = tic->t_unit_res; xlog_tic_reset_res(tic); @@ -2620,8 +2638,8 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - free_bytes = xlog_space_left(log, log->l_grant_write_cycle, - log->l_grant_write_bytes); + free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn, + log->l_grant_write_lsn); /* * If there is not enough space or there is queued waiter and we * are not already on the queue, we need to wait. 
@@ -2668,16 +2686,9 @@ redo: /* we've got enough space */ xlog_grant_add_space_write(log, need_bytes); -#ifdef DEBUG - tail_lsn = log->l_tail_lsn; - if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { - ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); - ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); - } -#endif trace_xfs_log_regrant_write_exit(log, tic); - + xlog_verify_grant_tail(log); xlog_verify_grant_head(log, 1); spin_unlock(&log->l_grant_lock); return 0; @@ -3401,18 +3412,42 @@ xlog_verify_dest_ptr( } STATIC void -xlog_verify_grant_head(xlog_t *log, int equals) +xlog_verify_grant_head( + struct log *log, + int equals) { - if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { - if (equals) - ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); - else - ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); - } else { - ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); - ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); - } -} /* xlog_verify_grant_head */ + xfs_lsn_t reserve = log->l_grant_reserve_lsn; + xfs_lsn_t write = log->l_grant_write_lsn; + + if (CYCLE_LSN(reserve) == CYCLE_LSN(write)) { + if (equals) + ASSERT(BLOCK_LSN(reserve) >= BLOCK_LSN(write)); + else + ASSERT(BLOCK_LSN(reserve) > BLOCK_LSN(write)); + } else { + ASSERT(CYCLE_LSN(reserve) - 1 == CYCLE_LSN(write)); + ASSERT(BLOCK_LSN(write) >= BLOCK_LSN(reserve)); + } +} + +STATIC void +xlog_verify_grant_tail( + struct log *log) +{ + xfs_lsn_t tail_lsn = log->l_tail_lsn; + xfs_lsn_t write_lsn = log->l_grant_write_lsn; + + /* + * Check to make sure the grant write head didn't just over lap the + * tail. If the cycles are the same, we can't be overlapping. + * Otherwise, make sure that the cycles differ by exactly one and + * check the byte count. 
+ */ + if (CYCLE_LSN(tail_lsn) != CYCLE_LSN(write_lsn)) { + ASSERT(CYCLE_LSN(write_lsn) - 1 == CYCLE_LSN(tail_lsn)); + ASSERT(BLOCK_LSN(write_lsn) <= BBTOB(BLOCK_LSN(tail_lsn))); + } +} /* check if it will fit */ STATIC void diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index d45fe2d..4ebaf07 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -519,10 +519,8 @@ typedef struct log { spinlock_t l_grant_lock ____cacheline_aligned_in_smp; struct list_head l_reserveq; struct list_head l_writeq; - int l_grant_reserve_cycle; - int l_grant_reserve_bytes; - int l_grant_write_cycle; - int l_grant_write_bytes; + xfs_lsn_t l_grant_reserve_lsn; + xfs_lsn_t l_grant_write_lsn; /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index baad94a..d8c62f0 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -927,10 +927,10 @@ xlog_find_tail( log->l_curr_cycle++; log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); - log->l_grant_reserve_cycle = log->l_curr_cycle; - log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); - log->l_grant_write_cycle = log->l_curr_cycle; - log->l_grant_write_bytes = BBTOB(log->l_curr_block); + log->l_grant_reserve_lsn = xlog_assign_lsn(log->l_curr_cycle, + BBTOB(log->l_curr_block)); + log->l_grant_write_lsn = xlog_assign_lsn(log->l_curr_cycle, + BBTOB(log->l_curr_block)); /* * Look for unmount record. If we find it, then we know there -- 1.7.2.3 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs