Currently, AGI buffer is always touched since xfs_iunlink() adds unlinked inodes from head unconditionally, but since we now have the only one unlinked list and if we insert unlinked inodes from tail instead and there're more than 1 inodes, AGI buffer modification could be avoided. In order to do that, let's keep track of the tail of unlinked inode into the perag all the time in order for xfs_iunlink() to use. With this change, it shows that only 938 of 10000 operations modifies the head of unlinked list with the following workload: seq 1 10000 | xargs touch find . | xargs -n3 -P100 rm Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxx> --- fs/xfs/xfs_inode.c | 120 ++++++++++++++++++++++++--------------- fs/xfs/xfs_log_recover.c | 5 ++ fs/xfs/xfs_mount.c | 1 + fs/xfs/xfs_mount.h | 3 + 4 files changed, 84 insertions(+), 45 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d78aaa8ce252..3cfd84b76955 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1986,6 +1986,38 @@ xfs_iunlink_update_bucket( return 0; } +/* + * Lock the perag and take AGI lock if agi_unlinked is touched as well + * for xfs_iunlink_insert_inode(). As for the details of locking order, + * refer to the comments of xfs_iunlink_remove_lock(). + */ +static struct xfs_perag * +xfs_iunlink_insert_lock( + xfs_agino_t agno, + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_buf **agibpp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + int error; + + pag = xfs_perag_get(mp, agno); + mutex_lock(&pag->pag_unlinked_mutex); + + if (!pag->pag_unlinked_tail) { + mutex_unlock(&pag->pag_unlinked_mutex); + + error = xfs_read_agi(mp, tp, agno, agibpp); + if (error) { + xfs_perag_put(pag); + return ERR_PTR(error); + } + mutex_lock(&pag->pag_unlinked_mutex); + } + return pag; +} + /* * Always insert at the head, so we only have to do a next inode lookup to * update it's prev pointer. The AGI bucket will point at the one we are @@ -1993,50 +2025,47 @@ xfs_iunlink_update_bucket( */ static int xfs_iunlink_insert_inode( + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + struct xfs_inode *pip = pag->pag_unlinked_tail; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino; - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the pointer isn't garbage and that this inode - * isn't already on the list. - */ - next_agino = be32_to_cpu(agi->agi_unlinked[0]); - if (next_agino == agino || - !xfs_verify_agino_or_null(mp, agno, next_agino)) { - xfs_buf_mark_corrupt(agibp); - return -EFSCORRUPTED; - } - - ip->i_prev_unlinked = NULLAGINO; - ip->i_next_unlinked = next_agino; - if (ip->i_next_unlinked != NULLAGINO) { - struct xfs_inode *nip; - - nip = xfs_iunlink_lookup_next(agibp->b_pag, ip); - if (IS_ERR_OR_NULL(nip)) - return -EFSCORRUPTED; + if (!pip) { + xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + struct xfs_agi *agi = agibp->b_addr; + int error; - if (nip->i_prev_unlinked != NULLAGINO) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, - NULL, 0, __this_address); + ip->i_prev_unlinked = NULLAGINO; + /* + * Get the index into the agi hash table for the list this + * inode will go on. Make sure the pointer isn't garbage + * and that this inode isn't already on the list. + */ + next_agino = be32_to_cpu(agi->agi_unlinked[0]); + if (next_agino == agino || + !xfs_verify_agino_or_null(mp, agno, next_agino)) { + xfs_buf_mark_corrupt(agibp); return -EFSCORRUPTED; } - nip->i_prev_unlinked = agino; - /* update the on disk inode now */ - xfs_iunlink_log(tp, ip); + /* Point the head of the list to point to this inode. */ + error = xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino); + if (error) + return error; + } else { + ip->i_prev_unlinked = XFS_INO_TO_AGINO(mp, pip->i_ino); + ASSERT(pip->i_next_unlinked == NULLAGINO); + pip->i_next_unlinked = agino; + xfs_iunlink_log(tp, pip); } - - /* Point the head of the list to point to this inode. */ - return xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino); + ip->i_next_unlinked = NULLAGINO; + pag->pag_unlinked_tail = ip; + return 0; } /* @@ -2095,6 +2124,7 @@ xfs_iunlink_remove_inode( xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino = ip->i_next_unlinked; + struct xfs_inode *pip = NULL; int error; if (ip->i_prev_unlinked == NULLAGINO) { @@ -2122,8 +2152,6 @@ xfs_iunlink_remove_inode( return -EFSCORRUPTED; } else { /* lookup previous inode and update to point at next */ - struct xfs_inode *pip; - pip = xfs_iunlink_lookup_prev(pag, ip); if (IS_ERR_OR_NULL(pip)) return -EFSCORRUPTED; @@ -2139,8 +2167,12 @@ xfs_iunlink_remove_inode( xfs_iunlink_log(tp, pip); } - /* lookup the next inode and update to point at prev */ - if (ip->i_next_unlinked != NULLAGINO) { + if (next_agino == NULLAGINO) { + /* only care about the tail of bucket 0 for xfs_iunlink() */ + if (pag->pag_unlinked_tail == ip) + pag->pag_unlinked_tail = pip; + } else { + /* lookup the next inode and update to point at prev */ struct xfs_inode *nip; nip = xfs_iunlink_lookup_next(pag, ip); @@ -2188,23 +2220,21 @@ xfs_iunlink( struct xfs_trans *tp, struct xfs_inode *ip) { - struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *agibp; - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + struct xfs_buf *agibp = NULL; + struct xfs_perag *pag; int error; ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); trace_xfs_iunlink(ip); - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(mp, tp, agno, &agibp); - if (error) - return error; + pag = xfs_iunlink_insert_lock(XFS_INO_TO_AGNO(tp->t_mountp, ip->i_ino), + tp, ip, &agibp); + if (IS_ERR(pag)) + return PTR_ERR(pag); - mutex_lock(&agibp->b_pag->pag_unlinked_mutex); - error = xfs_iunlink_insert_inode(tp, agibp, ip); - mutex_unlock(&agibp->b_pag->pag_unlinked_mutex); + error = xfs_iunlink_insert_inode(pag, tp, agibp, ip); + xfs_iunlink_unlock(pag); return error; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d47eea31c165..30198aea7335 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2801,6 +2801,11 @@ xlog_recover_iunlink_ag( prev_ip->i_next_unlinked = NULLAGINO; break; } + + /* XXX: take pag_unlinked_mutex across the loop? */ + if (!bucket) + agibp->b_pag->pag_unlinked_tail = ip; + if (prev_ip) { ip->i_prev_unlinked = prev_agino; xfs_irele(prev_ip); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f94e14059e61..f1cd3e9c4da5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -224,6 +224,7 @@ xfs_initialize_perag( first_initialised = index; spin_lock_init(&pag->pag_state_lock); mutex_init(&pag->pag_unlinked_mutex); + pag->pag_unlinked_tail = NULL; } index = xfs_set_inode_alloc(mp, agcount); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9a1d0f239fa4..934e7c373042 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -375,6 +375,9 @@ typedef struct xfs_perag { /* lock to protect unlinked inode list */ struct mutex pag_unlinked_mutex; + /* point to the inode tail of AGI unlinked bucket 0 */ + struct xfs_inode *pag_unlinked_tail; + /* * Unlinked inode information. This incore information reflects * data stored in the AGI, so callers must hold the AGI buffer lock -- 2.18.1