Hi Dave, During working on this stuff, I found some another noticiable places to mention. Hope it of some help... On Tue, Jun 23, 2020 at 07:50:14PM +1000, Dave Chinner wrote: ... > /* > - * This is called when the inode's link count has gone to 0 or we are creating > - * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. > - * > - * We place the on-disk inode on a list in the AGI. It will be pulled from this > - * list when the inode is freed. > + * Always insert at the head, so we only have to do a next inode lookup to > + * update it's prev pointer. The AGI bucket will point at the one we are > + * inserting. > */ > -STATIC int > -xfs_iunlink( > +static int > +xfs_iunlink_insert_inode( > struct xfs_trans *tp, > + struct xfs_buf *agibp, > struct xfs_inode *ip) > { > struct xfs_mount *mp = tp->t_mountp; > - struct xfs_agi *agi; > - struct xfs_buf *agibp; > - xfs_agino_t next_agino; > - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); > + struct xfs_agi *agi = agibp->b_addr; > + xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); unnecessary modification here... (use xfs_agnumber_t instead) > -/* Return the imap, dinode pointer, and buffer for an inode. */ > -STATIC int > -xfs_iunlink_map_ino( > +/* > + * Remove can be from anywhere in the list, so we have to do two adjacent inode > + * lookups here so we can update list pointers. We may be at the head or the > + * tail of the list, so we have to handle those cases as well. > + */ > +static int > +xfs_iunlink_remove_inode( > struct xfs_trans *tp, > - xfs_agnumber_t agno, > - xfs_agino_t agino, > - struct xfs_imap *imap, > - struct xfs_dinode **dipp, > - struct xfs_buf **bpp) > + struct xfs_buf *agibp, > + struct xfs_inode *ip) > { > struct xfs_mount *mp = tp->t_mountp; > + struct xfs_agi *agi = agibp->b_addr; > + xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); same here > /* > @@ -2762,56 +2735,107 @@ xlog_recover_process_one_iunlink( > * scheduled on this CPU to ensure other scheduled work can run without undue > * latency. > */ > -STATIC void > -xlog_recover_process_iunlinks( > - struct xlog *log) > +static int > +xlog_recover_iunlink_ag( > + struct xfs_mount *mp, > + xfs_agnumber_t agno) > { > - xfs_mount_t *mp; > - xfs_agnumber_t agno; > - xfs_agi_t *agi; > - xfs_buf_t *agibp; > - xfs_agino_t agino; > - int bucket; > - int error; > + struct xfs_agi *agi; > + struct xfs_buf *agibp; > + int bucket; > + int error; > > - mp = log->l_mp; > + /* > + * Find the agi for this ag. > + */ > + error = xfs_read_agi(mp, NULL, agno, &agibp); > + if (error) { > > - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { > /* > - * Find the agi for this ag. > + * AGI is b0rked. Don't process it. > + * > + * We should probably mark the filesystem as corrupt after we've > + * recovered all the ag's we can.... > */ > - error = xfs_read_agi(mp, NULL, agno, &agibp); > + return 0; > + } > + > + /* > + * Unlock the buffer so that it can be acquired in the normal course of > + * the transaction to truncate and free each inode. Because we are not > + * racing with anyone else here for the AGI buffer, we don't even need > + * to hold it locked to read the initial unlinked bucket entries out of > + * the buffer. We keep buffer reference though, so that it stays pinned > + * in memory while we need the buffer. > + */ > + agi = agibp->b_addr; > + xfs_buf_unlock(agibp); > + > + /* > + * The unlinked inode list is maintained on incore inodes as a double > + * linked list. We don't have any of that state in memory, so we have to > + * create it as we go. This is simple as we are only removing from the > + * head of the list and that means we only need to pull the current > + * inode in and the next inode. Inodes are unlinked when their > + * reference count goes to zero, so we can overlap the xfs_iget() and > + * xfs_irele() calls so we always have the first two inodes on the list > + * in memory. Hence we can fake up the necessary in memory state for the > + * unlink to "just work". > + */ > + for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { > + struct xfs_inode *ip, *prev_ip = NULL; > + xfs_agino_t agino, prev_agino = NULLAGINO; > + > + agino = be32_to_cpu(agi->agi_unlinked[bucket]); > + while (agino != NULLAGINO) { > + ip = xlog_recover_get_one_iunlink(mp, agno, agino, > + bucket); > + if (!ip) { > + /* > + * something busted, but still got to release > + * prev_ip, so make it look like it's at the end > + * of the list before it gets released. > + */ > + error = -EFSCORRUPTED; > + if (prev_ip) > + prev_ip->i_next_unlinked = NULLAGINO; > + break; > + } > + if (prev_ip) { > + ip->i_prev_unlinked = prev_agino; > + xfs_irele(prev_ip); > + } (little confused about this, why not xfs_irele the current ip and move it after agino = ip->i_next_unlinked; ....) And some comments about "[PATCH 4/4] xfs: introduce inode unlink log item. (no need to raise another thread about this since I'm not fully reviewing that..) > @@ -2185,6 +2136,7 @@ xfs_iunlink( > xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); > int error; > > + ASSERT(ip->i_next_unlinked == NULLAGINO); > ASSERT(VFS_I(ip)->i_nlink == 0); > ASSERT(VFS_I(ip)->i_mode != 0); > trace_xfs_iunlink(ip); this ASSERT is not necessary since some unreasonable numbers could be loaded from disk for crafted images... (seems only for debugging use...) Thanks, Gao Xiang