From: "Darrick J. Wong" <djwong@xxxxxxxxxx> [ Upstream commit 83771c50e42b92de6740a63e152c96c052d37736 ] The previous patch to reload unrecovered unlinked inodes when adding a newly created inode to the unlinked list is missing a key piece of functionality. It doesn't handle the case that someone calls xfs_iget on an inode that is not the last item in the incore list. For example, if at mount time the ondisk iunlink bucket looks like this: AGI -> 7 -> 22 -> 3 -> NULL None of these three inodes are cached in memory. Now let's say that someone tries to open inode 3 by handle. We need to walk the list to make sure that inodes 7 and 22 get loaded cold, and that the i_prev_unlinked of inode 3 gets set to 22. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> Signed-off-by: Leah Rumancik <leah.rumancik@xxxxxxxxx> Acked-by: Chandan Babu R <chandanbabu@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- fs/xfs/xfs_export.c | 6 +++ fs/xfs/xfs_inode.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode.h | 9 ++++ fs/xfs/xfs_itable.c | 9 ++++ fs/xfs/xfs_trace.h | 20 ++++++++++ 5 files changed, 144 insertions(+) --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -146,6 +146,12 @@ xfs_nfs_get_inode( return ERR_PTR(error); } + error = xfs_inode_reload_unlinked(ip); + if (error) { + xfs_irele(ip); + return ERR_PTR(error); + } + if (VFS_I(ip)->i_generation != generation) { xfs_irele(ip); return ERR_PTR(-ESTALE); --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3622,3 +3622,103 @@ xfs_iunlock2_io_mmap( if (ip1 != ip2) inode_unlock(VFS_I(ip1)); } + +/* + * Reload the incore inode list for this inode. Caller should ensure that + * the link count cannot change, either by taking ILOCK_SHARED or otherwise + * preventing other threads from executing. + */ +int +xfs_inode_reload_unlinked_bucket( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *agibp; + struct xfs_agi *agi; + struct xfs_perag *pag; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + xfs_agino_t prev_agino, next_agino; + unsigned int bucket; + bool foundit = false; + int error; + + /* Grab the first inode in the list */ + pag = xfs_perag_get(mp, agno); + error = xfs_ialloc_read_agi(pag, tp, &agibp); + xfs_perag_put(pag); + if (error) + return error; + + bucket = agino % XFS_AGI_UNLINKED_BUCKETS; + agi = agibp->b_addr; + + trace_xfs_inode_reload_unlinked_bucket(ip); + + xfs_info_ratelimited(mp, + "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.", + agino, agno); + + prev_agino = NULLAGINO; + next_agino = be32_to_cpu(agi->agi_unlinked[bucket]); + while (next_agino != NULLAGINO) { + struct xfs_inode *next_ip = NULL; + + if (next_agino == agino) { + /* Found this inode, set its backlink. */ + next_ip = ip; + next_ip->i_prev_unlinked = prev_agino; + foundit = true; + } + if (!next_ip) { + /* Inode already in memory. */ + next_ip = xfs_iunlink_lookup(pag, next_agino); + } + if (!next_ip) { + /* Inode not in memory, reload. */ + error = xfs_iunlink_reload_next(tp, agibp, prev_agino, + next_agino); + if (error) + break; + + next_ip = xfs_iunlink_lookup(pag, next_agino); + } + if (!next_ip) { + /* No incore inode at all? We reloaded it... 
 fs/xfs/xfs_export.c |    6 ++
 fs/xfs/xfs_inode.c  |  100 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_inode.h  |    9 ++++
 fs/xfs/xfs_itable.c |    9 ++++
 fs/xfs/xfs_trace.h  |   20 ++++++++++
 5 files changed, 144 insertions(+)

--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -146,6 +146,12 @@ xfs_nfs_get_inode(
 		return ERR_PTR(error);
 	}
 
+	error = xfs_inode_reload_unlinked(ip);
+	if (error) {
+		xfs_irele(ip);
+		return ERR_PTR(error);
+	}
+
 	if (VFS_I(ip)->i_generation != generation) {
 		xfs_irele(ip);
 		return ERR_PTR(-ESTALE);
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3622,3 +3622,103 @@ xfs_iunlock2_io_mmap(
 	if (ip1 != ip2)
 		inode_unlock(VFS_I(ip1));
 }
+
+/*
+ * Reload the incore inode list for this inode. Caller should ensure that
+ * the link count cannot change, either by taking ILOCK_SHARED or otherwise
+ * preventing other threads from executing.
+ */
+int
+xfs_inode_reload_unlinked_bucket(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_buf		*agibp;
+	struct xfs_agi		*agi;
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	xfs_agino_t		prev_agino, next_agino;
+	unsigned int		bucket;
+	bool			foundit = false;
+	int			error;
+
+	/* Grab the first inode in the list */
+	pag = xfs_perag_get(mp, agno);
+	error = xfs_ialloc_read_agi(pag, tp, &agibp);
+	xfs_perag_put(pag);
+	if (error)
+		return error;
+
+	bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
+	agi = agibp->b_addr;
+
+	trace_xfs_inode_reload_unlinked_bucket(ip);
+
+	xfs_info_ratelimited(mp,
+ "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.",
+			agino, agno);
+
+	prev_agino = NULLAGINO;
+	next_agino = be32_to_cpu(agi->agi_unlinked[bucket]);
+	while (next_agino != NULLAGINO) {
+		struct xfs_inode	*next_ip = NULL;
+
+		if (next_agino == agino) {
+			/* Found this inode, set its backlink. */
+			next_ip = ip;
+			next_ip->i_prev_unlinked = prev_agino;
+			foundit = true;
+		}
+		if (!next_ip) {
+			/* Inode already in memory. */
+			next_ip = xfs_iunlink_lookup(pag, next_agino);
+		}
+		if (!next_ip) {
+			/* Inode not in memory, reload. */
+			error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
+					next_agino);
+			if (error)
+				break;
+
+			next_ip = xfs_iunlink_lookup(pag, next_agino);
+		}
+		if (!next_ip) {
+			/* No incore inode at all? We reloaded it... */
+			ASSERT(next_ip != NULL);
+			error = -EFSCORRUPTED;
+			break;
+		}
+
+		prev_agino = next_agino;
+		next_agino = next_ip->i_next_unlinked;
+	}
+
+	xfs_trans_brelse(tp, agibp);
+	/* Should have found this inode somewhere in the iunlinked bucket. */
+	if (!error && !foundit)
+		error = -EFSCORRUPTED;
+	return error;
+}
+
+/* Decide if this inode is missing its unlinked list and reload it. */
+int
+xfs_inode_reload_unlinked(
+	struct xfs_inode	*ip)
+{
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_inode_unlinked_incomplete(ip))
+		error = xfs_inode_reload_unlinked_bucket(tp, ip);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	xfs_trans_cancel(tp);
+
+	return error;
+}
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -593,4 +593,13 @@ void xfs_end_io(struct work_struct *work
 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 
+static inline bool
+xfs_inode_unlinked_incomplete(
+	struct xfs_inode	*ip)
+{
+	return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
+}
+int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_inode_reload_unlinked(struct xfs_inode *ip);
+
 #endif /* __XFS_INODE_H__ */
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -80,6 +80,15 @@ xfs_bulkstat_one_int(
 	if (error)
 		goto out;
 
+	if (xfs_inode_unlinked_incomplete(ip)) {
+		error = xfs_inode_reload_unlinked_bucket(tp, ip);
+		if (error) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			xfs_irele(ip);
+			return error;
+		}
+	}
+
 	ASSERT(ip != NULL);
 	ASSERT(ip->i_imap.im_blkno != 0);
 	inode = VFS_I(ip);
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3704,6 +3704,26 @@ TRACE_EVENT(xfs_iunlink_reload_next,
 		  __entry->next_agino)
 );
 
+TRACE_EVENT(xfs_inode_reload_unlinked_bucket,
+	TP_PROTO(struct xfs_inode *ip),
+	TP_ARGS(ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agino_t, agino)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
+		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
+	),
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agino,
+		  __entry->agino % XFS_AGI_UNLINKED_BUCKETS)
+);
+
 DECLARE_EVENT_CLASS(xfs_ag_inode_class,
 	TP_PROTO(struct xfs_inode *ip),
 	TP_ARGS(ip),

Patches currently in stable-queue which might be from leah.rumancik@xxxxxxxxx are

queue-6.1/xfs-fix-the-calculation-for-end-and-length.patch
queue-6.1/xfs-journal-geometry-is-not-properly-bounds-checked.patch
queue-6.1/xfs-use-i_prev_unlinked-to-distinguish-inodes-that-are-not-on-the-unlinked-list.patch
queue-6.1/xfs-load-uncached-unlinked-inodes-into-memory-on-demand.patch
queue-6.1/xfs-fix-extent-busy-updating.patch
queue-6.1/xfs-reload-entire-unlinked-bucket-lists.patch
queue-6.1/xfs-block-reservation-too-large-for-minleft-allocation.patch
queue-6.1/xfs-don-t-use-bmbt-btree-split-workers-for-io-completion.patch
queue-6.1/xfs-prefer-free-inodes-at-enospc-over-chunk-allocation.patch
queue-6.1/xfs-quotacheck-failure-can-race-with-background-inode-inactivation.patch
queue-6.1/xfs-dquot-shrinker-doesn-t-check-for-xfs_dqflag_freeing.patch
queue-6.1/xfs-buffer-pins-need-to-hold-a-buffer-reference.patch
queue-6.1/xfs-fix-agf-vs-inode-cluster-buffer-deadlock.patch
queue-6.1/xfs-fix-bug_on-in-xfs_getbmap.patch
queue-6.1/xfs-remove-warn-when-dquot-cache-insertion-fails.patch
queue-6.1/xfs-correct-calculation-for-agend-and-blockcount.patch
queue-6.1/xfs-defered-work-could-create-precommits.patch
queue-6.1/xfs-fix-low-space-alloc-deadlock.patch
queue-6.1/xfs-fix-unlink-vs-cluster-buffer-instantiation-race.patch
queue-6.1/xfs-set-bnobt-cntbt-numrecs-correctly-when-formatting-new-ags.patch
queue-6.1/xfs-fix-negative-array-access-in-xfs_getbmap.patch
queue-6.1/xfs-collect-errors-from-inodegc-for-unlinked-inode-recovery.patch
queue-6.1/xfs-fix-deadlock-on-xfs_inodegc_worker.patch
queue-6.1/xfs-fix-reloading-entire-unlinked-bucket-lists.patch
queue-6.1/xfs-fix-uninitialized-variable-access.patch
queue-6.1/xfs-fix-ag-count-overflow-during-growfs.patch
queue-6.1/xfs-make-inode-unlinked-bucket-recovery-work-with-quotacheck.patch