From: Dave Chinner <dchinner@xxxxxxxxxx> When we release a buffer from the cache, if it is dirty we write it to disk then put the buffer on the free list for recycling. However, if the write fails (e.g. verifier failure due to unfixed corruption) we effectively throw the buffer and its contents away. This causes all sorts of problems for xfs_repair as it then re-reads the buffer from disk on the next access and hence loses all the corrections that had previously been made, resulting in tripping over corruptions in code that assumes the corruptions have already been fixed/flagged in the buffer it receives. To fix this, we have to make the cache aware that writes can fail, and keep the buffer in cache when writes fail. Hence we have to add an explicit error notification to the flush operation, and we need to do that before we release the buffer to the free list. This also means that we need to remove the writeback code from the release mechanisms, instead replacing them with assertions that the buffers are already clean. 
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx> Signed-off-by: Dave Chinner <david@xxxxxxxxxxxxx> --- include/cache.h | 2 +- libxfs/cache.c | 15 ++++++++++++++- libxfs/rdwr.c | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/cache.h b/include/cache.h index 0a84c69..87826be 100644 --- a/include/cache.h +++ b/include/cache.h @@ -64,7 +64,7 @@ typedef void *cache_key_t; typedef void (*cache_walk_t)(struct cache_node *); typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t); -typedef void (*cache_node_flush_t)(struct cache_node *); +typedef int (*cache_node_flush_t)(struct cache_node *); typedef void (*cache_node_relse_t)(struct cache_node *); typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, unsigned int); diff --git a/libxfs/cache.c b/libxfs/cache.c index 4753a1d..a48ebd9 100644 --- a/libxfs/cache.c +++ b/libxfs/cache.c @@ -219,6 +219,12 @@ cache_shake( if (pthread_mutex_trylock(&node->cn_mutex) != 0) continue; + /* can't release dirty objects */ + if (cache->flush(node)) { + pthread_mutex_unlock(&node->cn_mutex); + continue; + } + hash = cache->c_hash + node->cn_hashidx; if (pthread_mutex_trylock(&hash->ch_mutex) != 0) { pthread_mutex_unlock(&node->cn_mutex); @@ -311,6 +317,13 @@ __cache_node_purge( pthread_mutex_unlock(&node->cn_mutex); return count; } + + /* can't purge dirty objects */ + if (cache->flush(node)) { + pthread_mutex_unlock(&node->cn_mutex); + return 1; + } + mru = &cache->c_mrus[node->cn_priority]; pthread_mutex_lock(&mru->cm_mutex); list_del_init(&node->cn_mru); @@ -321,7 +334,7 @@ __cache_node_purge( pthread_mutex_destroy(&node->cn_mutex); list_del_init(&node->cn_hash); cache->relse(node); - return count; + return 0; } /* diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 7b23394..37162cd 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -659,6 +659,8 @@ __libxfs_getbufr(int blen) bp = 
kmem_zone_zalloc(xfs_buf_zone, 0); pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); bp->b_ops = NULL; + if (bp->b_flags & LIBXFS_B_DIRTY) + fprintf(stderr, "found dirty buffer (bulk) on free list!"); return bp; } @@ -1223,23 +1225,26 @@ libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags) } static void -libxfs_brelse(struct cache_node *node) +libxfs_brelse( + struct cache_node *node) { - xfs_buf_t *bp = (xfs_buf_t *)node; + struct xfs_buf *bp = (struct xfs_buf *)node; - if (bp != NULL) { - if (bp->b_flags & LIBXFS_B_DIRTY) - libxfs_writebufr(bp); - pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); - list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); - pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); - } + if (!bp) + return; + if (bp->b_flags & LIBXFS_B_DIRTY) + fprintf(stderr, + "releasing dirty buffer to free list!"); + + pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); + list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); + pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); } static unsigned int libxfs_bulkrelse( - struct cache *cache, - struct list_head *list) + struct cache *cache, + struct list_head *list) { xfs_buf_t *bp; int count = 0; @@ -1249,7 +1254,8 @@ libxfs_bulkrelse( list_for_each_entry(bp, list, b_node.cn_mru) { if (bp->b_flags & LIBXFS_B_DIRTY) - libxfs_writebufr(bp); + fprintf(stderr, + "releasing dirty buffer (bulk) to free list!"); count++; } @@ -1260,18 +1266,22 @@ libxfs_bulkrelse( return count; } -static void -libxfs_bflush(struct cache_node *node) +static int +libxfs_bflush( + struct cache_node *node) { - xfs_buf_t *bp = (xfs_buf_t *)node; + struct xfs_buf *bp = (struct xfs_buf *)node; - if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY)) - libxfs_writebufr(bp); + if (bp->b_flags & LIBXFS_B_DIRTY) + return libxfs_writebufr(bp); + return 0; } void libxfs_putbufr(xfs_buf_t *bp) { + if (bp->b_flags & LIBXFS_B_DIRTY) + libxfs_writebufr(bp); libxfs_brelse((struct cache_node *)bp); } -- 2.5.0 
_______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs