On a sufficiently corrupt filesystem walking the btree nodes might hit the
same node again, which currently will deadlock.  Use a recursion counter
to avoid the direct deadlock and let the normal loop detection (two bad
nodes and out) do its work.  This is how repair behaved before we added
the lock when implementing buffer prefetching.

Reported-by: Arkadiusz Miśkiewicz <arekm@xxxxxxxx>
Tested-by: Arkadiusz Miśkiewicz <arekm@xxxxxxxx>
Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: xfsprogs-dev/include/libxfs.h
===================================================================
--- xfsprogs-dev.orig/include/libxfs.h	2011-11-22 22:28:23.000000000 +0000
+++ xfsprogs-dev/include/libxfs.h	2011-11-22 22:34:27.000000000 +0000
@@ -226,6 +226,8 @@ typedef struct xfs_buf {
 	unsigned		b_bcount;
 	dev_t			b_dev;
 	pthread_mutex_t		b_lock;
+	pthread_t		b_holder;
+	unsigned int		b_recur;
 	void			*b_fsprivate;
 	void			*b_fsprivate2;
 	void			*b_fsprivate3;
Index: xfsprogs-dev/libxfs/rdwr.c
===================================================================
--- xfsprogs-dev.orig/libxfs/rdwr.c	2011-11-22 22:28:23.000000000 +0000
+++ xfsprogs-dev/libxfs/rdwr.c	2011-11-22 22:40:01.000000000 +0000
@@ -342,6 +342,8 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t devi
 	list_head_init(&bp->b_lock_list);
 #endif
 	pthread_mutex_init(&bp->b_lock, NULL);
+	bp->b_holder = 0;
+	bp->b_recur = 0;
 }
 
 xfs_buf_t *
@@ -410,18 +412,24 @@ libxfs_getbuf_flags(dev_t device, xfs_da
 		return NULL;
 
 	if (use_xfs_buf_lock) {
-		if (flags & LIBXFS_GETBUF_TRYLOCK) {
-			int ret;
+		int ret;
 
-			ret = pthread_mutex_trylock(&bp->b_lock);
-			if (ret) {
-				ASSERT(ret == EAGAIN);
-				cache_node_put(libxfs_bcache, (struct cache_node *)bp);
-				return NULL;
+		ret = pthread_mutex_trylock(&bp->b_lock);
+		if (ret) {
+			ASSERT(ret == EAGAIN);
+			if (flags & LIBXFS_GETBUF_TRYLOCK)
+				goto out_put;
+
+			if (pthread_equal(bp->b_holder, pthread_self())) {
+				fprintf(stderr,
+	_("recursive buffer locking detected\n"));
+				bp->b_recur++;
+			} else {
+				pthread_mutex_lock(&bp->b_lock);
 			}
-		} else {
-			pthread_mutex_lock(&bp->b_lock);
 		}
+
+		bp->b_holder = pthread_self();
 	}
 
 	cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
@@ -440,6 +448,9 @@ libxfs_getbuf_flags(dev_t device, xfs_da
 #endif
 
 	return bp;
+out_put:
+	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
+	return NULL;
 }
 
 struct xfs_buf *
@@ -458,8 +469,14 @@ libxfs_putbuf(xfs_buf_t *bp)
 	list_del_init(&bp->b_lock_list);
 	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
 #endif
-	if (use_xfs_buf_lock)
-		pthread_mutex_unlock(&bp->b_lock);
+	if (use_xfs_buf_lock) {
+		if (bp->b_recur) {
+			bp->b_recur--;
+		} else {
+			bp->b_holder = 0;
+			pthread_mutex_unlock(&bp->b_lock);
+		}
+	}
 	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
 }
 
_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs