Re: [PATCH] XFS: _xfs_buf_find ignores XBF_DONT_BLOCK flag

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 14, 2010 at 7:48 PM, Dave Chinner <david@xxxxxxxxxxxxx> wrote:

> On Wed, Jul 14, 2010 at 03:03:21PM -0400, Peter Watkins wrote:

[ snip ]

>
> I think you misunderstand what XBF_DONT_BLOCK is for - that's not
> hard because it's not particularly well commented. FYI, it's to
> prevent memory reclaim recursion back into the filesystem during
> memory allocation in the buffer layer while we hold locked items.
> The buffer cache implementation of the flag has changed over time
> (originally an Irix flag, IIRC) so it doesn't quite describe what it
> does on linux anymore.  See xb_to_gfp() and xb_to_km() to find out
> what it triggers.
>
> FWIW, if we wanted to avoid blocking on buffer locks, then we'd be
> passing XBF_TRYLOCK....

Thanks for taking the time to clue me in.


>
> Ok, so the problem here is that we are trying a memory allocation
> with a locked inode buffer, and memory reclaim can trip over that
> locked inode buffer and deadlock. That means we should not allow
> allocation recursion from within xfs_iformat() even though we are
> not in a tranaction context.
>

Which is just what KM_NOFS is for. I see.

> The following patch converts the allocations during inode
> initialisation to use KM_NOFS. It applies to a 2.6.35-rc3 kernel,
> but might apply to a 2.6.27 kernel. Can you test it?

Sure, I'll test it on some of these systems. The bug is not easy to
reproduce, but
I'll try to recreate the load.

Thanks for the help!

>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@xxxxxxxxxxxxx
>
> xfs: fix memory reclaim recursion deadlock on locked inode buffer
>
> From: Dave Chinner <dchinner@xxxxxxxxxx>
>
> Calling into memory reclaim with a locked inode buffer can deadlock
> if memory reclaim tries to lock the inode buffer during inode
> teardown. Convert the relevant memory allocations to use KM_NOFS to
> avoid this deadlock condition.
>
> Reported-by: Peter Watkins <treestem@xxxxxxxxx>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_inode.c |   20 +++++++++++---------
>  1 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5715a9d..3c206b3 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -422,7 +422,7 @@ xfs_iformat(
>        if (!XFS_DFORK_Q(dip))
>                return 0;
>        ASSERT(ip->i_afp == NULL);
> -       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
> +       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP|KM_NOFS);
>        ip->i_afp->if_ext_max =
>                XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
>        switch (dip->di_aformat) {
> @@ -505,7 +505,7 @@ xfs_iformat_local(
>                ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
>        else {
>                real_size = roundup(size, 4);
> -               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP|KM_NOFS);
>        }
>        ifp->if_bytes = size;
>        ifp->if_real_bytes = real_size;
> @@ -632,7 +632,7 @@ xfs_iformat_btree(
>        }
>
>        ifp->if_broot_bytes = size;
> -       ifp->if_broot = kmem_alloc(size, KM_SLEEP);
> +       ifp->if_broot = kmem_alloc(size, KM_SLEEP|KM_NOFS);
>        ASSERT(ifp->if_broot != NULL);
>        /*
>         * Copy and convert from the on-disk structure
> @@ -2191,7 +2191,7 @@ xfs_iroot_realloc(
>                 */
>                if (ifp->if_broot_bytes == 0) {
>                        new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
> -                       ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
> +                       ifp->if_broot = kmem_alloc(new_size, KM_SLEEP|KM_NOFS);
>                        ifp->if_broot_bytes = (int)new_size;
>                        return;
>                }
> @@ -2207,7 +2207,7 @@ xfs_iroot_realloc(
>                new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
>                ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
>                                (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
> -                               KM_SLEEP);
> +                               KM_SLEEP|KM_NOFS);
>                op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
>                                                     ifp->if_broot_bytes);
>                np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
> @@ -2233,7 +2233,7 @@ xfs_iroot_realloc(
>        else
>                new_size = 0;
>        if (new_size > 0) {
> -               new_broot = kmem_alloc(new_size, KM_SLEEP);
> +               new_broot = kmem_alloc(new_size, KM_SLEEP|KM_NOFS);
>                /*
>                 * First copy over the btree block header.
>                 */
> @@ -2337,7 +2337,8 @@ xfs_idata_realloc(
>                real_size = roundup(new_size, 4);
>                if (ifp->if_u1.if_data == NULL) {
>                        ASSERT(ifp->if_real_bytes == 0);
> -                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +                       ifp->if_u1.if_data = kmem_alloc(real_size,
> +                                                       KM_SLEEP|KM_NOFS);
>                } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
>                        /*
>                         * Only do the realloc if the underlying size
> @@ -2348,11 +2349,12 @@ xfs_idata_realloc(
>                                        kmem_realloc(ifp->if_u1.if_data,
>                                                        real_size,
>                                                        ifp->if_real_bytes,
> -                                                       KM_SLEEP);
> +                                                       KM_SLEEP|KM_NOFS);
>                        }
>                } else {
>                        ASSERT(ifp->if_real_bytes == 0);
> -                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +                       ifp->if_u1.if_data = kmem_alloc(real_size,
> +                                                       KM_SLEEP|KM_NOFS);
>                        memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
>                                ifp->if_bytes);
>                }
>

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux