Re: [PATCH v3] fuse: share lookup state between submount and its parent

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Oct 18, 2023 at 3:34 AM Krister Johansen
<kjlx@xxxxxxxxxxxxxxxxxx> wrote:
>
> Fuse submounts do not perform a lookup for the nodeid that they inherit
> from their parent.  Instead, the code decrements the nlookup on the
> submount's fuse_inode when it is instantiated, and no forget is
> performed when a submount root is evicted.
>
> Trouble arises when the submount's parent is evicted despite the
> submount itself being in use.  In this author's case, the submount was
> in a container and detached from the initial mount namespace via a
> MNT_DETACH operation.  When memory pressure triggered the shrinker, the
> inode from the parent was evicted, which triggered enough forgets to
> render the submount's nodeid invalid.
>
> Since submounts should still function, even if their parent goes away,
> solve this problem by sharing refcounted state between the parent and
> its submount.  When all of the references on this shared state reach
> zero, it's safe to forget the final lookup of the fuse nodeid.
>
> Signed-off-by: Krister Johansen <kjlx@xxxxxxxxxxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 1866d779d5d2 ("fuse: Allow fuse_fill_super_common() for submounts")
> ---
>  fs/fuse/fuse_i.h | 20 +++++++++++
>  fs/fuse/inode.c  | 88 ++++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 105 insertions(+), 3 deletions(-)
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 405252bb51f2..0d1659c5016b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -63,6 +63,24 @@ struct fuse_forget_link {
>         struct fuse_forget_link *next;
>  };
>
> +/* Submount lookup tracking */
> +struct fuse_submount_lookup {
> +       /** Refcount */
> +       refcount_t count;
> +
> +       /** Unique ID, which identifies the inode between userspace
> +        * and kernel */
> +       u64 nodeid;
> +
> +       /** Number of lookups on this inode */
> +       u64 nlookup;

sl->nlookup will always be one.  So that can just be implicit and this
field can just go away.

> +
> +       /** The request used for sending the FORGET message */
> +       struct fuse_forget_link *forget;
> +
> +       struct rcu_head rcu;

RCU would be needed if any fields could be accessed from RCU-protected
code.  But AFAICS there's no such access, so this shouldn't be needed.
Am I missing something?

> +};
> +
>  /** FUSE inode */
>  struct fuse_inode {
>         /** Inode data */
> @@ -158,6 +176,8 @@ struct fuse_inode {
>          */
>         struct fuse_inode_dax *dax;
>  #endif
> +       /** Submount specific lookup tracking */
> +       struct fuse_submount_lookup *submount_lookup;
>  };
>
>  /** FUSE inode state bits */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 444418e240c8..dc1499e2074f 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void)
>         return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
>  }
>
> +static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
> +{
> +       struct fuse_submount_lookup *sl;
> +
> +       sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
> +       if (!sl)
> +               return NULL;
> +       sl->forget = fuse_alloc_forget();
> +       if (!sl->forget)
> +               goto out_free;
> +
> +       return sl;
> +
> +out_free:
> +       kfree(sl);
> +       return NULL;
> +}
> +
>  static struct inode *fuse_alloc_inode(struct super_block *sb)
>  {
>         struct fuse_inode *fi;
> @@ -113,9 +131,24 @@ static void fuse_free_inode(struct inode *inode)
>         kmem_cache_free(fuse_inode_cachep, fi);
>  }
>
> +static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
> +                                        struct fuse_submount_lookup *sl)
> +{
> +       if (!refcount_dec_and_test(&sl->count))
> +               return;
> +
> +       if (sl->nlookup) {
> +               fuse_queue_forget(fc, sl->forget, sl->nodeid, sl->nlookup);
> +               sl->forget = NULL;
> +       }
> +       kfree(sl->forget);
> +       kfree_rcu(sl, rcu);
> +}
> +
>  static void fuse_evict_inode(struct inode *inode)
>  {
>         struct fuse_inode *fi = get_fuse_inode(inode);
> +       struct fuse_submount_lookup *sl = NULL;
>
>         /* Will write inode on close/munmap and in all other dirtiers */
>         WARN_ON(inode->i_state & I_DIRTY_INODE);
> @@ -132,6 +165,15 @@ static void fuse_evict_inode(struct inode *inode)
>                                           fi->nlookup);
>                         fi->forget = NULL;
>                 }
> +
> +               spin_lock(&fi->lock);
> +               if (fi->submount_lookup) {
> +                       sl = fi->submount_lookup;
> +                       fi->submount_lookup = NULL;
> +               }
> +               spin_unlock(&fi->lock);

I don't think locking is needed.  Eviction happens only once and at
that point nobody else should be touching the inode.

> +               if (sl)
> +                       fuse_cleanup_submount_lookup(fc, sl);
>         }
>         if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
>                 WARN_ON(!list_empty(&fi->write_files));
> @@ -332,6 +374,14 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
>                 fuse_dax_dontcache(inode, attr->flags);
>  }
>
> +static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
> +                                     u64 nodeid)
> +{
> +       sl->nodeid = nodeid;
> +       sl->nlookup = 1;
> +       refcount_set(&sl->count, 1);
> +}
> +
>  static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
>                             struct fuse_conn *fc)
>  {
> @@ -395,12 +445,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
>          */
>         if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
>             S_ISDIR(attr->mode)) {
> +               struct fuse_inode *fi;
> +
>                 inode = new_inode(sb);
>                 if (!inode)
>                         return NULL;
>
>                 fuse_init_inode(inode, attr, fc);
> -               get_fuse_inode(inode)->nodeid = nodeid;
> +               fi = get_fuse_inode(inode);
> +               fi->nodeid = nodeid;
> +               fi->submount_lookup = fuse_alloc_submount_lookup();
> +               if (!fi->submount_lookup) {
> +                       iput(inode);
> +                       return NULL;
> +               }
> +               /* Sets nlookup = 1 on fi->submount_lookup->nlookup */
> +               fuse_init_submount_lookup(fi->submount_lookup, nodeid);
>                 inode->i_flags |= S_AUTOMOUNT;
>                 goto done;
>         }
> @@ -423,11 +483,11 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
>                 iput(inode);
>                 goto retry;
>         }
> -done:
>         fi = get_fuse_inode(inode);
>         spin_lock(&fi->lock);
>         fi->nlookup++;
>         spin_unlock(&fi->lock);
> +done:
>         fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
>
>         return inode;
> @@ -1465,6 +1525,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
>         struct super_block *parent_sb = parent_fi->inode.i_sb;
>         struct fuse_attr root_attr;
>         struct inode *root;
> +       struct fuse_submount_lookup *sl;
> +       struct fuse_inode *fi;
>
>         fuse_sb_defaults(sb);
>         fm->sb = sb;
> @@ -1487,12 +1549,32 @@ static int fuse_fill_super_submount(struct super_block *sb,
>          * its nlookup should not be incremented.  fuse_iget() does
>          * that, though, so undo it here.
>          */
> -       get_fuse_inode(root)->nlookup--;
> +       fi = get_fuse_inode(root);
> +       fi->nlookup--;
> +
>         sb->s_d_op = &fuse_dentry_operations;
>         sb->s_root = d_make_root(root);
>         if (!sb->s_root)
>                 return -ENOMEM;
>
> +       /*
> +        * Grab the parent's submount_lookup pointer and take a
> +        * reference on the shared nlookup from the parent.  This is to
> +        * prevent the last forget for this nodeid from getting
> +        * triggered until all users have finished with it.
> +        */
> +       spin_lock(&parent_fi->lock);

Root has just been allocated, so no locking is needed.

> +       sl = parent_fi->submount_lookup;
> +       if (sl) {

WARN_ON(!sl);

Thanks,
Miklos






[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux