On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > files, where overlayfs also puts a "fake" path in f_path - a path which > is not on the same fs as f_inode. > > Allocate a container struct file_fake for those internal files, that > will be used to hold the fake path qlong with the real path. > > This commit does not populate the extra fake_path field and leaves the > overlayfs internal file's f_path fake. > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > fs/cachefiles/namei.c | 2 +- > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > fs/internal.h | 5 ++- > fs/namei.c | 2 +- > fs/open.c | 11 +++--- > fs/overlayfs/file.c | 2 +- > include/linux/fs.h | 13 ++++--- > 7 files changed, 90 insertions(+), 30 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 82219a8f6084..a71bdf2d03ba 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > path.mnt = cache->mnt; > path.dentry = dentry; > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > - d_backing_inode(dentry), cache->cache_cred); > + &path, cache->cache_cred); > if (IS_ERR(file)) { > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > PTR_ERR(file), > diff --git a/fs/file_table.c b/fs/file_table.c > index 372653b92617..adc2a92faa52 100644 > --- a/fs/file_table.c > +++ b/fs/file_table.c > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > +/* Container for file with optional fake path to display in /proc files */ > +struct file_fake { > + struct file file; > + struct path fake_path; > +}; > + > +static inline struct file_fake *file_fake(struct file *f) > +{ > + return container_of(f, struct file_fake, file); > +} > + > +/* Returns fake_path if one exists, f_path otherwise */ > +const struct path *file_fake_path(struct file *f) > +{ > + struct file_fake *ff = file_fake(f); > + > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > + return &f->f_path; > + > + return &ff->fake_path; > +} > +EXPORT_SYMBOL(file_fake_path); > + > static void file_free_rcu(struct rcu_head *head) > { > struct file *f = container_of(head, struct file, f_rcuhead); > > put_cred(f->f_cred); > - kmem_cache_free(filp_cachep, f); > + if (f->f_mode & FMODE_FAKE_PATH) > + kfree(file_fake(f)); > + else > + kmem_cache_free(filp_cachep, f); > } > > static inline void file_free(struct file *f) > { > + struct file_fake *ff = file_fake(f); > + > security_file_free(f); > - if (!(f->f_mode & FMODE_NOACCOUNT)) > + if (f->f_mode & FMODE_FAKE_PATH) > + path_put(&ff->fake_path); > + else > percpu_counter_dec(&nr_files); > call_rcu(&f->f_rcuhead, file_free_rcu); > } > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > fs_initcall(init_fs_stat_sysctls); > #endif > > -static struct file *__alloc_file(int flags, const struct cred *cred) > +static int init_file(struct file *f, int flags, const struct cred *cred) > { > - struct file *f; > int error; > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > - if (unlikely(!f)) > - return ERR_PTR(-ENOMEM); > - > f->f_cred = get_cred(cred); > error = security_file_alloc(f); > if (unlikely(error)) { > file_free_rcu(&f->f_rcuhead); > - return ERR_PTR(error); > + return error; > } > > atomic_long_set(&f->f_count, 1); > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > f->f_mode = OPEN_FMODE(flags); > /* f->f_version: 0 */ > > + return 0; > +} > + > +static struct file *__alloc_file(int flags, const struct cred *cred) > +{ > + struct file *f; > + int error; > + > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > + if (unlikely(!f)) > + return ERR_PTR(-ENOMEM); > + > + error = init_file(f, flags, cred); > + if (unlikely(error)) > + return ERR_PTR(error); > + > return f; > } > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > } > > /* > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > + * Variant of alloc_empty_file() that allocates a file_fake container > + * and doesn't check and modify nr_files. > * > * Should not be used unless there's a very good reason to do so. > */ > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > + const struct cred *cred) > { > - struct file *f = __alloc_file(flags, cred); > + struct file_fake *ff; > + int error; > > - if (!IS_ERR(f)) > - f->f_mode |= FMODE_NOACCOUNT; > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > + if (unlikely(!ff)) > + return ERR_PTR(-ENOMEM); > > - return f; > + error = init_file(&ff->file, flags, cred); > + if (unlikely(error)) > + return ERR_PTR(error); > + > + ff->file.f_mode |= FMODE_FAKE_PATH; > + if (fake_path) { > + path_get(fake_path); > + ff->fake_path = *fake_path; > + } Hm, I see that this check is mostly done for vfs_tmpfile_open() which only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path NULL. So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL is an invitation for NULL derefs sooner or later. I would simply document that it's required to set ff->fake_path. For callers such as vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() should set ff->fake_path to file->f_path.