Re: [PATCH 5/6] shmem: quota support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jul 13, 2023 at 03:48:47PM +0200, cem@xxxxxxxxxx wrote:
> From: Carlos Maiolino <cem@xxxxxxxxxx>
> 
> Now the basic infra-structure is in place, enable quota support for tmpfs.
> 
> This offers user and group quotas to tmpfs (project quotas will be added
> later). Also, as other filesystems, the tmpfs quota is not supported
> within user namespaces yet, so idmapping is not translated.
> 
> Signed-off-by: Lukas Czerner <lczerner@xxxxxxxxxx>
> Signed-off-by: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
> Reviewed-by: Jan Kara <jack@xxxxxxx>
> ---
>  Documentation/filesystems/tmpfs.rst |  15 +++
>  include/linux/shmem_fs.h            |   8 ++
>  mm/shmem.c                          | 180 ++++++++++++++++++++++++++--
>  3 files changed, 195 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
> index f18f46be5c0c..0c7d8bd052f1 100644
> --- a/Documentation/filesystems/tmpfs.rst
> +++ b/Documentation/filesystems/tmpfs.rst
> @@ -130,6 +130,21 @@ for emergency or testing purposes. The values you can set for shmem_enabled are:
>      option, for testing
>  ==  ============================================================
>  
> +tmpfs also supports quota with the following mount options
> +
> +========  =============================================================
> +quota     User and group quota accounting and enforcement is enabled on
> +          the mount. Tmpfs is using hidden system quota files that are
> +          initialized on mount.
> +usrquota  User quota accounting and enforcement is enabled on the
> +          mount.
> +grpquota  Group quota accounting and enforcement is enabled on the
> +          mount.
> +========  =============================================================
> +
> +Note that tmpfs quotas do not support user namespaces so no uid/gid
> +translation is done if quotas are enabled inside user namespaces.
> +
>  tmpfs has a mount option to set the NUMA memory allocation policy for
>  all files in that instance (if CONFIG_NUMA is enabled) - which can be
>  adjusted on the fly via 'mount -o remount ...'
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 7abfaf70b58a..1a568a0f542f 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -31,6 +31,9 @@ struct shmem_inode_info {
>  	atomic_t		stop_eviction;	/* hold when working on inode */
>  	struct timespec64	i_crtime;	/* file creation time */
>  	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
> +#ifdef CONFIG_TMPFS_QUOTA
> +	struct dquot		*i_dquot[MAXQUOTAS];
> +#endif
>  	struct inode		vfs_inode;
>  };
>  
> @@ -184,4 +187,9 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  #define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
>  #define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +extern const struct dquot_operations shmem_quota_operations;
> +extern struct quota_format_type shmem_quota_format;
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  #endif
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 2a7b8060b6f4..5022238dd68d 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
>  #include <uapi/linux/memfd.h>
>  #include <linux/rmap.h>
>  #include <linux/uuid.h>
> +#include <linux/quotaops.h>
>  
>  #include <linux/uaccess.h>
>  
> @@ -116,11 +117,13 @@ struct shmem_options {
>  	int huge;
>  	int seen;
>  	bool noswap;
> +	unsigned short quota_types;
>  #define SHMEM_SEEN_BLOCKS 1
>  #define SHMEM_SEEN_INODES 2
>  #define SHMEM_SEEN_HUGE 4
>  #define SHMEM_SEEN_INUMS 8
>  #define SHMEM_SEEN_NOSWAP 16
> +#define SHMEM_SEEN_QUOTA 32
>  };
>  
>  #ifdef CONFIG_TMPFS
> @@ -212,7 +215,16 @@ static inline int shmem_inode_acct_block(struct inode *inode, long pages)
>  		if (percpu_counter_compare(&sbinfo->used_blocks,
>  					   sbinfo->max_blocks - pages) > 0)
>  			goto unacct;
> +
> +		err = dquot_alloc_block_nodirty(inode, pages);
> +		if (err)
> +			goto unacct;
> +
>  		percpu_counter_add(&sbinfo->used_blocks, pages);
> +	} else {
> +		err = dquot_alloc_block_nodirty(inode, pages);
> +		if (err)
> +			goto unacct;
>  	}
>  
>  	return 0;
> @@ -227,6 +239,8 @@ static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
>  
> +	dquot_free_block_nodirty(inode, pages);
> +
>  	if (sbinfo->max_blocks)
>  		percpu_counter_sub(&sbinfo->used_blocks, pages);
>  	shmem_unacct_blocks(info->flags, pages);
> @@ -255,6 +269,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
>  static LIST_HEAD(shmem_swaplist);
>  static DEFINE_MUTEX(shmem_swaplist_mutex);
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +
> +static int shmem_enable_quotas(struct super_block *sb,
> +			       unsigned short quota_types)
> +{
> +	int type, err = 0;
> +
> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> +	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
> +		if (!(quota_types & (1 << type)))
> +			continue;
> +		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
> +					  DQUOT_USAGE_ENABLED |
> +					  DQUOT_LIMITS_ENABLED);
> +		if (err)
> +			goto out_err;
> +	}
> +	return 0;
> +
> +out_err:
> +	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
> +		type, err);
> +	for (type--; type >= 0; type--)
> +		dquot_quota_off(sb, type);
> +	return err;
> +}
> +
> +static void shmem_disable_quotas(struct super_block *sb)
> +{
> +	int type;
> +
> +	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
> +		dquot_quota_off(sb, type);
> +}
> +
> +static struct dquot **shmem_get_dquots(struct inode *inode)
> +{
> +	return SHMEM_I(inode)->i_dquot;
> +}
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  /*
>   * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
>   * produces a novel ino for the newly allocated inode.
> @@ -361,7 +416,6 @@ static void shmem_recalc_inode(struct inode *inode)
>  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
>  	if (freed > 0) {
>  		info->alloced -= freed;
> -		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
>  		shmem_inode_unacct_blocks(inode, freed);
>  	}
>  }
> @@ -379,7 +433,6 @@ bool shmem_charge(struct inode *inode, long pages)
>  
>  	spin_lock_irqsave(&info->lock, flags);
>  	info->alloced += pages;
> -	inode->i_blocks += pages * BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irqrestore(&info->lock, flags);
>  
> @@ -395,7 +448,6 @@ void shmem_uncharge(struct inode *inode, long pages)
>  
>  	spin_lock_irqsave(&info->lock, flags);
>  	info->alloced -= pages;
> -	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irqrestore(&info->lock, flags);
>  
> @@ -1141,6 +1193,21 @@ static int shmem_setattr(struct mnt_idmap *idmap,
>  		}
>  	}
>  
> +	if (is_quota_modification(idmap, inode, attr)) {
> +		error = dquot_initialize(inode);
> +		if (error)
> +			return error;
> +	}
> +
> +	/* Transfer quota accounting */
> +	if (i_uid_needs_update(idmap, attr, inode) ||
> +	    i_gid_needs_update(idmap, attr, inode)) {
> +		error = dquot_transfer(idmap, inode, attr);
> +
> +		if (error)
> +			return error;
> +	}
> +
>  	setattr_copy(idmap, inode, attr);
>  	if (attr->ia_valid & ATTR_MODE)
>  		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
> @@ -1187,6 +1254,10 @@ static void shmem_evict_inode(struct inode *inode)
>  	WARN_ON(inode->i_blocks);
>  	shmem_free_inode(inode->i_sb);
>  	clear_inode(inode);
> +#ifdef CONFIG_TMPFS_QUOTA
> +	dquot_free_inode(inode);
> +	dquot_drop(inode);
> +#endif
>  }
>  
>  static int shmem_find_swap_entries(struct address_space *mapping,
> @@ -1986,7 +2057,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>  
>  	spin_lock_irq(&info->lock);
>  	info->alloced += folio_nr_pages(folio);
> -	inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irq(&info->lock);
>  	alloced = true;
> @@ -2357,9 +2427,10 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
>  #define shmem_initxattrs NULL
>  #endif
>  
> -static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
> -				     struct inode *dir, umode_t mode, dev_t dev,
> -				     unsigned long flags)
> +static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
> +					     struct super_block *sb,
> +					     struct inode *dir, umode_t mode,
> +					     dev_t dev, unsigned long flags)
>  {
>  	struct inode *inode;
>  	struct shmem_inode_info *info;
> @@ -2436,6 +2507,43 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
>  	return inode;
>  }
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
> +				     struct super_block *sb, struct inode *dir,
> +				     umode_t mode, dev_t dev, unsigned long flags)
> +{
> +	int err;
> +	struct inode *inode;
> +
> +	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
> +	if (IS_ERR(inode))
> +		return inode;
> +
> +	err = dquot_initialize(inode);
> +	if (err)
> +		goto errout;
> +
> +	err = dquot_alloc_inode(inode);
> +	if (err) {
> +		dquot_drop(inode);
> +		goto errout;
> +	}
> +	return inode;
> +
> +errout:
> +	inode->i_flags |= S_NOQUOTA;
> +	iput(inode);
> +	return ERR_PTR(err);
> +}
> +#else
> +static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
> +				     struct super_block *sb, struct inode *dir,
> +				     umode_t mode, dev_t dev, unsigned long flags)
> +{
> +	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
> +}
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  #ifdef CONFIG_USERFAULTFD
>  int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  			   struct vm_area_struct *dst_vma,
> @@ -2538,7 +2646,6 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  
>  	spin_lock_irq(&info->lock);
>  	info->alloced++;
> -	inode->i_blocks += BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irq(&info->lock);
>  
> @@ -3516,6 +3623,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
>  
>  static const struct inode_operations shmem_short_symlink_operations = {
>  	.getattr	= shmem_getattr,
> +	.setattr	= shmem_setattr,
>  	.get_link	= simple_get_link,
>  #ifdef CONFIG_TMPFS_XATTR
>  	.listxattr	= shmem_listxattr,
> @@ -3524,6 +3632,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
>  
>  static const struct inode_operations shmem_symlink_inode_operations = {
>  	.getattr	= shmem_getattr,
> +	.setattr	= shmem_setattr,
>  	.get_link	= shmem_get_link,
>  #ifdef CONFIG_TMPFS_XATTR
>  	.listxattr	= shmem_listxattr,
> @@ -3623,6 +3732,9 @@ enum shmem_param {
>  	Opt_inode32,
>  	Opt_inode64,
>  	Opt_noswap,
> +	Opt_quota,
> +	Opt_usrquota,
> +	Opt_grpquota,
>  };
>  
>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -3645,6 +3757,11 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_flag  ("inode32",	Opt_inode32),
>  	fsparam_flag  ("inode64",	Opt_inode64),
>  	fsparam_flag  ("noswap",	Opt_noswap),
> +#ifdef CONFIG_TMPFS_QUOTA
> +	fsparam_flag  ("quota",		Opt_quota),
> +	fsparam_flag  ("usrquota",	Opt_usrquota),
> +	fsparam_flag  ("grpquota",	Opt_grpquota),
> +#endif
>  	{}
>  };
>  
> @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  		ctx->noswap = true;
>  		ctx->seen |= SHMEM_SEEN_NOSWAP;
>  		break;
> +	case Opt_quota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> +		break;
> +	case Opt_usrquota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= QTYPE_MASK_USR;
> +		break;
> +	case Opt_grpquota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= QTYPE_MASK_GRP;
> +		break;
>  	}
>  	return 0;

I mentioned this in an earlier review; following the sequence:

if (ctx->seen & SHMEM_SEEN_QUOTA)
-> shmem_enable_quotas()
   -> dquot_load_quota_sb()

to then figure out that in dquot_load_quota_sb() we fail if
sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
mountable by unprivileged users. Every few months someone will end up
stumbling upon this code and wonder where it's blocked. There isn't even
a comment in the code.

Aside from that it's also really unfriendly to users because they may go
through setting up a tmpfs instances in the following way:

        fd_fs = fsopen("tmpfs");

User now enables quota:

        fsconfig(fd_fs, ..., "quota", ...) = 0

and goes on to set a bunch of other options:

        fsconfig(fd_fs, ..., "inode64", ...) = 0
        fsconfig(fd_fs, ..., "nr_inodes", ...) = 0
        fsconfig(fd_fs, ..., "nr_blocks", ...) = 0
        fsconfig(fd_fs, ..., "huge", ...) = 0
        fsconfig(fd_fs, ..., "mode", ...) = 0
        fsconfig(fd_fs, ..., "gid", ...) = 0

everything seems dandy and they create the superblock:

        fsconfig(fd_fs, FSCONFIG_CMD_CREATE, ...) = -EINVAL

which fails.

The user has not just performed 9 useless system calls they also have
zero clue what mount option caused the failure.

What this code really really should do is fail at:

        fsconfig(fd_fs, ..., "quota", ...) = -EINVAL

and log an error that the user can retrieve from the fs context. IOW,

diff --git a/mm/shmem.c b/mm/shmem.c
index 083ce6b478e7..baca8bf44569 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3863,14 +3863,20 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                ctx->seen |= SHMEM_SEEN_NOSWAP;
                break;
        case Opt_quota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
                break;
        case Opt_usrquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= QTYPE_MASK_USR;
                break;
        case Opt_grpquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= QTYPE_MASK_GRP;
                break;

This exactly what we already to for the "noswap" option btw.

Could you fold these changes into the patch and resend, please?
I synced with Andrew earlier and I'll be taking this series.

---

And btw, the *_SEEN_* logic for mount options is broken - but that's not
specific to your patch. Imagine:

        fd_fs = fsopen("tmpfs");
        fsconfig(fd_fs, ..., "nr_inodes", 0, "1000") = 0

Now ctx->inodes == 1000 and ctx->seen |= SHMEM_SEEN_INODES.

Now the user does:

        fsconfig(fd_fs, ..., "nr_inodes", 0, "-1234") = -EINVAL

This fails, but:

        ctx->inodes = memparse(param->string, &rest);
        if (*rest)
                goto bad_value;

will set ctx->inodes to whatever memparse returns but leaves
SHMEM_SEEN_INODES raised in ctx->seen. Now superblock creation may
succeed with a garbage inode limit. This should affect other mount
options as well.




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux