Re: [PATCH v2 5/8] tmpfs: Add casefold lookup support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



André Almeida <andrealmeid@xxxxxxxxxx> writes:

> Enable casefold lookup in tmpfs, based on the enconding defined by
> userspace. That means that instead of comparing byte per byte a file
> name, it compares to a case-insensitive equivalent of the Unicode
> string.
>
> * Dcache handling
>
> There's a special need when dealing with case-insensitive dentries.
> First of all, we currently invalidated every negative casefold dentries.
> That happens because currently VFS code has no proper support to deal
> with that, giving that it could incorrectly reuse a previous filename
> for a new file that has a casefold match. For instance, this could
> happen:
>
> $ mkdir DIR
> $ rm -r DIR
> $ mkdir dir
> $ ls
> DIR/
>
> And would be perceived as inconsistency from userspace point of view,
> because even that we match files in a case-insensitive manner, we still
> honor whatever is the initial filename.
>
> Along with that, tmpfs stores only the first equivalent name dentry used
> in the dcache, preventing duplications of dentries in the dcache. The
> d_compare() version for casefold files uses a normalized string, so the
> filename under lookup will be compared to another normalized string for
> the existing file, achieving a casefolded lookup.
>
> * Enabling casefold via mount options
>
> Most filesystems have their data stored in disk, so casefold option need
> to be enabled when building a filesystem on a device (via mkfs).
> However, as tmpfs is a RAM backed filesystem, there's no disk
> information and thus no mkfs to store information about casefold.
>
> For tmpfs, create casefold options for mounting. Userspace can then
> enable casefold support for a mount point using:
>
> $ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/
>
> Userspace must set what Unicode standard is aiming to. The available
> options depends on what the kernel Unicode subsystem supports.
>
> And for strict encoding:
>
> $ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/
>
> Strict encoding means that tmpfs will refuse to create invalid UTF-8
> sequences. When this option is not enabled, any invalid sequence will be
> treated as an opaque byte sequence, ignoring the encoding thus not being
> able to be looked up in a case-insensitive way.
>
> Signed-off-by: André Almeida <andrealmeid@xxxxxxxxxx>
> ---
>  mm/shmem.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 111 insertions(+), 4 deletions(-)
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..0f918010bc54 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -40,6 +40,8 @@
>  #include <linux/fs_parser.h>
>  #include <linux/swapfile.h>
>  #include <linux/iversion.h>
> +#include <linux/unicode.h>
> +#include <linux/parser.h>
>  #include "swap.h"
>  
>  static struct vfsmount *shm_mnt __ro_after_init;
> @@ -123,6 +125,8 @@ struct shmem_options {
>  	bool noswap;
>  	unsigned short quota_types;
>  	struct shmem_quota_limits qlimits;
> +	struct unicode_map *encoding;
> +	bool strict_encoding;
>  #define SHMEM_SEEN_BLOCKS 1
>  #define SHMEM_SEEN_INODES 2
>  #define SHMEM_SEEN_HUGE 4
> @@ -3427,6 +3431,11 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	if (IS_ERR(inode))
>  		return PTR_ERR(inode);
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (!utf8_check_strict_name(dir, &dentry->d_name))
> +		return -EINVAL;
> +#endif

Please, fold it into the code when possible:

if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))

> +
>  	error = simple_acl_create(dir, inode);
>  	if (error)
>  		goto out_iput;
> @@ -3442,7 +3451,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +
> +	if (IS_CASEFOLDED(dir))

If you have if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))

It can be  optimized out when CONFIG_UNICODE=n.

> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  	dget(dentry); /* Extra count - pin the dentry in core */
>  	return error;
>  
> @@ -3533,7 +3547,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
>  	inc_nlink(inode);
>  	ihold(inode);	/* New dentry reference */
>  	dget(dentry);	/* Extra pinning count for the created dentry */
> -	d_instantiate(dentry, inode);
> +	if (IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  out:
>  	return ret;
>  }
> @@ -3553,6 +3570,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
>  	inode_inc_iversion(dir);
>  	drop_nlink(inode);
>  	dput(dentry);	/* Undo the count from "create" - does all the work */
> +
> +	/*
> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
> +	 * we invalidate them
> +	 */
> +	if (IS_CASEFOLDED(dir))
> +		d_invalidate(dentry);
> +

likewise and also below.

>  	return 0;
>  }
>  
> @@ -3697,7 +3722,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +	if (IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  	dget(dentry);
>  	return 0;
>  
> @@ -4050,6 +4078,8 @@ enum shmem_param {
>  	Opt_usrquota_inode_hardlimit,
>  	Opt_grpquota_block_hardlimit,
>  	Opt_grpquota_inode_hardlimit,
> +	Opt_casefold,
> +	Opt_strict_encoding,
>  };
>  
>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -4081,9 +4111,47 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
>  	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
>  #endif
> +	fsparam_string("casefold",	Opt_casefold),
> +	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
>  	{}
>  };
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
> +{
> +	struct shmem_options *ctx = fc->fs_private;
> +	unsigned int maj = 0, min = 0, rev = 0, version_number;
> +	char version[10];
> +	int ret;
> +	struct unicode_map *encoding;
> +
> +	if (strncmp(param->string, "utf8-", 5))
> +		return invalfc(fc, "Only utf8 encondings are supported");
> +	ret = strscpy(version, param->string + 5, sizeof(version));

the extra buffer and the copy seem unnecessary.  It won't live past this
function anyway. Can you just pass the offseted param->string to
utf8_parse_version?

> +	if (ret < 0)
> +		return invalfc(fc, "Invalid enconding argument: %s",
> +			       param->string);

enconding=>encoding

> +
> +	ret = utf8_parse_version(version, &maj, &min, &rev);
> +	if (ret)
> +		return invalfc(fc, "Invalid utf8 version: %s", version);
> +	version_number = UNICODE_AGE(maj, min, rev);
> +	encoding = utf8_load(version_number);

utf8_load(UNICODE_AGE(maj, min, rev));

and drop version_number.

> +	if (IS_ERR(encoding))
> +		return invalfc(fc, "Invalid utf8 version: %s", version);
> +	pr_info("tmpfs: Using encoding provided by mount options: %s\n",
> +		param->string);
> +	ctx->encoding = encoding;
> +
> +	return 0;
> +}
> +#else
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
> +{
> +	return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
> +}
> +#endif
> +
>  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  {
>  	struct shmem_options *ctx = fc->fs_private;
> @@ -4242,6 +4310,11 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  				       "Group quota inode hardlimit too large.");
>  		ctx->qlimits.grpquota_ihardlimit = size;
>  		break;
> +	case Opt_casefold:
> +		return shmem_parse_opt_casefold(fc, param);
> +	case Opt_strict_encoding:
> +		ctx->strict_encoding = true;
> +		break;
>  	}
>  	return 0;
>  
> @@ -4471,6 +4544,11 @@ static void shmem_put_super(struct super_block *sb)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (sb->s_encoding)
> +		utf8_unload(sb->s_encoding);
> +#endif
> +
>  #ifdef CONFIG_TMPFS_QUOTA
>  	shmem_disable_quotas(sb);
>  #endif
> @@ -4515,6 +4593,16 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	}
>  	sb->s_export_op = &shmem_export_ops;
>  	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (ctx->encoding) {
> +		sb->s_encoding = ctx->encoding;
> +		generic_set_sb_d_ops(sb);
> +		if (ctx->strict_encoding)
> +			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
> +	}
> +#endif
> +
>  #else
>  	sb->s_flags |= SB_NOUSER;
>  #endif
> @@ -4704,11 +4792,28 @@ static const struct inode_operations shmem_inode_operations = {
>  #endif
>  };
>  
> +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
> +{
> +	if (dentry->d_name.len > NAME_MAX)
> +		return ERR_PTR(-ENAMETOOLONG);
> +
> +	/*
> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
> +	 * we prevent them from being created
> +	 */
> +	if (IS_CASEFOLDED(dir))
> +		return NULL;
> +
> +	d_add(dentry, NULL);
> +
> +	return NULL;
> +}
> +
>  static const struct inode_operations shmem_dir_inode_operations = {
>  #ifdef CONFIG_TMPFS
>  	.getattr	= shmem_getattr,
>  	.create		= shmem_create,
> -	.lookup		= simple_lookup,
> +	.lookup		= shmem_lookup,

simple_lookup sets the dentry operations to enable a custom d_delete,
but this disables that. Without it, negative dentries will linger after
the filesystem is gone.

We want to preserve the current behavior both for normal and casefolding
directories.  You need a new flavor of generic_ci_dentry_ops with that
hook for casefolding shmem, as well as ensure &simple_dentry_operations
is still set for every dentry in non-casefolding volumes.

>  	.link		= shmem_link,
>  	.unlink		= shmem_unlink,
>  	.symlink	= shmem_symlink,
> @@ -4791,6 +4896,8 @@ int shmem_init_fs_context(struct fs_context *fc)
>  	ctx->uid = current_fsuid();
>  	ctx->gid = current_fsgid();
>  
> +	ctx->encoding = NULL;
> +
>  	fc->fs_private = ctx;
>  	fc->ops = &shmem_fs_context_ops;
>  	return 0;

-- 
Gabriel Krisman Bertazi





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux