André Almeida <andrealmeid@xxxxxxxxxx> writes: > Enable casefold lookup in tmpfs, based on the enconding defined by > userspace. That means that instead of comparing byte per byte a file > name, it compares to a case-insensitive equivalent of the Unicode > string. > > * Dcache handling > > There's a special need when dealing with case-insensitive dentries. > First of all, we currently invalidated every negative casefold dentries. > That happens because currently VFS code has no proper support to deal > with that, giving that it could incorrectly reuse a previous filename > for a new file that has a casefold match. For instance, this could > happen: > > $ mkdir DIR > $ rm -r DIR > $ mkdir dir > $ ls > DIR/ > > And would be perceived as inconsistency from userspace point of view, > because even that we match files in a case-insensitive manner, we still > honor whatever is the initial filename. > > Along with that, tmpfs stores only the first equivalent name dentry used > in the dcache, preventing duplications of dentries in the dcache. The > d_compare() version for casefold files uses a normalized string, so the > filename under lookup will be compared to another normalized string for > the existing file, achieving a casefolded lookup. > > * Enabling casefold via mount options > > Most filesystems have their data stored in disk, so casefold option need > to be enabled when building a filesystem on a device (via mkfs). > However, as tmpfs is a RAM backed filesystem, there's no disk > information and thus no mkfs to store information about casefold. > > For tmpfs, create casefold options for mounting. Userspace can then > enable casefold support for a mount point using: > > $ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/ > > Userspace must set what Unicode standard is aiming to. The available > options depends on what the kernel Unicode subsystem supports. > > And for strict encoding: > > $ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/ > > Strict encoding means that tmpfs will refuse to create invalid UTF-8 > sequences. When this option is not enabled, any invalid sequence will be > treated as an opaque byte sequence, ignoring the encoding thus not being > able to be looked up in a case-insensitive way. > > Signed-off-by: André Almeida <andrealmeid@xxxxxxxxxx> > --- > mm/shmem.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 111 insertions(+), 4 deletions(-) > > diff --git a/mm/shmem.c b/mm/shmem.c > index 5a77acf6ac6a..0f918010bc54 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -40,6 +40,8 @@ > #include <linux/fs_parser.h> > #include <linux/swapfile.h> > #include <linux/iversion.h> > +#include <linux/unicode.h> > +#include <linux/parser.h> > #include "swap.h" > > static struct vfsmount *shm_mnt __ro_after_init; > @@ -123,6 +125,8 @@ struct shmem_options { > bool noswap; > unsigned short quota_types; > struct shmem_quota_limits qlimits; > + struct unicode_map *encoding; > + bool strict_encoding; > #define SHMEM_SEEN_BLOCKS 1 > #define SHMEM_SEEN_INODES 2 > #define SHMEM_SEEN_HUGE 4 > @@ -3427,6 +3431,11 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, > if (IS_ERR(inode)) > return PTR_ERR(inode); > > +#if IS_ENABLED(CONFIG_UNICODE) > + if (!utf8_check_strict_name(dir, &dentry->d_name)) > + return -EINVAL; > +#endif Please, fold it into the code when possible: if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) > + > error = simple_acl_create(dir, inode); > if (error) > goto out_iput; > @@ -3442,7 +3451,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, > dir->i_size += BOGO_DIRENT_SIZE; > inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); > inode_inc_iversion(dir); > - d_instantiate(dentry, inode); > + > + if (IS_CASEFOLDED(dir)) If you have if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) It can be optimized out when CONFIG_UNICODE=n. > + d_add(dentry, inode); > + else > + d_instantiate(dentry, inode); > dget(dentry); /* Extra count - pin the dentry in core */ > return error; > > @@ -3533,7 +3547,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, > inc_nlink(inode); > ihold(inode); /* New dentry reference */ > dget(dentry); /* Extra pinning count for the created dentry */ > - d_instantiate(dentry, inode); > + if (IS_CASEFOLDED(dir)) > + d_add(dentry, inode); > + else > + d_instantiate(dentry, inode); > out: > return ret; > } > @@ -3553,6 +3570,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) > inode_inc_iversion(dir); > drop_nlink(inode); > dput(dentry); /* Undo the count from "create" - does all the work */ > + > + /* > + * For now, VFS can't deal with case-insensitive negative dentries, so > + * we invalidate them > + */ > + if (IS_CASEFOLDED(dir)) > + d_invalidate(dentry); > + likewise and also below. > return 0; > } > > @@ -3697,7 +3722,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, > dir->i_size += BOGO_DIRENT_SIZE; > inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); > inode_inc_iversion(dir); > - d_instantiate(dentry, inode); > + if (IS_CASEFOLDED(dir)) > + d_add(dentry, inode); > + else > + d_instantiate(dentry, inode); > dget(dentry); > return 0; > > @@ -4050,6 +4078,8 @@ enum shmem_param { > Opt_usrquota_inode_hardlimit, > Opt_grpquota_block_hardlimit, > Opt_grpquota_inode_hardlimit, > + Opt_casefold, > + Opt_strict_encoding, > }; > > static const struct constant_table shmem_param_enums_huge[] = { > @@ -4081,9 +4111,47 @@ const struct fs_parameter_spec shmem_fs_parameters[] = { > fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit), > fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit), > #endif > + fsparam_string("casefold", Opt_casefold), > + fsparam_flag ("strict_encoding", Opt_strict_encoding), > {} > }; > > +#if IS_ENABLED(CONFIG_UNICODE) > +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param) > +{ > + struct shmem_options *ctx = fc->fs_private; > + unsigned int maj = 0, min = 0, rev = 0, version_number; > + char version[10]; > + int ret; > + struct unicode_map *encoding; > + > + if (strncmp(param->string, "utf8-", 5)) > + return invalfc(fc, "Only utf8 encondings are supported"); > + ret = strscpy(version, param->string + 5, sizeof(version)); the extra buffer and the copy seem unnecessary. It won't live past this function anyway. Can you just pass the offseted param->string to utf8_parse_version? > + if (ret < 0) > + return invalfc(fc, "Invalid enconding argument: %s", > + param->string); enconding=>encoding > + > + ret = utf8_parse_version(version, &maj, &min, &rev); > + if (ret) > + return invalfc(fc, "Invalid utf8 version: %s", version); > + version_number = UNICODE_AGE(maj, min, rev); > + encoding = utf8_load(version_number); utf8_load(UNICODE_AGE(maj, min, rev)); and drop version_number. > + if (IS_ERR(encoding)) > + return invalfc(fc, "Invalid utf8 version: %s", version); > + pr_info("tmpfs: Using encoding provided by mount options: %s\n", > + param->string); > + ctx->encoding = encoding; > + > + return 0; > +} > +#else > +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param) > +{ > + return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n"); > +} > +#endif > + > static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) > { > struct shmem_options *ctx = fc->fs_private; > @@ -4242,6 +4310,11 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) > "Group quota inode hardlimit too large."); > ctx->qlimits.grpquota_ihardlimit = size; > break; > + case Opt_casefold: > + return shmem_parse_opt_casefold(fc, param); > + case Opt_strict_encoding: > + ctx->strict_encoding = true; > + break; > } > return 0; > > @@ -4471,6 +4544,11 @@ static void shmem_put_super(struct super_block *sb) > { > struct shmem_sb_info *sbinfo = SHMEM_SB(sb); > > +#if IS_ENABLED(CONFIG_UNICODE) > + if (sb->s_encoding) > + utf8_unload(sb->s_encoding); > +#endif > + > #ifdef CONFIG_TMPFS_QUOTA > shmem_disable_quotas(sb); > #endif > @@ -4515,6 +4593,16 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) > } > sb->s_export_op = &shmem_export_ops; > sb->s_flags |= SB_NOSEC | SB_I_VERSION; > + > +#if IS_ENABLED(CONFIG_UNICODE) > + if (ctx->encoding) { > + sb->s_encoding = ctx->encoding; > + generic_set_sb_d_ops(sb); > + if (ctx->strict_encoding) > + sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL; > + } > +#endif > + > #else > sb->s_flags |= SB_NOUSER; > #endif > @@ -4704,11 +4792,28 @@ static const struct inode_operations shmem_inode_operations = { > #endif > }; > > +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) > +{ > + if (dentry->d_name.len > NAME_MAX) > + return ERR_PTR(-ENAMETOOLONG); > + > + /* > + * For now, VFS can't deal with case-insensitive negative dentries, so > + * we prevent them from being created > + */ > + if (IS_CASEFOLDED(dir)) > + return NULL; > + > + d_add(dentry, NULL); > + > + return NULL; > +} > + > static const struct inode_operations shmem_dir_inode_operations = { > #ifdef CONFIG_TMPFS > .getattr = shmem_getattr, > .create = shmem_create, > - .lookup = simple_lookup, > + .lookup = shmem_lookup, simple_lookup sets the dentry operations to enable a custom d_delete, but this disables that. Without it, negative dentries will linger after the filesystem is gone. We want to preserve the current behavior both for normal and casefolding directories. You need a new flavor of generic_ci_dentry_ops with that hook for casefolding shmem, as well as ensure &simple_dentry_operations is still set for every dentry in non-casefolding volumes. > .link = shmem_link, > .unlink = shmem_unlink, > .symlink = shmem_symlink, > @@ -4791,6 +4896,8 @@ int shmem_init_fs_context(struct fs_context *fc) > ctx->uid = current_fsuid(); > ctx->gid = current_fsgid(); > > + ctx->encoding = NULL; > + > fc->fs_private = ctx; > fc->ops = &shmem_fs_context_ops; > return 0; -- Gabriel Krisman Bertazi