Re: [PATCH v2] unicode: Expose available encodings in sysfs

Anirudh Rayabharam <anirudh@xxxxxxxxxxxxx> · Tue, 14 Apr 2020 15:03:13 +0530

On Mon, Apr 13, 2020 at 12:53:52PM -0400, Gabriel Krisman Bertazi wrote:
> A filesystem configuration utility has no way to detect which filename
> encodings are supported by the running kernel.  This means, for
> instance, mkfs has no way to tell if the generated filesystem will be
> mountable in the current kernel or not.  Also, users have no easy way to
> know if they can update the encoding in their filesystems and still have
> something functional in the end.
> 
> This exposes details of the encodings available in the unicode
> subsystem, to fill that gap.
> 
> Cc: Theodore Ts'o <tytso@xxxxxxx>
> Cc: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@xxxxxxxxxxxxx>
> 
> ---
> Changes since v1:
>   - Make init functions static. (lkp)
> 
>  Documentation/ABI/testing/sysfs-fs-unicode | 13 +++++
>  fs/unicode/utf8-core.c                     | 64 ++++++++++++++++++++++
>  fs/unicode/utf8-norm.c                     | 18 ++++++
>  fs/unicode/utf8n.h                         |  5 ++
>  4 files changed, 100 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-fs-unicode
> 
> diff --git a/Documentation/ABI/testing/sysfs-fs-unicode b/Documentation/ABI/testing/sysfs-fs-unicode
> new file mode 100644
> index 000000000000..15c63367bb8e
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-fs-unicode
> @@ -0,0 +1,13 @@
> +What:		/sys/fs/unicode/latest
> +Date:		April 2020
> +Contact:	Gabriel Krisman Bertazi <krisman@xxxxxxxxxxxxx>
> +Description:
> +		The latest version of the Unicode Standard supported by
> +		this kernel, reported as "UTF-8 <major>.<minor>.<revision>".
> +
> +What:		/sys/fs/unicode/encodings
> +Date:		April 2020
> +Contact:	Gabriel Krisman Bertazi <krisman@xxxxxxxxxxxxx>
> +Description:
> +		List of encodings and the versions of each supported by
> +		this kernel, as "UTF-8: <version> <version> ...", latest first.
> diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
> index 2a878b739115..b48e13e823a5 100644
> --- a/fs/unicode/utf8-core.c
> +++ b/fs/unicode/utf8-core.c
> @@ -6,6 +6,7 @@
>  #include <linux/parser.h>
>  #include <linux/errno.h>
>  #include <linux/unicode.h>
> +#include <linux/fs.h>
>  
>  #include "utf8n.h"
>  
> @@ -212,4 +213,67 @@ void utf8_unload(struct unicode_map *um)
>  }
>  EXPORT_SYMBOL(utf8_unload);
>  
> +static ssize_t latest_show(struct kobject *kobj,
> +			   struct kobj_attribute *attr, char *buf)
> +{
> +	int l = utf8version_latest();
> +
> +	return snprintf(buf, PAGE_SIZE, "UTF-8 %d.%d.%d\n", UNICODE_AGE_MAJ(l),
> +			UNICODE_AGE_MIN(l), UNICODE_AGE_REV(l));
> +
> +}
> +static ssize_t encodings_show(struct kobject *kobj,
> +			      struct kobj_attribute *attr, char *buf)
> +{
> +	int n;
> +
> +	n = snprintf(buf, PAGE_SIZE, "UTF-8:");
> +	n += utf8version_list(buf + n, PAGE_SIZE - n);
> +	n += snprintf(buf+n, PAGE_SIZE-n, "\n");

Could you add spaces around the '+' and '-' operators here, to match the line above?

	n += snprintf(buf + n, PAGE_SIZE - n, "\n");

Thanks,
Anirudh

> +
> +	return n;
> +}
> +
> +#define UCD_ATTR(x) \
> +	static struct kobj_attribute x ## _attr = __ATTR_RO(x)
> +
> +UCD_ATTR(latest);
> +UCD_ATTR(encodings);
> +
> +static struct attribute *ucd_attrs[] = {
> +	&latest_attr.attr,
> +	&encodings_attr.attr,
> +	NULL,
> +};
> +static const struct attribute_group ucd_attr_group = {
> +	.attrs = ucd_attrs,
> +};
> +static struct kobject *ucd_root;
> +
> +static int __init ucd_init(void)
> +{
> +	int ret;
> +
> +	ucd_root = kobject_create_and_add("unicode", fs_kobj);
> +	if (!ucd_root)
> +		return -ENOMEM;
> +
> +	ret = sysfs_create_group(ucd_root, &ucd_attr_group);
> +	if (ret) {
> +		kobject_put(ucd_root);
> +		ucd_root = NULL;
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void __exit ucd_exit(void)
> +{
> +	kobject_put(ucd_root);
> +}
> +
> +module_init(ucd_init);
> +module_exit(ucd_exit)
> +
>  MODULE_LICENSE("GPL v2");
> diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c
> index 1d2d2e5b906a..f9ebba89a138 100644
> --- a/fs/unicode/utf8-norm.c
> +++ b/fs/unicode/utf8-norm.c
> @@ -35,6 +35,24 @@ int utf8version_latest(void)
>  }
>  EXPORT_SYMBOL(utf8version_latest);
>  
> +int utf8version_list(char *buf, int len)
> +{
> +	int i = ARRAY_SIZE(utf8agetab) - 1;
> +	int ret = 0;
> +
> +	/*
> +	 * Print most relevant (latest) first.  No filesystem uses
> +	 * unicode older than 12.0.0, so don't expose those to userspace.
> +	 */
> +	for (; utf8agetab[i] >= UNICODE_AGE(12, 0, 0); i--) {
> +		ret += snprintf(buf+ret, len-ret, " %d.%d.%d",
> +				UNICODE_AGE_MAJ(utf8agetab[i]),
> +				UNICODE_AGE_MIN(utf8agetab[i]),
> +				UNICODE_AGE_REV(utf8agetab[i]));
> +	}
> +	return ret;
> +}
> +
>  /*
>   * UTF-8 valid ranges.
>   *
> diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h
> index 0acd530c2c79..5dea2c4af1f3 100644
> --- a/fs/unicode/utf8n.h
> +++ b/fs/unicode/utf8n.h
> @@ -21,9 +21,14 @@
>  	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
>  	 ((unsigned int)(REV)))
>  
> +#define UNICODE_AGE_MAJ(x) ((x) >> UNICODE_MAJ_SHIFT & 0xff)
> +#define UNICODE_AGE_MIN(x) ((x) >> UNICODE_MIN_SHIFT & 0xff)
> +#define UNICODE_AGE_REV(x) ((x) & 0xff)
> +
>  /* Highest unicode version supported by the data tables. */
>  extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
>  extern int utf8version_latest(void);
> +extern int utf8version_list(char *buf, int len);
>  
>  /*
>   * Look for the correct const struct utf8data for a unicode version.
> -- 
> 2.26.0
>