UTF-8 encoding loadable by converting it into a module.
Also, modify the file called unicode-core which will act as a layer for
unicode subsystem. It will load the UTF-8 module and access it's functions
whenever any filesystem that needs unicode is mounted.
Signed-off-by: Shreeya Patel <shreeya.patel@xxxxxxxxxxxxx>
---
Changes in v2
- Remove the duplicate file utf8-core.c
- Make the wrapper functions inline.
- Remove msleep and use try_module_get() and module_put()
for ensuring that module is loaded correctly and also
doesn't get unloaded while in use.
fs/unicode/Kconfig | 11 +-
fs/unicode/Makefile | 5 +-
fs/unicode/unicode-core.c | 229 +++++------------------------------
fs/unicode/utf8mod.c | 246 ++++++++++++++++++++++++++++++++++++++
include/linux/unicode.h | 73 ++++++++---
5 files changed, 346 insertions(+), 218 deletions(-)
create mode 100644 fs/unicode/utf8mod.c
diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig
index 2c27b9a5cd6c..2961b0206b4d 100644
--- a/fs/unicode/Kconfig
+++ b/fs/unicode/Kconfig
@@ -8,7 +8,16 @@ config UNICODE
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.
+# UTF-8 encoding can be compiled as a module using UNICODE_UTF8 option.
+# Having UTF-8 encoding as a module will avoid carrying large
+# database table present in utf8data.h_shipped into the kernel
+# by being able to load it only when it is required by the filesystem.
+config UNICODE_UTF8
+ tristate "UTF-8 module"
+ depends on UNICODE
+ default m
+
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
- depends on UNICODE
+ depends on UNICODE_UTF8
default n
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index fbf9a629ed0d..9dbb04194b32 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -1,11 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_UNICODE) += unicode.o
+obj-$(CONFIG_UNICODE_UTF8) += utf8.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
-unicode-y := utf8-norm.o unicode-core.o
+unicode-y := unicode-core.o
+utf8-y := utf8mod.o utf8-norm.o
$(obj)/utf8-norm.o: $(obj)/utf8data.h
+$(obj)/utf8mod.o: $(obj)/utf8-norm.o
# In the normal build, the checked-in utf8data.h is just shipped.
#
diff --git a/fs/unicode/unicode-core.c b/fs/unicode/unicode-core.c
index 287a8a48836c..945984a3fe9e 100644
--- a/fs/unicode/unicode-core.c
+++ b/fs/unicode/unicode-core.c
@@ -1,235 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/kernel.h>
-#include <linux/string.h>
#include <linux/slab.h>
-#include <linux/parser.h>
#include <linux/errno.h>
#include <linux/unicode.h>
-#include <linux/stringhash.h>
-#include "utf8n.h"
-int unicode_validate(const struct unicode_map *um, const struct qstr *str)
-{
- const struct utf8data *data = utf8nfdi(um->version);
-
- if (utf8nlen(data, str->name, str->len) < 0)
- return -1;
- return 0;
-}
-EXPORT_SYMBOL(unicode_validate);
-
-int unicode_strncmp(const struct unicode_map *um,
- const struct qstr *s1, const struct qstr *s2)
-{
- const struct utf8data *data = utf8nfdi(um->version);
- struct utf8cursor cur1, cur2;
- int c1, c2;
-
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
- return -EINVAL;
-
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
- return -EINVAL;
-
- do {
- c1 = utf8byte(&cur1);
- c2 = utf8byte(&cur2);
-
- if (c1 < 0 || c2 < 0)
- return -EINVAL;
- if (c1 != c2)
- return 1;
- } while (c1);
-
- return 0;
-}
-EXPORT_SYMBOL(unicode_strncmp);
-
-int unicode_strncasecmp(const struct unicode_map *um,
- const struct qstr *s1, const struct qstr *s2)
-{
- const struct utf8data *data = utf8nfdicf(um->version);
- struct utf8cursor cur1, cur2;
- int c1, c2;
+struct unicode_ops *utf8_ops;
+EXPORT_SYMBOL(utf8_ops);
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
- return -EINVAL;
-
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
- return -EINVAL;
-
- do {
- c1 = utf8byte(&cur1);
- c2 = utf8byte(&cur2);
-
- if (c1 < 0 || c2 < 0)
- return -EINVAL;
- if (c1 != c2)
- return 1;
- } while (c1);
-
- return 0;
-}
-EXPORT_SYMBOL(unicode_strncasecmp);
-
-/* String cf is expected to be a valid UTF-8 casefolded
- * string.
- */
-int unicode_strncasecmp_folded(const struct unicode_map *um,
- const struct qstr *cf,
- const struct qstr *s1)
+static int unicode_load_module(void)
{
- const struct utf8data *data = utf8nfdicf(um->version);
- struct utf8cursor cur1;
- int c1, c2;
- int i = 0;
-
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
- return -EINVAL;
-
- do {
- c1 = utf8byte(&cur1);
- c2 = cf->name[i++];
- if (c1 < 0)
- return -EINVAL;
- if (c1 != c2)
- return 1;
- } while (c1);
-
- return 0;
-}
-EXPORT_SYMBOL(unicode_strncasecmp_folded);
-
-int unicode_casefold(const struct unicode_map *um, const struct qstr *str,
- unsigned char *dest, size_t dlen)
-{
- const struct utf8data *data = utf8nfdicf(um->version);
- struct utf8cursor cur;
- size_t nlen = 0;
-
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
- return -EINVAL;
+ int ret = request_module("utf8");
- for (nlen = 0; nlen < dlen; nlen++) {
- int c = utf8byte(&cur);
-
- dest[nlen] = c;
- if (!c)
- return nlen;
- if (c == -1)
- break;
+ if (ret) {
+ pr_err("Failed to load UTF-8 module\n");
+ return ret;
}
- return -EINVAL;
-}
-EXPORT_SYMBOL(unicode_casefold);
-
-int unicode_casefold_hash(const struct unicode_map *um, const void *salt,
- struct qstr *str)
-{
- const struct utf8data *data = utf8nfdicf(um->version);
- struct utf8cursor cur;
- int c;
- unsigned long hash = init_name_hash(salt);
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
- return -EINVAL;
-
- while ((c = utf8byte(&cur))) {
- if (c < 0)
- return -EINVAL;
- hash = partial_name_hash((unsigned char)c, hash);
- }
- str->hash = end_name_hash(hash);
return 0;
}
-EXPORT_SYMBOL(unicode_casefold_hash);
-int unicode_normalize(const struct unicode_map *um, const struct qstr *str,
- unsigned char *dest, size_t dlen)
+struct unicode_map *unicode_load(const char *version)
{
- const struct utf8data *data = utf8nfdi(um->version);
- struct utf8cursor cur;
- ssize_t nlen = 0;
+ int ret = unicode_load_module();
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
- return -EINVAL;
+ if (ret)
+ return ERR_PTR(ret);
- for (nlen = 0; nlen < dlen; nlen++) {
- int c = utf8byte(&cur);
+ if (utf8_ops && !try_module_get(utf8_ops->owner))
+ return ERR_PTR(-ENODEV);
- dest[nlen] = c;
- if (!c)
- return nlen;
- if (c == -1)
- break;
- }
- return -EINVAL;
+ else
+ return utf8_ops->load(version);