The NLS table for utf8 is broken and cannot be fixed. So instead of the broken utf8 NLS functions char2uni() and uni2char(), use the functions utf8s_to_utf16s() and utf16s_to_utf8s(), which implement correct conversion between UTF-16 and UTF-8. These functions also correctly process UTF-16 surrogate pairs, and therefore after this change the jfs driver is able to correctly handle file names with 4-byte UTF-8 sequences as well. When iocharset=utf8 is used, set sbi->nls_tab to NULL and use that to distinguish whether the NLS table or the native UTF-8 functions should be used. Signed-off-by: Pali Rohár <pali@xxxxxxxxxx> --- fs/jfs/jfs_unicode.c | 17 +++++++++++++++-- fs/jfs/super.c | 24 +++++++++++++++--------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c index 2db923872bf1..0b0b80063a98 100644 --- a/fs/jfs/jfs_unicode.c +++ b/fs/jfs/jfs_unicode.c @@ -46,6 +46,9 @@ int jfs_strfromUCS_le(char *to, int maxlen, const __le16 * from, } } } + } else { + outlen = utf16s_to_utf8s((const wchar_t *)from, len, + UTF16_LITTLE_ENDIAN, to, maxlen-1); } to[outlen] = 0; return outlen; @@ -61,6 +64,7 @@ static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len, struct nls_table *codepage) { int charlen; + int outlen; int i; if (codepage) { @@ -75,10 +79,19 @@ static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len, return charlen; } } + outlen = i; + } else { + outlen = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, + to, len); + if (outlen < 1) { + jfs_err("jfs_strtoUCS: utf8s_to_utf16s returned %d.", + outlen); + return outlen; + } } - to[i] = 0; - return i; + to[outlen] = 0; + return outlen; } /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index a2bb3d5d3f69..f26460147b62 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -261,16 +261,20 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, /* Don't do anything ;-) */ break; case Opt_iocharset: - if 
(nls_map && nls_map != (void *) -1) + if (nls_map && nls_map != (void *) -1) { unload_nls(nls_map); - /* compatibility alias none means ISO-8859-1 */ - if (strcmp(args[0].from, "none") == 0) - nls_map = load_nls("iso8859-1"); - else - nls_map = load_nls(args[0].from); - if (!nls_map) { - pr_err("JFS: charset not found\n"); - goto cleanup; + nls_map = NULL; + } + if (strcmp(args[0].from, "utf8") != 0) { + /* compatibility alias none means ISO-8859-1 */ + if (strcmp(args[0].from, "none") == 0) + nls_map = load_nls("iso8859-1"); + else + nls_map = load_nls(args[0].from); + if (!nls_map) { + pr_err("JFS: charset not found\n"); + goto cleanup; + } } break; case Opt_resize: @@ -713,6 +717,8 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",discard=%u", sbi->minblks_trim); if (sbi->nls_tab) seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); + else + seq_puts(seq, ",iocharset=utf8"); if (sbi->flag & JFS_ERR_CONTINUE) seq_printf(seq, ",errors=continue"); if (sbi->flag & JFS_ERR_PANIC) -- 2.20.1