Hello Konstantin! On Friday 21 August 2020 16:25:15 Konstantin Komarov wrote: > diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c > new file mode 100644 > index 000000000000..5f1105f1283c > --- /dev/null > +++ b/fs/ntfs3/dir.c > @@ -0,0 +1,529 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * linux/fs/ntfs3/dir.c > + * > + * Copyright (C) 2019-2020 Paragon Software GmbH, All rights reserved. > + * > + * directory handling functions for ntfs-based filesystems > + * > + */ > +#include <linux/blkdev.h> > +#include <linux/buffer_head.h> > +#include <linux/fs.h> > +#include <linux/iversion.h> > +#include <linux/nls.h> > + > +#include "debug.h" > +#include "ntfs.h" > +#include "ntfs_fs.h" > + > +/* > + * Convert little endian Unicode 16 to UTF-8. I guess that by "Unicode 16" you mean UTF-16, right? Anyway, the comment is incorrect, as the function supports neither UTF-16 nor UTF-8. This function works only with the UCS-2 encoding (not full UTF-16) and converts the input buffer to the NLS encoding, not UTF-8. Moreover, the kernel's NLS API does not support full UTF-8: the NLS UTF-8 encoding is semi-broken and limited to just 3-byte sequences, which means it does not allow access to all Unicode filenames. So the result is that the comment for the uni_to_x8 function is incorrect. I would suggest not using the NLS API for encoding from/to UTF-8, but rather using the utf16s_to_utf8s() and utf8s_to_utf16s() functions. See, for example, how it is implemented in the exfat driver: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/exfat/nls.c Look for the functions exfat_utf16_to_nls() and exfat_nls_to_utf16(). Ideally, check whether you can store the character 💩 (Pile of Poo, U+1F4A9, which does not fit into a 3-byte UTF-8 sequence) in a filename and whether there is correct interoperability between Windows and this new ntfs3 implementation. 
> + */ > +int uni_to_x8(ntfs_sb_info *sbi, const struct le_str *uni, u8 *buf, int buf_len) > +{ > + const __le16 *ip = uni->name; > + u8 *op = buf; > + struct nls_table *nls = sbi->nls; > + int uni_len = uni->len; > + > + static_assert(sizeof(wchar_t) == sizeof(__le16)); > + > + while (uni_len--) { > + u16 ec; > + int charlen; > + > + if (buf_len < NLS_MAX_CHARSET_SIZE) { > + ntfs_warning( > + sbi->sb, > + "filename was truncated while converting."); > + break; > + } > + > + ec = le16_to_cpu(*ip++); > + charlen = nls->uni2char(ec, op, buf_len); > + > + if (charlen > 0) { > + op += charlen; > + buf_len -= charlen; > + } else { > + *op++ = ':'; > + op = hex_byte_pack(op, ec >> 8); > + op = hex_byte_pack(op, ec); > + buf_len -= 5; > + } > + } > + > + *op = 0; > + return op - buf; > +} > + > +static inline u8 get_digit(u8 d) > +{ > + u8 x = d & 0xf; > + > + return x <= 9 ? ('0' + x) : ('A' + x - 10); > +} > + > +/* > + * Convert input string to unicode > + * max_ulen - maximum possible unicode length > + * endian - unicode endian > + */ > +int x8_to_uni(ntfs_sb_info *sbi, const u8 *name, u32 name_len, > + struct cpu_str *uni, u32 max_ulen, enum utf16_endian endian) > +{ > + int i, ret, clen; > + u32 tail; > + const u8 *str = name; > + const u8 *end = name + name_len; > + u16 *uname = uni->name; > + struct nls_table *nls = sbi->nls; > + int warn = 0; > + > + static_assert(sizeof(wchar_t) == sizeof(u16)); > + > + for (ret = 0; str < end; ret += 1, uname += 1, str += clen) { > + if (ret >= max_ulen) > + return -ENAMETOOLONG; > + tail = end - str; > + > + clen = nls->char2uni(str, tail, uname); > + if (clen > 0) > + continue; > + > + if (!warn) { > + warn = 1; > + ntfs_warning( > + sbi->sb, > + "%s -> unicode failed: '%.*s', pos %d, chars %x %x %x", > + nls->charset, name_len, name, (int)(str - name), > + str[0], tail > 1 ? str[1] : 0, > + tail > 2 ? 
str[2] : 0); > + } > + > + if (ret + 3 > max_ulen) > + return -ENAMETOOLONG; > + > + uname[0] = '%'; > + uname[1] = get_digit(*str >> 4); > + uname[2] = get_digit(*str >> 0); > + > + uname += 2; > + ret += 2; // +1 will be added in for ( .... ) > + clen = 1; > + } > + > +#ifdef __BIG_ENDIAN > + if (endian == UTF16_LITTLE_ENDIAN) { > + __le16 *uname = (__le16 *)uni->name; > + > + for (i = 0; i < ret; i++, uname++) > + *uname = cpu_to_le16(*name); > + } > +#else > + if (endian == UTF16_BIG_ENDIAN) { > + __be16 *uname = (__be16 *)uni->name; > + > + for (i = 0; i < ret; i++, uname++) > + *uname = cpu_to_be16(*name); > + } > +#endif > + > + uni->len = ret; > + return ret; > +}