This is used for UDF and some other FS non-BMP support.
This series requires my previously sent
[PATCH 1/8] Support full unicode in uni2char and char2uni
Can resend if needed.

Signed-off-by: Vladimir Serbinenko <phcoder@xxxxxxxxx>
---
 fs/nls/nls_base.c   |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nls/nls_utf8.c   |    2 +-
 include/linux/nls.h |    6 +++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 4f6d1ae..0c1ad5b 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -171,6 +171,32 @@ int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf8s_to_utf16s);
 
+int unicode_to_utf16s(unicode_t u, enum utf16_endian endian,
+		      wchar_t *pwcs, int maxout)
+{
+	u16 *op = pwcs;
+
+	op = pwcs;
+
+	if (u >= PLANE_SIZE) {
+		if (maxout < 2)
+			return -1;
+		u -= PLANE_SIZE;
+		put_utf16(op++, SURROGATE_PAIR |
+			  ((u >> 10) & SURROGATE_BITS),
+			  endian);
+		put_utf16(op++, SURROGATE_PAIR |
+			  SURROGATE_LOW |
+			  (u & SURROGATE_BITS),
+			  endian);
+		return 2;
+	} else {
+		put_utf16(op++, u, endian);
+		return 1;
+	}
+}
+EXPORT_SYMBOL(unicode_to_utf16s);
+
 static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
 {
 	switch (endian) {
@@ -232,6 +258,43 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf16s_to_utf8s);
 
+int utf16s_to_unicode(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
+		      unicode_t *uni)
+{
+	unsigned long u, v;
+	const wchar_t *pwcs0 = pwcs;
+
+	while (inlen > 0) {
+		u = get_utf16(*pwcs, endian);
+		if (!u)
+			break;
+		pwcs++;
+		inlen--;
+		if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
+			if (u & SURROGATE_LOW) {
+				/* Ignore character and move on */
+				continue;
+			}
+			if (inlen <= 0)
+				break;
+			v = get_utf16(*pwcs, endian);
+			if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
+			    !(v & SURROGATE_LOW)) {
+				/* Ignore character and move on */
+				continue;
+			}
+			u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
+				+ (v & SURROGATE_BITS);
+			pwcs++;
+			inlen--;
+		}
+		*uni = u;
+		return pwcs - pwcs0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(utf16s_to_unicode);
+
 int register_nls(struct nls_table * nls)
 {
 	struct nls_table ** tmp = &tables;
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index eb6392e..a3b3de0 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -37,7 +37,7 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 		*uni = 0x003f;	/* ? */
 		return -EINVAL;
 	}
-	*uni = (wchar_t) u;
+	*uni = u;
 	return n;
 }
 
diff --git a/include/linux/nls.h b/include/linux/nls.h
index c0292dd..7de1765 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -50,12 +50,18 @@ extern struct nls_table *load_nls(char *);
 extern void unload_nls(struct nls_table *);
 extern struct nls_table *load_nls_default(void);
 
+#define MAX_UTF16_PER_UNICODE 2
+
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
 extern int utf8s_to_utf16s(const u8 *s, int len,
 		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);
+int unicode_to_utf16s(unicode_t u, enum utf16_endian endian,
+		      wchar_t *pwcs, int maxout);
+int utf16s_to_unicode(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
+		      unicode_t *uni);
 
 static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c)
 {
-- 
1.7.10

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko
Attachment:
signature.asc
Description: OpenPGP digital signature