This one is a little bit tricky: HFS+ transforms UTF-16, but since it was designed without any attention to non-BMP characters, they are not decomposed or case-folded. Signed-off-by: Vladimir Serbinenko <phcoder@xxxxxxxxx> --- fs/hfsplus/unicode.c | 76 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 5b2c8de..161a23b 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -97,6 +97,11 @@ int hfsplus_strcmp(const struct hfsplus_unistr *s1, #define Hangul_TCount 28 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) { @@ -189,6 +194,9 @@ int hfsplus_uni2asc(struct super_block *sb, c0 = ':'; break; } + + if ((c0 & SURROGATE_MASK) == SURROGATE_PAIR) + goto same; res = nls->uni2char(c0, op, len); if (res < 0) { if (res == -ENAMETOOLONG) @@ -232,7 +240,19 @@ same: cc = c0; } done: - res = nls->uni2char(cc, op, len); + if ((cc & SURROGATE_MASK) == SURROGATE_PAIR + && !(cc & SURROGATE_LOW) + && ustrlen + && (be16_to_cpu(*ip) & SURROGATE_MASK) == SURROGATE_PAIR + && (be16_to_cpu(*ip) & SURROGATE_LOW)) { + unicode_t complete; + complete = (c0 & SURROGATE_BITS) << 10; + complete |= (be16_to_cpu(*ip++) & SURROGATE_BITS); + complete += 0x10000; + ustrlen--; + res = nls->uni2char(complete, op, len); + } else + res = nls->uni2char(cc, op, len); if (res < 0) { if (res == -ENAMETOOLONG) goto out; @@ -256,7 +276,7 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len, unicode_t *uc) { int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); - if (size <= 0 || *uc > 0xffff) { + if (size <= 0) { *uc = '?'; size = 1; } @@ -272,10 +292,13 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len, } /* Decomposes a single 
unicode character. */ -static inline u16 *decompose_unichar(wchar_t uc, int *size) +static inline u16 *decompose_unichar(unicode_t uc, int *size) { int off; + if (uc >= 0x10000) + return NULL; + off = hfsplus_decompose_table[(uc >> 12) & 0xf]; if (off == 0 || off == 0xffff) return NULL; @@ -316,8 +339,16 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, do { ustr->unicode[outlen++] = cpu_to_be16(*dstr++); } while (--dsize > 0); - } else - ustr->unicode[outlen++] = cpu_to_be16(c); + } else { + int s; + s = unicode_to_utf16s(c, UTF16_BIG_ENDIAN, + ustr->unicode + outlen, + HFSPLUS_MAX_STRLEN - outlen); + if (s <= 0) + break; + + outlen += s; + } astr += size; len -= size; @@ -342,7 +373,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, int casefold, decompose, size, len; unsigned long hash; unicode_t c; - u16 c2; + unicode_t c2; casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); @@ -369,9 +400,17 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, } while (--dsize > 0); } else { c2 = c; - if (casefold) + if (casefold && c2 < 0x10000) c2 = case_fold(c2); - if (!casefold || c2) + if (c2 >= 0x10000) { + int i, s; + u16 tmp[2]; + s = unicode_to_utf16s(c2, + UTF16_HOST_ENDIAN, + tmp, 2); + for (i = 0; i < s; i++) + hash = partial_name_hash(tmp[i], hash); + } else if (!casefold || c2) hash = partial_name_hash(c2, hash); } } @@ -395,6 +434,7 @@ int hfsplus_compare_dentry(const struct dentry *parent, int dsize1, dsize2, len1, len2; const u16 *dstr1, *dstr2; const char *astr1, *astr2; + u16 buf1[2], buf2[2]; u16 c1, c2; unicode_t c; @@ -416,9 +456,13 @@ int hfsplus_compare_dentry(const struct dentry *parent, if (decompose) dstr1 = decompose_unichar(c, &dsize1); if (!decompose || !dstr1) { - c1 = c; - dstr1 = &c1; - dsize1 = 1; + int s; + s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN, + buf1, 2); + if (s 
<= 0) + s = 0; + dstr1 = buf1; + dsize1 = s; } } @@ -430,9 +474,13 @@ int hfsplus_compare_dentry(const struct dentry *parent, if (decompose) dstr2 = decompose_unichar(c, &dsize2); if (!decompose || !dstr2) { - c2 = c; - dstr2 = &c2; - dsize2 = 1; + int s; + s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN, + buf2, 2); + if (s <= 0) + s = 0; + dstr2 = buf2; + dsize2 = s; } } -- 1.7.10 -- Regards Vladimir 'φ-coder/phcoder' Serbinenko
Attachment:
signature.asc
Description: OpenPGP digital signature