[PATCH 4/8] Support non-BMP characters on HFS+.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This one is a little bit tricky: HFS+ transforms UTF-16, but because it was designed without any attention to non-BMP characters, those characters are neither decomposed nor case-folded.

Signed-off-by: Vladimir Serbinenko <phcoder@xxxxxxxxx>
---
 fs/hfsplus/unicode.c |   76 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 5b2c8de..161a23b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -97,6 +97,11 @@ int hfsplus_strcmp(const struct hfsplus_unistr *s1,
 #define Hangul_TCount	28
 #define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
 
+#define SURROGATE_MASK	0xfffff800
+#define SURROGATE_PAIR	0x0000d800
+#define SURROGATE_LOW	0x00000400
+#define SURROGATE_BITS	0x000003ff
+
 
 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 {
@@ -189,6 +194,9 @@ int hfsplus_uni2asc(struct super_block *sb,
 				c0 = ':';
 				break;
 			}
+
+			if ((c0 & SURROGATE_MASK) == SURROGATE_PAIR)
+				goto same;
 			res = nls->uni2char(c0, op, len);
 			if (res < 0) {
 				if (res == -ENAMETOOLONG)
@@ -232,7 +240,19 @@ same:
 			cc = c0;
 		}
 done:
-		res = nls->uni2char(cc, op, len);
+		if ((cc & SURROGATE_MASK) == SURROGATE_PAIR
+		    && !(cc & SURROGATE_LOW)
+		    && ustrlen
+		    && (be16_to_cpu(*ip) & SURROGATE_MASK) == SURROGATE_PAIR
+		    && (be16_to_cpu(*ip) & SURROGATE_LOW)) {
+			unicode_t complete;
+			complete = (c0 & SURROGATE_BITS) << 10;
+			complete |= (be16_to_cpu(*ip++) & SURROGATE_BITS);
+			complete += 0x10000;
+			ustrlen--;
+			res = nls->uni2char(complete, op, len);
+		} else
+			res = nls->uni2char(cc, op, len);
 		if (res < 0) {
 			if (res == -ENAMETOOLONG)
 				goto out;
@@ -256,7 +276,7 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 			      unicode_t *uc)
 {
 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
-	if (size <= 0 || *uc > 0xffff) {
+	if (size <= 0) {
 		*uc = '?';
 		size = 1;
 	}
@@ -272,10 +292,13 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 }
 
 /* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+static inline u16 *decompose_unichar(unicode_t uc, int *size)
 {
 	int off;
 
+	if (uc >= 0x10000)
+		return NULL;
+
 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
 	if (off == 0 || off == 0xffff)
 		return NULL;
@@ -316,8 +339,16 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
 			do {
 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
 			} while (--dsize > 0);
-		} else
-			ustr->unicode[outlen++] = cpu_to_be16(c);
+		} else {
+			int s;
+			s = unicode_to_utf16s(c, UTF16_BIG_ENDIAN,
+					      ustr->unicode + outlen,
+					      HFSPLUS_MAX_STRLEN - outlen);
+			if (s <= 0)
+				break;
+
+			outlen += s;
+		}
 
 		astr += size;
 		len -= size;
@@ -342,7 +373,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
 	int casefold, decompose, size, len;
 	unsigned long hash;
 	unicode_t c;
-	u16 c2;
+	unicode_t c2;
 
 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -369,9 +400,17 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
 			} while (--dsize > 0);
 		} else {
 			c2 = c;
-			if (casefold)
+			if (casefold && c2 < 0x10000)
 				c2 = case_fold(c2);
-			if (!casefold || c2)
+			if (c2 >= 0x10000) {
+				int i, s;
+				u16 tmp[2];
+				s = unicode_to_utf16s(c2,
+						      UTF16_HOST_ENDIAN,
+						      tmp, 2);
+				for (i = 0; i < s; i++)
+					hash = partial_name_hash(tmp[i], hash);
+			} else if (!casefold || c2)
 				hash = partial_name_hash(c2, hash);
 		}
 	}
@@ -395,6 +434,7 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 	int dsize1, dsize2, len1, len2;
 	const u16 *dstr1, *dstr2;
 	const char *astr1, *astr2;
+	u16 buf1[2], buf2[2];
 	u16 c1, c2;
 	unicode_t c;
 
@@ -416,9 +456,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 			if (decompose)
 				dstr1 = decompose_unichar(c, &dsize1);
 			if (!decompose || !dstr1) {
-				c1 = c;
-				dstr1 = &c1;
-				dsize1 = 1;
+				int s;
+				s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+						      buf1, 2);
+				if (s <= 0)
+					s = 0;
+				dstr1 = buf1;
+				dsize1 = s;
 			}
 		}
 
@@ -430,9 +474,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 			if (decompose)
 				dstr2 = decompose_unichar(c, &dsize2);
 			if (!decompose || !dstr2) {
-				c2 = c;
-				dstr2 = &c2;
-				dsize2 = 1;
+				int s;
+				s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+						      buf2, 2);
+				if (s <= 0)
+					s = 0;
+				dstr2 = buf2;
+				dsize2 = s;
 			}
 		}
 
-- 
1.7.10

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux