[PATCH 4/8] Support non-BMP characters on HFS+.

From: Vladimir 'φ-coder/phcoder' Serbinenko
Date: Tue May 15 2012 - 19:07:08 EST


This one is a little bit tricky: HFS+ transforms UTF-16 strings, but because it was designed without any attention to non-BMP characters, those characters are neither decomposed nor case-folded.

Signed-off-by: Vladimir Serbinenko <phcoder@xxxxxxxxx>
---
fs/hfsplus/unicode.c | 76 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 5b2c8de..161a23b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -97,6 +97,11 @@ int hfsplus_strcmp(const struct hfsplus_unistr *s1,
#define Hangul_TCount 28
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)

+#define SURROGATE_MASK 0xfffff800
+#define SURROGATE_PAIR 0x0000d800
+#define SURROGATE_LOW 0x00000400
+#define SURROGATE_BITS 0x000003ff
+

static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
@@ -189,6 +194,9 @@ int hfsplus_uni2asc(struct super_block *sb,
c0 = ':';
break;
}
+
+ if ((c0 & SURROGATE_MASK) == SURROGATE_PAIR)
+ goto same;
res = nls->uni2char(c0, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
@@ -232,7 +240,19 @@ same:
cc = c0;
}
done:
- res = nls->uni2char(cc, op, len);
+ if ((cc & SURROGATE_MASK) == SURROGATE_PAIR
+ && !(cc & SURROGATE_LOW)
+ && ustrlen
+ && (be16_to_cpu(*ip) & SURROGATE_MASK) == SURROGATE_PAIR
+ && (be16_to_cpu(*ip) & SURROGATE_LOW)) {
+ unicode_t complete;
+ complete = (c0 & SURROGATE_BITS) << 10;
+ complete |= (be16_to_cpu(*ip++) & SURROGATE_BITS);
+ complete += 0x10000;
+ ustrlen--;
+ res = nls->uni2char(complete, op, len);
+ } else
+ res = nls->uni2char(cc, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
@@ -256,7 +276,7 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
unicode_t *uc)
{
int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
- if (size <= 0 || *uc > 0xffff) {
+ if (size <= 0) {
*uc = '?';
size = 1;
}
@@ -272,10 +292,13 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
}

/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+static inline u16 *decompose_unichar(unicode_t uc, int *size)
{
int off;

+ if (uc >= 0x10000)
+ return NULL;
+
off = hfsplus_decompose_table[(uc >> 12) & 0xf];
if (off == 0 || off == 0xffff)
return NULL;
@@ -316,8 +339,16 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
do {
ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
} while (--dsize > 0);
- } else
- ustr->unicode[outlen++] = cpu_to_be16(c);
+ } else {
+ int s;
+ s = unicode_to_utf16s(c, UTF16_BIG_ENDIAN,
+ ustr->unicode + outlen,
+ HFSPLUS_MAX_STRLEN - outlen);
+ if (s <= 0)
+ break;
+
+ outlen += s;
+ }

astr += size;
len -= size;
@@ -342,7 +373,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
int casefold, decompose, size, len;
unsigned long hash;
unicode_t c;
- u16 c2;
+ unicode_t c2;

casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -369,9 +400,17 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
} while (--dsize > 0);
} else {
c2 = c;
- if (casefold)
+ if (casefold && c2 < 0x10000)
c2 = case_fold(c2);
- if (!casefold || c2)
+ if (c2 >= 0x10000) {
+ int i, s;
+ u16 tmp[2];
+ s = unicode_to_utf16s(c2,
+ UTF16_HOST_ENDIAN,
+ tmp, 2);
+ for (i = 0; i < s; i++)
+ hash = partial_name_hash(tmp[i], hash);
+ } else if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
@@ -395,6 +434,7 @@ int hfsplus_compare_dentry(const struct dentry *parent,
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
const char *astr1, *astr2;
+ u16 buf1[2], buf2[2];
u16 c1, c2;
unicode_t c;

@@ -416,9 +456,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr1 = decompose_unichar(c, &dsize1);
if (!decompose || !dstr1) {
- c1 = c;
- dstr1 = &c1;
- dsize1 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf1, 2);
+ if (s <= 0)
+ s = 0;
+ dstr1 = buf1;
+ dsize1 = s;
}
}

@@ -430,9 +474,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr2 = decompose_unichar(c, &dsize2);
if (!decompose || !dstr2) {
- c2 = c;
- dstr2 = &c2;
- dsize2 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf2, 2);
+ if (s <= 0)
+ s = 0;
+ dstr2 = buf2;
+ dsize2 = s;
}
}

--
1.7.10

--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

Attachment: signature.asc
Description: OpenPGP digital signature