RE: [PATCH v9 10/13] exfat: add nls operations

"Namjae Jeon" <namjae.jeon@xxxxxxxxxxx> · Fri, 10 Jan 2020 07:35:47 +0900



> What about just filtering two u16 values (one surrogate pair)? Existing
> NLS modules do not support code points above U+FFFF, so two u16 values
> (one surrogate pair) just need to be converted to one replacement character.
Hi Pali,

You're right.
> 
> diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
> index 81d75aed9..f626a0a89 100644
> --- a/fs/exfat/nls.c
> +++ b/fs/exfat/nls.c
> @@ -545,7 +545,10 @@ static int __exfat_nls_vfsname_to_utf16s(struct
> super_block *sb,
>  	return unilen;
>  }
> 
> -static int __exfat_nls_uni16s_to_vfsname(struct super_block *sb,
> +#define SURROGATE_PAIR		0x0000d800
> +#define SURROGATE_LOW		0x00000400
> +
> +static int __exfat_nls_utf16s_to_vfsname(struct super_block *sb,
>  		struct exfat_uni_name *p_uniname, unsigned char *p_cstring,
>  		int buflen)
>  {
> @@ -559,7 +562,23 @@ static int __exfat_nls_uni16s_to_vfsname(struct
> super_block *sb,
>  		if (*uniname == '\0')
>  			break;
> 
> -		len = exfat_convert_uni_to_ch(nls, *uniname, buf, NULL);
> +		if ((*uniname & SURROGATE_MASK) != SURROGATE_PAIR) {
> +			len = exfat_convert_uni_to_ch(nls, *uniname, buf,
> NULL);
> +		} else {
> +			/* Process UTF-16 surrogate pair as one character */
> +			if (!(*uniname & SURROGATE_LOW) && i+1 <
> MAX_NAME_LENGTH &&
> +			    (*(uniname+1) & SURROGATE_MASK) == SURROGATE_PAIR
> &&
> +			    (*(uniname+1) & SURROGATE_LOW)) {
> +				uniname++;
> +				i++;
> +			}
> +			/* UTF-16 surrogate pair encodes code points above
> U+FFFF.
> +			 * Code points above U+FFFF are not supported by
> kernel NLS
> +			 * framework therefore use replacement character */
> +			len = 1;
> +			buf[0] = '_';
> +		}
> +
>  		if (out_len + len >= buflen)
>  			len = buflen - 1 - out_len;
>  		out_len += len;
> @@ -623,7 +642,7 @@ int exfat_nls_uni16s_to_vfsname(struct super_block *sb,
>  	if (EXFAT_SB(sb)->options.utf8)
>  		return __exfat_nls_utf16s_to_vfsname(sb, uniname, p_cstring,
>  				buflen);
> -	return __exfat_nls_uni16s_to_vfsname(sb, uniname, p_cstring,
> buflen);
> +	return __exfat_nls_utf16s_to_vfsname(sb, uniname, p_cstring,
> buflen);
>  }
> 
>  int exfat_nls_vfsname_to_uni16s(struct super_block *sb,
> 
> I have not tested this code; it is just an idea for a quick & dirty way
> to solve the problem that the NLS framework works with UCS-2 encoding and
> not with UCS-4/UTF-32 or UTF-16.
I will check and test this code.
Thanks for your suggestion.