The function bisearch() in utf8.c does a pure binary search in double_width. It does not care about the 17 plane offsets which unicode/uniset/uniset prepends. Leaving the plane offsets in the table may cause wrong results. Filter out the plane offsets in update_unicode.sh and regenerate the table. Cc: Torsten Bögershausen <tboegi@xxxxxx> Signed-off-by: Beat Bolli <dev+git@xxxxxxxxx> --- Diff to v1: - add Torsten's Cc: unicode_width.h | 17 ----------------- update_unicode.sh | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/unicode_width.h b/unicode_width.h index 73b5fd6..02207be 100644 --- a/unicode_width.h +++ b/unicode_width.h @@ -297,23 +297,6 @@ static const struct interval zero_width[] = { { 0xE0100, 0xE01EF } }; static const struct interval double_width[] = { -{ /* plane */ 0x0, 0x3D }, -{ /* plane */ 0x3D, 0x68 }, -{ /* plane */ 0x68, 0x69 }, -{ /* plane */ 0x69, 0x6A }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, -{ /* plane */ 0x0, 0x0 }, { 0x1100, 0x115F }, { 0x231A, 0x231B }, { 0x2329, 0x232A }, diff --git a/update_unicode.sh b/update_unicode.sh index 3c84270..4c1ec8d 100755 --- a/update_unicode.sh +++ b/update_unicode.sh @@ -30,7 +30,7 @@ fi && grep -v plane) }; static const struct interval double_width[] = { - $(uniset/uniset --32 eaw:F,W) + $(uniset/uniset --32 eaw:F,W | grep -v plane) }; EOF ) -- 2.7.2