libs/unicode string folding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

This patch adds string folding functions to libunicode. Since this
adds a new file, 'fold.c', you'll need to rerun ./configure.

Cheers,
Jon

License: X11

Changelog:

  Jon Griffiths <jon_p_griffiths@yahoo.com>
  +include/wine/unicode.h libs/unicode/fold.c
    libs/unicode/Makefile.in libs/unicode/wine_unicode.def
    Add A/W string folding functions


__________________________________
Do you Yahoo!?
The New Yahoo! Shopping - with improved product search
http://shopping.yahoo.com
diff -u wine/libs/unicode/Makefile.in wine-develop/libs/unicode/Makefile.in
--- wine/libs/unicode/Makefile.in	2003-07-04 18:48:27.000000000 +0100
+++ wine-develop/libs/unicode/Makefile.in	2003-10-09 22:35:43.000000000 +0100
@@ -73,6 +73,7 @@
 	collation.c \
 	compose.c \
 	cptable.c \
+	fold.c \
 	mbtowc.c \
 	sortkey.c \
 	string.c \
diff -u wine/libs/unicode/wine_unicode.def wine-develop/libs/unicode/wine_unicode.def
--- wine/libs/unicode/wine_unicode.def	2003-07-04 18:48:32.000000000 +0100
+++ wine-develop/libs/unicode/wine_unicode.def	2003-10-09 22:39:53.000000000 +0100
@@ -16,6 +16,8 @@
     wine_cp_get_table
     wine_cp_mbstowcs
     wine_cp_wcstombs
+    wine_fold_stringA
+    wine_fold_stringW
     wine_get_sortkey
     wine_utf8_mbstowcs
     wine_utf8_wcstombs
--- wine/libs/unicode/fold.c	1970-01-01 01:00:00.000000000 +0100
+++ wine-develop/libs/unicode/fold.c	2003-10-13 14:00:43.000000000 +0100
@@ -0,0 +1,374 @@
+/*
+ * String folding
+ *
+ * Copyright 2003 Jon Griffiths
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <string.h>
+
+#include "wine/unicode.h"
+/* Digit-fold an 8-bit char: add to c the value wine_digitmap stores at (its entry 0) + c. */
+static inline WCHAR tounicodedigitA( unsigned char c )
+{
+    extern const WCHAR wine_digitmap[];
+    return (WCHAR)(wine_digitmap[(c & 0xff) + wine_digitmap[0]] + c);
+}
+/* Rows of 4 bytes: ligature char, its two replacement chars, a 0 byte. Keys ascend; a zero row ends the table. */
+static const unsigned char ligaturesA[32] =
+{
+    0x8C,'O','E',0x00,
+    0x9C,'o','e',0x00,
+    0xC6,'A','E',0x00,
+    0xDE,'T','H',0x00,
+    0xDF,'s','s',0x00,
+    0xE6,'a','e',0x00,
+    0xFE,'t','h',0x00,
+    0,  0,  0,  0
+};
+/* Return 1 when c is a key in ligaturesA (its expansion needs one extra output char), otherwise 0. */
+static inline int getligaturelenA( unsigned char c )
+{
+    const unsigned char *row = ligaturesA;
+
+    /* Anything below the first (smallest) key cannot be in the table. */
+    if (c < row[0])
+        return 0;
+    /* Step over 4-byte rows until the key is no longer below c or the 0 terminator is hit. */
+    while (row[0] != 0 && row[0] < c)
+        row += 4;
+    /* Only an exact key match counts; the 0 terminator never equals c here. */
+    return (row[0] == c) ? 1 : 0;
+}
+/* Return the two replacement chars for ligature c (pointer into ligaturesA, 0 byte follows), or NULL if none. */
+static inline const char* getligatureA( unsigned char c )
+{
+    const unsigned char *lig = ligaturesA;
+
+    if (c >= *lig)
+    {
+        while (*lig && c > *lig) /* keys ascend: stop at the first key >= c or at the 0 terminator */
+            lig += 4;
+        if (*lig == c)
+            return (const char *)(lig + 1); /* table is unsigned char: cast to match the return type */
+    }
+    return NULL;
+}
+/* Decompositions of 8-bit chars: a 128-byte index followed (at offset 128) by 2-byte replacement pairs. */
+static const unsigned char compositesA[250] =
+{
+    /* Index (for chars >= 0x80): slot (c & 0x7f) holds a byte offset into the pair area below */
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x02,0x00,0x00,0x00,0x04,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x06,0x00,0x00,0x00,0x08,0x0a,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x0c,0x0e,0x10,0x12,0x14,0x16,0x00,0x18,
+    0x1a,0x1c,0x1e,0x20,0x22,0x24,0x26,0x28,
+    0x00,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x00,
+    0x36,0x38,0x3a,0x3c,0x3e,0x40,0x00,0x00,
+    0x42,0x44,0x46,0x48,0x4a,0x4c,0x00,0x4e,
+    0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
+    0x00,0x60,0x62,0x64,0x66,0x68,0x6a,0x00,
+    0x6c,0x6e,0x70,0x72,0x74,0x76,0x00,0x78,
+
+    /* 2 char composite pairs; the first pair (offset 0) is 0,0 and means "no decomposition" */
+    0x00,0x00,
+    0x53,0x3f,0x5a,0x3f,0x73,0x3f,0x7a,0x3f,
+    0x59,0xa8,0x41,0x60,0x41,0xb4,0x41,0x5e,
+    0x41,0x7e,0x41,0xa8,0x41,0xb0,0x43,0xb8,
+    0x45,0x60,0x45,0xb4,0x45,0x5e,0x45,0xa8,
+    0x49,0x60,0x49,0xb4,0x49,0x5e,0x49,0xa8,
+    0x4e,0x7e,0x4f,0x60,0x4f,0xb4,0x4f,0x5e,
+    0x4f,0x7e,0x4f,0xa8,0x4f,0x3f,0x55,0x60,
+    0x55,0xb4,0x55,0x5e,0x55,0xa8,0x59,0xb4,
+    0x61,0x60,0x61,0xb4,0x61,0x5e,0x61,0x7e,
+    0x61,0xa8,0x61,0xb0,0x63,0xb8,0x65,0x60,
+    0x65,0xb4,0x65,0x5e,0x65,0xa8,0x69,0x60,
+    0x69,0xb4,0x69,0x5e,0x69,0xa8,0x6e,0x7e,
+    0x6f,0x60,0x6f,0xb4,0x6f,0x5e,0x6f,0x7e,
+    0x6f,0xa8,0x6f,0x3f,0x75,0x60,0x75,0xb4,
+    0x75,0x5e,0x75,0xa8,0x79,0xb4,0x79,0xa8
+};
+/* Return 1 when c has a 2-char entry in compositesA (one extra output char is needed), otherwise 0. */
+static inline int getcompositelenA( unsigned char c )
+{
+    if (c <= 0x89)
+        return 0; /* the index is never consulted for chars up to 0x89 */
+    return compositesA[c & 0x7f] ? 1 : 0;
+}
+/* Return the 2-byte compositesA pair for c; the pair is 0,0 when c has no decomposition (always so below 128). */
+static inline const unsigned char* getcompositeA( unsigned char c )
+{
+    /* Index slot 0 holds offset 0, so routing c < 128 there selects the leading 0,0 pair. */
+    const unsigned char slot = (c < 128) ? 0 : (c & 0x7f);
+    return compositesA + 128 + compositesA[slot];
+}
+/* Fold an 8-bit string: apply the MAP_* mappings selected by flags to src and store the result in dst.
+ * srclen == -1 means src is NUL-terminated (the NUL is counted); dstlen == 0 only computes the size.
+ * Returns the chars written (or required), or 0 when srclen is invalid or dst is too small. */
+int wine_fold_stringA(unsigned int flags, const char *src, int srclen,
+                      char *dst, int dstlen)
+{
+    char *dstbase = dst;
+    const char *expand;
+    int i;
+
+    if (srclen == -1)
+        srclen = strlen(src) + 1; /* Include terminating NUL in count */
+    else if (srclen < 0)
+        return 0; /* any other negative count would send the sizing loops below past the end of src */
+
+    if (!dstlen)
+    {
+        /* Calculate the required size for dst: one char per source char plus one per expansion */
+        dstlen = srclen;
+        if (flags & MAP_EXPAND_LIGATURES)
+        {
+            while (srclen--)
+            {
+                dstlen += getligaturelenA(*src);
+                src++;
+            }
+        }
+        else if (flags & MAP_COMPOSITE)
+        {
+            while (srclen--)
+            {
+                dstlen += getcompositelenA(*src);
+                src++;
+            }
+        }
+        return dstlen;
+    }
+
+    if (srclen > dstlen)
+        return 0;
+    dstlen -= srclen; /* from here on dstlen counts only the spare room left for expansions */
+
+    /* Actually perform the mapping(s) specified; ligature expansion takes precedence over MAP_COMPOSITE */
+    for (i = 0; i < srclen; i++)
+    {
+        char ch = *src;
+
+        if (flags & MAP_EXPAND_LIGATURES)
+        {
+            if ((expand = getligatureA(ch)))
+            {
+                if (!dstlen--)
+                    return 0;
+                *dst++ = expand[0];
+                ch = expand[1];
+            }
+        }
+        else if (flags & MAP_COMPOSITE)
+        {
+            expand = (const char *)getcompositeA(*src); /* table is unsigned char: cast to expand's type */
+            if (*expand)
+            {
+                if (!dstlen--)
+                    return 0;
+                *dst++ = expand[0];
+                ch = expand[1];
+            }
+        }
+        if (flags & MAP_FOLDDIGITS)
+            ch = tounicodedigitA(ch);
+        *dst++ = ch;
+        src++;
+    }
+    return dst - dstbase;
+}
+/* Digit-fold ch: add the value wine_digitmap stores at (entry for ch's high byte) + (ch's low byte). */
+static inline WCHAR tounicodedigitW( WCHAR ch )
+{
+    extern const WCHAR wine_digitmap[];
+    return (WCHAR)(wine_digitmap[(ch & 0xff) + wine_digitmap[ch >> 8]] + ch);
+}
+/* MAP_FOLDCZONE helper: add the value wine_compatmap stores at (entry for ch's high byte) + (low byte). */
+static inline WCHAR tounicodenativeW( WCHAR ch )
+{
+    extern const WCHAR wine_compatmap[];
+    return (WCHAR)(wine_compatmap[(ch & 0xff) + wine_compatmap[ch >> 8]] + ch);
+}
+/* Ligature code points, ascending, plus a trailing 0; entry N pairs with row N of wine_expanded_ligaturesW. */
+static const WCHAR wine_ligaturesW[] =
+{
+    0x00c6, 0x00de, 0x00df, 0x00e6, 0x00fe, 0x0132, 0x0133, 0x0152,
+    0x0153, 0x01c4, 0x01c5, 0x01c6, 0x01c7, 0x01c8, 0x01c9, 0x01ca,
+    0x01cb, 0x01cc, 0x01e2, 0x01e3, 0x01f1, 0x01f2, 0x01f3, 0x01fc,
+    0x01fd, 0x05f0, 0x05f1, 0x05f2, 0xfb00, 0xfb01, 0xfb02, 0xfb03,
+    0xfb04, 0xfb05, 0xfb06, '\0'
+};
+/* Unicode expanded ligatures: one 4-WCHAR row per wine_ligaturesW code point, holding up to three
+ * replacement chars ('\0'-padded) followed by the number of extra chars the expansion adds. */
+static const WCHAR wine_expanded_ligaturesW[] =
+{
+    'A','E','\0',1,
+    'T','H','\0',1,
+    's','s','\0',1,
+    'a','e','\0',1,
+    't','h','\0',1,
+    'I','J','\0',1,
+    'i','j','\0',1,
+    'O','E','\0',1,
+    'o','e','\0',1,
+    'D',0x017d,'\0',1,
+    'D',0x017e,'\0',1,
+    'd',0x017e,'\0',1,
+    'L','J','\0',1,
+    'L','j','\0',1,
+    'l','j','\0',1,
+    'N','J','\0',1,
+    'N','j','\0',1,
+    'n','j','\0',1,
+    0x0100,0x0112,'\0',1,
+    0x0101,0x0113,'\0',1,
+    'D','Z','\0',1,
+    'D','z','\0',1,
+    'd','z','\0',1,
+    0x00c1,0x00c9,'\0',1,
+    0x00e1,0x00e9,'\0',1,
+    0x05d5,0x05d5,'\0',1,
+    0x05d5,0x05d9,'\0',1,
+    0x05d9,0x05d9,'\0',1,
+    'f','f','\0',1,
+    'f','i','\0',1,
+    'f','l','\0',1,
+    'f','f','i',2,
+    'f','f','l',2,
+    0x017f,'t','\0',1,
+    's','t','\0',1
+};
+/* Return the number of extra chars expanding wc adds (4th WCHAR of its expansion row), or 0 if no ligature. */
+static inline int getligaturelenW( WCHAR wc )
+{
+    int first = 0, last = sizeof(wine_ligaturesW)/sizeof(WCHAR) - 1;
+    while (first <= last) /* binary search of wine_ligaturesW; a hit at mid selects expansion row mid */
+    {
+        const int mid = (first + last) / 2;
+        if (wine_ligaturesW[mid] == wc)
+            return wine_expanded_ligaturesW[mid * 4 + 3];
+        if (wine_ligaturesW[mid] < wc)
+            first = mid + 1;
+        else
+            last = mid - 1;
+    }
+    return 0;
+}
+/* Return wc's 4-WCHAR row in wine_expanded_ligaturesW, or an all-zero row when wc is not a ligature. */
+static inline const WCHAR* getligatureW( WCHAR wc )
+{
+    static const WCHAR empty_ligatureW[] = { '\0','\0','\0', 0 };
+    int first = 0, last = sizeof(wine_ligaturesW)/sizeof(WCHAR) - 1;
+    while (first <= last) /* binary search of wine_ligaturesW; a hit at mid selects expansion row mid */
+    {
+        const int mid = (first + last) / 2;
+        if (wine_ligaturesW[mid] == wc)
+            return &wine_expanded_ligaturesW[mid * 4];
+        if (wine_ligaturesW[mid] < wc)
+            first = mid + 1;
+        else
+            last = mid - 1;
+    }
+    return empty_ligatureW;
+}
+/* Fold a unicode string: apply the MAP_* mappings selected by flags to src and store the result in dst.
+ * srclen == -1 means src is NUL-terminated (the NUL is counted); dstlen == 0 only computes the size.
+ * Returns the WCHARs written (or required), or 0 when srclen is invalid or dst is too small. */
+int wine_fold_stringW(unsigned int flags, const WCHAR *src, int srclen,
+                      WCHAR *dst, int dstlen)
+{
+    WCHAR *dstbase = dst;
+    const WCHAR *expand;
+    int i;
+
+    if (srclen == -1)
+        srclen = strlenW(src) + 1; /* Include terminating NUL in count */
+    else if (srclen < 0)
+        return 0; /* any other negative count would send the sizing loop below past the end of src */
+
+    if (!dstlen)
+    {
+        /* Calculate the required size for dst: one char per source char plus the expansion extras */
+        dstlen = srclen;
+        if (flags & MAP_EXPAND_LIGATURES)
+        {
+            while (srclen--)
+            {
+                dstlen += getligaturelenW(*src);
+                src++;
+            }
+        }
+        else if (flags & MAP_COMPOSITE)
+        {
+            /* FIXME: not implemented, the size reported is just srclen */
+        }
+        else if (flags & MAP_PRECOMPOSED)
+        {
+            /* FIXME: not implemented, the size reported is just srclen */
+        }
+        return dstlen;
+    }
+
+    if (srclen > dstlen)
+        return 0;
+    dstlen -= srclen; /* from here on dstlen counts only the spare room left for expansions */
+
+    /* Actually perform the mapping(s) specified */
+    for (i = 0; i < srclen; i++)
+    {
+        WCHAR ch = *src;
+
+        if (flags & MAP_EXPAND_LIGATURES)
+        {
+            expand = getligatureW(ch);
+            if (expand[0])
+            {
+                if (!dstlen--)
+                    return 0;
+                dst[0] = expand[0];
+                if (expand[2]) /* three-char expansion: needs a second spare slot */
+                {
+                    if (!dstlen--)
+                        return 0;
+                    *++dst = expand[1];
+                    ch = expand[2];
+                }
+                else
+                    ch = expand[1];
+                dst++;
+            }
+        }
+        else if (flags & MAP_COMPOSITE)
+        {
+            /* FIXME: not implemented, ch is left unchanged */
+        }
+        else if (flags & MAP_PRECOMPOSED)
+        {
+            /* FIXME: not implemented, ch is left unchanged */
+        }
+        if (flags & MAP_FOLDDIGITS)
+            ch = tounicodedigitW(ch);
+        if (flags & MAP_FOLDCZONE)
+            ch = tounicodenativeW(ch);
+        *dst++ = ch; /* last char of an expansion, or the (possibly folded) source char */
+        src++;
+    }
+    return dst - dstbase;
+}
--- wine/include/wine/unicode.h	2003-09-29 22:32:11.000000000 +0100
+++ wine-develop/include/wine/unicode.h	2003-10-09 22:43:18.000000000 +0100
@@ -76,6 +76,9 @@
 
 extern int wine_get_sortkey( int flags, const WCHAR *src, int srclen, char *dst, int dstlen );
 
+extern int wine_fold_stringA(unsigned int, const char*, int , char*, int);   /* (flags, src, srclen, dst, dstlen) */
+extern int wine_fold_stringW(unsigned int, const WCHAR*, int , WCHAR*, int); /* (flags, src, srclen, dst, dstlen) */
+
 extern int strcmpiW( const WCHAR *str1, const WCHAR *str2 );
 extern int strncmpiW( const WCHAR *str1, const WCHAR *str2, int n );
 extern WCHAR *strstrW( const WCHAR *str, const WCHAR *sub );

[Index of Archives]     [Gimp for Windows]     [Red Hat]     [Samba]     [Yosemite Camping]     [Graphics Cards]     [Wine Home]

  Powered by Linux