Hi, On 08/23/2013 10:25 PM, Marc-André Lureau wrote:
Convert line endings from/to LF/CRLF, in utf8. --- gtk/spice-util-priv.h | 2 + gtk/spice-util.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h index ee5a42d..cc559dc 100644 --- a/gtk/spice-util-priv.h +++ b/gtk/spice-util-priv.h @@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const gchar *str); gchar* spice_uuid_to_string(const guint8 uuid[16]); const gchar* spice_yes_no(gboolean value); guint16 spice_make_scancode(guint scancode, gboolean release); +gchar* spice_unix2dos(const gchar *str, gssize len, GError **error); +gchar* spice_dos2unix(const gchar *str, gssize len, GError **error); #if GLIB_CHECK_VERSION(2,32,0) #define STATIC_MUTEX GMutex diff --git a/gtk/spice-util.c b/gtk/spice-util.c index 774a145..be10edc 100644 --- a/gtk/spice-util.c +++ b/gtk/spice-util.c @@ -19,6 +19,7 @@ #ifdef HAVE_CONFIG_H # include "config.h" #endif + #include <stdlib.h> #include <string.h> #include <glib-object.h> @@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean release) g_return_val_if_reached(0); } + +typedef enum { + NEWLINE_TYPE_LF, + NEWLINE_TYPE_CR_LF +} NewlineType; + +static gssize get_line(const gchar *str, gsize len, + NewlineType type, gsize *nl_len, + GError **error) +{ + const gchar *p = str; + gsize nl = 0; + + if (type == NEWLINE_TYPE_CR_LF) { + while ((p - str) < len) { + p = g_utf8_strchr(p, len, '\r'); + if (!p) + break; + p = g_utf8_next_char(p); + if (g_utf8_get_char(p) == '\n') { + len = (p - str) - 1; + nl = 2; + break; + } + } + } else { + p = g_utf8_strchr(str, len, '\n'); + if (p) { + len = p - str; + nl = 1; + } + }
This looks way more complicated than it needs to be. In UTF-8, the bytes 0x00 - 0x7f are only valid as single-byte sequences; multi-byte encoded characters never contain bytes in the range 0x00 - 0x7f. UTF-8 was designed this way so that existing string-parsing code written for non-multi-byte encodings, which may look for, for example, ' " = or LF characters, does not break when parsing strings that contain multi-byte characters. TL;DR: LF and CR will never be part of a multi-byte character, so you can simply do: strstr(str, "\r\n") to find the CRLF.
+ + if (!g_utf8_validate(str, len, NULL)) { + g_set_error_literal(error, G_CONVERT_ERROR, + G_CONVERT_ERROR_ILLEGAL_SEQUENCE, + "Invalid byte sequence in conversion input"); + return -1; + }
And once you simply treat this as a regular C string, without worrying about multi-byte encodings, you can also drop this g_utf8_validate() check.
+ + *nl_len = nl; + return len; +} + + +static gchar* spice_convert_newlines(const gchar *str, gssize len, + NewlineType from, + NewlineType to, + GError **error) +{ + GError *err = NULL; + gssize length; + gsize nl; + GString *output; + gboolean free_segment = FALSE; + gint i; + + g_return_val_if_fail(str != NULL, NULL); + g_return_val_if_fail(len >= -1, NULL); + g_return_val_if_fail(error == NULL || *error == NULL, NULL); + /* only 2 supported combinations */ + g_return_val_if_fail((from == NEWLINE_TYPE_LF && + to == NEWLINE_TYPE_CR_LF) || + (from == NEWLINE_TYPE_CR_LF && + to == NEWLINE_TYPE_LF), NULL); + + if (len == -1) + len = strlen(str); + /* sometime we get \0 terminated strings, skip that, or it fails + to utf8 validate line with \0 end */ + else if (str[len] == 0) + len -= 1; + + /* allocate worst case, if it's small enough, we don't care much, + * if it's big, malloc will put us in mmap'd region, and we can + * over allocate. + */ + output = g_string_sized_new(len * 2 + 1); + + for (i = 0; i < len; i += length + nl) { + length = get_line(str + i, len - i, from, &nl, error); + if (length < 0) + break; + + g_string_append_len(output, str + i, length); + + if (nl) { + /* let's not double \r if it's already in the line */ + if (to == NEWLINE_TYPE_CR_LF && + output->str[output->len - 1] != '\r') + g_string_append_c(output, '\r'); + + g_string_append_c(output, '\n'); + } + } + + if (err) { + g_propagate_error(error, err); + free_segment = TRUE; + } + + return g_string_free(output, free_segment); +} + +G_GNUC_INTERNAL +gchar* spice_dos2unix(const gchar *str, gssize len, GError **error) +{ + return spice_convert_newlines(str, len, + NEWLINE_TYPE_CR_LF, + NEWLINE_TYPE_LF, + error); +} + +G_GNUC_INTERNAL +gchar* spice_unix2dos(const gchar *str, gssize len, GError **error) +{ + return spice_convert_newlines(str, len, + NEWLINE_TYPE_LF, + NEWLINE_TYPE_CR_LF, + error); +}
Regards, Hans _______________________________________________ Spice-devel mailing list Spice-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/spice-devel