Re: [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 08/23/2013 10:25 PM, Marc-André Lureau wrote:
Convert line endings from/to LF/CRLF, in utf8.
---
  gtk/spice-util-priv.h |   2 +
  gtk/spice-util.c      | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 124 insertions(+)

diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
index ee5a42d..cc559dc 100644
--- a/gtk/spice-util-priv.h
+++ b/gtk/spice-util-priv.h
@@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const gchar *str);
  gchar* spice_uuid_to_string(const guint8 uuid[16]);
  const gchar* spice_yes_no(gboolean value);
  guint16 spice_make_scancode(guint scancode, gboolean release);
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);

  #if GLIB_CHECK_VERSION(2,32,0)
  #define STATIC_MUTEX            GMutex
diff --git a/gtk/spice-util.c b/gtk/spice-util.c
index 774a145..be10edc 100644
--- a/gtk/spice-util.c
+++ b/gtk/spice-util.c
@@ -19,6 +19,7 @@
  #ifdef HAVE_CONFIG_H
  # include "config.h"
  #endif
+
  #include <stdlib.h>
  #include <string.h>
  #include <glib-object.h>
@@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean release)

      g_return_val_if_reached(0);
  }
+
+typedef enum {
+    NEWLINE_TYPE_LF,
+    NEWLINE_TYPE_CR_LF
+} NewlineType;
+
+static gssize get_line(const gchar *str, gsize len,
+                       NewlineType type, gsize *nl_len,
+                       GError **error)
+{
+    const gchar *p = str;
+    gsize nl = 0;
+
+    if (type == NEWLINE_TYPE_CR_LF) {
+        while ((p - str) < len) {
+            p = g_utf8_strchr(p, len, '\r');
+            if (!p)
+                break;
+            p = g_utf8_next_char(p);
+            if (g_utf8_get_char(p) == '\n') {
+                len = (p - str) - 1;
+                nl = 2;
+                break;
+            }
+        }
+    } else {
+        p = g_utf8_strchr(str, len, '\n');
+        if (p) {
+            len = p - str;
+            nl = 1;
+        }
+    }

This looks way more complicated than it needs to be. In UTF-8, the bytes
0x00 - 0x7f are only valid as single-byte sequences; multi-byte
encoded characters will never contain 0x00 - 0x7f. UTF-8 was designed
this way so that existing string parsing code for non multi-byte
encodings, which may look for example for ' " = or LF characters, does
not break when parsing strings with multi-byte characters in them.

TL;DR: LF and CR will never be part of a multi-byte character, so
you can simply do: strstr(str, "\r\n") to find the CRLF.


+
+    if (!g_utf8_validate(str, len, NULL)) {
+        g_set_error_literal(error, G_CONVERT_ERROR,
+                            G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+                            "Invalid byte sequence in conversion input");
+        return -1;
+    }

And once you simply treat this as a regular C-string without worrying
about multi-byte encodings you can also drop this.

+
+    *nl_len = nl;
+    return len;
+}
+
+
+static gchar* spice_convert_newlines(const gchar *str, gssize len,
+                                     NewlineType from,
+                                     NewlineType to,
+                                     GError **error)
+{
+    GError *err = NULL;
+    gssize length;
+    gsize nl;
+    GString *output;
+    gboolean free_segment = FALSE;
+    gint i;
+
+    g_return_val_if_fail(str != NULL, NULL);
+    g_return_val_if_fail(len >= -1, NULL);
+    g_return_val_if_fail(error == NULL || *error == NULL, NULL);
+    /* only 2 supported combinations */
+    g_return_val_if_fail((from == NEWLINE_TYPE_LF &&
+                          to == NEWLINE_TYPE_CR_LF) ||
+                         (from == NEWLINE_TYPE_CR_LF &&
+                          to == NEWLINE_TYPE_LF), NULL);
+
+    if (len == -1)
+        len = strlen(str);
+    /* sometime we get \0 terminated strings, skip that, or it fails
+       to utf8 validate line with \0 end */
+    else if (str[len] == 0)
+        len -= 1;
+
+    /* allocate worst case, if it's small enough, we don't care much,
+     * if it's big, malloc will put us in mmap'd region, and we can
+     * over allocate.
+     */
+    output = g_string_sized_new(len * 2 + 1);
+
+    for (i = 0; i < len; i += length + nl) {
+        length = get_line(str + i, len - i, from, &nl, error);
+        if (length < 0)
+            break;
+
+        g_string_append_len(output, str + i, length);
+
+        if (nl) {
+            /* let's not double \r if it's already in the line */
+            if (to == NEWLINE_TYPE_CR_LF &&
+                output->str[output->len - 1] != '\r')
+                g_string_append_c(output, '\r');
+
+            g_string_append_c(output, '\n');
+        }
+    }
+
+    if (err) {
+        g_propagate_error(error, err);
+        free_segment = TRUE;
+    }
+
+    return g_string_free(output, free_segment);
+}
+
+G_GNUC_INTERNAL
+gchar* spice_dos2unix(const gchar *str, gssize len, GError **error)
+{
+    return spice_convert_newlines(str, len,
+                                  NEWLINE_TYPE_CR_LF,
+                                  NEWLINE_TYPE_LF,
+                                  error);
+}
+
+G_GNUC_INTERNAL
+gchar* spice_unix2dos(const gchar *str, gssize len, GError **error)
+{
+    return spice_convert_newlines(str, len,
+                                  NEWLINE_TYPE_LF,
+                                  NEWLINE_TYPE_CR_LF,
+                                  error);
+}


Regards,

Hans
_______________________________________________
Spice-devel mailing list
Spice-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/spice-devel





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]     [Monitors]