This is the bug report for https://bugzilla.gnome.org/show_bug.cgi?id=649172
The bug is that g_markup_unescape_text() does not unescape the HTML characters. This is the problem while opening the .map file. Also this is not dependent on the format(map type) of the map file.
My patch in comment #3 is wrong. As I mentioned in comment #5 the patch in comment #2 is not precise since it did not satisfy many testcases. So I will explain the problems in that.
When I tried with input: "sample entity : &sash; ' "
The expected output should be: "sample entity : &sash; ' "
But the result was: "sample ent ty : &sash; ' "
The exact problem is with "strcpy(p, p + strlen(tab[i].enc)-1);"
To prove that, I made couple of simple codes using 'strcpy' and user-defined 'strcopy' The output of these two shows the problem.
/* test1.c */
#include <stdio.h>
#include <string.h>
#include <glib.h>
int main ()
{
gchar input[] = "sample entity";
gchar *p = input;
printf ("\n%s\n", input);
strcpy (p, p + 1);
printf ("\n%s\n", input);
return 0;
}
[sashi@SashiPC ~]$ gcc -Wall -g -o test1 test1.c $(pkg-config --cflags --libs glib-2.0)
[sashi@SashiPC ~]$ ./test1
sample entity
ample eentity
/* test2.c */
#include <stdio.h>
#include <string.h>
#include <glib.h>
gchar* strcopy (gchar *dest, gchar *src);
int main ()
{
gchar input[] = "sample entity";
gchar *p = input;
printf ("\n%s\n", input);
strcopy (p, p + 1);
printf ("\n%s\n", input);
return 0;
}
gchar *
strcopy (gchar *dest,
gchar *src)
{
gchar *d = dest;
gchar *s = src;
do
*d++ = *s;
while (*s++ != '\0');
return d - 1;
}
[sashi@SashiPC ~]$ gcc -Wall -g -o test2 test2.c $(pkg-config --cflags --libs glib-2.0)
[sashi@SashiPC ~]$ ./test2
sample entity
ample entity
So the Expected Patch is:
--- imap_csim.y.old 2013-04-04 00:17:15.564849250 +0530
+++ imap_csim.y 2013-04-04 15:40:43.693297813 +0530
@@ -38,6 +38,9 @@
extern int csim_lex(void);
extern int csim_restart(FILE *csim_in);
static void csim_error(char* s);
+static gchar* unescape_text(gchar *);
+static gchar* strcopy (gchar *dest, gchar *src);
+
static enum {UNDEFINED, RECTANGLE, CIRCLE, POLYGON} current_type;
static Object_t *current_object;
@@ -260,7 +263,7 @@
if (current_type == UNDEFINED) {
g_strreplace(&_map_info->default_url, $3);
} else {
- object_set_url(current_object, $3);
+ object_set_url(current_object, unescape_text($3));
}
g_free ($3);
}
@@ -280,42 +283,42 @@
alt_tag : ALT '=' STRING
{
- object_set_comment(current_object, $3);
+ object_set_comment(current_object, unescape_text($3));
g_free ($3);
}
;
target_tag : TARGET '=' STRING
{
- object_set_target(current_object, $3);
+ object_set_target(current_object, unescape_text($3));
g_free ($3);
}
;
onmouseover_tag : ONMOUSEOVER '=' STRING
{
- object_set_mouse_over(current_object, $3);
+ object_set_mouse_over(current_object, unescape_text($3));
g_free ($3);
}
;
onmouseout_tag : ONMOUSEOUT '=' STRING
{
- object_set_mouse_out(current_object, $3);
+ object_set_mouse_out(current_object, unescape_text($3));
g_free ($3);
}
;
onfocus_tag : ONFOCUS '=' STRING
{
- object_set_focus(current_object, $3);
+ object_set_focus(current_object, unescape_text($3));
g_free ($3);
}
;
onblur_tag : ONBLUR '=' STRING
{
- object_set_blur(current_object, $3);
+ object_set_blur(current_object, unescape_text($3));
g_free ($3);
}
;
@@ -347,3 +350,53 @@
}
return status;
}
+
+static gchar*
+unescape_text (gchar *input)
+{
+ /*
+ * We "unescape" simple things "in place", knowing that unescaped strings always are
+ * shorter than the original input.
+ *
+ * It is a shame there is no g_markup_unescape_text() function, but instead you have
+ * to create a full GMarkupParser/Context.
+ */
+ struct token {
+ const char *enc, unenc;
+ };
+ const struct token tab[] = {
+ { """, '"' },
+ { "'", '\'' },
+ { "&", '&' },
+ { "<", '<' },
+ { ">", '>' }
+ };
+ size_t i;
+
+ for (i = 0; i < sizeof(tab)/sizeof(tab[0]); i++) {
+ char *p;
+ for (p = strstr(input, tab[i].enc); p != NULL; p = strstr(p, tab[i].enc)) {
+ *p++ = tab[i].unenc;
+ strcopy(p, p + strlen(tab[i].enc)-1);
+ if (*p == 0)
+ break;
+ }
+ }
+
+
+ return input;
+}
+
+static gchar*
+strcopy (gchar *dest,
+ gchar *src)
+{
+ gchar *d = dest;
+ gchar *s = src;
+
+ do
+ *d++ = *s;
+ while (*s++ != '\0');
+
+ return d - 1;
+}
This patch works good :)