On Tue, 2016-04-26 at 17:47 +0530, arun at accosted.net wrote: > From: Arun Raghavan <git at arunraghavan.net> > > Adding this to be able to drop dependency on json-c. > > Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=95135 > --- >  src/Makefile.am       |   8 + >  src/pulse/json.c      | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++ >  src/pulse/json.h      |  57 ++++++ >  src/tests/json-test.c | 240 +++++++++++++++++++++++++ >  4 files changed, 781 insertions(+) >  create mode 100644 src/pulse/json.c >  create mode 100644 src/pulse/json.h >  create mode 100644 src/tests/json-test.c This patch could also add src/json-test to .gitignore. > diff --git a/src/Makefile.am b/src/Makefile.am > index b600dfb..9d952fc 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -248,6 +248,7 @@ TESTS_default = \ >  thread-mainloop-test \ >  utf8-test \ >  format-test \ > + json-test \ >  get-binary-name-test \ >  hook-list-test \ >  memblock-test \ > @@ -375,6 +376,12 @@ format_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS) >  format_test_LDADD = $(AM_LDADD) libpulsecore- at PA_MAJORMINOR@.la libpulse.la libpulsecommon- at PA_MAJORMINOR@.la >  format_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS) >  > +json_test_SOURCES = tests/json-test.c > +json_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS) > +json_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon- at PA_MAJORMINOR@.la > +json_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS) > + > +srbchannel_test_SOURCES = tests/srbchannel-test.c >  srbchannel_test_SOURCES = tests/srbchannel-test.c It's probably sufficient to set srbchannel_test_SOURCES just once. >  srbchannel_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS) >  srbchannel_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon- at PA_MAJORMINOR@.la > @@ -646,6 +653,7 @@ libpulsecommon_ at PA_MAJORMINOR@_la_SOURCES = \ >  pulse/client-conf.c pulse/client-conf.h \ >  pulse/fork-detect.c pulse/fork-detect.h \ >  pulse/format.c pulse/format.h \ > + pulse/json.c pulse/json.h \ >  pulse/xmalloc.c pulse/xmalloc.h \ >  pulse/proplist.c pulse/proplist.h \ >  pulse/utf8.c pulse/utf8.h \ > diff --git a/src/pulse/json.c b/src/pulse/json.c > new file mode 100644 > index 0000000..0e53902 > --- /dev/null > +++ b/src/pulse/json.c > @@ -0,0 +1,476 @@ > +/*** > +  This file is part of PulseAudio. > + > +  Copyright 2016 Arun Raghavan <mail at arunraghavan.net> > + > +  PulseAudio is free software; you can redistribute it and/or modify > +  it under the terms of the GNU Lesser General Public License as published > +  by the Free Software Foundation; either version 2.1 of the License, > +  or (at your option) any later version. > + > +  PulseAudio is distributed in the hope that it will be useful, but > +  WITHOUT ANY WARRANTY; without even the implied warranty of > +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +  General Public License for more details. > + > +  You should have received a copy of the GNU Lesser General Public License > +  along with PulseAudio; if not, see <http://www.gnu.org/licenses/>. > +***/ > + > +#include > + > +#include > +#include > +#include > +#include > + > +struct pa_json_object { > +    PA_REFCNT_DECLARE; > +    pa_json_type type; > + > +    union { > +        int int_value; > +        double double_value; > +        bool bool_value; > +        char *string_value; > +        pa_hashmap *object_values; /* name -> object */ > +        pa_idxset *array_values; /* objects */ > +    }; > +}; > + > +#define JSON_OBJECT_TYPE(o) ((o)->type) What's the rationale for this macro? Why is using the macro better than using o->type directly? > + > +static const char* parse_value(const char *str, const char *end, pa_json_object **obj); > + > +static pa_json_object* json_object_new(void) { > +    pa_json_object *obj; > + > +    obj = pa_xnew0(pa_json_object, 1); > + > +    return obj; > +} > + > +static bool is_whitespace(char c) { > +    return c == '\t' || c == '\f' || c == '\r' || c == ' '; > +} > + > +static bool is_digit(char c) { > +    return c >= '0' && c <= '9'; > +} > + > +static bool is_end(const char c, const char *end) { > +    if (!end) > +        return c == '\0'; > +    else  { > +        while (*end) { > +            if (c == *end) > +                return true; > +            end++; > +        } > +    } > + > +    return false; > +} > + > +static const char* consume_string(const char *str, const char *expect) > +{ The opening brace got misplaced. > +    while (*expect) { > +        if (*str != *expect) > +            return NULL; > + > +        str++; > +        expect++; > +    } > + > +    return str; > +} > + > +static const char* parse_null(const char *str, pa_json_object *obj) { > +    str = consume_string(str, "null"); > + > +    if (str) > +        obj->type = PA_JSON_TYPE_NULL; > + > +    return str; > +} > + > +static const char* parse_boolean(const char *str, pa_json_object *obj) { > +    const char *tmp; > + > +    tmp = consume_string(str, "true"); > + > +    if (tmp) { > +        obj->type = PA_JSON_TYPE_BOOL; > +        obj->bool_value = true; > +    } else { > +        tmp = consume_string(str, "false"); > + > +        if (str) { > +            obj->type = PA_JSON_TYPE_BOOL; > +            obj->bool_value = false; > +        } > +    } > + > +    return tmp; > +} > + > +static const char* parse_string(const char *str, pa_json_object *obj) { > +    pa_strbuf *buf = pa_strbuf_new(); > + > +    str++; /* Consume leading '"' */ > + > +    while (*str != '"') { > +        if (*str != '\\') { > +            /* Normal character, juts consume */ > +            pa_strbuf_putc(buf, *str); Control characters 0x00 <= *str <= 0x1F should cause an error, since they are disallowed by the JSON spec (0x00 is maybe special, since it signifies end-of-string, but that should cause an error nevertheless). Also, JSON data can be UTF-8, UTF-16 or UTF-32 encoded, but this code doesn't take that into account. If we are fed valid multi-byte sequences, this code probably can get confused. I feel it would be safest to only support 7-bit ASCII, and disallow any characters above 0x7F. > +        } else { > +            /* Need to unescape */ > +            str++; > + > +            switch (*str) { > +                case '"': > +                case '\\': > +                case '/': > +                    pa_strbuf_putc(buf, *str); > +                    break; > + > +                case 'b': > +                    pa_strbuf_putc(buf, '\b' /* backspace */); > +                    break; > + > +                case 'f': > +                    pa_strbuf_putc(buf, '\f' /* form feed */); > +                    break; > + > +                case 'n': > +                    pa_strbuf_putc(buf, '\n' /* new line */); > +                    break; > + > +                case 'r': > +                    pa_strbuf_putc(buf, '\r' /* carriage return */); > +                    break; > + > +                case 't': > +                    pa_strbuf_putc(buf, '\t' /* horizontal tab */); > +                    break; > + > +                case 'u': > +                    pa_log("Unicode code points are currently unsupported"); > +                    goto error; > + > +                default: > +                    pa_log("Unexepcted escape value: %c", *str); > +                    goto error; > +            } > +        } > + > +        str++; > +    } > + > +    if (*str != '"') { > +        pa_log("Failed to parse remainder of string: %s", str); > +        goto error; > +    } > + > +    str++; > + > +    obj->type = PA_JSON_TYPE_STRING; > +    obj->string_value = pa_strbuf_to_string_free(buf); > + > +    return str; > + > +error: > +    pa_strbuf_free(buf); > +    return NULL; > +} > + > +static const char* parse_number(const char *str, pa_json_object *obj) { > +    bool negative = false, has_fraction = false, has_exponent = false; > +    unsigned int integer = 0; > +    unsigned int fraction = 0; > +    unsigned int fraction_digits = 0; > +    int exponent = 0; > + > +    if (*str == '-') { > +        negative = true; > +        str++; > +    } > + > +    if (*str == '0') { > +        str++; > +        goto fraction; > +    } > + > +    while (is_digit(*str)) { > +        integer = (integer * 10) + (*str - '0'); > +        str++; > +    } Missing overflow checking (imagine "12341234123412341234123412341234"). > + > +fraction: > +    if (*str == '.') { > +        has_fraction = true; > +        str++; > + > +        while (is_digit(*str)) { > +            fraction = (fraction * 10) + (*str - '0'); > +            fraction_digits++; > +            str++; > +        } Missing overflow checking. > +    } > + > +    if (*str == 'e' || *str == 'E') { > +        bool exponent_negative = false; > + > +        has_exponent = true; > +        str++; > + > +        if (*str == '-') { > +            exponent_negative = true; > +            str++; > +        } else if (*str == '+') > +            str++; > + > +        while (is_digit(*str)) { > +            exponent = (exponent * 10) + (*str - '0'); > +            str++; > +        } Missing overflow checking. > + > +        if (exponent_negative) > +            exponent *= -1; > +    } > + > +    if (has_fraction || has_exponent) { > +        obj->type = PA_JSON_TYPE_DOUBLE; > +        obj->double_value = > +            (negative ? -1.0 : 1.0) * (integer + (double) fraction / pow(10, fraction_digits)) * pow(10, exponent); > +    } else { > +        obj->type = PA_JSON_TYPE_INT; > +        obj->int_value = (negative ? -1 : 1) * integer; > +    } > + > +    return str; > +} parse_number() doesn't seem to ever fail. Surely we should check that we're not fed any random crap that happens to start with "-" or with a digit? Maybe the idea is that parse_value() will fail if there's any non-whitespace stuff left after parse_number() returns. In that case, I think it would be good to have a comment about that in parse_number(), and at least those cases need to be checked where some mandatory content is missing (like "-", "1.", "1.e3" or "1e"). > + > +static const char *parse_object(const char *str, pa_json_object *obj) { > +    pa_json_object *name, *value; > + > +    obj->object_values = pa_hashmap_new_full(pa_idxset_string_hash_func, pa_idxset_string_compare_func, > +                                             pa_xfree, (pa_free_cb_t) pa_json_object_unref); > + > +    while (*str != '}') { > +        str++; /* Consume leading '{' or ',' */ > + > +        str = parse_value(str, ":", &name); > +        if (!str || JSON_OBJECT_TYPE(name) != PA_JSON_TYPE_STRING) { > +            pa_log("Could not parse key for object: %s", str); str is either NULL or points to after the parsed non-string value. Is that really what you want to log? > +            goto error; > +        } > + > +        /* Consume the ':' */ > +        str++; > + > +        str = parse_value(str, ",}", &value); As we parse nested objects, maybe we should check that we don't recurse too deep. Consider {"a":{"a":{"a":{"a":{.... I remember libdbus used to be vulnerable to this kind of an attack that made libdbus allocate so deep stack that it crashed the application. > +        if (!str) { > +            pa_log("Could not parse value for object: %s", str); str is NULL, don't log it. > +            goto error; > +        } > + > +        pa_hashmap_put(obj->object_values, pa_xstrdup(pa_json_object_get_string(name)), value); > +        pa_json_object_unref(name); > +    } > + > +    /* Drop trailing '}' */ > +    str++; > + > +    /* We now know the value was correctly parsed */ > +    obj->type = PA_JSON_TYPE_OBJECT; > + > +    return str; > + > +error: > +    pa_hashmap_free(obj->object_values); obj->object_values should be set to NULL. > +    return NULL; > +} > + > +static const char *parse_array(const char *str, pa_json_object *obj) { > +    pa_json_object *value; > + > +    obj->array_values = pa_idxset_new(NULL, NULL); > + > +    while (*str != ']') { > +        str++; /* Consume leading '[' or ',' */ > + > +        /* Need to chew up whitespaces as a special case to deal with the > +         * possibility of an empty array */ > +        while (is_whitespace(*str)) > +            str++; > + > +        if (*str == ']') > +            break; > + > +        str = parse_value(str, ",]", &value); > +        if (!str) { > +            pa_log("Could not parse value for array: %s", str); str is NULL, don't log it. > +            goto error; > +        } > + > +        pa_idxset_put(obj->array_values, value, NULL); > +    } > + > +    /* Drop trailing ']' */ > +    str++; > + > +    /* We now know the value was correctly parsed */ > +    obj->type = PA_JSON_TYPE_ARRAY; > + > +    return str; > + > +error: > +    pa_idxset_free(obj->array_values, (pa_free_cb_t) pa_json_object_unref); obj->array_values should be set to NULL. > +    return NULL; > +} > + > +typedef enum { > +    JSON_PARSER_STATE_INIT, > +    JSON_PARSER_STATE_FINISH, > +} json_parser_state; > + > +static const char* parse_value(const char *str, const char *end, pa_json_object **obj) { > +    json_parser_state state = JSON_PARSER_STATE_INIT; > + > +    pa_return_val_if_fail(str != NULL, NULL); I think a regular assertion would fit better here. > + > +    *obj = json_object_new(); I think it's better style to assign to obj only after the parsing has succeeded. Now you're returning a pointer to freed data in case of failure. > + > +    while (!is_end(*str, end)) { > +        switch (state) { > +            case JSON_PARSER_STATE_INIT: > +                if (is_whitespace(*str)) { > +                    str++; > +                } else if (*str == 'n') { > +                    str = parse_null(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } else if (*str == 't' || *str == 'f') { > +                    str = parse_boolean(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } else if (*str == '"') { > +                    str = parse_string(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } else if (is_digit(*str) || *str == '-') { > +                    str = parse_number(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } else if (*str == '{') { > +                    str = parse_object(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } else if (*str == '[') { > +                    str = parse_array(str, *obj); > +                    state = JSON_PARSER_STATE_FINISH; > +                } There's no final "else" branch here. If none of these checks evaluate to true, that should be an error. > + > +                if (!str) > +                    goto error; > + > +                break; > + > +            case JSON_PARSER_STATE_FINISH: > +                /* Consume trailing whitespaces */ > +                if (is_whitespace(*str)) { > +                    str++; > +                } else { > +                    goto error; > +                } > +        } > +    } > + > +    if (JSON_OBJECT_TYPE(*obj) == PA_JSON_TYPE_INIT) { > +        /* We didn't actually get any data */ > +        pa_log("No data while parsing json string: '%s' till '%s'", str, end); end can be NULL so maybe use pa_strnull()? I don't know if this is required. I think glibc handles NULL gracefully, but the manual page for printf appears silent on this, so probably this is undefined behaviour. > +        goto error; > +    } > + > +    return str; > + > +error: > +    pa_json_object_unref(*obj); > +    return NULL; > +} > + > + > +pa_json_object* pa_json_parse(const char *str) { > +    pa_json_object *obj; > + > +    str = parse_value(str, NULL, &obj); > +    pa_assert(*str == '\0'); This will crash on any parse failure... > + > +    return obj; > +} > + > +pa_json_type pa_json_object_get_type(const pa_json_object *obj) { > +    return JSON_OBJECT_TYPE(obj); > +} > + > +void pa_json_object_unref(pa_json_object *obj) { > +    if (PA_REFCNT_DEC(obj) > 0) > +        return; > + > +    switch (JSON_OBJECT_TYPE(obj)) { > +        case PA_JSON_TYPE_INIT: > +        case PA_JSON_TYPE_INT: > +        case PA_JSON_TYPE_DOUBLE: > +        case PA_JSON_TYPE_BOOL: > +        case PA_JSON_TYPE_NULL: > +            break; > + > +        case PA_JSON_TYPE_STRING: > +            pa_xfree(obj->string_value); > +            break; > + > +        case PA_JSON_TYPE_OBJECT: > +            pa_hashmap_free(obj->object_values); > +            break; > + > +        case PA_JSON_TYPE_ARRAY: > +            pa_idxset_free(obj->array_values, (pa_free_cb_t) pa_json_object_unref); > +            break; > + > +        default: > +            pa_assert_not_reached(); > +    } > + > +    pa_xfree(obj); > +} > + > +int pa_json_object_get_int(const pa_json_object *o) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_INT, 0); Is it a programming error to call this function with non-int object? If yes, a regular assertion should be used. If not, then shouldn't the caller decide whether to log something or not? (In case you didn't know, pa_return_val_if_fail() prints "Assertion %s failed..." at debug log level.) This same comment applies to the following functions too. > +    return o->int_value; > +} > + > +double pa_json_object_get_double(const pa_json_object *o) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_DOUBLE, 0); > +    return o->double_value; > +} > + > +bool pa_json_object_get_bool(const pa_json_object *o) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_BOOL, false); > +    return o->bool_value; > +} > + > +const char* pa_json_object_get_string(const pa_json_object *o) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_STRING, NULL); > +    return o->string_value; > +} > + > +const pa_json_object* pa_json_object_get_object_member(const pa_json_object *o, const char *name) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_OBJECT, NULL); > +    return pa_hashmap_get(o->object_values, name); > +} > + > +int pa_json_object_get_array_length(const pa_json_object *o) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_ARRAY, 0); > +    return pa_idxset_size(o->array_values); > +} > + > +const pa_json_object* pa_json_object_get_array_member(const pa_json_object *o, int index) { > +    pa_return_val_if_fail(JSON_OBJECT_TYPE(o) == PA_JSON_TYPE_ARRAY, NULL); > +    return pa_idxset_get_by_index(o->array_values, index); > +} > diff --git a/src/pulse/json.h b/src/pulse/json.h > new file mode 100644 > index 0000000..6eba9a9 > --- /dev/null > +++ b/src/pulse/json.h > @@ -0,0 +1,57 @@ > +/*** > +  This file is part of PulseAudio. > + > +  Copyright 2016 Arun Raghavan <mail at arunraghavan.net> > + > +  PulseAudio is free software; you can redistribute it and/or modify > +  it under the terms of the GNU Lesser General Public License as published > +  by the Free Software Foundation; either version 2.1 of the License, > +  or (at your option) any later version. > + > +  PulseAudio is distributed in the hope that it will be useful, but > +  WITHOUT ANY WARRANTY; without even the implied warranty of > +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +  General Public License for more details. > + > +  You should have received a copy of the GNU Lesser General Public License > +  along with PulseAudio; if not, see <http://www.gnu.org/licenses/>. > +***/ > + > +#ifdef HAVE_CONFIG_H > +#include > +#endif This belongs in the .c file (if it's needed at all). > + > +#include > + > +PA_C_DECL_BEGIN This isn't a public API, so this seems unnecessary. > diff --git a/src/tests/json-test.c b/src/tests/json-test.c > new file mode 100644 > index 0000000..9be4dac > --- /dev/null > +++ b/src/tests/json-test.c > +START_TEST(object_test) { > +    pa_json_object *o; > +    const pa_json_object *v; > + > +    o = pa_json_parse(" { \"name\" : \"A Person\" } "); > + > +    fail_unless(o != NULL); > +    fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT); > + > +    v = pa_json_object_get_object_member(o, "name"); > +    fail_unless(v != NULL); > +    fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_STRING); > +    fail_unless(pa_streq(pa_json_object_get_string(v), "A Person")); > + > +    pa_json_object_unref(o); > + > +    o = pa_json_parse(" { \"age\" : -45.3e-0 } "); > + > +    fail_unless(o != NULL); > +    fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT); > + > +    v = pa_json_object_get_object_member(o, "age"); > +    fail_unless(v != NULL); > +    fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_DOUBLE); > +    fail_unless(IS_EQUAL(pa_json_object_get_double(v), -45.3)); > + > +    pa_json_object_unref(o); > + > +    o = pa_json_parse("{\"person\":true}"); > + > +    fail_unless(o != NULL); > +    fail_unless(pa_json_object_get_type(o) == PA_JSON_TYPE_OBJECT); > + > +    v = pa_json_object_get_object_member(o, "person"); > +    fail_unless(v != NULL); > +    fail_unless(pa_json_object_get_type(v) == PA_JSON_TYPE_BOOL); > +    fail_unless(IS_EQUAL(pa_json_object_get_bool(v), true)); IS_EQUAL is only supposed to be used with floating point values, right? -- Tanu