Adds new c2xml program which dumps out the parse tree for a given file as well formed xml. A DTD for the format is included as parse.dtd. Signed-off-by: Rob Taylor <rob.taylor@xxxxxxxxxxxxxxx> --- Makefile | 15 +++ c2xml.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ parse.dtd | 48 +++++++++ 3 files changed, 387 insertions(+), 0 deletions(-) create mode 100644 c2xml.c create mode 100644 parse.dtd diff --git a/Makefile b/Makefile index 039fe38..67da31f 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,8 @@ CFLAGS=-O -g -Wall -Wwrite-strings -fpic LDFLAGS=-g AR=ar +HAVE_LIBXML=$(shell pkg-config --exists libxml-2.0 && echo 'yes') + # # For debugging, uncomment the next one # @@ -21,8 +23,15 @@ PKGCONFIGDIR=$(LIBDIR)/pkgconfig PROGRAMS=test-lexing test-parsing obfuscate compile graph sparse test-linearize example \ test-unssa test-dissect ctags + + INST_PROGRAMS=sparse cgcc +ifeq ($(HAVE_LIBXML),yes) +PROGRAMS+=c2xml +INST_PROGRAMS+=c2xml +endif + LIB_H= token.h parse.h lib.h symbol.h scope.h expression.h target.h \ linearize.h bitmap.h ident-list.h compat.h flow.h allocate.h \ storage.h ptrlist.h dissect.h @@ -107,6 +116,12 @@ test-dissect: test-dissect.o $(LIBS) ctags: ctags.o $(LIBS) $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< $(LIBS) +ifeq ($(HAVE_LIBXML),yes) +c2xml: c2xml.c $(LIBS) $(LIB_H) + $(CC) $(LDFLAGS) `pkg-config --cflags --libs libxml-2.0` -o $@ $< $(LIBS) + +endif + $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(AR) rcs $@ $(LIB_OBJS) diff --git a/c2xml.c b/c2xml.c new file mode 100644 index 0000000..25d1c40 --- /dev/null +++ b/c2xml.c @@ -0,0 +1,324 @@ +/* + * Sparse c2xml + * + * Dumps the parse tree as an xml document + * + * Copyright (C) 2007 Rob Taylor + * + * Licensed under the Open Software License version 1.1 + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <assert.h> +#include <libxml/parser.h> +#include <libxml/tree.h> + +#include "parse.h" +#include "scope.h" 
+#include "symbol.h"
+
+xmlDocPtr doc = NULL;       /* document pointer */
+xmlNodePtr root_node = NULL;/* root node pointer */
+xmlDtdPtr dtd = NULL;       /* DTD pointer */
+xmlNsPtr ns = NULL;         /* namespace pointer */
+int idcount = 0;
+
+static struct symbol_list *taglist = NULL;
+
+static void examine_symbol(struct symbol *sym, xmlNodePtr node);
+
+/* Attach a string attribute; libxml2 takes xmlChar strings, callers here use char. */
+static xmlAttrPtr newProp(xmlNodePtr node, const char *name, const char *value)
+{
+	return xmlNewProp(node, BAD_CAST name, BAD_CAST value);
+}
+
+/* Attach attribute @name to @node with @value printed as a decimal integer. */
+static xmlAttrPtr newNumProp(xmlNodePtr node, const char *name, int value)
+{
+	char buf[256];
+
+	snprintf(buf, sizeof buf, "%d", value);
+	return newProp(node, name, buf);
+}
+
+/*
+ * Attach attribute @name to @node with the value "_<id>".  The '_' prefix
+ * keeps the value a legal XML name, which the ID attribute type used for
+ * "id" in parse.dtd requires (a name cannot begin with a digit).
+ */
+static xmlAttrPtr newIdProp(xmlNodePtr node, const char *name, unsigned int id)
+{
+	char buf[256];
+
+	snprintf(buf, sizeof buf, "_%u", id);
+	return newProp(node, name, buf);
+}
+
+/*
+ * Create a <symbol> element for @sym under @parent.  It carries @name as its
+ * "type", a fresh "id" taken from idcount, the identifier (if any) and the
+ * source position.  The new node is stored in sym->aux, which the rest of
+ * this file uses both as a "visited" mark and to look the node up again.
+ */
+static xmlNodePtr new_sym_node(struct symbol *sym, const char *name, xmlNodePtr parent)
+{
+	xmlNodePtr node;
+	const char *ident;
+
+	assert(name != NULL);
+	assert(sym != NULL);
+	assert(parent != NULL);
+
+	/* touch sym only after the asserts above have checked it */
+	ident = show_ident(sym->ident);
+
+	node = xmlNewChild(parent, NULL, BAD_CAST "symbol", NULL);
+
+	newProp(node, "type", name);
+	newIdProp(node, "id", idcount);
+
+	if (sym->ident && ident)
+		newProp(node, "ident", ident);
+	newProp(node, "file", stream_name(sym->pos.stream));
+
+	newNumProp(node, "start-line", sym->pos.line);
+	newNumProp(node, "start-col", sym->pos.pos);
+
+	if (sym->endpos.type) {
+		newNumProp(node, "end-line", sym->endpos.line);
+		newNumProp(node, "end-col", sym->endpos.pos);
+		if (sym->pos.stream != sym->endpos.stream)
+			newProp(node, "end-file", stream_name(sym->endpos.stream));
+	}
+	sym->aux = node;
+
+	idcount++;
+
+	return node;
+}
+
+/* Emit every symbol of @list as a child of @node. */
+static inline void examine_members(struct symbol_list *list, xmlNodePtr node)
+{
+	struct symbol *sym;
+
+	FOR_EACH_PTR(list, sym) {
+		examine_symbol(sym, node);
+	} END_FOR_EACH_PTR(sym);
+}
+
+/*
+ * For every bit set in sym->ctype.modifiers that has a name in the table
+ * below, add an attribute of that name with the value "1" to @node.
+ * Symbols outside NS_SYMBOL are left alone.
+ */
+static void examine_modifiers(struct symbol *sym, xmlNodePtr node)
+{
+	/* indexed by bit number; a NULL entry means the bit is not reported */
+	static const char *const modifiers[] = {
+		"auto",
+		"register",
+		"static",
+		"extern",
+		"const",
+		"volatile",
+		"signed",
+		"unsigned",
+		"char",
+		"short",
+		"long",
+		"long-long",
+		"typedef",
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		"inline",
+		"addressable",
+		"nocast",
+		"noderef",
+		"accessed",
+		"toplevel",
+		"label",
+		"assigned",
+		"type-type",
+		"safe",
+		"user-type",
+		"force",
+		"explicitly-signed",
+		"bitwise"
+	};
+	unsigned int i;
+
+	if (sym->namespace != NS_SYMBOL)
+		return;
+
+	/* iterate over the 32 bit bitfield */
+	for (i = 0; i < sizeof(modifiers) / sizeof(modifiers[0]); i++) {
+		/* 1UL, not 1: shifting a signed int left by 31 is undefined */
+		if ((sym->ctype.modifiers & (1UL << i)) && modifiers[i])
+			newProp(node, modifiers[i], "1");
+	}
+}
+
+/*
+ * Add the bit-size, alignment and offset of @sym to @node, plus bit-offset
+ * when @sym is a bitfield.  examine_symbol_type() is run on @sym first.
+ */
+static void
+examine_layout(struct symbol *sym, xmlNodePtr node)
+{
+	examine_symbol_type(sym);
+
+	newNumProp(node, "bit-size", sym->bit_size);
+	newNumProp(node, "alignment", sym->ctype.alignment);
+	newNumProp(node, "offset", sym->offset);
+	if (is_bitfield_type(sym)) {
+		newNumProp(node, "bit-offset", sym->bit_offset);
+	}
+}
+
+/*
+ * Emit @sym as a child of @node unless it is NULL, was already emitted
+ * (sym->aux set) or has a reserved identifier.  A base type with a builtin
+ * name is recorded as "base-type-builtin"; any other base type is emitted
+ * under the root node and referenced through "base-type" by its id.
+ * Struct/union members and function arguments become children of the new
+ * node.
+ */
+static void examine_symbol(struct symbol *sym, xmlNodePtr node)
+{
+	xmlNodePtr child;
+	struct symbol *base_type;
+	const char *base;
+	int array_size;
+
+	if (!sym)
+		return;
+	if (sym->aux)		/* already visited */
+		return;
+
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	child = new_sym_node(sym, get_type_name(sym->type), node);
+	examine_modifiers(sym, child);
+	examine_layout(sym, child);
+
+	base_type = sym->ctype.base_type;
+	if (base_type) {
+		base = builtin_typename(base_type);
+		if (base) {
+			newProp(child, "base-type-builtin", base);
+		} else {
+			if (!base_type->aux)
+				examine_symbol(base_type, root_node);
+			/* aux stays NULL when the base type itself was skipped */
+			if (base_type->aux) {
+				/* xmlGetProp() returns a copy that the caller must free */
+				xmlChar *id = xmlGetProp((xmlNodePtr)base_type->aux, BAD_CAST "id");
+
+				if (id) {
+					xmlNewProp(child, BAD_CAST "base-type", id);
+					xmlFree(id);
+				}
+			}
+		}
+	}
+	if (sym->array_size) {
+		/* TODO: modify get_expression_value to give error return */
+		array_size = get_expression_value(sym->array_size);
+		newNumProp(child, "array-size", array_size);
+	}
+
+	switch (sym->type) {
+	case SYM_STRUCT:
+	case SYM_UNION:
+		examine_members(sym->symbol_list, child);
+		break;
+	case SYM_FN:
+		examine_members(sym->arguments, child);
+		break;
+	case SYM_UNINITIALIZED:
+		newProp(child, "base-type-builtin", builtin_typename(sym));
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Walk @token up to the eof token and return the position of the
+ * second-to-last token seen before it, or NULL when fewer than two tokens
+ * precede the eof token.
+ */
+static struct position *get_expansion_end (struct token *token)
+{
+	struct token *last = NULL, *prev = NULL;
+
+	while (!eof_token(token)) {
+		prev = last;
+		last = token;
+		token = token->next;
+	}
+
+	return prev ? &prev->pos : NULL;
+}
+
+/*
+ * Emit a node of type "macro" for @sym under @node.  sym->endpos is set
+ * from the macro expansion first, falling back to sym->pos.
+ */
+static void examine_macro(struct symbol *sym, xmlNodePtr node)
+{
+	struct position *pos;
+
+	/* this should probably go in the main codebase */
+	pos = get_expansion_end(sym->expansion);
+	if (pos)
+		sym->endpos = *pos;
+	else
+		sym->endpos = sym->pos;
+
+	new_sym_node(sym, "macro", node);
+}
+
+/*
+ * Emit @sym under the root node according to its namespace: macros via
+ * examine_macro(), typedefs/structs/symbols via examine_symbol().  The other
+ * known namespaces are ignored and an unknown one is fatal.
+ */
+static void examine_namespace(struct symbol *sym)
+{
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	switch(sym->namespace) {
+	case NS_MACRO:
+		examine_macro(sym, root_node);
+		break;
+	case NS_TYPEDEF:
+	case NS_STRUCT:
+	case NS_SYMBOL:
+		examine_symbol(sym, root_node);
+		break;
+	case NS_NONE:
+	case NS_LABEL:
+	case NS_ITERATOR:
+	case NS_UNDEF:
+	case NS_PREPROCESSOR:
+	case NS_KEYWORD:
+		break;
+	default:
+		die("Unrecognised namespace type %d",sym->namespace);
+	}
+
+}
+
+/* Return the index of the input stream whose name equals @name, or -1. */
+static int get_stream_id (const char *name)
+{
+	int i;
+
+	for (i=0; i<input_stream_nr; i++) {
+		if (strcmp(name, stream_name(i))==0)
+			return i;
+	}
+	return -1;
+}
+
+/* Emit those symbols of @list whose position lies in the stream named @file. */
+static inline void examine_symbol_list(const char *file, struct symbol_list *list)
+{
+	struct symbol *sym;
+	int stream_id = get_stream_id (file);
+
+	if (!list)
+		return;
+	FOR_EACH_PTR(list, sym) {
+		if (sym->pos.stream == stream_id)
+			examine_namespace(sym);
+	} END_FOR_EACH_PTR(sym);
+}
+
+/*
+ * Build a <parse> document from the symbols of every file named on the
+ * command line and write it to standard output as UTF-8.
+ * Returns 0 on success, 1 when the document could not be written.
+ */
+int main(int argc, char **argv)
+{
+	struct string_list *filelist = NULL;
+	struct symbol_list *symlist = NULL;
+	char *file;
+	int status = 0;
+
+	doc = xmlNewDoc(BAD_CAST "1.0");
+	root_node = xmlNewNode(NULL, BAD_CAST "parse");
+	if (!doc || !root_node)
+		die("unable to create the XML document");
+	xmlDocSetRootElement(doc, root_node);
+
+/* - A DTD is probably unnecessary for something like this
+
+	dtd = xmlCreateIntSubset(doc, "parse", "http://www.kernel.org/pub/software/devel/sparse/parse.dtd" NULL, "parse.dtd");
+
+	ns = xmlNewNs (root_node, "http://www.kernel.org/pub/software/devel/sparse/parse.dtd", NULL);
+
+	xmlSetNs(root_node, ns);
+*/
+	symlist = sparse_initialize(argc, argv, &filelist);
+
+	FOR_EACH_PTR_NOTAG(filelist, file) {
+		examine_symbol_list(file, symlist);
+		sparse_keep_tokens(file);
+		examine_symbol_list(file, file_scope->symbols);
+		examine_symbol_list(file, global_scope->symbols);
+	} END_FOR_EACH_PTR_NOTAG(file);
+
+	/* a negative return means the document was not written */
+	if (xmlSaveFormatFileEnc("-", doc, "UTF-8", 1) < 0)
+		status = 1;
+	xmlFreeDoc(doc);
+	xmlCleanupParser();
+
+	return status;
+}
+
diff --git a/parse.dtd b/parse.dtd
new file mode 100644
index 0000000..0cbd1b4
--- /dev/null
+++ b/parse.dtd
@@ -0,0 +1,61 @@
+<!ELEMENT parse (symbol+) >
+
+<!ELEMENT symbol (symbol*) >
+
+<!--
+  base-type-builtin is one of: char, signed char, unsigned char, short,
+  signed short, unsigned short, int, signed int, unsigned int, signed long,
+  long, unsigned long, long long, signed long long, unsigned long long,
+  void, bool, string, float, double, long double, incomplete type,
+  abstract int, abstract fp, label type, bad type.
+  It is declared CDATA because the values of an enumerated attribute type
+  must be name tokens, which cannot contain spaces.
+-->
+
+<!ATTLIST symbol type (uninitialized|preprocessor|basetype|node|pointer|function|array|struct|union|enum|typedef|typeof|member|bitfield|label|restrict|fouled|keyword|bad|macro) #REQUIRED
+                 id ID #REQUIRED
+                 file CDATA #REQUIRED
+                 start-line CDATA #REQUIRED
+                 start-col CDATA #REQUIRED
+                 end-line CDATA #IMPLIED
+                 end-col CDATA #IMPLIED
+                 end-file CDATA #IMPLIED
+
+                 ident CDATA #IMPLIED
+                 base-type IDREF #IMPLIED
+                 base-type-builtin CDATA #IMPLIED
+
+                 array-size CDATA #IMPLIED
+
+                 bit-size CDATA #IMPLIED
+                 alignment CDATA #IMPLIED
+                 offset CDATA #IMPLIED
+                 bit-offset CDATA #IMPLIED
+
+                 auto (0|1) #IMPLIED
+                 register (0|1) #IMPLIED
+                 static (0|1) #IMPLIED
+                 extern (0|1) #IMPLIED
+                 const (0|1) #IMPLIED
+                 volatile (0|1) #IMPLIED
+                 signed (0|1) #IMPLIED
+                 unsigned (0|1) #IMPLIED
+                 char (0|1) #IMPLIED
+                 short (0|1) #IMPLIED
+                 long (0|1) #IMPLIED
+                 long-long (0|1) #IMPLIED
+                 typedef (0|1) #IMPLIED
+                 inline (0|1) #IMPLIED
+                 addressable (0|1) #IMPLIED
+                 nocast (0|1) #IMPLIED
+                 noderef (0|1) #IMPLIED
+                 accessed (0|1) #IMPLIED
+                 toplevel (0|1) #IMPLIED
+                 label (0|1) #IMPLIED
+                 assigned (0|1) #IMPLIED
+                 type-type (0|1) #IMPLIED
+                 safe (0|1) #IMPLIED
+                 user-type (0|1) #IMPLIED
+                 force (0|1) #IMPLIED
+                 explicitly-signed (0|1) #IMPLIED
+                 bitwise (0|1) #IMPLIED >
-- 
1.5.2-rc3.GIT

--------------040700010305000000020101--
-
To unsubscribe from this list: send the line "unsubscribe linux-sparse" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html