[PATCH 4/4] add c2xml program

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Adds a new c2xml program which dumps the parse tree for a given file as well-formed XML. A DTD for the format is included as parse.dtd.

Signed-off-by: Rob Taylor <rob.taylor@xxxxxxxxxxxxxxx>
---
 Makefile  |   15 +++
 c2xml.c   |  324 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 parse.dtd |   48 +++++++++
 3 files changed, 387 insertions(+), 0 deletions(-)
 create mode 100644 c2xml.c
 create mode 100644 parse.dtd

diff --git a/Makefile b/Makefile
index 039fe38..67da31f 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,8 @@ CFLAGS=-O -g -Wall -Wwrite-strings -fpic
 LDFLAGS=-g
 AR=ar
 
+HAVE_LIBXML=$(shell pkg-config --exists libxml-2.0 && echo 'yes')
+
 #
 # For debugging, uncomment the next one
 #
@@ -21,8 +23,15 @@ PKGCONFIGDIR=$(LIBDIR)/pkgconfig
 
 PROGRAMS=test-lexing test-parsing obfuscate compile graph sparse test-linearize example \
 	 test-unssa test-dissect ctags
+
+
 INST_PROGRAMS=sparse cgcc
 
+ifeq ($(HAVE_LIBXML),yes)
+PROGRAMS+=c2xml
+INST_PROGRAMS+=c2xml
+endif
+
 LIB_H=    token.h parse.h lib.h symbol.h scope.h expression.h target.h \
 	  linearize.h bitmap.h ident-list.h compat.h flow.h allocate.h \
 	  storage.h ptrlist.h dissect.h
@@ -107,6 +116,12 @@ test-dissect: test-dissect.o $(LIBS)
 ctags: ctags.o $(LIBS)
 	$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< $(LIBS)
 
+ifeq ($(HAVE_LIBXML),yes)
+# libxml2's libraries come after the source file: linkers resolve symbols left to right.
+c2xml: c2xml.c $(LIBS) $(LIB_H)
+	$(CC) $(LDFLAGS) `pkg-config --cflags libxml-2.0` -o $@ $< $(LIBS) `pkg-config --libs libxml-2.0`
+endif
+
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(AR) rcs $@ $(LIB_OBJS)
 
diff --git a/c2xml.c b/c2xml.c
new file mode 100644
index 0000000..25d1c40
--- /dev/null
+++ b/c2xml.c
@@ -0,0 +1,324 @@
+/*
+ * Sparse c2xml
+ *
+ * Dumps the parse tree as an xml document
+ *
+ * Copyright (C) 2007 Rob Taylor
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include "parse.h"
+#include "scope.h"
+#include "symbol.h"
+
+xmlDocPtr doc = NULL;       /* document pointer */
+xmlNodePtr root_node = NULL;/* root node pointer */
+xmlDtdPtr dtd = NULL;       /* DTD pointer; only named in the disabled code in main() */
+xmlNsPtr ns = NULL;         /* namespace pointer; only named in the disabled code in main() */
+int idcount = 0;            /* value used for the next symbol's "id" attribute */
+
+static struct symbol_list *taglist = NULL;	/* NOTE(review): never referenced in this file */
+
+static void examine_symbol(struct symbol *sym, xmlNodePtr node);
+
+static xmlAttrPtr newNumProp(xmlNodePtr node, const xmlChar * name, int value)
+{
+	char text[256];		/* VALUE rendered as a decimal string */
+	snprintf(text, sizeof(text), "%d", value);
+	return xmlNewProp(node, name, text);	/* attribute NAME on NODE */
+}
+
+static xmlAttrPtr newIdProp(xmlNodePtr node, const xmlChar * name, unsigned int id)
+{
+	char text[256];		/* "_<id>": the leading '_' keeps the value a legal XML ID */
+	snprintf(text, sizeof(text), "_%u", id);	/* %u matches the unsigned parameter */
+	return xmlNewProp(node, name, text);	/* attribute NAME on NODE */
+}
+
+/* Create a <symbol> element under PARENT describing SYM and return it. */
+static xmlNodePtr new_sym_node(struct symbol *sym, const char *name, xmlNodePtr parent)
+{
+	xmlNodePtr node;
+	const char *ident;
+
+	/* Validate the arguments before anything dereferences them. */
+	assert(name != NULL);
+	assert(sym != NULL);
+	assert(parent != NULL);
+
+	/* NAME becomes the "type" attribute; the id comes from the global counter. */
+	node = xmlNewChild(parent, NULL, "symbol", NULL);
+	xmlNewProp(node, "type",  name);
+	newIdProp(node, "id", idcount++);
+
+	ident = sym->ident ? show_ident(sym->ident) : NULL;
+	if (ident)
+		xmlNewProp(node, "ident", ident);
+
+	/* The start position is always written; the end only when endpos is set. */
+	xmlNewProp(node, "file", stream_name(sym->pos.stream));
+	newNumProp(node, "start-line", sym->pos.line);
+	newNumProp(node, "start-col", sym->pos.pos);
+	if (sym->endpos.type) {
+		newNumProp(node, "end-line", sym->endpos.line);
+		newNumProp(node, "end-col", sym->endpos.pos);
+		if (sym->pos.stream != sym->endpos.stream)
+			xmlNewProp(node, "end-file", stream_name(sym->endpos.stream));
+	}
+	/* Remember the node on the symbol: examine_symbol() uses it as a visited mark. */
+	sym->aux = node;
+	return node;
+}
+
+/* Emit every symbol in LIST as a child of NODE; examine_symbol() itself
+ * skips symbols that were already emitted. */
+static inline void examine_members(struct symbol_list *list, xmlNodePtr node)
+{
+	struct symbol *sym;
+
+	FOR_EACH_PTR(list, sym) {
+		examine_symbol(sym, node);
+	} END_FOR_EACH_PTR(sym);
+}
+
+/* Add NAME="1" to NODE for each set bit i of SYM's modifiers named in modifiers[i]. */
+static void examine_modifiers(struct symbol *sym, xmlNodePtr node)
+{
+	static const char *const modifiers[] = {
+			"auto",
+			"register",
+			"static",
+			"extern",
+			"const",
+			"volatile",
+			"signed",
+			"unsigned",
+			"char",
+			"short",
+			"long",
+			"long-long",
+			"typedef",
+			NULL,
+			NULL,
+			NULL,
+			NULL,
+			NULL,
+			"inline",
+			"addressable",
+			"nocast",
+			"noderef",
+			"accessed",
+			"toplevel",
+			"label",
+			"assigned",
+			"type-type",
+			"safe",
+			"user-type",
+			"force",
+			"explicitly-signed",
+			"bitwise"};
+	unsigned int i;
+
+	if (sym->namespace != NS_SYMBOL)
+		return;
+
+	/* 1UL rather than 1: shifting a signed int into bit 31 is undefined. */
+	for (i = 0; i < 32; i++) {
+		if ((sym->ctype.modifiers & (1UL << i)) && modifiers[i])
+			xmlNewProp(node, modifiers[i], "1");
+	}
+}
+
+/* Record SYM's bit-size, alignment and offset on NODE; "bit-offset" is
+ * added only when is_bitfield_type() accepts SYM. */
+static void
+examine_layout(struct symbol *sym, xmlNodePtr node)
+{
+	/* Presumably fills in the layout fields read below -- confirm in symbol.h. */
+	examine_symbol_type(sym);
+
+	newNumProp(node, "bit-size", sym->bit_size);
+	newNumProp(node, "alignment", sym->ctype.alignment);
+	newNumProp(node, "offset", sym->offset);
+	if (is_bitfield_type(sym))
+		newNumProp(node, "bit-offset", sym->bit_offset);
+}
+
+/* Emit SYM as a <symbol> child of NODE, then its base type and its members
+ * or arguments.  Visited (sym->aux set) and reserved symbols are skipped. */
+static void examine_symbol(struct symbol *sym, xmlNodePtr node)
+{
+	xmlNodePtr child;
+	struct symbol *base_sym;
+	const char *base;
+	xmlChar *base_id;
+
+	if (!sym || sym->aux)		/* nothing to do, or already visited */
+		return;
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	child = new_sym_node(sym, get_type_name(sym->type), node);
+	examine_modifiers(sym, child);
+	examine_layout(sym, child);
+
+	base_sym = sym->ctype.base_type;
+	if (base_sym && (base = builtin_typename(base_sym)) != NULL) {
+		xmlNewProp(child, "base-type-builtin", base);
+	} else if (base_sym) {
+		/* Non-builtin base types are emitted under the root node and
+		 * referenced by id; the call is a no-op if already emitted. */
+		examine_symbol(base_sym, root_node);
+		/* xmlGetProp() returns a copy that the caller must xmlFree(). */
+		base_id = xmlGetProp((xmlNodePtr)base_sym->aux, "id");
+		xmlNewProp(child, "base-type", base_id);
+		xmlFree(base_id);
+	}
+
+	if (sym->array_size) {
+		/* TODO: modify get_expression_value to give error return */
+		newNumProp(child, "array-size", get_expression_value(sym->array_size));
+	}
+
+	switch (sym->type) {
+	case SYM_STRUCT:
+	case SYM_UNION:
+		examine_members(sym->symbol_list, child);
+		break;
+	case SYM_FN:
+		examine_members(sym->arguments, child);
+		break;
+	case SYM_UNINITIALIZED:
+		xmlNewProp(child, "base-type-builtin", builtin_typename(sym));
+		break;
+	default:
+		break;
+	}
+}
+
+/* Walk TOKEN's list up to its eof token and return the position of the
+ * second-to-last token visited, or NULL if fewer than two were visited. */
+static struct position *get_expansion_end (struct token *token)
+{
+	struct token *last = NULL, *before_last = NULL;
+
+	while (!eof_token(token)) {
+		before_last = last;
+		last = token;
+		token = token->next;
+	}
+	return before_last ? &before_last->pos : NULL;
+}
+
+/* Emit macro SYM as a <symbol type="macro"> child of NODE, first setting
+ * sym->endpos from the macro's expansion token list. */
+static void examine_macro(struct symbol *sym, xmlNodePtr node)
+{
+	struct position *pos;
+
+	/* this should probably go in the main codebase*/
+	pos = get_expansion_end(sym->expansion);
+	if (pos)
+		sym->endpos = *pos;
+	else
+		sym->endpos = sym->pos;	/* no end found: reuse the start position */
+
+	new_sym_node(sym, "macro", node);
+}
+
+/* Dispatch SYM on its namespace: macros and typedef/struct/symbol entries are
+ * emitted under the root node; an unknown namespace value aborts via die(). */
+static void examine_namespace(struct symbol *sym)
+{
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	switch(sym->namespace) {
+	case NS_MACRO:
+		examine_macro(sym, root_node);
+		break;
+	case NS_TYPEDEF:
+	case NS_STRUCT:
+	case NS_SYMBOL:
+		examine_symbol(sym, root_node);
+		break;
+	/* nothing is emitted for the remaining known namespaces */
+	case NS_NONE:
+	case NS_LABEL:
+	case NS_ITERATOR:
+	case NS_UNDEF:
+	case NS_PREPROCESSOR:
+	case NS_KEYWORD:
+		break;
+	default:
+		die("Unrecognised namespace type %d",sym->namespace);
+	}
+}
+
+/* Return the index of the first input stream named NAME, or -1 if none matches. */
+static int get_stream_id (const char *name)
+{
+	int id = 0;
+
+	while (id < input_stream_nr && strcmp(name, stream_name(id)) != 0)
+		id++;
+	return id < input_stream_nr ? id : -1;
+}
+
+/* Emit each symbol in LIST whose position lies in the stream named FILE. */
+static inline void examine_symbol_list(const char *file, struct symbol_list *list)
+{
+	const int wanted = get_stream_id (file);
+	struct symbol *sym;
+	if (list == NULL)
+		return;
+	FOR_EACH_PTR(list, sym) {
+		if (sym->pos.stream == wanted)
+			examine_namespace(sym);
+	} END_FOR_EACH_PTR(sym);
+}
+
+/* Emit the symbols of each file returned by sparse_initialize() as one <parse> document. */
+int main(int argc, char **argv)
+{
+	struct string_list *filelist = NULL;
+	struct symbol_list *symlist = NULL;
+	char *file;
+	int written;
+
+	doc = xmlNewDoc("1.0");
+	root_node = xmlNewNode(NULL, "parse");
+	xmlDocSetRootElement(doc, root_node);
+
+/* - A DTD is probably unnecessary for something like this
+ 
+	dtd = xmlCreateIntSubset(doc, "parse", "http://www.kernel.org/pub/software/devel/sparse/parse.dtd"; NULL, "parse.dtd");
+
+	ns = xmlNewNs (root_node, "http://www.kernel.org/pub/software/devel/sparse/parse.dtd";, NULL);
+
+	xmlSetNs(root_node, ns);
+*/
+	symlist = sparse_initialize(argc, argv, &filelist);
+
+	FOR_EACH_PTR_NOTAG(filelist, file) {
+		examine_symbol_list(file, symlist);
+		sparse_keep_tokens(file);
+		examine_symbol_list(file, file_scope->symbols);
+		examine_symbol_list(file, global_scope->symbols);
+	} END_FOR_EACH_PTR_NOTAG(file);
+
+	written = xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);	/* "-" is stdout; -1 on failure */
+	xmlFreeDoc(doc);
+	xmlCleanupParser();
+	return written < 0 ? 1 : 0;	/* report a failed write through the exit status */
+}
+
diff --git a/parse.dtd b/parse.dtd
new file mode 100644
index 0000000..0cbd1b4
--- /dev/null
+++ b/parse.dtd
@@ -0,0 +1,48 @@
+<!ELEMENT parse (symbol+) >
+
+<!ELEMENT symbol (symbol*) >
+
+<!ATTLIST symbol type (uninitialized|preprocessor|basetype|node|pointer|function|array|struct|union|enum|typedef|typeof|member|bitfield|label|restrict|fouled|keyword|bad|macro) #REQUIRED
+		 id ID #REQUIRED
+		 file CDATA #REQUIRED
+		 start-line CDATA #REQUIRED
+		 start-col CDATA #REQUIRED
+		 end-line CDATA #IMPLIED
+		 end-col CDATA #IMPLIED
+		 end-file CDATA #IMPLIED
+		 ident CDATA #IMPLIED
+		 base-type IDREF #IMPLIED
+		 base-type-builtin (char|signed char|unsigned char|short|signed short|unsigned short|int|signed int|unsigned int|signed long|long|unsigned long|long long|signed long long|unsigned long long|void|bool|string|float|double|long double|incomplete type|abstract int|abstract fp|label type|bad type) #IMPLIED
+		 array-size CDATA #IMPLIED
+		 bit-size CDATA #IMPLIED
+		 alignment CDATA #IMPLIED
+		 offset CDATA #IMPLIED
+		 bit-offset CDATA #IMPLIED
+
+		 auto (0|1) #IMPLIED
+		 register (0|1) #IMPLIED
+		 static (0|1) #IMPLIED
+		 extern (0|1) #IMPLIED
+		 const (0|1) #IMPLIED
+		 volatile (0|1) #IMPLIED
+		 signed (0|1) #IMPLIED
+		 unsigned (0|1) #IMPLIED
+		 char (0|1) #IMPLIED
+		 short (0|1) #IMPLIED
+		 long (0|1) #IMPLIED
+		 long-long (0|1) #IMPLIED
+		 typedef (0|1) #IMPLIED
+		 inline (0|1) #IMPLIED
+		 addressable (0|1) #IMPLIED
+		 nocast (0|1) #IMPLIED
+		 noderef (0|1) #IMPLIED
+		 accessed (0|1) #IMPLIED
+		 toplevel (0|1) #IMPLIED
+		 label (0|1) #IMPLIED
+		 assigned (0|1) #IMPLIED
+		 type-type (0|1) #IMPLIED
+		 safe (0|1) #IMPLIED
+		 user-type (0|1) #IMPLIED
+		 force (0|1) #IMPLIED
+		 explicitly-signed (0|1) #IMPLIED
+		 bitwise (0|1) #IMPLIED >
-- 
1.5.2-rc3.GIT


--------------040700010305000000020101--
-
To unsubscribe from this list: send the line "unsubscribe linux-sparse" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Newbies FAQ]     [LKML]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Trinity Fuzzer Tool]

  Powered by Linux