Ramkumar Ramachandra <artagnon@xxxxxxxxx> writes: > Start off with some broad design sketches. Compile succeeds, but > parser is incorrect. Include a Makefile rule to build it into > vcs-svn/lib.a. > > Signed-off-by: Ramkumar Ramachandra <artagnon@xxxxxxxxx> I initially thought I should refrain from doing a usual picky review for contrib/ material, but realized this does not go to contrib/, so... > diff --git a/vcs-svn/dump_export.c b/vcs-svn/dump_export.c > new file mode 100644 > index 0000000..04ede06 > --- /dev/null > +++ b/vcs-svn/dump_export.c > @@ -0,0 +1,73 @@ > +/* > + * Licensed under a two-clause BSD-style license. > + * See LICENSE for details. > + */ > + > +#include "git-compat-util.h" > +#include "strbuf.h" > +#include "line_buffer.h" > +#include "dump_export.h" > + > +void dump_export_begin_rev(int revision, const char *revprops, > + int prop_len) { Style. "{" at the beginning of the next line (same everywhere). > +void dump_export_node(const char *path, enum node_kind kind, > + enum node_action action, unsigned long text_len, > + unsigned long copyfrom_rev, const char *copyfrom_path) { > + printf("Node-path: %s\n", path); > + printf("Node-kind: "); > + switch (action) { > + case NODE_KIND_NORMAL: > + printf("file\n"); > + break; > + case NODE_KIND_EXECUTABLE: > + printf("file\n"); > + break; > + case NODE_KIND_SYMLINK: > + printf("file\n"); > + break; > + case NODE_KIND_GITLINK: > + printf("file\n"); > + break; > + case NODE_KIND_SUBDIR: > + die("Unsupported: subdirectory"); > + default: > + break; > + } Hmph, is it expected that the repertoire of node-kind stay the same, or will it be extended over time? It might make sense to change the above switch a more table-driven one. The same for node-action that follows this part of the code. > diff --git a/vcs-svn/svnload.c b/vcs-svn/svnload.c > new file mode 100644 > index 0000000..7043ae7 > --- /dev/null > +++ b/vcs-svn/svnload.c > @@ -0,0 +1,294 @@ > ... > +#define SVN_DATE_FORMAT "%Y-%m-%dT%H:%M:%S.000000Z" > +#define SVN_DATE_LEN 28 > +#define LENGTH_UNKNOWN (~0) > + > +static struct line_buffer input = LINE_BUFFER_INIT; > +static struct strbuf blobs[100]; > + Is there a rationale for "100", or is it just a random number that can be tweakable at the source level? Would we want to have a symbolic constant for it? The "blank" line has a trailing whitespace. > +static struct { > + unsigned long prop_len, text_len, copyfrom_rev, mark; > + int text_delta, prop_delta; /* Boolean */ > + enum node_action action; > + enum node_kind kind; > + struct strbuf copyfrom_path, path; > +} node_ctx; > + ... > +static void reset_node_ctx(void) > +{ > + node_ctx.prop_len = LENGTH_UNKNOWN; > + node_ctx.text_len = LENGTH_UNKNOWN; > + node_ctx.mark = 0; > + node_ctx.copyfrom_rev = 0; > + node_ctx.text_delta = -1; > + node_ctx.prop_delta = -1; Does your "Boolean" type take values 0 or -1? Or is it perhaps a tristate false=0, true=1, unknown=-1? If so, the comment on the type above needs to be fixed. > + strbuf_reset(&node_ctx.copyfrom_path); > + strbuf_reset(&node_ctx.path); > +} > + > +static void populate_props(struct strbuf *props, const char *author, > + const char *log, const char *date) { Style on "{" (look everywhere). > + strbuf_reset(props); Trailing whitespace. > +static void parse_author_line(char *val, struct strbuf *name, > + struct strbuf *email, struct strbuf *date) { > + char *t, *tz_off; > + char time_buf[SVN_DATE_LEN]; > + const struct tm *tm_time; > + > + /* Simon Hausmann <shausman@xxxxxxxxxxxxx> 1170199019 +0100 */ > + strbuf_reset(name); > + strbuf_reset(email); > + strbuf_reset(date); > + tz_off = strrchr(val, ' '); > + *tz_off++ = '\0'; > + t = strrchr(val, ' '); > + *(t - 1) = '\0'; /* Ignore '>' from email */ > + t ++; Unwanted SP before "++" (look everywhere). > + tm_time = time_to_tm(strtoul(t, NULL, 10), atoi(tz_off)); Has your caller already verified tz_off is a reasonable integer? > +void svnload_read(void) { > ... > + switch (val - t - 1) { > + case 1: > + if (!memcmp(t, "D", 1)) { > + node_ctx.action = NODE_ACTION_DELETE; > + } > + else if (!memcmp(t, "C", 1)) { Style: "} else if (...) {". > + node_ctx.action = NODE_ACTION_ADD; > + } > + else if (!memcmp(t, "R", 1)) { > + node_ctx.action = NODE_ACTION_REPLACE; > + } > + else if (!memcmp(t, "M", 1)) { > + node_ctx.action = NODE_ACTION_CHANGE; > + mode_incr = 7; > + if (!memcmp(val, "100644", 6)) > + node_ctx.kind = NODE_KIND_NORMAL; > + else if (!memcmp(val, "100755", 6)) > + node_ctx.kind = NODE_KIND_EXECUTABLE; > + else if (!memcmp(val, "120000", 6)) > + node_ctx.kind = NODE_KIND_SYMLINK; > + else if (!memcmp(val, "160000", 6)) > + node_ctx.kind = NODE_KIND_GITLINK; > + else if (!memcmp(val, "040000", 6)) Is <mode> guaranteed to be a 6-digit octal, padded with 0 to the left? The manual page of git-fast-import seems to hint that is the case, but I am double checking, as the leading zero is an error in the tree object. > + node_ctx.kind = NODE_KIND_SUBDIR; > + else { > + if (!memcmp(val, "755", 3)) > + node_ctx.kind = NODE_KIND_EXECUTABLE; > + else if(!memcmp(val, "644", 3)) Style; missing SP after "if/switch" (look everywhere). > + node_ctx.kind = NODE_KIND_NORMAL; > + else > + die("Unrecognized mode: %s", val); > + mode_incr = 4; > + } > + val += mode_incr; Hmm, this whole thing is ugly. Perhaps a table-lookup, or at least a helper function that translates "val" to node-kind (while advancing val as the side effect) may make it easier to read? > + t = strchr(val, ' '); > + *t++ = '\0'; > + strbuf_reset(&node_ctx.path); > + strbuf_add(&node_ctx.path, t, strlen(t)); > + if (!memcmp(val + 1, "inline", 6)) > + die("Unsupported dataref: inline"); > + else if (*val == ':') > + to_dump = &blobs[strtoul(val + 1, NULL, 10)]; Has anybody so far verified the decimal number at (val+1) is a reasonable value, or am I seeing a future CVE here? Do we care what follows the len of digits on this line? Does a line in G-F-I stream allow trailing garbage (this question is not limited to this part of the code)? > +int svnload_init(const char *filename) > +{ > + int i; > + if (buffer_init(&input, filename)) > + return error("cannot open %s: %s", filename, strerror(errno)); > + active_ctx = UNKNOWN_CTX; > + strbuf_init(&rev_ctx.props, MAX_GITSVN_LINE_LEN); > + ... > + strbuf_init(&node_ctx.copyfrom_path, MAX_GITSVN_LINE_LEN); > + for (i = 0; i < 100; i ++) > + strbuf_init(&blobs[i], 10000); > + return 0; > +} > + > +void svnload_deinit(void) > +{ > + int i; > + reset_rev_ctx(0); > + reset_node_ctx(); > + strbuf_release(&rev_ctx.props); > + ... > + strbuf_release(&node_ctx.copyfrom_path); > + for (i = 0; i < 100; i ++) > + strbuf_release(&blobs[i]); > + if (buffer_deinit(&input)) > + fprintf(stderr, "Input error\n"); > + if (ferror(stdout)) > + fprintf(stderr, "Output error\n"); > +} Whoever needs to add an element to rev_ctx must consistently touch reset, init and deinit. Would it help him/her if you moved the code so that these functions are close together from the beginning? -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html