[WIP PATCH] vcs-svn: Add an svndiff0 parser

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a parser to apply one window from an svndiff version 0 stream to a
given source. svndiff version 0 is used by dumpfile version 3; this is
one step towards getting svn-fe to parse the output produced by
svnrdump. The svndiff format is described in $SVN_TRUNK/notes/svndiff.

Signed-off-by: Ramkumar Ramachandra <artagnon@xxxxxxxxx>
---
 vcs-svn/Makefile      |    2 +
 vcs-svn/line_buffer.c |    7 +-
 vcs-svn/line_buffer.h |    2 +-
 vcs-svn/svndiff.c     |  240 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.h     |   41 +++++++++
 5 files changed, 288 insertions(+), 4 deletions(-)
 create mode 100644 vcs-svn/Makefile
 create mode 100644 vcs-svn/svndiff.c
 create mode 100644 vcs-svn/svndiff.h

diff --git a/vcs-svn/Makefile b/vcs-svn/Makefile
new file mode 100644
index 0000000..62cf093
--- /dev/null
+++ b/vcs-svn/Makefile
@@ -0,0 +1,2 @@
+default: svndiff.c line_buffer.c
+	$(CC) -I../ -g3 -O0 -o svndiff svndiff.c line_buffer.c
diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index 1543567..17618c0 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -66,15 +66,16 @@ char *buffer_read_string(uint32_t len)
 	return ferror(infile) ? NULL : s;
 }
 
-void buffer_copy_bytes(uint32_t len)
+void buffer_copy_bytes(uint32_t len, FILE *dst)
 {
 	uint32_t in;
+	dst = dst ? dst : stdout;
 	while (len > 0 && !feof(infile) && !ferror(infile)) {
 		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
 		in = fread(byte_buffer, 1, in, infile);
 		len -= in;
-		fwrite(byte_buffer, 1, in, stdout);
-		if (ferror(stdout)) {
+		fwrite(byte_buffer, 1, in, dst);
+		if (ferror(dst)) {
 			buffer_skip_bytes(len);
 			return;
 		}
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index 9c78ae1..fe67a22 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -5,7 +5,7 @@ int buffer_init(const char *filename);
 int buffer_deinit(void);
 char *buffer_read_line(void);
 char *buffer_read_string(uint32_t len);
-void buffer_copy_bytes(uint32_t len);
+void buffer_copy_bytes(uint32_t len, FILE *dst);
 void buffer_skip_bytes(uint32_t len);
 void buffer_reset(void);
 
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
new file mode 100644
index 0000000..2d66dce
--- /dev/null
+++ b/vcs-svn/svndiff.c
@@ -0,0 +1,241 @@
+#include "git-compat-util.h"
+#include "line_buffer.h"
+#include "svndiff.h"
+
+#define DEBUG 1
+
+#define SVN_DELTA_WINDOW_SIZE 102400
+#define MAX_ENCODED_INT_LEN 10
+#define MAX_INSTRUCTION_LEN (2*MAX_ENCODED_INT_LEN+1)
+#define MAX_INSTRUCTION_SECTION_LEN (SVN_DELTA_WINDOW_SIZE*MAX_INSTRUCTION_LEN)
+static char buf[SVN_DELTA_WINDOW_SIZE];
+
+/* Remove when linking to gitcore */
+void die(const char *err, ...)
+{
+	va_list params;
+	va_start(params, err);
+	vfprintf(stderr, err, params);
+	printf("\n");
+	va_end(params);
+	exit(128);
+}
+
+/* Return the number of bytes read */
+size_t read_one_size(size_t *size)
+{
+	unsigned char c;
+	size_t result, bsize;
+	result = 0;
+	bsize = 0;
+
+	while (1)
+	{
+		fread(&c, 1, 1, stdin);
+		result = (result << 7) | (c & 127);
+		bsize ++;
+		if (!(c & 128))
+			/* No continuation bit */
+			break;
+	}
+	*size = result;
+	return bsize;
+}
+
+/* Return the number of bytes read */
+size_t read_one_instruction(struct svndiff_instruction *op)
+{
+	unsigned char c;
+	size_t action, bsize;
+	bsize = 0;
+
+	/* Read the 1-byte instruction-selector */
+	fread(&c, 1, 1, stdin);
+	bsize ++;
+
+	/* Decode the instruction selector from the two higher order
+	   bits; the remaining 6 bits may contain the length */
+	action = (c >> 6) & 3;
+	if (action >= 3)
+		die("Invalid instruction %d", action);
+
+	op->action_code = (enum svndiff_action)(action);
+
+	/* Attempt to extract the length length from the remaining
+	   bits */
+	op->length = c & 63;
+	if (op->length == 0)
+	{
+		bsize += read_one_size(&(op->length));
+		if (op->length == 0)
+			die("Zero length instruction");
+	}
+	/* Offset is present if action is copyfrom_source or
+	   copyfrom_target */
+	if (action != copyfrom_new)
+		bsize += read_one_size(&(op->offset));
+	return bsize;
+}
+
+size_t read_instructions(struct svndiff_window *window, size_t *ninst)
+{
+	size_t tpos = 0, npos, bsize;
+	struct svndiff_instruction *op;
+	npos = 0;
+	bsize = 0;
+	*ninst = 0;
+	
+	while (bsize < window->ins_len)
+	{
+		++(*ninst);
+		window->ops = realloc(window->ops, (*ninst) * sizeof(*op));
+		op = window->ops + (*ninst) - 1;
+		bsize += read_one_instruction(op);
+
+		if (DEBUG)
+			fprintf(stderr, "Instruction: %d %d %d (%d)\n",
+				op->action_code, op->length, op->offset, bsize);
+
+		if (op == NULL)
+			die("Invalid diff stream: "
+			    "instruction %d cannot be decoded", *ninst);
+		else if (op->length == 0)
+			die("Invalid diff stream: "
+			    "instruction %d has length zero", *ninst);
+		else if (op->length > window->tview_len - tpos)
+			die("Invalid diff stream: "
+			    "instruction %d overflows the target view", *ninst);
+
+		switch (op->action_code)
+		{
+		case copyfrom_source:
+			if (op->length > window->sview_len - op->offset ||
+			    op->offset > window->sview_len)
+				die("Invalid diff stream: "
+				    "[src] instruction %d overflows "
+				    " the source view", *ninst);
+			break;
+		case copyfrom_target:
+			if (op->offset >= tpos)
+				die("Invalid diff stream: "
+				    "[tgt] instruction %d starts "
+				    "beyond the target view position", *ninst);
+			break;
+		case copyfrom_new:
+			if (op->length > window->newdata_len - npos)
+				die("Invalid diff stream: "
+				    "[new] instruction %d overflows "
+				    "the new data section", *ninst);
+			npos += op->length;
+			break;
+		}
+		tpos += op->length;
+	}
+
+	if (tpos != window->tview_len)
+		die("Delta does not fill the target window");
+	if (npos != window->newdata_len)
+		die("Delta does not contain enough new data");
+	return bsize;
+}
+
+size_t read_window_header(struct svndiff_window *window)
+{
+	size_t bsize = 0;
+
+	/* Read five sizes; various offsets and lengths */
+	bsize += read_one_size(&(window->sview_offset));
+	bsize += read_one_size(&(window->sview_len));
+	bsize += read_one_size(&(window->tview_len));
+	bsize += read_one_size(&(window->ins_len));
+	bsize += read_one_size(&(window->newdata_len));
+
+	if (window->tview_len > SVN_DELTA_WINDOW_SIZE ||
+	    window->sview_len > SVN_DELTA_WINDOW_SIZE ||
+	    window->newdata_len > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN ||
+	    window->ins_len > MAX_INSTRUCTION_SECTION_LEN)
+		die("Svndiff contains a window that's too large");
+
+	/* Check for integer overflow */
+	if (window->ins_len + window->newdata_len < window->ins_len
+	    || window->sview_len + window->tview_len < window->sview_len
+	    || window->sview_offset + window->sview_len < window->sview_offset)
+		die("Svndiff contains corrupt window header");
+
+	if (DEBUG)
+		fprintf(stderr, "Window header: %d %d %d %d %d\n",
+			window->sview_offset, window->sview_len,
+			window->tview_len, window->ins_len, window->newdata_len);
+	return bsize;
+}
+
+void drive_window(struct svndiff_window *window, FILE *src_fd)
+{
+	struct svndiff_instruction *op;
+	size_t ninst;
+	FILE *target_fd;
+	long target_fd_end;
+
+	/* Populate the first five fields of the the window object
+	   with data from the stream */	
+	read_window_header(window);
+
+	/* Read instructions of length ins_len into window->ops
+	   performing memory allocations as necessary */
+	read_instructions(window, &ninst);
+
+	/* The Applier */
+	/* We're now looking at new_data; read ahead only in the
+	   copyfrom_new case */	
+	target_fd = tmpfile();
+	for (op = window->ops; ninst-- > 0; op++) {
+		switch (op->action_code) {
+		case copyfrom_source:
+			fseek(src_fd, op->offset, SEEK_SET);
+			fread(buf, op->length, 1, src_fd);
+			fwrite(buf, op->length, 1, target_fd);
+			break;
+		case copyfrom_target:
+			fseek(target_fd, op->offset, SEEK_SET);
+			fread(buf, op->length, 1, target_fd);
+			fseek(target_fd, 0, SEEK_END);
+			fwrite(buf, op->length, 1, target_fd);
+			break;
+		case copyfrom_new:
+			fseek(target_fd, 0, SEEK_END);
+			buffer_copy_bytes(op->length, target_fd);
+			break;
+		}
+	}
+	free(window->ops);
+	target_fd_end = ftell(target_fd);
+	fseek(target_fd, 0, SEEK_SET);
+	fread(buf, target_fd_end, 1, target_fd);
+	fwrite(buf, target_fd_end, 1, stdout);
+	fclose (target_fd);
+}
+
+int main(int argc, char **argv)
+{
+	int version;
+	struct svndiff_window *window;
+	FILE *src_fd;
+	buffer_init(NULL);
+
+	/* Read off the 4-byte header: "SVN\0" */
+	fread(&buf, 4, 1, stdin);
+	version = atoi(buf + 3);
+	if (version != 0)
+		die("Version %d unsupported", version);
+
+	/* Setup the source to apply windows to */
+	src_fd = fopen(argv[1], "r");
+
+	/* Read and drive the first window */
+	window = malloc(sizeof(*window));
+	drive_window(window, src_fd);
+	free(window);
+
+	buffer_deinit();
+	return 0;
+}
diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h
new file mode 100644
index 0000000..6d0607f
--- /dev/null
+++ b/vcs-svn/svndiff.h
@@ -0,0 +1,41 @@
+enum svndiff_action {
+    copyfrom_source,
+    copyfrom_target,
+    copyfrom_new
+};
+
+struct svndiff_instruction
+{
+  enum svndiff_action action_code;
+  size_t offset;
+  size_t length;
+};
+
+/* An svndiff_window object describes how to reconstruct a
+ * contiguous section of the target string (the "target view") using a
+ * specified contiguous region of the source string (the "source
+ * view").  It contains a series of instructions which assemble the
+ * new target string text by pulling together substrings from:
+ *
+ *   - the source view,
+ *
+ *   - the previously constructed portion of the target view,
+ *
+ *   - a string of new data contained within the window structure
+ *
+ * The source view must always slide forward from one window to the
+ * next; that is, neither the beginning nor the end of the source view
+ * may move to the left as we read from a window stream.  This
+ * property allows us to apply deltas to non-seekable source streams
+ * without making a full copy of the source stream.
+ */
+struct svndiff_window
+{
+  size_t sview_offset;
+  size_t sview_len;
+  size_t tview_len;
+  size_t ins_len;
+  size_t newdata_len;
+  struct svndiff_instruction *ops;
+  char *newdata;
+};
-- 
1.7.2.2.409.gdbb11.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]