Hi David, David Barr writes: > Use the new cat-blob command for fast-import to extract > blobs so that text-deltas may be applied. I like this straightforward approach, and I like the name 'cat-blob'. > The backchannel should only need to be configured when > parsing v3 svn dump streams. Maybe get the synopsis to say this as well? > Based-on-patch-by: Ramkumar Ramachandra <artagnon@xxxxxxxxx> > Based-on-patch-by: Jonathan Nieder <jrnieder@xxxxxxxxx> > Tested-by: David Barr <david.barr@xxxxxxxxxxxx> > Signed-off-by: David Barr <david.barr@xxxxxxxxxxxx> > --- > contrib/svn-fe/svn-fe.txt | 6 +++- > t/t9010-svn-fe.sh | 6 ++-- > vcs-svn/fast_export.c | 86 +++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 92 insertions(+), 6 deletions(-) > > diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt > index 35f84bd..39ffa07 100644 > --- a/contrib/svn-fe/svn-fe.txt > +++ b/contrib/svn-fe/svn-fe.txt > @@ -7,7 +7,11 @@ svn-fe - convert an SVN "dumpfile" to a fast-import stream > > SYNOPSIS > -------- > -svnadmin dump --incremental REPO | svn-fe [url] | git fast-import > +[verse] > +mkfifo backchannel && > +svnadmin dump --incremental REPO | > + svn-fe [url] 3<backchannel | > + git fast-import --cat-blob-fd=3 3>backchannel See above. > DESCRIPTION > ----------- > diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh > index de976ed..d750c7a 100755 > --- a/t/t9010-svn-fe.sh > +++ b/t/t9010-svn-fe.sh > @@ -34,10 +34,10 @@ test_dump () { > svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && > svn_cmd export "file://$PWD/$label-svn" "$label-svnco" && > git init "$label-git" && > - test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && > ( > - cd "$label-git" && > - git fast-import < ../"$label.fe" > + cd "$label-git" && mkfifo backchannel && \ > + test-svn-fe "$TEST_DIRECTORY/$dump" 3< backchannel | \ > + git fast-import --cat-blob-fd=3 3> backchannel > ) && > ( > cd "$label-svnco" && Ok. > diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c > index b017dfb..812563d 100644 > --- a/vcs-svn/fast_export.c > +++ b/vcs-svn/fast_export.c > @@ -8,10 +8,17 @@ > #include "line_buffer.h" > #include "repo_tree.h" > #include "string_pool.h" > +#include "svndiff.h" > > #define MAX_GITSVN_LINE_LEN 4096 > +#define REPORT_FILENO 3 > + > +#define SHA1_HEX_LENGTH 40 > > static uint32_t first_commit_done; > +static struct line_buffer preimage = LINE_BUFFER_INIT; > +static struct line_buffer postimage = LINE_BUFFER_INIT; > +static struct line_buffer backchannel = LINE_BUFFER_INIT; Elegant :) > void fast_export_delete(uint32_t depth, uint32_t *path) > { > @@ -63,16 +70,91 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, > printf("progress Imported commit %"PRIu32".\n\n", revision); > } > > +static int fast_export_save_blob(FILE *out) > +{ > + size_t len; > + char *header; > + char *end; > + char *tail; > + > + if (!backchannel.infile) > + backchannel.infile = fdopen(REPORT_FILENO, "r"); > + if (!backchannel.infile) > + return error("Could not open backchannel fd: %d", REPORT_FILENO); REPORT_FILENO = 3 is hard-coded. Is this intended? Maybe a command-line option to specify the fd? > + header = buffer_read_line(&backchannel); > + if (header == NULL) > + return 1; Note to self: This prints the error "Failed to retrieve blob for delta application" in the caller. > + end = strchr(header, '\0'); > + if (end - header > 7 && !strcmp(end - 7, "missing")) > + return error("cat-blob reports missing blob: %s", header); > + if (end - header < SHA1_HEX_LENGTH) > + return error("cat-blob header too short for SHA1: %s", header); > + if (strncmp(header + SHA1_HEX_LENGTH, " blob ", 6)) > + return error("cat-blob header has wrong object type: %s", header); > + len = strtoumax(header + SHA1_HEX_LENGTH + 6, &end, 10); > + if (end == header + SHA1_HEX_LENGTH + 6) > + return error("cat-blob header did not contain length: %s", header); > + if (*end) > + return error("cat-blob header contained garbage after length: %s", header); > + buffer_copy_bytes(&backchannel, out, len); > + tail = buffer_read_line(&backchannel); > + if (!tail) > + return 1; Could you clarify when exactly will this happen? > + if (*tail) > + return error("cat-blob trailing line contained garbage: %s", tail); > + return 0; > +} > + > void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, > uint32_t delta, uint32_t srcMark, uint32_t srcMode, > struct line_buffer *input) > { Note to reviewers: The function looks like this in `master`: void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) New parameters intrduced in the svn-fe3 series: srcMark, srcMode, delta, input. > + long preimage_len = 0; > + > + if (delta) { > + if (!preimage.infile) > + preimage.infile = tmpfile(); Didn't you later decide against this and use one tmpfile instead? In this case, the temporary file will be automatically deleted when `preimage.infile` goes out of scope. > + if (!preimage.infile) > + die("Unable to open temp file for blob retrieval"); > + if (srcMark) { > + printf("cat-blob :%"PRIu32"\n", srcMark); > + fflush(stdout); > + if (srcMode == REPO_MODE_LNK) > + fwrite("link ", 1, 5, preimage.infile); Special handling for symbolic links. Perhaps you should mention it in a comment here? > + if (fast_export_save_blob(preimage.infile)) > + die("Failed to retrieve blob for delta application"); > + } > + preimage_len = ftell(preimage.infile); > + fseek(preimage.infile, 0, SEEK_SET); > + if (!postimage.infile) > + postimage.infile = tmpfile(); One tmpfile? > + if (!postimage.infile) > + die("Unable to open temp file for blob application"); > + svndiff0_apply(input, len, &preimage, postimage.infile); > + len = ftell(postimage.infile); Since you already have a preimage_len, perhaps name this postimage_len to avoid confusion? > + fseek(postimage.infile, 0, SEEK_SET); > + } > + > if (mode == REPO_MODE_LNK) { > /* svn symlink blobs start with "link " */ > - buffer_skip_bytes(input, 5); > + if (delta) > + buffer_skip_bytes(&postimage, 5); > + else > + buffer_skip_bytes(input, 5); > len -= 5; > } > printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); > - buffer_copy_bytes(input, stdout, len); > + if (!delta) > + buffer_copy_bytes(input, stdout, len); > + else > + buffer_copy_bytes(&postimage, stdout, len); > fputc('\n', stdout); I should have asked this a long time ago: why the extra newline? > + > + if (preimage.infile) { > + fseek(preimage.infile, 0, SEEK_SET); > + } > + > + if (postimage.infile) { > + fseek(postimage.infile, 0, SEEK_SET); > + } Style nits: The extra braces around the `if` statement are unnecessary. Overall, pleasant read. Thanks for taking this forward. -- Ram -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html