repo_tree parses SVN revisions to build Git objects, and uses fast_export to emit them so they can be imported into the Git object store via fast-import. Taken directly from David Michael Barr's svn-dump-fast-export repository. Signed-off-by: Ramkumar Ramachandra <artagnon@xxxxxxxxx> --- vcs-svn/fast_export.c | 61 +++++++++ vcs-svn/fast_export.h | 17 +++ vcs-svn/repo_tree.c | 333 +++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/repo_tree.h | 31 +++++ 4 files changed, 442 insertions(+), 0 deletions(-) create mode 100644 vcs-svn/fast_export.c create mode 100644 vcs-svn/fast_export.h create mode 100644 vcs-svn/repo_tree.c create mode 100644 vcs-svn/repo_tree.h diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000..f4d9ab7 --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,61 @@ +#include <string.h> + +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, char *author, char *log, + char *uuid, char *url, time_t timestamp) +{ + if (!author) + author = "nobody"; + if (!log) + log = ""; + if (uuid && url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + url, revision, uuid); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\nmark :%d\n", revision); + printf("committer %s <%s@%s> %ld +0000\n", + author, author, uuid ? 
uuid : "local", timestamp); + printf("data %ld\n%s%s\n", + strlen(log) + strlen(gitsvnline), log, gitsvnline); + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000..e84144e --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,17 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +#include <stdint.h> +#include <time.h> + +void fast_export_delete(uint32_t depth, uint32_t *path); + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); + +void fast_export_commit(uint32_t revision, char *author, char *log, + char *uuid, char *url, time_t timestamp); + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000..7c4a70f --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,333 @@ +#include <string.h> + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +typedef struct repo_dirent_s repo_dirent_t; + +struct repo_dirent_s { + uint32_t name_offset; + uint32_t mode; + uint32_t content_offset; +}; + +typedef struct repo_dir_s repo_dir_t; + +struct repo_dir_s { + uint32_t size; + uint32_t first_offset; +}; + +typedef struct repo_commit_s repo_commit_t; + +struct repo_commit_s { + uint32_t mark; + uint32_t root_dir_offset; +}; + +/* Generate memory pools for commit, dir and dirent */ +obj_pool_gen(commit, repo_commit_t, 4096); +obj_pool_gen(dir, repo_dir_t, 4096); +obj_pool_gen(dirent, repo_dirent_t, 4096); + +static uint32_t num_dirs_saved = 0; 
+static uint32_t num_dirents_saved = 0; +static uint32_t active_commit = -1; + +static repo_dir_t *repo_commit_root_dir(repo_commit_t *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static repo_dirent_t *repo_first_dirent(repo_dir_t *dir) +{ + return dirent_pointer(dir->first_offset); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + return (((repo_dirent_t *) a)->name_offset + > ((repo_dirent_t *) b)->name_offset) - + (((repo_dirent_t *) a)->name_offset + < ((repo_dirent_t *) b)->name_offset); +} + +static repo_dirent_t *repo_dirent_by_name(repo_dir_t *dir, + uint32_t name_offset) +{ + repo_dirent_t key; + if (dir == NULL || dir->size == 0) + return NULL; + key.name_offset = name_offset; + return bsearch(&key, repo_first_dirent(dir), dir->size, + sizeof(repo_dirent_t), repo_dirent_name_cmp); +} + +static int repo_dirent_is_dir(repo_dirent_t *dirent) +{ + return dirent != NULL && dirent->mode == REPO_MODE_DIR; +} + +static repo_dir_t *repo_dir_from_dirent(repo_dirent_t *dirent) +{ + if (!repo_dirent_is_dir(dirent)) + return NULL; + return dir_pointer(dirent->content_offset); +} + +static uint32_t dir_with_dirents_alloc(uint32_t size) +{ + uint32_t offset = dir_alloc(1); + dir_pointer(offset)->size = size; + dir_pointer(offset)->first_offset = dirent_alloc(size); + return offset; +} + +static repo_dir_t *repo_clone_dir(repo_dir_t *orig_dir, uint32_t padding) +{ + uint32_t orig_o, new_o, dirent_o; + orig_o = dir_offset(orig_dir); + if (orig_o < num_dirs_saved) { + new_o = dir_with_dirents_alloc(orig_dir->size + padding); + orig_dir = dir_pointer(orig_o); + dirent_o = dir_pointer(new_o)->first_offset; + } else { + if (padding == 0) + return orig_dir; + new_o = orig_o; + dirent_o = dirent_alloc(orig_dir->size + padding); + } + memcpy(dirent_pointer(dirent_o), repo_first_dirent(orig_dir), + orig_dir->size * sizeof(repo_dirent_t)); + dir_pointer(new_o)->size = orig_dir->size + padding; + dir_pointer(new_o)->first_offset = dirent_o; 
+ return dir_pointer(new_o); +} + +static char repo_path_buffer[REPO_MAX_PATH_LEN]; +static repo_dirent_t *repo_read_dirent(uint32_t revision, char *path) +{ + char *ctx = NULL; + uint32_t name = 0; + repo_dir_t *dir = NULL; + repo_dirent_t *dirent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN); + repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0'; + path = repo_path_buffer; + for (name = pool_tok_r(path, "/", &ctx); + ~name; name = pool_tok_r(NULL, "/", &ctx)) { + dirent = repo_dirent_by_name(dir, name); + if (dirent == NULL) { + return NULL; + } else if (repo_dirent_is_dir(dirent)) { + dir = repo_dir_from_dirent(dirent); + } else { + break; + } + } + return dirent; +} + +static void +repo_write_dirent(char *path, uint32_t mode, uint32_t content_offset, + uint32_t del) +{ + char *ctx; + uint32_t name, revision, dirent_o, dir_o, parent_dir_o; + repo_dir_t *dir; + repo_dirent_t *dirent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir, 0); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN); + repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0'; + path = repo_path_buffer; + for (name = pool_tok_r(path, "/", &ctx); ~name; + name = pool_tok_r(NULL, "/", &ctx)) { + parent_dir_o = dir_offset(dir); + dirent = repo_dirent_by_name(dir, name); + if (dirent == NULL) { + dir = repo_clone_dir(dir, 1); + dirent = &repo_first_dirent(dir)[dir->size - 1]; + dirent->name_offset = name; + dirent->mode = REPO_MODE_DIR; + qsort(repo_first_dirent(dir), dir->size, + sizeof(repo_dirent_t), repo_dirent_name_cmp); + dirent = repo_dirent_by_name(dir, name); + dir_o = dir_with_dirents_alloc(0); + dirent->content_offset = dir_o; + dir = dir_pointer(dir_o); + } else if ((dir = repo_dir_from_dirent(dirent))) { + dirent_o = dirent_offset(dirent); + dir = repo_clone_dir(dir, 0); + if (dirent_o != ~0) + 
dirent_pointer(dirent_o)->content_offset = dir_offset(dir); + } else { + dirent->mode = REPO_MODE_DIR; + dirent_o = dirent_offset(dirent); + dir_o = dir_with_dirents_alloc(0); + dirent = dirent_pointer(dirent_o); + dir = dir_pointer(dir_o); + dirent->content_offset = dir_o; + } + } + if (dirent) { + dirent->mode = mode; + dirent->content_offset = content_offset; + if (del && ~parent_dir_o) { + dirent->name_offset = ~0; + dir = dir_pointer(parent_dir_o); + qsort(repo_first_dirent(dir), dir->size, + sizeof(repo_dirent_t), repo_dirent_name_cmp); + dir->size--; + } + } +} + +uint32_t repo_copy(uint32_t revision, char *src, char *dst) +{ + uint32_t mode = 0, content_offset = 0; + repo_dirent_t *src_dirent; + src_dirent = repo_read_dirent(revision, src); + if (src_dirent != NULL) { + mode = src_dirent->mode; + content_offset = src_dirent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(char *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(char *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + repo_dirent_t *src_dirent; + src_dirent = repo_read_dirent(active_commit, path); + if (src_dirent != NULL) { + mode = src_dirent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(char *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(char *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir); + +static void +repo_git_add(uint32_t depth, uint32_t *path, repo_dirent_t *dirent) +{ + if (repo_dirent_is_dir(dirent)) { + repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); + } else { + fast_export_modify(depth, path, dirent->mode, dirent->content_offset); + } +} + +static void +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir) +{ + uint32_t o; + 
repo_dirent_t *de; + de = repo_first_dirent(dir); + for (o = 0; o < dir->size; o++) { + path[depth] = de[o].name_offset; + repo_git_add(depth + 1, path, &de[o]); + } +} + +static void +repo_diff_r(uint32_t depth, uint32_t *path, repo_dir_t *dir1, + repo_dir_t *dir2) +{ + repo_dirent_t *de1, *de2, *max_de1, *max_de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + max_de1 = &de1[dir1->size]; + max_de2 = &de2[dir2->size]; + + while (de1 < max_de1 && de2 < max_de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = (de1++)->name_offset; + fast_export_delete(depth + 1, path); + } else if (de1->name_offset == de2->name_offset) { + path[depth] = de1->name_offset; + if (de1->content_offset != de2->content_offset) { + if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else { + if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) { + fast_export_delete(depth + 1, path); + } + repo_git_add(depth + 1, path, de2); + } + } + de1++; + de2++; + } else { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2++); + } + } + while (de1 < max_de1) { + path[depth] = (de1++)->name_offset; + fast_export_delete(depth + 1, path); + } + while (de2 < max_de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2++); + } +} + +static uint32_t path_stack[1000]; +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, char *author, char *log, char *uuid, + char *url, time_t timestamp) +{ + if (revision == 0) { + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + dir_with_dirents_alloc(0); + } else { + fast_export_commit(revision, author, log, uuid, url, timestamp); + } + num_dirs_saved = dir_pool.size; + num_dirents_saved = dirent_pool.size; + 
active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dirent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000..2d645dc --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,31 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include <stdint.h> +#include <time.h> + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 + +uint32_t repo_copy(uint32_t revision, char *src, char *dst); + +void repo_add(char *path, uint32_t mode, uint32_t blob_mark); + +uint32_t repo_replace(char *path, uint32_t blob_mark); + +void repo_modify(char *path, uint32_t mode, uint32_t blob_mark); + +void repo_delete(char *path); + +void repo_commit(uint32_t revision, char *author, char *log, char *uuid, + char *url, time_t timestamp); + +void repo_diff(uint32_t r1, uint32_t r2); + +void repo_reset(void); + +#endif -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html