[PATCH 6/7] Add SVN revision parser and exporter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



repo_tree parses SVN revisions to build a Git objects, and use
fast_export to emit them so they can be imported into the Git object
store via a fast-import. Taken directly from David Michael Barr's
svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@xxxxxxxxx>
---
 vcs-svn/fast_export.c |   61 +++++++++
 vcs-svn/fast_export.h |   17 +++
 vcs-svn/repo_tree.c   |  333 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/repo_tree.h   |   31 +++++
 4 files changed, 442 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/fast_export.c
 create mode 100644 vcs-svn/fast_export.h
 create mode 100644 vcs-svn/repo_tree.c
 create mode 100644 vcs-svn/repo_tree.h

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
new file mode 100644
index 0000000..f4d9ab7
--- /dev/null
+++ b/vcs-svn/fast_export.c
@@ -0,0 +1,61 @@
+#include <string.h>
+
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "repo_tree.h"
+#include "string_pool.h"
+
+#define MAX_GITSVN_LINE_LEN 4096
+
+void fast_export_delete(uint32_t depth, uint32_t *path)
+{
+    putchar('D');
+    putchar(' ');
+    pool_print_seq(depth, path, '/', stdout);
+    putchar('\n');
+}
+
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                        uint32_t mark)
+{
+    printf("M %06o :%d ", mode, mark);
+    pool_print_seq(depth, path, '/', stdout);
+    putchar('\n');
+}
+
+static char gitsvnline[MAX_GITSVN_LINE_LEN];
+void fast_export_commit(uint32_t revision, char *author, char *log,
+                        char *uuid, char *url, time_t timestamp)
+{
+    if (!author)
+        author = "nobody";
+    if (!log)
+        log = "";
+    if (uuid && url) {
+        snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n",
+             url, revision, uuid);
+    } else {
+        *gitsvnline = '\0';
+    }
+    printf("commit refs/heads/master\nmark :%d\n", revision);
+    printf("committer %s <%s@%s> %ld +0000\n",
+         author, author, uuid ? uuid : "local", timestamp);
+    printf("data %ld\n%s%s\n",
+           strlen(log) + strlen(gitsvnline), log, gitsvnline);
+    repo_diff(revision - 1, revision);
+    fputc('\n', stdout);
+
+    printf("progress Imported commit %d.\n\n", revision);
+}
+
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len)
+{
+    if (mode == REPO_MODE_LNK) {
+        /* svn symlink blobs start with "link " */
+        buffer_skip_bytes(5);
+        len -= 5;
+    }
+    printf("blob\nmark :%d\ndata %d\n", mark, len);
+    buffer_copy_bytes(len);
+    fputc('\n', stdout);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
new file mode 100644
index 0000000..e84144e
--- /dev/null
+++ b/vcs-svn/fast_export.h
@@ -0,0 +1,17 @@
+#ifndef FAST_EXPORT_H_
+#define FAST_EXPORT_H_
+
+#include <stdint.h>
+#include <time.h>
+
+void fast_export_delete(uint32_t depth, uint32_t *path);
+
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                        uint32_t mark);
+
+void fast_export_commit(uint32_t revision, char *author, char *log,
+                        char *uuid, char *url, time_t timestamp);
+
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len);
+
+#endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
new file mode 100644
index 0000000..7c4a70f
--- /dev/null
+++ b/vcs-svn/repo_tree.c
@@ -0,0 +1,333 @@
+#include <string.h>
+
+#include "string_pool.h"
+#include "repo_tree.h"
+#include "obj_pool.h"
+#include "fast_export.h"
+
+typedef struct repo_dirent_s repo_dirent_t;
+
+struct repo_dirent_s {
+    uint32_t name_offset;
+    uint32_t mode;
+    uint32_t content_offset;
+};
+
+typedef struct repo_dir_s repo_dir_t;
+
+struct repo_dir_s {
+    uint32_t size;
+    uint32_t first_offset;
+};
+
+typedef struct repo_commit_s repo_commit_t;
+
+struct repo_commit_s {
+    uint32_t mark;
+    uint32_t root_dir_offset;
+};
+
+/* Generate memory pools for commit, dir and dirent */
+obj_pool_gen(commit, repo_commit_t, 4096);
+obj_pool_gen(dir, repo_dir_t, 4096);
+obj_pool_gen(dirent, repo_dirent_t, 4096);
+
+static uint32_t num_dirs_saved = 0;
+static uint32_t num_dirents_saved = 0;
+static uint32_t active_commit = -1;
+
+static repo_dir_t *repo_commit_root_dir(repo_commit_t *commit)
+{
+    return dir_pointer(commit->root_dir_offset);
+}
+
+static repo_dirent_t *repo_first_dirent(repo_dir_t *dir)
+{
+    return dirent_pointer(dir->first_offset);
+}
+
+static int repo_dirent_name_cmp(const void *a, const void *b)
+{
+    return (((repo_dirent_t *) a)->name_offset
+            > ((repo_dirent_t *) b)->name_offset) -
+        (((repo_dirent_t *) a)->name_offset
+         < ((repo_dirent_t *) b)->name_offset);
+}
+
+static repo_dirent_t *repo_dirent_by_name(repo_dir_t *dir,
+                                          uint32_t name_offset)
+{
+    repo_dirent_t key;
+    if (dir == NULL || dir->size == 0)
+        return NULL;
+    key.name_offset = name_offset;
+    return bsearch(&key, repo_first_dirent(dir), dir->size,
+                   sizeof(repo_dirent_t), repo_dirent_name_cmp);
+}
+
+static int repo_dirent_is_dir(repo_dirent_t *dirent)
+{
+    return dirent != NULL && dirent->mode == REPO_MODE_DIR;
+}
+
+static repo_dir_t *repo_dir_from_dirent(repo_dirent_t *dirent)
+{
+    if (!repo_dirent_is_dir(dirent))
+        return NULL;
+    return dir_pointer(dirent->content_offset);
+}
+
+static uint32_t dir_with_dirents_alloc(uint32_t size)
+{
+    uint32_t offset = dir_alloc(1);
+    dir_pointer(offset)->size = size;
+    dir_pointer(offset)->first_offset = dirent_alloc(size);
+    return offset;
+}
+
+static repo_dir_t *repo_clone_dir(repo_dir_t *orig_dir, uint32_t padding)
+{
+    uint32_t orig_o, new_o, dirent_o;
+    orig_o = dir_offset(orig_dir);
+    if (orig_o < num_dirs_saved) {
+        new_o = dir_with_dirents_alloc(orig_dir->size + padding);
+        orig_dir = dir_pointer(orig_o);
+        dirent_o = dir_pointer(new_o)->first_offset;
+    } else {
+        if (padding == 0)
+            return orig_dir;
+        new_o = orig_o;
+        dirent_o = dirent_alloc(orig_dir->size + padding);
+    }
+    memcpy(dirent_pointer(dirent_o), repo_first_dirent(orig_dir),
+           orig_dir->size * sizeof(repo_dirent_t));
+    dir_pointer(new_o)->size = orig_dir->size + padding;
+    dir_pointer(new_o)->first_offset = dirent_o;
+    return dir_pointer(new_o);
+}
+
+static char repo_path_buffer[REPO_MAX_PATH_LEN];
+static repo_dirent_t *repo_read_dirent(uint32_t revision, char *path)
+{
+    char *ctx = NULL;
+    uint32_t name = 0;
+    repo_dir_t *dir = NULL;
+    repo_dirent_t *dirent = NULL;
+    dir = repo_commit_root_dir(commit_pointer(revision));
+    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
+    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
+    path = repo_path_buffer;
+    for (name = pool_tok_r(path, "/", &ctx);
+         ~name; name = pool_tok_r(NULL, "/", &ctx)) {
+        dirent = repo_dirent_by_name(dir, name);
+        if (dirent == NULL) {
+            return NULL;
+        } else if (repo_dirent_is_dir(dirent)) {
+            dir = repo_dir_from_dirent(dirent);
+        } else {
+            break;
+        }
+    }
+    return dirent;
+}
+
+static void
+repo_write_dirent(char *path, uint32_t mode, uint32_t content_offset,
+                  uint32_t del)
+{
+    char *ctx;
+    uint32_t name, revision, dirent_o, dir_o, parent_dir_o;
+    repo_dir_t *dir;
+    repo_dirent_t *dirent = NULL;
+    revision = active_commit;
+    dir = repo_commit_root_dir(commit_pointer(revision));
+    dir = repo_clone_dir(dir, 0);
+    commit_pointer(revision)->root_dir_offset = dir_offset(dir);
+    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
+    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
+    path = repo_path_buffer;
+    for (name = pool_tok_r(path, "/", &ctx); ~name;
+         name = pool_tok_r(NULL, "/", &ctx)) {
+        parent_dir_o = dir_offset(dir);
+        dirent = repo_dirent_by_name(dir, name);
+        if (dirent == NULL) {
+            dir = repo_clone_dir(dir, 1);
+            dirent = &repo_first_dirent(dir)[dir->size - 1];
+            dirent->name_offset = name;
+            dirent->mode = REPO_MODE_DIR;
+            qsort(repo_first_dirent(dir), dir->size,
+                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
+            dirent = repo_dirent_by_name(dir, name);
+            dir_o = dir_with_dirents_alloc(0);
+            dirent->content_offset = dir_o;
+            dir = dir_pointer(dir_o);
+        } else if ((dir = repo_dir_from_dirent(dirent))) {
+            dirent_o = dirent_offset(dirent);
+            dir = repo_clone_dir(dir, 0);
+            if (dirent_o != ~0)
+                dirent_pointer(dirent_o)->content_offset = dir_offset(dir);
+        } else {
+            dirent->mode = REPO_MODE_DIR;
+            dirent_o = dirent_offset(dirent);
+            dir_o = dir_with_dirents_alloc(0);
+            dirent = dirent_pointer(dirent_o);
+            dir = dir_pointer(dir_o);
+            dirent->content_offset = dir_o;
+        }
+    }
+    if (dirent) {
+        dirent->mode = mode;
+        dirent->content_offset = content_offset;
+        if (del && ~parent_dir_o) {
+            dirent->name_offset = ~0;
+            dir = dir_pointer(parent_dir_o);
+            qsort(repo_first_dirent(dir), dir->size,
+                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
+            dir->size--;
+        }
+    }
+}
+
+uint32_t repo_copy(uint32_t revision, char *src, char *dst)
+{
+    uint32_t mode = 0, content_offset = 0;
+    repo_dirent_t *src_dirent;
+    src_dirent = repo_read_dirent(revision, src);
+    if (src_dirent != NULL) {
+        mode = src_dirent->mode;
+        content_offset = src_dirent->content_offset;
+        repo_write_dirent(dst, mode, content_offset, 0);
+    }
+    return mode;
+}
+
+void repo_add(char *path, uint32_t mode, uint32_t blob_mark)
+{
+    repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+uint32_t repo_replace(char *path, uint32_t blob_mark)
+{
+    uint32_t mode = 0;
+    repo_dirent_t *src_dirent;
+    src_dirent = repo_read_dirent(active_commit, path);
+    if (src_dirent != NULL) {
+        mode = src_dirent->mode;
+        repo_write_dirent(path, mode, blob_mark, 0);
+    }
+    return mode;
+}
+
+void repo_modify(char *path, uint32_t mode, uint32_t blob_mark)
+{
+    repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+void repo_delete(char *path)
+{
+    repo_write_dirent(path, 0, 0, 1);
+}
+
+static void
+repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir);
+
+static void
+repo_git_add(uint32_t depth, uint32_t *path, repo_dirent_t *dirent)
+{
+    if (repo_dirent_is_dir(dirent)) {
+        repo_git_add_r(depth, path, repo_dir_from_dirent(dirent));
+    } else {
+        fast_export_modify(depth, path, dirent->mode, dirent->content_offset);
+    }
+}
+
+static void
+repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir)
+{
+    uint32_t o;
+    repo_dirent_t *de;
+    de = repo_first_dirent(dir);
+    for (o = 0; o < dir->size; o++) {
+        path[depth] = de[o].name_offset;
+        repo_git_add(depth + 1, path, &de[o]);
+    }
+}
+
+static void
+repo_diff_r(uint32_t depth, uint32_t *path, repo_dir_t *dir1,
+            repo_dir_t *dir2)
+{
+    repo_dirent_t *de1, *de2, *max_de1, *max_de2;
+    de1 = repo_first_dirent(dir1);
+    de2 = repo_first_dirent(dir2);
+    max_de1 = &de1[dir1->size];
+    max_de2 = &de2[dir2->size];
+
+    while (de1 < max_de1 && de2 < max_de2) {
+        if (de1->name_offset < de2->name_offset) {
+            path[depth] = (de1++)->name_offset;
+            fast_export_delete(depth + 1, path);
+        } else if (de1->name_offset == de2->name_offset) {
+            path[depth] = de1->name_offset;
+            if (de1->content_offset != de2->content_offset) {
+                if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) {
+                    repo_diff_r(depth + 1, path,
+                                repo_dir_from_dirent(de1),
+                                repo_dir_from_dirent(de2));
+                } else {
+                    if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) {
+                        fast_export_delete(depth + 1, path);
+                    }
+                    repo_git_add(depth + 1, path, de2);
+                }
+            }
+            de1++;
+            de2++;
+        } else {
+            path[depth] = de2->name_offset;
+            repo_git_add(depth + 1, path, de2++);
+        }
+    }
+    while (de1 < max_de1) {
+        path[depth] = (de1++)->name_offset;
+        fast_export_delete(depth + 1, path);
+    }
+    while (de2 < max_de2) {
+        path[depth] = de2->name_offset;
+        repo_git_add(depth + 1, path, de2++);
+    }
+}
+
+static uint32_t path_stack[1000];
+void repo_diff(uint32_t r1, uint32_t r2)
+{
+    repo_diff_r(0,
+                path_stack,
+                repo_commit_root_dir(commit_pointer(r1)),
+                repo_commit_root_dir(commit_pointer(r2)));
+}
+
+void repo_commit(uint32_t revision, char *author, char *log, char *uuid,
+                 char *url, time_t timestamp)
+{
+    if (revision == 0) {
+        active_commit = commit_alloc(1);
+        commit_pointer(active_commit)->root_dir_offset =
+            dir_with_dirents_alloc(0);
+    } else {
+        fast_export_commit(revision, author, log, uuid, url, timestamp);
+    }
+    num_dirs_saved = dir_pool.size;
+    num_dirents_saved = dirent_pool.size;
+    active_commit = commit_alloc(1);
+    commit_pointer(active_commit)->root_dir_offset =
+        commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+void repo_reset(void)
+{
+    pool_reset();
+    commit_reset();
+    dir_reset();
+    dirent_reset();
+}
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
new file mode 100644
index 0000000..2d645dc
--- /dev/null
+++ b/vcs-svn/repo_tree.h
@@ -0,0 +1,31 @@
+#ifndef REPO_TREE_H_
+#define REPO_TREE_H_
+
+#include <stdint.h>
+#include <time.h>
+
+#define REPO_MODE_DIR 0040000
+#define REPO_MODE_BLB 0100644
+#define REPO_MODE_EXE 0100755
+#define REPO_MODE_LNK 0120000
+
+#define REPO_MAX_PATH_LEN 4096
+
+uint32_t repo_copy(uint32_t revision, char *src, char *dst);
+
+void repo_add(char *path, uint32_t mode, uint32_t blob_mark);
+
+uint32_t repo_replace(char *path, uint32_t blob_mark);
+
+void repo_modify(char *path, uint32_t mode, uint32_t blob_mark);
+
+void repo_delete(char *path);
+
+void repo_commit(uint32_t revision, char *author, char *log, char *uuid,
+                 char *url, time_t timestamp);
+
+void repo_diff(uint32_t r1, uint32_t r2);
+
+void repo_reset(void);
+
+#endif
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]