Hi again, Ramkumar Ramachandra wrote: > repo_tree parses SVN revisions to build a Git objects, and use > fast_export to emit them so they can be imported into the Git object > store via a fast-import. Wait, where are SVN revisions being parsed? It seems that the repo module maintains the exporter's state and provides a facility to to call the fast_export module to write it out. repo_add(path, mode, blob_mark) is used to add a new file to the current commit. repo_modify is used to add a replacement for an existing file; it is implemented exactly the same way, but a check could be added later to distinguish the two cases. repo_copy copies a blob from a previous revision to the current commit. repo_replace modifies the content of a file from the current commit, if and only if it already exists. repo_delete removes a file or directory from the current commit. repo_commit calls out to fast_export to write the current commit to the fast-import stream in stdout. repo_diff is used by the fast_export module to write the changes for a commit. repo_reset erases the exporter's state, so valgrind can be happy. > +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, > + uint32_t mark) > +{ > + printf("M %06o :%d ", mode, mark); > + pool_print_seq(depth, path, '/', stdout); > + putchar('\n'); > +} Mode must be 100644, 100755, 120000, or 160000. [...] > +typedef struct repo_dirent_s repo_dirent_t; > + > +struct repo_dirent_s { > + uint32_t name_offset; > + uint32_t mode; > + uint32_t content_offset; > +}; > + > +typedef struct repo_dir_s repo_dir_t; > + > +struct repo_dir_s { > + uint32_t size; > + uint32_t first_offset; > +}; > + > +typedef struct repo_commit_s repo_commit_t; > + > +struct repo_commit_s { > + uint32_t mark; > + uint32_t root_dir_offset; > +}; Style: we don’t tend to use typedef to hide underlying struct definitions (see Documentation/CodingStyle from linux-2.6.git, chapter 5, for some explanation about why). > +static uint32_t num_dirs_saved = 0; > +static uint32_t num_dirents_saved = 0; Style: leave out the initializer for bss-allocated variables. > +static int repo_dirent_name_cmp(const void *a, const void *b) > +{ > + return (((repo_dirent_t *) a)->name_offset > + > ((repo_dirent_t *) b)->name_offset) - > + (((repo_dirent_t *) a)->name_offset > + < ((repo_dirent_t *) b)->name_offset); > +} Maybe some local variables would make this more readable: { const repo_dirent_t *dirent1 = a, *dirent2 = b; uint32_t a_offset = dirent1->name_offset; uint32_t b_offset = dirent2->name_offset; return (a_offset > b_offset) - (a_offset < b_offset); } > +static repo_dir_t *repo_clone_dir(repo_dir_t *orig_dir, uint32_t padding) > +{ > + uint32_t orig_o, new_o, dirent_o; > + orig_o = dir_offset(orig_dir); > + if (orig_o < num_dirs_saved) { > + new_o = dir_with_dirents_alloc(orig_dir->size + padding); > + orig_dir = dir_pointer(orig_o); > + dirent_o = dir_pointer(new_o)->first_offset; > + } else { > + if (padding == 0) > + return orig_dir; > + new_o = orig_o; > + dirent_o = dirent_alloc(orig_dir->size + padding); > + } > + memcpy(dirent_pointer(dirent_o), repo_first_dirent(orig_dir), > + orig_dir->size * sizeof(repo_dirent_t)); > + dir_pointer(new_o)->size = orig_dir->size + padding; > + dir_pointer(new_o)->first_offset = dirent_o; > + return dir_pointer(new_o); > +} Is this for adding new entries to an existing directory? It is getting late, so I did not look it over carefully. > +static char repo_path_buffer[REPO_MAX_PATH_LEN]; > +static repo_dirent_t *repo_read_dirent(uint32_t revision, char *path) > +{ > + char *ctx = NULL; > + uint32_t name = 0; > + repo_dir_t *dir = NULL; > + repo_dirent_t *dirent = NULL; > + dir = repo_commit_root_dir(commit_pointer(revision)); > + strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN); > + repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0'; > + path = repo_path_buffer; > + for (name = pool_tok_r(path, "/", &ctx); > + ~name; name = pool_tok_r(NULL, "/", &ctx)) { > + dirent = repo_dirent_by_name(dir, name); > + if (dirent == NULL) { > + return NULL; > + } else if (repo_dirent_is_dir(dirent)) { > + dir = repo_dir_from_dirent(dirent); > + } else { > + break; > + } > + } > + return dirent; > +} If a file "foo" exists and I ask for "foo/bar", this will return the entry for foo. Is that appropriate? > +static void > +repo_write_dirent(char *path, uint32_t mode, uint32_t content_offset, > + uint32_t del) > +{ > + char *ctx; > + uint32_t name, revision, dirent_o, dir_o, parent_dir_o; > + repo_dir_t *dir; > + repo_dirent_t *dirent = NULL; > + revision = active_commit; > + dir = repo_commit_root_dir(commit_pointer(revision)); > + dir = repo_clone_dir(dir, 0); > + commit_pointer(revision)->root_dir_offset = dir_offset(dir); > + strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN); > + repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0'; > + path = repo_path_buffer; > + for (name = pool_tok_r(path, "/", &ctx); ~name; > + name = pool_tok_r(NULL, "/", &ctx)) { > + parent_dir_o = dir_offset(dir); > + dirent = repo_dirent_by_name(dir, name); > + if (dirent == NULL) { static repo_dir_t *add_subdir(uint32_t parent_dir_o, repo_dir_t *dir, uint32_t name) { > + dir = repo_clone_dir(dir, 1); > + dirent = &repo_first_dirent(dir)[dir->size - 1]; > + dirent->name_offset = name; > + dirent->mode = REPO_MODE_DIR; > + qsort(repo_first_dirent(dir), dir->size, > + sizeof(repo_dirent_t), repo_dirent_name_cmp); > + dirent = repo_dirent_by_name(dir, name); > + dir_o = dir_with_dirents_alloc(0); > + dirent->content_offset = dir_o; > + dir = dir_pointer(dir_o); } > + } else if ((dir = repo_dir_from_dirent(dirent))) { > + dirent_o = dirent_offset(dirent); > + dir = repo_clone_dir(dir, 0); > + if (dirent_o != ~0) > + dirent_pointer(dirent_o)->content_offset = dir_offset(dir); > + } else { static repo_dir_t *replace_with_empty_dir(uint32_t dirent) { > + dirent->mode = REPO_MODE_DIR; > + dirent_o = dirent_offset(dirent); > + dir_o = dir_with_dirents_alloc(0); > + dirent = dirent_pointer(dirent_o); > + dir = dir_pointer(dir_o); > + dirent->content_offset = dir_o; } > + } > + } > + if (dirent) { When would it be NULL? Is an empty path allowed? static void fill_dirent(repo_dirent_t *dirent, uint32_t mode, uint32_t content_offset, uint32_t parent_dir_o, int del) { > + dirent->mode = mode; > + dirent->content_offset = content_offset; > + if (del && ~parent_dir_o) { > + dirent->name_offset = ~0; > + dir = dir_pointer(parent_dir_o); > + qsort(repo_first_dirent(dir), dir->size, > + sizeof(repo_dirent_t), repo_dirent_name_cmp); > + dir->size--; > + } } > + } > +} [...] > +static void > +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir); > + > +static void > +repo_git_add(uint32_t depth, uint32_t *path, repo_dirent_t *dirent) > +{ > + if (repo_dirent_is_dir(dirent)) { > + repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); > + } else { > + fast_export_modify(depth, path, dirent->mode, dirent->content_offset); > + } > +} Just curious: does gcc notice the tail calls? > +static void > +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir) > +{ > + uint32_t o; > + repo_dirent_t *de; > + de = repo_first_dirent(dir); > + for (o = 0; o < dir->size; o++) { > + path[depth] = de[o].name_offset; > + repo_git_add(depth + 1, path, &de[o]); > + } > +} Can this overflow the path stack? > + > +static void > +repo_diff_r(uint32_t depth, uint32_t *path, repo_dir_t *dir1, > + repo_dir_t *dir2) > +{ > + repo_dirent_t *de1, *de2, *max_de1, *max_de2; > + de1 = repo_first_dirent(dir1); > + de2 = repo_first_dirent(dir2); > + max_de1 = &de1[dir1->size]; > + max_de2 = &de2[dir2->size]; > + > + while (de1 < max_de1 && de2 < max_de2) { > + if (de1->name_offset < de2->name_offset) { > + path[depth] = (de1++)->name_offset; > + fast_export_delete(depth + 1, path); > + } else if (de1->name_offset == de2->name_offset) { > + path[depth] = de1->name_offset; > + if (de1->content_offset != de2->content_offset) { > + if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { > + repo_diff_r(depth + 1, path, > + repo_dir_from_dirent(de1), > + repo_dir_from_dirent(de2)); > + } else { > + if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) { > + fast_export_delete(depth + 1, path); > + } > + repo_git_add(depth + 1, path, de2); > + } > + } > + de1++; > + de2++; > + } else { > + path[depth] = de2->name_offset; > + repo_git_add(depth + 1, path, de2++); > + } Might be easier to read the other way around: if (de1->name_offset < de2->name_offset) { path[depth] = (de1++)->name_offset; fast_export_delete(depth + 1, path); continue; } if (de1->name_offset > de2->name_offset) { path[depth] = de2->name_offset; repo_git_add(depth + 1, path, de2++); continue; } ... matching paths case ... > + } > + while (de1 < max_de1) { > + path[depth] = (de1++)->name_offset; > + fast_export_delete(depth + 1, path); > + } > + while (de2 < max_de2) { > + path[depth] = de2->name_offset; > + repo_git_add(depth + 1, path, de2++); > + } > +} [...] > +void repo_commit(uint32_t revision, char *author, char *log, char *uuid, > + char *url, time_t timestamp) > +{ > + if (revision == 0) { > + active_commit = commit_alloc(1); > + commit_pointer(active_commit)->root_dir_offset = > + dir_with_dirents_alloc(0); > + } else { > + fast_export_commit(revision, author, log, uuid, url, timestamp); > + } > + num_dirs_saved = dir_pool.size; > + num_dirents_saved = dirent_pool.size; I did not carefully trace the cases where repo_clone_dir may reuse a dir. I would be happier if someone else does. The rest looked good to me, for what it’s worth. Thanks, Jonathan -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html