This adds the reftable library, and hooks it up as a ref backend. At this point, I am mainly interested in feedback on the spots marked with XXX in the Git source code. v3 * passes gitgitgadget CI. Han-Wen Nienhuys (5): refs.h: clarify reflog iteration order create .git/refs in files-backend.c refs: document how ref_iterator_advance_fn should handle symrefs Add reftable library Reftable support for git-core Makefile | 24 +- builtin/init-db.c | 49 +- cache.h | 2 + refs.c | 20 +- refs.h | 5 +- refs/files-backend.c | 6 + refs/refs-internal.h | 6 + refs/reftable-backend.c | 880 +++++++++++++++++++++++++++++++ reftable/LICENSE | 31 ++ reftable/README.md | 19 + reftable/VERSION | 5 + reftable/basics.c | 196 +++++++ reftable/basics.h | 37 ++ reftable/block.c | 401 ++++++++++++++ reftable/block.h | 71 +++ reftable/blocksource.h | 20 + reftable/bytes.c | 0 reftable/config.h | 1 + reftable/constants.h | 27 + reftable/dump.c | 97 ++++ reftable/file.c | 97 ++++ reftable/iter.c | 229 ++++++++ reftable/iter.h | 56 ++ reftable/merged.c | 286 ++++++++++ reftable/merged.h | 34 ++ reftable/pq.c | 114 ++++ reftable/pq.h | 34 ++ reftable/reader.c | 708 +++++++++++++++++++++++++ reftable/reader.h | 52 ++ reftable/record.c | 1107 +++++++++++++++++++++++++++++++++++++++ reftable/record.h | 79 +++ reftable/reftable.h | 399 ++++++++++++++ reftable/slice.c | 199 +++++++ reftable/slice.h | 39 ++ reftable/stack.c | 983 ++++++++++++++++++++++++++++++++++ reftable/stack.h | 40 ++ reftable/system.h | 55 ++ reftable/tree.c | 66 +++ reftable/tree.h | 24 + reftable/writer.c | 623 ++++++++++++++++++++++ reftable/writer.h | 46 ++ reftable/zlib-compat.c | 92 ++++ repository.c | 2 + repository.h | 3 + setup.c | 12 +- 45 files changed, 7248 insertions(+), 28 deletions(-) create mode 100644 refs/reftable-backend.c create mode 100644 reftable/LICENSE create mode 100644 reftable/README.md create mode 100644 reftable/VERSION create mode 100644 reftable/basics.c create mode 100644 reftable/basics.h create mode 100644 reftable/block.c create mode 100644 reftable/block.h create mode 100644 reftable/blocksource.h create mode 100644 reftable/bytes.c create mode 100644 reftable/config.h create mode 100644 reftable/constants.h create mode 100644 reftable/dump.c create mode 100644 reftable/file.c create mode 100644 reftable/iter.c create mode 100644 reftable/iter.h create mode 100644 reftable/merged.c create mode 100644 reftable/merged.h create mode 100644 reftable/pq.c create mode 100644 reftable/pq.h create mode 100644 reftable/reader.c create mode 100644 reftable/reader.h create mode 100644 reftable/record.c create mode 100644 reftable/record.h create mode 100644 reftable/reftable.h create mode 100644 reftable/slice.c create mode 100644 reftable/slice.h create mode 100644 reftable/stack.c create mode 100644 reftable/stack.h create mode 100644 reftable/system.h create mode 100644 reftable/tree.c create mode 100644 reftable/tree.h create mode 100644 reftable/writer.c create mode 100644 reftable/writer.h create mode 100644 reftable/zlib-compat.c base-commit: 5b0ca878e008e82f91300091e793427205ce3544 Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-539%2Fhanwen%2Freftable-v4 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-539/hanwen/reftable-v4 Pull-Request: https://github.com/gitgitgadget/git/pull/539 Range-diff vs v3: 1: c00403c94d = 1: c00403c94d refs.h: clarify reflog iteration order 2: 57c7342319 < -: ---------- setup.c: enable repo detection for reftable 3: 5b7060cb2f ! 2: 4d6da9bc47 create .git/refs in files-backend.c @@ -29,9 +29,13 @@ + files_ref_path(refs, &sb, "refs"); + safe_create_dir(sb.buf, 1); -+ /* adjust permissions even if directory already exists. */ ++ /* adjust permissions even if directory already exists. */ + adjust_shared_perm(sb.buf); + /* * Create .git/refs/{heads,tags} */ ++ strbuf_reset(&sb); + files_ref_path(refs, &sb, "refs/heads"); + safe_create_dir(sb.buf, 1); + 4: 1b01c735a9 = 3: fbdcdccc88 refs: document how ref_iterator_advance_fn should handle symrefs 5: eb0df10068 ! 4: 02d2ca8b87 Add reftable library @@ -98,11 +98,11 @@ --- /dev/null +++ b/reftable/VERSION @@ -+commit e54326f73d95bfe8b17f264c400f4c365dbd5e5e ++commit e7c3fc3099d9999bc8d895f84027b0e36348d5e6 +Author: Han-Wen Nienhuys <hanwen@xxxxxxxxxx> -+Date: Tue Feb 4 19:48:17 2020 +0100 ++Date: Thu Feb 6 20:17:40 2020 +0100 + -+ C: PRI?MAX use; clang-format. ++ C: use inttypes.h header definitions diff --git a/reftable/basics.c b/reftable/basics.c new file mode 100644 @@ -2882,7 +2882,7 @@ +{ + char hex[SHA256_SIZE + 1] = {}; + -+ printf("ref{%s(%" PRIdMAX ") ", ref->ref_name, ref->update_index); ++ printf("ref{%s(%" PRIu64 ") ", ref->ref_name, ref->update_index); + if (ref->value != NULL) { + hex_format(hex, ref->value, hash_size); + printf("%s", hex); @@ -3274,9 +3274,9 @@ +{ + char hex[SHA256_SIZE + 1] = {}; + -+ printf("log{%s(%" PRIdMAX ") %s <%s> %" PRIuMAX " %04d\n", -+ log->ref_name, log->update_index, log->name, log->email, -+ log->time, log->tz_offset); ++ printf("log{%s(%" PRIu64 ") %s <%s> %" PRIu64 " %04d\n", log->ref_name, ++ log->update_index, log->name, log->email, log->time, ++ log->tz_offset); + hex_format(hex, log->old_hash, hash_size); + printf("%s => ", hex); + hex_format(hex, log->new_hash, hash_size); @@ -4860,7 +4860,7 @@ +static void format_name(struct slice *dest, uint64_t min, uint64_t max) +{ + char buf[100]; -+ snprintf(buf, sizeof(buf), "%012" PRIxMAX "-%012" PRIxMAX, min, max); ++ snprintf(buf, sizeof(buf), "%012" PRIx64 "-%012" PRIx64, min, max); + slice_set_string(dest, buf); +} + @@ -5585,6 +5585,7 @@ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> ++#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> @@ -5595,9 +5596,6 @@ +#include <unistd.h> +#include <zlib.h> + -+#define PRIuMAX "lu" -+#define PRIdMAX "ld" -+#define PRIxMAX "lx" +#define ARRAY_SIZE(a) sizeof((a)) / sizeof((a)[0]) +#define FREE_AND_NULL(x) \ + do { \ @@ -5605,14 +5603,14 @@ + (x) = NULL; \ + } while (0) +#define QSORT(arr, n, cmp) qsort(arr, n, sizeof(arr[0]), cmp) -+#define SWAP(a, b) \ -+ { \ -+ char tmp[sizeof(a)]; \ -+ assert(sizeof(a)==sizeof(b)); \ -+ memcpy(&tmp[0], &a, sizeof(a)); \ -+ memcpy(&a, &b, sizeof(a)); \ -+ memcpy(&b, &tmp[0], sizeof(a)); \ -+ } ++#define SWAP(a, b) \ ++ { \ ++ char tmp[sizeof(a)]; \ ++ assert(sizeof(a) == sizeof(b)); \ ++ memcpy(&tmp[0], &a, sizeof(a)); \ ++ memcpy(&a, &b, sizeof(a)); \ ++ memcpy(&b, &tmp[0], sizeof(a)); \ ++ } +#endif + +int uncompress_return_consumed(Bytef *dest, uLongf *destLen, @@ -5758,6 +5756,7 @@ + return &w->stats.log_stats; + } + assert(false); ++ return NULL; +} + +/* write data, queuing the padding for the next write. Returns negative for @@ -6299,7 +6298,7 @@ + w->stats.blocks++; + + if (debug) { -+ fprintf(stderr, "block %c off %" PRIuMAX " sz %d (%d)\n", typ, ++ fprintf(stderr, "block %c off %" PRIu64 " sz %d (%d)\n", typ, + w->next, raw_bytes, + get_u24(w->block + w->block_writer->header_off + 1)); + } 6: 0b2a1a81d6 ! 5: 2786a6bf61 Reftable support for git-core @@ -39,6 +39,7 @@ reftable.LogRecord{RefName:"HEAD", UpdateIndex:0x2, New:[]uint8{0x37, 0x3d, 0x96, 0x97, 0x2f, 0xca, 0x9b, 0x63, 0x59, 0x57, 0x40, 0xbb, 0xa3, 0x89, 0x8a, 0x76, 0x27, 0x78, 0xba, 0x20}, Old:[]uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, Name:"Han-Wen Nienhuys", Email:"hanwen@xxxxxxxxxx", Time:0x5e29ef27, TZOffset:100, Message:"commit (initial): x\n"} Signed-off-by: Han-Wen Nienhuys <hanwen@xxxxxxxxxx> + Co-authored-by: Jeff King <peff@xxxxxxxx> diff --git a/Makefile b/Makefile --- a/Makefile @@ -120,6 +121,31 @@ $^ { struct stat st1; struct strbuf buf = STRBUF_INIT; +@@ + is_bare_repository_cfg = init_is_bare_repository; + if (init_shared_repository != -1) + set_shared_repository(init_shared_repository); ++ if (flags & INIT_DB_REFTABLE) ++ the_repository->ref_storage_format = xstrdup("reftable"); + + /* + * We would have created the above under user's umask -- under +@@ + adjust_shared_perm(get_git_dir()); + } + ++ /* ++ * Check to see if .git/HEAD exists; this must happen before ++ * initializing the ref db, because we want to see if there is an ++ * existing HEAD. ++ */ ++ path = git_path_buf(&buf, "HEAD"); ++ reinit = (!access(path, R_OK) || ++ readlink(path, junk, sizeof(junk) - 1) != -1); ++ + /* + * We need to create a "refs" dir in any case so that older + * versions of git can tell that this is a repository. @@ * Create the default symlink from ".git/HEAD" to the "master" * branch, if it does not exist yet. @@ -127,18 +153,14 @@ $^ - path = git_path_buf(&buf, "HEAD"); - reinit = (!access(path, R_OK) - || readlink(path, junk, sizeof(junk)-1) != -1); -+ if (flags & INIT_DB_REFTABLE) { -+ reinit = 0; /* XXX - how do we recognize a reinit, -+ * and what should we do? */ -+ } else { -+ path = git_path_buf(&buf, "HEAD"); -+ reinit = (!access(path, R_OK) || -+ readlink(path, junk, sizeof(junk) - 1) != -1); -+ } -+ if (!reinit) { if (create_symref("HEAD", "refs/heads/master", NULL) < 0) exit(1); ++ } else { ++ /* ++ * XXX should check whether our ref backend matches the ++ * original one; if not, either die() or convert ++ */ } /* This forces creation of new config file */ @@ -251,10 +273,8 @@ $^ - r->refs = ref_store_init(r->gitdir, REF_STORE_ALL_CAPS); + r->refs = ref_store_init(r->gitdir, -+ /* XXX r->ref_storage_format == NULL. Where -+ * should the config file be parsed out? */ + r->ref_storage_format ? r->ref_storage_format : -+ "reftable", ++ "files", + REF_STORE_ALL_CAPS); return r->refs; } @@ -410,13 +430,14 @@ $^ +{ + struct reftable_ref_store *refs = + (struct reftable_ref_store *)ref_store; -+ ++ FILE *file; + safe_create_dir(refs->reftable_dir, 1); -+ FILE *f = fopen(refs->table_list_file, "a"); -+ if (f == NULL) { ++ ++ file = fopen(refs->table_list_file, "a"); ++ if (file == NULL) { + return -1; + } -+ fclose(f); ++ fclose(file); + return 0; +} + @@ -777,7 +798,6 @@ $^ + log.old_hash = old_oid.hash; + } + -+ /* XXX should the resolution be done relative or absolute? */ + if (refs_resolve_ref_unsafe((struct ref_store *)create->refs, + create->target, RESOLVE_REF_READING, + &new_oid, NULL) != NULL) { @@ -1194,9 +1214,7 @@ $^ if (worktree) repo_set_worktree(repo, worktree); -+ repo->ref_storage_format = format.ref_storage != NULL ? -+ xstrdup(format.ref_storage) : -+ "files"; /* XXX */ ++ repo->ref_storage_format = xstrdup_or_null(format.ref_storage); + clear_repository_format(&format); return 0; @@ -1227,7 +1245,7 @@ $^ + } else if (!strcmp(ext, "worktreeconfig")) { data->worktree_config = git_config_bool(var, value); - else -+ } else if (!strcmp(ext, "refStorage")) { ++ } else if (!strcmp(ext, "refstorage")) { + data->ref_storage = xstrdup(value); + } else string_list_append(&data->unknown_extensions, ext); @@ -1241,3 +1259,16 @@ $^ init_repository_format(format); } +@@ + gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; + setup_git_env(gitdir); + } +- if (startup_info->have_repository) ++ if (startup_info->have_repository) { + repo_set_hash_algo(the_repository, repo_fmt.hash_algo); ++ the_repository->ref_storage_format = ++ xstrdup_or_null(repo_fmt.ref_storage); ++ } + } + + strbuf_release(&dir); -- gitgitgadget