This patch prepares for callers to be able to write reverse index files to disk. It adds the necessary machinery to write a format-compliant .rev file from within 'write_rev_file()', which is called from 'finish_tmp_packfile()'. Similar to the process by which the reverse index is computed in memory, these new paths also have to sort a list of objects by their offsets within a packfile. These new paths use a qsort() (as opposed to a radix sort), since our specialized radix sort requires a full revindex_entry struct per object, which is more memory than we need to allocate. The qsort is obviously slower, but the theoretical slowdown would require a repository with a large amount of objects, likely implying that the time spent in, say, pack-objects during a repack would dominate the overall runtime. Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- pack-write.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++- pack.h | 4 ++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/pack-write.c b/pack-write.c index e9bb3fd949..680c36755d 100644 --- a/pack-write.c +++ b/pack-write.c @@ -167,6 +167,113 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec return index_name; } +static int pack_order_cmp(const void *va, const void *vb, void *ctx) +{ + struct pack_idx_entry **objects = ctx; + + off_t oa = objects[*(uint32_t*)va]->offset; + off_t ob = objects[*(uint32_t*)vb]->offset; + + if (oa < ob) + return -1; + if (oa > ob) + return 1; + return 0; +} + +static void write_rev_header(struct hashfile *f) +{ + uint32_t oid_version; + switch (hash_algo_by_ptr(the_hash_algo)) { + case GIT_HASH_SHA1: + oid_version = 1; + break; + case GIT_HASH_SHA256: + oid_version = 2; + break; + default: + die("write_rev_header: unknown hash version"); + } + + hashwrite_be32(f, RIDX_SIGNATURE); + hashwrite_be32(f, RIDX_VERSION); + hashwrite_be32(f, oid_version); +} + +static void write_rev_index_positions(struct hashfile *f, + struct pack_idx_entry **objects, + uint32_t nr_objects) +{ + uint32_t *pack_order; + uint32_t i; + + ALLOC_ARRAY(pack_order, nr_objects); + for (i = 0; i < nr_objects; i++) + pack_order[i] = i; + QSORT_S(pack_order, nr_objects, pack_order_cmp, objects); + + for (i = 0; i < nr_objects; i++) + hashwrite_be32(f, pack_order[i]); + + free(pack_order); +} + +static void write_rev_trailer(struct hashfile *f, const unsigned char *hash) +{ + hashwrite(f, hash, the_hash_algo->rawsz); +} + +const char *write_rev_file(const char *rev_name, + struct pack_idx_entry **objects, + uint32_t nr_objects, + const unsigned char *hash, + unsigned flags) +{ + struct hashfile *f; + int fd; + + if ((flags & WRITE_REV) && (flags & WRITE_REV_VERIFY)) + die(_("cannot both write and verify reverse index")); + + if (flags & WRITE_REV) { + if (!rev_name) { + struct strbuf tmp_file = STRBUF_INIT; + fd = odb_mkstemp(&tmp_file, "pack/tmp_rev_XXXXXX"); + rev_name = strbuf_detach(&tmp_file, NULL); + } else { + unlink(rev_name); + fd = open(rev_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + if (fd < 0) + die_errno("unable to create '%s'", rev_name); + } + f = hashfd(fd, rev_name); + } else if (flags & WRITE_REV_VERIFY) { + struct stat statbuf; + if (stat(rev_name, &statbuf)) { + if (errno == ENOENT) { + /* .rev files are optional */ + return NULL; + } else + die_errno(_("could not stat: %s"), rev_name); + } + f = hashfd_check(rev_name); + } else + return NULL; + + write_rev_header(f); + + write_rev_index_positions(f, objects, nr_objects); + write_rev_trailer(f, hash); + + if (rev_name && adjust_shared_perm(rev_name) < 0) + die(_("failed to make %s readable"), rev_name); + + finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE | + ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC)); + + return rev_name; +} + off_t write_pack_header(struct hashfile *f, uint32_t nr_entries) { struct pack_header hdr; @@ -342,7 +449,7 @@ void finish_tmp_packfile(struct strbuf *name_buffer, struct pack_idx_option *pack_idx_opts, unsigned char hash[]) { - const char *idx_tmp_name; + const char *idx_tmp_name, *rev_tmp_name = NULL; int basename_len = name_buffer->len; if (adjust_shared_perm(pack_tmp_name)) @@ -353,6 +460,9 @@ void finish_tmp_packfile(struct strbuf *name_buffer, if (adjust_shared_perm(idx_tmp_name)) die_errno("unable to make temporary index file readable"); + rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash, + pack_idx_opts->flags); + strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash)); if (rename(pack_tmp_name, name_buffer->buf)) @@ -366,6 +476,14 @@ void finish_tmp_packfile(struct strbuf *name_buffer, strbuf_setlen(name_buffer, basename_len); + if (rev_tmp_name) { + strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash)); + if (rename(rev_tmp_name, name_buffer->buf)) + die_errno("unable to rename temporary reverse-index file"); + } + + strbuf_setlen(name_buffer, basename_len); + free((void *)idx_tmp_name); } diff --git a/pack.h b/pack.h index 9ae640f417..afdcf8f5c7 100644 --- a/pack.h +++ b/pack.h @@ -42,6 +42,8 @@ struct pack_idx_option { /* flag bits */ #define WRITE_IDX_VERIFY 01 /* verify only, do not write the idx file */ #define WRITE_IDX_STRICT 02 +#define WRITE_REV 04 +#define WRITE_REV_VERIFY 010 uint32_t version; uint32_t off32_limit; @@ -91,6 +93,8 @@ struct ref; void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought); +const char *write_rev_file(const char *rev_name, struct pack_idx_entry **objects, uint32_t nr_objects, const unsigned char *hash, unsigned flags); + /* * The "hdr" output buffer should be at least this big, which will handle sizes * up to 2^67. -- 2.30.0.138.g6d7191ea01