Here's a rather hackish implementation of the write side. Any thoughts on the format? (Obviously the implementation needs work. For example, it needs to be optional.)

Thoughts so far:
- I want to put the value of "prefix" into an extended header.
- Should blobs have their sha1 hashes in an extended header? Pros: it makes figuring out substitutions easier. Cons: it adds 512 bytes per file.
- I want to support tags as roots.
- I (or someone) need to write a verifier / verified unpacker. Does git accept Python code?

This thing is tested in the sense that GNU tar unpacks its output without any warnings or other fanfare.

--Andy
diff --git a/archive-tar.c b/archive-tar.c
index 719b629..c6bf7e4 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -2,6 +2,8 @@
  * Copyright (c) 2005, 2006 Rene Scharfe
  */
 #include "cache.h"
+#include "tree.h"
+#include "object.h"
 #include "tar.h"
 #include "archive.h"
 #include "streaming.h"
@@ -200,6 +202,76 @@ static int write_extended_header(struct archiver_args *args,
 	return 0;
 }
 
+/*
+ * A GIT-SCM object header is a global extended header that embeds a single
+ * git object. This object serves a purpose described by the "purpose"
+ * field. Valid purposes include:
+ *
+ *  - "root" -- an object that, by itself, in conjunction with other roots,
+ *    or in conjunction with external data, identifies a root to use to
+ *    verify this archive.
+ *  - "vrfy" -- an object that can be used to prove that the contents
+ *    of this archive are as described.
+ *
+ * There's one basic rule to observe: every "vrfy" object must hash to
+ * a SHA-1 that matches something described in a "root", another "vrfy" object,
+ * or something typed in by a user decoding the archive.
+ *
+ * (Of course, if you want the archive to be usefully verifiable, all of the
+ * non-GIT-SCM contents should also be attributable to an appropriate
+ * "vrfy" object.)
+ *
+ * The fields are:
+ *   GIT-SCM.obj.purpose: the purpose of the embedded object
+ *   GIT-SCM.obj.sha1: the sha1 of the embedded object
+ *   GIT-SCM.obj.type: the type of the embedded object
+ *   GIT-SCM.obj.data: the data in the embedded object
+ *
+ * The block header is intentionally unspecified, except that it must
+ * have typeflag 'g'. (This is to allow some flexibility in trying to
+ * preserve compatibility with old tar implementations.)
+ *
+ * Returns 0 on success, or the value returned by error() if the object
+ * named by "sha1" cannot be read; in that case nothing is written.
+ */
+static int write_gitscm_obj_header(struct archiver_args *args,
+				   const char *purpose,
+				   const unsigned char *sha1)
+{
+	struct strbuf ext_header = STRBUF_INIT;
+	struct ustar_header header;
+	enum object_type type;
+	unsigned long size;
+	void *buffer;
+	const char *typestr;
+
+	/* Read the object first so a failure emits no partial header. */
+	buffer = read_sha1_file(sha1, &type, &size);
+	if (!buffer)
+		return error("cannot read %s", sha1_to_hex(sha1));
+	typestr = typename(type);
+
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.purpose",
+				 purpose, strlen(purpose));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.sha1",
+				 sha1_to_hex(sha1), 40);
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.type",
+				 typestr, strlen(typestr));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.data",
+				 buffer, size);
+	free(buffer);
+
+	/* Emit a typeflag 'g' block header followed by the payload. */
+	memset(&header, 0, sizeof(header));
+	*header.typeflag = TYPEFLAG_GLOBAL_HEADER;
+	strcpy(header.name, "pax_global_header");
+	prepare_header(args, &header, 0100666, ext_header.len);
+	write_blocked(&header, sizeof(header));
+	write_blocked(ext_header.buf, ext_header.len);
+	strbuf_release(&ext_header);
+	return 0;
+}
+
 static int write_tar_entry(struct archiver_args *args,
 			   const unsigned char *sha1,
 			   const char *path, size_t pathlen,
@@ -212,6 +284,13 @@ static int write_tar_entry(struct archiver_args *args,
 	void *buffer;
 	int err = 0;
 
+	if (S_ISDIR(mode)) {
+		/* Embed the tree object for this directory as a "vrfy" header. */
+		err = write_gitscm_obj_header(args, "vrfy", sha1);
+		if (err)
+			return err;
+	}
+
 	memset(&header, 0, sizeof(header));
 
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
@@ -384,6 +463,10 @@ static int write_tar_archive(const struct archiver *ar,
 
 	if (args->commit_sha1)
 		err = write_global_extended_header(args);
+	if (!err && args->commit_sha1)
+		err = write_gitscm_obj_header(args, "root", args->commit_sha1);
+	if (!err)
+		err = write_gitscm_obj_header(args, "vrfy", args->tree->object.sha1);
 	if (!err)
 		err = write_archive_entries(args, write_tar_entry);
 	if (!err)