[PATCH 09/14] remote-curl: auto-detect bundles when fetching refs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



You can't currently fetch from a network bundle, like:

  git fetch http://example.com/foo.bundle

This patch takes the first (and biggest) step towards that
working: it auto-detects when fetching refs results in a
bundle, and automatically spools the bundle to disk and
fetches from it.

There are a few important design decisions to note:

  1. We auto-detect the bundle based on content, not based
     on a special token in the URL (like ending in
     ".bundle"). This lets the server side be flexible with
     its URLs (e.g., "http://example.com/bundle?repo=foo").

  2. When fetching refs, we don't actually fetch $URL, but
     start with $URL/info/refs, looking for smart or dumb
     http. Some servers, when file "foo.bundle" exists, will
     serve it to the client when "foo.bundle/info/refs" is
     requested. Therefore we may be "surprised" to receive a
     bundle when we thought we were just getting the list of
     refs, and need to handle it appropriately.

  3. We spool the bundle to disk, and then run "index-pack
     --fix-thin" to create a packfile. That means we will
     momentarily use twice the size of the bundle in local
     disk space. Avoiding this would mean piping directly to
     "index-pack --fix-thin".  However, if we want to be
     able to resume the transfer of the bundle after an
     interruption, then we need to save the bundle's pack.

     In theory a smart index-pack that was interrupted could
     write out its partial results along with a count of how
     many bytes it actually consumed (i.e., where to resume
     next time), and then pick up where it left off when fed
     the rest of the data. But index-pack isn't that smart
     yet, so let's start off with spooling.

No tests yet, as apache is not one of the "surprising"
servers from (2), and our test harness is based around that
(though just with this patch, you can fetch from surprising
servers like lighttpd).

Signed-off-by: Jeff King <peff@xxxxxxxx>
---
This is really the big, interesting one.

 remote-curl.c |  124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/remote-curl.c b/remote-curl.c
index 014d413..84586e0 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -7,6 +7,7 @@
 #include "run-command.h"
 #include "pkt-line.h"
 #include "sideband.h"
+#include "bundle.h"
 
 static struct remote *remote;
 static const char *url; /* always ends with a trailing slash */
@@ -77,6 +78,10 @@ struct discovery {
 	char *buf;
 	size_t len;
 	unsigned proto_git : 1;
+
+	char *bundle_filename;
+	int bundle_fd;
+	struct bundle_header bundle_header;
 };
 static struct discovery *last_discovery;
 
@@ -86,26 +91,93 @@ static void free_discovery(struct discovery *d)
 		if (d == last_discovery)
 			last_discovery = NULL;
 		free(d->buf_alloc);
+		if (d->bundle_fd >= 0)
+			close(d->bundle_fd);
+		if (d->bundle_filename) {
+			unlink(d->bundle_filename);
+			free(d->bundle_filename);
+		}
 		free(d);
 	}
 }
 
 struct get_refs_cb_data {
 	struct strbuf *out;
+
+	int is_bundle;
+	const char *tmpname;
+	FILE *fh;
 };
 
 static size_t get_refs_callback(char *buf, size_t sz, size_t n, void *vdata)
 {
 	struct get_refs_cb_data *data = vdata;
-	strbuf_add(data->out, buf, sz * n);
+	struct strbuf *out = data->out;
+
+	if (data->is_bundle > 0)
+		return fwrite(buf, sz, n, data->fh);
+
+	strbuf_add(out, buf, sz * n);
+
+	if (data->is_bundle == 0)
+		return sz * n;
+
+	data->is_bundle = is_bundle_buf(out->buf, out->len);
+	if (data->is_bundle > 0) {
+		data->fh = fopen(data->tmpname, "wb");
+		if (!data->fh)
+			die_errno("unable to open %s", data->tmpname);
+		if (fwrite(out->buf, 1, out->len, data->fh) < out->len)
+			die_errno("unable to write to %s", data->tmpname);
+	}
 	return sz * n;
 }
 
-static int get_refs_from_url(const char *url, struct strbuf *out, int options)
+static int get_refs_from_url(const char *url, struct strbuf *out, int options,
+			     const char *tmpname, int *is_bundle)
 {
 	struct get_refs_cb_data data;
+	int ret;
+
 	data.out = out;
-	return http_get_callback(url, get_refs_callback, &data, 0, options);
+	data.is_bundle = -1;
+	data.tmpname = tmpname;
+	data.fh = NULL;
+
+	ret = http_get_callback(url, get_refs_callback, &data, 0, options);
+
+	if (data.fh) {
+		if (fclose(data.fh))
+			die_errno("unable to write to %s", data.tmpname);
+	}
+
+	*is_bundle = data.is_bundle > 0;
+	return ret;
+}
+
+static const char *url_to_bundle_tmpfile(const char *url)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int last_was_quoted = 1;
+	const char *ret;
+
+	strbuf_addstr(&buf, "tmp_bundle_");
+	for (; *url; url++) {
+		if (isalpha(*url) || isdigit(*url)) {
+			strbuf_addch(&buf, *url);
+			last_was_quoted = 0;
+		}
+		else if (!last_was_quoted) {
+			strbuf_addch(&buf, '_');
+			last_was_quoted = 1;
+		}
+	}
+	if (last_was_quoted)
+		strbuf_setlen(&buf, buf.len - 1);
+
+	ret = git_path("objects/%s", buf.buf);
+	strbuf_release(&buf);
+	return ret;
 }
 
 static struct discovery* discover_refs(const char *service)
@@ -114,11 +186,15 @@ static int get_refs_from_url(const char *url, struct strbuf *out, int options)
 	struct discovery *last = last_discovery;
 	char *refs_url;
 	int http_ret, is_http = 0, proto_git_candidate = 1;
+	const char *filename;
+	int is_bundle;
 
 	if (last && !strcmp(service, last->service))
 		return last;
 	free_discovery(last);
 
+	filename = url_to_bundle_tmpfile(url);
+
 	strbuf_addf(&buffer, "%sinfo/refs", url);
 	if (!prefixcmp(url, "http://") || !prefixcmp(url, "https://")) {
 		is_http = 1;
@@ -130,7 +206,8 @@ static int get_refs_from_url(const char *url, struct strbuf *out, int options)
 	}
 	refs_url = strbuf_detach(&buffer, NULL);
 
-	http_ret = get_refs_from_url(refs_url, &buffer, HTTP_NO_CACHE);
+	http_ret = get_refs_from_url(refs_url, &buffer, HTTP_NO_CACHE,
+				     filename, &is_bundle);
 
 	/* try again with "plain" url (no ? or & appended) */
 	if (http_ret != HTTP_OK && http_ret != HTTP_NOAUTH) {
@@ -141,7 +218,8 @@ static int get_refs_from_url(const char *url, struct strbuf *out, int options)
 		strbuf_addf(&buffer, "%sinfo/refs", url);
 		refs_url = strbuf_detach(&buffer, NULL);
 
-		http_ret = get_refs_from_url(refs_url, &buffer, HTTP_NO_CACHE);
+		http_ret = get_refs_from_url(refs_url, &buffer, HTTP_NO_CACHE,
+					     filename, &is_bundle);
 	}
 
 	switch (http_ret) {
@@ -161,6 +239,7 @@ static int get_refs_from_url(const char *url, struct strbuf *out, int options)
 	last->service = service;
 	last->buf_alloc = strbuf_detach(&buffer, &last->len);
 	last->buf = last->buf_alloc;
+	last->bundle_fd = -1;
 
 	if (is_http && proto_git_candidate
 		&& 5 <= last->len && last->buf[4] == '#') {
@@ -190,6 +269,10 @@ static int get_refs_from_url(const char *url, struct strbuf *out, int options)
 		last->proto_git = 1;
 	}
 
+	else if (is_bundle) {
+		last->bundle_filename = xstrdup(filename);
+	}
+
 	free(refs_url);
 	strbuf_release(&buffer);
 	last_discovery = last;
@@ -276,6 +359,22 @@ static int write_discovery(int in, int out, void *data)
 	return refs;
 }
 
+static void ensure_bundle_open(struct discovery *heads)
+{
+	if (heads->bundle_fd >= 0)
+		return;
+	heads->bundle_fd = read_bundle_header(heads->bundle_filename,
+					      &heads->bundle_header);
+	if (heads->bundle_fd < 0)
+		die("could not read bundle from %s", url);
+}
+
+static struct ref *parse_bundle_refs(struct discovery *heads)
+{
+	ensure_bundle_open(heads);
+	return bundle_header_to_refs(&heads->bundle_header);
+}
+
 static struct ref *get_refs(int for_push)
 {
 	struct discovery *heads;
@@ -287,6 +386,11 @@ static int write_discovery(int in, int out, void *data)
 
 	if (heads->proto_git)
 		return parse_git_refs(heads);
+	if (heads->bundle_filename) {
+		if (for_push)
+			die("cannot push into a remote bundle");
+		return parse_bundle_refs(heads);
+	}
 	return parse_info_refs(heads);
 }
 
@@ -690,11 +794,21 @@ static int fetch_git(struct discovery *heads,
 	return err;
 }
 
+static int fetch_bundle(struct discovery *d,
+			int nr_heads, struct ref **to_fetch)
+{
+	ensure_bundle_open(d);
+	return unbundle(&d->bundle_header, d->bundle_fd,
+			options.progress ? BUNDLE_VERBOSE : 0);
+}
+
 static int fetch(int nr_heads, struct ref **to_fetch)
 {
 	struct discovery *d = discover_refs("git-upload-pack");
 	if (d->proto_git)
 		return fetch_git(d, nr_heads, to_fetch);
+	else if (d->bundle_filename)
+		return fetch_bundle(d, nr_heads, to_fetch);
 	else
 		return fetch_dumb(nr_heads, to_fetch);
 }
-- 
1.7.7.2.7.g9f96f

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]