[PATCH] preserve mtime of local clone

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A local clone without hardlinks copies all objects, including dangling
ones, to the new repository. Since the mtimes are renewed, those
dangling objects cannot be pruned by "git gc --prune", even if they
would have been old enough for pruning in the original repository.

Instead, preserve mtime during copy. "git gc --prune" will then work
in the clone just like it would have in the original.

Signed-off-by: Clemens Buchacher <drizzd@xxxxxx>
---

I noticed this problem when I cloned a repo with lots of old dangling
objects onto a windows machine. git-gui immediately recommended running
git-gc, and I did. But each time I restarted git-gui, it recommended git-gc
again, because there were still plenty of dangling objects lying around
which could not be removed due to their recent mtimes.

So there is actually a problem with git-gui's recommendation, especially on
Windows, where it only checks for 1 or more files in .git/objects/42 (as
opposed to 8 files on other platforms). The probability of that happening if
the repo contains about 100 loose objects is 1-(254/255)^100 = 32%. The
probability for the same to happen with at least 2 files is only 6% [*].
Maybe that would be a good compromise?

Alternatively, git-gc could remember the number of dangling objects, and
git-gui can adjust its recommendation accordingly, taking that number and
the date of the latest repack into account.

Clemens

[*] The following octave script shows the probability for m or more objects
to be in .git/objects/42 for a total of n objects.

% Probability that at least m(j) out of n(i) loose objects end up in one
% particular directory (such as .git/objects/42), modelling each object as
% landing there independently with probability 1/255.
m = [1 2 8];        % thresholds: minimum number of files in the directory
n = 100:100:3000;   % total numbers of loose objects to evaluate

P = zeros(length(n), length(m));
for k = 1:length(n)
	% Store into row k, not row n(k): indexing by n(k) would grow P to
	% 3000 rows and it would no longer line up with n in plot().
	P(k, :) = 1-binocdf(m-1, n(k), 1/255);
end
plot(n, P);

n \ m	1	2	8
100	32%	6%	0%
500	86%	58%	0%
1000	98%	90%	5%
2000	100%	100%	55%

---
 builtin-clone.c   |    2 +-
 builtin-init-db.c |    2 +-
 cache.h           |    6 ++++--
 copy.c            |   25 ++++++++++++++++++++++---
 lockfile.c        |    2 +-
 rerere.c          |    2 +-
 6 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index ad04808..cb3c895 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -269,7 +269,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest)
 				die_errno("failed to create link '%s'", dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file(dest->buf, src->buf, 0666))
+		if (copy_file(dest->buf, src->buf, 0666, 1))
 			die_errno("failed to copy file to '%s'", dest->buf);
 	}
 	closedir(dir);
diff --git a/builtin-init-db.c b/builtin-init-db.c
index dd84cae..5deb81d 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -100,7 +100,7 @@ static void copy_templates_1(char *path, int baselen,
 				die_errno("cannot symlink '%s' '%s'", lnk, path);
 		}
 		else if (S_ISREG(st_template.st_mode)) {
-			if (copy_file(path, template, st_template.st_mode))
+			if (copy_file(path, template, st_template.st_mode, 0))
 				die_errno("cannot copy '%s' to '%s'", template,
 					  path);
 		}
diff --git a/cache.h b/cache.h
index 5fad24c..1875c97 100644
--- a/cache.h
+++ b/cache.h
@@ -921,8 +921,10 @@ extern const char *git_mailmap_file;
 
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
+extern int copy_fd(int ifd, int ofd, int preserve_times);
+extern int copy_file(const char *dst, const char *src, int mode, int
+		preserve_times);
+extern int copy_times(int ofd, int ifd);
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
 extern void write_or_die(int fd, const void *buf, size_t count);
diff --git a/copy.c b/copy.c
index e54d15a..fe0380e 100644
--- a/copy.c
+++ b/copy.c
@@ -1,6 +1,6 @@
 #include "cache.h"
 
-int copy_fd(int ifd, int ofd)
+int copy_fd(int ifd, int ofd, int preserve_times)
 {
 	while (1) {
 		char buffer[8192];
@@ -31,11 +31,18 @@ int copy_fd(int ifd, int ofd)
 			}
 		}
 	}
+	if (preserve_times && copy_times(ofd, ifd)) {
+		int time_error = errno;
+		close(ifd);
+		return error("copy-fd: failed to preserve times: %s",
+				strerror(time_error));
+	}
 	close(ifd);
 	return 0;
 }
 
-int copy_file(const char *dst, const char *src, int mode)
+int copy_file(const char *dst, const char *src, int mode,
+		int preserve_times)
 {
 	int fdi, fdo, status;
 
@@ -46,7 +53,7 @@ int copy_file(const char *dst, const char *src, int mode)
 		close(fdi);
 		return fdo;
 	}
-	status = copy_fd(fdi, fdo);
+	status = copy_fd(fdi, fdo, preserve_times);
 	if (close(fdo) != 0)
 		return error("%s: close error: %s", dst, strerror(errno));
 
@@ -55,3 +62,15 @@ int copy_file(const char *dst, const char *src, int mode)
 
 	return status;
 }
+
+int copy_times(int ofd, int ifd)
+{
+	struct stat st;
+	struct timespec times[2];
+	if (fstat(ifd, &st))
+		return -1;
+	times[0].tv_nsec = UTIME_OMIT;
+	times[1].tv_sec = st.st_mtime;
+	times[1].tv_nsec = ST_MTIME_NSEC(st);
+	return futimens(ofd, times);
+}
diff --git a/lockfile.c b/lockfile.c
index eb931ed..c7bbd4d 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -196,7 +196,7 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
 			close(fd);
 			return error("cannot open '%s' for copying", path);
 		}
-	} else if (copy_fd(orig_fd, fd)) {
+	} else if (copy_fd(orig_fd, fd, 0)) {
 		if (flags & LOCK_DIE_ON_ERROR)
 			exit(128);
 		close(fd);
diff --git a/rerere.c b/rerere.c
index 87360dc..d25f5f1 100644
--- a/rerere.c
+++ b/rerere.c
@@ -326,7 +326,7 @@ static int do_plain_rerere(struct string_list *rr, int fd)
 			continue;
 
 		fprintf(stderr, "Recorded resolution for '%s'.\n", path);
-		copy_file(rerere_path(name, "postimage"), path, 0666);
+		copy_file(rerere_path(name, "postimage"), path, 0666, 0);
 	mark_resolved:
 		rr->items[i].util = NULL;
 	}
-- 
1.6.4.2.266.gbaa17

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]