[PATCH] packfile: enhance the mtime of packfile by idx file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Sun Chao <16657101987@xxxxxxx>

Commit 33d4221c79 (write_sha1_file: freshen existing objects,
2014-10-15) avoid writing existing objects by freshen their
mtime (especially the packfiles contains them) in order to
aid the correct caching, and some process like find_lru_pack
can make good decision. However, this is unfriendly to
incremental backup jobs or services rely on file system
cache when there are large '.pack' files exists.

For example, after packed all objects, use 'write-tree' to
create same commit with the same tree and same environments
such like GIT_COMMITTER_DATE and GIT_AUTHOR_DATE, we can
notice the '.pack' file's mtime changed, but '.idx' file not.

So if we update the mtime of packfile by updating the '.idx'
file instead of '.pack' file, when we check the mtime
of packfile, get it from '.idx' file instead. Large git
repository may contains large '.pack' files, but '.idx'
files are smaller enough, this can avoid file system cache
reload the large files again and speed up git commands.

Signed-off-by: Sun Chao <16657101987@xxxxxxx>
---
    packfile: enhance the mtime of packfile by idx file
    
    Commit 33d4221c79 (write_sha1_file: freshen existing objects,
    2014-10-15) avoid writing existing objects by freshen their mtime
    (especially the packfiles contains them) in order to aid the correct
    caching, and some process like find_lru_pack can make good decision.
    However, this is unfriendly to incremental backup jobs or services rely
    on file system cache when there are large '.pack' files exists.
    
    For example, after packed all objects, use 'write-tree' to create same
    commit with the same tree and same environments such like
    GIT_COMMITTER_DATE and GIT_AUTHOR_DATE, we can notice the '.pack' file's
    mtime changed, but '.idx' file not.
    
    So if we update the mtime of packfile by updating the '.idx' file
    instead of '.pack' file, when we check the mtime of packfile, get it
    from '.idx' file instead. Large git repository may contains large
    '.pack' files, but '.idx' files are smaller enough, this can avoid file
    system cache reload the large files again and speed up git commands.
    
    Signed-off-by: Sun Chao 16657101987@xxxxxxx

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1043%2Fsunchao9%2Fmaster-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1043/sunchao9/master-v1
Pull-Request: https://github.com/git/git/pull/1043

 builtin/index-pack.c                 | 19 +++----------------
 object-file.c                        |  7 ++++++-
 packfile.c                           | 19 ++++++++++++++++++-
 packfile.h                           |  7 +++++++
 t/t7701-repack-unpack-unreachable.sh |  2 +-
 5 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3fbc5d70777..60bacc8ee7f 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1437,19 +1437,6 @@ static void fix_unresolved_deltas(struct hashfile *f)
 	free(sorted_by_pos);
 }
 
-static const char *derive_filename(const char *pack_name, const char *strip,
-				   const char *suffix, struct strbuf *buf)
-{
-	size_t len;
-	if (!strip_suffix(pack_name, strip, &len) || !len ||
-	    pack_name[len - 1] != '.')
-		die(_("packfile name '%s' does not end with '.%s'"),
-		    pack_name, strip);
-	strbuf_add(buf, pack_name, len);
-	strbuf_addstr(buf, suffix);
-	return buf->buf;
-}
-
 static void write_special_file(const char *suffix, const char *msg,
 			       const char *pack_name, const unsigned char *hash,
 			       const char **report)
@@ -1460,7 +1447,7 @@ static void write_special_file(const char *suffix, const char *msg,
 	int msg_len = strlen(msg);
 
 	if (pack_name)
-		filename = derive_filename(pack_name, "pack", suffix, &name_buf);
+		filename = derive_pack_filename(pack_name, "pack", suffix, &name_buf);
 	else
 		filename = odb_pack_name(&name_buf, hash, suffix);
 
@@ -1855,13 +1842,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 	if (from_stdin && hash_algo)
 		die(_("--object-format cannot be used with --stdin"));
 	if (!index_name && pack_name)
-		index_name = derive_filename(pack_name, "pack", "idx", &index_name_buf);
+		index_name = derive_pack_filename(pack_name, "pack", "idx", &index_name_buf);
 
 	opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY);
 	if (rev_index) {
 		opts.flags |= verify ? WRITE_REV_VERIFY : WRITE_REV;
 		if (index_name)
-			rev_index_name = derive_filename(index_name,
+			rev_index_name = derive_pack_filename(index_name,
 							 "idx", "rev",
 							 &rev_index_name_buf);
 	}
diff --git a/object-file.c b/object-file.c
index f233b440b22..068ef99d461 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1974,11 +1974,16 @@ static int freshen_loose_object(const struct object_id *oid)
 static int freshen_packed_object(const struct object_id *oid)
 {
 	struct pack_entry e;
+	struct strbuf name_buf = STRBUF_INIT;
+	const char *filename;
+
 	if (!find_pack_entry(the_repository, oid, &e))
 		return 0;
 	if (e.p->freshened)
 		return 1;
-	if (!freshen_file(e.p->pack_name))
+
+	filename = derive_pack_filename(e.p->pack_name, "pack", "idx", &name_buf);
+	if (!freshen_file(filename))
 		return 0;
 	e.p->freshened = 1;
 	return 1;
diff --git a/packfile.c b/packfile.c
index 755aa7aec5e..46f8fb22462 100644
--- a/packfile.c
+++ b/packfile.c
@@ -40,6 +40,19 @@ char *sha1_pack_index_name(const unsigned char *sha1)
 	return odb_pack_name(&buf, sha1, "idx");
 }
 
+const char *derive_pack_filename(const char *pack_name, const char *strip,
+				const char *suffix, struct strbuf *buf)
+{
+	size_t len;
+	if (!strip_suffix(pack_name, strip, &len) || !len ||
+	    pack_name[len - 1] != '.')
+		die(_("packfile name '%s' does not end with '.%s'"),
+		    pack_name, strip);
+	strbuf_add(buf, pack_name, len);
+	strbuf_addstr(buf, suffix);
+	return buf->buf;
+}
+
 static unsigned int pack_used_ctr;
 static unsigned int pack_mmap_calls;
 static unsigned int peak_pack_open_windows;
@@ -693,6 +706,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 	size_t alloc;
 	struct packed_git *p;
 
+	if (stat(path, &st) || !S_ISREG(st.st_mode)) {
+		return NULL;
+	}
+
 	/*
 	 * Make sure a corresponding .pack file exists and that
 	 * the index looks sane.
@@ -707,6 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 	alloc = st_add3(path_len, strlen(".promisor"), 1);
 	p = alloc_packed_git(alloc);
 	memcpy(p->pack_name, path, path_len);
+	p->mtime = st.st_mtime;
 
 	xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
 	if (!access(p->pack_name, F_OK))
@@ -727,7 +745,6 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 	 */
 	p->pack_size = st.st_size;
 	p->pack_local = local;
-	p->mtime = st.st_mtime;
 	if (path_len < the_hash_algo->hexsz ||
 	    get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->hash))
 		hashclr(p->hash);
diff --git a/packfile.h b/packfile.h
index 3ae117a8aef..ff702b22e6a 100644
--- a/packfile.h
+++ b/packfile.h
@@ -31,6 +31,13 @@ char *sha1_pack_name(const unsigned char *sha1);
  */
 char *sha1_pack_index_name(const unsigned char *sha1);
 
+/*
+ * Return the corresponding filename with given suffix from "file_name"
+ * which must has "strip" suffix.
+ */
+const char *derive_pack_filename(const char *file_name, const char *strip,
+		const char *suffix, struct strbuf *buf);
+
 /*
  * Return the basename of the packfile, omitting any containing directory
  * (e.g., "pack-1234abcd[...].pack").
diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh
index 937f89ee8c8..51c1afcbe2c 100755
--- a/t/t7701-repack-unpack-unreachable.sh
+++ b/t/t7701-repack-unpack-unreachable.sh
@@ -106,7 +106,7 @@ test_expect_success 'do not bother loosening old objects' '
 	git prune-packed &&
 	git cat-file -p $obj1 &&
 	git cat-file -p $obj2 &&
-	test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
+	test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.idx &&
 	git repack -A -d --unpack-unreachable=1.hour.ago &&
 	git cat-file -p $obj1 &&
 	test_must_fail git cat-file -p $obj2

base-commit: d486ca60a51c9cb1fe068803c3f540724e95e83a
-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux