[PATCH 3/4] fsck: avoid reading every object twice

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



During verify_pack() all objects are read for SHA-1 check. Then
fsck_sha1() is called on every object, which read the object again
(fsck_sha1 -> parse_object -> read_sha1_file).

Avoid reading an object twice, do fsck_sha1 while we have an object
uncompressed data in verify_pack.

On git.git, with this patch I got:

$ /usr/bin/time ./git fsck >/dev/null
98.97user 0.90system 1:40.01elapsed 99%CPU (0avgtext+0avgdata 616624maxresident)k
0inputs+0outputs (0major+194186minor)pagefaults 0swaps

Without it:

$ /usr/bin/time ./git fsck >/dev/null
231.23user 2.35system 3:53.82elapsed 99%CPU (0avgtext+0avgdata 636688maxresident)k
0inputs+0outputs (0major+461629minor)pagefaults 0swaps

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 builtin/fsck.c |   42 +++++++++++++++++++++++++-----------------
 pack-check.c   |   13 ++++++++++---
 pack.h         |    5 ++++-
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4ead98d..0603f64 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -282,14 +282,8 @@ static void check_connectivity(void)
 	}
 }
 
-static int fsck_sha1(const unsigned char *sha1)
+static int fsck_obj(struct object *obj)
 {
-	struct object *obj = parse_object(sha1);
-	if (!obj) {
-		errors_found |= ERROR_OBJECT;
-		return error("%s: object corrupt or missing",
-			     sha1_to_hex(sha1));
-	}
 	if (obj->flags & SEEN)
 		return 0;
 	obj->flags |= SEEN;
@@ -332,6 +326,29 @@ static int fsck_sha1(const unsigned char *sha1)
 	return 0;
 }
 
+static int fsck_sha1(const unsigned char *sha1)
+{
+	struct object *obj = parse_object(sha1);
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		return error("%s: object corrupt or missing",
+			     sha1_to_hex(sha1));
+	}
+	return fsck_obj(obj);
+}
+
+static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
+			   unsigned long size, void *buffer, int *eaten)
+{
+	struct object *obj;
+	obj = parse_object_buffer(sha1, type, size, buffer, eaten);
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		return error("%s: object corrupt or missing", sha1_to_hex(sha1));
+	}
+	return fsck_obj(obj);
+}
+
 /*
  * This is the sorting chunk size: make it reasonably
  * big so that we can sort well..
@@ -627,17 +644,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
 		prepare_packed_git();
 		for (p = packed_git; p; p = p->next)
 			/* verify gives error messages itself */
-			if (verify_pack(p))
+			if (verify_pack(p, fsck_obj_buffer))
 				errors_found |= ERROR_PACK;
-
-		for (p = packed_git; p; p = p->next) {
-			uint32_t j, num;
-			if (open_pack_index(p))
-				continue;
-			num = p->num_objects;
-			for (j = 0; j < num; j++)
-				fsck_sha1(nth_packed_object_sha1(p, j));
-		}
 	}
 
 	heads = 0;
diff --git a/pack-check.c b/pack-check.c
index e33ea79..372d6b2 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -42,7 +42,8 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
 }
 
 static int verify_packfile(struct packed_git *p,
-		struct pack_window **w_curs)
+			   struct pack_window **w_curs,
+			   verify_fn fn)
 {
 	off_t index_size = p->index_size;
 	const unsigned char *index_base = p->index_data;
@@ -129,6 +130,12 @@ static int verify_packfile(struct packed_git *p,
 			free(data);
 			break;
 		}
+		if (fn) {
+			int eaten = 0;
+			fn(entries[i].sha1, type, size, data, &eaten);
+			if (eaten)
+				data = NULL;
+		}
 		free(data);
 	}
 	free(entries);
@@ -159,7 +166,7 @@ int verify_pack_index(struct packed_git *p)
 	return err;
 }
 
-int verify_pack(struct packed_git *p)
+int verify_pack(struct packed_git *p, verify_fn fn)
 {
 	int err = 0;
 	struct pack_window *w_curs = NULL;
@@ -168,7 +175,7 @@ int verify_pack(struct packed_git *p)
 	if (!p->index_data)
 		return -1;
 
-	err |= verify_packfile(p, &w_curs);
+	err |= verify_packfile(p, &w_curs, fn);
 	unuse_pack(&w_curs);
 
 	return err;
diff --git a/pack.h b/pack.h
index 722a54e..70f3c29 100644
--- a/pack.h
+++ b/pack.h
@@ -70,10 +70,13 @@ struct pack_idx_entry {
 	off_t offset;
 };
 
+
+typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*);
+
 extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1);
 extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
-extern int verify_pack(struct packed_git *);
+extern int verify_pack(struct packed_git *, verify_fn fn);
 extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
 extern char *index_pack_lockfile(int fd);
 extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
-- 
1.7.4.74.g639db

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]