[PATCH v8 00/15] nd/pack-objects-pack-struct updates

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



v8 changes

- prefer BUG() over die()
- do "1U <<" instead of "1 << " to avoid undefined behavior with
  signed shifting.
- add more comments based on Jeff's feedback
- plug a leak in try_delta() when delta_size is too large
- be kind and set depth/cache_max_small_delta_size to max limit
  instead of dying when the user gives a value over limit
- make Travis CI exercise the uncommon code paths in pack-objects
- use git_env_*() instead of manually handling getenv() values
- add fallback code for when a new pack appears while pack-objects
  is running
- Compressed cached delta size limit is increased from 64k to 1MB
- Cached delta size limit is decreased from 2G to 1MB

Interdiff

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c774821930..b5bba2c228 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1439,7 +1439,7 @@ static void check_object(struct object_entry *entry)
 			goto give_up;
 
 		if (type < 0)
-			die("BUG: invalid type %d", type);
+			BUG("invalid type %d", type);
 		entry->in_pack_type = type;
 
 		/*
@@ -1861,6 +1861,11 @@ static pthread_mutex_t progress_mutex;
 
 #endif
 
+/*
+ * Return the size of the object without doing any delta
+ * reconstruction (so non-deltas are true object sizes, but deltas
+ * return the size of the delta data).
+ */
 unsigned long oe_get_size_slow(struct packing_data *pack,
 			       const struct object_entry *e)
 {
@@ -1881,7 +1886,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
 
 	p = oe_in_pack(pack, e);
 	if (!p)
-		die("BUG: when e->type is a delta, it must belong to a pack");
+		BUG("when e->type is a delta, it must belong to a pack");
 
 	read_lock();
 	w_curs = NULL;
@@ -2006,8 +2011,10 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
 	if (!delta_buf)
 		return 0;
-	if (delta_size >= (1 << OE_DELTA_SIZE_BITS))
+	if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
+		free(delta_buf);
 		return 0;
+	}
 
 	if (DELTA(trg_entry)) {
 		/* Prefer only shallower same-sized deltas. */
@@ -2163,7 +2170,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 			unsigned long size;
 
 			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
-			if (size < (1 << OE_Z_DELTA_BITS)) {
+			if (size < (1U << OE_Z_DELTA_BITS)) {
 				entry->z_delta_size = size;
 				cache_lock();
 				delta_cache_size -= DELTA_SIZE(entry);
@@ -3131,7 +3138,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	};
 
 	if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
-		die("BUG: too many dfs states, increase OE_DFS_STATE_BITS");
+		BUG("too many dfs states, increase OE_DFS_STATE_BITS");
 
 	check_replace_refs = 0;
 
@@ -3149,12 +3156,16 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (pack_to_stdout != !base_name || argc)
 		usage_with_options(pack_usage, pack_objects_options);
 
-	if (depth >= (1 << OE_DEPTH_BITS))
-		die(_("delta chain depth %d is greater than maximum limit %d"),
-		    depth, (1 << OE_DEPTH_BITS) - 1);
-	if (cache_max_small_delta_size >= (1 << OE_Z_DELTA_BITS))
-		die(_("pack.deltaCacheLimit is greater than maximum limit %d"),
-		    (1 << OE_Z_DELTA_BITS) - 1);
+	if (depth >= (1 << OE_DEPTH_BITS)) {
+		warning(_("delta chain depth %d is too deep, forcing %d"),
+			depth, (1 << OE_DEPTH_BITS) - 1);
+		depth = (1 << OE_DEPTH_BITS) - 1;
+	}
+	if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
+		warning(_("pack.deltaCacheLimit is too high, forcing %d"),
+			(1U << OE_Z_DELTA_BITS) - 1);
+		cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
+	}
 
 	argv_array_push(&rp, "pack-objects");
 	if (thin) {
@@ -3274,6 +3285,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		}
 	}
 
+	prepare_packing_data(&to_pack);
+
 	if (progress)
 		progress_state = start_progress(_("Counting objects"), 0);
 	if (!use_internal_rev_list)
diff --git a/ci/run-tests.sh b/ci/run-tests.sh
index 73e273fac7..857d144ee8 100755
--- a/ci/run-tests.sh
+++ b/ci/run-tests.sh
@@ -10,7 +10,10 @@ ln -s "$cache_dir/.prove" t/.prove
 make --quiet test
 if test "$jobname" = "linux-gcc"
 then
-	GIT_TEST_SPLIT_INDEX=YesPlease make --quiet test
+	export GIT_TEST_SPLIT_INDEX=YesPlease
+	export GIT_TEST_FULL_IN_PACK_ARRAY=true
+	export GIT_TEST_OE_SIZE=10
+	make --quiet test
 fi
 
 check_unignored_build_artifacts
diff --git a/pack-objects.c b/pack-objects.c
index 59c6e40a02..bf2e0a808d 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -3,6 +3,7 @@
 #include "pack.h"
 #include "pack-objects.h"
 #include "packfile.h"
+#include "config.h"
 
 static uint32_t locate_object_entry_hash(struct packing_data *pdata,
 					 const unsigned char *sha1,
@@ -90,18 +91,14 @@ struct object_entry *packlist_find(struct packing_data *pdata,
 static void prepare_in_pack_by_idx(struct packing_data *pdata)
 {
 	struct packed_git **mapping, *p;
-	int cnt = 0, nr = 1 << OE_IN_PACK_BITS;
-
-	if (getenv("GIT_TEST_FULL_IN_PACK_ARRAY")) {
-		/*
-		 * leave in_pack_by_idx NULL to force in_pack[] to be
-		 * used instead
-		 */
-		return;
-	}
+	int cnt = 0, nr = 1U << OE_IN_PACK_BITS;
 
 	ALLOC_ARRAY(mapping, nr);
-	mapping[cnt++] = NULL; /* zero index must be mapped to NULL */
+	/*
+	 * oe_in_pack() on an all-zero'd object_entry
+	 * (i.e. in_pack_idx also zero) should return NULL.
+	 */
+	mapping[cnt++] = NULL;
 	prepare_packed_git();
 	for (p = packed_git; p; p = p->next, cnt++) {
 		if (cnt == nr) {
@@ -114,21 +111,50 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata)
 	pdata->in_pack_by_idx = mapping;
 }
 
+/*
+ * A new pack appears after prepare_in_pack_by_idx() has been
+ * run. This is likely a race.
+ *
+ * We could map this new pack to in_pack_by_idx[] array, but then we
+ * have to deal with full array anyway. And since it's hard to test
+ * this fall back code, just stay simple and fall back to using
+ * in_pack[] array.
+ */
+void oe_map_new_pack(struct packing_data *pack,
+		     struct packed_git *p)
+{
+	uint32_t i;
+
+	REALLOC_ARRAY(pack->in_pack, pack->nr_alloc);
+
+	for (i = 0; i < pack->nr_objects; i++)
+		pack->in_pack[i] = oe_in_pack(pack, pack->objects + i);
+
+	FREE_AND_NULL(pack->in_pack_by_idx);
+}
+
+/* assume pdata is already zero'd by caller */
+void prepare_packing_data(struct packing_data *pdata)
+{
+	if (git_env_bool("GIT_TEST_FULL_IN_PACK_ARRAY", 0)) {
+		/*
+		 * do not initialize in_pack_by_idx[] to force the
+		 * slow path in oe_in_pack()
+		 */
+	} else {
+		prepare_in_pack_by_idx(pdata);
+	}
+
+	pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE",
+					     1U << OE_SIZE_BITS);
+}
+
 struct object_entry *packlist_alloc(struct packing_data *pdata,
 				    const unsigned char *sha1,
 				    uint32_t index_pos)
 {
 	struct object_entry *new_entry;
 
-	if (!pdata->nr_objects) {
-		prepare_in_pack_by_idx(pdata);
-		if (getenv("GIT_TEST_OE_SIZE_BITS")) {
-			int bits = atoi(getenv("GIT_TEST_OE_SIZE_BITS"));;
-			pdata->oe_size_limit = 1 << bits;
-		}
-		if (!pdata->oe_size_limit)
-			pdata->oe_size_limit = 1 << OE_SIZE_BITS;
-	}
 	if (pdata->nr_objects >= pdata->nr_alloc) {
 		pdata->nr_alloc = (pdata->nr_alloc  + 1024) * 3 / 2;
 		REALLOC_ARRAY(pdata->objects, pdata->nr_alloc);
diff --git a/pack-objects.h b/pack-objects.h
index c20f67e25b..60192cce1f 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -4,9 +4,13 @@
 #define OE_DFS_STATE_BITS	2
 #define OE_DEPTH_BITS		12
 #define OE_IN_PACK_BITS		10
-#define OE_Z_DELTA_BITS		16
+#define OE_Z_DELTA_BITS		20
+/*
+ * Note that oe_set_size() becomes expensive when the given size is
+ * above this limit. Don't lower it too much.
+ */
 #define OE_SIZE_BITS		31
-#define OE_DELTA_SIZE_BITS	31
+#define OE_DELTA_SIZE_BITS	20
 
 /*
  * State flags for depth-first search used for analyzing delta cycles.
@@ -36,7 +40,7 @@ enum dfs_state {
  *
  * "size" is the uncompressed object size. Compressed size of the raw
  * data for an object in a pack is not stored anywhere but is computed
- * and made available when reverse .idx is made. Note that when an
+ * and made available when reverse .idx is made. Note that when a
  * delta is reused, "size" is the uncompressed _delta_ size, not the
  * canonical one after the delta has been applied.
  *
@@ -77,15 +81,15 @@ struct object_entry {
 	void *delta_data;	/* cached delta (uncompressed) */
 	off_t in_pack_offset;
 	uint32_t hash;			/* name hint hash */
-	uint32_t size_:OE_SIZE_BITS;
+	unsigned size_:OE_SIZE_BITS;
 	unsigned size_valid:1;
 	uint32_t delta_idx;	/* delta base object */
 	uint32_t delta_child_idx; /* deltified objects who bases me */
 	uint32_t delta_sibling_idx; /* other deltified objects who
 				     * uses the same base as me
 				     */
-	uint32_t delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
-	uint32_t delta_size_valid:1;
+	unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
+	unsigned delta_size_valid:1;
 	unsigned in_pack_idx:OE_IN_PACK_BITS;	/* already in pack */
 	unsigned z_delta_size:OE_Z_DELTA_BITS;
 	unsigned type_valid:1;
@@ -103,7 +107,15 @@ struct object_entry {
 	unsigned char in_pack_header_size;
 	unsigned depth:OE_DEPTH_BITS;
 
-	/* size: 80, bit_padding: 20 bits, holes: 1 bit */
+	/*
+	 * pahole results on 64-bit linux (gcc and clang)
+	 *
+	 *   size: 80, bit_padding: 20 bits, holes: 8 bits
+	 *
+	 * and on 32-bit (gcc)
+	 *
+	 *   size: 76, bit_padding: 20 bits, holes: 8 bits
+	 */
 };
 
 struct packing_data {
@@ -127,6 +139,7 @@ struct packing_data {
 	uintmax_t oe_size_limit;
 };
 
+void prepare_packing_data(struct packing_data *pdata);
 struct object_entry *packlist_alloc(struct packing_data *pdata,
 				    const unsigned char *sha1,
 				    uint32_t index_pos);
@@ -164,7 +177,7 @@ static inline void oe_set_type(struct object_entry *e,
 			       enum object_type type)
 {
 	if (type >= OBJ_ANY)
-		die("BUG: OBJ_ANY cannot be set in pack-objects code");
+		BUG("OBJ_ANY cannot be set in pack-objects code");
 
 	e->type_valid = type >= OBJ_NONE;
 	e->type_ = (unsigned)type;
@@ -190,21 +203,20 @@ static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
 		return pack->in_pack_by_idx[e->in_pack_idx];
 	else
 		return pack->in_pack[e - pack->objects];
-
 }
 
+void oe_map_new_pack(struct packing_data *pack,
+		     struct packed_git *p);
 static inline void oe_set_in_pack(struct packing_data *pack,
 				  struct object_entry *e,
 				  struct packed_git *p)
 {
-	if (pack->in_pack_by_idx) {
-		if (p->index <= 0)
-			die("BUG: found_pack should be NULL "
-					"instead of having non-positive index");
+	if (!p->index)
+		oe_map_new_pack(pack, p);
+	if (pack->in_pack_by_idx)
 		e->in_pack_idx = p->index;
-	} else
+	else
 		pack->in_pack[e - pack->objects] = p;
-
 }
 
 static inline struct object_entry *oe_delta(
@@ -307,7 +319,7 @@ static inline void oe_set_size(struct packing_data *pack,
 	} else {
 		e->size_valid = 0;
 		if (oe_get_size_slow(pack, e) != size)
-			die("BUG: 'size' is supposed to be the object size!");
+			BUG("'size' is supposed to be the object size!");
 	}
 }
 
@@ -326,7 +338,7 @@ static inline void oe_set_delta_size(struct packing_data *pack,
 	e->delta_size_ = size;
 	e->delta_size_valid = e->delta_size_ == size;
 	if (!e->delta_size_valid && size != oe_size(pack, e))
-		die("BUG: this can only happen in check_object() "
+		BUG("this can only happen in check_object() "
 		    "where delta size is the same as entry size");
 }
 
diff --git a/t/README b/t/README
index 02bfb3fed5..c01d210c15 100644
--- a/t/README
+++ b/t/README
@@ -291,16 +291,26 @@ expect the rest to function correctly.
 and know what setup is needed for it.  Or when you want to run
 everything up to a certain test.
 
+
+Running tests with special setups
+---------------------------------
+
+The whole test suite could be run to test some special features
+that cannot be easily covered by a few specific test cases. These
+could be enabled by running the test suite with correct GIT_TEST_
+environment set.
+
+GIT_TEST_SPLIT_INDEX forces split-index mode on the whole test suite.
+
 GIT_TEST_FULL_IN_PACK_ARRAY exercises the uncommon pack-objects code
 path where there are more than 1024 packs even if the actual number of
 packs in repository is below this limit.
 
-GIT_TEST_OE_SIZE_BITS=<bits> exercises the uncommon pack-objects
-code path where we do not cache objecct size in memory and read it
-from existing packs on demand. This normally only happens when the
-object size is over 2GB. This variable forces the code path on any
-object larger than 2^<bits> bytes.
-
+GIT_TEST_OE_SIZE=<n> exercises the uncommon pack-objects code path
+where we do not cache object size in memory and read it from existing
+packs on demand. This normally only happens when the object size is
+over 2GB. This variable forces the code path on any object larger than
+<n> bytes.
 
 Naming Tests
 ------------
Nguyễn Thái Ngọc Duy (15):
  t/README: mention about running the test suite in special modes
  pack-objects: a bit of document about struct object_entry
  pack-objects: turn type and in_pack_type to bitfields
  pack-objects: use bitfield for object_entry::dfs_state
  pack-objects: use bitfield for object_entry::depth
  pack-objects: move in_pack_pos out of struct object_entry
  pack-objects: move in_pack out of struct object_entry
  pack-objects: refer to delta objects by index instead of pointer
  pack-objects: shrink z_delta_size field in struct object_entry
  pack-objects: don't check size when the object is bad
  pack-objects: clarify the use of object_entry::size
  pack-objects: shrink size field in struct object_entry
  pack-objects: shrink delta_size field in struct object_entry
  pack-objects: reorder members to shrink struct object_entry
  ci: exercise the whole test suite with uncommon code in pack-objects

 Documentation/config.txt           |   4 +-
 Documentation/git-pack-objects.txt |   4 +-
 Documentation/git-repack.txt       |   4 +-
 builtin/pack-objects.c             | 366 +++++++++++++++++++----------
 cache.h                            |   3 +
 ci/run-tests.sh                    |   5 +-
 object.h                           |   1 -
 pack-bitmap-write.c                |  14 +-
 pack-bitmap.c                      |   2 +-
 pack-bitmap.h                      |   4 +-
 pack-objects.c                     |  69 ++++++
 pack-objects.h                     | 312 ++++++++++++++++++++++--
 t/README                           |  20 ++
 t/t5300-pack-object.sh             |   5 +
 14 files changed, 650 insertions(+), 163 deletions(-)

-- 
2.17.0.rc2.515.g4feb9b7923




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux