Re: [PATCH] delta-islands: free island-related data after use

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 16 2022, Eric Wong wrote:

> On my use case involving 771 islands of Linux on kernel.org,
> this reduces memory usage by around 25MB.  The bulk of that
> comes from free_remote_islands, since free_island_regexes only
> saves around 40k.
>
> This memory is saved early in the memory-intensive pack process,
> making it available for the remainder of the long process.
>
> Signed-off-by: Eric Wong <e@xxxxxxxxx>
> ---
>   Note: I only noticed this when I screwed up up a pack.island RE,
>   ended up with hundreds of thousands of islands instead of 771,
>   and kept OOM-ing :x
>
>   Memory savings were measured using the following patch which
>   relies on a patched LD_PRELOAD-based malloc debugger:
>   https://80x24.org/spew/20221116095404.3974691-1-e@xxxxxxxxx/raw

FWIW SANITIZE=leak will find this if you stick a "remote_islands = NULL"
and run e.g. t5320-delta-islands.sh, but maybe you needed this closer to
production.

Valgrind will also work, but of course be *much* slower.

>   Will try to hunt down more memory savings in the nearish future.
>
>  delta-islands.c | 27 +++++++++++++++++++++++++++
>  1 file changed, 27 insertions(+)
>
> diff --git a/delta-islands.c b/delta-islands.c
> index 26f9e99e1a..391e947cc6 100644
> --- a/delta-islands.c
> +++ b/delta-islands.c
> @@ -454,6 +454,31 @@ static void deduplicate_islands(struct repository *r)
>  	free(list);
>  }
>  
> +static void free_island_regexes(void)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < island_regexes_nr; i++)
> +		regfree(&island_regexes[i]);
> +
> +	FREE_AND_NULL(island_regexes);
> +	island_regexes_alloc = island_regexes_nr = 0;
> +}
> +
> +static void free_remote_islands(void)
> +{
> +	const char *island_name;
> +	struct remote_island *rl;
> +
> +	kh_foreach(remote_islands, island_name, rl, {
> +		free((void *)island_name);
> +		oid_array_clear(&rl->oids);
> +		free(rl);
> +	});
> +	kh_destroy_str(remote_islands);
> +	remote_islands = NULL;
> +}
> +
>  void load_delta_islands(struct repository *r, int progress)
>  {
>  	island_marks = kh_init_oid_map();
> @@ -461,7 +486,9 @@ void load_delta_islands(struct repository *r, int progress)
>  
>  	git_config(island_config_callback, NULL);
>  	for_each_ref(find_island_for_ref, NULL);
> +	free_island_regexes();
>  	deduplicate_islands(r);
> +	free_remote_islands();
>  
>  	if (progress)
>  		fprintf(stderr, _("Marked %d islands, done.\n"), island_counter);

Perfect shouldn't be the enemy of the good & all that, but in this case
it's not too much more effort to just give this data an appropriate
lifetime instead of the global, I tried that out for just the "regex"
part of this below.

The free_remote_islands() seems to be similarly alive between
"find_island_for_ref" and "deduplicate_islands".

Your version also works, but the root cause of this sort of thing is
these global lifetimes, which sometimes we do for a good reason, but in
this case we don't.

It's more lines, but e.g. your FREE_AND_NULL() and resetting "nr" and
"alloc" makes one wonder how the data might be used outside fo that
load_delta_islands() chain (which is needed, if we invoke it again),
when we can just init a stack variable to hold it instead...

diff --git a/delta-islands.c b/delta-islands.c
index 26f9e99e1a9..ef86a91059c 100644
--- a/delta-islands.c
+++ b/delta-islands.c
@@ -312,29 +312,41 @@ void resolve_tree_islands(struct repository *r,
 	free(todo);
 }
 
-static regex_t *island_regexes;
-static unsigned int island_regexes_alloc, island_regexes_nr;
+struct island_config_data {
+	regex_t *rx;
+	size_t nr;
+	size_t alloc;
+};
 static const char *core_island_name;
 
-static int island_config_callback(const char *k, const char *v, void *cb UNUSED)
+static void island_config_data_release(struct island_config_data *icd)
+{
+	for (size_t i = 0; i < icd->nr; i++)
+		regfree(&icd->rx[i]);
+	free(icd->rx);
+}
+
+static int island_config_callback(const char *k, const char *v, void *cb)
 {
+	struct island_config_data *data = cb;
+
 	if (!strcmp(k, "pack.island")) {
 		struct strbuf re = STRBUF_INIT;
 
 		if (!v)
 			return config_error_nonbool(k);
 
-		ALLOC_GROW(island_regexes, island_regexes_nr + 1, island_regexes_alloc);
+		ALLOC_GROW(data->rx, data->nr + 1, data->alloc);
 
 		if (*v != '^')
 			strbuf_addch(&re, '^');
 		strbuf_addstr(&re, v);
 
-		if (regcomp(&island_regexes[island_regexes_nr], re.buf, REG_EXTENDED))
+		if (regcomp(&data->rx[data->nr], re.buf, REG_EXTENDED))
 			die(_("failed to load island regex for '%s': %s"), k, re.buf);
 
 		strbuf_release(&re);
-		island_regexes_nr++;
+		data->nr++;
 		return 0;
 	}
 
@@ -365,8 +377,10 @@ static void add_ref_to_island(const char *island_name, const struct object_id *o
 }
 
 static int find_island_for_ref(const char *refname, const struct object_id *oid,
-			       int flags UNUSED, void *data UNUSED)
+			       int flags UNUSED, void *cb)
 {
+	struct island_config_data *data = cb;
+
 	/*
 	 * We should advertise 'ARRAY_SIZE(matches) - 2' as the max,
 	 * so we can diagnose below a config with more capture groups
@@ -377,8 +391,8 @@ static int find_island_for_ref(const char *refname, const struct object_id *oid,
 	struct strbuf island_name = STRBUF_INIT;
 
 	/* walk backwards to get last-one-wins ordering */
-	for (i = island_regexes_nr - 1; i >= 0; i--) {
-		if (!regexec(&island_regexes[i], refname,
+	for (i = data->nr - 1; i >= 0; i--) {
+		if (!regexec(&data->rx[i], refname,
 			     ARRAY_SIZE(matches), matches, 0))
 			break;
 	}
@@ -456,11 +470,14 @@ static void deduplicate_islands(struct repository *r)
 
 void load_delta_islands(struct repository *r, int progress)
 {
+	struct island_config_data data = { 0 };
+
 	island_marks = kh_init_oid_map();
 	remote_islands = kh_init_str();
 
-	git_config(island_config_callback, NULL);
-	for_each_ref(find_island_for_ref, NULL);
+	git_config(island_config_callback, &data);
+	for_each_ref(find_island_for_ref, &data);
+	island_config_data_release(&data);
 	deduplicate_islands(r);
 
 	if (progress)



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux