> From: Derrick Stolee <dstolee@xxxxxxxxxxxxx> > > The 'git multi-pack-index expire' command looks at the existing > mult-pack-index, counts the number of objects referenced in each > pack-file, deletes the pack-fils with no referenced objects, and > rewrites the multi-pack-index to no longer reference those packs. Thanks - this was quite straightforwardly written. > @@ -745,7 +761,10 @@ int write_midx_file(const char *object_dir) > midx_name); > } > > - packs.m = load_multi_pack_index(object_dir, 1); > + if (m) > + packs.m = m; > + else > + packs.m = load_multi_pack_index(object_dir, 1); If we already loaded the m, we can just pass it in - OK. > + if (packs_to_drop && packs_to_drop->nr) { > + int drop_index = 0; > + int missing_drops = 0; > + > + for (i = 0; i < packs.nr && drop_index < packs_to_drop->nr; i++) { > + int cmp = strcmp(packs.info[i].pack_name, > + packs_to_drop->items[drop_index].string); > + > + if (!cmp) { > + drop_index++; > + packs.info[i].expired = 1; > + } else if (cmp > 0) { > + error(_("did not see pack-file %s to drop"), > + packs_to_drop->items[drop_index].string); > + drop_index++; > + missing_drops++; > + i--; > + } else { > + packs.info[i].expired = 0; > + } > + } > + > + if (missing_drops) { > + result = 1; > + goto cleanup; > + } > + } This takes into account that packfiles can shift while we run this command, I see. Other than that, this is a common pattern - how we iterate through 2 sorted arrays, one a subsequence of each other. And indeed packs_to_drop is a sorted list, because we use string_list_insert() below. > ALLOC_ARRAY(pack_perm, packs.nr); > for (i = 0; i < packs.nr; i++) { > - pack_perm[packs.info[i].orig_pack_int_id] = i; > + if (packs.info[i].expired) { > + dropped_packs++; > + pack_perm[packs.info[i].orig_pack_int_id] = PACK_EXPIRED; > + } else { > + pack_perm[packs.info[i].orig_pack_int_id] = i - dropped_packs; > + } Here... > } > > - for (i = 0; i < packs.nr; i++) > - pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; > + for (i = 0; i < packs.nr; i++) { > + if (!packs.info[i].expired) > + pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; > + } ...and here and elsewhere, we have to contend with the fact that packs.info has pack_info that we don't want to write. I think it would be slightly better to filter out the expired ones from packs.info, and then when generating pack_perm, first memset it to 0xff. This way, we wouldn't have to check expiry everywhere. But I don't feel too strongly about this. > int expire_midx_packs(const char *object_dir) > { > - return 0; > + uint32_t i, *count, result = 0; > + struct string_list packs_to_drop = STRING_LIST_INIT_DUP; > + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); > + > + if (!m) > + return 0; > + > + count = xcalloc(m->num_packs, sizeof(uint32_t)); > + for (i = 0; i < m->num_objects; i++) { > + int pack_int_id = nth_midxed_pack_int_id(m, i); > + count[pack_int_id]++; > + } > + > + for (i = 0; i < m->num_packs; i++) { > + char *pack_name; > + > + if (count[i]) > + continue; > + > + if (prepare_midx_pack(m, i)) > + continue; > + > + if (m->packs[i]->pack_keep) > + continue; > + > + pack_name = xstrdup(m->packs[i]->pack_name); > + close_pack(m->packs[i]); > + FREE_AND_NULL(m->packs[i]); > + > + string_list_insert(&packs_to_drop, m->pack_names[i]); > + unlink_pack_path(pack_name, 0); > + free(pack_name); > + } > + > + free(count); > + > + if (packs_to_drop.nr) > + result = write_midx_internal(object_dir, m, &packs_to_drop); > + > + string_list_clear(&packs_to_drop, 0); > + return result; > } This is as I expected - unlink all the files we don't want, and even though much of the midx hasn't changed, we still need to write it because it has a new list of packfiles. > +test_expect_success 'expire removes unreferenced packs' ' > + ( > + cd dup && > + git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && > + refs/heads/A > + ^refs/heads/C > + EOF > + git multi-pack-index write && > + ls .git/objects/pack | grep -v -e pack-[AB] >expect && > + git multi-pack-index expire && > + ls .git/objects/pack >actual && > + test_cmp expect actual && > + ls .git/objects/pack/ | grep idx >expect-idx && > + test-tool read-midx .git/objects | grep idx >actual-midx && > + test_cmp expect-idx actual-midx > + ) > +' Maybe add a fsck at the end for sanity's sake. Also, I think that preservation of .keep packfiles is an important feature, and maybe worth a test.