Re: [PATCH v2 10/24] pack-bitmap-write: reimplement bitmap writing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[snip commit message]

Thanks for the very clear commit message explaining the new algorithm.

> +struct bb_commit {
> +	struct commit_list *children;
> +	struct bitmap *bitmap;
> +	unsigned selected:1;
> +	unsigned idx; /* within selected array */
> +};
> +
> +define_commit_slab(bb_data, struct bb_commit);
> +
> +struct bitmap_builder {
> +	struct bb_data data;
> +	struct commit **commits;
> +	size_t commits_nr, commits_alloc;
> +};
> +
> +static void bitmap_builder_init(struct bitmap_builder *bb,
> +				struct bitmap_writer *writer)
>  {
>  	struct rev_info revs;
> +	struct commit *commit;
> +	unsigned int i;
> +
> +	memset(bb, 0, sizeof(*bb));
> +	init_bb_data(&bb->data);
> +
> +	reset_revision_walk();
> +	repo_init_revisions(writer->to_pack->repo, &revs, NULL);
> +	revs.topo_order = 1;
> +
> +	for (i = 0; i < writer->selected_nr; i++) {
> +		struct commit *c = writer->selected[i].commit;
> +		struct bb_commit *ent = bb_data_at(&bb->data, c);
> +		ent->selected = 1;
> +		ent->idx = i;
> +		add_pending_object(&revs, &c->object, "");
> +	}
> +
> +	if (prepare_revision_walk(&revs))
> +		die("revision walk setup failed");
> +
> +	while ((commit = get_revision(&revs))) {
> +		struct commit_list *p;
> +
> +		parse_commit_or_die(commit);
> +
> +		ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
> +		bb->commits[bb->commits_nr++] = commit;
> +
> +		for (p = commit->parents; p; p = p->next) {
> +			struct bb_commit *ent = bb_data_at(&bb->data, p->item);
> +			commit_list_insert(commit, &ent->children);
> +		}
> +	}
> +}

Looks straightforward.

> +static void bitmap_builder_clear(struct bitmap_builder *bb)
> +{
> +	clear_bb_data(&bb->data);
> +	free(bb->commits);
> +	bb->commits_nr = bb->commits_alloc = 0;
> +}

I was wondering why the commit list and the children in struct bb_commit
weren't cleared, but that's because they are cleared during the
algorithm. So this is fine.

> +static void fill_bitmap_tree(struct bitmap *bitmap,
> +			     struct tree *tree)
> +{
> +	uint32_t pos;
> +	struct tree_desc desc;
> +	struct name_entry entry;
> +
> +	/*
> +	 * If our bit is already set, then there is nothing to do. Both this
> +	 * tree and all of its children will be set.
> +	 */
> +	pos = find_object_pos(&tree->object.oid);
> +	if (bitmap_get(bitmap, pos))
> +		return;
> +	bitmap_set(bitmap, pos);
> +
> +	if (parse_tree(tree) < 0)
> +		die("unable to load tree object %s",
> +		    oid_to_hex(&tree->object.oid));
> +	init_tree_desc(&desc, tree->buffer, tree->size);
> +
> +	while (tree_entry(&desc, &entry)) {
> +		switch (object_type(entry.mode)) {
> +		case OBJ_TREE:
> +			fill_bitmap_tree(bitmap,
> +					 lookup_tree(the_repository, &entry.oid));
> +			break;
> +		case OBJ_BLOB:
> +			bitmap_set(bitmap, find_object_pos(&entry.oid));
> +			break;
> +		default:
> +			/* Gitlink, etc; not reachable */
> +			break;
> +		}
> +	}
> +
> +	free_tree_buffer(tree);
> +}

Looks straightforward.

> +static void fill_bitmap_commit(struct bb_commit *ent,
> +			       struct commit *commit)
> +{
> +	if (!ent->bitmap)
> +		ent->bitmap = bitmap_new();
> +
> +	/*
> +	 * mark ourselves, but do not bother with parents; their values
> +	 * will already have been propagated to us
> +	 */
> +	bitmap_set(ent->bitmap, find_object_pos(&commit->object.oid));
> +	fill_bitmap_tree(ent->bitmap, get_commit_tree(commit));
> +}

OK - when filling the bitmap for a commit, we only set the specific bit
for the commit itself, and all the bits for the commit's tree and the
tree's descendants. This is consistent with the explanation of the
algorithm in the commit message.

> +static void store_selected(struct bb_commit *ent, struct commit *commit)
> +{
> +	struct bitmapped_commit *stored = &writer.selected[ent->idx];
> +	khiter_t hash_pos;
> +	int hash_ret;
> +
> +	/*
> +	 * the "reuse bitmaps" phase may have stored something here, but
> +	 * our new algorithm doesn't use it. Drop it.
> +	 */
> +	if (stored->bitmap)
> +		ewah_free(stored->bitmap);

I tried to figure out how the "reuse bitmaps" phase stores things in
this field, but that led me down a rabbit hole that I didn't pursue.
But anyway, the new bitmap is correctly generated, so clearing the old
bitmap is safe (except, possibly, wasting time, but I see that in a
subsequent patch, existing bitmaps will be reused in a new wawy).

> +
> +	stored->bitmap = bitmap_to_ewah(ent->bitmap);
> +
> +	hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
> +	if (hash_ret == 0)
> +		die("Duplicate entry when writing index: %s",
> +		    oid_to_hex(&commit->object.oid));
> +	kh_value(writer.bitmaps, hash_pos) = stored;
> +}
> +
> +void bitmap_writer_build(struct packing_data *to_pack)
> +{
> +	struct bitmap_builder bb;
> +	size_t i;
> +	int nr_stored = 0; /* for progress */
>  
>  	writer.bitmaps = kh_init_oid_map();
>  	writer.to_pack = to_pack;
>  
>  	if (writer.show_progress)
>  		writer.progress = start_progress("Building bitmaps", writer.selected_nr);
> +	trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
> +		the_repository);
> +
> +	bitmap_builder_init(&bb, &writer);
> +	for (i = bb.commits_nr; i > 0; i--) {
> +		struct commit *commit = bb.commits[i-1];
> +		struct bb_commit *ent = bb_data_at(&bb.data, commit);
> +		struct commit *child;
> +
> +		fill_bitmap_commit(ent, commit);
> +
> +		if (ent->selected) {
> +			store_selected(ent, commit);
> +			nr_stored++;
> +			display_progress(writer.progress, nr_stored);
> +		}
> +
> +		while ((child = pop_commit(&ent->children))) {

Here the children (specifically, the struct commit_list) are freed (one
by one).

> +			struct bb_commit *child_ent =
> +				bb_data_at(&bb.data, child);
> +
> +			if (child_ent->bitmap)
> +				bitmap_or(child_ent->bitmap, ent->bitmap);
> +			else
> +				child_ent->bitmap = bitmap_dup(ent->bitmap);
> +		}
> +		bitmap_free(ent->bitmap);
> +		ent->bitmap = NULL;

Here the bitmap is freed.

>  	}
> +	bitmap_builder_clear(&bb);
>  
>  	stop_progress(&writer.progress);
>  
>  	compute_xor_offsets();

Thanks - overall this looks straightforward.



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux