--- Makefile | 1 + bloom-filter.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++ bloom-filter.h | 39 +++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 bloom-filter.c create mode 100644 bloom-filter.h diff --git a/Makefile b/Makefile index 13e1c52478..850eafb3ee 100644 --- a/Makefile +++ b/Makefile @@ -827,6 +827,7 @@ LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blame.o LIB_OBJS += blob.o +LIB_OBJS += bloom-filter.o LIB_OBJS += branch.o LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle.o diff --git a/bloom-filter.c b/bloom-filter.c new file mode 100644 index 0000000000..7dce0e35fa --- /dev/null +++ b/bloom-filter.c @@ -0,0 +1,103 @@ +#include "cache.h" +#include "bloom-filter.h" + +void bloom_filter_init(struct bloom_filter *bf, uint32_t bit_size) +{ + if (bit_size % CHAR_BIT) + BUG("invalid size for bloom filter"); + + bf->nr_entries = 0; + bf->bit_size = bit_size; + bf->bits = xmalloc(bit_size / CHAR_BIT); +} + +void bloom_filter_free(struct bloom_filter *bf) +{ + bf->nr_entries = 0; + bf->bit_size = 0; + FREE_AND_NULL(bf->bits); +} + + +void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *offsets, + int nr_offsets, int nr_entries) +{ + int i; + for (i = 0; i < nr_offsets; i++) { + uint32_t byte_offset = (offsets[i] % bf->bit_size) / CHAR_BIT; + unsigned char mask = 1 << offsets[i] % CHAR_BIT; + bf->bits[byte_offset] |= mask; + } + bf->nr_entries += nr_entries; +} + +int bloom_filter_check_bits(struct bloom_filter *bf, const uint32_t *offsets, + int nr) +{ + int i; + for (i = 0; i < nr; i++) { + uint32_t byte_offset = (offsets[i] % bf->bit_size) / CHAR_BIT; + unsigned char mask = 1 << offsets[i] % CHAR_BIT; + if (!(bf->bits[byte_offset] & mask)) + return 0; + } + return 1; +} + + +void bloom_filter_add_hash(struct bloom_filter *bf, const unsigned char *hash) +{ + uint32_t offsets[GIT_MAX_RAWSZ / sizeof(uint32_t)]; + hashcpy((unsigned char*)offsets, hash); + bloom_filter_set_bits(bf, offsets, + the_hash_algo->rawsz / sizeof(*offsets), 1); +} + +int bloom_filter_check_hash(struct bloom_filter *bf, const unsigned char *hash) +{ + uint32_t offsets[GIT_MAX_RAWSZ / sizeof(uint32_t)]; + hashcpy((unsigned char*)offsets, hash); + return bloom_filter_check_bits(bf, offsets, + the_hash_algo->rawsz / sizeof(*offsets)); +} + +void hashxor(const unsigned char *hash1, const unsigned char *hash2, + unsigned char *out) +{ + int i; + for (i = 0; i < the_hash_algo->rawsz; i++) + out[i] = hash1[i] ^ hash2[i]; +} + +/* hardcoded for now... */ +static GIT_PATH_FUNC(git_path_bloom, "objects/info/bloom") + +int bloom_filter_load(struct bloom_filter *bf) +{ + int fd = open(git_path_bloom(), O_RDONLY); + + if (fd < 0) + return -1; + + read_in_full(fd, &bf->nr_entries, sizeof(bf->nr_entries)); + read_in_full(fd, &bf->bit_size, sizeof(bf->bit_size)); + if (bf->bit_size % CHAR_BIT) + BUG("invalid size for bloom filter"); + bf->bits = xmalloc(bf->bit_size / CHAR_BIT); + read_in_full(fd, bf->bits, bf->bit_size / CHAR_BIT); + + close(fd); + + return 0; +} + +void bloom_filter_write(struct bloom_filter *bf) +{ + int fd = xopen(git_path_bloom(), O_WRONLY | O_CREAT | O_TRUNC, 0666); + + write_in_full(fd, &bf->nr_entries, sizeof(bf->nr_entries)); + write_in_full(fd, &bf->bit_size, sizeof(bf->bit_size)); + write_in_full(fd, bf->bits, bf->bit_size / CHAR_BIT); + + close(fd); +} diff --git a/bloom-filter.h b/bloom-filter.h new file mode 100644 index 0000000000..94d0af1708 --- /dev/null +++ b/bloom-filter.h @@ -0,0 +1,39 @@ +#ifndef BLOOM_FILTER_H +#define BLOOM_FILTER_H + +#include "git-compat-util.h" + +struct bloom_filter { + uint32_t nr_entries; + uint32_t bit_size; + unsigned char *bits; +}; + + +void bloom_filter_init(struct bloom_filter *bf, uint32_t bit_size); +void bloom_filter_free(struct bloom_filter *bf); + +void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *offsets, + int nr_offsets, int nr_enries); +int bloom_filter_check_bits(struct bloom_filter *bf, const uint32_t *offsets, + int nr); + +/* + * Turns the given (SHA1) hash into 5 unsigned ints, and sets the bits at + * those positions (modulo the bitmap's size) in the Bloom filter. + */ +void bloom_filter_add_hash(struct bloom_filter *bf, const unsigned char *hash); +/* + * Turns the given (SHA1) hash into 5 unsigned ints, and checks the bits at + * those positions (modulo the bitmap's size) in the Bloom filter. + * Returns 1 if all those bits are set, 0 otherwise. + */ +int bloom_filter_check_hash(struct bloom_filter *bf, const unsigned char *hash); + +void hashxor(const unsigned char *hash1, const unsigned char *hash2, + unsigned char *out); + +int bloom_filter_load(struct bloom_filter *bf); +void bloom_filter_write(struct bloom_filter *bf); + +#endif -- 2.19.1.409.g0a0ee5eb6b