Jeff King <peff@xxxxxxxx> writes: > +static void strbuf_add_varint(struct strbuf *out, uintmax_t val) > +{ > + size_t len; > + strbuf_grow(out, 16); /* enough for any varint */ > + len = encode_varint(val, (unsigned char *)out->buf + out->len); > + strbuf_setlen(out, out->len + len); > +} > + > +static void bitmap_to_rle(struct strbuf *out, struct bitmap *bitmap) > +{ > + int curval = 0; /* count zeroes, then ones, then zeroes, etc */ > + size_t run = 0; > + size_t word; > + size_t orig_len = out->len; > + > + for (word = 0; word < bitmap->word_alloc; word++) { > + int bit; > + > + for (bit = 0; bit < BITS_IN_EWORD; bit++) { > + int val = !!(bitmap->words[word] & (((eword_t)1) << bit)); > + if (val == curval) > + run++; > + else { > + strbuf_add_varint(out, run); > + curval = 1 - curval; /* flip 0/1 */ > + run = 1; > + } > + } OK. I find it a bit disturbing to see that the loop knows a bit too much about how "struct bitmap" is implemented, but that is a complaint against the bitmap API, not this new user of the API. We do not try to handle the case where bitmap has bits that is not multiple of BITS_IN_EWORD and instead pretend that size of such a bitmap can be rounded up, because we ignore trailing 0-bit anyway, and we know the "struct bitmap" would pad with 0-bit at the tail? > + } > + > + /* > + * complete the run, but do not bother with trailing zeroes, unless we > + * failed to write even an initial run of 0's. > + */ > + if (curval && run) > + strbuf_add_varint(out, run); > + else if (orig_len == out->len) > + strbuf_add_varint(out, 0); > + > + /* signal end-of-input with an empty run */ > + strbuf_add_varint(out, 0); > +} OK. > +static size_t rle_each_bit(const unsigned char *in, size_t len, > + void (*fn)(size_t, void *), void *data) > +{ > + int curval = 0; /* look for zeroes first, then ones, etc */ > + const unsigned char *cur = in; > + const unsigned char *end = in + len; > + size_t pos; > + > + /* we always have a first run, even if it's 0 zeroes */ > + pos = decode_varint(&cur); > + > + /* > + * ugh, varint does not seem to have a way to prevent reading past > + * the end of the buffer. We'll do a length check after each one, > + * so the worst case is bounded. > + */ Sorry about that :-). > + if (cur > end) { > + error("input underflow in rle"); > + return len; > + } > + > + while (1) { > + size_t run = decode_varint(&cur); > + > + if (cur > end) { > + error("input underflow in rle"); > + return len; > + } > + > + if (!run) > + break; /* empty run signals end */ > + > + curval = 1 - curval; /* flip 0/1 */ > + if (curval) { > + /* we have a run of 1's; deliver them */ > + size_t i; > + for (i = 0; i < run; i++) > + fn(pos + i, data); > + } > + pos += run; > + } > + > + return cur - in; > +} Makes sense.