One struct to capture all types, just 4 methods: decode_message, encode_message, sizeof_message, hash_field. Signed-off-by: David Barr <davidbarr@xxxxxxxxxx> --- This is the first in a series of small patches to introduce some higher-level constructs into the git toolbag. The motivation is to empower a libfastimport implementation that is frugal with memory, fast and scalable. This version lacks any driver or tests. Soon to come are: * an efficient allocator that provides a mapping from an integer handle to pointer and length * a hash table that is time and memory effective for very numerous entries protobuf.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ protobuf.h | 70 ++++++++++++++++++++++ varint.h | 59 ++++++++++++++++++ 3 files changed, 322 insertions(+), 0 deletions(-) create mode 100644 protobuf.c create mode 100644 protobuf.h create mode 100644 varint.h diff --git a/protobuf.c b/protobuf.c new file mode 100644 index 0000000..09223dd --- /dev/null +++ b/protobuf.c @@ -0,0 +1,193 @@ +#include "git-compat-util.h" +#include "protobuf.h" +#include "varint.h" + +static int decode_binary(const char **buf, const char *end, const char **result, size_t len) +{ + if (end && *buf + len > end) + return 1; + *result = *buf; + *buf += len; + return 0; +} + +static int encode_binary(char **buf, const char *end, const char *value, size_t len) +{ + if (end && *buf + len > end) + return 1; + memcpy(*buf, value, len); + *buf += len; + return 0; +} + +static int decode_field(const char **buf, const char *end, struct protobuf_field *field) +{ + uint32_t u32; + uint64_t key; + bzero(field, sizeof(struct protobuf_field)); + if (decode_varint(buf, end, &key)) + return 1; + field->tag = key >> WT_BITS, + field->type = key & WT_MASK; + switch (field->type) { + case WT_VARINT: + if (decode_varint(buf, end, &field->val.num)) + return 1; + return 0; + case WT_64BIT: + field->val.bin.len = sizeof(uint64_t); + break; + case WT_STRING: + if (decode_varint(buf, end, &field->val.bin.len)) + return 1; + break; + case WT_SHA1: + field->val.bin.len = 20; + break; + case WT_32BIT: + field->val.bin.len = sizeof(uint32_t); + break; + default: + return 1; + } + if (decode_binary(buf, end, &field->val.bin.ptr, field->val.bin.len)) + return 1; + switch (field->type) { + case WT_64BIT: + memcpy(&field->val.num, field->val.bin.ptr, field->val.bin.len); + break; + case WT_32BIT: + memcpy(&u32, field->val.bin.ptr, field->val.bin.len); + field->val.num = u32; + } + return 0; +} + +int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields) +{ + struct protobuf_field current; + bzero(message, fields * sizeof(struct protobuf_field)); + while (*buf != end) { + if (decode_field(buf, end, ¤t)) + return 1; + if (current.tag < fields) + message[current.tag] = current; + } + return 0; +} + +static size_t sizeof_field(const struct protobuf_field *field) +{ + size_t sizeof_key = sizeof_varint(field->tag << WT_BITS | field->type); + switch (field->type) { + case WT_VARINT: + if (field->val.num) + return sizeof_key + sizeof_varint(field->val.num); + break; + case WT_64BIT: + if (field->val.num) + return sizeof_key + sizeof(uint64_t); + break; + case WT_STRING: + if (field->val.bin.len && field->val.bin.ptr) + return sizeof_key + sizeof_varint(field->val.bin.len) + + field->val.bin.len; + break; + case WT_SHA1: + if (field->val.bin.ptr) + return sizeof_key + 20; + break; + case WT_32BIT: + if (field->val.num) + return sizeof_key + sizeof(uint32_t); + break; + } + return 0; +} + +uint64_t sizeof_message(const struct protobuf_field *message, size_t fields) +{ + uint64_t size = 0; + while (fields--) + size += sizeof_field(message++); + return size; +} + +static int encode_field(char **buf, const struct protobuf_field *field, char *end) +{ + uint32_t u32; + uint64_t len; + const char *ptr; + uint64_t key = (field->tag << WT_BITS) | field->type; + if (!sizeof_field(field)) + return 0; + if (encode_varint(buf, end, key)) + return 1; + switch (field->type) { + case WT_VARINT: + if (encode_varint(buf, end, field->val.num)) + return 1; + return 0; + case WT_64BIT: + ptr = (const char *)&field->val.num; + len = sizeof(uint64_t); + break; + case WT_STRING: + if (encode_varint(buf, end, field->val.bin.len)) + return 1; + ptr = field->val.bin.ptr; + len = field->val.bin.len; + break; + case WT_SHA1: + ptr = field->val.bin.ptr; + len = 20; + break; + case WT_32BIT: + u32 = field->val.num; + ptr = (const char *)&u32; + len = sizeof(uint64_t); + break; + default: + return 1; + } + if (encode_binary(buf, end, ptr, len)) + return 1; + return 0; +} + +int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields) +{ + while (*buf != end && fields--) + if (encode_field(buf, message++, end)) + return 1; + return 0; +} + +static uint32_t x65599(const char *s, uint64_t len) +{ + uint32_t r = 0; + while (len--) + r = *s++ + (r << 6) + (r << 16) - r; + return r; +} + +uint32_t hash_field(const struct protobuf_field *field) +{ + uint32_t hc = 0; + switch (field->type) { + case WT_VARINT: + case WT_64BIT: + hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32; + break; + case WT_SHA1: + memcpy(&hc, field->val.bin.ptr, sizeof(hc)); + break; + case WT_STRING: + hc = x65599(field->val.bin.ptr, field->val.bin.len); + break; + case WT_32BIT: + hc = 0x9e3779b9ul * (uint32_t)field->val.num; + break; + } + return hc; +} diff --git a/protobuf.h b/protobuf.h new file mode 100644 index 0000000..89b70a4 --- /dev/null +++ b/protobuf.h @@ -0,0 +1,70 @@ +#ifndef PROTOBUF_H_ +#define PROTOBUF_H_ + +#define WT_BITS 3 +#define WT_MASK 0x7 + +enum wire_type { + WT_VARINT = 0, + WT_64BIT = 1, + WT_STRING = 2, + /* Custom wire type */ + WT_SHA1 = 3, + WT_32BIT = 5 +}; + +struct protobuf_field +{ + uint64_t tag : 64 - WT_BITS, + type : WT_BITS; + union protobuf_value { + uint64_t num; + struct protobuf_binary { + uint64_t len; + const char *ptr; + } bin; + } val; +}; + +#define PROTOBUF_KEY(p, f, wt) do { \ + (p)->f.tag = &((p)->f) - (struct protobuf_field*)(p); \ + (p)->f.type = wt; \ +} while(0) + +#define WT_VARINT(p, f, v) do { \ + PROTOBUF_KEY(p, f, WT_VARINT); \ + (p)->f.val.num = v; \ +} while(0) + +#define WT_64BIT(p, f, v) do { \ + PROTOBUF_KEY(p, f, WT_64BIT); \ + (p)->f.val.num = v; \ +} while(0) + +#define WT_STRING(p, f, s, l) do { \ + PROTOBUF_KEY(p, f, WT_STRING); \ + (p)->f.val.bin.ptr = s; \ + (p)->f.val.bin.len = l; \ +} while(0) + +#define WT_SHA1(p, f, v) do { \ + PROTOBUF_KEY(p, f, WT_SHA1); \ + (p)->f.val.bin.ptr = s; \ + (p)->f.val.bin.len = 20; \ +} while(0) + +#define WT_32BIT(p, f, v) do { \ + PROTOBUF_KEY(p, f, WT_32BIT); \ + (p)->f.val.num = v; \ +} while(0) + +#define PROTOBUF_CAST(p) \ + (struct protobuf_field*)(p), \ + sizeof(*(p)) / sizeof(struct protobuf_field) + +int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields); +uint64_t sizeof_message(const struct protobuf_field *message, size_t fields); +int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields); +uint32_t hash_field(const struct protobuf_field *field); + +#endif diff --git a/varint.h b/varint.h new file mode 100644 index 0000000..48a3547 --- /dev/null +++ b/varint.h @@ -0,0 +1,59 @@ +#ifndef VARINT_H_ +#define VARINT_H_ + +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + +static int decode_varint(const char **buf, const char *end, uint64_t *result) +{ + uint64_t rv = 0; + int shift = 0; + const char *pos; + for (pos = *buf; pos != end && shift < 64; pos++) { + unsigned char ch = *pos; + + rv |= (ch & VLI_DIGIT_MASK) << shift; + shift += VLI_BITS_PER_DIGIT; + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return 1; +} + +static int encode_varint(char **buf, const char *end, uint64_t value) +{ + char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = value & VLI_DIGIT_MASK; + + value >>= VLI_BITS_PER_DIGIT; + if (value) + ch |= VLI_CONTINUE; + *pos = ch; + if (ch & VLI_CONTINUE) + continue; + + *buf = pos + 1; + return 0; + } + return 1; +} + +static size_t sizeof_varint(uint64_t value) { + size_t size = 0; + int shift = VLI_BITS_PER_DIGIT; + while (shift < 64) { + size++; + if (value < (1ull << shift)) + break; + shift += VLI_BITS_PER_DIGIT; + } + return size; +} + +#endif -- 1.7.5.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html