On Mon, Apr 06, 2015 at 12:39:15AM +0200, René Scharfe wrote: > >...this. I think that effort might be better spent on a ref storage > >format that's more efficient, simpler (with respect to subtle races and > >such), and could provide other features (e.g., transactional atomicity). > > Such as a DBMS? :-) Leaving storage details to SQLite or whatever sounds > attractive to me because I'm lazy. Exactly. Though I think some folks were worried about the extra dependency (e.g., I think SQLite is hard for JGit, because there's no pure-java implementation, which makes Eclipse unhappy). With pluggable backends we can make something like a SQLite backend optional. I.e., use it if you want the benefits and can accept the portability downsides. But that also risks fracturing the community, and people on the "old" format being left behind. > Forgot to say: I like your changes. But if strbuf_getline can only be made > fast enough beyond that by duplicating stdio buffering then I feel it's > better to take a different way. E.g. dropping the requirement to handle NUL > chars and basing it on fgets as Junio suggested in his reply to patch 3 > sounds good. Yeah, though we probably need to either audit the callers, or provide a flag for each caller to turn on the speed-over-NULs behavior. I'll look into that, but it may not be this week, as I'll be traveling starting tomorrow. > In any case, the packed refs file seems special enough to receive special > treatment. Using mmap would make the most sense if we could also avoid > copying lines to a strbuf for parsing, though. I had a similar thought. Below is hacky patch, on top of your mmap patch, that does this. It does shave off another 300ms (around 5%). I think we may be getting into a useless area of micro-optimizing here, though. The results are noticeable on this ridiculous repository, but probably not so much on real ones. The low-hanging fruit (e.g., dropping time in half by using getc_unlocked) seems to provide the most bang for the buck. --- diff --git a/refs.c b/refs.c index 144255f..708b49b 100644 --- a/refs.c +++ b/refs.c @@ -334,27 +334,40 @@ static int refname_is_safe(const char *refname) return 1; } -static struct ref_entry *create_ref_entry(const char *refname, - const unsigned char *sha1, int flag, - int check_name) +static struct ref_entry *create_ref_entry_len(const char *refname, size_t len, + const unsigned char *sha1, int flag, + int check_name) { - int len; struct ref_entry *ref; + /* + * allocate before checking, since the check functions require + * a NUL-terminated refname. And since we die() anyway if + * the check fails, the overhead of the extra malloc is negligible + */ + ref = xmalloc(sizeof(struct ref_entry) + len + 1); + hashcpy(ref->u.value.sha1, sha1); + hashclr(ref->u.value.peeled); + memcpy(ref->name, refname, len); + ref->name[len] = '\0'; + ref->flag = flag; + if (check_name && check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) die("Reference has invalid format: '%s'", refname); if (!check_name && !refname_is_safe(refname)) die("Reference has invalid name: '%s'", refname); - len = strlen(refname) + 1; - ref = xmalloc(sizeof(struct ref_entry) + len); - hashcpy(ref->u.value.sha1, sha1); - hashclr(ref->u.value.peeled); - memcpy(ref->name, refname, len); - ref->flag = flag; return ref; } +static struct ref_entry *create_ref_entry(const char *refname, + const unsigned char *sha1, int flag, + int check_name) +{ + return create_ref_entry_len(refname, strlen(refname), sha1, flag, + check_name); +} + static void clear_ref_dir(struct ref_dir *dir); static void free_ref_entry(struct ref_entry *entry) @@ -1095,7 +1108,9 @@ static const char PACKED_REFS_HEADER[] = * Return a pointer to the refname within the line (null-terminated), * or NULL if there was a problem. */ -static const char *parse_ref_line(struct strbuf *line, unsigned char *sha1) +static const char *parse_ref_line(const char *line, int len, + unsigned char *sha1, + size_t *refname_len) { const char *ref; @@ -1107,22 +1122,22 @@ static const char *parse_ref_line(struct strbuf *line, unsigned char *sha1) * +1 (space in between hex and name) * +1 (newline at the end of the line) */ - if (line->len <= 42) + if (len <= 42) return NULL; - if (get_sha1_hex(line->buf, sha1) < 0) + if (get_sha1_hex(line, sha1) < 0) return NULL; - if (!isspace(line->buf[40])) + if (!isspace(line[40])) return NULL; - ref = line->buf + 41; + ref = line + 41; if (isspace(*ref)) return NULL; - if (line->buf[line->len - 1] != '\n') + if (line[len - 1] != '\n') return NULL; - line->buf[--line->len] = 0; + *refname_len = len - (ref - line) - 1; return ref; } @@ -1156,7 +1171,6 @@ static const char *parse_ref_line(struct strbuf *line, unsigned char *sha1) static void read_packed_refs(int fd, struct ref_dir *dir) { struct ref_entry *last = NULL; - struct strbuf line = STRBUF_INIT; enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; struct stat st; void *map; @@ -1172,18 +1186,20 @@ static void read_packed_refs(int fd, struct ref_dir *dir) for (p = map, len = mapsz; len; ) { unsigned char sha1[20]; const char *refname; + size_t refname_len; const char *traits; const char *nl; + const char *line; size_t linelen; nl = memchr(p, '\n', len); + line = p; linelen = nl ? nl - p + 1 : len; - strbuf_reset(&line); - strbuf_add(&line, p, linelen); p += linelen; len -= linelen; - if (skip_prefix(line.buf, "# pack-refs with:", &traits)) { + /* XXX these should take care not to look past linelen */ + if (skip_prefix(line, "# pack-refs with:", &traits)) { if (strstr(traits, " fully-peeled ")) peeled = PEELED_FULLY; else if (strstr(traits, " peeled ")) @@ -1192,7 +1208,7 @@ static void read_packed_refs(int fd, struct ref_dir *dir) continue; } - refname = parse_ref_line(&line, sha1); + refname = parse_ref_line(line, linelen, sha1, &refname_len); if (refname) { int flag = REF_ISPACKED; @@ -1200,7 +1216,7 @@ static void read_packed_refs(int fd, struct ref_dir *dir) hashclr(sha1); flag |= REF_BAD_NAME | REF_ISBROKEN; } - last = create_ref_entry(refname, sha1, flag, 0); + last = create_ref_entry_len(refname, len, sha1, flag, 0); if (peeled == PEELED_FULLY || (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/"))) last->flag |= REF_KNOWS_PEELED; @@ -1208,10 +1224,10 @@ static void read_packed_refs(int fd, struct ref_dir *dir) continue; } if (last && - line.buf[0] == '^' && - line.len == PEELED_LINE_LENGTH && - line.buf[PEELED_LINE_LENGTH - 1] == '\n' && - !get_sha1_hex(line.buf + 1, sha1)) { + line[0] == '^' && + linelen == PEELED_LINE_LENGTH && + line[PEELED_LINE_LENGTH - 1] == '\n' && + !get_sha1_hex(line + 1, sha1)) { hashcpy(last->u.value.peeled, sha1); /* * Regardless of what the file header said, @@ -1222,7 +1238,6 @@ static void read_packed_refs(int fd, struct ref_dir *dir) } } - strbuf_release(&line); munmap(map, mapsz); } -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html