sxenos@xxxxxxxxxx writes: > +/* > + * Search the commit buffer for a line starting with the given key. Unlike > + * find_commit_header, this also searches the commit message body. > + */ > +static const char *find_key(const char *msg, const char *key, size_t *out_len) > +{ > + int key_len = strlen(key); > + const char *line = msg; > + > + while (line) { > + const char *eol = strchrnul(line, '\n'); > + > + if (eol - line > key_len && > + !strncmp(line, key, key_len) && Use of strncmp() here forces readers to wonder if and why we are preparing the code to allow NUL in key[0..key_len] (as line..eol would not, because we just used strchrnul()). But because key_len was computed using strlen(key), there is no valid reason to do so. If the code used memcmp(), it won't waste readers' time. > + line[key_len] == ' ') { > + *out_len = eol - line - key_len - 1; > + return line + key_len + 1; > + } > + line = *eol ? eol + 1 : NULL; > + } > + return NULL; > +} > + > +static struct commit *get_commit_by_index(struct commit_list *to_search, int index) > +{ > + while (to_search && index) { > + to_search = to_search->next; > + --index; Style: unlike some C++ shop, we tend to use post-increment/decrement when we do not use the value. > + } > + > + return to_search->item; > +} If a maliciously crafted parent-type field has excess ' ' to make index larger than the number of "parent" field in the commit object, the while() loop terminates upon noticing that to_search has become NULL. And then this return statement dereferences that NULL pointer. > +/* > + * Writes the content parent's object id to "content". > + * Returns the metacommit type. See the METACOMMIT_TYPE_* constants. > + */ > +int get_metacommit_content( > + struct commit *commit, struct object_id *content) > +{ > + const char *buffer = get_commit_buffer(commit, NULL); > + size_t parent_types_size; > + const char *parent_types = find_key(buffer, "parent-type", > + &parent_types_size); > + const char *end; > + int index = 0; > + int ret; > + struct commit *content_parent; > + > + if (!parent_types) { > + return METACOMMIT_TYPE_NONE; Unnecessary brace? > + } > + > + end = &(parent_types[parent_types_size]); Unnecessary parenthesis? > + while (1) { > + char next = *parent_types; > + if (next == ' ') { > + index++; > + } > + if (next == 'c') { > + ret = METACOMMIT_TYPE_NORMAL; > + break; > + } > + if (next == 'a') { > + ret = METACOMMIT_TYPE_ABANDONED; > + break; > + } The parsing of this seems somewhat loose. If there is 'x' on the line, this loop spins and consumes it without doing anything, which means that the same commit with a parent-type field can be encoded in different ways by adding arbitrary number of 'x' just after SP after the "parent-type" keyword, no? > + parent_types++; > + if (parent_types >= end) { > + return METACOMMIT_TYPE_NONE; > + } > + } > + > + content_parent = get_commit_by_index(commit->parents, index); > + > + if (!content_parent) { > + return METACOMMIT_TYPE_NONE; > + } > + > + oidcpy(content, &(content_parent->object.oid)); > + return ret; > +} > diff --git a/metacommit-parser.h b/metacommit-parser.h > new file mode 100644 > index 0000000000..e546f5a7e7 > --- /dev/null > +++ b/metacommit-parser.h > @@ -0,0 +1,16 @@ > +#ifndef METACOMMIT_PARSER_H > +#define METACOMMIT_PARSER_H > + > +// Indicates a normal commit (non-metacommit) No C99 // comments please. Not in the header, and not in the code. > +#define METACOMMIT_TYPE_NONE 0 > +// Indicates a metacommit with normal content (non-abandoned) > +#define METACOMMIT_TYPE_NORMAL 1 > +// Indicates a metacommit with abandoned content > +#define METACOMMIT_TYPE_ABANDONED 2 > + > +struct commit; > + > +extern int get_metacommit_content( > + struct commit *commit, struct object_id *content); > + > +#endif