From: Victoria Dye <vdye@xxxxxxxxxx> Move 'read_index_info()' into a new header 'index-info.h' and generalize the function to call a provided callback for each parsed line. Update 'update-index.c' to use this generalized 'read_index_info()', adding the callback 'apply_index_info()' to verify the parsed line and update the index according to its contents. Switching to using a callback to validate the parsed entry in 'update-index' results in a slight change to the error message indicating a file could not be removed from the index. The original implementation uses the raw, quoted pathname in the error message, whereas the callback (without access to the raw pathname) uses the unquoted value. However, this change makes the failed removal message consistent with all other error messages in the function, and that consistency is likely more beneficial than not to a user. The motivation for this change is to consolidate the already-similar input parsing logic in 'git update-index' and 'git mktree', avoiding code duplication and the associated maintenance burden. The input formats accepted by 'update-index' are a superset of those accepted by 'mktree', so in a later commit we can replace the input parsing of the latter with 'read_index_info()' without breaking existing usage. Co-authored-by: Junio C Hamano <gitster@xxxxxxxxx> Signed-off-by: Victoria Dye <vdye@xxxxxxxxxx> --- Documentation/git-update-index.txt | 16 +--- Documentation/index-info-formats.txt | 13 +++ Makefile | 1 + builtin/update-index.c | 129 +++++++-------------------- index-info.c | 90 +++++++++++++++++++ index-info.h | 11 +++ t/t2107-update-index-basic.sh | 27 ++++++ 7 files changed, 177 insertions(+), 110 deletions(-) create mode 100644 Documentation/index-info-formats.txt create mode 100644 index-info.c create mode 100644 index-info.h diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index 7128aed5405..e52aecb845d 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -278,21 +278,9 @@ USING --INDEX-INFO `--index-info` is a more powerful mechanism that lets you feed multiple entry definitions from the standard input, and designed -specifically for scripts. It can take inputs of three formats: +specifically for scripts. It can take inputs in the following formats: - . mode SP type SP sha1 TAB path -+ -This format is to stuff `git ls-tree` output into the index. - - . mode SP sha1 SP stage TAB path -+ -This format is to put higher order stages into the -index file and matches 'git ls-files --stage' output. - - . mode SP sha1 TAB path -+ -This format is no longer produced by any Git command, but is -and will continue to be supported by `update-index --index-info`. +include::index-info-formats.txt[] To place a higher stage entry to the index, the path should first be removed by feeding a mode=0 entry for the path, and diff --git a/Documentation/index-info-formats.txt b/Documentation/index-info-formats.txt new file mode 100644 index 00000000000..037ebd24321 --- /dev/null +++ b/Documentation/index-info-formats.txt @@ -0,0 +1,13 @@ + . mode SP type SP sha1 TAB path ++ +This format is to use `git ls-tree` output. + + . mode SP sha1 SP stage TAB path ++ +This format allows higher order stages to appear and +matches 'git ls-files --stage' output. + + . mode SP sha1 TAB path ++ +This format is no longer produced by any Git command, but is +and will continue to be supported. diff --git a/Makefile b/Makefile index 2f5f16847ae..db9604e59c3 100644 --- a/Makefile +++ b/Makefile @@ -1037,6 +1037,7 @@ LIB_OBJS += hex.o LIB_OBJS += hex-ll.o LIB_OBJS += hook.o LIB_OBJS += ident.o +LIB_OBJS += index-info.o LIB_OBJS += json-writer.o LIB_OBJS += kwset.o LIB_OBJS += levenshtein.o diff --git a/builtin/update-index.c b/builtin/update-index.c index d343416ae26..fddf59b54c1 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -11,6 +11,7 @@ #include "gettext.h" #include "hash.h" #include "hex.h" +#include "index-info.h" #include "lockfile.h" #include "quote.h" #include "cache-tree.h" @@ -509,100 +510,29 @@ static void update_one(const char *path) report("add '%s'", path); } -static void read_index_info(int nul_term_line) +static int apply_index_info(unsigned int mode, struct object_id *oid, int stage, + const char *path_name, void *cbdata UNUSED) { - const int hexsz = the_hash_algo->hexsz; - struct strbuf buf = STRBUF_INIT; - struct strbuf uq = STRBUF_INIT; - strbuf_getline_fn getline_fn; + if (!verify_path(path_name, mode)) { + fprintf(stderr, "Ignoring path %s\n", path_name); + return 0; + } - getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf; - while (getline_fn(&buf, stdin) != EOF) { - char *ptr, *tab; - char *path_name; - struct object_id oid; - unsigned int mode; - unsigned long ul; - int stage; - - /* This reads lines formatted in one of three formats: - * - * (1) mode SP sha1 TAB path - * The first format is what "git apply --index-info" - * reports, and used to reconstruct a partial tree - * that is used for phony merge base tree when falling - * back on 3-way merge. - * - * (2) mode SP type SP sha1 TAB path - * The second format is to stuff "git ls-tree" output - * into the index file. - * - * (3) mode SP sha1 SP stage TAB path - * This format is to put higher order stages into the - * index file and matches "git ls-files --stage" output. + if (!mode) { + /* mode == 0 means there is no such path -- remove */ + if (remove_file_from_index(the_repository->index, path_name)) + die("git update-index: unable to remove %s", path_name); + } + else { + /* mode ' ' sha1 '\t' name + * ptr[-1] points at tab, + * ptr[-41] is at the beginning of sha1 */ - errno = 0; - ul = strtoul(buf.buf, &ptr, 8); - if (ptr == buf.buf || *ptr != ' ' - || errno || (unsigned int) ul != ul) - goto bad_line; - mode = ul; - - tab = strchr(ptr, '\t'); - if (!tab || tab - ptr < hexsz + 1) - goto bad_line; - - if (tab[-2] == ' ' && '0' <= tab[-1] && tab[-1] <= '3') { - stage = tab[-1] - '0'; - ptr = tab + 1; /* point at the head of path */ - tab = tab - 2; /* point at tail of sha1 */ - } - else { - stage = 0; - ptr = tab + 1; /* point at the head of path */ - } - - if (get_oid_hex(tab - hexsz, &oid) || - tab[-(hexsz + 1)] != ' ') - goto bad_line; - - path_name = ptr; - if (!nul_term_line && path_name[0] == '"') { - strbuf_reset(&uq); - if (unquote_c_style(&uq, path_name, NULL)) { - die("git update-index: bad quoting of path name"); - } - path_name = uq.buf; - } - - if (!verify_path(path_name, mode)) { - fprintf(stderr, "Ignoring path %s\n", path_name); - continue; - } - - if (!mode) { - /* mode == 0 means there is no such path -- remove */ - if (remove_file_from_index(the_repository->index, path_name)) - die("git update-index: unable to remove %s", - ptr); - } - else { - /* mode ' ' sha1 '\t' name - * ptr[-1] points at tab, - * ptr[-41] is at the beginning of sha1 - */ - ptr[-(hexsz + 2)] = ptr[-1] = 0; - if (add_cacheinfo(mode, &oid, path_name, stage)) - die("git update-index: unable to update %s", - path_name); - } - continue; - - bad_line: - die("malformed index info %s", buf.buf); + if (add_cacheinfo(mode, oid, path_name, stage)) + die("git update-index: unable to update %s", path_name); } - strbuf_release(&buf); - strbuf_release(&uq); + + return 0; } static const char * const update_index_usage[] = { @@ -848,16 +778,23 @@ static enum parse_opt_result stdin_cacheinfo_callback( struct parse_opt_ctx_t *ctx, const struct option *opt, const char *arg, int unset) { - int *nul_term_line = opt->value; + int ret = 0; BUG_ON_OPT_NEG(unset); BUG_ON_OPT_ARG(arg); - if (ctx->argc != 1) - return error("option '%s' must be the last argument", opt->long_name); - allow_add = allow_replace = allow_remove = 1; - read_index_info(*nul_term_line); - return 0; + if (ctx->argc != 1) { + ret = error("option '%s' must be the last argument", opt->long_name); + } else { + int *nul_term_line = opt->value; + + allow_add = allow_replace = allow_remove = 1; + ret = read_index_info(*nul_term_line, apply_index_info, NULL); + if (ret) + ret = -1; + } + + return ret; } static enum parse_opt_result stdin_callback( diff --git a/index-info.c b/index-info.c new file mode 100644 index 00000000000..8ccaac5487b --- /dev/null +++ b/index-info.c @@ -0,0 +1,90 @@ +#include "git-compat-util.h" +#include "index-info.h" +#include "hash.h" +#include "hex.h" +#include "strbuf.h" +#include "quote.h" + +int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata) +{ + const int hexsz = the_hash_algo->hexsz; + struct strbuf buf = STRBUF_INIT; + struct strbuf uq = STRBUF_INIT; + strbuf_getline_fn getline_fn; + int ret = 0; + + getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf; + while (getline_fn(&buf, stdin) != EOF) { + char *ptr, *tab; + char *path_name; + struct object_id oid; + unsigned int mode; + unsigned long ul; + int stage; + + /* This reads lines formatted in one of three formats: + * + * (1) mode SP sha1 TAB path + * The first format is what "git apply --index-info" + * reports, and used to reconstruct a partial tree + * that is used for phony merge base tree when falling + * back on 3-way merge. + * + * (2) mode SP type SP sha1 TAB path + * The second format is to stuff "git ls-tree" output + * into the index file. + * + * (3) mode SP sha1 SP stage TAB path + * This format is to put higher order stages into the + * index file and matches "git ls-files --stage" output. + */ + errno = 0; + ul = strtoul(buf.buf, &ptr, 8); + if (ptr == buf.buf || *ptr != ' ' + || errno || (unsigned int) ul != ul) + goto bad_line; + mode = ul; + + tab = strchr(ptr, '\t'); + if (!tab || tab - ptr < hexsz + 1) + goto bad_line; + + if (tab[-2] == ' ' && '0' <= tab[-1] && tab[-1] <= '3') { + stage = tab[-1] - '0'; + ptr = tab + 1; /* point at the head of path */ + tab = tab - 2; /* point at tail of sha1 */ + } else { + stage = 0; + ptr = tab + 1; /* point at the head of path */ + } + + if (get_oid_hex(tab - hexsz, &oid) || + tab[-(hexsz + 1)] != ' ') + goto bad_line; + + path_name = ptr; + if (!nul_term_line && path_name[0] == '"') { + strbuf_reset(&uq); + if (unquote_c_style(&uq, path_name, NULL)) { + ret = error("bad quoting of path name"); + break; + } + path_name = uq.buf; + } + + ret = fn(mode, &oid, stage, path_name, cbdata); + if (ret) { + ret = -1; + break; + } + + continue; + + bad_line: + die("malformed input line '%s'", buf.buf); + } + strbuf_release(&buf); + strbuf_release(&uq); + + return ret; +} diff --git a/index-info.h b/index-info.h new file mode 100644 index 00000000000..d650498325a --- /dev/null +++ b/index-info.h @@ -0,0 +1,11 @@ +#ifndef INDEX_INFO_H +#define INDEX_INFO_H + +#include "hash.h" + +typedef int (*each_index_info_fn)(unsigned int, struct object_id *, int, const char *, void *); + +/* Iterate over parsed index info from stdin */ +int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata); + +#endif /* INDEX_INFO_H */ diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh index cc72ead79f3..794a5b1a184 100755 --- a/t/t2107-update-index-basic.sh +++ b/t/t2107-update-index-basic.sh @@ -142,4 +142,31 @@ test_expect_success '--index-version' ' test_must_be_empty actual ' +test_expect_success '--index-info fails on malformed input' ' + # empty line + echo "" | + test_must_fail git update-index --index-info 2>err && + test_grep "malformed input line" err && + + # bad whitespace + printf "100644 $EMPTY_BLOB A" | + test_must_fail git update-index --index-info 2>err && + test_grep "malformed input line" err && + + # invalid stage value + printf "100644 $EMPTY_BLOB 5\tA" | + test_must_fail git update-index --index-info 2>err && + test_grep "malformed input line" err && + + # invalid OID length + printf "100755 abc123\tA" | + test_must_fail git update-index --index-info 2>err && + test_grep "malformed input line" err && + + # bad quoting + printf "100644 $EMPTY_BLOB\t\"A" | + test_must_fail git update-index --index-info 2>err && + test_grep "bad quoting of path name" err +' + test_done -- gitgitgadget