From: Lars Schneider <larsxschneider@xxxxxxxxx> The goal of this series is to avoid launching a new clean/smudge filter process for each file that is filtered. A short summary about v1 to v5 can be found here: https://git.github.io/rev_news/2016/08/17/edition-18/ This series is also published on web: https://github.com/larsxschneider/git/pull/14 Patches 1 and 2 are cleanups and not strictly necessary for the series. Patches 3 to 12 are required preparation. Patch 13 is the main patch. Patch 14 adds an example how to use the Git filter protocol in contrib. Thanks a lot to Jakub, Junio, and Peff for very helpful reviews, Lars ## Changes since v9 * replace the very specific "wait after close(stdin)" behavior with the more flexible run-command "clean_on_exit_handler" flag to fix flaky t0021, see discussion: http://public-inbox.org/git/xmqq37k9mm7k.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxxx/ http://public-inbox.org/git/xmqq8tubitjs.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxxx/ * run stop_multi_file_filter() for all filters on Git shutdown * actually kill filter in kill_multi_file_filter() * add new filter process to hashmap only if the process start was successful * remove superfluous fstat() call * avoid potential buffer overflow in packet_write_gently() packet size check * remove superfluous buffer in write_packetized_from_buf() * improve test name Lars Schneider (14): convert: quote filter names in error messages convert: modernize tests run-command: move check_pipe() from write_or_die to run_command run-command: add clean_on_exit_handler pkt-line: rename packet_write() to packet_write_fmt() pkt-line: extract set_packet_header() pkt-line: add packet_write_fmt_gently() pkt-line: add packet_flush_gently() pkt-line: add packet_write_gently() pkt-line: add functions to read/write flush terminated packet streams convert: make apply_filter() adhere to standard Git error handling convert: prepare filter.<driver>.process option convert: add filter.<driver>.process option contrib/long-running-filter: add long running filter example Documentation/gitattributes.txt | 159 ++++++++++- builtin/archive.c | 4 +- builtin/receive-pack.c | 4 +- builtin/remote-ext.c | 4 +- builtin/upload-archive.c | 4 +- connect.c | 2 +- contrib/long-running-filter/example.pl | 127 +++++++++ convert.c | 372 +++++++++++++++++++++--- daemon.c | 2 +- http-backend.c | 2 +- pkt-line.c | 152 +++++++++- pkt-line.h | 12 +- run-command.c | 36 ++- run-command.h | 4 +- shallow.c | 2 +- t/t0021-conversion.sh | 505 ++++++++++++++++++++++++++++++--- t/t0021/rot13-filter.pl | 191 +++++++++++++ upload-pack.c | 30 +- write_or_die.c | 13 - 19 files changed, 1492 insertions(+), 133 deletions(-) create mode 100755 contrib/long-running-filter/example.pl create mode 100755 t/t0021/rot13-filter.pl ## Interdiff (v9..v10) diff --git a/convert.c b/convert.c index 88581d6..1d89632 100644 --- a/convert.c +++ b/convert.c @@ -516,23 +516,6 @@ static struct cmd2process *find_multi_file_filter_entry(struct hashmap *hashmap, return hashmap_get(hashmap, &key, NULL); } -static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry) -{ - if (!entry) - return; - sigchain_push(SIGPIPE, SIG_IGN); - /* - * We kill the filter most likely because an error happened already. - * That's why we are not interested in any error code here. - */ - close(entry->process.in); - close(entry->process.out); - sigchain_pop(SIGPIPE); - finish_command(&entry->process); - hashmap_remove(hashmap, entry, NULL); - free(entry); -} - static int packet_write_list(int fd, const char *line, ...) { va_list args; @@ -552,6 +535,49 @@ static int packet_write_list(int fd, const char *line, ...) return packet_flush_gently(fd); } +static void read_multi_file_filter_status(int fd, struct strbuf *status) { + struct strbuf **pair; + char *line; + for (;;) { + line = packet_read_line(fd, NULL); + if (!line) + break; + pair = strbuf_split_str(line, '=', 2); + if (pair[0] && pair[0]->len && pair[1]) { + /* the last "status=<foo>" line wins */ + if (!strcmp(pair[0]->buf, "status=")) { + strbuf_reset(status); + strbuf_addbuf(status, pair[1]); + } + } + strbuf_list_free(pair); + } +} + +static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry) +{ + if (!entry) + return; + + entry->process.clean_on_exit = 0; + kill(entry->process.pid, SIGTERM); + finish_command(&entry->process); + + hashmap_remove(hashmap, entry, NULL); + free(entry); +} + +void stop_multi_file_filter(struct child_process *process) +{ + sigchain_push(SIGPIPE, SIG_IGN); + /* Closing the pipe signals the filter to initiate a shutdown. */ + close(process->in); + close(process->out); + sigchain_pop(SIGPIPE); + /* Finish command will wait until the shutdown is complete. */ + finish_command(process); +} + static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, const char *cmd) { int err; @@ -563,7 +589,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons const char *cap_name; entry = xmalloc(sizeof(*entry)); - hashmap_entry_init(entry, strhash(cmd)); entry->cmd = cmd; entry->supported_capabilities = 0; process = &entry->process; @@ -573,14 +598,16 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons process->use_shell = 1; process->in = -1; process->out = -1; - process->wait_on_exit = 1; + process->clean_on_exit = 1; + process->clean_on_exit_handler = stop_multi_file_filter; if (start_command(process)) { error("cannot fork to run external filter '%s'", cmd); - kill_multi_file_filter(hashmap, entry); return NULL; } + hashmap_entry_init(entry, strhash(cmd)); + sigchain_push(SIGPIPE, SIG_IGN); err = packet_write_list(process->in, "git-filter-client", "version=2", NULL); @@ -635,24 +662,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons return entry; } -static void read_multi_file_filter_status(int fd, struct strbuf *status) { - struct strbuf **pair; - char *line; - for (;;) { - line = packet_read_line(fd, NULL); - if (!line) - break; - pair = strbuf_split_str(line, '=', 2); - if (pair[0] && pair[0]->len && pair[1]) { - if (!strcmp(pair[0]->buf, "status=")) { - strbuf_reset(status); - strbuf_addbuf(status, pair[1]); - } - } - strbuf_list_free(pair); - } -} - static int apply_multi_file_filter(const char *path, const char *src, size_t len, int fd, struct strbuf *dst, const char *cmd, const unsigned int wanted_capability) @@ -660,10 +669,9 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len int err; struct cmd2process *entry; struct child_process *process; - struct stat file_stat; struct strbuf nbuf = STRBUF_INIT; struct strbuf filter_status = STRBUF_INIT; - char *filter_type; + const char *filter_type; if (!cmd_process_map_initialized) { cmd_process_map_initialized = 1; @@ -692,12 +700,6 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len else die("unexpected filter type"); - if (fd >= 0 && !src) { - if (fstat(fd, &file_stat) == -1) - return 0; - len = xsize_t(file_stat.st_size); - } - sigchain_push(SIGPIPE, SIG_IGN); assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n")); diff --git a/pkt-line.c b/pkt-line.c index b82aaca..0b5125f 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -174,12 +174,13 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) static int packet_write_gently(const int fd_out, const char *buf, size_t size) { static char packet_write_buffer[LARGE_PACKET_MAX]; - const size_t packet_size = size + 4; + size_t packet_size; - if (packet_size > sizeof(packet_write_buffer)) + if (size > sizeof(packet_write_buffer) - 4) return error("packet write failed - data exceeds max packet size"); packet_trace(buf, size, 1); + packet_size = size + 4; set_packet_header(packet_write_buffer, packet_size); memcpy(packet_write_buffer + 4, buf, size); if (write_in_full(fd_out, packet_write_buffer, packet_size) == packet_size) @@ -217,14 +218,13 @@ int write_packetized_from_fd(int fd_in, int fd_out) int write_packetized_from_buf(const char *src_in, size_t len, int fd_out) { - static char buf[LARGE_PACKET_DATA_MAX]; int err = 0; size_t bytes_written = 0; size_t bytes_to_write; while (!err) { - if ((len - bytes_written) > sizeof(buf)) - bytes_to_write = sizeof(buf); + if ((len - bytes_written) > LARGE_PACKET_DATA_MAX) + bytes_to_write = LARGE_PACKET_DATA_MAX; else bytes_to_write = len - bytes_written; if (bytes_to_write == 0) diff --git a/run-command.c b/run-command.c index 96c54fe..e5fd6ff 100644 --- a/run-command.c +++ b/run-command.c @@ -21,9 +21,7 @@ void child_process_clear(struct child_process *child) struct child_to_clean { pid_t pid; - char *name; - int stdin; - int wait; + struct child_process *process; struct child_to_clean *next; }; static struct child_to_clean *children_to_clean; @@ -31,35 +29,23 @@ static int installed_child_cleanup_handler; static void cleanup_children(int sig, int in_signal) { - int status; - struct child_to_clean *p = children_to_clean; - - /* Close the the child's stdin as indicator that Git will exit soon */ - while (p) { - if (p->wait) - if (p->stdin > 0) - close(p->stdin); - p = p->next; - } - while (children_to_clean) { - p = children_to_clean; + struct child_to_clean *p = children_to_clean; children_to_clean = p->next; - if (p->wait) { - fprintf(stderr, _("Waiting for '%s' to finish..."), p->name); - while ((waitpid(p->pid, &status, 0)) < 0 && errno == EINTR) - ; /* nothing */ - fprintf(stderr, _("done!\n")); + if (p->process && !in_signal) { + struct child_process *process = p->process; + if (process->clean_on_exit_handler) { + trace_printf("trace: run_command: running exit handler for pid %d", p->pid); + process->clean_on_exit_handler(process); + } } kill(p->pid, sig); - if (!in_signal) { - free(p->name); + if (!in_signal) free(p); } } -} static void cleanup_children_on_signal(int sig) { @@ -73,16 +59,11 @@ static void cleanup_children_on_exit(void) cleanup_children(SIGTERM, 0); } -static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int wait) +static void mark_child_for_cleanup(pid_t pid, struct child_process *process) { struct child_to_clean *p = xmalloc(sizeof(*p)); p->pid = pid; - p->wait = wait; - p->stdin = stdin; - if (name) - p->name = xstrdup(name); - else - p->name = "process"; + p->process = process; p->next = children_to_clean; children_to_clean = p; @@ -93,13 +74,6 @@ static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int w } } -#ifdef NO_PTHREADS -static void mark_child_for_cleanup_no_wait(pid_t pid, const char *name, int timeout, int stdin) -{ - mark_child_for_cleanup(pid, NULL, 0, 0); -} -#endif - static void clear_child_for_cleanup(pid_t pid) { struct child_to_clean **pp; @@ -458,9 +432,8 @@ int start_command(struct child_process *cmd) } if (cmd->pid < 0) error_errno("cannot fork() for %s", cmd->argv[0]); - else if (cmd->clean_on_exit || cmd->wait_on_exit) - mark_child_for_cleanup( - cmd->pid, cmd->argv[0], cmd->in, cmd->wait_on_exit); + else if (cmd->clean_on_exit) + mark_child_for_cleanup(cmd->pid, cmd); /* * Wait for child's execvp. If the execvp succeeds (or if fork() @@ -520,9 +493,8 @@ int start_command(struct child_process *cmd) failed_errno = errno; if (cmd->pid < 0 && (!cmd->silent_exec_failure || errno != ENOENT)) error_errno("cannot spawn %s", cmd->argv[0]); - if ((cmd->clean_on_exit || cmd->wait_on_exit) && cmd->pid >= 0) - mark_child_for_cleanup( - cmd->pid, cmd->argv[0], cmd->in, cmd->clean_on_exit_timeout); + if (cmd->clean_on_exit && cmd->pid >= 0) + mark_child_for_cleanup(cmd->pid, cmd); argv_array_clear(&nargv); cmd->argv = sargv; @@ -804,7 +776,7 @@ int start_async(struct async *async) exit(!!async->proc(proc_in, proc_out, async->data)); } - mark_child_for_cleanup_no_wait(async->pid); + mark_child_for_cleanup(async->pid, NULL); if (need_in) close(fdin[0]); diff --git a/run-command.h b/run-command.h index f7b9907..dd1c78c 100644 --- a/run-command.h +++ b/run-command.h @@ -42,10 +42,9 @@ struct child_process { unsigned silent_exec_failure:1; unsigned stdout_to_stderr:1; unsigned use_shell:1; - /* kill the child on Git exit */ unsigned clean_on_exit:1; - /* close the child's stdin on Git exit and wait until it terminates */ - unsigned wait_on_exit:1; + void (*clean_on_exit_handler)(struct child_process *process); + void *clean_on_exit_handler_cbdata; }; #define CHILD_PROCESS_INIT { NULL, ARGV_ARRAY_INIT, ARGV_ARRAY_INIT } diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 52b7fe9..9f892c0 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -28,9 +28,7 @@ file_size () { filter_git () { rm -f rot13-filter.log && git "$@" 2>git-stderr.log && - sed '/Waiting for/d' git-stderr.log >git-stderr-clean.log && - test_must_be_empty git-stderr-clean.log && - rm -f git-stderr.log git-stderr-clean.log + rm -f git-stderr.log } # Count unique lines in two files and compare them. @@ -668,7 +666,7 @@ test_expect_success PERL 'process filter should not be restarted if it signals a ) ' -test_expect_success PERL 'process filter signals abort once to abort processing of all future files' ' +test_expect_success PERL 'process filter abort stops processing of all further files' ' test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" && rm -rf repo && mkdir repo && @@ -688,6 +686,8 @@ test_expect_success PERL 'process filter signals abort once to abort processing git add . && rm -f *.r && + # Note: This test assumes that Git filters files in alphabetical + # order ("abort.r" before "test.r"). filter_git checkout --quiet --no-progress . && cat >expected.log <<-EOF && START -- 2.10.0