Allow the parallel workers to create the leading directories of the entries being checked out, instead of pre-creating them in the main process. This optimization should be more effective on file systems with higher I/O latency. Part of the process of creating leading dirs is the removal of any non-directory file that could be in the way. This is currently done inside entry.c:create_directories(). However, if we were to move this to the workers as well, we would risk removing a file just written by another worker, which collided with the one currently being written. In a worse scenario, we could remove the file right after a worker has closed it but before it called stat(). To avoid these problems, let's remove the non-directory files in the main process. And to avoid the cost of extra lstat() calls in this process, we use has_dirs_only_path(), which will have the necessary information already cached from check_path(). Finally, to create the leading dirs in the workers, we could re-use create_directories(). But, unlike the main process, we wouldn't have the stat() information cached. Thus, let's use raceproof_create_file(), which will only stat() the path components after an open() failure, saving us time when creating subsequent files in the same directory. 
Signed-off-by: Matheus Tavares <matheus.bernardino@xxxxxx> --- entry.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- parallel-checkout.c | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/entry.c b/entry.c index e876adff19..5dfd4d150d 100644 --- a/entry.c +++ b/entry.c @@ -57,6 +57,43 @@ static void create_directories(const char *path, int path_len, free(buf); } +static void remove_non_dirs(const char *path, int path_len, + const struct checkout *state) +{ + char *buf = xmallocz(path_len); + int len = 0; + + while (len < path_len) { + int ret; + + do { + buf[len] = path[len]; + len++; + } while (len < path_len && !is_dir_sep(path[len])); + if (len >= path_len) + break; + buf[len] = 0; + + ret = has_dirs_only_path(buf, len, state->base_dir_len); + + if (ret > 0) + continue; /* Is directory. */ + if (ret < 0) + break; /* No entry */ + + /* ret == 0: not a directory, let's unlink it. */ + + if (!state->force) + die("'%s' already exists, and it's not a directory", buf); + + if (unlink(buf)) + die_errno("cannot unlink '%s'", buf); + else + break; + } + free(buf); +} + static void remove_subtree(struct strbuf *path) { DIR *dir = opendir(path->buf); @@ -555,8 +592,6 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca, } else if (state->not_new) return 0; - create_directories(path.buf, path.len, state); - if (nr_checkouts) (*nr_checkouts)++; @@ -565,9 +600,13 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca, ca = &ca_buf; } - if (!enqueue_checkout(ce, ca)) + if (!enqueue_checkout(ce, ca)) { + /* "clean" path so that workers can create leading dirs */ + remove_non_dirs(path.buf, path.len, state); return 0; + } + create_directories(path.buf, path.len, state); return write_entry(ce, path.buf, ca, state, 0); } diff --git a/parallel-checkout.c b/parallel-checkout.c index 4d72540256..5b73d8fa4b 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -298,20 +298,48 @@ 
static int close_and_clear(int *fd) return ret; } +struct ci_open_data { + int fd; + unsigned int mode; +}; + +static int ci_open(const char *path, void *cb) +{ + struct ci_open_data *data = cb; + data->fd = open(path, O_WRONLY | O_CREAT | O_EXCL, data->mode); + + if (data->fd < 0) { + /* + * EISDIR can only indicate path collisions among the entries + * being checked out. We don't need raceproof_create_file() to + * try removing empty dirs. Instead, just let the caller known + * that the path already exists, so that the collision can be + * properly handled later. + */ + if (errno == EISDIR) + errno = EEXIST; + return 1; + } + + return 0; +} + void write_checkout_item(struct checkout *state, struct checkout_item *ci) { - unsigned int mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666; + struct ci_open_data open_data; int fd = -1, fstat_done = 0; struct strbuf path = STRBUF_INIT; + open_data.mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666; strbuf_add(&path, state->base_dir, state->base_dir_len); strbuf_add(&path, ci->ce->name, ci->ce->ce_namelen); - fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode); - - if (fd < 0) { - if (errno == EEXIST || errno == EISDIR || errno == ENOENT || - errno == ENOTDIR) { + /* + * The main process already removed any non-directory file that was in + * the way. So if we find one, it's a path collision. + */ + if (raceproof_create_file(path.buf, ci_open, &open_data)) { + if (errno == EEXIST || errno == ENOTDIR || errno == ENOENT) { /* * Errors which probably represent a path collision. * Suppress the error message and mark the ci to be @@ -325,6 +353,8 @@ void write_checkout_item(struct checkout *state, struct checkout_item *ci) goto out; } + fd = open_data.fd; + if (write_checkout_item_to_fd(fd, state, ci, path.buf)) { /* Error was already reported. */ ci->status = CI_FAILED; -- 2.27.0