These functions can be useful to other parts of Git. Let's move them to dir.c, while renaming them to be make their functionality more explicit. Signed-off-by: Atharva Raykar <raykar.ath@xxxxxxxxx> Mentored-by: Christian Couder <christian.couder@xxxxxxxxx> Mentored-by: Shourya Shukla <periperidip@xxxxxxxxx> --- builtin/clone.c | 118 +----------------------------------------------- dir.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++ dir.h | 11 +++++ 3 files changed, 127 insertions(+), 116 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 66fe66679c..5ba24b7ae7 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -217,120 +217,6 @@ static char *get_repo_path(const char *repo, int *is_bundle) return canon; } -static char *guess_dir_name(const char *repo, int is_bundle, int is_bare) -{ - const char *end = repo + strlen(repo), *start, *ptr; - size_t len; - char *dir; - - /* - * Skip scheme. - */ - start = strstr(repo, "://"); - if (start == NULL) - start = repo; - else - start += 3; - - /* - * Skip authentication data. The stripping does happen - * greedily, such that we strip up to the last '@' inside - * the host part. - */ - for (ptr = start; ptr < end && !is_dir_sep(*ptr); ptr++) { - if (*ptr == '@') - start = ptr + 1; - } - - /* - * Strip trailing spaces, slashes and /.git - */ - while (start < end && (is_dir_sep(end[-1]) || isspace(end[-1]))) - end--; - if (end - start > 5 && is_dir_sep(end[-5]) && - !strncmp(end - 4, ".git", 4)) { - end -= 5; - while (start < end && is_dir_sep(end[-1])) - end--; - } - - /* - * Strip trailing port number if we've got only a - * hostname (that is, there is no dir separator but a - * colon). This check is required such that we do not - * strip URI's like '/foo/bar:2222.git', which should - * result in a dir '2222' being guessed due to backwards - * compatibility. - */ - if (memchr(start, '/', end - start) == NULL - && memchr(start, ':', end - start) != NULL) { - ptr = end; - while (start < ptr && isdigit(ptr[-1]) && ptr[-1] != ':') - ptr--; - if (start < ptr && ptr[-1] == ':') - end = ptr - 1; - } - - /* - * Find last component. To remain backwards compatible we - * also regard colons as path separators, such that - * cloning a repository 'foo:bar.git' would result in a - * directory 'bar' being guessed. - */ - ptr = end; - while (start < ptr && !is_dir_sep(ptr[-1]) && ptr[-1] != ':') - ptr--; - start = ptr; - - /* - * Strip .{bundle,git}. - */ - len = end - start; - strip_suffix_mem(start, &len, is_bundle ? ".bundle" : ".git"); - - if (!len || (len == 1 && *start == '/')) - die(_("No directory name could be guessed.\n" - "Please specify a directory on the command line")); - - if (is_bare) - dir = xstrfmt("%.*s.git", (int)len, start); - else - dir = xstrndup(start, len); - /* - * Replace sequences of 'control' characters and whitespace - * with one ascii space, remove leading and trailing spaces. - */ - if (*dir) { - char *out = dir; - int prev_space = 1 /* strip leading whitespace */; - for (end = dir; *end; ++end) { - char ch = *end; - if ((unsigned char)ch < '\x20') - ch = '\x20'; - if (isspace(ch)) { - if (prev_space) - continue; - prev_space = 1; - } else - prev_space = 0; - *out++ = ch; - } - *out = '\0'; - if (out > dir && prev_space) - out[-1] = '\0'; - } - return dir; -} - -static void strip_trailing_slashes(char *dir) -{ - char *end = dir + strlen(dir); - - while (dir < end - 1 && is_dir_sep(end[-1])) - end--; - *end = '\0'; -} - static int add_one_reference(struct string_list_item *item, void *cb_data) { struct strbuf err = STRBUF_INIT; @@ -1041,8 +927,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (argc == 2) dir = xstrdup(argv[1]); else - dir = guess_dir_name(repo_name, is_bundle, option_bare); - strip_trailing_slashes(dir); + dir = git_url_basename(repo_name, is_bundle, option_bare); + strip_dir_trailing_slashes(dir); dest_exists = path_exists(dir); if (dest_exists && !is_empty_dir(dir)) diff --git a/dir.c b/dir.c index 03c4d21267..6d26db3189 100644 --- a/dir.c +++ b/dir.c @@ -2970,6 +2970,120 @@ int is_empty_dir(const char *path) return ret; } +char *git_url_basename(const char *repo, int is_bundle, int is_bare) +{ + const char *end = repo + strlen(repo), *start, *ptr; + size_t len; + char *dir; + + /* + * Skip scheme. + */ + start = strstr(repo, "://"); + if (start == NULL) + start = repo; + else + start += 3; + + /* + * Skip authentication data. The stripping does happen + * greedily, such that we strip up to the last '@' inside + * the host part. + */ + for (ptr = start; ptr < end && !is_dir_sep(*ptr); ptr++) { + if (*ptr == '@') + start = ptr + 1; + } + + /* + * Strip trailing spaces, slashes and /.git + */ + while (start < end && (is_dir_sep(end[-1]) || isspace(end[-1]))) + end--; + if (end - start > 5 && is_dir_sep(end[-5]) && + !strncmp(end - 4, ".git", 4)) { + end -= 5; + while (start < end && is_dir_sep(end[-1])) + end--; + } + + /* + * Strip trailing port number if we've got only a + * hostname (that is, there is no dir separator but a + * colon). This check is required such that we do not + * strip URI's like '/foo/bar:2222.git', which should + * result in a dir '2222' being guessed due to backwards + * compatibility. + */ + if (memchr(start, '/', end - start) == NULL + && memchr(start, ':', end - start) != NULL) { + ptr = end; + while (start < ptr && isdigit(ptr[-1]) && ptr[-1] != ':') + ptr--; + if (start < ptr && ptr[-1] == ':') + end = ptr - 1; + } + + /* + * Find last component. To remain backwards compatible we + * also regard colons as path separators, such that + * cloning a repository 'foo:bar.git' would result in a + * directory 'bar' being guessed. + */ + ptr = end; + while (start < ptr && !is_dir_sep(ptr[-1]) && ptr[-1] != ':') + ptr--; + start = ptr; + + /* + * Strip .{bundle,git}. + */ + len = end - start; + strip_suffix_mem(start, &len, is_bundle ? ".bundle" : ".git"); + + if (!len || (len == 1 && *start == '/')) + die(_("No directory name could be guessed.\n" + "Please specify a directory on the command line")); + + if (is_bare) + dir = xstrfmt("%.*s.git", (int)len, start); + else + dir = xstrndup(start, len); + /* + * Replace sequences of 'control' characters and whitespace + * with one ascii space, remove leading and trailing spaces. + */ + if (*dir) { + char *out = dir; + int prev_space = 1 /* strip leading whitespace */; + for (end = dir; *end; ++end) { + char ch = *end; + if ((unsigned char)ch < '\x20') + ch = '\x20'; + if (isspace(ch)) { + if (prev_space) + continue; + prev_space = 1; + } else + prev_space = 0; + *out++ = ch; + } + *out = '\0'; + if (out > dir && prev_space) + out[-1] = '\0'; + } + return dir; +} + +void strip_dir_trailing_slashes(char *dir) +{ + char *end = dir + strlen(dir); + + while (dir < end - 1 && is_dir_sep(end[-1])) + end--; + *end = '\0'; +} + static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up) { DIR *dir; diff --git a/dir.h b/dir.h index b3e1a54a97..63ff04f5ac 100644 --- a/dir.h +++ b/dir.h @@ -453,6 +453,17 @@ static inline int is_dot_or_dotdot(const char *name) int is_empty_dir(const char *dir); +/* + * Retrieve the "humanish" basename of the given Git URL. + * + * For example: + * /path/to/repo.git => "repo" + * host.xz:foo/.git => "foo" + * http://example.com/user/bar.baz => "bar.baz" + */ +char *git_url_basename(const char *repo, int is_bundle, int is_bare); +void strip_dir_trailing_slashes(char *dir); + void setup_standard_excludes(struct dir_struct *dir); char *get_sparse_checkout_filename(void); -- 2.32.0