On Sun, 28 Apr 2024, Matheus Afonso Martins Moreira via GitGitGadget <gitgitgadget@xxxxxxxxx> wrote: > From: Matheus Afonso Martins Moreira <matheus@xxxxxxxxxxxxxxxxxx> > > Define general parsing function that supports all Git URLs > including scp style URLs such as hostname:~user/repo. > Has the same interface as the URL normalization function > and uses the same data structures, facilitating its use. > It's adapted from the algorithm used to process URLs in connect.c, > so it should support the same inputs. > > Signed-off-by: Matheus Afonso Martins Moreira <matheus@xxxxxxxxxxxxxxxxxx> > --- > urlmatch.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > urlmatch.h | 1 + > 2 files changed, 91 insertions(+) > > diff --git a/urlmatch.c b/urlmatch.c > index 1d0254abacb..5a442e31fa2 100644 > --- a/urlmatch.c > +++ b/urlmatch.c > @@ -3,6 +3,7 @@ > #include "hex-ll.h" > #include "strbuf.h" > #include "urlmatch.h" > +#include "url.h" > > #define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" > #define URL_DIGIT "0123456789" > @@ -438,6 +439,95 @@ char *url_normalize(const char *url, struct url_info *out_info) > return url_normalize_1(url, out_info, 0); > } > > +enum protocol { > + PROTO_UNKNOWN = 0, > + PROTO_LOCAL, > + PROTO_FILE, > + PROTO_SSH, > + PROTO_GIT, > +}; > + > +static enum protocol url_get_protocol(const char *name, size_t n) > +{ > + if (!strncmp(name, "ssh", n)) > + return PROTO_SSH; > + if (!strncmp(name, "git", n)) > + return PROTO_GIT; > + if (!strncmp(name, "git+ssh", n)) /* deprecated - do not use */ > + return PROTO_SSH; > + if (!strncmp(name, "ssh+git", n)) /* deprecated - do not use */ > + return PROTO_SSH; > + if (!strncmp(name, "file", n)) > + return PROTO_FILE; > + return PROTO_UNKNOWN; > +} > + > +char *url_parse(const char *url_orig, struct url_info *out_info) > +{ > + struct strbuf url; > + char *host, *separator; > + char *detached, *normalized; > + enum protocol protocol = PROTO_LOCAL; > + struct url_info 
local_info; > + struct url_info *info = out_info? out_info : &local_info; > + bool scp_syntax = false; > + > + if (is_url(url_orig)) { > + url_orig = url_decode(url_orig); > + } else { > + url_orig = xstrdup(url_orig); > + } > + > + strbuf_init(&url, strlen(url_orig) + sizeof("ssh://")); > + strbuf_addstr(&url, url_orig); > + > + host = strstr(url.buf, "://"); > + if (host) { > + protocol = url_get_protocol(url.buf, host - url.buf); > + host += 3; > + } else { > + if (!url_is_local_not_ssh(url.buf)) { > + scp_syntax = true; > + protocol = PROTO_SSH; > + strbuf_insertstr(&url, 0, "ssh://"); > + host = url.buf + 6; > + } > + } Interesting. ` $ ./git url-parse -c protocol file:/test/test ssh ` It seems that having only a single slash after the 'protocol:' always prints 'ssh' (I think this may not even be a valid URL). After this 'else' block, the URL turns into 'ssh://file/test/test'. I will examine the details later. This is not your code's doing, but rather the result of url_is_local_not_ssh(). I just wanted to point this out and ask whether this should error out, or whether it is intended behavior that I can't figure out. Thanks.