On Wed, Jan 18 2023, Matthew John Cheetham via GitGitGadget wrote: > From: Matthew John Cheetham <mjcheetham@xxxxxxxxxxx> > > Add ability to parse HTTP requests to the test-http-server test helper. > > Signed-off-by: Matthew John Cheetham <mjcheetham@xxxxxxxxxxx> > --- > t/helper/test-http-server.c | 175 +++++++++++++++++++++++++++++++++++- > 1 file changed, 173 insertions(+), 2 deletions(-) > > diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c > index 6cdac223a55..36f4a54fe6d 100644 > --- a/t/helper/test-http-server.c > +++ b/t/helper/test-http-server.c > @@ -83,6 +83,42 @@ enum worker_result { > WR_HANGUP = 1<<1, > }; > > +/* > + * Fields from a parsed HTTP request. > + */ > +struct req { > + struct strbuf start_line; > + > + const char *method; > + const char *http_version; > + > + struct strbuf uri_path; > + struct strbuf query_args; > + > + struct string_list header_list; > + const char *content_type; > + ssize_t content_length; > +}; > + > +#define REQ__INIT { \ > + .start_line = STRBUF_INIT, \ > + .uri_path = STRBUF_INIT, \ > + .query_args = STRBUF_INIT, \ > + .header_list = STRING_LIST_INIT_NODUP, \ > + .content_type = NULL, \ > + .content_length = -1 \ > + } Style nit: Don't indent the trailing "}", and add a "," after the last "content_length" item. We omit the comma by convention when there really should not be another item, such as when we have a "NULL" terminator, here though we might add a struct element at the end, so... > +static enum worker_result req__read(struct req *req, int fd) > +{ > + struct strbuf h = STRBUF_INIT; > + struct string_list start_line_fields = STRING_LIST_INIT_DUP; > + int nr_start_line_fields; > + const char *uri_target; > + const char *query; > + char *hp; > + const char *hv; > + > + enum worker_result result = WR_OK; > + > + /* > + * Read line 0 of the request and split it into component parts: > + * > + * <method> SP <uri-target> SP <HTTP-version> CRLF > + * > + */ > + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) { > + result = WR_OK | WR_HANGUP; > + goto done; > + } > + > + strbuf_trim_trailing_newline(&req->start_line); > + > + nr_start_line_fields = string_list_split(&start_line_fields, > + req->start_line.buf, > + ' ', -1); > + if (nr_start_line_fields != 3) { > + logerror("could not parse request start-line '%s'", > + req->start_line.buf); > + result = WR_IO_ERROR; > + goto done; > + } > + > + req->method = xstrdup(start_line_fields.items[0].string); > + req->http_version = xstrdup(start_line_fields.items[2].string); > + > + uri_target = start_line_fields.items[1].string; > + > + if (strcmp(req->http_version, "HTTP/1.1")) { > + logerror("unsupported version '%s' (expecting HTTP/1.1)", > + req->http_version); > + result = WR_IO_ERROR; > + goto done; > + } > + > + query = strchr(uri_target, '?'); > + > + if (query) { > + strbuf_add(&req->uri_path, uri_target, (query - uri_target)); > + strbuf_trim_trailing_dir_sep(&req->uri_path); > + strbuf_addstr(&req->query_args, query + 1); > + } else { > + strbuf_addstr(&req->uri_path, uri_target); > + strbuf_trim_trailing_dir_sep(&req->uri_path); > + } > + > + /* > + * Read the set of HTTP headers into a string-list. > + */ > + while (1) { > + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) > + goto done; > + strbuf_trim_trailing_newline(&h); > + > + if (!h.len) > + goto done; /* a blank line ends the header */ > + > + hp = strbuf_detach(&h, NULL); > + string_list_append(&req->header_list, hp); > + > + /* also store common request headers as struct req members */ > + if (skip_prefix(hp, "Content-Type: ", &hv)) { > + req->content_type = hv; > + } else if (skip_prefix(hp, "Content-Length: ", &hv)) { > + req->content_length = strtol(hv, &hp, 10); In POSIX the "ssize_t" is not a "this is the unsigned size_t", but can be a much smaller integer type (although in practice it tends to be the signed version of "size_t". But this seems like a potential overflow trap as a result, but sometimes we need to live with "ssize_t". However, in this case it seems like we don't, as it seems the only reason you init'd this to -1 and then... > + if (trace2_is_enabled()) { > + struct string_list_item *item; > + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); > + trace2_printf("%s: hver: %s", TR2_CAT, req->http_version); > + trace2_printf("%s: hmth: %s", TR2_CAT, req->method); > + trace2_printf("%s: path: %s", TR2_CAT, req->uri_path.buf); > + trace2_printf("%s: qury: %s", TR2_CAT, req->query_args.buf); > + if (req->content_length >= 0) > + trace2_printf("%s: clen: %d", TR2_CAT, req->content_length); ...use that ">= 0" is to keep the state of "did I assign to this above? So firstly, shouldn't we error or something on a "Content-Length: 0", and aside from that wouldn't we just have a "int have_content_length = 0" in this function that we'd then flip to 1?