diff --git a/Makefile b/Makefile index 4eceeaae..10b88d8a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CPPFLAGS?=-Wall -Wextra -Werror -I. -OPT_DEBUG=$(CPPFLAGS) -O0 -g -DHTTP_PARSER_STRICT=1 -OPT_FAST=$(CPPFLAGS) -O3 -DHTTP_PARSER_STRICT=0 +OPT_DEBUG=$(CPPFLAGS) -O0 -g -DHTTP_PARSER_STRICT=1 -DHTTP_PARSER_DEBUG=1 +OPT_FAST=$(CPPFLAGS) -O3 -DHTTP_PARSER_STRICT=0 -DHTTP_PARSER_DEBUG=0 CC?=gcc AR?=ar diff --git a/http_parser.c b/http_parser.c index 91c217dd..baa4c935 100644 --- a/http_parser.c +++ b/http_parser.c @@ -31,10 +31,24 @@ #endif +#if HTTP_PARSER_DEBUG +#define SET_ERRNO(e) \ +do { \ + parser->state = 0x80 | (e); \ + parser->error_lineno = __LINE__; \ +} while (0) +#else +#define SET_ERRNO(e) do { parser->state = 0x80 | (e); } while(0) +#endif + + #define CALLBACK2(FOR) \ do { \ if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser)) return (p - data); \ + if (0 != settings->on_##FOR(parser)) { \ + SET_ERRNO(HPE_CB_##FOR); \ + return (p - data); \ + } \ } \ } while (0) @@ -52,6 +66,7 @@ do { \ FOR##_mark, \ p - FOR##_mark)) \ { \ + SET_ERRNO(HPE_CB_##FOR); \ return (p - data); \ } \ } \ @@ -319,7 +334,13 @@ enum header_states #if HTTP_PARSER_STRICT -# define STRICT_CHECK(cond) if (cond) goto error +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) # define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) #else # define STRICT_CHECK(cond) @@ -327,6 +348,17 @@ enum header_states #endif +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + + size_t http_parser_execute (http_parser *parser, const http_parser_settings *settings, const char *data, @@ -336,12 +368,21 @@ size_t http_parser_execute (http_parser *parser, int8_t unhex_val; const char *p = data, *pe; int64_t to_read; - - enum state state = (enum state) parser->state; - enum header_states header_state = (enum header_states) parser->header_state; + enum state state; + enum header_states header_state; uint64_t index = parser->index; uint64_t nread = parser->nread; + /* We're in an error state. Don't attempt to do anything lest we overwrite + * the error information that landed us here. + */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + state = (enum state) parser->state; + header_state = (enum header_states) parser->header_state; + if (len == 0) { switch (state) { case s_body_identity_eof: @@ -355,7 +396,8 @@ size_t http_parser_execute (http_parser *parser, return 0; default: - return 1; // error + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; } } @@ -392,7 +434,10 @@ size_t http_parser_execute (http_parser *parser, if (PARSING_HEADER(state)) { ++nread; /* Buffer overflow attack */ - if (nread > HTTP_MAX_HEADER_SIZE) goto error; + if (nread > HTTP_MAX_HEADER_SIZE) { + SET_ERRNO(HPE_HEADER_OVERFLOW); + goto error; + } } switch (state) { @@ -401,6 +446,7 @@ size_t http_parser_execute (http_parser *parser, /* this state is used after a 'Connection: close' message * the parser will error out if it reads another message */ + SET_ERRNO(HPE_CLOSED_CONNECTION); goto error; case s_start_req_or_res: @@ -426,7 +472,11 @@ size_t http_parser_execute (http_parser *parser, 
parser->type = HTTP_RESPONSE; state = s_res_HT; } else { - if (ch != 'E') goto error; + if (ch != 'E') { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + parser->type = HTTP_REQUEST; parser->method = HTTP_HEAD; index = 2; @@ -451,6 +501,7 @@ size_t http_parser_execute (http_parser *parser, break; default: + SET_ERRNO(HPE_INVALID_CONSTANT); goto error; } break; @@ -477,7 +528,11 @@ size_t http_parser_execute (http_parser *parser, break; case s_res_first_http_major: - if (ch < '1' || ch > '9') goto error; + if (ch < '1' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + parser->http_major = ch - '0'; state = s_res_http_major; break; @@ -490,18 +545,29 @@ size_t http_parser_execute (http_parser *parser, break; } - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } parser->http_major *= 10; parser->http_major += ch - '0'; - if (parser->http_major > 999) goto error; + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + break; } /* first digit of minor HTTP version */ case s_res_first_http_minor: - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + parser->http_minor = ch - '0'; state = s_res_http_minor; break; @@ -514,12 +580,19 @@ size_t http_parser_execute (http_parser *parser, break; } - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } parser->http_minor *= 10; parser->http_minor += ch - '0'; - if (parser->http_minor > 999) goto error; + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + break; } @@ -529,6 +602,8 @@ size_t http_parser_execute (http_parser *parser, if (ch == ' ') { break; } + + SET_ERRNO(HPE_INVALID_STATUS); goto error; } parser->status_code = ch - '0'; @@ -550,6 +625,7 @@ size_t http_parser_execute (http_parser *parser, state = s_header_field_start; break; default: + SET_ERRNO(HPE_INVALID_STATUS); goto 
error; } break; @@ -558,7 +634,11 @@ size_t http_parser_execute (http_parser *parser, parser->status_code *= 10; parser->status_code += ch - '0'; - if (parser->status_code > 999) goto error; + if (parser->status_code > 999) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; } @@ -590,7 +670,10 @@ size_t http_parser_execute (http_parser *parser, CALLBACK2(message_begin); - if (!IS_ALPHA(LOWER(ch))) goto error; + if (!IS_ALPHA(LOWER(ch))) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } start_req_method_assign: parser->method = (enum http_method) 0; @@ -611,7 +694,9 @@ size_t http_parser_execute (http_parser *parser, case 'S': parser->method = HTTP_SUBSCRIBE; break; case 'T': parser->method = HTTP_TRACE; break; case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; - default: goto error; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; } state = s_req_method; break; @@ -619,8 +704,10 @@ size_t http_parser_execute (http_parser *parser, case s_req_method: { - if (ch == '\0') + if (ch == '\0') { + SET_ERRNO(HPE_INVALID_METHOD); goto error; + } const char *matcher = method_strings[parser->method]; if (ch == ' ' && matcher[index] == '\0') { @@ -658,6 +745,7 @@ size_t http_parser_execute (http_parser *parser, } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { parser->method = HTTP_PROPPATCH; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } @@ -687,6 +775,7 @@ size_t http_parser_execute (http_parser *parser, break; } + SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -701,6 +790,7 @@ size_t http_parser_execute (http_parser *parser, break; } + SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -737,6 +827,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_query_string_start; break; default: + SET_ERRNO(HPE_INVALID_HOST); goto error; } break; @@ -762,6 +853,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_query_string_start; break; default: + SET_ERRNO(HPE_INVALID_PORT); goto error; } 
break; @@ -800,6 +892,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_fragment_start; break; default: + SET_ERRNO(HPE_INVALID_PATH); goto error; } break; @@ -836,6 +929,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_fragment_start; break; default: + SET_ERRNO(HPE_INVALID_QUERY_STRING); goto error; } break; @@ -873,6 +967,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_fragment_start; break; default: + SET_ERRNO(HPE_INVALID_QUERY_STRING); goto error; } break; @@ -910,6 +1005,7 @@ size_t http_parser_execute (http_parser *parser, case '#': break; default: + SET_ERRNO(HPE_INVALID_FRAGMENT); goto error; } break; @@ -943,6 +1039,7 @@ size_t http_parser_execute (http_parser *parser, case '#': break; default: + SET_ERRNO(HPE_INVALID_FRAGMENT); goto error; } break; @@ -956,6 +1053,7 @@ size_t http_parser_execute (http_parser *parser, case ' ': break; default: + SET_ERRNO(HPE_INVALID_CONSTANT); goto error; } break; @@ -982,7 +1080,11 @@ size_t http_parser_execute (http_parser *parser, /* first digit of major HTTP version */ case s_req_first_http_major: - if (ch < '1' || ch > '9') goto error; + if (ch < '1' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + parser->http_major = ch - '0'; state = s_req_http_major; break; @@ -995,18 +1097,29 @@ size_t http_parser_execute (http_parser *parser, break; } - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } parser->http_major *= 10; parser->http_major += ch - '0'; - if (parser->http_major > 999) goto error; + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + break; } /* first digit of minor HTTP version */ case s_req_first_http_minor: - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + parser->http_minor = ch - '0'; state = s_req_http_minor; break; @@ -1026,19 +1139,30 @@ size_t http_parser_execute (http_parser 
*parser, /* XXX allow spaces after digit? */ - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } parser->http_minor *= 10; parser->http_minor += ch - '0'; - if (parser->http_minor > 999) goto error; + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + break; } /* end of request line */ case s_req_line_almost_done: { - if (ch != LF) goto error; + if (ch != LF) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + state = s_header_field_start; break; } @@ -1060,7 +1184,10 @@ size_t http_parser_execute (http_parser *parser, c = TOKEN(ch); - if (!c) goto error; + if (!c) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } MARK(header_field); @@ -1217,6 +1344,7 @@ size_t http_parser_execute (http_parser *parser, break; } + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); goto error; } @@ -1260,7 +1388,11 @@ size_t http_parser_execute (http_parser *parser, break; case h_content_length: - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + parser->content_length = ch - '0'; break; @@ -1310,7 +1442,11 @@ size_t http_parser_execute (http_parser *parser, case h_content_length: if (ch == ' ') break; - if (!IS_NUM(ch)) goto error; + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + parser->content_length *= 10; parser->content_length += ch - '0'; break; @@ -1431,6 +1567,7 @@ size_t http_parser_execute (http_parser *parser, default: parser->state = state; + SET_ERRNO(HPE_CB_headers_complete); return p - data; /* Error */ } } @@ -1498,7 +1635,11 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); unhex_val = unhex[(unsigned char)ch]; - if (unhex_val == -1) goto error; + if (unhex_val == -1) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + parser->content_length = unhex_val; state = s_chunk_size; break; @@ -1520,6 +1661,8 @@ size_t http_parser_execute (http_parser 
*parser, state = s_chunk_parameters; break; } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); goto error; } @@ -1588,6 +1731,7 @@ size_t http_parser_execute (http_parser *parser, default: assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); goto error; } } @@ -1607,7 +1751,10 @@ size_t http_parser_execute (http_parser *parser, return len; error: - parser->state = s_dead; + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + return (p - data); } @@ -1649,3 +1796,15 @@ http_parser_init (http_parser *parser, enum http_parser_type t) parser->flags = 0; parser->method = 0; } + +const char * +http_errno_name(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].name; +} + +const char * +http_errno_description(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].description; +} diff --git a/http_parser.h b/http_parser.h index 6a54a2d6..6e8cf4d4 100644 --- a/http_parser.h +++ b/http_parser.h @@ -51,6 +51,13 @@ typedef int ssize_t; # define HTTP_PARSER_STRICT 1 #endif +/* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to + * the error reporting facility. + */ +#ifndef HTTP_PARSER_DEBUG +# define HTTP_PARSER_DEBUG 0 +#endif + /* Maximium header size allowed */ #define HTTP_MAX_HEADER_SIZE (80*1024) @@ -58,6 +65,7 @@ typedef int ssize_t; typedef struct http_parser http_parser; typedef struct http_parser_settings http_parser_settings; +typedef struct http_parser_result http_parser_result; /* Callbacks should return non-zero to indicate an error. The parser will @@ -125,6 +133,74 @@ enum flags }; +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. 
+ */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_path, "the on_path callback failed") \ + XX(CB_query_string, "the on_query_string callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_fragment, "the on_fragment callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(UNKNOWN, "an unknown error occurred") + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef
HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) \ + ((enum http_errno) (((p)->state & 0x80) ? (p)->state & ~0x80 : 0)) + + +/* Get the line number that generated the current error */ +#if HTTP_PARSER_DEBUG +#define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno) +#else +#define HTTP_PARSER_ERRNO_LINE(p) 0 +#endif + + struct http_parser { /** PRIVATE **/ unsigned char type : 2; @@ -149,6 +225,10 @@ struct http_parser { */ char upgrade; +#if HTTP_PARSER_DEBUG + uint32_t error_lineno; +#endif + /** PUBLIC **/ void *data; /* A pointer to get hook to the "connection" or "socket" object */ }; @@ -186,7 +266,13 @@ size_t http_parser_execute(http_parser *parser, int http_should_keep_alive(http_parser *parser); /* Returns a string version of the HTTP method. */ -const char *http_method_str(enum http_method); +const char *http_method_str(enum http_method m); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); #ifdef __cplusplus } diff --git a/test.c b/test.c index 4d6e3384..35b3b098 100644 --- a/test.c +++ b/test.c @@ -1490,7 +1490,9 @@ upgrade_message_fix(char *body, const size_t nread, const size_t nmsgs, ...) 
{ static void print_error (const char *raw, size_t error_location) { - fprintf(stderr, "\n*** parse error ***\n\n"); + fprintf(stderr, "\n*** %s:%d -- %s ***\n\n", + "http_parser.c", HTTP_PARSER_ERRNO_LINE(parser), + http_errno_description(HTTP_PARSER_ERRNO(parser))); int this_line = 0, char_len = 0; size_t i, j, len = strlen(raw), error_location_line = 0; @@ -1626,21 +1628,32 @@ test_message_count_body (const struct message *message) } void -test_simple (const char *buf, int should_pass) +test_simple (const char *buf, enum http_errno err_expected) { parser_init(HTTP_REQUEST); size_t parsed; int pass; + enum http_errno err; + parsed = parse(buf, strlen(buf)); pass = (parsed == strlen(buf)); + err = HTTP_PARSER_ERRNO(parser); parsed = parse(NULL, 0); pass &= (parsed == 0); parser_free(); - if (pass != should_pass) { - fprintf(stderr, "\n*** test_simple expected %s ***\n\n%s", should_pass ? "success" : "error", buf); + /* In strict mode, allow us to pass with an unexpected HPE_STRICT as + * long as the caller isn't expecting success. 
+ */ +#if HTTP_PARSER_STRICT + if (err_expected != err && err_expected != HPE_OK && err != HPE_STRICT) { +#else + if (err_expected != err) { +#endif + fprintf(stderr, "\n*** test_simple expected %s, but saw %s ***\n\n%s\n", + http_errno_name(err_expected), http_errno_name(err), buf); exit(1); } } @@ -1657,10 +1670,14 @@ test_header_overflow_error (int req) assert(parsed == strlen(buf)); buf = "header-key: header-value\r\n"; + size_t buflen = strlen(buf); + int i; for (i = 0; i < 10000; i++) { - if (http_parser_execute(&parser, &settings_null, buf, strlen(buf)) != strlen(buf)) { + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { //fprintf(stderr, "error found on iter %d\n", i); + assert(HTTP_PARSER_ERRNO(&parser) == HPE_HEADER_OVERFLOW); return; } } @@ -1996,13 +2013,13 @@ main (void) /// REQUESTS - test_simple("hello world", 0); - test_simple("GET / HTP/1.1\r\n\r\n", 0); + test_simple("hello world", HPE_INVALID_METHOD); + test_simple("GET / HTP/1.1\r\n\r\n", HPE_INVALID_VERSION); - test_simple("ASDF / HTTP/1.1\r\n\r\n", 0); - test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", 0); - test_simple("GETA / HTTP/1.1\r\n\r\n", 0); + test_simple("ASDF / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); + test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); + test_simple("GETA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); // Well-formed but incomplete test_simple("GET / HTTP/1.1\r\n" @@ -2010,7 +2027,7 @@ main (void) "Content-Length: 6\r\n" "\r\n" "fooba", - 0); + HPE_OK); static const char *all_methods[] = { "DELETE", @@ -2033,7 +2050,7 @@ main (void) for (this_method = all_methods; *this_method; this_method++) { char buf[200]; sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); - test_simple(buf, 1); + test_simple(buf, HPE_OK); } static const char *bad_methods[] = { @@ -2043,7 +2060,7 @@ main (void) for (this_method = bad_methods; *this_method; this_method++) { char buf[200]; sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); - 
test_simple(buf, 0); + test_simple(buf, HPE_UNKNOWN); } const char *dumbfuck2 = @@ -2081,7 +2098,7 @@ main (void) "\tRA==\r\n" "\t-----END CERTIFICATE-----\r\n" "\r\n"; - test_simple(dumbfuck2, 1); + test_simple(dumbfuck2, HPE_OK); #if 0 // NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body