From 845ddb43da8aa2a8c80a9d9638d63386ddf97d7e Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sun, 28 Aug 2016 20:32:55 -0400 Subject: add content-length handling and partial chunked transfer-encoding support (still need footers and compression) --- TODO | 13 +++ ctx.c | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++ error.c | 4 + header-value-parser.c | 2 +- include/fhp/fhp.h | 31 +++++++ internal.h | 10 ++- test.c | 6 +- token.c | 13 +++ 8 files changed, 306 insertions(+), 6 deletions(-) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..514ac79 --- /dev/null +++ b/TODO @@ -0,0 +1,13 @@ +TODO +[ ] tokens + [ ] TOKEN_CONNECTION_START + [ ] TOKEN_CONNECTION_END + [ ] TOKEN_REQUEST_START +[ ] content-types: + [ ] x-form-url-encoded + [ ] multipart/form-data +[ ] transfer-encodings + [-] chunked + [ ] gzip + [ ] deflate + [ ] compress diff --git a/ctx.c b/ctx.c index 405223f..d2d7245 100644 --- a/ctx.c +++ b/ctx.c @@ -391,6 +391,7 @@ retry: case '\n': // set state ctx->state = FHP_STATE_HEADERS_END; + goto retry; break; } @@ -401,6 +402,7 @@ retry: case '\n': // set state ctx->state = FHP_STATE_HEADERS_END; + goto retry; break; default: @@ -568,6 +570,7 @@ retry: // set state ctx->state = FHP_STATE_HEADERS_END; + goto retry; break; default: @@ -579,6 +582,7 @@ retry: switch (byte) { case '\n': ctx->state = FHP_STATE_HEADERS_END; + goto retry; break; default: @@ -587,7 +591,236 @@ retry: break; case FHP_STATE_HEADERS_END: + // send end headers token + if (!ctx->cb(ctx, FHP_TOKEN_HEADERS_END, 0, 0)) + return FHP_ERR_CB; + + switch (ctx->body_type) { + case FHP_BODY_TYPE_NONE: + // no body + + // send request end token + if (!ctx->cb(ctx, FHP_TOKEN_REQUEST_END, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_REQUEST_END; + + break; + case FHP_BODY_TYPE_CONTENT_LENGTH: + // send body start token + if (!ctx->cb(ctx, FHP_TOKEN_BODY_START, 0, 0)) + return FHP_ERR_CB; + + if (ctx->content_length > 0) { + // 
grab content length and clear buffer + ctx->bytes_left = ctx->content_length; + ctx->buf_len = 0; + + // set state + ctx->state = FHP_STATE_CL_BODY; + } else { + // empty body + + // send body end token + if (!ctx->cb(ctx, FHP_TOKEN_BODY_END, 0, 0)) + return FHP_ERR_CB; + + // send request end token + if (!ctx->cb(ctx, FHP_TOKEN_REQUEST_END, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_REQUEST_END; + } + + break; + case FHP_BODY_TYPE_TRANSFER_ENCODING: + // send body start token + if (!ctx->cb(ctx, FHP_TOKEN_BODY_START, 0, 0)) + return FHP_ERR_CB; + + // clear chunk size + ctx->chunk_len = 0; + + // set state + ctx->state = FHP_STATE_CHUNK_LEN; + + break; + default: + // invalid body type (bug?) + return FHP_ERR_INVALID_BODY_TYPE; + } + + break; + case FHP_STATE_CL_BODY: + // add to buffer + if ((err = fhp_ctx_buf_push(ctx, FHP_TOKEN_BODY_FRAGMENT, byte)) != FHP_OK) + return err; + + // decrement remaining bytes + ctx->bytes_left--; + + if (!ctx->bytes_left) { + // flush buffer + if ((err = fhp_ctx_buf_flush(ctx, FHP_TOKEN_BODY_FRAGMENT)) != FHP_OK) + return err; + + // send body end token + if (!ctx->cb(ctx, FHP_TOKEN_BODY_END, 0, 0)) + return FHP_ERR_CB; + + // send request end token + if (!ctx->cb(ctx, FHP_TOKEN_REQUEST_END, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_REQUEST_END; + } + + break; + case FHP_STATE_CHUNK_LEN: + { + uint64_t old_chunk_len = ctx->chunk_len; + + switch (byte) { + case '\r': + // set state + ctx->state = FHP_STATE_CHUNK_LEN_CR; + + break; + case '\n': + // set state + ctx->state = FHP_STATE_CHUNK_LEN_CR; + goto retry; + + break; + CASE_DIGIT_CHARS + // update chunk length ('0'..'9' map to 0..9) + ctx->chunk_len = (ctx->chunk_len << 4) + (byte - '0'); + + break; + CASE_HEX_LC_ALPHA_CHARS + // update chunk length ('a'..'f' map to 10..15) + ctx->chunk_len = (ctx->chunk_len << 4) + (byte - 'a' + 10); + + break; + CASE_HEX_UC_ALPHA_CHARS + // update chunk length ('A'..'F' map to 10..15) + ctx->chunk_len = (ctx->chunk_len << 4) + (byte - 'A' + 10); + + break; + default: + 
// invalid character + return FHP_ERR_INVALID_CHAR_IN_CHUNK_LEN; + } + + // check for overflow: shifting back must recover the old value ('\r' adds no digit) + if (byte != '\r' && (ctx->chunk_len >> 4) != old_chunk_len) { + // overflow in chunk length + return FHP_ERR_CHUNK_LEN_OVERFLOW; + } + } + + break; + case FHP_STATE_CHUNK_LEN_CR: + switch (byte) { + case '\n': + // TODO: check chunk len + + if (ctx->chunk_len > 0) { + ctx->bytes_left = ctx->chunk_len; + ctx->buf_len = 0; + + // send chunk start token + if (!ctx->cb(ctx, FHP_TOKEN_CHUNK_START, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_CHUNK_BODY; + } else { + // last chunk + + // send last chunk token + if (!ctx->cb(ctx, FHP_TOKEN_CHUNK_LAST, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_TE_FOOTER; + } + + break; + default: + return FHP_ERR_INVALID_CHAR_AFTER_CHUNK_LEN; + } + + break; + case FHP_STATE_CHUNK_BODY: + // add to buffer + if ((err = fhp_ctx_buf_push(ctx, FHP_TOKEN_BODY_FRAGMENT, byte)) != FHP_OK) + return err; + + // decrement remaining bytes + ctx->bytes_left--; + + if (!ctx->bytes_left) { + // flush buffer + if ((err = fhp_ctx_buf_flush(ctx, FHP_TOKEN_BODY_FRAGMENT)) != FHP_OK) + return err; + + // send chunk end token + if (!ctx->cb(ctx, FHP_TOKEN_CHUNK_END, 0, 0)) + return FHP_ERR_CB; + + // set state + ctx->state = FHP_STATE_CHUNK_BODY_END; + } + break; + case FHP_STATE_CHUNK_BODY_END: + switch (byte) { + case '\r': + // set state + ctx->state = FHP_STATE_CHUNK_BODY_END_CR; + + break; + case '\n': + // set state + ctx->state = FHP_STATE_CHUNK_BODY_END_CR; + goto retry; + + break; + default: + return FHP_ERR_INVALID_CHAR_AFTER_CHUNK_BODY; + } + + break; + case FHP_STATE_CHUNK_BODY_END_CR: + switch (byte) { + case '\n': + // clear chunk len, set state + ctx->chunk_len = 0; + ctx->state = FHP_STATE_CHUNK_LEN; + + break; + default: + return FHP_ERR_INVALID_CHAR_AFTER_CHUNK_BODY; + } + + break; + case FHP_STATE_TE_FOOTER: // TODO + break; + case FHP_STATE_REQUEST_END: + switch (byte) { + case '\r': + case '\n': + // eat newlines + 
break; + default: + // set state + ctx->state = FHP_STATE_INIT; + goto retry; + } break; default: diff --git a/error.c b/error.c index 9850de8..1e83555 100644 --- a/error.c +++ b/error.c @@ -32,6 +32,10 @@ fhp_errors[] = { "content-length parser already done", "bad content-length parser state", "overflow in content-length header value", + "invalid character in chunk length value", + "invalid character after chunk length value", + "overflow in chunk length value", + "invalid character after chunk body", }; fhp_err_t diff --git a/header-value-parser.c b/header-value-parser.c index 2a1bf74..e3c4107 100644 --- a/header-value-parser.c +++ b/header-value-parser.c @@ -128,7 +128,7 @@ fhp_header_value_parser_done(fhp_ctx_t * const ctx) { switch (ctx->body_type) { case FHP_BODY_TYPE_NONE: // set body type - ctx->body_type = FHP_BODY_TYPE_TRANSFER_ENCODING; + ctx->body_type = FHP_BODY_TYPE_CONTENT_LENGTH; break; case FHP_BODY_TYPE_CONTENT_LENGTH: diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h index 639eb0f..3aa020d 100644 --- a/include/fhp/fhp.h +++ b/include/fhp/fhp.h @@ -47,6 +47,10 @@ typedef enum { FHP_ERR_CL_PARSER_DONE, FHP_ERR_BAD_CL_STATE, FHP_ERR_CL_OVERFLOW, + FHP_ERR_INVALID_CHAR_IN_CHUNK_LEN, + FHP_ERR_INVALID_CHAR_AFTER_CHUNK_LEN, + FHP_ERR_CHUNK_LEN_OVERFLOW, + FHP_ERR_INVALID_CHAR_AFTER_CHUNK_BODY, FHP_ERR_LAST } fhp_err_t; @@ -93,6 +97,19 @@ typedef enum { FHP_TOKEN_HEADER_TRANSFER_ENCODING, FHP_TOKEN_HEADER_CONTENT_LENGTH, + FHP_TOKEN_HEADERS_END, + + FHP_TOKEN_BODY_NONE, + FHP_TOKEN_BODY_START, + FHP_TOKEN_BODY_FRAGMENT, + FHP_TOKEN_BODY_END, + + FHP_TOKEN_CHUNK_START, + FHP_TOKEN_CHUNK_END, + FHP_TOKEN_CHUNK_LAST, + + FHP_TOKEN_REQUEST_END, + FHP_TOKEN_LAST } fhp_token_t; @@ -225,6 +242,14 @@ typedef enum { FHP_STATE_HEADER_VALUE_END, FHP_STATE_HEADERS_END, FHP_STATE_HEADERS_END_CR, + FHP_STATE_CL_BODY, + FHP_STATE_CHUNK_LEN, + FHP_STATE_CHUNK_LEN_CR, + FHP_STATE_CHUNK_BODY, + FHP_STATE_CHUNK_BODY_END, + FHP_STATE_CHUNK_BODY_END_CR, + 
FHP_STATE_TE_FOOTER, + FHP_STATE_REQUEST_END, FHP_STATE_LAST } fhp_state_t; @@ -291,6 +316,12 @@ struct fhp_ctx_t_ { // state for url hex decoder uint32_t hex; + + // state for chunk length decoder + uint64_t chunk_len; + + // bytes remaining for body (content-length) or current chunk (chunked) + uint64_t bytes_left; }; fhp_err_t diff --git a/internal.h b/internal.h index 994b208..1e2733a 100644 --- a/internal.h +++ b/internal.h @@ -79,13 +79,15 @@ case '8': \ case '9': -#define CASE_HEX_ALPHA_CHARS \ +#define CASE_HEX_LC_ALPHA_CHARS \ case 'a': \ case 'b': \ case 'c': \ case 'd': \ case 'e': \ - case 'f': \ + case 'f': + +#define CASE_HEX_UC_ALPHA_CHARS \ case 'A': \ case 'B': \ case 'C': \ @@ -93,6 +95,10 @@ case 'E': \ case 'F': +#define CASE_HEX_ALPHA_CHARS \ + CASE_HEX_LC_ALPHA_CHARS \ + CASE_HEX_UC_ALPHA_CHARS \ + // // rfc7230, Appendix B // https://tools.ietf.org/html/rfc7230 diff --git a/test.c b/test.c index a2ccb0a..7bcb360 100644 --- a/test.c +++ b/test.c @@ -32,9 +32,9 @@ basic_str = "GET / HTTP/1.1\r\n" "Host: pablotron.org\r\n" "Connection: close\r\n" - "Content-Length: 123456\r\n" - "Transfer-Encoding: deflate, chunked\r\n" - "\r\n"; + "Content-Length: 15\r\n" + "\r\n" + "hello test test"; static bool basic_cb( diff --git a/token.c b/token.c index 652cdba..721740d 100644 --- a/token.c +++ b/token.c @@ -41,6 +41,19 @@ fhp_tokens[] = { "HEADER_TRANSFER_ENCODING", "HEADER_CONTENT_LENGTH", + "HEADERS_END", + + "BODY_NONE", + "BODY_START", + "BODY_FRAGMENT", + "BODY_END", + + "CHUNK_START", + "CHUNK_END", + "CHUNK_LAST", + + "REQUEST_END", + "LAST" }; -- cgit v1.2.3