From ef52728f7e7461cab37ee3cbc3ef9b0d7e328c65 Mon Sep 17 00:00:00 2001
From: Paul Duncan
Date: Sun, 28 Aug 2016 02:45:06 -0400
Subject: add header value parser, fix hashing

---
 fhp.c             | 216 ++++++++++++++++++++++++++++++++++++++++++++----------
 hash.c            |   6 +-
 include/fhp/fhp.h |  17 +++++
 test.c            |   1 +
 token.c           |   3 +
 5 files changed, 202 insertions(+), 41 deletions(-)

diff --git a/fhp.c b/fhp.c
index a2ba7a2..6048b2b 100644
--- a/fhp.c
+++ b/fhp.c
@@ -1,4 +1,114 @@
 #include "internal.h"
+#include <stdio.h>
+
+//
+// header value parser
+//
+
+static fhp_err_t
+fhp_header_value_parser_init(fhp_t * const fhp) {
+  uint32_t hash = fhp->header_name_hash;
+  fhp_header_value_parser_t parser = FHP_HEADER_VALUE_PARSER_NONE;
+  fhp_err_t err;
+
+  /* fprintf(
+    stderr,
+    "hashes: header = %u, env = %u, str = %u, str(lc) = %u\n",
+    hash,
+    fhp->env->hashes[FHP_STR_TRANSFER_ENCODING],
+    fhp_hash_string("Transfer-Encoding"),
+    fhp_lc_hash_string("Transfer-Encoding")
+  ); */
+
+  if (hash == fhp->env->hashes[FHP_STR_TRANSFER_ENCODING]) {
+    // set parser type
+    parser = FHP_HEADER_VALUE_PARSER_TRANSFER_ENCODING;
+
+    // init parser
+    if ((err = fhp_te_parser_init(&(fhp->parsers.te))) != FHP_OK)
+      return err;
+  } else if (hash == fhp->env->hashes[FHP_STR_CONTENT_LENGTH]) {
+    // set parser type
+    parser = FHP_HEADER_VALUE_PARSER_CONTENT_LENGTH;
+  } else {
+    // set default parser type
+    parser = FHP_HEADER_VALUE_PARSER_NONE;
+  }
+
+  // set value parser
+  fhp->header_value_parser = parser;
+
+  // return success
+  return FHP_OK;
+}
+
+static fhp_err_t
+fhp_header_value_parser_push(
+  fhp_t * const fhp,
+  uint8_t * const buf,
+  size_t len
+) {
+  fhp_err_t r = FHP_OK;
+
+  switch (fhp->header_value_parser) {
+  case FHP_HEADER_VALUE_PARSER_TRANSFER_ENCODING:
+    r = fhp_te_parser_push(&(fhp->parsers.te), buf, len);
+    break;
+  case FHP_HEADER_VALUE_PARSER_CONTENT_LENGTH:
+    // TODO
+    r = FHP_OK;
+    break;
+  default:
+    // do nothing
+    r = FHP_OK;
+    break;
+  }
+
+  // return result
+  return r;
+}
+
+static fhp_err_t
+fhp_header_value_parser_done(fhp_t * const fhp) {
+  fhp_err_t r = FHP_OK;
+
+  switch (fhp->header_value_parser) {
+  case FHP_HEADER_VALUE_PARSER_TRANSFER_ENCODING:
+    // finish parsing tes
+    if ((r = fhp_te_parser_done(&(fhp->parsers.te), &(fhp->num_tes))) != FHP_OK)
+      return r;
+
+    // check number of tes
+    if (fhp->num_tes > FHP_MAX_TRANSFER_ENCODINGS)
+      return FHP_ERR_TOO_MANY_TES;
+
+    // copy tes to context
+    if ((r = fhp_te_parser_get_tes(&(fhp->parsers.te), fhp->tes, FHP_MAX_TRANSFER_ENCODINGS)) != FHP_OK)
+      return r;
+
+    // notify callback
+    if (!fhp->cb(fhp, FHP_TOKEN_HEADER_TRANSFER_ENCODING, 0, 0))
+      return FHP_ERR_CB;
+
+    break;
+  case FHP_HEADER_VALUE_PARSER_CONTENT_LENGTH:
+    // TODO
+    r = FHP_OK;
+
+    break;
+  default:
+    // do nothing
+    r = FHP_OK;
+
+    break;
+  }
+
+  // clear header value parser
+  fhp->header_value_parser = FHP_HEADER_VALUE_PARSER_NONE;
+
+  // return result
+  return r;
+}
 
 //
 // context functions
@@ -13,6 +123,7 @@ static const fhp_t FHP_DEFAULT_CONTEXT = {
   .buf_len = 0,
   .is_hashing = false,
   .header_name_hash = 0,
+  .header_value_parser = FHP_HEADER_VALUE_PARSER_NONE,
   .body_type = FHP_BODY_TYPE_NONE,
   .content_length = 0,
   .num_tes = 0,
@@ -40,7 +151,7 @@ fhp_buf_clear(fhp_t * const fhp) {
   fhp->buf_len = 0;
 }
 
-static bool
+static fhp_err_t
 fhp_buf_flush(
   fhp_t * const fhp,
   fhp_token_t token
@@ -48,21 +159,26 @@ fhp_buf_flush(
   if (fhp->buf_len > 0) {
     // push data
     if (!fhp->cb(fhp, token, fhp->buf, fhp->buf_len))
-      return false;
+      return FHP_ERR_CB;
 
     // update buffer hash
     if (fhp->is_hashing)
-      fhp->buf_hash = fhp_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len);
+      fhp->buf_hash = fhp_lc_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len);
+
+    // push to header value parser
+    fhp_err_t err;
+    if ((err = fhp_header_value_parser_push(fhp, fhp->buf, fhp->buf_len)) != FHP_OK)
+      return err;
 
     // clear buffer
     fhp_buf_clear(fhp);
   }
 
   // return success
-  return true;
+  return FHP_OK;
 }
 
-static bool
+static fhp_err_t
 fhp_buf_push(
   fhp_t * const fhp,
   fhp_token_t token,
@@ -70,8 +186,9 @@ fhp_buf_push(
 ) {
   // flush buffer
   if (fhp->buf_len + 1 >= FHP_MAX_BUF_SIZE) {
-    if (!fhp_buf_flush(fhp, token))
-      return false;
+    fhp_err_t err;
+    if ((err = fhp_buf_flush(fhp, token)) != FHP_OK)
+      return err;
   }
 
   // append to buffer
@@ -79,7 +196,7 @@ fhp_buf_push(
   fhp->buf_len++;
 
   // return success
-  return true;
+  return FHP_OK;
 }
 
 static fhp_err_t
@@ -158,6 +275,8 @@ fhp_push_byte(
   fhp_t * const fhp,
   uint8_t byte
 ) {
+  fhp_err_t err;
+
 retry:
   switch (fhp->state) {
   case FHP_STATE_INIT:
@@ -185,14 +304,14 @@ retry:
     switch (byte) {
     CASE_TOKEN_CHARS
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_METHOD_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_METHOD_FRAGMENT, byte)) != FHP_OK)
+        return err;
 
       break;
     case ' ':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT)) != FHP_OK)
+        return err;
 
       // disable buffer hashing
       fhp->is_hashing = false;
@@ -237,8 +356,8 @@ retry:
     switch (byte) {
     case ' ':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_URL_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_URL_FRAGMENT)) != FHP_OK)
+        return err;
 
       // send end token
       if (!fhp->cb(fhp, FHP_TOKEN_URL_END, 0, 0))
@@ -256,8 +375,8 @@ retry:
       break;
     CASE_URL_CHARS
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte)) != FHP_OK)
+        return err;
     }
 
     break;
@@ -291,8 +410,8 @@ retry:
     }
 
     // add to buffer
-    if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex))
-      return FHP_ERR_CB;
+    if ((err = fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex)) != FHP_OK)
+      return err;
 
     // set state
     fhp->state = FHP_STATE_URL;
@@ -322,8 +441,8 @@ retry:
     case '\r':
     case '\n':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_VERSION_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_VERSION_FRAGMENT)) != FHP_OK)
+        return err;
 
       // send end token
       if (!fhp->cb(fhp, FHP_TOKEN_VERSION_END, 0, 0))
@@ -351,8 +470,8 @@ retry:
       break;
     CASE_VERSION_CHARS
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte)) != FHP_OK)
+        return err;
 
       break;
     default:
@@ -433,14 +552,14 @@ retry:
     switch (byte) {
     CASE_TOKEN_CHARS
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte)) != FHP_OK)
+        return err;
 
       break;
     case ':':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT)) != FHP_OK)
+        return err;
 
       // disable buffer hashing and cache header name hash
       fhp->is_hashing = false;
@@ -475,6 +594,11 @@ retry:
       if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_START, 0, 0))
         return FHP_ERR_CB;
 
+      // init header value parser
+      fhp_err_t err;
+      if ((err = fhp_header_value_parser_init(fhp)) != FHP_OK)
+        return err;
+
       // set state
       fhp->state = FHP_STATE_HEADER_VALUE;
       goto retry;
@@ -496,8 +620,8 @@ retry:
       // FIXME: need more limits on valid octets
 
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, byte)) != FHP_OK)
+        return err;
 
       break;
     }
@@ -521,8 +645,8 @@ retry:
       // add space to buffer
       // folding to ' ', as per RFC7230 3.2.4
       // https://tools.ietf.org/html/rfc7230#section-3.2.4
-      if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, ' '))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, ' ')) != FHP_OK)
+        return err;
 
       // set state
       fhp->state = FHP_STATE_HEADER_VALUE;
@@ -530,20 +654,28 @@ retry:
       break;
     CASE_TOKEN_CHARS
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) != FHP_OK)
+        return err;
 
       // end header value
       if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0))
         return FHP_ERR_CB;
 
+      // end header value parser
+      if ((err = fhp_header_value_parser_done(fhp)) != FHP_OK)
+        return err;
+
       // send start token
       if (!fhp->cb(fhp, FHP_TOKEN_HEADER_NAME_START, 0, 0))
         return FHP_ERR_CB;
 
+      // enable buffer hashing
+      fhp->is_hashing = true;
+      fhp->buf_hash = fhp_hash_init();
+
       // add to buffer
-      if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte)) != FHP_OK)
+        return err;
 
       // set state
       fhp->state = FHP_STATE_HEADER_NAME;
@@ -551,26 +683,34 @@ retry:
       break;
     case '\r':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) != FHP_OK)
+        return err;
 
       // end header value
       if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0))
         return FHP_ERR_CB;
 
+      // end header value parser
+      if ((err = fhp_header_value_parser_done(fhp)) != FHP_OK)
+        return err;
+
       // set state
       fhp->state = FHP_STATE_HEADERS_END_CR;
 
       break;
     case '\n':
       // flush buffer
-      if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT))
-        return FHP_ERR_CB;
+      if ((err = fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) != FHP_OK)
+        return err;
 
       // end header value
       if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0))
         return FHP_ERR_CB;
 
+      // end header value parser
+      if ((err = fhp_header_value_parser_done(fhp)) != FHP_OK)
+        return err;
+
       // set state
       fhp->state = FHP_STATE_HEADERS_END;
 
diff --git a/hash.c b/hash.c
index c35590e..e4bd7c2 100644
--- a/hash.c
+++ b/hash.c
@@ -13,7 +13,7 @@ fhp_hash_init(void) {
 uint32_t
 fhp_hash_push(uint32_t hash, uint8_t * const buf, size_t len) {
   for (size_t i = 0; i < len; i++)
-    hash = ((hash << 5) + hash) + buf[len];
+    hash = ((hash << 5) + hash) + buf[i];
 
   return hash;
 }
@@ -31,9 +31,9 @@ fhp_lc_hash_push(
   size_t len
 ) {
   for (size_t i = 0; i < len; i++) {
-    uint8_t c = buf[len];
+    uint8_t c = buf[i];
 
-    if (c >= 'A' && c <= 'Z')
+    if ((c >= 'A') && (c <= 'Z'))
       c = (c - 'A') + 'a';
 
     hash = ((hash << 5) + hash) + c;
diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h
index 5ca48b7..46efb44 100644
--- a/include/fhp/fhp.h
+++ b/include/fhp/fhp.h
@@ -84,6 +84,9 @@ typedef enum {
   FHP_TOKEN_HEADER_VALUE_FRAGMENT,
   FHP_TOKEN_HEADER_VALUE_END,
 
+  FHP_TOKEN_HEADER_TRANSFER_ENCODING,
+  FHP_TOKEN_HEADER_CONTENT_LENGTH,
+
   FHP_TOKEN_LAST
 } fhp_token_t;
 
@@ -191,6 +194,13 @@ typedef enum {
   FHP_STATE_LAST
 } fhp_state_t;
 
+typedef enum {
+  FHP_HEADER_VALUE_PARSER_NONE,
+  FHP_HEADER_VALUE_PARSER_TRANSFER_ENCODING,
+  FHP_HEADER_VALUE_PARSER_CONTENT_LENGTH,
+  FHP_HEADER_VALUE_PARSER_LAST
+} fhp_header_value_parser_t;
+
 #define FHP_MAX_BUF_SIZE 1024
 #define FHP_MAX_TRANSFER_ENCODINGS 4
 
@@ -227,6 +237,13 @@ struct fhp_t_ {
   // hash of last header name
   uint32_t header_name_hash;
 
+  // header value parser
+  fhp_header_value_parser_t header_value_parser;
+
+  union {
+    fhp_te_parser_t te;
+  } parsers;
+
   // request body type
   fhp_body_type_t body_type;
 
diff --git a/test.c b/test.c
index b1ca674..42e6f78 100644
--- a/test.c
+++ b/test.c
@@ -32,6 +32,7 @@ basic_str =
   "GET / HTTP/1.1\r\n"
   "Host: pablotron.org\r\n"
   "Connection: close\r\n"
+  "Transfer-Encoding: deflate, chunked\r\n"
   "\r\n";
 
 static bool
diff --git a/token.c b/token.c
index 4e5dbfc..652cdba 100644
--- a/token.c
+++ b/token.c
@@ -38,6 +38,9 @@ fhp_tokens[] = {
   "HEADER_VALUE_FRAGMENT",
   "HEADER_VALUE_END",
 
+  "HEADER_TRANSFER_ENCODING",
+  "HEADER_CONTENT_LENGTH",
+
   "LAST"
 };
 
-- 
cgit v1.2.3