From 19653e153b70e72e9a75e74655c8e56025ce4d61 Mon Sep 17 00:00:00 2001
From: Paul Duncan
Date: Sat, 27 Aug 2016 14:39:23 -0400
Subject: hash http method and version and add tokens for fast parsing

---
 fhp.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 196 insertions(+), 2 deletions(-)

(limited to 'fhp.c')

diff --git a/fhp.c b/fhp.c
index 416d054..be4b7f3 100644
--- a/fhp.c
+++ b/fhp.c
@@ -170,6 +170,58 @@
   case ' ': \
   case '\t':
 
+//
+// hash functions (djb2)
+// (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/)
+//
+
+uint32_t
+fhp_hash_init(void) {
+  return 5381;
+}
+
+uint32_t
+fhp_hash_push(uint32_t hash, uint8_t * const buf, size_t len) {
+  for (size_t i = 0; i < len; i++)
+    hash = ((hash << 5) + hash) + buf[i]; // hash * 33 + byte
+
+  return hash;
+}
+
+uint32_t
+fhp_hash_string(char * const str) {
+  uint32_t r = fhp_hash_init();
+  return fhp_hash_push(r, (uint8_t*) str, strlen(str));
+}
+
+uint32_t
+fhp_lc_hash_push(
+  uint32_t hash,
+  uint8_t * const buf,
+  size_t len
+) {
+  for (size_t i = 0; i < len; i++) {
+    uint8_t c = buf[i];
+
+    if (c >= 'A' && c <= 'Z')
+      c = (c - 'A') + 'a';
+
+    hash = ((hash << 5) + hash) + c;
+  }
+
+  return hash;
+}
+
+uint32_t
+fhp_lc_hash_string(char * const str) {
+  uint32_t r = fhp_hash_init();
+  return fhp_lc_hash_push(r, (uint8_t*) str, strlen(str));
+}
+
+//
+// error functions
+//
+
 static const char *
 fhp_errors[] = {
   "OK",
@@ -208,12 +260,24 @@ fhp_strerror(
   return FHP_OK;
 }
 
+//
+// token functions
+//
+
 static const char *
 fhp_tokens[] = {
   "METHOD_START",
   "METHOD_FRAGMENT",
   "METHOD_END",
 
+  "METHOD_GET",
+  "METHOD_POST",
+  "METHOD_HEAD",
+  "METHOD_PUT",
+  "METHOD_DELETE",
+  "METHOD_OPTIONS",
+  "METHOD_OTHER",
+
   "URL_START",
   "URL_FRAGMENT",
   "URL_END",
@@ -222,6 +286,10 @@ fhp_tokens[] = {
   "VERSION_FRAGMENT",
   "VERSION_END",
 
+  "VERSION_HTTP_10",
+  "VERSION_HTTP_11",
+  "VERSION_OTHER",
+
   "HEADER_NAME_START",
   "HEADER_NAME_FRAGMENT",
   "HEADER_NAME_END",
@@ -255,22 +323,92 @@ fhp_strtoken(
   return FHP_OK;
 }
 
-static fhp_t DEFAULT_CONTEXT = {
+//
+// string functions
+//
+
+static char * const
+fhp_strings[] = {
+  "GET",
+  "POST",
+  "HEAD",
+  "PUT",
+  "DELETE",
+  "OPTIONS",
+  "HTTP/1.0",
+  "HTTP/1.1",
+  "content-length",
+  "transfer-encoding",
+  "gzip",
+  "x-gzip",
+  "deflate",
+  "x-deflate",
+  "chunked",
+  NULL
+};
+
+typedef enum {
+  FHP_STR_GET,
+  FHP_STR_POST,
+  FHP_STR_HEAD,
+  FHP_STR_PUT,
+  FHP_STR_DELETE,
+  FHP_STR_OPTIONS,
+  FHP_STR_HTTP_10,
+  FHP_STR_HTTP_11,
+  FHP_STR_CONTENT_LENGTH,
+  FHP_STR_TRANSFER_ENCODING,
+  FHP_STR_GZIP,
+  FHP_STR_X_GZIP,
+  FHP_STR_DEFLATE,
+  FHP_STR_X_DEFLATE,
+  FHP_STR_CHUNKED,
+  FHP_STR_LAST
+} fhp_str_t;
+
+void
+fhp_env_init(fhp_env_t * const env) {
+  for (size_t i = 0; i < FHP_STR_LAST; i++)
+    env->hashes[i] = fhp_hash_string(fhp_strings[i]); // same hash as fhp_buf_flush()
+}
+
+static fhp_env_t fhp_default_env;
+
+fhp_env_t *
+fhp_get_default_env(void) {
+  static fhp_env_t *r = NULL;
+
+  if (!r) {
+    r = &fhp_default_env;
+    fhp_env_init(r);
+  }
+
+  return r;
+}
+
+//
+// context functions
+//
+
+static const fhp_t DEFAULT_CONTEXT = {
   .state = FHP_STATE_INIT,
   .user_data = NULL,
   .cb = NULL,
   .err = FHP_OK,
   .ofs = 0,
   .buf_len = 0,
+  .is_hashing = false,
 };
 
 fhp_err_t
 fhp_init(
   fhp_t * const fhp,
+  fhp_env_t * const env,
   fhp_cb_t cb,
   void * const user_data
 ) {
   *fhp = DEFAULT_CONTEXT;
+  fhp->env = env ? env : fhp_get_default_env();
   fhp->user_data = user_data;
   fhp->cb = cb;
 
@@ -294,6 +432,10 @@ fhp_buf_flush(
     if (!fhp->cb(fhp, token, fhp->buf, fhp->buf_len))
       return false;
 
+    // update buffer hash
+    if (fhp->is_hashing)
+      fhp->buf_hash = fhp_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len);
+
     // clear buffer
     fhp_buf_clear(fhp);
   }
@@ -336,6 +478,10 @@ retry:
       if (!fhp->cb(fhp, FHP_TOKEN_METHOD_START, 0, 0))
         return FHP_ERR_CB;
 
+      // enable buffer hashing
+      fhp->is_hashing = true;
+      fhp->buf_hash = fhp_hash_init();
+
       // set state
       fhp->state = FHP_STATE_METHOD;
       goto retry;
@@ -359,10 +505,33 @@ retry:
       if (!fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT))
         return FHP_ERR_CB;
 
+      // disable buffer hashing
+      fhp->is_hashing = false;
+
       // send end token
       if (!fhp->cb(fhp, FHP_TOKEN_METHOD_END, 0, 0))
         return FHP_ERR_CB;
 
+      // get method token
+      fhp->http_method = FHP_TOKEN_METHOD_OTHER;
+      if (fhp->buf_hash == fhp->env->hashes[FHP_STR_GET]) {
+        fhp->http_method = FHP_TOKEN_METHOD_GET;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_POST]) {
+        fhp->http_method = FHP_TOKEN_METHOD_POST;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HEAD]) {
+        fhp->http_method = FHP_TOKEN_METHOD_HEAD;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_PUT]) {
+        fhp->http_method = FHP_TOKEN_METHOD_PUT;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_DELETE]) {
+        fhp->http_method = FHP_TOKEN_METHOD_DELETE;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_OPTIONS]) {
+        fhp->http_method = FHP_TOKEN_METHOD_OPTIONS;
+      }
+
+      // send method token
+      if (!fhp->cb(fhp, fhp->http_method, 0, 0))
+        return FHP_ERR_CB;
+
       // set state
       fhp->state = FHP_STATE_METHOD_END;
       goto retry;
@@ -464,6 +633,11 @@ retry:
       if (!fhp->cb(fhp, FHP_TOKEN_VERSION_START, 0, 0))
         return FHP_ERR_CB;
 
+      // enable buffer hashing
+      fhp->is_hashing = true;
+      fhp->buf_hash = fhp_hash_init();
+
+      // set state
       fhp->state = FHP_STATE_VERSION;
       goto retry;
     }
@@ -481,6 +655,21 @@ retry:
       if (!fhp->cb(fhp, FHP_TOKEN_VERSION_END, 0, 0))
         return FHP_ERR_CB;
 
+      // disable buffer hashing
+      fhp->is_hashing = false;
+
+      // get version token
+      fhp->http_version = FHP_TOKEN_VERSION_OTHER;
+      if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_10]) {
+        fhp->http_version = FHP_TOKEN_VERSION_HTTP_10;
+      } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_11]) {
+        fhp->http_version = FHP_TOKEN_VERSION_HTTP_11;
+      }
+
+      // send version token
+      if (!fhp->cb(fhp, fhp->http_version, 0, 0))
+        return FHP_ERR_CB;
+
       // set state
       fhp->state = FHP_STATE_VERSION_END;
       goto retry;
@@ -761,7 +950,12 @@ fhp_push(
   return FHP_OK;
 }
 
+fhp_env_t *
+fhp_get_env(fhp_t * const fhp) {
+  return fhp->env;
+}
+
 void *
-fhp_user_data(fhp_t * const fhp) {
+fhp_get_user_data(fhp_t * const fhp) {
   return fhp->user_data;
 }
-- 
cgit v1.2.3