From 19653e153b70e72e9a75e74655c8e56025ce4d61 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sat, 27 Aug 2016 14:39:23 -0400 Subject: hash http method and version and add tokens for fast parsing --- Makefile | 4 +- fhp.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++++- include/fhp/fhp.h | 71 ++++++++++++++++++-- test.c | 6 +- 4 files changed, 267 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index fc81c8b..e80b70a 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ CC ?= cc CFLAGS=-std=c99 -W -Wall -pedantic -O2 -Iinclude -fPIC OBJS=fhp.o -SONAME=libfhp.so.1 -LIB=libfhp.so.1 +SONAME=libfhp.so +LIB=libfhp.so TEST_OBJS=test.o TEST_APP=./fhp-test diff --git a/fhp.c b/fhp.c index 416d054..be4b7f3 100644 --- a/fhp.c +++ b/fhp.c @@ -170,6 +170,58 @@ case ' ': \ case '\t': +// +// hash functions (djb2) +// (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/) +// + +uint32_t +fhp_hash_init(void) { + return 5381; +} + +uint32_t +fhp_hash_push(uint32_t hash, uint8_t * const buf, size_t len) { + for (size_t i = 0; i < len; i++) + hash = ((hash << 5) + hash) + buf[i]; + + return hash; +} + +uint32_t +fhp_hash_string(char * const str) { + uint32_t r = fhp_hash_init(); + return fhp_hash_push(r, (uint8_t*) str, strlen(str)); +} + +uint32_t +fhp_lc_hash_push( + uint32_t hash, + uint8_t * const buf, + size_t len +) { + for (size_t i = 0; i < len; i++) { + uint8_t c = buf[i]; + + if (c >= 'A' && c <= 'Z') + c = (c - 'A') + 'a'; + + hash = ((hash << 5) + hash) + c; + } + + return hash; +} + +uint32_t +fhp_lc_hash_string(char * const str) { + uint32_t r = fhp_hash_init(); + return fhp_lc_hash_push(r, (uint8_t*) str, strlen(str)); +} + +// +// error functions +// + static const char * fhp_errors[] = { "OK", @@ -208,12 +260,24 @@ fhp_strerror( return FHP_OK; } +// +// token functions +// + static const char * fhp_tokens[] = { "METHOD_START", "METHOD_FRAGMENT", "METHOD_END", + "METHOD_GET", + "METHOD_POST", + "METHOD_HEAD", + 
"METHOD_PUT", + "METHOD_DELETE", + "METHOD_OPTIONS", + "METHOD_OTHER", + "URL_START", "URL_FRAGMENT", "URL_END", @@ -222,6 +286,10 @@ fhp_tokens[] = { "VERSION_FRAGMENT", "VERSION_END", + "VERSION_HTTP_10", + "VERSION_HTTP_11", + "VERSION_OTHER", + "HEADER_NAME_START", "HEADER_NAME_FRAGMENT", "HEADER_NAME_END", @@ -255,22 +323,92 @@ fhp_strtoken( return FHP_OK; } -static fhp_t DEFAULT_CONTEXT = { +// +// string functions +// + +static char * const +fhp_strings[] = { + "GET", + "POST", + "HEAD", + "PUT", + "DELETE", + "OPTIONS", + "HTTP/1.0", + "HTTP/1.1", + "content-length", + "transfer-encoding", + "gzip", + "x-gzip", + "deflate", + "x-deflate", + "chunked", + NULL +}; + +typedef enum { + FHP_STR_GET, + FHP_STR_POST, + FHP_STR_HEAD, + FHP_STR_PUT, + FHP_STR_DELETE, + FHP_STR_OPTIONS, + FHP_STR_HTTP_10, + FHP_STR_HTTP_11, + FHP_STR_CONTENT_LENGTH, + FHP_STR_TRANSFER_ENCODING, + FHP_STR_GZIP, + FHP_STR_X_GZIP, + FHP_STR_DEFLATE, + FHP_STR_X_DEFLATE, + FHP_STR_CHUNKED, + FHP_STR_LAST +} fhp_str_t; + +void +fhp_env_init(fhp_env_t * const env) { + for (size_t i = 0; i < FHP_STR_LAST; i++) + env->hashes[i] = fhp_hash_string(fhp_strings[i]); // exact-case hash, matching fhp_hash_push() on the parser buffer +} + +static fhp_env_t fhp_default_env; + +fhp_env_t * +fhp_get_default_env(void) { + static fhp_env_t *r = NULL; + + if (!r) { + r = &fhp_default_env; + fhp_env_init(r); + } + + return r; +} + +// +// context functions +// + +static const fhp_t DEFAULT_CONTEXT = { .state = FHP_STATE_INIT, .user_data = NULL, .cb = NULL, .err = FHP_OK, .ofs = 0, .buf_len = 0, + .is_hashing = false, }; fhp_err_t fhp_init( fhp_t * const fhp, + fhp_env_t * const env, fhp_cb_t cb, void * const user_data ) { *fhp = DEFAULT_CONTEXT; + fhp->env = env ? 
env : fhp_get_default_env(); fhp->user_data = user_data; fhp->cb = cb; @@ -294,6 +432,10 @@ fhp_buf_flush( if (!fhp->cb(fhp, token, fhp->buf, fhp->buf_len)) return false; + // update buffer hash + if (fhp->is_hashing) + fhp->buf_hash = fhp_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len); + // clear buffer fhp_buf_clear(fhp); } @@ -336,6 +478,10 @@ retry: if (!fhp->cb(fhp, FHP_TOKEN_METHOD_START, 0, 0)) return FHP_ERR_CB; + // enable buffer hashing + fhp->is_hashing = true; + fhp->buf_hash = fhp_hash_init(); + // set state fhp->state = FHP_STATE_METHOD; goto retry; @@ -359,10 +505,33 @@ retry: if (!fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT)) return FHP_ERR_CB; + // disable buffer hashing + fhp->is_hashing = false; + // send end token if (!fhp->cb(fhp, FHP_TOKEN_METHOD_END, 0, 0)) return FHP_ERR_CB; + // get method token + fhp->http_method = FHP_TOKEN_METHOD_OTHER; + if (fhp->buf_hash == fhp->env->hashes[FHP_STR_GET]) { + fhp->http_method = FHP_TOKEN_METHOD_GET; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_POST]) { + fhp->http_method = FHP_TOKEN_METHOD_POST; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HEAD]) { + fhp->http_method = FHP_TOKEN_METHOD_HEAD; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_PUT]) { + fhp->http_method = FHP_TOKEN_METHOD_PUT; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_DELETE]) { + fhp->http_method = FHP_TOKEN_METHOD_DELETE; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_OPTIONS]) { + fhp->http_method = FHP_TOKEN_METHOD_OPTIONS; + } + + // send method token + if (!fhp->cb(fhp, fhp->http_method, 0, 0)) + return FHP_ERR_CB; + // set state fhp->state = FHP_STATE_METHOD_END; goto retry; @@ -464,6 +633,11 @@ retry: if (!fhp->cb(fhp, FHP_TOKEN_VERSION_START, 0, 0)) return FHP_ERR_CB; + // enable buffer hashing + fhp->is_hashing = true; + fhp->buf_hash = fhp_hash_init(); + + // set state fhp->state = FHP_STATE_VERSION; goto retry; } @@ -481,6 +655,21 @@ retry: if (!fhp->cb(fhp, 
FHP_TOKEN_VERSION_END, 0, 0)) return FHP_ERR_CB; + // disable buffer hashing + fhp->is_hashing = false; + + // get version token + fhp->http_version = FHP_TOKEN_VERSION_OTHER; + if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_10]) { + fhp->http_version = FHP_TOKEN_VERSION_HTTP_10; + } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_11]) { + fhp->http_version = FHP_TOKEN_VERSION_HTTP_11; + } + + // send version token + if (!fhp->cb(fhp, fhp->http_version, 0, 0)) + return FHP_ERR_CB; + // set state fhp->state = FHP_STATE_VERSION_END; goto retry; @@ -761,7 +950,12 @@ fhp_push( return FHP_OK; } +fhp_env_t * +fhp_get_env(fhp_t * const fhp) { + return fhp->env; +} + void * -fhp_user_data(fhp_t * const fhp) { +fhp_get_user_data(fhp_t * const fhp) { return fhp->user_data; } diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h index 563d2ad..2e1dc95 100644 --- a/include/fhp/fhp.h +++ b/include/fhp/fhp.h @@ -5,6 +5,21 @@ #include // for size_t #include // for size_t +// +// hash functions (djb2) +// (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/) +// + +uint32_t fhp_hash_init(void); +uint32_t fhp_hash_push(uint32_t, uint8_t * const, size_t); +uint32_t fhp_hash_string(char * const); +uint32_t fhp_lc_hash_push(uint32_t, uint8_t * const, size_t); +uint32_t fhp_lc_hash_string(char * const); + +// +// error functions +// + typedef enum { FHP_OK, FHP_ERR_CB, @@ -24,11 +39,23 @@ typedef enum { fhp_err_t fhp_strerror(fhp_err_t, char * const, size_t); +// +// token functions +// + typedef enum { FHP_TOKEN_METHOD_START, FHP_TOKEN_METHOD_FRAGMENT, FHP_TOKEN_METHOD_END, + FHP_TOKEN_METHOD_GET, + FHP_TOKEN_METHOD_POST, + FHP_TOKEN_METHOD_HEAD, + FHP_TOKEN_METHOD_PUT, + FHP_TOKEN_METHOD_DELETE, + FHP_TOKEN_METHOD_OPTIONS, + FHP_TOKEN_METHOD_OTHER, + FHP_TOKEN_URL_START, FHP_TOKEN_URL_FRAGMENT, FHP_TOKEN_URL_END, @@ -37,6 +64,10 @@ typedef enum { FHP_TOKEN_VERSION_FRAGMENT, FHP_TOKEN_VERSION_END, + FHP_TOKEN_VERSION_HTTP_10, + FHP_TOKEN_VERSION_HTTP_11, + 
FHP_TOKEN_VERSION_OTHER, + + FHP_TOKEN_HEADER_NAME_START, FHP_TOKEN_HEADER_NAME_FRAGMENT, FHP_TOKEN_HEADER_NAME_END, @@ -51,6 +82,23 @@ typedef enum { fhp_err_t fhp_strtoken(fhp_token_t, char * const, size_t); +// +// env functions +// + +#define FHP_ENV_NUM_HASHES 15 // one slot per fhp_str_t entry (FHP_STR_LAST in fhp.c) + +typedef struct { + uint32_t hashes[FHP_ENV_NUM_HASHES]; +} fhp_env_t; + +void fhp_env_init(fhp_env_t * const env); +fhp_env_t *fhp_get_default_env(void); + +// +// context functions +// + typedef struct fhp_t_ fhp_t; typedef bool (*fhp_cb_t)( @@ -88,15 +136,18 @@ typedef enum { #define FHP_BUF_SIZE 1024 struct fhp_t_ { + // env pointer + fhp_env_t *env; + + // opaque user data + void *user_data; + // current parser state fhp_state_t state; // user callback fhp_cb_t cb; - // opaque user data - void *user_data; - // last error fhp_err_t err; @@ -107,17 +158,27 @@ struct fhp_t_ { uint8_t buf[FHP_BUF_SIZE]; size_t buf_len; + // buffer hashing state + bool is_hashing; + uint32_t buf_hash; + + // cached http method and version + fhp_token_t http_method, http_version; + // state for url hex decoder uint32_t hex; }; fhp_err_t -fhp_init(fhp_t * const, fhp_cb_t, void * const); +fhp_init(fhp_t * const, fhp_env_t * const, fhp_cb_t, void * const); fhp_err_t fhp_push(fhp_t * const, uint8_t * const, size_t); +fhp_env_t * +fhp_get_env(fhp_t * const); + void * -fhp_user_data(fhp_t * const); +fhp_get_user_data(fhp_t * const); #endif /* FHP_H */ diff --git a/test.c b/test.c index 792316d..fc209b9 100644 --- a/test.c +++ b/test.c @@ -64,7 +64,7 @@ test_basic(void) { fhp_t fhp; // init parser - if ((err = fhp_init(&fhp, basic_cb, NULL)) != FHP_OK) { + if ((err = fhp_init(&fhp, NULL, basic_cb, NULL)) != FHP_OK) { die("test_basic", "fhp_init", err); } @@ -98,7 +98,7 @@ percent_cb( uint8_t * const buf, size_t len ) { - percent_data *data = fhp_user_data(fhp); + percent_data *data = fhp_get_user_data(fhp); switch (token) { case FHP_TOKEN_URL_START: @@ -134,7 +134,7 @@ test_percent(void) { percent_data data; // init parser - 
if ((err = fhp_init(&fhp, percent_cb, &data)) != FHP_OK) { + if ((err = fhp_init(&fhp, NULL, percent_cb, &data)) != FHP_OK) { die("test_percent", "fhp_init", err); } -- cgit v1.2.3