summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2016-08-27 14:39:23 -0400
committerPaul Duncan <pabs@pablotron.org>2016-08-27 14:39:23 -0400
commit19653e153b70e72e9a75e74655c8e56025ce4d61 (patch)
tree93d0b8e740ca1ef6f9883353e1d47754e1196f5c
parent145175a2aa42e02b3d01b012aca6bbc46abc1106 (diff)
downloadlibfhp-19653e153b70e72e9a75e74655c8e56025ce4d61.tar.bz2
libfhp-19653e153b70e72e9a75e74655c8e56025ce4d61.zip
hash http method and version and add tokens for fast parsing
-rw-r--r--Makefile4
-rw-r--r--fhp.c198
-rw-r--r--include/fhp/fhp.h71
-rw-r--r--test.c6
4 files changed, 267 insertions, 12 deletions
diff --git a/Makefile b/Makefile
index fc81c8b..e80b70a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
CC ?= cc
CFLAGS=-std=c99 -W -Wall -pedantic -O2 -Iinclude -fPIC
OBJS=fhp.o
-SONAME=libfhp.so.1
-LIB=libfhp.so.1
+SONAME=libfhp.so
+LIB=libfhp.so
TEST_OBJS=test.o
TEST_APP=./fhp-test
diff --git a/fhp.c b/fhp.c
index 416d054..be4b7f3 100644
--- a/fhp.c
+++ b/fhp.c
@@ -170,6 +170,58 @@
case ' ': \
case '\t':
+//
+// hash functions (djb2)
+// (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/)
+//
+
+// Return the djb2 seed value that every hash computation starts from.
+uint32_t
+fhp_hash_init(void) {
+  const uint32_t seed = 5381;
+  return seed;
+}
+
+// Fold len bytes of buf into a running djb2 hash and return the updated
+// value (hash * 33 + byte for each byte, wrapping at 32 bits).  Pass the
+// result of fhp_hash_init() as hash for the first chunk; the returned
+// value can be fed back in to continue hashing a later chunk.
+uint32_t
+fhp_hash_push(uint32_t hash, uint8_t * const buf, size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    // index with i, not len: buf[len] is one past the end of the input
+    hash = ((hash << 5) + hash) + buf[i];
+  }
+
+  return hash;
+}
+
+// Hash a NUL-terminated string with djb2; the terminator itself is not
+// included in the hash.
+uint32_t
+fhp_hash_string(char * const str) {
+  const size_t len = strlen(str);
+  return fhp_hash_push(fhp_hash_init(), (uint8_t*) str, len);
+}
+
+// Case-folding variant of fhp_hash_push: fold len bytes of buf into a
+// running djb2 hash, mapping ASCII 'A'..'Z' to 'a'..'z' first so that
+// inputs differing only in ASCII letter case hash to the same value.
+// Bytes outside 'A'..'Z' are hashed unchanged.
+uint32_t
+fhp_lc_hash_push(
+  uint32_t hash,
+  uint8_t * const buf,
+  size_t len
+) {
+  for (size_t i = 0; i < len; i++) {
+    // index with i, not len: buf[len] is one past the end of the input
+    uint8_t c = buf[i];
+
+    if (c >= 'A' && c <= 'Z')
+      c = (c - 'A') + 'a';
+
+    hash = ((hash << 5) + hash) + c;
+  }
+
+  return hash;
+}
+
+// Hash a NUL-terminated string with the case-folding djb2 variant; the
+// terminator itself is not included in the hash.
+uint32_t
+fhp_lc_hash_string(char * const str) {
+  const size_t len = strlen(str);
+  return fhp_lc_hash_push(fhp_hash_init(), (uint8_t*) str, len);
+}
+
+//
+// error functions
+//
+
static const char *
fhp_errors[] = {
"OK",
@@ -208,12 +260,24 @@ fhp_strerror(
return FHP_OK;
}
+//
+// token functions
+//
+
static const char *
fhp_tokens[] = {
"METHOD_START",
"METHOD_FRAGMENT",
"METHOD_END",
+ "METHOD_GET",
+ "METHOD_POST",
+ "METHOD_HEAD",
+ "METHOD_PUT",
+ "METHOD_DELETE",
+ "METHOD_OPTIONS",
+ "METHOD_OTHER",
+
"URL_START",
"URL_FRAGMENT",
"URL_END",
@@ -222,6 +286,10 @@ fhp_tokens[] = {
"VERSION_FRAGMENT",
"VERSION_END",
+ "VERSION_HTTP_10",
+ "VERSION_HTTP_11",
+ "VERSION_OTHER",
+
"HEADER_NAME_START",
"HEADER_NAME_FRAGMENT",
"HEADER_NAME_END",
@@ -255,22 +323,92 @@ fhp_strtoken(
return FHP_OK;
}
-static fhp_t DEFAULT_CONTEXT = {
+//
+// string functions
+//
+
+// Indices into fhp_strings, also used to index the per-env hash table
+// filled by fhp_env_init.  FHP_STR_LAST is the number of real entries.
+typedef enum {
+  FHP_STR_GET,
+  FHP_STR_POST,
+  FHP_STR_HEAD,
+  FHP_STR_PUT,
+  FHP_STR_DELETE,
+  FHP_STR_OPTIONS,
+  FHP_STR_HTTP_10,
+  FHP_STR_HTTP_11,
+  FHP_STR_CONTENT_LENGTH,
+  FHP_STR_TRANSFER_ENCODING,
+  FHP_STR_GZIP,
+  FHP_STR_X_GZIP,
+  FHP_STR_DEFLATE,
+  FHP_STR_X_DEFLATE,
+  FHP_STR_CHUNKED,
+  FHP_STR_LAST
+} fhp_str_t;
+
+// Strings whose hashes are precomputed, keyed by fhp_str_t.  The slot at
+// FHP_STR_LAST holds a NULL terminator.
+static char * const
+fhp_strings[] = {
+  [FHP_STR_GET]               = "GET",
+  [FHP_STR_POST]              = "POST",
+  [FHP_STR_HEAD]              = "HEAD",
+  [FHP_STR_PUT]               = "PUT",
+  [FHP_STR_DELETE]            = "DELETE",
+  [FHP_STR_OPTIONS]           = "OPTIONS",
+  [FHP_STR_HTTP_10]           = "HTTP/1.0",
+  [FHP_STR_HTTP_11]           = "HTTP/1.1",
+  [FHP_STR_CONTENT_LENGTH]    = "content-length",
+  [FHP_STR_TRANSFER_ENCODING] = "transfer-encoding",
+  [FHP_STR_GZIP]              = "gzip",
+  [FHP_STR_X_GZIP]            = "x-gzip",
+  [FHP_STR_DEFLATE]           = "deflate",
+  [FHP_STR_X_DEFLATE]         = "x-deflate",
+  [FHP_STR_CHUNKED]           = "chunked",
+  [FHP_STR_LAST]              = NULL
+};
+
+// Populate env->hashes with the hash of every entry in fhp_strings,
+// indexed by fhp_str_t.  env->hashes must have room for FHP_STR_LAST
+// entries.
+//
+// The parser accumulates the method and version hash from the raw
+// request bytes with fhp_hash_push (no case folding), so the method and
+// version entries are hashed case-sensitively here; a lowercased hash of
+// "GET" would never equal the hash of the received bytes "GET".  The
+// remaining entries are lowercase names and keep the case-folding hash.
+void
+fhp_env_init(fhp_env_t * const env) {
+  for (size_t i = 0; i < FHP_STR_LAST; i++) {
+    if (i <= FHP_STR_HTTP_11)
+      env->hashes[i] = fhp_hash_string(fhp_strings[i]);
+    else
+      env->hashes[i] = fhp_lc_hash_string(fhp_strings[i]);
+  }
+}
+
+// Backing storage for the shared default environment.
+static fhp_env_t fhp_default_env;
+
+// Return the shared default environment, filling in its hash table on
+// the first call only; later calls return the same pointer.
+fhp_env_t *
+fhp_get_default_env(void) {
+  static fhp_env_t *cached = NULL;
+
+  if (cached)
+    return cached;
+
+  cached = &fhp_default_env;
+  fhp_env_init(cached);
+
+  return cached;
+}
+
+//
+// context functions
+//
+
+static const fhp_t DEFAULT_CONTEXT = {
.state = FHP_STATE_INIT,
.user_data = NULL,
.cb = NULL,
.err = FHP_OK,
.ofs = 0,
.buf_len = 0,
+ .is_hashing = false,
};
fhp_err_t
fhp_init(
fhp_t * const fhp,
+ fhp_env_t * const env,
fhp_cb_t cb,
void * const user_data
) {
*fhp = DEFAULT_CONTEXT;
+ fhp->env = env ? env : fhp_get_default_env();
fhp->user_data = user_data;
fhp->cb = cb;
@@ -294,6 +432,10 @@ fhp_buf_flush(
if (!fhp->cb(fhp, token, fhp->buf, fhp->buf_len))
return false;
+ // update buffer hash
+ if (fhp->is_hashing)
+ fhp->buf_hash = fhp_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len);
+
// clear buffer
fhp_buf_clear(fhp);
}
@@ -336,6 +478,10 @@ retry:
if (!fhp->cb(fhp, FHP_TOKEN_METHOD_START, 0, 0))
return FHP_ERR_CB;
+ // enable buffer hashing
+ fhp->is_hashing = true;
+ fhp->buf_hash = fhp_hash_init();
+
// set state
fhp->state = FHP_STATE_METHOD;
goto retry;
@@ -359,10 +505,33 @@ retry:
if (!fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT))
return FHP_ERR_CB;
+ // disable buffer hashing
+ fhp->is_hashing = false;
+
// send end token
if (!fhp->cb(fhp, FHP_TOKEN_METHOD_END, 0, 0))
return FHP_ERR_CB;
+ // get method token
+ fhp->http_method = FHP_TOKEN_METHOD_OTHER;
+ if (fhp->buf_hash == fhp->env->hashes[FHP_STR_GET]) {
+ fhp->http_method = FHP_TOKEN_METHOD_GET;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_POST]) {
+ fhp->http_method = FHP_TOKEN_METHOD_POST;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HEAD]) {
+ fhp->http_method = FHP_TOKEN_METHOD_HEAD;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_PUT]) {
+ fhp->http_method = FHP_TOKEN_METHOD_PUT;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_DELETE]) {
+ fhp->http_method = FHP_TOKEN_METHOD_DELETE;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_OPTIONS]) {
+ fhp->http_method = FHP_TOKEN_METHOD_OPTIONS;
+ }
+
+ // send method token
+ if (!fhp->cb(fhp, fhp->http_method, 0, 0))
+ return FHP_ERR_CB;
+
// set state
fhp->state = FHP_STATE_METHOD_END;
goto retry;
@@ -464,6 +633,11 @@ retry:
if (!fhp->cb(fhp, FHP_TOKEN_VERSION_START, 0, 0))
return FHP_ERR_CB;
+ // enable buffer hashing
+ fhp->is_hashing = true;
+ fhp->buf_hash = fhp_hash_init();
+
+ // set state
fhp->state = FHP_STATE_VERSION;
goto retry;
}
@@ -481,6 +655,21 @@ retry:
if (!fhp->cb(fhp, FHP_TOKEN_VERSION_END, 0, 0))
return FHP_ERR_CB;
+ // disable buffer hashing
+ fhp->is_hashing = false;
+
+ // get version token
+ fhp->http_version = FHP_TOKEN_VERSION_OTHER;
+ if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_10]) {
+ fhp->http_version = FHP_TOKEN_VERSION_HTTP_10;
+ } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_11]) {
+ fhp->http_version = FHP_TOKEN_VERSION_HTTP_11;
+ }
+
+ // send version token
+ if (!fhp->cb(fhp, fhp->http_version, 0, 0))
+ return FHP_ERR_CB;
+
// set state
fhp->state = FHP_STATE_VERSION_END;
goto retry;
@@ -761,7 +950,12 @@ fhp_push(
return FHP_OK;
}
+// Return the environment pointer stored in the given parser context.
+fhp_env_t *
+fhp_get_env(fhp_t * const fhp) {
+  fhp_env_t * const env = fhp->env;
+  return env;
+}
+
void *
-fhp_user_data(fhp_t * const fhp) {
+fhp_get_user_data(fhp_t * const fhp) {
return fhp->user_data;
}
diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h
index 563d2ad..2e1dc95 100644
--- a/include/fhp/fhp.h
+++ b/include/fhp/fhp.h
@@ -5,6 +5,21 @@
#include <stddef.h> // for size_t
#include <stdbool.h> // for bool
+//
+// hash functions (djb2)
+// (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/)
+//
+
+uint32_t fhp_hash_init(void);
+uint32_t fhp_hash_push(uint32_t, uint8_t * const, size_t);
+uint32_t fhp_hash_string(char * const);
+uint32_t fhp_lc_hash_push(uint32_t, uint8_t * const, size_t);
+uint32_t fhp_lc_hash_string(char * const);
+
+//
+// error functions
+//
+
typedef enum {
FHP_OK,
FHP_ERR_CB,
@@ -24,11 +39,23 @@ typedef enum {
fhp_err_t
fhp_strerror(fhp_err_t, char * const, size_t);
+//
+// token functions
+//
+
typedef enum {
FHP_TOKEN_METHOD_START,
FHP_TOKEN_METHOD_FRAGMENT,
FHP_TOKEN_METHOD_END,
+ FHP_TOKEN_METHOD_GET,
+ FHP_TOKEN_METHOD_POST,
+ FHP_TOKEN_METHOD_HEAD,
+ FHP_TOKEN_METHOD_PUT,
+ FHP_TOKEN_METHOD_DELETE,
+ FHP_TOKEN_METHOD_OPTIONS,
+ FHP_TOKEN_METHOD_OTHER,
+
FHP_TOKEN_URL_START,
FHP_TOKEN_URL_FRAGMENT,
FHP_TOKEN_URL_END,
@@ -37,6 +64,10 @@ typedef enum {
FHP_TOKEN_VERSION_FRAGMENT,
FHP_TOKEN_VERSION_END,
+ FHP_TOKEN_VERSION_HTTP_10,
+ FHP_TOKEN_VERSION_HTTP_11,
+ FHP_TOKEN_VERSION_OTHER,
+
FHP_TOKEN_HEADER_NAME_START,
FHP_TOKEN_HEADER_NAME_FRAGMENT,
FHP_TOKEN_HEADER_NAME_END,
@@ -51,6 +82,23 @@ typedef enum {
fhp_err_t
fhp_strtoken(fhp_token_t, char * const, size_t);
+//
+// env functions
+//
+
+// Number of precomputed string hashes held by an environment.  This must
+// be at least the number of strings hashed by fhp_env_init() (15: six
+// methods, two versions, seven header/encoding names); a smaller value
+// makes fhp_env_init() write past the end of the array.
+#define FHP_ENV_NUM_HASHES 15
+
+// Precomputed hashes of the strings the parser compares against, filled
+// in by fhp_env_init().
+typedef struct {
+  uint32_t hashes[FHP_ENV_NUM_HASHES];
+} fhp_env_t;
+
+void fhp_env_init(fhp_env_t * const env);
+fhp_env_t *fhp_get_default_env(void);
+
+//
+// context functions
+//
+
typedef struct fhp_t_ fhp_t;
typedef bool (*fhp_cb_t)(
@@ -88,15 +136,18 @@ typedef enum {
#define FHP_BUF_SIZE 1024
struct fhp_t_ {
+ // env pointer
+ fhp_env_t *env;
+
+ // opaque user data
+ void *user_data;
+
// current parser state
fhp_state_t state;
// user callback
fhp_cb_t cb;
- // opaque user data
- void *user_data;
-
// last error
fhp_err_t err;
@@ -107,17 +158,27 @@ struct fhp_t_ {
uint8_t buf[FHP_BUF_SIZE];
size_t buf_len;
+ // buffer hashing state
+ bool is_hashing;
+ uint32_t buf_hash;
+
+ // cached http method and version
+ fhp_token_t http_method, http_version;
+
// state for url hex decoder
uint32_t hex;
};
fhp_err_t
-fhp_init(fhp_t * const, fhp_cb_t, void * const);
+fhp_init(fhp_t * const, fhp_env_t * const, fhp_cb_t, void * const);
fhp_err_t
fhp_push(fhp_t * const, uint8_t * const, size_t);
+fhp_env_t *
+fhp_get_env(fhp_t * const);
+
void *
-fhp_user_data(fhp_t * const);
+fhp_get_user_data(fhp_t * const);
#endif /* FHP_H */
diff --git a/test.c b/test.c
index 792316d..fc209b9 100644
--- a/test.c
+++ b/test.c
@@ -64,7 +64,7 @@ test_basic(void) {
fhp_t fhp;
// init parser
- if ((err = fhp_init(&fhp, basic_cb, NULL)) != FHP_OK) {
+ if ((err = fhp_init(&fhp, NULL, basic_cb, NULL)) != FHP_OK) {
die("test_basic", "fhp_init", err);
}
@@ -98,7 +98,7 @@ percent_cb(
uint8_t * const buf,
size_t len
) {
- percent_data *data = fhp_user_data(fhp);
+ percent_data *data = fhp_get_user_data(fhp);
switch (token) {
case FHP_TOKEN_URL_START:
@@ -134,7 +134,7 @@ test_percent(void) {
percent_data data;
// init parser
- if ((err = fhp_init(&fhp, percent_cb, &data)) != FHP_OK) {
+ if ((err = fhp_init(&fhp, NULL, percent_cb, &data)) != FHP_OK) {
die("test_percent", "fhp_init", err);
}