From 1baa78dab630530cabe06ca0d48ac3ab2b30a0f7 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Fri, 26 Aug 2016 23:57:45 -0400 Subject: add url percent decoding --- fhp.c | 193 ++++++++++++++++++++++++++++++++++++++++++++---------- include/fhp/fhp.h | 25 ++++++- test.c | 87 ++++++++++++++++++++++-- 3 files changed, 262 insertions(+), 43 deletions(-) diff --git a/fhp.c b/fhp.c index 7b95722..eb0a7ae 100644 --- a/fhp.c +++ b/fhp.c @@ -3,29 +3,7 @@ #define UNUSED(a) ((void) (a)) -// -// rfc7230, Appendix B -// https://tools.ietf.org/html/rfc7230 -// -// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / -// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA -// token = 1*tchar -// -#define CASE_TOKEN_CHARS \ - case '!': \ - case '#': \ - case '$': \ - case '%': \ - case '&': \ - case '\'': \ - case '*': \ - case '+': \ - case '-': \ - case '.': \ - case '^': \ - case '_': \ - case '|': \ - case '~': \ +#define CASE_ALNUM_CHARS \ case '0': \ case '1': \ case '2': \ @@ -89,6 +67,99 @@ case 'Y': \ case 'Z': +#define CASE_DIGIT_CHARS \ + case '0': \ + case '1': \ + case '2': \ + case '3': \ + case '4': \ + case '5': \ + case '6': \ + case '7': \ + case '8': \ + case '9': + +#define CASE_HEX_ALPHA_CHARS \ + case 'a': \ + case 'b': \ + case 'c': \ + case 'd': \ + case 'e': \ + case 'f': \ + case 'A': \ + case 'B': \ + case 'C': \ + case 'D': \ + case 'E': \ + case 'F': + +// +// rfc7230, Appendix B +// https://tools.ietf.org/html/rfc7230 +// +// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / +// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA +// token = 1*tchar +// +#define CASE_TOKEN_CHARS \ + CASE_ALNUM_CHARS \ + case '!': \ + case '#': \ + case '$': \ + case '%': \ + case '&': \ + case '\'': \ + case '*': \ + case '+': \ + case '-': \ + case '.': \ + case '^': \ + case '_': \ + case '|': \ + case '~': \ + +#define CASE_URL_CHARS \ + CASE_ALNUM_CHARS \ + case '!': \ + case '"': \ + case '#': \ + case '$': \ + case '&': \ + case '\'': \ + case '(': \ + case ')': \ + case '*': \ + case '+': \ + case ',': \ + case '-': \ + case '.': \ + case '/': \ + case ':': \ + case ';': \ + case '<': \ + case '=': \ + case '>': \ + case '?': \ + case '@': \ + case '[': \ + case '\\': \ + case ']': \ + case '^': \ + case '_': \ + case '`': \ + case '{': \ + case '|': \ + case '}': \ + case '~': + +#define CASE_VISUAL_CHARS \ + CASE_URL_CHARS \ + case '%': + +#define CASE_VERSION_CHARS \ + CASE_TOKEN_CHARS \ + case '/': + // // rfc7230, Appendix B // https://tools.ietf.org/html/rfc7230 @@ -105,6 +176,10 @@ fhp_errors[] = { "callback error", "bad state", "invalid character", + "invalid character in HTTP method", + "invalid character in HTTP URL", + "invalid percent-encoded character in HTTP URL", + "invalid character in HTTP version", "invalid error code", "buffer too small", }; @@ -265,8 +340,7 @@ retry: break; default: - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; + return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; @@ -293,8 +367,7 @@ retry: break; default: - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; + return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; @@ -304,12 +377,14 @@ retry: // FIXME: do we want to allow more than one whitespace? // ignore break; - default: + CASE_URL_CHARS if (!fhp->cb(fhp, FHP_TOKEN_URL_START, 0, 0)) return FHP_ERR_CB; fhp->state = FHP_STATE_URL; goto retry; + default: + return FHP_ERR_INVALID_CHAR_IN_URL; } break; @@ -329,12 +404,54 @@ retry: goto retry; break; - default: + case '%': + // set state + fhp->state = FHP_STATE_URL_PERCENT; + + break; + CASE_URL_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte)) return FHP_ERR_CB; } + break; + case FHP_STATE_URL_PERCENT: + switch (byte) { + CASE_DIGIT_CHARS + fhp->hex = byte - '0'; + fhp->state = FHP_STATE_URL_PERCENT_LAST; + + break; + CASE_HEX_ALPHA_CHARS + fhp->hex = 10 + byte - ((byte >= 'a') ? 'a' : 'A'); + fhp->state = FHP_STATE_URL_PERCENT_LAST; + + break; + default: + return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; + } + + break; + case FHP_STATE_URL_PERCENT_LAST: + switch (byte) { + CASE_DIGIT_CHARS + fhp->hex = (fhp->hex << 4) + (byte - '0'); + break; + CASE_HEX_ALPHA_CHARS + fhp->hex = (fhp->hex << 4) + (10 + byte - ((byte >= 'a') ? 'a' : 'A')); + break; + default: + return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; + } + + // add to buffer + if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex)) + return FHP_ERR_CB; + + // set state + fhp->state = FHP_STATE_URL; + break; case FHP_STATE_URL_END: switch (byte) { @@ -367,15 +484,14 @@ retry: goto retry; break; - case ' ': - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; - - break; - default: + CASE_VERSION_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte)) return FHP_ERR_CB; + + break; + default: + return FHP_ERR_INVALID_CHAR_IN_VERSION; } break; @@ -390,7 +506,7 @@ retry: break; default: - // FIXME: invalid character + // never reached return FHP_ERR_INVALID_CHAR; }; @@ -642,3 +758,8 @@ fhp_push( // return success return FHP_OK; } + +void * +fhp_user_data(fhp_t * const fhp) { + return fhp->user_data; +} diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h index 3b7e280..72df8a5 100644 --- a/include/fhp/fhp.h +++ b/include/fhp/fhp.h @@ -10,6 +10,10 @@ typedef enum { FHP_ERR_CB, FHP_ERR_BAD_STATE, FHP_ERR_INVALID_CHAR, + FHP_ERR_INVALID_CHAR_IN_METHOD, + FHP_ERR_INVALID_CHAR_IN_URL, + FHP_ERR_INVALID_CHAR_IN_URL_PERCENT, + FHP_ERR_INVALID_CHAR_IN_VERSION, FHP_ERR_INVALID_ERROR, FHP_ERR_BUFFER_TOO_SMALL, FHP_ERR_LAST @@ -60,6 +64,8 @@ typedef enum { FHP_STATE_METHOD, FHP_STATE_METHOD_END, FHP_STATE_URL, + FHP_STATE_URL_PERCENT, + FHP_STATE_URL_PERCENT_LAST, FHP_STATE_URL_END, FHP_STATE_VERSION, FHP_STATE_VERSION_END, @@ -80,15 +86,27 @@ typedef enum { #define FHP_BUF_SIZE 1024 struct fhp_t_ { + // current parser state fhp_state_t state; + + // user callback fhp_cb_t cb; - uint64_t ofs; - fhp_err_t err; + // opaque user data void *user_data; + // last error + fhp_err_t err; + + // number of bytes read + uint64_t ofs; + + // fragment data buffer uint8_t buf[FHP_BUF_SIZE]; size_t buf_len; + + // state for url hex decoder + uint32_t hex; }; fhp_err_t @@ -97,4 +115,7 @@ fhp_init(fhp_t * const, fhp_cb_t, void * const); fhp_err_t fhp_push(fhp_t * const, uint8_t * const, size_t); +void * +fhp_user_data(fhp_t * const); + #endif /* FHP_H */ diff --git a/test.c b/test.c index 3749ce7..792316d 100644 --- a/test.c +++ b/test.c @@ -23,15 +23,19 @@ static void die( exit(EXIT_FAILURE); } +/**************/ +/* basic test */ +/**************/ + static const char * -str_test_basic = +basic_str = "GET / HTTP/1.1\r\n" "Host: pablotron.org\r\n" "Connection: close\r\n" "\r\n"; static bool -test_basic_cb( +basic_cb( fhp_t *fhp, fhp_token_t token, uint8_t * const buf, @@ -59,21 +63,94 @@ test_basic(void) { fhp_err_t err; fhp_t fhp; - if ((err = fhp_init(&fhp, test_basic_cb, NULL)) != FHP_OK) { + // init parser + if ((err = fhp_init(&fhp, basic_cb, NULL)) != FHP_OK) { die("test_basic", "fhp_init", err); } - size_t len = strlen(str_test_basic); - if ((err = fhp_push(&fhp, (uint8_t*) str_test_basic, len)) != FHP_OK) { + // parse data + size_t len = strlen(basic_str); + if ((err = fhp_push(&fhp, (uint8_t*) basic_str, len)) != FHP_OK) { die("test_basic", "fhp_push", err); } } +/****************/ +/* percent test */ +/****************/ + +typedef struct { + uint8_t buf[1024]; + size_t buf_len; +} percent_data; + +static const char * +percent_str = + "GET /foo%20bar HTTP/1.1\r\n" + "Host: pablotron.org\r\n" + "Connection: close\r\n" + "\r\n"; + +static bool +percent_cb( + fhp_t *fhp, + fhp_token_t token, + uint8_t * const buf, + size_t len +) { + percent_data *data = fhp_user_data(fhp); + + switch (token) { + case FHP_TOKEN_URL_START: + // clear buffer + data->buf_len = 0; + + break; + case FHP_TOKEN_URL_FRAGMENT: + // buffer overflow, do not use in real code!!! + memcpy(data->buf + data->buf_len, buf, len); + data->buf_len += len; + + break; + case FHP_TOKEN_URL_END: + // terminate and print buffer + data->buf[data->buf_len] = '\0'; + fprintf(stderr, "decoded URL: \"%s\"\n", data->buf); + + break; + default: + // do nothing + NULL; + } + + // return success + return true; +} + +static void +test_percent(void) { + fhp_err_t err; + fhp_t fhp; + percent_data data; + + // init parser + if ((err = fhp_init(&fhp, percent_cb, &data)) != FHP_OK) { + die("test_percent", "fhp_init", err); + } + + // parse data + size_t len = strlen(percent_str); + if ((err = fhp_push(&fhp, (uint8_t*) percent_str, len)) != FHP_OK) { + die("test_percent", "fhp_push", err); + } +} + int main(int argc, char *argv[]) { UNUSED(argc); UNUSED(argv); test_basic(); + test_percent(); return EXIT_SUCCESS; } -- cgit v1.2.3