From 1baa78dab630530cabe06ca0d48ac3ab2b30a0f7 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Fri, 26 Aug 2016 23:57:45 -0400 Subject: add url percent decoding --- fhp.c | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 157 insertions(+), 36 deletions(-) (limited to 'fhp.c') diff --git a/fhp.c b/fhp.c index 7b95722..eb0a7ae 100644 --- a/fhp.c +++ b/fhp.c @@ -3,29 +3,7 @@ #define UNUSED(a) ((void) (a)) -// -// rfc7230, Appendix B -// https://tools.ietf.org/html/rfc7230 -// -// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / -// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA -// token = 1*tchar -// -#define CASE_TOKEN_CHARS \ - case '!': \ - case '#': \ - case '$': \ - case '%': \ - case '&': \ - case '\'': \ - case '*': \ - case '+': \ - case '-': \ - case '.': \ - case '^': \ - case '_': \ - case '|': \ - case '~': \ +#define CASE_ALNUM_CHARS \ case '0': \ case '1': \ case '2': \ @@ -89,6 +67,99 @@ case 'Y': \ case 'Z': +#define CASE_DIGIT_CHARS \ + case '0': \ + case '1': \ + case '2': \ + case '3': \ + case '4': \ + case '5': \ + case '6': \ + case '7': \ + case '8': \ + case '9': + +#define CASE_HEX_ALPHA_CHARS \ + case 'a': \ + case 'b': \ + case 'c': \ + case 'd': \ + case 'e': \ + case 'f': \ + case 'A': \ + case 'B': \ + case 'C': \ + case 'D': \ + case 'E': \ + case 'F': + +// +// rfc7230, Appendix B +// https://tools.ietf.org/html/rfc7230 +// +// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / +// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA +// token = 1*tchar +// +#define CASE_TOKEN_CHARS \ + CASE_ALNUM_CHARS \ + case '!': \ + case '#': \ + case '$': \ + case '%': \ + case '&': \ + case '\'': \ + case '*': \ + case '+': \ + case '-': \ + case '.': \ + case '^': \ + case '_': \ + case '|': \ + case '~': \ + +#define CASE_URL_CHARS \ + CASE_ALNUM_CHARS \ + case '!': \ + case '"': \ + case '#': \ + case '$': \ + case '&': \ + case '\'': \ + case '(': \ + case ')': \ + case '*': \ + case '+': \ + case ',': \ + case '-': \ + case '.': \ + case '/': \ + case ':': \ + case ';': \ + case '<': \ + case '=': \ + case '>': \ + case '?': \ + case '@': \ + case '[': \ + case '\\': \ + case ']': \ + case '^': \ + case '_': \ + case '`': \ + case '{': \ + case '|': \ + case '}': \ + case '~': + +#define CASE_VISUAL_CHARS \ + CASE_URL_CHARS \ + case '%': + +#define CASE_VERSION_CHARS \ + CASE_TOKEN_CHARS \ + case '/': + // // rfc7230, Appendix B // https://tools.ietf.org/html/rfc7230 @@ -105,6 +176,10 @@ fhp_errors[] = { "callback error", "bad state", "invalid character", + "invalid character in HTTP method", + "invalid character in HTTP URL", + "invalid percent-encoded character in HTTP URL", + "invalid character in HTTP version", "invalid error code", "buffer too small", }; @@ -265,8 +340,7 @@ retry: break; default: - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; + return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; @@ -293,8 +367,7 @@ retry: break; default: - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; + return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; @@ -304,12 +377,14 @@ retry: // FIXME: do we want to allow more than one whitespace? // ignore break; - default: + CASE_URL_CHARS if (!fhp->cb(fhp, FHP_TOKEN_URL_START, 0, 0)) return FHP_ERR_CB; fhp->state = FHP_STATE_URL; goto retry; + default: + return FHP_ERR_INVALID_CHAR_IN_URL; } break; @@ -329,12 +404,54 @@ retry: goto retry; break; - default: + case '%': + // set state + fhp->state = FHP_STATE_URL_PERCENT; + + break; + CASE_URL_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte)) return FHP_ERR_CB; } + break; + case FHP_STATE_URL_PERCENT: + switch (byte) { + CASE_DIGIT_CHARS + fhp->hex = byte - '0'; + fhp->state = FHP_STATE_URL_PERCENT_LAST; + + break; + CASE_HEX_ALPHA_CHARS + fhp->hex = 10 + byte - ((byte >= 'a') ? 'a' : 'A'); + fhp->state = FHP_STATE_URL_PERCENT_LAST; + + break; + default: + return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; + } + + break; + case FHP_STATE_URL_PERCENT_LAST: + switch (byte) { + CASE_DIGIT_CHARS + fhp->hex = (fhp->hex << 4) + (byte - '0'); + break; + CASE_HEX_ALPHA_CHARS + fhp->hex = (fhp->hex << 4) + (10 + byte - ((byte >= 'a') ? 'a' : 'A')); + break; + default: + return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; + } + + // add to buffer + if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex)) + return FHP_ERR_CB; + + // set state + fhp->state = FHP_STATE_URL; + break; case FHP_STATE_URL_END: switch (byte) { @@ -367,15 +484,14 @@ retry: goto retry; break; - case ' ': - // FIXME: invalid character - return FHP_ERR_INVALID_CHAR; - - break; - default: + CASE_VERSION_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte)) return FHP_ERR_CB; + + break; + default: + return FHP_ERR_INVALID_CHAR_IN_VERSION; } break; @@ -390,7 +506,7 @@ retry: break; default: - // FIXME: invalid character + // never reached return FHP_ERR_INVALID_CHAR; }; @@ -642,3 +758,8 @@ fhp_push( // return success return FHP_OK; } + +void * +fhp_user_data(fhp_t * const fhp) { + return fhp->user_data; +} -- cgit v1.2.3