#include #include "fhp/fhp.h" #define UNUSED(a) ((void) (a)) #define CASE_ALNUM_CHARS \ case '0': \ case '1': \ case '2': \ case '3': \ case '4': \ case '5': \ case '6': \ case '7': \ case '8': \ case '9': \ case 'a': \ case 'b': \ case 'c': \ case 'd': \ case 'e': \ case 'f': \ case 'g': \ case 'h': \ case 'i': \ case 'j': \ case 'k': \ case 'l': \ case 'm': \ case 'n': \ case 'o': \ case 'p': \ case 'q': \ case 'r': \ case 's': \ case 't': \ case 'u': \ case 'v': \ case 'w': \ case 'x': \ case 'y': \ case 'z': \ case 'A': \ case 'B': \ case 'C': \ case 'D': \ case 'E': \ case 'F': \ case 'G': \ case 'H': \ case 'I': \ case 'J': \ case 'K': \ case 'L': \ case 'M': \ case 'N': \ case 'O': \ case 'P': \ case 'Q': \ case 'R': \ case 'S': \ case 'T': \ case 'U': \ case 'V': \ case 'W': \ case 'X': \ case 'Y': \ case 'Z': #define CASE_DIGIT_CHARS \ case '0': \ case '1': \ case '2': \ case '3': \ case '4': \ case '5': \ case '6': \ case '7': \ case '8': \ case '9': #define CASE_HEX_ALPHA_CHARS \ case 'a': \ case 'b': \ case 'c': \ case 'd': \ case 'e': \ case 'f': \ case 'A': \ case 'B': \ case 'C': \ case 'D': \ case 'E': \ case 'F': // // rfc7230, Appendix B // https://tools.ietf.org/html/rfc7230 // // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA // token = 1*tchar // #define CASE_TOKEN_CHARS \ CASE_ALNUM_CHARS \ case '!': \ case '#': \ case '$': \ case '%': \ case '&': \ case '\'': \ case '*': \ case '+': \ case '-': \ case '.': \ case '^': \ case '_': \ case '|': \ case '~': \ #define CASE_URL_CHARS \ CASE_ALNUM_CHARS \ case '!': \ case '"': \ case '#': \ case '$': \ case '&': \ case '\'': \ case '(': \ case ')': \ case '*': \ case '+': \ case ',': \ case '-': \ case '.': \ case '/': \ case ':': \ case ';': \ case '<': \ case '=': \ case '>': \ case '?': \ case '@': \ case '[': \ case '\\': \ case ']': \ case '^': \ case '_': \ case '`': \ case '{': \ case '|': \ case '}': \ case '~': #define CASE_VISUAL_CHARS \ CASE_URL_CHARS \ case '%': #define CASE_VERSION_CHARS \ CASE_TOKEN_CHARS \ case '/': // // rfc7230, Appendix B // https://tools.ietf.org/html/rfc7230 // // OWS = *( SP / HTAB ) // #define CASE_OWS_CHARS \ case ' ': \ case '\t': // // hash functions (djb2) // (see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/) // uint32_t fhp_hash_init(void) { return 5381; } uint32_t fhp_hash_push(uint32_t hash, uint8_t * const buf, size_t len) { for (size_t i = 0; i < len; i++) hash = ((hash << 5) + hash) + buf[len]; return hash; } uint32_t fhp_hash_string(char * const str) { uint32_t r = fhp_hash_init(); return fhp_hash_push(r, (uint8_t*) str, strlen(str)); } uint32_t fhp_lc_hash_push( uint32_t hash, uint8_t * const buf, size_t len ) { for (size_t i = 0; i < len; i++) { uint8_t c = buf[len]; if (c >= 'A' && c <= 'Z') c = (c - 'A') + 'a'; hash = ((hash << 5) + hash) + c; } return hash; } uint32_t fhp_lc_hash_string(char * const str) { uint32_t r = fhp_hash_init(); return fhp_lc_hash_push(r, (uint8_t*) str, strlen(str)); } // // error functions // static const char * fhp_errors[] = { "OK", "callback error", "bad state", "invalid character", "invalid character in HTTP method", "invalid character in HTTP URL", "invalid percent-encoded character in HTTP URL", "invalid character in HTTP version", "invalid character after carriage return", "invalid character in HTTP header name", "invalid error code", "buffer too small", }; fhp_err_t fhp_strerror( fhp_err_t err, char * const buf, size_t len ) { // check error code if (err >= FHP_ERR_LAST) return FHP_ERR_INVALID_ERROR; // check buffer size size_t err_len = strlen(fhp_errors[err]) + 1; if (len < err_len) return FHP_ERR_BUFFER_TOO_SMALL; // copy string memcpy(buf, fhp_errors[err], err_len); // return success return FHP_OK; } // // token functions // static const char * fhp_tokens[] = { "METHOD_START", "METHOD_FRAGMENT", "METHOD_END", "METHOD_GET", "METHOD_POST", "METHOD_HEAD", "METHOD_PUT", "METHOD_DELETE", "METHOD_OPTIONS", "METHOD_OTHER", "URL_START", "URL_FRAGMENT", "URL_END", "VERSION_START", "VERSION_FRAGMENT", "VERSION_END", "VERSION_HTTP_10", "VERSION_HTTP_11", "VERSION_OTHER", "HEADER_NAME_START", "HEADER_NAME_FRAGMENT", "HEADER_NAME_END", "HEADER_VALUE_START", "HEADER_VALUE_FRAGMENT", "HEADER_VALUE_END", "LAST" }; fhp_err_t fhp_strtoken( fhp_token_t token, char * const buf, size_t len ) { // check token code if (token >= FHP_TOKEN_LAST) return FHP_ERR_INVALID_ERROR; // check buffer size size_t str_len = strlen(fhp_tokens[token]) + 1; if (len < str_len) return FHP_ERR_BUFFER_TOO_SMALL; // copy string memcpy(buf, fhp_tokens[token], str_len); // return success return FHP_OK; } // // string functions // static char * const fhp_strings[] = { "GET", "POST", "HEAD", "PUT", "DELETE", "OPTIONS", "HTTP/1.0", "HTTP/1.1", "content-length", "transfer-encoding", "gzip", "x-gzip", "deflate", "x-deflate", "chunked", NULL }; typedef enum { FHP_STR_GET, FHP_STR_POST, FHP_STR_HEAD, FHP_STR_PUT, FHP_STR_DELETE, FHP_STR_OPTIONS, FHP_STR_HTTP_10, FHP_STR_HTTP_11, FHP_STR_CONTENT_LENGTH, FHP_STR_TRANSFER_ENCODING, FHP_STR_GZIP, FHP_STR_X_GZIP, FHP_STR_DEFLATE, FHP_STR_X_DEFLATE, FHP_STR_CHUNKED, FHP_STR_LAST } fhp_str_t; void fhp_env_init(fhp_env_t * const env) { for (size_t i = 0; i < FHP_STR_LAST; i++) env->hashes[i] = fhp_lc_hash_string(fhp_strings[i]); } static fhp_env_t fhp_default_env; fhp_env_t * fhp_get_default_env(void) { static fhp_env_t *r = NULL; if (!r) { r = &fhp_default_env; fhp_env_init(r); } return r; } // // context functions // static const fhp_t DEFAULT_CONTEXT = { .state = FHP_STATE_INIT, .user_data = NULL, .cb = NULL, .err = FHP_OK, .ofs = 0, .buf_len = 0, .is_hashing = false, .header_name_hash = 0, }; fhp_err_t fhp_init( fhp_t * const fhp, fhp_env_t * const env, fhp_cb_t cb, void * const user_data ) { *fhp = DEFAULT_CONTEXT; fhp->env = env ? env : fhp_get_default_env(); fhp->user_data = user_data; fhp->cb = cb; /* return success */ return FHP_OK; } static void fhp_buf_clear(fhp_t * const fhp) { // clear buffer fhp->buf_len = 0; } static bool fhp_buf_flush( fhp_t * const fhp, fhp_token_t token ) { if (fhp->buf_len > 0) { // push data if (!fhp->cb(fhp, token, fhp->buf, fhp->buf_len)) return false; // update buffer hash if (fhp->is_hashing) fhp->buf_hash = fhp_hash_push(fhp->buf_hash, fhp->buf, fhp->buf_len); // clear buffer fhp_buf_clear(fhp); } // return success return true; } static bool fhp_buf_push( fhp_t * const fhp, fhp_token_t token, uint8_t byte ) { // flush buffer if (fhp->buf_len + 1 >= FHP_BUF_SIZE) { if (!fhp_buf_flush(fhp, token)) return false; } // append to buffer fhp->buf[fhp->buf_len] = byte; fhp->buf_len++; // return success return true; } static fhp_err_t fhp_push_byte( fhp_t * const fhp, uint8_t byte ) { retry: switch (fhp->state) { case FHP_STATE_INIT: switch (byte) { CASE_TOKEN_CHARS // send start token if (!fhp->cb(fhp, FHP_TOKEN_METHOD_START, 0, 0)) return FHP_ERR_CB; // enable buffer hashing fhp->is_hashing = true; fhp->buf_hash = fhp_hash_init(); // set state fhp->state = FHP_STATE_METHOD; goto retry; break; default: return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; case FHP_STATE_METHOD: switch (byte) { CASE_TOKEN_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_METHOD_FRAGMENT, byte)) return FHP_ERR_CB; break; case ' ': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_METHOD_FRAGMENT)) return FHP_ERR_CB; // disable buffer hashing fhp->is_hashing = false; // send end token if (!fhp->cb(fhp, FHP_TOKEN_METHOD_END, 0, 0)) return FHP_ERR_CB; // get method token fhp->http_method = FHP_TOKEN_METHOD_OTHER; if (fhp->buf_hash == fhp->env->hashes[FHP_STR_GET]) { fhp->http_method = FHP_TOKEN_METHOD_GET; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_POST]) { fhp->http_method = FHP_TOKEN_METHOD_POST; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HEAD]) { fhp->http_method = FHP_TOKEN_METHOD_HEAD; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_PUT]) { fhp->http_method = FHP_TOKEN_METHOD_PUT; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_DELETE]) { fhp->http_method = FHP_TOKEN_METHOD_DELETE; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_OPTIONS]) { fhp->http_method = FHP_TOKEN_METHOD_OPTIONS; } // send method token if (!fhp->cb(fhp, fhp->http_method, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_METHOD_END; goto retry; break; default: return FHP_ERR_INVALID_CHAR_IN_METHOD; } break; case FHP_STATE_METHOD_END: switch (byte) { case ' ': // FIXME: do we want to allow more than one whitespace? // ignore break; CASE_URL_CHARS if (!fhp->cb(fhp, FHP_TOKEN_URL_START, 0, 0)) return FHP_ERR_CB; fhp->state = FHP_STATE_URL; goto retry; default: return FHP_ERR_INVALID_CHAR_IN_URL; } break; case FHP_STATE_URL: switch (byte) { case ' ': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_URL_FRAGMENT)) return FHP_ERR_CB; // send end token if (!fhp->cb(fhp, FHP_TOKEN_URL_END, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_URL_END; goto retry; break; case '%': // set state fhp->state = FHP_STATE_URL_PERCENT; break; CASE_URL_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte)) return FHP_ERR_CB; } break; case FHP_STATE_URL_PERCENT: switch (byte) { CASE_DIGIT_CHARS fhp->hex = byte - '0'; fhp->state = FHP_STATE_URL_PERCENT_LAST; break; CASE_HEX_ALPHA_CHARS fhp->hex = 10 + byte - ((byte >= 'a') ? 'a' : 'A'); fhp->state = FHP_STATE_URL_PERCENT_LAST; break; default: return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; } break; case FHP_STATE_URL_PERCENT_LAST: switch (byte) { CASE_DIGIT_CHARS fhp->hex = (fhp->hex << 4) + (byte - '0'); break; CASE_HEX_ALPHA_CHARS fhp->hex = (fhp->hex << 4) + (10 + byte - ((byte >= 'a') ? 'a' : 'A')); break; default: return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT; } // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_URL; break; case FHP_STATE_URL_END: switch (byte) { case ' ': // ignore break; default: if (!fhp->cb(fhp, FHP_TOKEN_VERSION_START, 0, 0)) return FHP_ERR_CB; // enable buffer hashing fhp->is_hashing = true; fhp->buf_hash = fhp_hash_init(); // set state fhp->state = FHP_STATE_VERSION; goto retry; } break; case FHP_STATE_VERSION: switch (byte) { case '\r': case '\n': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_VERSION_FRAGMENT)) return FHP_ERR_CB; // send end token if (!fhp->cb(fhp, FHP_TOKEN_VERSION_END, 0, 0)) return FHP_ERR_CB; // disable buffer hashing fhp->is_hashing = false; // get version token fhp->http_version = FHP_TOKEN_VERSION_OTHER; if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_10]) { fhp->http_version = FHP_TOKEN_VERSION_HTTP_10; } else if (fhp->buf_hash == fhp->env->hashes[FHP_STR_HTTP_11]) { fhp->http_version = FHP_TOKEN_VERSION_HTTP_11; } // send version token if (!fhp->cb(fhp, fhp->http_version, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_VERSION_END; goto retry; break; CASE_VERSION_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte)) return FHP_ERR_CB; break; default: return FHP_ERR_INVALID_CHAR_IN_VERSION; } break; case FHP_STATE_VERSION_END: switch (byte) { case '\r': fhp->state = FHP_STATE_VERSION_END_CR; break; case '\n': fhp->state = FHP_STATE_STATUS_END; break; default: // invalid character // (should never be reached) return FHP_ERR_INVALID_CHAR; }; break; case FHP_STATE_VERSION_END_CR: switch (byte) { case '\n': fhp->state = FHP_STATE_STATUS_END; break; default: return FHP_ERR_INVALID_CHAR_AFTER_CR; }; break; case FHP_STATE_STATUS_END: switch (byte) { CASE_TOKEN_CHARS // send start token if (!fhp->cb(fhp, FHP_TOKEN_HEADER_NAME_START, 0, 0)) return FHP_ERR_CB; // enable buffer hashing fhp->is_hashing = true; fhp->buf_hash = fhp_hash_init(); // set state fhp->state = FHP_STATE_HEADER_NAME; goto retry; break; case '\r': // set state fhp->state = FHP_STATE_STATUS_END_CR; break; case '\n': // set state fhp->state = FHP_STATE_HEADERS_END; break; } break; case FHP_STATE_STATUS_END_CR: switch (byte) { case '\n': // set state fhp->state = FHP_STATE_HEADERS_END; break; default: return FHP_ERR_INVALID_CHAR_AFTER_CR; } break; case FHP_STATE_HEADER_NAME: switch (byte) { CASE_TOKEN_CHARS // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte)) return FHP_ERR_CB; break; case ':': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT)) return FHP_ERR_CB; // send end token if (!fhp->cb(fhp, FHP_TOKEN_HEADER_NAME_END, 0, 0)) return FHP_ERR_CB; // disable buffer hashing fhp->is_hashing = false; fhp->header_name_hash = fhp->buf_hash; // set state fhp->state = FHP_STATE_HEADER_NAME_END; break; default: return FHP_ERR_INVALID_CHAR_IN_HEADER_NAME; } break; case FHP_STATE_HEADER_NAME_END: switch (byte) { CASE_OWS_CHARS // ignore leading spaces break; default: // send start token if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_START, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_HEADER_VALUE; goto retry; break; } break; case FHP_STATE_HEADER_VALUE: switch (byte) { case '\r': fhp->state = FHP_STATE_HEADER_VALUE_END_CR; break; case '\n': fhp->state = FHP_STATE_HEADER_VALUE_END; break; default: // FIXME: need more limits on valid octets // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, byte)) return FHP_ERR_CB; break; } break; case FHP_STATE_HEADER_VALUE_END_CR: switch (byte) { case '\n': fhp->state = FHP_STATE_HEADER_VALUE_END; break; default: return FHP_ERR_INVALID_CHAR; } break; case FHP_STATE_HEADER_VALUE_END: switch (byte) { CASE_OWS_CHARS // ows-fold // add space to buffer // folding to ' ', as per RFC7230 3.2.4 // https://tools.ietf.org/html/rfc7230#section-3.2.4 if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT, ' ')) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_HEADER_VALUE; break; CASE_TOKEN_CHARS // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) return FHP_ERR_CB; // end header value if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0)) return FHP_ERR_CB; // send start token if (!fhp->cb(fhp, FHP_TOKEN_HEADER_NAME_START, 0, 0)) return FHP_ERR_CB; // add to buffer if (!fhp_buf_push(fhp, FHP_TOKEN_HEADER_NAME_FRAGMENT, byte)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_HEADER_NAME; break; case '\r': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) return FHP_ERR_CB; // end header value if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_HEADERS_END_CR; break; case '\n': // flush buffer if (!fhp_buf_flush(fhp, FHP_TOKEN_HEADER_VALUE_FRAGMENT)) return FHP_ERR_CB; // end header value if (!fhp->cb(fhp, FHP_TOKEN_HEADER_VALUE_END, 0, 0)) return FHP_ERR_CB; // set state fhp->state = FHP_STATE_HEADERS_END; break; default: return FHP_ERR_INVALID_CHAR; } break; case FHP_STATE_HEADERS_END_CR: switch (byte) { case '\n': fhp->state = FHP_STATE_HEADERS_END; break; default: return FHP_ERR_INVALID_CHAR; } break; case FHP_STATE_HEADERS_END: // TODO break; default: // invalid state // (should never be reached) return FHP_ERR_BAD_STATE; } // increment byte offset fhp->ofs++; /* return success */ return FHP_OK; } fhp_err_t fhp_push( fhp_t * const fhp, uint8_t * const buf, size_t len ) { switch (fhp->state) { case FHP_STATE_ERROR: return fhp->err; break; default: for (size_t i = 0; i < len; i++) { // push byte fhp_err_t err = fhp_push_byte(fhp, buf[i]); // check result if (err != FHP_OK) { fhp->state = FHP_STATE_ERROR; fhp->err = err; return err; } } } // return success return FHP_OK; } fhp_env_t * fhp_get_env(fhp_t * const fhp) { return fhp->env; } void * fhp_get_user_data(fhp_t * const fhp) { return fhp->user_data; }