summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Duncan <pabs@pablotron.org> 2016-08-26 23:57:45 -0400
committer Paul Duncan <pabs@pablotron.org> 2016-08-26 23:57:45 -0400
commit 1baa78dab630530cabe06ca0d48ac3ab2b30a0f7 (patch)
tree b3fcceb7ac872d448f86101f876694aa88de9696
parent 23661f102f363ae9e783afb06cf5d0b8474fdc16 (diff)
download libfhp-1baa78dab630530cabe06ca0d48ac3ab2b30a0f7.tar.bz2
download libfhp-1baa78dab630530cabe06ca0d48ac3ab2b30a0f7.zip
add url percent decoding
-rw-r--r-- fhp.c 193
-rw-r--r-- include/fhp/fhp.h 25
-rw-r--r-- test.c 87
3 files changed, 262 insertions, 43 deletions
diff --git a/fhp.c b/fhp.c
index 7b95722..eb0a7ae 100644
--- a/fhp.c
+++ b/fhp.c
@@ -3,29 +3,7 @@
#define UNUSED(a) ((void) (a))
-//
-// rfc7230, Appendix B
-// https://tools.ietf.org/html/rfc7230
-//
-// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
-// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
-// token = 1*tchar
-//
-#define CASE_TOKEN_CHARS \
- case '!': \
- case '#': \
- case '$': \
- case '%': \
- case '&': \
- case '\'': \
- case '*': \
- case '+': \
- case '-': \
- case '.': \
- case '^': \
- case '_': \
- case '|': \
- case '~': \
+#define CASE_ALNUM_CHARS \
case '0': \
case '1': \
case '2': \
@@ -89,6 +67,99 @@
case 'Y': \
case 'Z':
+// Case labels for the ASCII decimal digits '0' through '9'.
+// Expands to a run of case labels with no statements, so it must be used
+// inside a switch and be followed by the statements for that branch.
+#define CASE_DIGIT_CHARS \
+ case '0': \
+ case '1': \
+ case '2': \
+ case '3': \
+ case '4': \
+ case '5': \
+ case '6': \
+ case '7': \
+ case '8': \
+ case '9':
+
+// Case labels for the letters that are hexadecimal digits, in both
+// lower case ('a'-'f') and upper case ('A'-'F'). Decimal digits are not
+// included; pair with CASE_DIGIT_CHARS to accept any hex digit.
+#define CASE_HEX_ALPHA_CHARS \
+ case 'a': \
+ case 'b': \
+ case 'c': \
+ case 'd': \
+ case 'e': \
+ case 'f': \
+ case 'A': \
+ case 'B': \
+ case 'C': \
+ case 'D': \
+ case 'E': \
+ case 'F':
+
+//
+// rfc7230, Appendix B
+// https://tools.ietf.org/html/rfc7230
+//
+// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+// token = 1*tchar
+//
+// Case labels for every tchar: the characters from CASE_ALNUM_CHARS plus
+// the punctuation listed above (including the backtick). Expands to
+// case labels only, so it must be used inside a switch and be followed
+// by the statements for that branch. The last label carries no line
+// continuation, so the macro does not depend on the line after it.
+//
+#define CASE_TOKEN_CHARS \
+ CASE_ALNUM_CHARS \
+ case '!': \
+ case '#': \
+ case '$': \
+ case '%': \
+ case '&': \
+ case '\'': \
+ case '*': \
+ case '+': \
+ case '-': \
+ case '.': \
+ case '^': \
+ case '_': \
+ case '`': \
+ case '|': \
+ case '~':
+
+// Case labels for bytes accepted verbatim inside a request URL: the
+// characters from CASE_ALNUM_CHARS plus every printable ASCII punctuation
+// character except '%'. The percent sign is left out on purpose so that
+// callers can give it its own case label (it introduces a percent-encoded
+// byte). Space is not included either.
+#define CASE_URL_CHARS \
+ CASE_ALNUM_CHARS \
+ case '!': \
+ case '"': \
+ case '#': \
+ case '$': \
+ case '&': \
+ case '\'': \
+ case '(': \
+ case ')': \
+ case '*': \
+ case '+': \
+ case ',': \
+ case '-': \
+ case '.': \
+ case '/': \
+ case ':': \
+ case ';': \
+ case '<': \
+ case '=': \
+ case '>': \
+ case '?': \
+ case '@': \
+ case '[': \
+ case '\\': \
+ case ']': \
+ case '^': \
+ case '_': \
+ case '`': \
+ case '{': \
+ case '|': \
+ case '}': \
+ case '~':
+
+// Case labels for CASE_URL_CHARS plus the '%' character that
+// CASE_URL_CHARS leaves out.
+// NOTE(review): presumably this amounts to every printable, non-space
+// ASCII character -- confirm against the full CASE_ALNUM_CHARS list.
+#define CASE_VISUAL_CHARS \
+ CASE_URL_CHARS \
+ case '%':
+
+// Case labels for bytes accepted in the HTTP version field: every
+// character in CASE_TOKEN_CHARS plus '/' (as in "HTTP/1.1").
+#define CASE_VERSION_CHARS \
+ CASE_TOKEN_CHARS \
+ case '/':
+
//
// rfc7230, Appendix B
// https://tools.ietf.org/html/rfc7230
@@ -105,6 +176,10 @@ fhp_errors[] = {
"callback error",
"bad state",
"invalid character",
+ "invalid character in HTTP method",
+ "invalid character in HTTP URL",
+ "invalid percent-encoded character in HTTP URL",
+ "invalid character in HTTP version",
"invalid error code",
"buffer too small",
};
@@ -265,8 +340,7 @@ retry:
break;
default:
- // FIXME: invalid character
- return FHP_ERR_INVALID_CHAR;
+ return FHP_ERR_INVALID_CHAR_IN_METHOD;
}
break;
@@ -293,8 +367,7 @@ retry:
break;
default:
- // FIXME: invalid character
- return FHP_ERR_INVALID_CHAR;
+ return FHP_ERR_INVALID_CHAR_IN_METHOD;
}
break;
@@ -304,12 +377,14 @@ retry:
// FIXME: do we want to allow more than one whitespace?
// ignore
break;
- default:
+ CASE_URL_CHARS
if (!fhp->cb(fhp, FHP_TOKEN_URL_START, 0, 0))
return FHP_ERR_CB;
fhp->state = FHP_STATE_URL;
goto retry;
+ default:
+ return FHP_ERR_INVALID_CHAR_IN_URL;
}
break;
@@ -329,13 +404,55 @@ retry:
goto retry;
break;
- default:
+ case '%':
+ // set state
+ fhp->state = FHP_STATE_URL_PERCENT;
+
+ break;
+ CASE_URL_CHARS
// add to buffer
if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, byte))
return FHP_ERR_CB;
}
break;
+ case FHP_STATE_URL_PERCENT:
+ switch (byte) {
+ CASE_DIGIT_CHARS
+ fhp->hex = byte - '0';
+ fhp->state = FHP_STATE_URL_PERCENT_LAST;
+
+ break;
+ CASE_HEX_ALPHA_CHARS
+ fhp->hex = 10 + byte - ((byte >= 'a') ? 'a' : 'A');
+ fhp->state = FHP_STATE_URL_PERCENT_LAST;
+
+ break;
+ default:
+ return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT;
+ }
+
+ break;
+ case FHP_STATE_URL_PERCENT_LAST:
+ switch (byte) {
+ CASE_DIGIT_CHARS
+ fhp->hex = (fhp->hex << 4) + (byte - '0');
+ break;
+ CASE_HEX_ALPHA_CHARS
+ fhp->hex = (fhp->hex << 4) + (10 + byte - ((byte >= 'a') ? 'a' : 'A'));
+ break;
+ default:
+ return FHP_ERR_INVALID_CHAR_IN_URL_PERCENT;
+ }
+
+ // add to buffer
+ if (!fhp_buf_push(fhp, FHP_TOKEN_URL_FRAGMENT, fhp->hex))
+ return FHP_ERR_CB;
+
+ // set state
+ fhp->state = FHP_STATE_URL;
+
+ break;
case FHP_STATE_URL_END:
switch (byte) {
case ' ':
@@ -367,15 +484,14 @@ retry:
goto retry;
break;
- case ' ':
- // FIXME: invalid character
- return FHP_ERR_INVALID_CHAR;
-
- break;
- default:
+ CASE_VERSION_CHARS
// add to buffer
if (!fhp_buf_push(fhp, FHP_TOKEN_VERSION_FRAGMENT, byte))
return FHP_ERR_CB;
+
+ break;
+ default:
+ return FHP_ERR_INVALID_CHAR_IN_VERSION;
}
break;
@@ -390,7 +506,7 @@ retry:
break;
default:
- // FIXME: invalid character
+ // never reached
return FHP_ERR_INVALID_CHAR;
};
@@ -642,3 +758,8 @@ fhp_push(
// return success
return FHP_OK;
}
+
+// Return the opaque user_data pointer stored in the parser context.
+// The pointer is returned exactly as stored; fhp itself is dereferenced
+// without a NULL check, so callers must pass a valid parser.
+// NOTE(review): presumably this is the value handed to fhp_init() as its
+// last argument -- confirm against fhp_init().
+void *
+fhp_user_data(fhp_t * const fhp) {
+ return fhp->user_data;
+}
diff --git a/include/fhp/fhp.h b/include/fhp/fhp.h
index 3b7e280..72df8a5 100644
--- a/include/fhp/fhp.h
+++ b/include/fhp/fhp.h
@@ -10,6 +10,10 @@ typedef enum {
FHP_ERR_CB,
FHP_ERR_BAD_STATE,
FHP_ERR_INVALID_CHAR,
+ FHP_ERR_INVALID_CHAR_IN_METHOD,
+ FHP_ERR_INVALID_CHAR_IN_URL,
+ FHP_ERR_INVALID_CHAR_IN_URL_PERCENT,
+ FHP_ERR_INVALID_CHAR_IN_VERSION,
FHP_ERR_INVALID_ERROR,
FHP_ERR_BUFFER_TOO_SMALL,
FHP_ERR_LAST
@@ -60,6 +64,8 @@ typedef enum {
FHP_STATE_METHOD,
FHP_STATE_METHOD_END,
FHP_STATE_URL,
+ FHP_STATE_URL_PERCENT,
+ FHP_STATE_URL_PERCENT_LAST,
FHP_STATE_URL_END,
FHP_STATE_VERSION,
FHP_STATE_VERSION_END,
@@ -80,15 +86,27 @@ typedef enum {
#define FHP_BUF_SIZE 1024
struct fhp_t_ {
+ // current parser state
fhp_state_t state;
+
+ // user callback
fhp_cb_t cb;
- uint64_t ofs;
- fhp_err_t err;
+ // opaque user data
void *user_data;
+ // last error
+ fhp_err_t err;
+
+ // number of bytes read
+ uint64_t ofs;
+
+ // fragment data buffer
uint8_t buf[FHP_BUF_SIZE];
size_t buf_len;
+
+ // state for url hex decoder
+ uint32_t hex;
};
fhp_err_t
@@ -97,4 +115,7 @@ fhp_init(fhp_t * const, fhp_cb_t, void * const);
fhp_err_t
fhp_push(fhp_t * const, uint8_t * const, size_t);
+// Return the opaque user data pointer held by the given parser context.
+void *
+fhp_user_data(fhp_t * const);
+
#endif /* FHP_H */
diff --git a/test.c b/test.c
index 3749ce7..792316d 100644
--- a/test.c
+++ b/test.c
@@ -23,15 +23,19 @@ static void die(
exit(EXIT_FAILURE);
}
+/**************/
+/* basic test */
+/**************/
+
static const char *
-str_test_basic =
+basic_str =
"GET / HTTP/1.1\r\n"
"Host: pablotron.org\r\n"
"Connection: close\r\n"
"\r\n";
static bool
-test_basic_cb(
+basic_cb(
fhp_t *fhp,
fhp_token_t token,
uint8_t * const buf,
@@ -59,21 +63,94 @@ test_basic(void) {
fhp_err_t err;
fhp_t fhp;
- if ((err = fhp_init(&fhp, test_basic_cb, NULL)) != FHP_OK) {
+ // init parser
+ if ((err = fhp_init(&fhp, basic_cb, NULL)) != FHP_OK) {
die("test_basic", "fhp_init", err);
}
- size_t len = strlen(str_test_basic);
- if ((err = fhp_push(&fhp, (uint8_t*) str_test_basic, len)) != FHP_OK) {
+ // parse data
+ size_t len = strlen(basic_str);
+ if ((err = fhp_push(&fhp, (uint8_t*) basic_str, len)) != FHP_OK) {
die("test_basic", "fhp_push", err);
}
}
+/****************/
+/* percent test */
+/****************/
+
+// Scratch state for the percent-decoding test, passed to the parser as
+// user data: buf accumulates the URL fragments delivered to percent_cb
+// and buf_len is the number of bytes of buf currently in use.
+typedef struct {
+ uint8_t buf[1024];
+ size_t buf_len;
+} percent_data;
+
+// Request used by the percent-decoding test. The request target contains
+// the escape "%20", which the URL decoder turns into a single space byte
+// (0x20).
+static const char *
+percent_str =
+ "GET /foo%20bar HTTP/1.1\r\n"
+ "Host: pablotron.org\r\n"
+ "Connection: close\r\n"
+ "\r\n";
+
+// Parser callback for the percent-decoding test. Collects the URL
+// fragments into the percent_data supplied as user data and prints the
+// reassembled URL to stderr when the URL ends.
+//
+// fhp:   parser invoking the callback; its user data must point to a
+//        percent_data
+// token: kind of event being reported
+// buf:   fragment bytes (only read for FHP_TOKEN_URL_FRAGMENT)
+// len:   number of bytes in buf
+//
+// Returns false if a fragment would not fit in the buffer while leaving
+// room for the terminating NUL (the parser's call sites turn a false
+// return into FHP_ERR_CB); returns true otherwise.
+static bool
+percent_cb(
+  fhp_t *fhp,
+  fhp_token_t token,
+  uint8_t * const buf,
+  size_t len
+) {
+  percent_data *data = fhp_user_data(fhp);
+
+  switch (token) {
+  case FHP_TOKEN_URL_START:
+    // clear buffer
+    data->buf_len = 0;
+
+    break;
+  case FHP_TOKEN_URL_FRAGMENT:
+    // reject fragments that would overrun the buffer; one byte is kept
+    // free for the NUL written at FHP_TOKEN_URL_END
+    if (data->buf_len >= sizeof(data->buf) ||
+        len > sizeof(data->buf) - 1 - data->buf_len) {
+      return false;
+    }
+
+    // skip the copy for empty fragments so memcpy never sees a
+    // possibly-NULL source pointer
+    if (len > 0) {
+      memcpy(data->buf + data->buf_len, buf, len);
+      data->buf_len += len;
+    }
+
+    break;
+  case FHP_TOKEN_URL_END:
+    // terminate and print buffer
+    data->buf[data->buf_len] = '\0';
+    fprintf(stderr, "decoded URL: \"%s\"\n", data->buf);
+
+    break;
+  default:
+    // do nothing
+    break;
+  }
+
+  // return success
+  return true;
+}
+
+// Percent-decoding test: feeds percent_str through a parser whose
+// callback is percent_cb and whose user data is a local percent_data.
+// Calls die() if fhp_init() or fhp_push() returns anything other than
+// FHP_OK. The decoded URL is only printed by percent_cb; nothing here
+// compares it against an expected value.
+static void
+test_percent(void) {
+ fhp_err_t err;
+ fhp_t fhp;
+ percent_data data;
+
+ // init parser
+ if ((err = fhp_init(&fhp, percent_cb, &data)) != FHP_OK) {
+ die("test_percent", "fhp_init", err);
+ }
+
+ // parse data
+ size_t len = strlen(percent_str);
+ if ((err = fhp_push(&fhp, (uint8_t*) percent_str, len)) != FHP_OK) {
+ die("test_percent", "fhp_push", err);
+ }
+}
+
int main(int argc, char *argv[]) {
UNUSED(argc);
UNUSED(argv);
test_basic();
+ test_percent();
return EXIT_SUCCESS;
}