Mercurial > njs
changeset 161:323f00dc9879
decodeURI() and decodeURIComponent() functions.
| author | Igor Sysoev <igor@sysoev.ru> |
|---|---|
| date | Tue, 30 Aug 2016 12:05:46 +0300 |
| parents | d63ecb57f164 |
| children | 47f4830c3d22 |
| files | njs/njs_builtin.c njs/njs_generator.c njs/njs_lexer_keyword.c njs/njs_parser.c njs/njs_parser.h njs/njs_string.c njs/njs_string.h njs/njs_vm.c njs/njs_vm.h njs/test/njs_unit_test.c |
| diffstat | 10 files changed, 264 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/njs/njs_builtin.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_builtin.c Tue Aug 30 12:05:46 2016 +0300 @@ -90,6 +90,8 @@ NULL, /* parseFloat */ NULL, /* encodeURI */ NULL, /* encodeURIComponent */ + NULL, /* decodeURI */ + NULL, /* decodeURIComponent */ }; static const njs_function_init_t native_functions[] = { @@ -103,6 +105,8 @@ { njs_number_parse_float, { NJS_SKIP_ARG, NJS_STRING_ARG } }, { njs_string_encode_uri, { NJS_SKIP_ARG, NJS_STRING_ARG } }, { njs_string_encode_uri_component, { NJS_SKIP_ARG, NJS_STRING_ARG } }, + { njs_string_decode_uri, { NJS_SKIP_ARG, NJS_STRING_ARG } }, + { njs_string_decode_uri_component, { NJS_SKIP_ARG, NJS_STRING_ARG } }, }; static const njs_object_prop_t null_proto_property = {
--- a/njs/njs_generator.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_generator.c Tue Aug 30 12:05:46 2016 +0300 @@ -302,6 +302,8 @@ case NJS_TOKEN_PARSE_FLOAT: case NJS_TOKEN_ENCODE_URI: case NJS_TOKEN_ENCODE_URI_COMPONENT: + case NJS_TOKEN_DECODE_URI: + case NJS_TOKEN_DECODE_URI_COMPONENT: return njs_generate_builtin_object(vm, parser, node); case NJS_TOKEN_FUNCTION:
--- a/njs/njs_lexer_keyword.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_lexer_keyword.c Tue Aug 30 12:05:46 2016 +0300 @@ -95,6 +95,8 @@ { nxt_string("parseFloat"), NJS_TOKEN_PARSE_FLOAT, 0 }, { nxt_string("encodeURI"), NJS_TOKEN_ENCODE_URI, 0 }, { nxt_string("encodeURIComponent"), NJS_TOKEN_ENCODE_URI_COMPONENT, 0 }, + { nxt_string("decodeURI"), NJS_TOKEN_DECODE_URI, 0 }, + { nxt_string("decodeURIComponent"), NJS_TOKEN_DECODE_URI_COMPONENT, 0 }, /* Reserved words. */
--- a/njs/njs_parser.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_parser.c Tue Aug 30 12:05:46 2016 +0300 @@ -1673,6 +1673,8 @@ case NJS_TOKEN_PARSE_FLOAT: case NJS_TOKEN_ENCODE_URI: case NJS_TOKEN_ENCODE_URI_COMPONENT: + case NJS_TOKEN_DECODE_URI: + case NJS_TOKEN_DECODE_URI_COMPONENT: return njs_parser_builtin_function(vm, parser, node); default:
--- a/njs/njs_parser.h Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_parser.h Tue Aug 30 12:05:46 2016 +0300 @@ -183,14 +183,16 @@ NJS_TOKEN_PARSE_FLOAT, NJS_TOKEN_ENCODE_URI, NJS_TOKEN_ENCODE_URI_COMPONENT, + NJS_TOKEN_DECODE_URI, + NJS_TOKEN_DECODE_URI_COMPONENT, NJS_TOKEN_RESERVED, } njs_token_t; typedef struct { - njs_token_t token:8; - njs_token_t prev_token:8; + njs_token_t token:16; + njs_token_t prev_token:16; uint8_t property; /* 1 bit */ uint32_t key_hash; @@ -222,8 +224,8 @@ typedef struct njs_parser_node_s njs_parser_node_t; struct njs_parser_node_s { - njs_token_t token:8; - njs_variable_node_state_t state:8; /* 2 bits */ + njs_token_t token:16; + njs_variable_node_state_t state:2; /* 2 bits */ uint8_t ctor:1; /* 1 bit */ uint8_t temporary; /* 1 bit */ uint32_t token_line;
--- a/njs/njs_string.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_string.c Tue Aug 30 12:05:46 2016 +0300 @@ -47,6 +47,8 @@ u_char *start, size_t size, nxt_uint_t utf8); static njs_ret_t njs_string_encode(njs_vm_t *vm, njs_value_t *value, const uint32_t *escape); +static njs_ret_t njs_string_decode(njs_vm_t *vm, njs_value_t *value, + const uint32_t *reserve); njs_ret_t @@ -2238,6 +2240,210 @@ } +/* + * decodeURI(string) + */ + +njs_ret_t +njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, + njs_index_t unused) +{ + static const uint32_t reserve[] = { + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ + 0xac009858, /* 1010 1100 0000 0000 1001 1000 0101 1000 */ + + /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ + 0x00000001, /* 0000 0000 0000 0000 0000 0000 0000 0001 */ + + /* ~}| {zyx wvut srqp onml kjih gfed cba` */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + }; + + if (nargs > 1) { + return njs_string_decode(vm, &args[1], reserve); + } + + vm->retval = njs_string_void; + + return NXT_OK; +} + + +/* + * decodeURIComponent(string) + */ + +njs_ret_t +njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args, + nxt_uint_t nargs, njs_index_t unused) +{ + static const uint32_t reserve[] = { + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + /* ~}| {zyx wvut srqp onml kjih gfed cba` */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ + }; + + if (nargs > 1) { + return njs_string_decode(vm, &args[1], reserve); + } + + vm->retval = njs_string_void; + + return NXT_OK; +} + + +static njs_ret_t +njs_string_decode(njs_vm_t *vm, njs_value_t *value, const uint32_t *reserve) +{ + int8_t d0, d1; + u_char byte, *start, *src, *dst; + size_t n, size; + ssize_t length; + nxt_bool_t utf8; + njs_string_prop_t string; + + static const int8_t hex[256] + nxt_aligned(32) = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }; + + nxt_prefetch(&hex['0']); + nxt_prefetch(reserve); + + (void) njs_string_prop(&string, value); + + src = string.start; + n = 0; + + for (size = string.size; size != 0; size--) { + byte = *src++; + + if (byte == '%') { + if (size < 3) { + goto uri_error; + } + + d0 = hex[*src++]; + if (d0 < 0) { + goto uri_error; + } + + d1 = hex[*src++]; + if (d1 < 0) { + goto uri_error; + } + + byte = (d0 << 4) + d1; + + if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) == 0) { + n += 2; + } + } + } + + if (n == 0) { + /* GC: retain src. */ + vm->retval = *value; + return NXT_OK; + } + + n = string.size - n; + + start = njs_string_alloc(vm, &vm->retval, n, n); + if (nxt_slow_path(start == NULL)) { + return NXT_ERROR; + } + + utf8 = 0; + dst = start; + size = string.size; + src = string.start; + + do { + byte = *src++; + + if (byte == '%') { + d0 = hex[*src++]; + d1 = hex[*src++]; + byte = (d0 << 4) + d1; + + utf8 |= (byte >= 0x80); + + if ((reserve[byte >> 5] & ((uint32_t) 1 << (byte & 0x1f))) != 0) { + size -= 2; + *dst++ = '%'; + *dst++ = src[-2]; + byte = src[-1]; + } + } + + *dst++ = byte; + + size--; + + } while (size != 0); + + if (utf8) { + length = nxt_utf8_length(start, n); + + if (length < 0) { + length = 0; + } + + if (vm->retval.short_string.size != NJS_STRING_LONG) { + vm->retval.short_string.length = length; + + } else { + vm->retval.data.u.string->length = length; + } + } + + return NXT_OK; + +uri_error: + + vm->exception = &njs_exception_uri_error; + + return NXT_ERROR; +} + + static nxt_int_t njs_values_hash_test(nxt_lvlhsh_query_t *lhq, void *data) {
--- a/njs/njs_string.h Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_string.h Tue Aug 30 12:05:46 2016 +0300 @@ -108,6 +108,10 @@ nxt_uint_t nargs, njs_index_t unused); njs_ret_t njs_string_encode_uri_component(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, njs_index_t unused); +njs_ret_t njs_string_decode_uri(njs_vm_t *vm, njs_value_t *args, + nxt_uint_t nargs, njs_index_t unused); +njs_ret_t njs_string_decode_uri_component(njs_vm_t *vm, njs_value_t *args, + nxt_uint_t nargs, njs_index_t unused); njs_index_t njs_value_index(njs_vm_t *vm, njs_parser_t *parser, const njs_value_t *src);
--- a/njs/njs_vm.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_vm.c Tue Aug 30 12:05:46 2016 +0300 @@ -141,6 +141,7 @@ const njs_value_t njs_exception_reference_error = njs_string("ReferenceError"); const njs_value_t njs_exception_type_error = njs_string("TypeError"); const njs_value_t njs_exception_range_error = njs_string("RangeError"); +const njs_value_t njs_exception_uri_error = njs_string("URIError"); const njs_value_t njs_exception_memory_error = njs_string("MemoryError"); const njs_value_t njs_exception_internal_error = njs_string("InternalError");
--- a/njs/njs_vm.h Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/njs_vm.h Tue Aug 30 12:05:46 2016 +0300 @@ -712,7 +712,9 @@ NJS_FUNCTION_PARSE_FLOAT, NJS_FUNCTION_STRING_ENCODE_URI, NJS_FUNCTION_STRING_ENCODE_URI_COMPONENT, -#define NJS_FUNCTION_MAX (NJS_FUNCTION_STRING_ENCODE_URI_COMPONENT + 1) + NJS_FUNCTION_STRING_DECODE_URI, + NJS_FUNCTION_STRING_DECODE_URI_COMPONENT, +#define NJS_FUNCTION_MAX (NJS_FUNCTION_STRING_DECODE_URI_COMPONENT + 1) }; @@ -1005,6 +1007,7 @@ extern const njs_value_t njs_exception_reference_error; extern const njs_value_t njs_exception_type_error; extern const njs_value_t njs_exception_range_error; +extern const njs_value_t njs_exception_uri_error; extern const njs_value_t njs_exception_memory_error; extern const njs_value_t njs_exception_internal_error;
--- a/njs/test/njs_unit_test.c Tue Aug 30 12:02:31 2016 +0300 +++ b/njs/test/njs_unit_test.c Tue Aug 30 12:05:46 2016 +0300 @@ -3309,6 +3309,39 @@ { nxt_string("encodeURIComponent('~}|{`_^]\\\\[@?>=<;:/.-,+*)(\\\'&%$#\"! ')"), nxt_string("~%7D%7C%7B%60_%5E%5D%5C%5B%40%3F%3E%3D%3C%3B%3A%2F.-%2C%2B*)('%26%25%24%23%22!%20")}, + { nxt_string("decodeURI()"), + nxt_string("undefined")}, + + { nxt_string("decodeURI('%QQ')"), + nxt_string("URIError")}, + + { nxt_string("decodeURI('%')"), + nxt_string("URIError")}, + + { nxt_string("decodeURI('%0')"), + nxt_string("URIError")}, + + { nxt_string("decodeURI('%00')"), + nxt_string("\0")}, + + { nxt_string("decodeURI('%3012%D0%B0%D0%B1%D0%B2')"), + nxt_string("012абв")}, + + { nxt_string("decodeURI('%7e%7d%7c%7b%60%5f%5e%5d%5c%5b%40%3f%3e%3d%3c%3b%3a%2f%2e%2c%2b%2a%29%28%27%26%25%24%23%22%21%20')"), + nxt_string("~}|{`_^]\\[%40%3f>%3d<%3b%3a%2f.%2c%2b*)('%26%%24%23\"! ")}, + + { nxt_string("decodeURIComponent('%7e%7d%7c%7b%60%5f%5e%5d%5c%5b%40%3f%3e%3d%3c%3b%3a%2f%2e%2c%2b%2a%29%28%27%26%25%24%23%22%21%20')"), + nxt_string("~}|{`_^]\\[@?>=<;:/.,+*)('&%$#\"! ")}, + + { nxt_string("decodeURI('%41%42%43').length"), + nxt_string("3")}, + + { nxt_string("decodeURI('%D0%B0%D0%B1%D0%B2').length"), + nxt_string("3")}, + + { nxt_string("decodeURI('%80%81%82').length"), + nxt_string("3")}, + /* Functions. */ { nxt_string("return"),
