Mercurial > njs
changeset 1485:bcd1a41c6a67
Introduced TextEncoder/TextDecoder implementation.
Implemented according to the WHATWG Encoding specification.
| author | Alexander Borisov <alexander.borisov@nginx.com> |
|---|---|
| date | Tue, 28 Jul 2020 16:58:59 +0300 |
| parents | 0fad09ddb37a |
| children | a0f2c61c1c83 |
| files | auto/sources src/njs_builtin.c src/njs_encoding.c src/njs_encoding.h src/njs_main.h src/njs_object_hash.h src/njs_typed_array.c src/njs_typed_array.h src/njs_unicode.h src/njs_value.h src/njs_vm.h src/test/njs_unit_test.c |
| diffstat | 12 files changed, 1072 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/auto/sources Thu Jul 30 17:47:05 2020 +0000 +++ b/auto/sources Tue Jul 28 16:58:59 2020 +0300 @@ -56,6 +56,7 @@ src/njs_typed_array.c \ src/njs_promise.c \ src/njs_query_string.c \ + src/njs_encoding.c \ " NJS_LIB_TEST_SRCS=" \
--- a/src/njs_builtin.c Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_builtin.c Tue Jul 28 16:58:59 2020 +0300 @@ -71,6 +71,8 @@ &njs_date_type_init, &njs_promise_type_init, &njs_array_buffer_type_init, + &njs_text_decoder_type_init, + &njs_text_encoder_type_init, /* Hidden types. */ @@ -1283,6 +1285,26 @@ { .type = NJS_PROPERTY_HANDLER, + .name = njs_string("TextDecoder"), + .value = njs_prop_handler2(njs_top_level_constructor, + NJS_OBJ_TYPE_TEXT_DECODER, + NJS_TEXT_DECODER_HASH), + .writable = 1, + .configurable = 1, + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("TextEncoder"), + .value = njs_prop_handler2(njs_top_level_constructor, + NJS_OBJ_TYPE_TEXT_ENCODER, + NJS_TEXT_ENCODER_HASH), + .writable = 1, + .configurable = 1, + }, + + { + .type = NJS_PROPERTY_HANDLER, .name = njs_string("Uint8Array"), .value = njs_prop_handler2(njs_top_level_constructor, NJS_OBJ_TYPE_UINT8_ARRAY,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/njs_encoding.c Tue Jul 28 16:58:59 2020 +0300 @@ -0,0 +1,804 @@ + +/* + * Copyright (C) Alexander Borisov + * Copyright (C) NGINX, Inc. + */ + + +#include <njs_main.h> + + +typedef enum { + NJS_ENCODING_UTF8, +} njs_encoding_t; + + +typedef struct { + njs_encoding_t encoding; + njs_bool_t fatal; + njs_bool_t ignore_bom; + + uint32_t codepoint; + njs_unicode_decode_t ctx; +} njs_encoding_decode_t; + + +typedef struct { + njs_str_t name; + njs_encoding_t encoding; +} njs_encoding_label_t; + + +static njs_encoding_label_t njs_encoding_labels[] = +{ + { njs_str("utf-8"), NJS_ENCODING_UTF8 }, + { njs_str("utf8") , NJS_ENCODING_UTF8 }, + { njs_null_str, 0 } +}; + + +static njs_int_t njs_text_encoder_encode_utf8(njs_vm_t *vm, + njs_string_prop_t *prop); +static njs_int_t njs_text_decoder_arg_encoding(njs_vm_t *vm, njs_value_t *args, + njs_uint_t nargs, njs_encoding_decode_t *data); +static njs_int_t njs_text_decoder_arg_options(njs_vm_t *vm, njs_value_t *args, + njs_uint_t nargs, njs_encoding_decode_t *data); + + +static njs_int_t +njs_text_encoder_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + njs_object_t *proto; + njs_object_value_t *ov; + + if (!vm->top_frame->ctor) { + njs_type_error(vm, "Constructor of TextEncoder requires 'new'"); + return NJS_ERROR; + } + + ov = njs_mp_alloc(vm->mem_pool, sizeof(njs_object_value_t)); + if (njs_slow_path(ov == NULL)) { + njs_memory_error(vm); + return NJS_ERROR; + } + + proto = &vm->prototypes[NJS_OBJ_TYPE_TEXT_ENCODER].object; + + njs_lvlhsh_init(&ov->object.hash); + njs_lvlhsh_init(&ov->object.shared_hash); + ov->object.type = NJS_OBJECT_VALUE; + ov->object.shared = 0; + ov->object.extensible = 1; + ov->object.error_data = 0; + ov->object.fast_array = 0; + ov->object.__proto__ = proto; + ov->object.slots = NULL; + + njs_set_data(&ov->value, NULL, NJS_DATA_TAG_TEXT_ENCODER); + njs_set_object_value(&vm->retval, ov); + + return NJS_OK; 
+} + + +static njs_int_t +njs_text_encoder_encode(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + u_char *dst; + int64_t size; + uint32_t cp; + njs_int_t ret; + njs_value_t *this, *input, value; + const u_char *p, *start, *end; + njs_string_prop_t prop; + njs_typed_array_t *array; + njs_unicode_decode_t ctx; + + this = njs_argument(args, 0); + + if (njs_slow_path(!njs_is_object_data(this, NJS_DATA_TAG_TEXT_ENCODER))) { + njs_type_error(vm, "\"this\" is not a TextEncoder"); + return NJS_ERROR; + } + + start = NULL; + end = NULL; + + if (nargs > 1) { + input = njs_argument(args, 1); + + if (njs_slow_path(!njs_is_string(input))) { + ret = njs_value_to_string(vm, input, input); + if (njs_slow_path(ret != NJS_OK)) { + return ret; + } + } + + (void) njs_string_prop(&prop, input); + + if (prop.length != 0) { + return njs_text_encoder_encode_utf8(vm, &prop); + } + + start = prop.start; + end = start + prop.size; + } + + p = start; + + cp = 0; + size = 0; + + njs_utf8_decode_init(&ctx); + + while (p < end) { + cp = njs_utf8_decode(&ctx, &p, end); + + if (cp > NJS_UNICODE_MAX_CODEPOINT) { + if (cp == NJS_UNICODE_CONTINUE) { + continue; + } + + cp = NJS_UNICODE_REPLACEMENT; + } + + size += njs_utf8_size(cp); + } + + if (cp == NJS_UNICODE_CONTINUE) { + size += njs_utf8_size(NJS_UNICODE_REPLACEMENT); + } + + njs_set_number(&value, size); + + array = njs_typed_array_alloc(vm, &value, 1, NJS_OBJ_TYPE_UINT8_ARRAY); + if (njs_slow_path(array == NULL)) { + return NJS_ERROR; + } + + dst = njs_typed_array_buffer(array)->u.u8; + njs_utf8_decode_init(&ctx); + + while (start < end) { + cp = njs_utf8_decode(&ctx, &start, end); + + if (cp > NJS_UNICODE_MAX_CODEPOINT) { + if (cp == NJS_UNICODE_CONTINUE) { + continue; + } + + cp = NJS_UNICODE_REPLACEMENT; + } + + dst = njs_utf8_encode(dst, cp); + } + + if (cp == NJS_UNICODE_CONTINUE) { + (void) njs_utf8_encode(dst, NJS_UNICODE_REPLACEMENT); + } + + njs_set_typed_array(&vm->retval, array); + + return NJS_OK; +} + 
+ +static njs_int_t +njs_text_encoder_encode_utf8(njs_vm_t *vm, njs_string_prop_t *prop) +{ + njs_value_t value; + njs_typed_array_t *array; + + njs_set_number(&value, prop->size); + + array = njs_typed_array_alloc(vm, &value, 1, NJS_OBJ_TYPE_UINT8_ARRAY); + if (njs_slow_path(array == NULL)) { + return NJS_ERROR; + } + + memcpy(njs_typed_array_buffer(array)->u.u8, prop->start, prop->size); + + njs_set_typed_array(&vm->retval, array); + + return NJS_OK; +} + + +static njs_int_t +njs_text_encoder_encode_into(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + u_char *to, *to_end; + size_t size; + uint32_t cp; + njs_int_t ret; + njs_str_t str; + njs_value_t *this, *input, *dest, retval, read, written; + const u_char *start, *end; + njs_typed_array_t *array; + njs_unicode_decode_t ctx; + + static const njs_value_t read_str = njs_string("read"); + static const njs_value_t written_str = njs_string("written"); + + this = njs_argument(args, 0); + input = njs_arg(args, nargs, 1); + dest = njs_arg(args, nargs, 2); + + if (njs_slow_path(!njs_is_object_data(this, NJS_DATA_TAG_TEXT_ENCODER))) { + njs_type_error(vm, "\"this\" is not a TextEncoder"); + return NJS_ERROR; + } + + if (njs_slow_path(!njs_is_string(input))) { + ret = njs_value_to_string(vm, &retval, input); + if (njs_slow_path(ret != NJS_OK)) { + return ret; + } + + input = &retval; + } + + if (njs_slow_path(!njs_is_typed_array_uint8(dest))) { + njs_type_error(vm, "The \"destination\" argument must be an instance " + "of Uint8Array"); + return NJS_ERROR; + } + + njs_string_get(input, &str); + + start = str.start; + end = start + str.length; + + array = njs_typed_array(dest); + to = njs_typed_array_buffer(array)->u.u8; + to_end = to + array->byte_length; + + cp = 0; + njs_set_number(&read, 0); + njs_set_number(&written, 0); + + njs_utf8_decode_init(&ctx); + + while (start < end) { + cp = njs_utf8_decode(&ctx, &start, end); + + if (cp > NJS_UNICODE_MAX_CODEPOINT) { + cp = 
NJS_UNICODE_REPLACEMENT; + } + + size = njs_utf8_size(cp); + + if (to + size > to_end) { + break; + } + + njs_number(&read) += (cp > 0xFFFF) ? 2 : 1; + njs_number(&written) += size; + + to = njs_utf8_encode(to, cp); + } + + return njs_vm_object_alloc(vm, &vm->retval, &read_str, &read, + &written_str, &written, NULL); +} + + +static const njs_object_prop_t njs_text_encoder_properties[] = +{ + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("constructor"), + .value = njs_prop_handler(njs_object_prototype_create_constructor), + .writable = 1, + .configurable = 1, + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("encoding"), + .value = njs_string("utf-8"), + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("encode"), + .value = njs_native_function(njs_text_encoder_encode, 0), + .writable = 1, + .configurable = 1, + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("encodeInto"), + .value = njs_native_function(njs_text_encoder_encode_into, 2), + .writable = 1, + .configurable = 1, + }, +}; + + +const njs_object_init_t njs_text_encoder_init = { + njs_text_encoder_properties, + njs_nitems(njs_text_encoder_properties), +}; + + +static const njs_object_prop_t njs_text_encoder_constructor_properties[] = +{ + { + .type = NJS_PROPERTY, + .name = njs_string("name"), + .value = njs_string("TextEncoder"), + .configurable = 1, + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("length"), + .value = njs_value(NJS_NUMBER, 0, 0.0), + .configurable = 1, + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("prototype"), + .value = njs_prop_handler(njs_object_prototype_create), + }, +}; + + +const njs_object_init_t njs_text_encoder_constructor_init = { + njs_text_encoder_constructor_properties, + njs_nitems(njs_text_encoder_constructor_properties), +}; + + +const njs_object_type_init_t njs_text_encoder_type_init = { + .constructor = njs_native_ctor(njs_text_encoder_constructor, 0, 0), + .prototype_props = &njs_text_encoder_init, + 
.constructor_props = &njs_text_encoder_constructor_init, + .prototype_value = { .object = { .type = NJS_OBJECT } }, +}; + + +static njs_int_t +njs_text_decoder_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + njs_int_t ret; + njs_object_t *proto; + njs_object_value_t *ov; + njs_encoding_decode_t *data; + + if (!vm->top_frame->ctor) { + njs_type_error(vm, "Constructor of TextDecoder requires 'new'"); + return NJS_ERROR; + } + + ov = njs_mp_alloc(vm->mem_pool, sizeof(njs_object_value_t) + + sizeof(njs_encoding_decode_t)); + if (njs_slow_path(ov == NULL)) { + njs_memory_error(vm); + return NJS_ERROR; + } + + proto = &vm->prototypes[NJS_OBJ_TYPE_TEXT_DECODER].object; + + njs_lvlhsh_init(&ov->object.hash); + njs_lvlhsh_init(&ov->object.shared_hash); + ov->object.type = NJS_OBJECT_VALUE; + ov->object.shared = 0; + ov->object.extensible = 1; + ov->object.error_data = 0; + ov->object.fast_array = 0; + ov->object.__proto__ = proto; + ov->object.slots = NULL; + + data = (njs_encoding_decode_t *) ((uint8_t *) ov + + sizeof(njs_object_value_t)); + + ret = njs_text_decoder_arg_encoding(vm, args, nargs, data); + if (njs_slow_path(ret != NJS_OK)) { + return ret; + } + + ret = njs_text_decoder_arg_options(vm, args, nargs, data); + if (njs_slow_path(ret != NJS_OK)) { + return ret; + } + + data->codepoint = 0; + njs_utf8_decode_init(&data->ctx); + + njs_set_data(&ov->value, data, NJS_DATA_TAG_TEXT_DECODER); + njs_set_object_value(&vm->retval, ov); + + return NJS_OK; +} + + +static njs_int_t +njs_text_decoder_arg_encoding(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_encoding_decode_t *data) +{ + njs_str_t str; + njs_int_t ret; + njs_value_t *value; + njs_encoding_label_t *label; + + if (nargs < 2) { + data->encoding = NJS_ENCODING_UTF8; + return NJS_OK; + } + + value = njs_argument(args, 1); + + if (njs_slow_path(!njs_is_string(value))) { + ret = njs_value_to_string(vm, value, value); + if (njs_slow_path(ret != NJS_OK)) { + return 
ret; + } + } + + njs_string_get(value, &str); + + for (label = &njs_encoding_labels[0]; label->name.length != 0; label++) { + if (njs_strstr_eq(&str, &label->name)) { + data->encoding = label->encoding; + return NJS_OK; + } + } + + njs_range_error(vm, "The \"%V\" encoding is not supported", &str); + + return NJS_ERROR; +} + + +static njs_int_t +njs_text_decoder_arg_options(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_encoding_decode_t *data) +{ + njs_int_t ret; + njs_value_t retval, *value; + + static const njs_value_t fatal_str = njs_string("fatal"); + static const njs_value_t ignore_bom_str = njs_string("ignoreBOM"); + + if (nargs < 3) { + data->fatal = 0; + data->ignore_bom = 0; + + return NJS_OK; + } + + value = njs_argument(args, 2); + + if (njs_slow_path(!njs_is_object(value))) { + njs_type_error(vm, "The \"options\" argument must be of type object"); + return NJS_ERROR; + } + + ret = njs_value_property(vm, value, njs_value_arg(&fatal_str), &retval); + if (njs_slow_path(ret == NJS_ERROR)) { + return ret; + } + + data->fatal = njs_bool(&retval); + + ret = njs_value_property(vm, value, njs_value_arg(&ignore_bom_str), + &retval); + if (njs_slow_path(ret == NJS_ERROR)) { + return ret; + } + + data->ignore_bom = njs_bool(&retval); + + return NJS_OK; +} + + +static njs_int_t +njs_text_decoder_encoding(njs_vm_t *vm, njs_object_prop_t *prop, + njs_value_t *value, njs_value_t *setval, njs_value_t *retval) +{ + njs_encoding_decode_t *data; + + static const njs_value_t utf8_str = njs_string("utf-8"); + + if (njs_slow_path(!njs_is_object_data(value, NJS_DATA_TAG_TEXT_DECODER))) { + njs_set_undefined(retval); + return NJS_DECLINED; + } + + data = njs_object_data(value); + + switch (data->encoding) { + case NJS_ENCODING_UTF8: + *retval = utf8_str; + break; + + default: + njs_type_error(vm, "unknown encoding"); + return NJS_ERROR; + } + + return NJS_OK; +} + + +static njs_int_t +njs_text_decoder_fatal(njs_vm_t *vm, njs_object_prop_t *prop, + njs_value_t *value, 
njs_value_t *setval, njs_value_t *retval) +{ + njs_encoding_decode_t *data; + + if (njs_slow_path(!njs_is_object_data(value, NJS_DATA_TAG_TEXT_DECODER))) { + njs_set_undefined(retval); + return NJS_DECLINED; + } + + data = njs_object_data(value); + + njs_set_boolean(retval, data->fatal); + + return NJS_OK; +} + + +static njs_int_t +njs_text_decoder_ignore_bom(njs_vm_t *vm, njs_object_prop_t *prop, + njs_value_t *value, njs_value_t *setval, njs_value_t *retval) +{ + njs_encoding_decode_t *data; + + if (njs_slow_path(!njs_is_object_data(value, NJS_DATA_TAG_TEXT_DECODER))) { + njs_set_undefined(retval); + return NJS_DECLINED; + } + + data = njs_object_data(value); + + njs_set_boolean(retval, data->ignore_bom); + + return NJS_OK; +} + + +static njs_int_t +njs_text_decoder_decode(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t unused) +{ + u_char *dst; + uint32_t length, cp; + uint64_t size; + njs_int_t ret; + njs_bool_t stream; + njs_value_t retval, *this, *typed_array, *options; + const u_char *start, *end, *p; + njs_unicode_decode_t ctx; + njs_encoding_decode_t *data; + const njs_typed_array_t *array; + + static const njs_value_t stream_str = njs_string("stream"); + + start = NULL; + end = NULL; + + stream = 0; + + this = njs_argument(args, 0); + + if (njs_slow_path(!njs_is_object_data(this, NJS_DATA_TAG_TEXT_DECODER))) { + njs_type_error(vm, "\"this\" is not a TextDecoder"); + return NJS_ERROR; + } + + if (njs_fast_path(nargs > 1)) { + typed_array = njs_argument(args, 1); + if (njs_slow_path(!njs_is_typed_array(typed_array))) { + njs_type_error(vm, "The \"input\" argument must be an instance " + "of TypedArray"); + return NJS_ERROR; + } + + array = njs_typed_array(typed_array); + + start = array->buffer->u.u8; + end = start + array->byte_length; + } + + if (nargs > 2) { + options = njs_argument(args, 2); + + if (njs_slow_path(!njs_is_object(options))) { + njs_type_error(vm, "The \"options\" argument must be " + "of type object"); + return NJS_ERROR; 
+ } + + ret = njs_value_property(vm, options, njs_value_arg(&stream_str), + &retval); + if (njs_slow_path(ret == NJS_ERROR)) { + return ret; + } + + stream = njs_bool(&retval); + } + + data = njs_object_data(this); + + ctx = data->ctx; + cp = data->codepoint; + + size = 0; + length = 0; + + p = start; + + /* Looking for BOM. */ + + if (!data->ignore_bom && p + 3 <= end) { + cp = njs_utf8_decode(&ctx, &p, end); + + if (cp == NJS_UNICODE_BOM) { + start = p; + + } else { + p = start; + } + } + + while (p < end) { + cp = njs_utf8_decode(&ctx, &p, end); + + if (njs_slow_path(cp > NJS_UNICODE_MAX_CODEPOINT)) { + if (cp == NJS_UNICODE_CONTINUE) { + break; + } + + if (data->fatal) { + goto fatal; + } + + cp = NJS_UNICODE_REPLACEMENT; + } + + size += njs_utf8_size(cp); + length++; + } + + if (cp == NJS_UNICODE_CONTINUE && !stream) { + if (data->fatal) { + goto fatal; + } + + size += njs_utf8_size(NJS_UNICODE_REPLACEMENT); + length++; + } + + dst = njs_string_alloc(vm, &vm->retval, size, length); + if (njs_slow_path(dst == NULL)) { + return NJS_ERROR; + } + + while (start < end) { + cp = njs_utf8_decode(&data->ctx, &start, end); + + if (cp > NJS_UNICODE_MAX_CODEPOINT) { + if (cp == NJS_UNICODE_CONTINUE) { + break; + } + + cp = NJS_UNICODE_REPLACEMENT; + } + + dst = njs_utf8_encode(dst, cp); + } + + if (stream) { + data->codepoint = cp; + return NJS_OK; + } + + if (cp == NJS_UNICODE_CONTINUE) { + (void) njs_utf8_encode(dst, NJS_UNICODE_REPLACEMENT); + } + + data->codepoint = 0; + + njs_utf8_decode_init(&data->ctx); + + return NJS_OK; + +fatal: + + njs_type_error(vm, "The encoded data was not valid"); + + return NJS_ERROR; +} + + +static const njs_object_prop_t njs_text_decoder_properties[] = +{ + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("constructor"), + .value = njs_prop_handler(njs_object_prototype_create_constructor), + .writable = 1, + .configurable = 1, + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("encoding"), + .value = 
njs_prop_handler(njs_text_decoder_encoding), + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("fatal"), + .value = njs_prop_handler(njs_text_decoder_fatal), + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("ignoreBOM"), + .value = njs_prop_handler(njs_text_decoder_ignore_bom), + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("decode"), + .value = njs_native_function(njs_text_decoder_decode, 0), + .writable = 1, + .configurable = 1, + }, +}; + + +const njs_object_init_t njs_text_decoder_init = { + njs_text_decoder_properties, + njs_nitems(njs_text_decoder_properties), +}; + + +static const njs_object_prop_t njs_text_decoder_constructor_properties[] = +{ + { + .type = NJS_PROPERTY, + .name = njs_string("name"), + .value = njs_string("TextDecoder"), + .configurable = 1, + }, + + { + .type = NJS_PROPERTY, + .name = njs_string("length"), + .value = njs_value(NJS_NUMBER, 0, 0.0), + .configurable = 1, + }, + + { + .type = NJS_PROPERTY_HANDLER, + .name = njs_string("prototype"), + .value = njs_prop_handler(njs_object_prototype_create), + }, +}; + + +const njs_object_init_t njs_text_decoder_constructor_init = { + njs_text_decoder_constructor_properties, + njs_nitems(njs_text_decoder_constructor_properties), +}; + + +const njs_object_type_init_t njs_text_decoder_type_init = { + .constructor = njs_native_ctor(njs_text_decoder_constructor, 0, 0), + .prototype_props = &njs_text_decoder_init, + .constructor_props = &njs_text_decoder_constructor_init, + .prototype_value = { .object = { .type = NJS_OBJECT } }, +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/njs_encoding.h Tue Jul 28 16:58:59 2020 +0300 @@ -0,0 +1,14 @@ + +/* + * Copyright (C) Alexander Borisov + * Copyright (C) NGINX, Inc. + */ + +#ifndef _NJS_ENCODING_H_INCLUDED_ +#define _NJS_ENCODING_H_INCLUDED_ + +extern const njs_object_type_init_t njs_text_encoder_type_init; +extern const njs_object_type_init_t njs_text_decoder_type_init; + + +#endif /* _NJS_ENCODING_H_INCLUDED_ */
--- a/src/njs_main.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_main.h Tue Jul 28 16:58:59 2020 +0300 @@ -73,6 +73,7 @@ #include <njs_math.h> #include <njs_json.h> +#include <njs_encoding.h> #include <njs_timer.h> #include <njs_module.h>
--- a/src/njs_object_hash.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_object_hash.h Tue Jul 28 16:58:59 2020 +0300 @@ -750,4 +750,34 @@ 'd'), 'A'), 'r'), 'r'), 'a'), 'y') +#define NJS_TEXT_DECODER_HASH \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add(NJS_DJB_HASH_INIT, \ + 'T'), 'e'), 'x'), 't'), 'D'), 'e'), 'c'), 'o'), 'd'), 'e'), 'r') + + +#define NJS_TEXT_ENCODER_HASH \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add( \ + njs_djb_hash_add(NJS_DJB_HASH_INIT, \ + 'T'), 'e'), 'x'), 't'), 'E'), 'n'), 'c'), 'o'), 'd'), 'e'), 'r') + + #endif /* _NJS_OBJECT_HASH_H_INCLUDED_ */
--- a/src/njs_typed_array.c Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_typed_array.c Tue Jul 28 16:58:59 2020 +0300 @@ -8,9 +8,9 @@ #include <njs_main.h> -static njs_int_t -njs_typed_array_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, - njs_index_t magic) +njs_typed_array_t * +njs_typed_array_alloc(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_object_type_t type) { double num; int64_t i, length; @@ -19,7 +19,6 @@ njs_int_t ret; njs_value_t *value, prop; njs_array_t *src_array; - njs_object_type_t type; njs_typed_array_t *array, *src_tarray; njs_array_buffer_t *buffer; @@ -31,54 +30,48 @@ src_array = NULL; src_tarray = NULL; - type = magic; element_size = njs_typed_array_element_size(type); - if (!vm->top_frame->ctor) { - njs_type_error(vm, "Constructor of TypedArray requires 'new'"); - return NJS_ERROR; - } - - value = njs_arg(args, nargs, 1); + value = njs_arg(args, nargs, 0); if (njs_is_array_buffer(value)) { buffer = njs_array_buffer(value); - ret = njs_value_to_index(vm, njs_arg(args, nargs, 2), &offset); + ret = njs_value_to_index(vm, njs_arg(args, nargs, 1), &offset); if (njs_slow_path(ret != NJS_OK)) { - return NJS_ERROR; + return NULL; } if (njs_slow_path((offset % element_size) != 0)) { njs_range_error(vm, "start offset must be multiple of %uD", element_size); - return NJS_ERROR; + return NULL; } - if (!njs_is_undefined(njs_arg(args, nargs, 3))) { - ret = njs_value_to_index(vm, njs_argument(args, 3), &size); + if (!njs_is_undefined(njs_arg(args, nargs, 2))) { + ret = njs_value_to_index(vm, njs_argument(args, 2), &size); if (njs_slow_path(ret != NJS_OK)) { - return NJS_ERROR; + return NULL; } size *= element_size; if (njs_slow_path((offset + size) > buffer->size)) { njs_range_error(vm, "Invalid typed array length: %uL", size); - return NJS_ERROR; + return NULL; } } else { if (njs_slow_path((buffer->size % element_size) != 0)) { njs_range_error(vm, "byteLength of buffer must be " "multiple of %uD", element_size); - return 
NJS_ERROR; + return NULL; } if (offset > buffer->size) { njs_range_error(vm, "byteOffset %uL is outside the bound of " "the buffer", offset); - return NJS_ERROR; + return NULL; } size = buffer->size - offset; @@ -96,7 +89,7 @@ } else { ret = njs_object_length(vm, value, &length); if (njs_slow_path(ret == NJS_ERROR)) { - return ret; + return NULL; } } @@ -105,7 +98,7 @@ } else { ret = njs_value_to_index(vm, value, &size); if (njs_slow_path(ret != NJS_OK)) { - return NJS_ERROR; + return NULL; } size *= element_size; @@ -114,7 +107,7 @@ if (buffer == NULL) { buffer = njs_array_buffer_alloc(vm, size); if (njs_slow_path(buffer == NULL)) { - return NJS_ERROR; + return NULL; } } @@ -144,7 +137,7 @@ for (i = 0; i < length; i++) { ret = njs_value_to_number(vm, &src_array->start[i], &num); if (njs_slow_path(ret == NJS_ERROR)) { - return NJS_ERROR; + return NULL; } if (ret == NJS_OK) { @@ -156,7 +149,7 @@ for (i = 0; i < length; i++) { ret = njs_value_property_i64(vm, value, i, &prop); if (njs_slow_path(ret == NJS_ERROR)) { - return NJS_ERROR; + return NULL; } num = NAN; @@ -164,7 +157,7 @@ if (ret == NJS_OK) { ret = njs_value_to_number(vm, &prop, &num); if (njs_slow_path(ret == NJS_ERROR)) { - return NJS_ERROR; + return NULL; } } @@ -179,15 +172,35 @@ array->object.extensible = 1; array->object.fast_array = 1; - njs_set_typed_array(&vm->retval, array); - - return NJS_OK; + return array; memory_error: njs_memory_error(vm); - return NJS_ERROR; + return NULL; +} + + +static njs_int_t +njs_typed_array_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, + njs_index_t magic) +{ + njs_typed_array_t *array; + + if (!vm->top_frame->ctor) { + njs_type_error(vm, "Constructor of TypedArray requires 'new'"); + return NJS_ERROR; + } + + array = njs_typed_array_alloc(vm, &args[1], nargs - 1, magic); + if (njs_slow_path(array == NULL)) { + return NJS_ERROR; + } + + njs_set_typed_array(&vm->retval, array); + + return NJS_OK; }
--- a/src/njs_typed_array.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_typed_array.h Tue Jul 28 16:58:59 2020 +0300 @@ -8,6 +8,8 @@ #define _NJS_TYPED_ARRAY_H_INCLUDED_ +njs_typed_array_t *njs_typed_array_alloc(njs_vm_t *vm, njs_value_t *args, + njs_uint_t nargs, njs_object_type_t type); njs_int_t njs_typed_array_set_value(njs_vm_t *vm, njs_typed_array_t *array, uint32_t index, njs_value_t *setval); njs_int_t njs_typed_array_to_chain(njs_vm_t *vm, njs_chb_t *chain,
--- a/src/njs_unicode.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_unicode.h Tue Jul 28 16:58:59 2020 +0300 @@ -9,6 +9,7 @@ enum { + NJS_UNICODE_BOM = 0xFEFF, NJS_UNICODE_REPLACEMENT = 0xFFFD, NJS_UNICODE_MAX_CODEPOINT = 0x10FFFF, NJS_UNICODE_ERROR = 0x1FFFFF,
--- a/src/njs_value.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_value.h Tue Jul 28 16:58:59 2020 +0300 @@ -81,6 +81,8 @@ NJS_DATA_TAG_EXTERNAL, NJS_DATA_TAG_CRYPTO_HASH, NJS_DATA_TAG_CRYPTO_HMAC, + NJS_DATA_TAG_TEXT_ENCODER, + NJS_DATA_TAG_TEXT_DECODER, NJS_DATA_TAG_MAX } njs_data_tag_t; @@ -654,6 +656,11 @@ ((value)->type == NJS_TYPED_ARRAY) +#define njs_is_typed_array_uint8(value) \ + (njs_is_typed_array(value) \ + && njs_typed_array(value)->type == NJS_OBJ_TYPE_UINT8_ARRAY) + + #define njs_is_function(value) \ ((value)->type == NJS_FUNCTION)
--- a/src/njs_vm.h Thu Jul 30 17:47:05 2020 +0000 +++ b/src/njs_vm.h Tue Jul 28 16:58:59 2020 +0300 @@ -84,6 +84,8 @@ NJS_OBJ_TYPE_DATE, NJS_OBJ_TYPE_PROMISE, NJS_OBJ_TYPE_ARRAY_BUFFER, + NJS_OBJ_TYPE_TEXT_DECODER, + NJS_OBJ_TYPE_TEXT_ENCODER, NJS_OBJ_TYPE_FS_DIRENT, #define NJS_OBJ_TYPE_HIDDEN_MIN (NJS_OBJ_TYPE_FS_DIRENT) @@ -107,6 +109,7 @@ #define NJS_OBJ_TYPE_TYPED_ARRAY_MAX (NJS_OBJ_TYPE_FLOAT64_ARRAY + 1) #define NJS_OBJ_TYPE_TYPED_ARRAY_SIZE (NJS_OBJ_TYPE_TYPED_ARRAY_MAX \ - NJS_OBJ_TYPE_TYPED_ARRAY_MIN) + NJS_OBJ_TYPE_ERROR, NJS_OBJ_TYPE_EVAL_ERROR, NJS_OBJ_TYPE_INTERNAL_ERROR, @@ -116,6 +119,7 @@ NJS_OBJ_TYPE_TYPE_ERROR, NJS_OBJ_TYPE_URI_ERROR, NJS_OBJ_TYPE_MEMORY_ERROR, + NJS_OBJ_TYPE_MAX, } njs_object_type_t;
--- a/src/test/njs_unit_test.c Thu Jul 30 17:47:05 2020 +0000 +++ b/src/test/njs_unit_test.c Tue Jul 28 16:58:59 2020 +0300 @@ -17637,6 +17637,149 @@ { njs_str("var qs = require('querystring');" "qs.unescape('abc%CE%B1%CE%B1%CE%B1%CE%B1def')"), njs_str("abcααααdef") }, + + /* TextEncoder. */ + + { njs_str("var en = new TextEncoder(); typeof en.encode()"), + njs_str("object") }, + + { njs_str("var en = new TextEncoder(); en.encode()"), + njs_str("") }, + + { njs_str("var en = new TextEncoder(); var res = en.encode('α'); res"), + njs_str("206,177") }, + + { njs_str("var en = new TextEncoder(); var res = en.encode('α1α'); res[2]"), + njs_str("49") }, + + { njs_str("var en = new TextEncoder(); en.encode(String.bytesFrom([0xCE]))"), + njs_str("239,191,189") }, + + { njs_str("var en = new TextEncoder();" + "en.encode(String.bytesFrom([0xCE, 0xB1, 0xCE]))"), + njs_str("206,177,239,191,189") }, + + { njs_str("var en = new TextEncoder();" + "en.encode(String.bytesFrom([0xCE, 0xCE, 0xB1]))"), + njs_str("239,191,189,206,177") }, + + { njs_str("var en = new TextEncoder(); en.encoding"), + njs_str("utf-8") }, + + { njs_str("TextEncoder.prototype.encode.apply({}, [])"), + njs_str("TypeError: \"this\" is not a TextEncoder") }, + + { njs_str("var en = new TextEncoder();" + "var utf8 = new Uint8Array(5);" + "var res = en.encodeInto('ααααα', utf8); njs.dump(res)"), + njs_str("{read:2,written:4}") }, + + { njs_str("var en = new TextEncoder();" + "var utf8 = new Uint8Array(10);" + "var res = en.encodeInto('ααααα', utf8); njs.dump(res)"), + njs_str("{read:5,written:10}") }, + + { njs_str("var str = String.bytesFrom([0xCE]);" + "var en = new TextEncoder();" + "var utf8 = new Uint8Array(3);" + "var res = en.encodeInto(str, utf8); " + "[njs.dump(res), utf8]"), + njs_str("{read:1,written:3},239,191,189") }, + + { njs_str("var str = String.bytesFrom([0xCE]);" + "var en = new TextEncoder();" + "var utf8 = new Uint8Array(5);" + "en.encodeInto(str, utf8); utf8"), + njs_str("239,191,189,0,0") 
}, + + { njs_str("var str = String.bytesFrom([0xCE, 0xB1, 0xCE]);" + "var en = new TextEncoder();" + "var utf8 = new Uint8Array(5);" + "var res = en.encodeInto(str, utf8);" + "[njs.dump(res), utf8]"), + njs_str("{read:2,written:5},206,177,239,191,189") }, + + { njs_str("var str = String.bytesFrom([0xCE, 0xCE, 0xB1]);" + "var en = new TextEncoder();" + "var utf8 = new Uint8Array(5);" + "var res = en.encodeInto(str, utf8);" + "[njs.dump(res), utf8]"), + njs_str("{read:2,written:5},239,191,189,206,177") }, + + { njs_str("TextEncoder.prototype.encodeInto.apply({}, [])"), + njs_str("TypeError: \"this\" is not a TextEncoder") }, + + { njs_str("(new TextEncoder()).encodeInto('', 0.12) "), + njs_str("TypeError: The \"destination\" argument must be an instance of Uint8Array") }, + + /* TextDecoder. */ + + { njs_str("var de = new TextDecoder();" + "var u8arr = new Uint8Array([240, 160, 174, 183]);" + "var u16arr = new Uint16Array([41200, 47022]);" + "var u32arr = new Uint32Array([3081674992]);" + "[u8arr, u16arr, u32arr].map(v=>de.decode(v)).join(',')"), + njs_str("𠮷,𠮷,𠮷") }, + + { njs_str("var de = new TextDecoder();" + "[new Uint8Array([240, 160]), " + " new Uint8Array([174]), " + " new Uint8Array([183])].map(v=>de.decode(v, {stream: 1}))[2]"), + njs_str("𠮷") }, + + { njs_str("var de = new TextDecoder();" + "de.decode(new Uint8Array([240, 160]), {stream: 1});" + "de.decode(new Uint8Array([174]), {stream: 1});" + "de.decode(new Uint8Array([183]))"), + njs_str("𠮷") }, + + { njs_str("var de = new TextDecoder();" + "de.decode(new Uint8Array([240, 160]), {stream: 1});" + "de.decode()"), + njs_str("�") }, + + { njs_str("var de = new TextDecoder('utf-8', {fatal: true});" + "de.decode(new Uint8Array([240, 160]))"), + njs_str("TypeError: The encoded data was not valid") }, + + { njs_str("var de = new TextDecoder('utf-8', {fatal: false});" + "de.decode(new Uint8Array([240, 160]))"), + njs_str("�") }, + + { njs_str("var en = new TextEncoder();" + "var de = new TextDecoder('utf-8', 
{ignoreBOM: true});" + "en.encode(de.decode(new Uint8Array([239, 187, 191, 50])))"), + njs_str("239,187,191,50") }, + + { njs_str("var en = new TextEncoder();" + "var de = new TextDecoder('utf-8', {ignoreBOM: false});" + "en.encode(de.decode(new Uint8Array([239, 187, 191, 50])))"), + njs_str("50") }, + + { njs_str("var en = new TextEncoder(); var de = new TextDecoder();" + "en.encode(de.decode(new Uint8Array([239, 187, 191, 50])))"), + njs_str("50") }, + + { njs_str("var de = new TextDecoder(); de.decode('')"), + njs_str("TypeError: The \"input\" argument must be an instance of TypedArray") }, + + { njs_str("var de = new TextDecoder({})"), + njs_str("RangeError: The \"[object Object]\" encoding is not supported") }, + + { njs_str("var de = new TextDecoder('foo')"), + njs_str("RangeError: The \"foo\" encoding is not supported") }, + + { njs_str("var de = new TextDecoder(); de.encoding"), + njs_str("utf-8") }, + + { njs_str("var de = new TextDecoder(); de.fatal"), + njs_str("false") }, + + { njs_str("var de = new TextDecoder(); de.ignoreBOM"), + njs_str("false") }, + + { njs_str("TextDecoder.prototype.decode.apply({}, new Uint8Array([1]))"), + njs_str("TypeError: \"this\" is not a TextDecoder") }, };
