summaryrefslogtreecommitdiff
path: root/ext/json/parser/parser.rl
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser/parser.rl')
-rw-r--r--ext/json/parser/parser.rl747
1 files changed, 571 insertions, 176 deletions
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 15ec2b6843..6d4cc7a5b0 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -1,5 +1,308 @@
+#include "ruby.h"
#include "../fbuffer/fbuffer.h"
-#include "parser.h"
+
+/* Ruby objects cached at file scope (CNaN and Encoding_UTF_8 are assigned in Init_parser). */
+static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
+static VALUE CNaN, CInfinity, CMinusInfinity;
+
+/* Method IDs, interned in Init_parser and used with rb_funcall(). */
+static ID i_json_creatable_p, i_json_create, i_create_id,
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
+
+/* Option-name symbols, created in Init_parser and compared against hash keys in configure_parser_i(). */
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
+ sym_decimal_class, sym_match_string;
+
+/* Encoding indexes, filled in by Init_parser from rb_ascii8bit_encindex() and rb_utf8_encindex(). */
+static int binary_encindex;
+static int utf8_encindex;
+
+#ifndef HAVE_RB_GC_MARK_LOCATIONS
+// For TruffleRuby
+/*
+ * Fallback used when HAVE_RB_GC_MARK_LOCATIONS is not defined: marks every
+ * VALUE in the half-open range [start, end).
+ */
+void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+{
+    // The range is only read, so keep the const qualifier of `start`;
+    // initializing a plain `VALUE *` from it would discard the qualifier.
+    const VALUE *value = start;
+
+    while (value < end) {
+        rb_gc_mark(*value);
+        value++;
+    }
+}
+#endif
+
+#ifndef HAVE_RB_HASH_BULK_INSERT
+// For TruffleRuby
+/*
+ * Fallback used when HAVE_RB_HASH_BULK_INSERT is not defined: stores the
+ * `count` VALUEs of `pairs`, laid out as name, value, name, value, ...,
+ * into `hash` one pair at a time.
+ */
+void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
+{
+    for (long offset = 0; offset < count; offset += 2) {
+        VALUE name = pairs[offset];
+        VALUE value = pairs[offset + 1];
+        rb_hash_aset(hash, name, value);
+    }
+    RB_GC_GUARD(hash);
+}
+#endif
+
+/* name cache */
+
+#include <string.h>
+#include <ctype.h>
+
+// Object names are likely to be repeated, and are frozen.
+// As such we can re-use them if we keep a cache of the ones we've seen so far,
+// and save much more expensive lookups into the global fstring table.
+// This cache implementation is deliberately simple, as we're optimizing for compactness,
+// to be able to fit safely on the stack.
+// As such, binary search into a sorted array gives a good tradeoff between compactness and
+// performance.
+// Maximum number of entries the name cache can hold.
+#define JSON_RVALUE_CACHE_CAPA 63
+typedef struct rvalue_cache_struct {
+ // Number of slots of `entries` currently in use.
+ int length;
+ // Cached names, kept ordered by rstring_cache_cmp() (shorter first, then
+ // memcmp order) so that the fetch functions can binary-search them.
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
+} rvalue_cache;
+
+static rb_encoding *enc_utf8;
+
+#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
+
+/*
+ * Returns the interned UTF-8 String for the bytes str[0, length).
+ * Uses rb_enc_interned_str() when available; otherwise builds a String,
+ * freezes it and sends it the unary minus method.
+ */
+static inline VALUE build_interned_string(const char *str, const long length)
+{
+# ifdef HAVE_RB_ENC_INTERNED_STR
+    return rb_enc_interned_str(str, length, enc_utf8);
+# else
+    VALUE frozen = rb_str_freeze(rb_utf8_str_new(str, length));
+    return rb_funcall(frozen, i_uminus, 0);
+# endif
+}
+
+/* Returns the Symbol whose name is the bytes str[0, length). */
+static inline VALUE build_symbol(const char *str, const long length)
+{
+    VALUE name = build_interned_string(str, length);
+    return rb_str_intern(name);
+}
+
+/*
+ * Inserts `rstring` at position `index`, shifting the entries from `index`
+ * onwards one slot to the right. The caller must have checked that the cache
+ * is not full.
+ */
+static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
+{
+    VALUE *slot = &cache->entries[index];
+    const int shifted = cache->length - index;
+
+    MEMMOVE(slot + 1, slot, VALUE, shifted);
+    *slot = rstring;
+    cache->length++;
+}
+
+/*
+ * Ordering used by the name cache: names of different lengths compare by
+ * length, names of equal length compare with memcmp().
+ * Returns a negative, zero or positive int.
+ */
+static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+{
+    const long cached_length = RSTRING_LEN(rstring);
+
+    if (length != cached_length) {
+        return (int)(length - cached_length);
+    }
+    return memcmp(str, RSTRING_PTR(rstring), length);
+}
+
+/*
+ * Looks up the object name str[0, length) in the cache of interned Strings,
+ * interning it (and inserting it while there is room) on a miss.
+ *
+ * Returns the cached or newly interned String, or Qfalse when the name is not
+ * handled here (longer than JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH, not starting
+ * with a letter, or containing a backslash) and the caller must build it
+ * another way.
+ */
+static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // <ctype.h> functions require a value representable as unsigned char
+    // (or EOF); a plain char may be negative for the bytes of a multi-byte
+    // UTF-8 name, hence the cast.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    // Binary search; on a miss, `mid` and `last_cmp` describe the last probe
+    // and are used below to compute the insertion position.
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, entry);
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fall back to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rstring = build_interned_string(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // The new name sorts after the last probed entry.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rstring);
+    }
+    return rstring;
+}
+
+/*
+ * Looks up the object name str[0, length) in the cache of Symbols, creating
+ * the Symbol (and inserting it while there is room) on a miss. Entries are
+ * compared through rb_sym2str().
+ *
+ * Returns the cached or newly created Symbol, or Qfalse when the name is not
+ * handled here (longer than JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH, not starting
+ * with a letter, or containing a backslash) and the caller must build it
+ * another way.
+ */
+static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // <ctype.h> functions require a value representable as unsigned char
+    // (or EOF); a plain char may be negative for the bytes of a multi-byte
+    // UTF-8 name, hence the cast.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    // Binary search; on a miss, `mid` and `last_cmp` describe the last probe
+    // and are used below to compute the insertion position.
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fall back to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rsymbol = build_symbol(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // The new name sorts after the last probed entry.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rsymbol);
+    }
+    return rsymbol;
+}
+
+/* rvalue stack */
+
+#define RVALUE_STACK_INITIAL_CAPA 128
+
+// Tells rvalue_stack_grow() how a stack must be enlarged: a heap-allocated
+// stack is reallocated in place, a stack-allocated one is first spilled to the heap.
+enum rvalue_stack_type {
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
+ RVALUE_STACK_STACK_ALLOCATED = 1,
+};
+
+// Growable stack of VALUEs that holds parsed values until the enclosing
+// array or object is built from them.
+typedef struct rvalue_stack_struct {
+ enum rvalue_stack_type type; // where the stack currently lives
+ long capa; // number of VALUE slots available in ptr
+ long head; // number of slots in use; the next push writes ptr[head]
+ VALUE *ptr; // storage for the values
+} rvalue_stack;
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
+
+/*
+ * Doubles the capacity of `stack`. A stack-allocated stack is spilled to the
+ * heap through rvalue_stack_spill(); a heap-allocated one is reallocated in
+ * place. Returns the stack to use from now on.
+ */
+static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+        return rvalue_stack_spill(stack, handle, stack_ref);
+    }
+
+    const long doubled = stack->capa * 2;
+    REALLOC_N(stack->ptr, VALUE, doubled);
+    stack->capa = doubled;
+    return stack;
+}
+
+/*
+ * Appends `value` to `stack`, growing it first when it is full. `handle` and
+ * `stack_ref` are forwarded to rvalue_stack_grow().
+ */
+static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+{
+    const int is_full = stack->head >= stack->capa;
+
+    if (RB_UNLIKELY(is_full)) {
+        stack = rvalue_stack_grow(stack, handle, stack_ref);
+    }
+    stack->ptr[stack->head++] = value;
+}
+
+/* Returns a pointer to the first of the `count` most recently pushed values. */
+static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
+{
+    const long first = stack->head - count;
+    return stack->ptr + first;
+}
+
+/* Discards the `count` most recently pushed values. */
+static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
+{
+    stack->head = stack->head - count;
+}
+
+/* GC mark callback: marks the `head` values currently held by the stack. */
+static void rvalue_stack_mark(void *ptr)
+{
+    rvalue_stack *stack = ptr;
+    const VALUE *first = stack->ptr;
+    const VALUE *past_last = first + stack->head;
+
+    rb_gc_mark_locations(first, past_last);
+}
+
+/* Free callback: releases the value storage and the stack itself. A NULL `ptr` is ignored. */
+static void rvalue_stack_free(void *ptr)
+{
+    rvalue_stack *stack = ptr;
+
+    if (!stack) {
+        return;
+    }
+    ruby_xfree(stack->ptr);
+    ruby_xfree(stack);
+}
+
+/* Size callback: the stack header plus `capa` VALUE slots. */
+static size_t rvalue_stack_memsize(const void *ptr)
+{
+    const rvalue_stack *stack = ptr;
+    const size_t slots_size = sizeof(VALUE) * stack->capa;
+
+    return sizeof(rvalue_stack) + slots_size;
+}
+
+// TypedData description of the object created by rvalue_stack_spill() to own
+// a heap-allocated rvalue_stack: wires up the mark, free and size callbacks
+// defined in this file.
+static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
+ "JSON::Ext::Parser/rvalue_stack",
+ {
+ .dmark = rvalue_stack_mark,
+ .dfree = rvalue_stack_free,
+ .dsize = rvalue_stack_memsize,
+ },
+ 0, 0,
+ RUBY_TYPED_FREE_IMMEDIATELY,
+};
+
+/*
+ * Moves `old_stack` into a new heap-allocated stack with twice the capacity.
+ *
+ * The new stack is wrapped in a TypedData object stored in *handle, and
+ * *stack_ref is updated to point at it. The first `head` values of the old
+ * stack are copied over. Returns the new stack.
+ */
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+ rvalue_stack *stack;
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+ *stack_ref = stack;
+ // Start from a copy of the old header (this carries over `head`).
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
+
+ // NOTE(review): until the three assignments below complete, the new header
+ // still holds old_stack's ptr and type — confirm that a failing ALLOC_N
+ // cannot leave the handle's free callback with a buffer it does not own.
+ stack->capa = old_stack->capa << 1;
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
+ return stack;
+}
+
+/*
+ * Frees the stack owned by `handle` right away. The handle's data pointer is
+ * cleared before freeing so the handle no longer refers to the released stack.
+ */
+static void rvalue_stack_eagerly_release(VALUE handle)
+{
+    rvalue_stack *owned;
+
+    TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, owned);
+    RTYPEDDATA_DATA(handle) = NULL;
+    rvalue_stack_free(owned);
+}
/* unicode */
@@ -67,6 +370,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len;
}
+/* State of one parser: the source text, the options and the scratch structures used while parsing. */
+typedef struct JSON_ParserStruct {
+ /* Source String set by parser_init(); a zero value is treated as "not initialized". */
+ VALUE Vsource;
+ /* RSTRING_PTR and RSTRING_LEN of Vsource, captured by parser_init(). */
+ char *source;
+ long len;
+ /* The parse_string action passes memo + 1 as the start of the string contents. */
+ char *memo;
+ /* Option values stored by configure_parser_i(); a falsy option is stored as Qfalse. */
+ VALUE create_id;
+ VALUE object_class;
+ VALUE array_class;
+ VALUE decimal_class;
+ VALUE match_string;
+ FBuffer fbuffer;
+ /* Maximum nesting depth; 0 disables the check, parser_init() defaults it to 100. */
+ int max_nesting;
+ bool allow_nan;
+ bool allow_trailing_comma;
+ /* True only while the parse_name action is parsing an object name. */
+ bool parsing_name;
+ bool symbolize_names;
+ bool freeze;
+ bool create_additions;
+ /* True when the :create_additions option was given as nil. */
+ bool deprecated_create_additions;
+ /* Cache of object names already seen. */
+ rvalue_cache name_cache;
+ /* Value stack in use, and the object owning it once it has been spilled to the heap. */
+ rvalue_stack *stack;
+ VALUE stack_handle;
+} JSON_Parser;
+
+/*
+ * GET_PARSER declares `JSON_Parser *json`, unwraps it from `self` and raises
+ * TypeError when the parser has no source yet.
+ */
+#define GET_PARSER \
+ GET_PARSER_INIT; \
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
+
+/* GET_PARSER_INIT does the same without the check; both expect `self` in scope. */
+#define GET_PARSER_INIT \
+ JSON_Parser *json; \
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
+
+#define MinusInfinity "-Infinity"
+#define EVIL 0x666
+
+static const rb_data_type_t JSON_Parser_type;
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+
+
#define PARSE_ERROR_FRAGMENT_LEN 32
#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
@@ -84,21 +431,9 @@ static void raise_parse_error(const char *format, const char *start)
ptr = buffer;
}
- rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
}
-static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
- i_object_class, i_array_class, i_decimal_class,
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
-
-static int binary_encindex;
-static int utf8_encindex;
-
%%{
machine JSON_common;
@@ -135,27 +470,25 @@ static int utf8_encindex;
write data;
action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
if (np == NULL) {
fhold; fbreak;
} else {
- if (NIL_P(json->object_class)) {
- OBJ_FREEZE(last_name);
- rb_hash_aset(*result, last_name, v);
- } else {
- rb_funcall(*result, i_aset, 2, last_name, v);
- }
fexec np;
}
}
+ action allow_trailing_comma { json->allow_trailing_comma }
+
action parse_name {
char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, fpc, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { fhold; fbreak; } else fexec np;
+ json->parsing_name = true;
+ np = JSON_parse_string(json, fpc, pe, result);
+ json->parsing_name = false;
+ if (np == NULL) { fhold; fbreak; } else {
+ PUSH(*result);
+ fexec np;
+ }
}
action exit { fhold; fbreak; }
@@ -165,33 +498,57 @@ static int utf8_encindex;
main := (
begin_object
- (pair (next_pair)*)? ignore*
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
end_object
) @exit;
}%%
+/* Pushes `result` onto the parser's value stack; expects `JSON_Parser *json` in scope. */
+#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
+
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
+ long stack_head = json->stack->head;
%% write init;
%% write exec;
if (cs >= JSON_object_first_final) {
- if (json->create_additions) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->object_class)) {
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
+ long index = 0;
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ while (index < count) {
+ VALUE name = items[index++];
+ VALUE value = items[index++];
+ rb_funcall(object, i_aset, 2, name, value);
+ }
+ *result = object;
+ } else {
+ VALUE hash;
+#ifdef HAVE_RB_HASH_NEW_CAPA
+ hash = rb_hash_new_capa(count >> 1);
+#else
+ hash = rb_hash_new();
+#endif
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
+ *result = hash;
+ }
+ rvalue_stack_pop(json->stack, count);
+
+ if (RB_UNLIKELY(json->create_additions)) {
VALUE klassname;
- if (NIL_P(json->object_class)) {
- klassname = rb_hash_aref(*result, json->create_id);
+ if (json->object_class) {
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
} else {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
+ klassname = rb_hash_aref(*result, json->create_id);
}
if (!NIL_P(klassname)) {
VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
@@ -209,7 +566,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
-
%%{
machine JSON_value;
include JSON_common;
@@ -241,7 +597,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
action parse_string {
char *np = JSON_parse_string(json, fpc, pe, result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
+ if (np == NULL) {
+ fhold;
+ fbreak;
+ } else {
+ fexec np;
+ }
}
action parse_number {
@@ -256,9 +617,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
np = JSON_parse_float(json, fpc, pe, result);
- if (np != NULL) fexec np;
+ if (np != NULL) {
+ fexec np;
+ }
np = JSON_parse_integer(json, fpc, pe, result);
- if (np != NULL) fexec np;
+ if (np != NULL) {
+ fexec np;
+ }
fhold; fbreak;
}
@@ -301,6 +666,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
}
if (cs >= JSON_value_first_final) {
+ PUSH(*result);
return p;
} else {
return NULL;
@@ -362,7 +728,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
if (cs >= JSON_float_first_final) {
VALUE mod = Qnil;
ID method_id = 0;
- if (!NIL_P(json->decimal_class)) {
+ if (json->decimal_class) {
if (rb_respond_to(json->decimal_class, i_try_convert)) {
mod = json->decimal_class;
method_id = i_try_convert;
@@ -421,39 +787,51 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
if (np == NULL) {
fhold; fbreak;
} else {
- if (NIL_P(json->array_class)) {
- rb_ary_push(*result, v);
- } else {
- rb_funcall(*result, i_leftshift, 1, v);
- }
fexec np;
}
}
+ action allow_trailing_comma { json->allow_trailing_comma }
+
action exit { fhold; fbreak; }
next_element = value_separator ignore* begin_value >parse_value;
main := begin_array ignore*
((begin_value >parse_value ignore*)
- (ignore* next_element ignore*)*)?
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
end_array @exit;
}%%
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE array_class = json->array_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
+ long stack_head = json->stack->head;
%% write init;
%% write exec;
if(cs >= JSON_array_first_final) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->array_class)) {
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ long index;
+ for (index = 0; index < count; index++) {
+ rb_funcall(array, i_leftshift, 1, items[index]);
+ }
+ *result = array;
+ } else {
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
+ *result = array;
+ }
+ rvalue_stack_pop(json->stack, count);
+
return p + 1;
} else {
raise_parse_error("unexpected token at '%s'", p);
@@ -469,7 +847,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
VALUE result;
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
- result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
} else {
result = rb_utf8_str_new(start, (long)(end - start));
}
@@ -487,13 +865,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}
-static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
+static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
int unescape_len;
char buf[4];
+ if (is_name) {
+ VALUE cached_key;
+ if (RB_UNLIKELY(symbolize)) {
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
+ } else {
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
+ }
+
+ if (RB_LIKELY(cached_key)) {
+ return cached_key;
+ }
+ }
+
pe = memchr(p, '\\', bufferSize);
if (RB_LIKELY(pe == NULL)) {
return build_string(string, stringEnd, intern, symbolize);
@@ -602,7 +993,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
write data;
action parse_string {
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
fhold;
fbreak;
@@ -671,7 +1062,7 @@ static VALUE convert_encoding(VALUE source)
{
int encindex = RB_ENCODING_GET(source);
- if (encindex == utf8_encindex) {
+ if (RB_LIKELY(encindex == utf8_encindex)) {
return source;
}
@@ -683,6 +1074,68 @@ static VALUE convert_encoding(VALUE source)
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
}
+/*
+ * rb_hash_foreach() callback that applies one option to the parser.
+ *
+ * `key` and `val` are one entry of the options hash and `data` is the
+ * JSON_Parser being configured. Keys that are not recognised are ignored.
+ * Raises TypeError when :max_nesting is truthy but not a Fixnum.
+ * Always returns ST_CONTINUE.
+ */
+static int configure_parser_i(VALUE key, VALUE val, VALUE data)
+{
+    JSON_Parser *json = (JSON_Parser *)data;
+
+    if (key == sym_max_nesting) {
+        if (RTEST(val)) {
+            // FIX2INT() does not check its argument, so reject anything that
+            // is not a Fixnum instead of decoding arbitrary bits as a limit.
+            Check_Type(val, T_FIXNUM);
+            json->max_nesting = FIX2INT(val);
+        } else {
+            json->max_nesting = 0;
+        }
+    }
+    else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
+    else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
+    else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
+    else if (key == sym_freeze) { json->freeze = RTEST(val); }
+    else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_create_additions) {
+        // An explicit nil enables additions and flags the deprecated usage.
+        if (NIL_P(val)) {
+            json->create_additions = true;
+            json->deprecated_create_additions = true;
+        } else {
+            json->create_additions = RTEST(val);
+            json->deprecated_create_additions = false;
+        }
+    }
+
+    return ST_CONTINUE;
+}
+
+/*
+ * Initializes `json` from `source` and the options hash `opts` (or nil).
+ *
+ * Raises TypeError when the parser already has a source or `opts` is neither
+ * nil nor a Hash, and ArgumentError when both :symbolize_names and
+ * :create_additions are enabled. The source is converted through
+ * convert_encoding() before being stored.
+ */
+static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
+{
+    if (json->Vsource) {
+        rb_raise(rb_eTypeError, "already initialized instance");
+    }
+
+    // Defaults, possibly overridden by the options below.
+    json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
+    json->max_nesting = 100;
+
+    if (!NIL_P(opts)) {
+        Check_Type(opts, T_HASH);
+    }
+
+    if (!NIL_P(opts) && RHASH_SIZE(opts) > 0) {
+        // We assume in most cases few keys are set so it's faster to go over
+        // the provided keys than to check all possible keys.
+        rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
+
+        if (json->symbolize_names && json->create_additions) {
+            rb_raise(rb_eArgError,
+                    "options :symbolize_names and :create_additions cannot be "
+                    " used in conjunction");
+        }
+
+        // Only look up JSON.create_id when additions are on and no id was given.
+        if (json->create_additions && !json->create_id) {
+            json->create_id = rb_funcall(mJSON, i_create_id, 0);
+        }
+    }
+
+    VALUE converted = convert_encoding(StringValue(source));
+    StringValue(converted);
+    json->len = RSTRING_LEN(converted);
+    json->source = RSTRING_PTR(converted);
+    json->Vsource = converted;
+}
+
/*
* call-seq: new(source, opts => {})
*
@@ -717,117 +1170,11 @@ static VALUE convert_encoding(VALUE source)
*/
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
{
- VALUE source, opts;
GET_PARSER_INIT;
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
-
rb_check_arity(argc, 1, 2);
- source = argv[0];
- opts = Qnil;
- if (argc == 2) {
- opts = argv[1];
- Check_Type(argv[1], T_HASH);
- if (RHASH_SIZE(argv[1]) > 0) {
- opts = argv[1];
- }
- }
-
- if (!NIL_P(opts)) {
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 100;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_freeze);
- if (option_given_p(opts, tmp)) {
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->freeze = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- tmp = rb_hash_aref(opts, tmp);
- if (NIL_P(tmp)) {
- json->create_additions = 1;
- json->deprecated_create_additions = 1;
- } else {
- json->create_additions = RTEST(tmp);
- json->deprecated_create_additions = 0;
- }
- }
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
- tmp = ID2SYM(i_create_id);
- if (option_given_p(opts, tmp)) {
- json->create_id = rb_hash_aref(opts, tmp);
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- tmp = ID2SYM(i_decimal_class);
- if (option_given_p(opts, tmp)) {
- json->decimal_class = rb_hash_aref(opts, tmp);
- } else {
- json->decimal_class = Qnil;
- }
- tmp = ID2SYM(i_match_string);
- if (option_given_p(opts, tmp)) {
- VALUE match_string = rb_hash_aref(opts, tmp);
- json->match_string = RTEST(match_string) ? match_string : Qnil;
- } else {
- json->match_string = Qnil;
- }
- } else {
- json->max_nesting = 100;
- json->allow_nan = 0;
- json->create_additions = 0;
- json->create_id = Qnil;
- json->object_class = Qnil;
- json->array_class = Qnil;
- json->decimal_class = Qnil;
- }
- source = convert_encoding(StringValue(source));
- StringValue(source);
- json->len = RSTRING_LEN(source);
- json->source = RSTRING_PTR(source);
- json->Vsource = source;
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
return self;
}
@@ -862,11 +1209,26 @@ static VALUE cParser_parse(VALUE self)
VALUE result = Qnil;
GET_PARSER;
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
+
%% write init;
p = json->source;
pe = p + json->len;
%% write exec;
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
+
if (cs >= JSON_first_final && p == pe) {
return result;
} else {
@@ -875,18 +1237,43 @@ static VALUE cParser_parse(VALUE self)
}
}
-#ifndef HAVE_RB_GC_MARK_LOCATIONS
-// For TruffleRuby
-void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+/*
+ * Implementation of the singleton method Parser.parse(source, opts).
+ *
+ * Parses `source` with a JSON_Parser kept in a local variable instead of a
+ * parser object. Returns the parsed value, or raises a parse error when the
+ * input is not consumed entirely in a final state.
+ */
+static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
{
- VALUE *value = start;
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
- while (value < end) {
- rb_gc_mark(*value);
- value++;
+ JSON_Parser _parser = {0};
+ JSON_Parser *json = &_parser;
+ parser_init(json, source, opts);
+
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ // The value stack starts in a local buffer of RVALUE_STACK_INITIAL_CAPA slots.
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
+
+ %% write init;
+ p = json->source;
+ pe = p + json->len;
+ %% write exec;
+
+ // A handle is only set once the stack has been spilled to the heap; free it right away.
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
+
+ if (cs >= JSON_first_final && p == pe) {
+ return result;
+ } else {
+ raise_parse_error("unexpected token at '%s'", p);
+ return Qnil;
}
}
-#endif
static void JSON_mark(void *ptr)
{
@@ -897,6 +1284,8 @@ static void JSON_mark(void *ptr)
rb_gc_mark(json->array_class);
rb_gc_mark(json->decimal_class);
rb_gc_mark(json->match_string);
+ rb_gc_mark(json->stack_handle);
+
const VALUE *name_cache_entries = &json->name_cache.entries[0];
rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length);
}
@@ -959,6 +1348,8 @@ void Init_parser(void)
rb_define_method(cParser, "parse", cParser_parse, 0);
rb_define_method(cParser, "source", cParser_source, 0);
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
+
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
rb_gc_register_mark_object(CNaN);
@@ -971,31 +1362,35 @@ void Init_parser(void)
rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
+ sym_freeze = ID2SYM(rb_intern("freeze"));
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
+ sym_create_id = ID2SYM(rb_intern("create_id"));
+ sym_object_class = ID2SYM(rb_intern("object_class"));
+ sym_array_class = ID2SYM(rb_intern("array_class"));
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
+ sym_match_string = ID2SYM(rb_intern("match_string"));
+
+ i_create_id = rb_intern("create_id");
i_json_creatable_p = rb_intern("json_creatable?");
i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_decimal_class = rb_intern("decimal_class");
i_match = rb_intern("match");
- i_match_string = rb_intern("match_string");
i_deep_const_get = rb_intern("deep_const_get");
i_aset = rb_intern("[]=");
i_aref = rb_intern("[]");
i_leftshift = rb_intern("<<");
i_new = rb_intern("new");
i_try_convert = rb_intern("try_convert");
- i_freeze = rb_intern("freeze");
i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();
+ enc_utf8 = rb_utf8_encoding();
}
/*