summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean Boussier <[email protected]> 2025-01-15 12:54:25 +0100
committer: Hiroshi SHIBATA <[email protected]> 2025-01-20 16:09:00 +0900
commit: ef585744c0db927efa8fbb072912458c2200bb5e (patch)
tree: 79d0608e14f3882a822bef29dea88f9ab47d94ec
parent: 1c8fc25b889b6f6b61d26669d4f7559a8b9884d6 (diff)
Finalize Kevin's handrolled parser.
And get rid of the Ragel parser. This is 7% faster on activitypub, 15% faster on twitter and 11% faster on citm_catalog. There might be some more optimization opportunities; I did a quick optimization pass to fix a regression in string parsing, but other than that I haven't dug much into performance.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/12598
-rw-r--r-- ext/json/parser/parser.c 2791
-rw-r--r-- ext/json/parser/parser.rl 1434
-rw-r--r-- test/json/json_parser_test.rb 7
3 files changed, 589 insertions, 3643 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 2906cfd1e8..366ba4e7b5 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1,5 +1,3 @@
-/* This file is automatically generated from parser.rl by using ragel */
-#line 1 "parser.rl"
#include "ruby.h"
#include "../fbuffer/fbuffer.h"
@@ -30,7 +28,8 @@ static const char deprecated_create_additions_warning[] =
#ifndef HAVE_RB_HASH_BULK_INSERT
// For TruffleRuby
-void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
+void
+rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
{
long index = 0;
while (index < count) {
@@ -42,6 +41,11 @@ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
}
#endif
+#ifndef HAVE_RB_HASH_NEW_CAPA
+#define rb_hash_new_capa(n) rb_hash_new()
+#endif
+
+
/* name cache */
#include <string.h>
@@ -231,13 +235,14 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu
return stack;
}
-static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
{
if (RB_UNLIKELY(stack->head >= stack->capa)) {
stack = rvalue_stack_grow(stack, handle, stack_ref);
}
stack->ptr[stack->head] = value;
stack->head++;
+ return value;
}
static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
@@ -301,10 +306,12 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle,
static void rvalue_stack_eagerly_release(VALUE handle)
{
- rvalue_stack *stack;
- TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
- RTYPEDDATA_DATA(handle) = NULL;
- rvalue_stack_free(stack);
+ if (handle) {
+ rvalue_stack *stack;
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+ rvalue_stack_free(stack);
+ }
}
/* unicode */
@@ -387,34 +394,25 @@ typedef struct JSON_ParserStruct {
bool freeze;
bool create_additions;
bool deprecated_create_additions;
-} JSON_Parser;
+} JSON_ParserConfig;
typedef struct JSON_ParserStateStruct {
- JSON_Parser *json;
- VALUE Vsource;
+ JSON_ParserConfig *config;
VALUE stack_handle;
- char *source;
- long len;
- char *memo;
+ const char *cursor;
+ const char *end;
FBuffer fbuffer;
rvalue_stack *stack;
rvalue_cache name_cache;
int in_array;
+ int current_nesting;
} JSON_ParserState;
-#define GET_PARSER \
- JSON_Parser *json; \
- TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
-
-#define MinusInfinity "-Infinity"
-#define EVIL 0x666
+#define GET_PARSER_CONFIG \
+ JSON_ParserConfig *config; \
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
-static const rb_data_type_t JSON_Parser_type;
-static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static const rb_data_type_t JSON_ParserConfig_type;
#ifndef HAVE_STRNLEN
static size_t strnlen(const char *s, size_t maxlen)
@@ -432,7 +430,7 @@ static void raise_parse_error(const char *format, const char *start)
{
char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
- size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
+ size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
const char *ptr = start;
if (len == PARSE_ERROR_FRAGMENT_LEN) {
@@ -444,1798 +442,61 @@ static void raise_parse_error(const char *format, const char *start)
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
}
+static const bool whitespace[256] = {
+ [' '] = 1,
+ ['\t'] = 1,
+ ['\n'] = 1,
+ ['\r'] = 1,
+ ['/'] = 1,
+};
-
-#line 472 "parser.rl"
-
-
-
-#line 454 "parser.c"
-enum {JSON_object_start = 1};
-enum {JSON_object_first_final = 32};
-enum {JSON_object_error = 0};
-
-enum {JSON_object_en_main = 1};
-
-
-#line 512 "parser.rl"
-
-
-#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack)
-
-static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
+static void
+json_eat_comments(JSON_ParserState *state)
{
- int cs = EVIL;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
-
- long stack_head = state->stack->head;
-
-
-#line 478 "parser.c"
- {
- cs = JSON_object_start;
- }
-
-#line 527 "parser.rl"
-
-#line 485 "parser.c"
- {
- short _widec;
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 123 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 47: goto st28;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st2;
- goto st0;
-tr2:
-#line 491 "parser.rl"
- {
- char *np;
- json->parsing_name = true;
- np = JSON_parse_string(state, json, p, pe, result);
- json->parsing_name = false;
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {
- PUSH(*result);
- {p = (( np))-1;}
- }
- }
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 530 "parser.c"
- switch( (*p) ) {
- case 13: goto st3;
- case 32: goto st3;
- case 47: goto st4;
- case 58: goto st8;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st5;
- case 47: goto st7;
- }
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 42 )
- goto st6;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st3;
- }
- goto st5;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 10 )
- goto st3;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 13: goto st8;
- case 32: goto st8;
- case 34: goto tr11;
- case 45: goto tr11;
- case 47: goto st24;
- case 73: goto tr11;
- case 78: goto tr11;
- case 91: goto tr11;
- case 102: goto tr11;
- case 110: goto tr11;
- case 116: goto tr11;
- case 123: goto tr11;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr11;
- } else if ( (*p) >= 9 )
- goto st8;
- goto st0;
-tr11:
-#line 480 "parser.rl"
- {
- char *np = JSON_parse_value(state, json, p, pe, result, current_nesting);
- if (np == NULL) {
- p--; {p++; cs = 9; goto _out;}
- } else {
- {p = (( np))-1;}
- }
- }
- goto st9;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
-#line 611 "parser.c"
- _widec = (*p);
- if ( (*p) < 13 ) {
- if ( (*p) > 9 ) {
- if ( 10 <= (*p) && (*p) <= 10 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 9 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 13 ) {
- if ( (*p) < 44 ) {
- if ( 32 <= (*p) && (*p) <= 32 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 44 ) {
- if ( 47 <= (*p) && (*p) <= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 125: goto tr4;
- case 269: goto st10;
- case 288: goto st10;
- case 300: goto st11;
- case 303: goto st16;
- case 525: goto st9;
- case 544: goto st9;
- case 556: goto st2;
- case 559: goto st20;
- }
- if ( _widec > 266 ) {
- if ( 521 <= _widec && _widec <= 522 )
- goto st9;
- } else if ( _widec >= 265 )
- goto st10;
- goto st0;
-tr4:
-#line 502 "parser.rl"
- { p--; {p++; cs = 32; goto _out;} }
- goto st32;
-st32:
- if ( ++p == pe )
- goto _test_eof32;
-case 32:
-#line 679 "parser.c"
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 44: goto st11;
- case 47: goto st16;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 13: goto st11;
- case 32: goto st11;
- case 34: goto tr2;
- case 47: goto st12;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st11;
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- switch( (*p) ) {
- case 42: goto st13;
- case 47: goto st15;
- }
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- if ( (*p) == 42 )
- goto st14;
- goto st13;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- switch( (*p) ) {
- case 42: goto st14;
- case 47: goto st11;
- }
- goto st13;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- if ( (*p) == 10 )
- goto st11;
- goto st15;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- switch( (*p) ) {
- case 42: goto st17;
- case 47: goto st19;
- }
- goto st0;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- if ( (*p) == 42 )
- goto st18;
- goto st17;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- switch( (*p) ) {
- case 42: goto st18;
- case 47: goto st10;
- }
- goto st17;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- if ( (*p) == 10 )
- goto st10;
- goto st19;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- _widec = (*p);
- if ( (*p) > 42 ) {
- if ( 47 <= (*p) && (*p) <= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 42 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st17;
- case 303: goto st19;
- case 554: goto st21;
- case 559: goto st23;
- }
- goto st0;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- _widec = (*p);
- if ( (*p) < 42 ) {
- if ( (*p) <= 41 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 42 ) {
- if ( 43 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st18;
- case 554: goto st22;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st21;
- } else if ( _widec >= 128 )
- goto st17;
- goto st0;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
- _widec = (*p);
- if ( (*p) < 43 ) {
- if ( (*p) > 41 ) {
- if ( 42 <= (*p) && (*p) <= 42 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 46 ) {
- if ( (*p) > 47 ) {
- if ( 48 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st18;
- case 303: goto st10;
- case 554: goto st22;
- case 559: goto st9;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st21;
- } else if ( _widec >= 128 )
- goto st17;
- goto st0;
-st23:
- if ( ++p == pe )
- goto _test_eof23;
-case 23:
- _widec = (*p);
- if ( (*p) < 10 ) {
- if ( (*p) <= 9 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 10 ) {
- if ( 11 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 489 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 266: goto st10;
- case 522: goto st9;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st23;
- } else if ( _widec >= 128 )
- goto st19;
- goto st0;
-st24:
- if ( ++p == pe )
- goto _test_eof24;
-case 24:
- switch( (*p) ) {
- case 42: goto st25;
- case 47: goto st27;
- }
- goto st0;
-st25:
- if ( ++p == pe )
- goto _test_eof25;
-case 25:
- if ( (*p) == 42 )
- goto st26;
- goto st25;
-st26:
- if ( ++p == pe )
- goto _test_eof26;
-case 26:
- switch( (*p) ) {
- case 42: goto st26;
- case 47: goto st8;
- }
- goto st25;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
- if ( (*p) == 10 )
- goto st8;
- goto st27;
-st28:
- if ( ++p == pe )
- goto _test_eof28;
-case 28:
- switch( (*p) ) {
- case 42: goto st29;
- case 47: goto st31;
- }
- goto st0;
-st29:
- if ( ++p == pe )
- goto _test_eof29;
-case 29:
- if ( (*p) == 42 )
- goto st30;
- goto st29;
-st30:
- if ( ++p == pe )
- goto _test_eof30;
-case 30:
- switch( (*p) ) {
- case 42: goto st30;
- case 47: goto st2;
- }
- goto st29;
-st31:
- if ( ++p == pe )
- goto _test_eof31;
-case 31:
- if ( (*p) == 10 )
- goto st2;
- goto st31;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof32: cs = 32; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof23: cs = 23; goto _test_eof;
- _test_eof24: cs = 24; goto _test_eof;
- _test_eof25: cs = 25; goto _test_eof;
- _test_eof26: cs = 26; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
- _test_eof28: cs = 28; goto _test_eof;
- _test_eof29: cs = 29; goto _test_eof;
- _test_eof30: cs = 30; goto _test_eof;
- _test_eof31: cs = 31; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 528 "parser.rl"
-
- if (cs >= JSON_object_first_final) {
- long count = state->stack->head - stack_head;
-
- if (RB_UNLIKELY(json->object_class)) {
- VALUE object = rb_class_new_instance(0, 0, json->object_class);
- long index = 0;
- VALUE *items = rvalue_stack_peek(state->stack, count);
- while (index < count) {
- VALUE name = items[index++];
- VALUE value = items[index++];
- rb_funcall(object, i_aset, 2, name, value);
- }
- *result = object;
- } else {
- VALUE hash;
-#ifdef HAVE_RB_HASH_NEW_CAPA
- hash = rb_hash_new_capa(count >> 1);
-#else
- hash = rb_hash_new();
-#endif
- rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), hash);
- *result = hash;
- }
- rvalue_stack_pop(state->stack, count);
-
- if (RB_UNLIKELY(json->create_additions)) {
- VALUE klassname;
- if (json->object_class) {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
- } else {
- klassname = rb_hash_aref(*result, json->create_id);
- }
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
- if (json->deprecated_create_additions) {
- json_deprecated(deprecated_create_additions_warning);
- }
- *result = rb_funcall(klass, i_json_create, 1, *result);
+ if (state->cursor + 1 < state->end) {
+ switch(state->cursor[1]) {
+ case '/': {
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
+ if (!state->cursor) {
+ state->cursor = state->end;
+ } else {
+ state->cursor++;
}
+ break;
}
- }
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-#line 1070 "parser.c"
-enum {JSON_value_start = 1};
-enum {JSON_value_first_final = 29};
-enum {JSON_value_error = 0};
-
-enum {JSON_value_en_main = 1};
-
-
-#line 661 "parser.rl"
-
-
-static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
-
-
-#line 1086 "parser.c"
- {
- cs = JSON_value_start;
- }
-
-#line 668 "parser.rl"
-
-#line 1093 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-st1:
- if ( ++p == pe )
- goto _test_eof1;
-case 1:
- switch( (*p) ) {
- case 13: goto st1;
- case 32: goto st1;
- case 34: goto tr2;
- case 45: goto tr3;
- case 47: goto st6;
- case 73: goto st10;
- case 78: goto st17;
- case 91: goto tr7;
- case 102: goto st19;
- case 110: goto st23;
- case 116: goto st26;
- case 123: goto tr11;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr3;
- } else if ( (*p) >= 9 )
- goto st1;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-tr2:
-#line 606 "parser.rl"
- {
- char *np = JSON_parse_string(state, json, p, pe, result);
- if (np == NULL) {
- p--;
- {p++; cs = 29; goto _out;}
- } else {
- {p = (( np))-1;}
- }
- }
- goto st29;
-tr3:
-#line 616 "parser.rl"
- {
- char *np;
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- {p = (( p + 10))-1;}
- p--; {p++; cs = 29; goto _out;}
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- }
- }
- np = JSON_parse_number(state, json, p, pe, result);
- if (np != NULL) {
- {p = (( np))-1;}
- }
- p--; {p++; cs = 29; goto _out;}
- }
- goto st29;
-tr7:
-#line 634 "parser.rl"
- {
- char *np;
- state->in_array++;
- np = JSON_parse_array(state, json, p, pe, result, current_nesting + 1);
- state->in_array--;
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
- }
- goto st29;
-tr11:
-#line 642 "parser.rl"
- {
- char *np;
- np = JSON_parse_object(state, json, p, pe, result, current_nesting + 1);
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
- }
- goto st29;
-tr25:
-#line 599 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- raise_parse_error("unexpected token at '%s'", p - 7);
- }
- }
- goto st29;
-tr27:
-#line 592 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- raise_parse_error("unexpected token at '%s'", p - 2);
- }
- }
- goto st29;
-tr31:
-#line 586 "parser.rl"
- {
- *result = Qfalse;
- }
- goto st29;
-tr34:
-#line 583 "parser.rl"
- {
- *result = Qnil;
- }
- goto st29;
-tr37:
-#line 589 "parser.rl"
- {
- *result = Qtrue;
- }
- goto st29;
-st29:
- if ( ++p == pe )
- goto _test_eof29;
-case 29:
-#line 648 "parser.rl"
- { p--; {p++; cs = 29; goto _out;} }
-#line 1220 "parser.c"
- switch( (*p) ) {
- case 13: goto st29;
- case 32: goto st29;
- case 47: goto st2;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st29;
- goto st0;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 42: goto st3;
- case 47: goto st5;
- }
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 42 )
- goto st4;
- goto st3;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st4;
- case 47: goto st29;
- }
- goto st3;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 10 )
- goto st29;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st9;
- }
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 42 )
- goto st8;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 42: goto st8;
- case 47: goto st1;
- }
- goto st7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 10 )
- goto st1;
- goto st9;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 110 )
- goto st11;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- if ( (*p) == 102 )
- goto st12;
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 105 )
- goto st13;
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- if ( (*p) == 110 )
- goto st14;
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 105 )
- goto st15;
- goto st0;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- if ( (*p) == 116 )
- goto st16;
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 121 )
- goto tr25;
- goto st0;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- if ( (*p) == 97 )
- goto st18;
- goto st0;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- if ( (*p) == 78 )
- goto tr27;
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- if ( (*p) == 97 )
- goto st20;
- goto st0;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- if ( (*p) == 108 )
- goto st21;
- goto st0;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- if ( (*p) == 115 )
- goto st22;
- goto st0;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
- if ( (*p) == 101 )
- goto tr31;
- goto st0;
-st23:
- if ( ++p == pe )
- goto _test_eof23;
-case 23:
- if ( (*p) == 117 )
- goto st24;
- goto st0;
-st24:
- if ( ++p == pe )
- goto _test_eof24;
-case 24:
- if ( (*p) == 108 )
- goto st25;
- goto st0;
-st25:
- if ( ++p == pe )
- goto _test_eof25;
-case 25:
- if ( (*p) == 108 )
- goto tr34;
- goto st0;
-st26:
- if ( ++p == pe )
- goto _test_eof26;
-case 26:
- if ( (*p) == 114 )
- goto st27;
- goto st0;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
- if ( (*p) == 117 )
- goto st28;
- goto st0;
-st28:
- if ( ++p == pe )
- goto _test_eof28;
-case 28:
- if ( (*p) == 101 )
- goto tr37;
- goto st0;
- }
- _test_eof1: cs = 1; goto _test_eof;
- _test_eof29: cs = 29; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof23: cs = 23; goto _test_eof;
- _test_eof24: cs = 24; goto _test_eof;
- _test_eof25: cs = 25; goto _test_eof;
- _test_eof26: cs = 26; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
- _test_eof28: cs = 28; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 669 "parser.rl"
-
- if (json->freeze) {
- OBJ_FREEZE(*result);
- }
-
- if (cs >= JSON_value_first_final) {
- PUSH(*result);
- return p;
- } else {
- return NULL;
- }
-}
-
-
-#line 1476 "parser.c"
-enum {JSON_integer_start = 1};
-enum {JSON_integer_first_final = 3};
-enum {JSON_integer_error = 0};
-
-enum {JSON_integer_en_main = 1};
-
-
-#line 690 "parser.rl"
-
-
-#define MAX_FAST_INTEGER_SIZE 18
-static inline VALUE fast_parse_integer(char *p, char *pe)
-{
- bool negative = false;
- if (*p == '-') {
- negative = true;
- p++;
- }
-
- long long memo = 0;
- while (p < pe) {
- memo *= 10;
- memo += *p - '0';
- p++;
- }
-
- if (negative) {
- memo = -memo;
- }
- return LL2NUM(memo);
-}
-
-static char *JSON_decode_integer(JSON_ParserState *state, JSON_Parser *json, char *p, VALUE *result)
-{
- long len = p - state->memo;
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
- *result = fast_parse_integer(state->memo, p);
- } else {
- fbuffer_clear(&state->fbuffer);
- fbuffer_append(&state->fbuffer, state->memo, len);
- fbuffer_append_char(&state->fbuffer, '\0');
- *result = rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10);
- }
- return p + 1;
-}
-
-
-#line 1524 "parser.c"
-enum {JSON_float_start = 1};
-enum {JSON_float_first_final = 6};
-enum {JSON_float_error = 0};
-
-enum {JSON_float_en_main = 1};
-
-
-#line 742 "parser.rl"
-
-
-static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- bool is_float = false;
-
-
-#line 1541 "parser.c"
- {
- cs = JSON_float_start;
- }
-
-#line 750 "parser.rl"
- state->memo = p;
-
-#line 1549 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 45: goto st2;
- case 48: goto st6;
- }
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st10;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 48 )
- goto st6;
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st10;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 45: goto st0;
- case 46: goto tr8;
- case 69: goto tr9;
- case 101: goto tr9;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st0;
- goto tr7;
-tr7:
-#line 734 "parser.rl"
- { p--; {p++; cs = 7; goto _out;} }
- goto st7;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
-#line 1596 "parser.c"
- goto st0;
-tr8:
-#line 735 "parser.rl"
- { is_float = true; }
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 1606 "parser.c"
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 69: goto st4;
- case 101: goto st4;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr7;
-tr9:
-#line 735 "parser.rl"
- { is_float = true; }
- goto st4;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
-#line 1632 "parser.c"
- switch( (*p) ) {
- case 43: goto st5;
- case 45: goto st5;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st9;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st9;
- goto st0;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- switch( (*p) ) {
- case 69: goto st0;
- case 101: goto st0;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st9;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr7;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- switch( (*p) ) {
- case 45: goto st0;
- case 46: goto tr8;
- case 69: goto tr9;
- case 101: goto tr9;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st10;
- goto tr7;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 752 "parser.rl"
-
- if (cs >= JSON_float_first_final) {
- if (!is_float) {
- return JSON_decode_integer(state, json, p, result);
- }
- VALUE mod = Qnil;
- ID method_id = 0;
- if (json->decimal_class) {
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
- mod = json->decimal_class;
- method_id = i_try_convert;
- } else if (rb_respond_to(json->decimal_class, i_new)) {
- mod = json->decimal_class;
- method_id = i_new;
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
- VALUE name = rb_class_name(json->decimal_class);
- const char *name_cstr = RSTRING_PTR(name);
- const char *last_colon = strrchr(name_cstr, ':');
- if (last_colon) {
- const char *mod_path_end = last_colon - 1;
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
- mod = rb_path_to_class(mod_path);
-
- const char *method_name_beg = last_colon + 1;
- long before_len = method_name_beg - name_cstr;
- long len = RSTRING_LEN(name) - before_len;
- VALUE method_name = rb_str_substr(name, before_len, len);
- method_id = SYM2ID(rb_str_intern(method_name));
- } else {
- mod = rb_mKernel;
- method_id = SYM2ID(rb_str_intern(name));
+ case '*': {
+ state->cursor += 2;
+ while (true) {
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
+ if (!state->cursor) {
+ state->cursor = state->end;
+ break;
+ } else {
+ state->cursor++;
+ if (state->cursor < state->end && *state->cursor == '/') {
+ state->cursor++;
+ break;
+ }
+ }
}
+ break;
}
+ default:
+ return;
}
-
- long len = p - state->memo;
- fbuffer_clear(&state->fbuffer);
- fbuffer_append(&state->fbuffer, state->memo, len);
- fbuffer_append_char(&state->fbuffer, '\0');
-
- if (method_id) {
- VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer));
- *result = rb_funcallv(mod, method_id, 1, &text);
- } else {
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1));
- }
-
- return p + 1;
- } else {
- return NULL;
}
}
-
-
-#line 1745 "parser.c"
-enum {JSON_array_start = 1};
-enum {JSON_array_first_final = 22};
-enum {JSON_array_error = 0};
-
-enum {JSON_array_en_main = 1};
-
-
-#line 832 "parser.rl"
-
-
-static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
+static inline void
+json_eat_whitespace(JSON_ParserState *state)
{
- int cs = EVIL;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
- long stack_head = state->stack->head;
-
-
-#line 1766 "parser.c"
- {
- cs = JSON_array_start;
- }
-
-#line 844 "parser.rl"
-
-#line 1773 "parser.c"
- {
- short _widec;
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 91 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st18;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 93: goto tr4;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st2;
- goto st0;
-tr2:
-#line 812 "parser.rl"
- {
- VALUE v = Qnil;
- char *np = JSON_parse_value(state, json, p, pe, &v, current_nesting);
- if (np == NULL) {
- p--; {p++; cs = 3; goto _out;}
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
+ if (RB_LIKELY(*state->cursor != '/')) {
+ state->cursor++;
} else {
- {p = (( np))-1;}
+ json_eat_comments(state);
}
}
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 1828 "parser.c"
- _widec = (*p);
- if ( 44 <= (*p) && (*p) <= 44 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 13: goto st3;
- case 32: goto st3;
- case 47: goto st4;
- case 93: goto tr4;
- case 300: goto st8;
- case 556: goto st13;
- }
- if ( 9 <= _widec && _widec <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st5;
- case 47: goto st7;
- }
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 42 )
- goto st6;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st3;
- }
- goto st5;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 10 )
- goto st3;
- goto st7;
-tr4:
-#line 824 "parser.rl"
- { p--; {p++; cs = 22; goto _out;} }
- goto st22;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
-#line 1887 "parser.c"
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 13: goto st8;
- case 32: goto st8;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st9;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st8;
- goto st0;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- switch( (*p) ) {
- case 42: goto st10;
- case 47: goto st12;
- }
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 42 )
- goto st11;
- goto st10;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 42: goto st11;
- case 47: goto st8;
- }
- goto st10;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 10 )
- goto st8;
- goto st12;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- _widec = (*p);
- if ( (*p) < 13 ) {
- if ( (*p) > 9 ) {
- if ( 10 <= (*p) && (*p) <= 10 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 9 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 13 ) {
- if ( (*p) > 32 ) {
- if ( 47 <= (*p) && (*p) <= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 32 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 34: goto tr2;
- case 45: goto tr2;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 93: goto tr4;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- case 269: goto st8;
- case 288: goto st8;
- case 303: goto st9;
- case 525: goto st13;
- case 544: goto st13;
- case 559: goto st14;
- }
- if ( _widec < 265 ) {
- if ( 48 <= _widec && _widec <= 57 )
- goto tr2;
- } else if ( _widec > 266 ) {
- if ( 521 <= _widec && _widec <= 522 )
- goto st13;
- } else
- goto st8;
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- _widec = (*p);
- if ( (*p) > 42 ) {
- if ( 47 <= (*p) && (*p) <= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 42 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st10;
- case 303: goto st12;
- case 554: goto st15;
- case 559: goto st17;
- }
- goto st0;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- _widec = (*p);
- if ( (*p) < 42 ) {
- if ( (*p) <= 41 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 42 ) {
- if ( 43 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st11;
- case 554: goto st16;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st15;
- } else if ( _widec >= 128 )
- goto st10;
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- _widec = (*p);
- if ( (*p) < 43 ) {
- if ( (*p) > 41 ) {
- if ( 42 <= (*p) && (*p) <= 42 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 46 ) {
- if ( (*p) > 47 ) {
- if ( 48 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) >= 47 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 298: goto st11;
- case 303: goto st8;
- case 554: goto st16;
- case 559: goto st13;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st15;
- } else if ( _widec >= 128 )
- goto st10;
- goto st0;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- _widec = (*p);
- if ( (*p) < 10 ) {
- if ( (*p) <= 9 ) {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else if ( (*p) > 10 ) {
- if ( 11 <= (*p) )
- { _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- } else {
- _widec = (short)(128 + ((*p) - -128));
- if (
-#line 822 "parser.rl"
- json->allow_trailing_comma ) _widec += 256;
- }
- switch( _widec ) {
- case 266: goto st8;
- case 522: goto st13;
- }
- if ( _widec > 383 ) {
- if ( 384 <= _widec && _widec <= 639 )
- goto st17;
- } else if ( _widec >= 128 )
- goto st12;
- goto st0;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- switch( (*p) ) {
- case 42: goto st19;
- case 47: goto st21;
- }
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- if ( (*p) == 42 )
- goto st20;
- goto st19;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- switch( (*p) ) {
- case 42: goto st20;
- case 47: goto st2;
- }
- goto st19;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- if ( (*p) == 10 )
- goto st2;
- goto st21;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 845 "parser.rl"
-
- if(cs >= JSON_array_first_final) {
- long count = state->stack->head - stack_head;
-
- if (RB_UNLIKELY(json->array_class)) {
- VALUE array = rb_class_new_instance(0, 0, json->array_class);
- VALUE *items = rvalue_stack_peek(state->stack, count);
- long index;
- for (index = 0; index < count; index++) {
- rb_funcall(array, i_leftshift, 1, items[index]);
- }
- *result = array;
- } else {
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
- *result = array;
- }
- rvalue_stack_pop(state->stack, count);
-
- return p + 1;
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- return NULL;
- }
}
static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
@@ -2264,7 +525,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}
-static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
+static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
@@ -2284,10 +545,11 @@ static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *s
return build_string(string, stringEnd, intern, symbolize);
}
-static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
+static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
+ const char *p = string, *pe = string, *unescape, *bufferStart;
+ char *buffer;
int unescape_len;
char buf[4];
@@ -2311,7 +573,8 @@ static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *s
VALUE result = rb_str_buf_new(bufferSize);
rb_enc_associate_index(result, utf8_encindex);
- buffer = bufferStart = RSTRING_PTR(result);
+ buffer = RSTRING_PTR(result);
+ bufferStart = buffer;
while (pe < stringEnd) {
if (*pe == '\\') {
@@ -2405,20 +668,157 @@ static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *s
return result;
}
+#define MAX_FAST_INTEGER_SIZE 18
+static inline VALUE fast_decode_integer(const char *p, const char *pe)
+{
+ bool negative = false;
+ if (*p == '-') {
+ negative = true;
+ p++;
+ }
-#line 2410 "parser.c"
-enum {JSON_string_start = 1};
-enum {JSON_string_first_final = 9};
-enum {JSON_string_error = 0};
+ long long memo = 0;
+ while (p < pe) {
+ memo *= 10;
+ memo += *p - '0';
+ p++;
+ }
-enum {JSON_string_en_main = 1};
+ if (negative) {
+ memo = -memo;
+ }
+ return LL2NUM(memo);
+}
+static VALUE
+json_decode_integer(JSON_ParserState *state, const char *start, const char *end)
+{
+ long len = end - start;
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
+ return fast_decode_integer(start, end);
+ }
-#line 1068 "parser.rl"
+ fbuffer_clear(&state->fbuffer);
+ fbuffer_append(&state->fbuffer, start, len);
+ fbuffer_append_char(&state->fbuffer, '\0');
+ return rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10);
+}
+static VALUE json_decode_float(JSON_ParserState *state, const char *start, const char *end)
+{
+ VALUE mod = Qnil;
+ ID method_id = 0;
+ JSON_ParserConfig *config = state->config;
+ if (config->decimal_class) {
+ // TODO: we should move this to the constructor
+ if (rb_respond_to(config->decimal_class, i_try_convert)) {
+ mod = config->decimal_class;
+ method_id = i_try_convert;
+ } else if (rb_respond_to(config->decimal_class, i_new)) {
+ mod = config->decimal_class;
+ method_id = i_new;
+ } else if (RB_TYPE_P(config->decimal_class, T_CLASS)) {
+ VALUE name = rb_class_name(config->decimal_class);
+ const char *name_cstr = RSTRING_PTR(name);
+ const char *last_colon = strrchr(name_cstr, ':');
+ if (last_colon) {
+ const char *mod_path_end = last_colon - 1;
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
+ mod = rb_path_to_class(mod_path);
+
+ const char *method_name_beg = last_colon + 1;
+ long before_len = method_name_beg - name_cstr;
+ long len = RSTRING_LEN(name) - before_len;
+ VALUE method_name = rb_str_substr(name, before_len, len);
+ method_id = SYM2ID(rb_str_intern(method_name));
+ } else {
+ mod = rb_mKernel;
+ method_id = SYM2ID(rb_str_intern(name));
+ }
+ }
+ }
+
+ long len = end - start;
+ fbuffer_clear(&state->fbuffer);
+ fbuffer_append(&state->fbuffer, start, len);
+ fbuffer_append_char(&state->fbuffer, '\0');
-static int
-match_i(VALUE regexp, VALUE klass, VALUE memo)
+ if (method_id) {
+ VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer));
+ return rb_funcallv(mod, method_id, 1, &text);
+ } else {
+ return DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1));
+ }
+}
+
+static inline VALUE json_decode_array(JSON_ParserState *state, long count)
+{
+ VALUE array;
+ if (RB_UNLIKELY(state->config->array_class)) {
+ array = rb_class_new_instance(0, 0, state->config->array_class);
+ VALUE *items = rvalue_stack_peek(state->stack, count);
+ long index;
+ for (index = 0; index < count; index++) {
+ rb_funcall(array, i_leftshift, 1, items[index]);
+ }
+ } else {
+ array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
+ }
+
+ rvalue_stack_pop(state->stack, count);
+
+ if (state->config->freeze) {
+ RB_OBJ_FREEZE(array);
+ }
+
+ return array;
+}
+
+static inline VALUE json_decode_object(JSON_ParserState *state, long count)
+{
+ VALUE object;
+ if (RB_UNLIKELY(state->config->object_class)) {
+ object = rb_class_new_instance(0, 0, state->config->object_class);
+ long index = 0;
+ VALUE *items = rvalue_stack_peek(state->stack, count);
+ while (index < count) {
+ VALUE name = items[index++];
+ VALUE value = items[index++];
+ rb_funcall(object, i_aset, 2, name, value);
+ }
+ } else {
+ object = rb_hash_new_capa(count);
+ rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
+ }
+
+ rvalue_stack_pop(state->stack, count);
+
+ if (RB_UNLIKELY(state->config->create_additions)) {
+ VALUE klassname;
+ if (state->config->object_class) {
+ klassname = rb_funcall(object, i_aref, 1, state->config->create_id);
+ } else {
+ klassname = rb_hash_aref(object, state->config->create_id);
+ }
+ if (!NIL_P(klassname)) {
+ VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
+ if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
+ if (state->config->deprecated_create_additions) {
+ json_deprecated(deprecated_create_additions_warning);
+ }
+ object = rb_funcall(klass, i_json_create, 1, object);
+ }
+ }
+ }
+
+ if (state->config->freeze) {
+ RB_OBJ_FREEZE(object);
+ }
+
+ return object;
+}
+
+static int match_i(VALUE regexp, VALUE klass, VALUE memo)
{
if (regexp == Qundef) return ST_STOP;
if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
@@ -2429,178 +829,330 @@ match_i(VALUE regexp, VALUE klass, VALUE memo)
return ST_CONTINUE;
}
-static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result)
+static inline VALUE json_decode_string(JSON_ParserState *state, const char *start, const char *end, bool escaped, bool is_name)
{
- int cs = EVIL;
- VALUE match_string;
-
-
-#line 2439 "parser.c"
- {
- cs = JSON_string_start;
- }
-
-#line 1088 "parser.rl"
- state->memo = p;
-
-#line 2447 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 34 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 34: goto tr2;
- case 92: goto st3;
- }
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
- goto st0;
- goto st2;
-tr2:
-#line 1050 "parser.rl"
- {
- *result = json_string_fastpath(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
- {p = (( p + 1))-1;}
- p--;
- {p++; cs = 9; goto _out;}
- }
-#line 1043 "parser.rl"
- {
- *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
- {p = (( p + 1))-1;}
- p--;
- {p++; cs = 9; goto _out;}
- }
- goto st9;
-tr6:
-#line 1043 "parser.rl"
- {
- *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
- {p = (( p + 1))-1;}
- p--;
- {p++; cs = 9; goto _out;}
+ VALUE string;
+ bool intern = is_name || state->config->freeze;
+ bool symbolize = is_name && state->config->symbolize_names;
+ if (escaped) {
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
+ } else {
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
}
- goto st9;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
-#line 2500 "parser.c"
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 117 )
- goto st5;
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
- goto st0;
- goto st4;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 34: goto tr6;
- case 92: goto st3;
- }
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
- goto st0;
- goto st4;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st6;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st6;
- } else
- goto st6;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st7;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st7;
- } else
- goto st7;
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st8;
- } else
- goto st8;
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st4;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st4;
- } else
- goto st4;
- goto st0;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 1090 "parser.rl"
-
- if (json->create_additions && RTEST(match_string = json->match_string)) {
+
+ if (RB_UNLIKELY(state->config->create_additions && RTEST(state->config->match_string))) {
VALUE klass;
VALUE memo = rb_ary_new2(2);
- rb_ary_push(memo, *result);
- rb_hash_foreach(match_string, match_i, memo);
+ rb_ary_push(memo, string);
+ rb_hash_foreach(state->config->match_string, match_i, memo);
klass = rb_ary_entry(memo, 1);
if (RTEST(klass)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
+ string = rb_funcall(klass, i_json_create, 1, string);
}
}
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
+ return string;
+}
+
+#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack)
+
+static const bool string_scan[256] = {
+ // ASCII Control Characters
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // ASCII Characters
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static inline VALUE json_parse_string(JSON_ParserState *state, bool is_name)
+{
+ state->cursor++;
+ const char *start = state->cursor;
+ bool escaped = false;
+
+ while (state->cursor < state->end) {
+ if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
+ switch (*state->cursor) {
+ case '"': {
+ VALUE string = json_decode_string(state, start, state->cursor, escaped, is_name);
+ state->cursor++;
+ return PUSH(string);
+ }
+ case '\\': {
+ state->cursor++;
+ escaped = true;
+ if ((unsigned char)*state->cursor < 0x20) {
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
+ }
+ break;
+ }
+ default:
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
+ break;
+ }
+ }
+
+ state->cursor++;
+ }
+
+ raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
+ return Qfalse;
+}
+
+static VALUE json_parse_any(JSON_ParserState *state)
+{
+ json_eat_whitespace(state);
+ if (state->cursor >= state->end) {
+ raise_parse_error("unexpected end of input", state->cursor);
+ }
+
+ switch (*state->cursor) {
+ case 'n':
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
+ state->cursor += 4;
+ return PUSH(Qnil);
+ }
+
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ break;
+ case 't':
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
+ state->cursor += 4;
+ return PUSH(Qtrue);
+ }
+
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ break;
+ case 'f':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
+ state->cursor += 5;
+ return PUSH(Qfalse);
+ }
+
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ break;
+ case 'N':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if (state->config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
+ state->cursor += 3;
+ return PUSH(CNaN);
+ }
+
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ break;
+ case 'I':
+ if (state->config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
+ state->cursor += 8;
+ return PUSH(CInfinity);
+ }
+
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ break;
+ case '-':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
+ if (state->config->allow_nan) {
+ state->cursor += 9;
+ return PUSH(CMinusInfinity);
+ } else {
+ raise_parse_error("unexpected token at '%s'", state->cursor);
+ }
+ }
+ // Fallthrough
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
+ bool integer = true;
+
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
+ const char *start = state->cursor;
+ state->cursor++;
+
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
+ state->cursor++;
+ }
+
+ long integer_length = state->cursor - start;
+
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
+ raise_parse_error("invalid number: %s", start);
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
+ raise_parse_error("invalid number: %s", start);
+ }
+
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
+ integer = false;
+ state->cursor++;
+
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
+ raise_parse_error("invalid number: %s", state->cursor);
+ }
+
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
+ state->cursor++;
+ }
+ }
+
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
+ integer = false;
+ state->cursor++;
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
+ state->cursor++;
+ }
+
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
+ raise_parse_error("invalid number: %s", state->cursor);
+ }
+
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
+ state->cursor++;
+ }
+ }
+
+ if (integer) {
+ return PUSH(json_decode_integer(state, start, state->cursor));
+ }
+ return PUSH(json_decode_float(state, start, state->cursor));
+ }
+ case '"': {
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
+ return json_parse_string(state, false);
+ break;
+ }
+ case '[': {
+ state->cursor++;
+ json_eat_whitespace(state);
+ long stack_head = state->stack->head;
+
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
+ state->cursor++;
+ return PUSH(json_decode_array(state, 0));
+ } else {
+ state->current_nesting++;
+ if (RB_UNLIKELY(state->config->max_nesting && (state->config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
+ state->in_array++;
+ json_parse_any(state);
+ }
+
+ while (true) {
+ json_eat_whitespace(state);
+
+ if (state->cursor < state->end) {
+ if (*state->cursor == ']') {
+ state->cursor++;
+ long count = state->stack->head - stack_head;
+ state->current_nesting--;
+ state->in_array--;
+ return PUSH(json_decode_array(state, count));
+ }
+
+ if (*state->cursor == ',') {
+ state->cursor++;
+ if (state->config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
+ continue;
+ }
+ }
+ json_parse_any(state);
+ continue;
+ }
+ }
+
+ raise_parse_error("expected ',' or ']' after array value", state->cursor);
+ }
+ break;
+ }
+ case '{': {
+ state->cursor++;
+ json_eat_whitespace(state);
+ long stack_head = state->stack->head;
+
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
+ state->cursor++;
+ return PUSH(json_decode_object(state, 0));
+ } else {
+ state->current_nesting++;
+ if (RB_UNLIKELY(state->config->max_nesting && (state->config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
+
+ if (*state->cursor != '"') {
+ raise_parse_error("expected object key, got '%s", state->cursor);
+ }
+ json_parse_string(state, true);
+
+ json_eat_whitespace(state);
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
+ raise_parse_error("expected ':' after object key", state->cursor);
+ }
+ state->cursor++;
+
+ json_parse_any(state);
+ }
+
+ while (true) {
+ json_eat_whitespace(state);
+
+ if (state->cursor < state->end) {
+ if (*state->cursor == '}') {
+ state->cursor++;
+ state->current_nesting--;
+ long count = state->stack->head - stack_head;
+ return PUSH(json_decode_object(state, count));
+ }
+
+ if (*state->cursor == ',') {
+ state->cursor++;
+ json_eat_whitespace(state);
+
+ if (state->config->allow_trailing_comma) {
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
+ continue;
+ }
+ }
+
+ if (*state->cursor != '"') {
+ raise_parse_error("expected object key, got: '%s'", state->cursor);
+ }
+ json_parse_string(state, true);
+
+ json_eat_whitespace(state);
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
+ raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
+ }
+ state->cursor++;
+
+ json_parse_any(state);
+
+ continue;
+ }
+ }
+
+ raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
+ }
+ break;
+ }
+
+ default:
+ raise_parse_error("unexpected character: '%s'", state->cursor);
+ break;
+ }
+
+ raise_parse_error("unreacheable: '%s'", state->cursor);
+}
+
+static void json_ensure_eof(JSON_ParserState *state)
+{
+ json_eat_whitespace(state);
+ if (state->cursor != state->end) {
+ raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
}
}
@@ -2634,50 +1186,50 @@ static VALUE convert_encoding(VALUE source)
static int configure_parser_i(VALUE key, VALUE val, VALUE data)
{
- JSON_Parser *json = (JSON_Parser *)data;
-
- if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
- else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
- else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
- else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
- else if (key == sym_freeze) { json->freeze = RTEST(val); }
- else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
- else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
+
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
+ else if (key == sym_create_id) { config->create_id = RTEST(val) ? val : Qfalse; }
+ else if (key == sym_object_class) { config->object_class = RTEST(val) ? val : Qfalse; }
+ else if (key == sym_array_class) { config->array_class = RTEST(val) ? val : Qfalse; }
+ else if (key == sym_decimal_class) { config->decimal_class = RTEST(val) ? val : Qfalse; }
+ else if (key == sym_match_string) { config->match_string = RTEST(val) ? val : Qfalse; }
else if (key == sym_create_additions) {
if (NIL_P(val)) {
- json->create_additions = true;
- json->deprecated_create_additions = true;
+ config->create_additions = true;
+ config->deprecated_create_additions = true;
} else {
- json->create_additions = RTEST(val);
- json->deprecated_create_additions = false;
+ config->create_additions = RTEST(val);
+ config->deprecated_create_additions = false;
}
}
return ST_CONTINUE;
}
-static void parser_init(JSON_Parser *json, VALUE opts)
+static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
{
- json->max_nesting = 100;
+ config->max_nesting = 100;
if (!NIL_P(opts)) {
Check_Type(opts, T_HASH);
if (RHASH_SIZE(opts) > 0) {
// We assume in most cases few keys are set so it's faster to go over
// the provided keys than to check all possible keys.
- rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)config);
- if (json->symbolize_names && json->create_additions) {
+ if (config->symbolize_names && config->create_additions) {
rb_raise(rb_eArgError,
"options :symbolize_names and :create_additions cannot be "
" used in conjunction");
}
- if (json->create_additions && !json->create_id) {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
+ if (config->create_additions && !config->create_id) {
+ config->create_id = rb_funcall(mJSON, i_create_id, 0);
}
}
@@ -2718,190 +1270,21 @@ static void parser_init(JSON_Parser *json, VALUE opts)
*/
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
{
- GET_PARSER;
+ GET_PARSER_CONFIG;
- parser_init(json, opts);
+ parser_config_init(config, opts);
return self;
}
-
-#line 2729 "parser.c"
-enum {JSON_start = 1};
-enum {JSON_first_final = 10};
-enum {JSON_error = 0};
-
-enum {JSON_en_main = 1};
-
-
-#line 1244 "parser.rl"
-
-
static VALUE cParser_parse_safe(VALUE vstate)
{
JSON_ParserState *state = (JSON_ParserState *)vstate;
- VALUE result = Qnil;
- char *p, *pe;
- int cs = EVIL;
- JSON_Parser *json = state->json;
-
-
-#line 2749 "parser.c"
- {
- cs = JSON_start;
- }
-
-#line 1255 "parser.rl"
- p = state->source;
- pe = p + state->len;
-
-#line 2758 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-st1:
- if ( ++p == pe )
- goto _test_eof1;
-case 1:
- switch( (*p) ) {
- case 13: goto st1;
- case 32: goto st1;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st6;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st1;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-tr2:
-#line 1236 "parser.rl"
- {
- char *np = JSON_parse_value(state, json, p, pe, &result, 0);
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
- }
- goto st10;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
-#line 2802 "parser.c"
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 47: goto st2;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 42: goto st3;
- case 47: goto st5;
- }
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 42 )
- goto st4;
- goto st3;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st4;
- case 47: goto st10;
- }
- goto st3;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 10 )
- goto st10;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st9;
- }
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 42 )
- goto st8;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 42: goto st8;
- case 47: goto st1;
- }
- goto st7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 10 )
- goto st1;
- goto st9;
- }
- _test_eof1: cs = 1; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 1258 "parser.rl"
-
- if (state->stack_handle) {
- rvalue_stack_eagerly_release(state->stack_handle);
- }
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- return Qnil;
- }
+ VALUE result = json_parse_any(state);
+ json_ensure_eof(state);
+ return result;
}
-static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource)
+static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
{
Vsource = convert_encoding(StringValue(Vsource));
StringValue(Vsource);
@@ -2914,10 +1297,9 @@ static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource)
};
JSON_ParserState _state = {
- .json = json,
- .len = RSTRING_LEN(Vsource),
- .source = RSTRING_PTR(Vsource),
- .Vsource = Vsource,
+ .config = config,
+ .cursor = RSTRING_PTR(Vsource),
+ .end = RSTRING_PTR(Vsource) + RSTRING_LEN(Vsource),
.stack = &stack,
};
JSON_ParserState *state = &_state;
@@ -2928,6 +1310,7 @@ static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource)
int interupted;
VALUE result = rb_protect(cParser_parse_safe, (VALUE)state, &interupted);
+ rvalue_stack_eagerly_release(state->stack_handle);
fbuffer_free(&state->fbuffer);
if (interupted) {
rb_jump_tag(interupted);
@@ -2945,8 +1328,8 @@ static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource)
*/
static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
{
- GET_PARSER;
- return cParser_parse(json, Vsource);
+ GET_PARSER_CONFIG;
+ return cParser_parse(config, Vsource);
}
static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
@@ -2954,36 +1337,36 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
Vsource = convert_encoding(StringValue(Vsource));
StringValue(Vsource);
- JSON_Parser _parser = {0};
- JSON_Parser *json = &_parser;
- parser_init(json, opts);
+ JSON_ParserConfig _config = {0};
+ JSON_ParserConfig *config = &_config;
+ parser_config_init(config, opts);
- return cParser_parse(json, Vsource);
+ return cParser_parse(config, Vsource);
}
static void JSON_mark(void *ptr)
{
- JSON_Parser *json = ptr;
- rb_gc_mark(json->create_id);
- rb_gc_mark(json->object_class);
- rb_gc_mark(json->array_class);
- rb_gc_mark(json->decimal_class);
- rb_gc_mark(json->match_string);
+ JSON_ParserConfig *config = ptr;
+ rb_gc_mark(config->create_id);
+ rb_gc_mark(config->object_class);
+ rb_gc_mark(config->array_class);
+ rb_gc_mark(config->decimal_class);
+ rb_gc_mark(config->match_string);
}
static void JSON_free(void *ptr)
{
- JSON_Parser *json = ptr;
- ruby_xfree(json);
+ JSON_ParserConfig *config = ptr;
+ ruby_xfree(config);
}
static size_t JSON_memsize(const void *ptr)
{
- return sizeof(JSON_Parser);
+ return sizeof(JSON_ParserConfig);
}
-static const rb_data_type_t JSON_Parser_type = {
- "JSON/Parser",
+static const rb_data_type_t JSON_ParserConfig_type = {
+ "JSON/ParserConfig",
{JSON_mark, JSON_free, JSON_memsize,},
0, 0,
RUBY_TYPED_FREE_IMMEDIATELY,
@@ -2991,8 +1374,8 @@ static const rb_data_type_t JSON_Parser_type = {
static VALUE cJSON_parser_s_allocate(VALUE klass)
{
- JSON_Parser *json;
- return TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
+ JSON_ParserConfig *config;
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
}
void Init_parser(void)
@@ -3057,11 +1440,3 @@ void Init_parser(void)
utf8_encindex = rb_utf8_encindex();
enc_utf8 = rb_utf8_encoding();
}
-
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
deleted file mode 100644
index 50226a7259..0000000000
--- a/ext/json/parser/parser.rl
+++ /dev/null
@@ -1,1434 +0,0 @@
-#include "ruby.h"
-#include "../fbuffer/fbuffer.h"
-
-static VALUE mJSON, eNestingError, Encoding_UTF_8;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-
-static ID i_json_creatable_p, i_json_create, i_create_id,
- i_chr, i_deep_const_get, i_match, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
-
-static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
- sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
- sym_decimal_class, sym_match_string;
-
-static int binary_encindex;
-static int utf8_encindex;
-
-#ifdef HAVE_RB_CATEGORY_WARN
-# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
-#else
-# define json_deprecated(message) rb_warn(message)
-#endif
-
-static const char deprecated_create_additions_warning[] =
- "JSON.load implicit support for `create_additions: true` is deprecated "
- "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
- "pass `create_additions: true`";
-
-#ifndef HAVE_RB_HASH_BULK_INSERT
-// For TruffleRuby
-void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
-{
- long index = 0;
- while (index < count) {
- VALUE name = pairs[index++];
- VALUE value = pairs[index++];
- rb_hash_aset(hash, name, value);
- }
- RB_GC_GUARD(hash);
-}
-#endif
-
-/* name cache */
-
-#include <string.h>
-#include <ctype.h>
-
-// Object names are likely to be repeated, and are frozen.
-// As such we can re-use them if we keep a cache of the ones we've seen so far,
-// and save much more expensive lookups into the global fstring table.
-// This cache implementation is deliberately simple, as we're optimizing for compactness,
-// to be able to fit safely on the stack.
-// As such, binary search into a sorted array gives a good tradeoff between compactness and
-// performance.
-#define JSON_RVALUE_CACHE_CAPA 63
-typedef struct rvalue_cache_struct {
- int length;
- VALUE entries[JSON_RVALUE_CACHE_CAPA];
-} rvalue_cache;
-
-static rb_encoding *enc_utf8;
-
-#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
-
-static inline VALUE build_interned_string(const char *str, const long length)
-{
-# ifdef HAVE_RB_ENC_INTERNED_STR
- return rb_enc_interned_str(str, length, enc_utf8);
-# else
- VALUE rstring = rb_utf8_str_new(str, length);
- return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
-# endif
-}
-
-static inline VALUE build_symbol(const char *str, const long length)
-{
- return rb_str_intern(build_interned_string(str, length));
-}
-
-static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
-{
- MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
- cache->length++;
- cache->entries[index] = rstring;
-}
-
-static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
-{
- long rstring_length = RSTRING_LEN(rstring);
- if (length == rstring_length) {
- return memcmp(str, RSTRING_PTR(rstring), length);
- } else {
- return (int)(length - rstring_length);
- }
-}
-
-static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
-{
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
- // Common names aren't likely to be very long. So we just don't
- // cache names above an arbitrary threshold.
- return Qfalse;
- }
-
- if (RB_UNLIKELY(!isalpha(str[0]))) {
- // Simple heuristic, if the first character isn't a letter,
- // we're much less likely to see this string again.
- // We mostly want to cache strings that are likely to be repeated.
- return Qfalse;
- }
-
- int low = 0;
- int high = cache->length - 1;
- int mid = 0;
- int last_cmp = 0;
-
- while (low <= high) {
- mid = (high + low) >> 1;
- VALUE entry = cache->entries[mid];
- last_cmp = rstring_cache_cmp(str, length, entry);
-
- if (last_cmp == 0) {
- return entry;
- } else if (last_cmp > 0) {
- low = mid + 1;
- } else {
- high = mid - 1;
- }
- }
-
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
- // We assume the overwhelming majority of names don't need to be escaped.
- // But if they do, we have to fallback to the slow path.
- return Qfalse;
- }
-
- VALUE rstring = build_interned_string(str, length);
-
- if (cache->length < JSON_RVALUE_CACHE_CAPA) {
- if (last_cmp > 0) {
- mid += 1;
- }
-
- rvalue_cache_insert_at(cache, mid, rstring);
- }
- return rstring;
-}
-
-static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
-{
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
- // Common names aren't likely to be very long. So we just don't
- // cache names above an arbitrary threshold.
- return Qfalse;
- }
-
- if (RB_UNLIKELY(!isalpha(str[0]))) {
- // Simple heuristic, if the first character isn't a letter,
- // we're much less likely to see this string again.
- // We mostly want to cache strings that are likely to be repeated.
- return Qfalse;
- }
-
- int low = 0;
- int high = cache->length - 1;
- int mid = 0;
- int last_cmp = 0;
-
- while (low <= high) {
- mid = (high + low) >> 1;
- VALUE entry = cache->entries[mid];
- last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
-
- if (last_cmp == 0) {
- return entry;
- } else if (last_cmp > 0) {
- low = mid + 1;
- } else {
- high = mid - 1;
- }
- }
-
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
- // We assume the overwhelming majority of names don't need to be escaped.
- // But if they do, we have to fallback to the slow path.
- return Qfalse;
- }
-
- VALUE rsymbol = build_symbol(str, length);
-
- if (cache->length < JSON_RVALUE_CACHE_CAPA) {
- if (last_cmp > 0) {
- mid += 1;
- }
-
- rvalue_cache_insert_at(cache, mid, rsymbol);
- }
- return rsymbol;
-}
-
-/* rvalue stack */
-
-#define RVALUE_STACK_INITIAL_CAPA 128
-
-enum rvalue_stack_type {
- RVALUE_STACK_HEAP_ALLOCATED = 0,
- RVALUE_STACK_STACK_ALLOCATED = 1,
-};
-
-typedef struct rvalue_stack_struct {
- enum rvalue_stack_type type;
- long capa;
- long head;
- VALUE *ptr;
-} rvalue_stack;
-
-static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
-
-static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
-{
- long required = stack->capa * 2;
-
- if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
- stack = rvalue_stack_spill(stack, handle, stack_ref);
- } else {
- REALLOC_N(stack->ptr, VALUE, required);
- stack->capa = required;
- }
- return stack;
-}
-
-static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
-{
- if (RB_UNLIKELY(stack->head >= stack->capa)) {
- stack = rvalue_stack_grow(stack, handle, stack_ref);
- }
- stack->ptr[stack->head] = value;
- stack->head++;
-}
-
-static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
-{
- return stack->ptr + (stack->head - count);
-}
-
-static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
-{
- stack->head -= count;
-}
-
-static void rvalue_stack_mark(void *ptr)
-{
- rvalue_stack *stack = (rvalue_stack *)ptr;
- long index;
- for (index = 0; index < stack->head; index++) {
- rb_gc_mark(stack->ptr[index]);
- }
-}
-
-static void rvalue_stack_free(void *ptr)
-{
- rvalue_stack *stack = (rvalue_stack *)ptr;
- if (stack) {
- ruby_xfree(stack->ptr);
- ruby_xfree(stack);
- }
-}
-
-static size_t rvalue_stack_memsize(const void *ptr)
-{
- const rvalue_stack *stack = (const rvalue_stack *)ptr;
- return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
-}
-
-static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
- "JSON::Ext::Parser/rvalue_stack",
- {
- .dmark = rvalue_stack_mark,
- .dfree = rvalue_stack_free,
- .dsize = rvalue_stack_memsize,
- },
- 0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY,
-};
-
-static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
-{
- rvalue_stack *stack;
- *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
- *stack_ref = stack;
- MEMCPY(stack, old_stack, rvalue_stack, 1);
-
- stack->capa = old_stack->capa << 1;
- stack->ptr = ALLOC_N(VALUE, stack->capa);
- stack->type = RVALUE_STACK_HEAP_ALLOCATED;
- MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
- return stack;
-}
-
-static void rvalue_stack_eagerly_release(VALUE handle)
-{
- rvalue_stack *stack;
- TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
- RTYPEDDATA_DATA(handle) = NULL;
- rvalue_stack_free(stack);
-}
-
-/* unicode */
-
-static const signed char digit_values[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1
-};
-
-static uint32_t unescape_unicode(const unsigned char *p)
-{
- const uint32_t replacement_char = 0xFFFD;
-
- signed char b;
- uint32_t result = 0;
- b = digit_values[p[0]];
- if (b < 0) return replacement_char;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[1]];
- if (b < 0) return replacement_char;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[2]];
- if (b < 0) return replacement_char;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[3]];
- if (b < 0) return replacement_char;
- result = (result << 4) | (unsigned char)b;
- return result;
-}
-
-static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
-{
- int len = 1;
- if (ch <= 0x7F) {
- buf[0] = (char) ch;
- } else if (ch <= 0x07FF) {
- buf[0] = (char) ((ch >> 6) | 0xC0);
- buf[1] = (char) ((ch & 0x3F) | 0x80);
- len++;
- } else if (ch <= 0xFFFF) {
- buf[0] = (char) ((ch >> 12) | 0xE0);
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
- buf[2] = (char) ((ch & 0x3F) | 0x80);
- len += 2;
- } else if (ch <= 0x1fffff) {
- buf[0] =(char) ((ch >> 18) | 0xF0);
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
- buf[3] =(char) ((ch & 0x3F) | 0x80);
- len += 3;
- } else {
- buf[0] = '?';
- }
- return len;
-}
-
-typedef struct JSON_ParserStruct {
- VALUE create_id;
- VALUE object_class;
- VALUE array_class;
- VALUE decimal_class;
- VALUE match_string;
- int max_nesting;
- bool allow_nan;
- bool allow_trailing_comma;
- bool parsing_name;
- bool symbolize_names;
- bool freeze;
- bool create_additions;
- bool deprecated_create_additions;
-} JSON_Parser;
-
-typedef struct JSON_ParserStateStruct {
- JSON_Parser *json;
- VALUE Vsource;
- VALUE stack_handle;
- char *source;
- long len;
- char *memo;
- FBuffer fbuffer;
- rvalue_stack *stack;
- rvalue_cache name_cache;
- int in_array;
-} JSON_ParserState;
-
-#define GET_PARSER \
- JSON_Parser *json; \
- TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
-
-#define MinusInfinity "-Infinity"
-#define EVIL 0x666
-
-static const rb_data_type_t JSON_Parser_type;
-static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-
-#ifndef HAVE_STRNLEN
-static size_t strnlen(const char *s, size_t maxlen)
-{
- char *p;
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
-}
-#endif
-
-#define PARSE_ERROR_FRAGMENT_LEN 32
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_parse_error(const char *format, const char *start)
-{
- char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
-
- size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
- const char *ptr = start;
-
- if (len == PARSE_ERROR_FRAGMENT_LEN) {
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
- buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
- ptr = buffer;
- }
-
- rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
-}
-
-
-%%{
- machine JSON_common;
-
- cr = '\n';
- cr_neg = [^\n];
- ws = [ \t\r\n];
- c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
- cpp_comment = '//' cr_neg* cr;
- comment = c_comment | cpp_comment;
- ignore = ws | comment;
- name_separator = ':';
- value_separator = ',';
- Vnull = 'null';
- Vfalse = 'false';
- Vtrue = 'true';
- VNaN = 'NaN';
- VInfinity = 'Infinity';
- VMinusInfinity = '-Infinity';
- begin_value = [nft\"\-\[\{NI] | digit;
- begin_object = '{';
- end_object = '}';
- begin_array = '[';
- end_array = ']';
- begin_string = '"';
- begin_name = begin_string;
- begin_number = digit | '-';
-}%%
-
-%%{
- machine JSON_object;
- include JSON_common;
-
- write data;
-
- action parse_value {
- char *np = JSON_parse_value(state, json, fpc, pe, result, current_nesting);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- fexec np;
- }
- }
-
- action allow_trailing_comma { json->allow_trailing_comma }
-
- action parse_name {
- char *np;
- json->parsing_name = true;
- np = JSON_parse_string(state, json, fpc, pe, result);
- json->parsing_name = false;
- if (np == NULL) { fhold; fbreak; } else {
- PUSH(*result);
- fexec np;
- }
- }
-
- action exit { fhold; fbreak; }
-
- pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value;
- next_pair = ignore* value_separator pair;
-
- main := (
- begin_object
- (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
- end_object
- ) @exit;
-}%%
-
-#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack)
-
-static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
-
- long stack_head = state->stack->head;
-
- %% write init;
- %% write exec;
-
- if (cs >= JSON_object_first_final) {
- long count = state->stack->head - stack_head;
-
- if (RB_UNLIKELY(json->object_class)) {
- VALUE object = rb_class_new_instance(0, 0, json->object_class);
- long index = 0;
- VALUE *items = rvalue_stack_peek(state->stack, count);
- while (index < count) {
- VALUE name = items[index++];
- VALUE value = items[index++];
- rb_funcall(object, i_aset, 2, name, value);
- }
- *result = object;
- } else {
- VALUE hash;
-#ifdef HAVE_RB_HASH_NEW_CAPA
- hash = rb_hash_new_capa(count >> 1);
-#else
- hash = rb_hash_new();
-#endif
- rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), hash);
- *result = hash;
- }
- rvalue_stack_pop(state->stack, count);
-
- if (RB_UNLIKELY(json->create_additions)) {
- VALUE klassname;
- if (json->object_class) {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
- } else {
- klassname = rb_hash_aref(*result, json->create_id);
- }
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
- if (json->deprecated_create_additions) {
- json_deprecated(deprecated_create_additions_warning);
- }
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
- }
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_value;
- include JSON_common;
-
- write data;
-
- action parse_null {
- *result = Qnil;
- }
- action parse_false {
- *result = Qfalse;
- }
- action parse_true {
- *result = Qtrue;
- }
- action parse_nan {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- raise_parse_error("unexpected token at '%s'", p - 2);
- }
- }
- action parse_infinity {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- raise_parse_error("unexpected token at '%s'", p - 7);
- }
- }
- action parse_string {
- char *np = JSON_parse_string(state, json, fpc, pe, result);
- if (np == NULL) {
- fhold;
- fbreak;
- } else {
- fexec np;
- }
- }
-
- action parse_number {
- char *np;
- if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- fexec p + 10;
- fhold; fbreak;
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- }
- }
- np = JSON_parse_number(state, json, fpc, pe, result);
- if (np != NULL) {
- fexec np;
- }
- fhold; fbreak;
- }
-
- action parse_array {
- char *np;
- state->in_array++;
- np = JSON_parse_array(state, json, fpc, pe, result, current_nesting + 1);
- state->in_array--;
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_object {
- char *np;
- np = JSON_parse_object(state, json, fpc, pe, result, current_nesting + 1);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action exit { fhold; fbreak; }
-
-main := ignore* (
- Vnull @parse_null |
- Vfalse @parse_false |
- Vtrue @parse_true |
- VNaN @parse_nan |
- VInfinity @parse_infinity |
- begin_number @parse_number |
- begin_string @parse_string |
- begin_array @parse_array |
- begin_object @parse_object
- ) ignore* %*exit;
-}%%
-
-static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
-
- %% write init;
- %% write exec;
-
- if (json->freeze) {
- OBJ_FREEZE(*result);
- }
-
- if (cs >= JSON_value_first_final) {
- PUSH(*result);
- return p;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_integer;
-
- write data;
-
- action exit { fhold; fbreak; }
-
- main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
-}%%
-
-#define MAX_FAST_INTEGER_SIZE 18
-static inline VALUE fast_parse_integer(char *p, char *pe)
-{
- bool negative = false;
- if (*p == '-') {
- negative = true;
- p++;
- }
-
- long long memo = 0;
- while (p < pe) {
- memo *= 10;
- memo += *p - '0';
- p++;
- }
-
- if (negative) {
- memo = -memo;
- }
- return LL2NUM(memo);
-}
-
-static char *JSON_decode_integer(JSON_ParserState *state, JSON_Parser *json, char *p, VALUE *result)
-{
- long len = p - state->memo;
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
- *result = fast_parse_integer(state->memo, p);
- } else {
- fbuffer_clear(&state->fbuffer);
- fbuffer_append(&state->fbuffer, state->memo, len);
- fbuffer_append_char(&state->fbuffer, '\0');
- *result = rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10);
- }
- return p + 1;
-}
-
-%%{
- machine JSON_float;
- include JSON_common;
-
- write data;
-
- action exit { fhold; fbreak; }
- action isFloat { is_float = true; }
-
- main := '-'? (
- (('0' | [1-9][0-9]*)
- ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) |
- ([Ee] [+\-]?[0-9]+)) > isFloat)?
- ) (^[0-9Ee.\-]? @exit ));
-}%%
-
-static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- bool is_float = false;
-
- %% write init;
- state->memo = p;
- %% write exec;
-
- if (cs >= JSON_float_first_final) {
- if (!is_float) {
- return JSON_decode_integer(state, json, p, result);
- }
- VALUE mod = Qnil;
- ID method_id = 0;
- if (json->decimal_class) {
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
- mod = json->decimal_class;
- method_id = i_try_convert;
- } else if (rb_respond_to(json->decimal_class, i_new)) {
- mod = json->decimal_class;
- method_id = i_new;
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
- VALUE name = rb_class_name(json->decimal_class);
- const char *name_cstr = RSTRING_PTR(name);
- const char *last_colon = strrchr(name_cstr, ':');
- if (last_colon) {
- const char *mod_path_end = last_colon - 1;
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
- mod = rb_path_to_class(mod_path);
-
- const char *method_name_beg = last_colon + 1;
- long before_len = method_name_beg - name_cstr;
- long len = RSTRING_LEN(name) - before_len;
- VALUE method_name = rb_str_substr(name, before_len, len);
- method_id = SYM2ID(rb_str_intern(method_name));
- } else {
- mod = rb_mKernel;
- method_id = SYM2ID(rb_str_intern(name));
- }
- }
- }
-
- long len = p - state->memo;
- fbuffer_clear(&state->fbuffer);
- fbuffer_append(&state->fbuffer, state->memo, len);
- fbuffer_append_char(&state->fbuffer, '\0');
-
- if (method_id) {
- VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer));
- *result = rb_funcallv(mod, method_id, 1, &text);
- } else {
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1));
- }
-
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-%%{
- machine JSON_array;
- include JSON_common;
-
- write data;
-
- action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(state, json, fpc, pe, &v, current_nesting);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- fexec np;
- }
- }
-
- action allow_trailing_comma { json->allow_trailing_comma }
-
- action exit { fhold; fbreak; }
-
- next_element = value_separator ignore* begin_value >parse_value;
-
- main := begin_array ignore*
- ((begin_value >parse_value ignore*)
- (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
- end_array @exit;
-}%%
-
-static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
- long stack_head = state->stack->head;
-
- %% write init;
- %% write exec;
-
- if(cs >= JSON_array_first_final) {
- long count = state->stack->head - stack_head;
-
- if (RB_UNLIKELY(json->array_class)) {
- VALUE array = rb_class_new_instance(0, 0, json->array_class);
- VALUE *items = rvalue_stack_peek(state->stack, count);
- long index;
- for (index = 0; index < count; index++) {
- rb_funcall(array, i_leftshift, 1, items[index]);
- }
- *result = array;
- } else {
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
- *result = array;
- }
- rvalue_stack_pop(state->stack, count);
-
- return p + 1;
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- return NULL;
- }
-}
-
-static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
-{
- if (symbolize) {
- intern = true;
- }
- VALUE result;
-# ifdef HAVE_RB_ENC_INTERNED_STR
- if (intern) {
- result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
- } else {
- result = rb_utf8_str_new(start, (long)(end - start));
- }
-# else
- result = rb_utf8_str_new(start, (long)(end - start));
- if (intern) {
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
- }
-# endif
-
- if (symbolize) {
- result = rb_str_intern(result);
- }
-
- return result;
-}
-
-static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
-{
- size_t bufferSize = stringEnd - string;
-
- if (is_name && state->in_array) {
- VALUE cached_key;
- if (RB_UNLIKELY(symbolize)) {
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
- } else {
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
- }
-
- if (RB_LIKELY(cached_key)) {
- return cached_key;
- }
- }
-
- return build_string(string, stringEnd, intern, symbolize);
-}
-
-static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
-{
- size_t bufferSize = stringEnd - string;
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
- int unescape_len;
- char buf[4];
-
- if (is_name && state->in_array) {
- VALUE cached_key;
- if (RB_UNLIKELY(symbolize)) {
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
- } else {
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
- }
-
- if (RB_LIKELY(cached_key)) {
- return cached_key;
- }
- }
-
- pe = memchr(p, '\\', bufferSize);
- if (RB_UNLIKELY(pe == NULL)) {
- return build_string(string, stringEnd, intern, symbolize);
- }
-
- VALUE result = rb_str_buf_new(bufferSize);
- rb_enc_associate_index(result, utf8_encindex);
- buffer = bufferStart = RSTRING_PTR(result);
-
- while (pe < stringEnd) {
- if (*pe == '\\') {
- unescape = (char *) "?";
- unescape_len = 1;
- if (pe > p) {
- MEMCPY(buffer, p, char, pe - p);
- buffer += pe - p;
- }
- switch (*++pe) {
- case 'n':
- unescape = (char *) "\n";
- break;
- case 'r':
- unescape = (char *) "\r";
- break;
- case 't':
- unescape = (char *) "\t";
- break;
- case '"':
- unescape = (char *) "\"";
- break;
- case '\\':
- unescape = (char *) "\\";
- break;
- case 'b':
- unescape = (char *) "\b";
- break;
- case 'f':
- unescape = (char *) "\f";
- break;
- case 'u':
- if (pe > stringEnd - 4) {
- raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
- } else {
- uint32_t ch = unescape_unicode((unsigned char *) ++pe);
- pe += 3;
- /* To handle values above U+FFFF, we take a sequence of
- * \uXXXX escapes in the U+D800..U+DBFF then
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
- * to make a 20-bit number, then add 0x10000 to get the
- * final codepoint.
- *
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
- * Area".
- */
- if ((ch & 0xFC00) == 0xD800) {
- pe++;
- if (pe > stringEnd - 6) {
- raise_parse_error("incomplete surrogate pair at '%s'", p);
- }
- if (pe[0] == '\\' && pe[1] == 'u') {
- uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
- }
- }
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
- }
- break;
- default:
- p = pe;
- continue;
- }
- MEMCPY(buffer, unescape, char, unescape_len);
- buffer += unescape_len;
- p = ++pe;
- } else {
- pe++;
- }
- }
-
- if (pe > p) {
- MEMCPY(buffer, p, char, pe - p);
- buffer += pe - p;
- }
- rb_str_set_len(result, buffer - bufferStart);
-
- if (symbolize) {
- result = rb_str_intern(result);
- } else if (intern) {
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
- }
-
- return result;
-}
-
-%%{
- machine JSON_string;
- include JSON_common;
-
- write data;
-
- action parse_complex_string {
- *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
- fexec p + 1;
- fhold;
- fbreak;
- }
-
- action parse_simple_string {
- *result = json_string_fastpath(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
- fexec p + 1;
- fhold;
- fbreak;
- }
-
- double_quote = '"';
- escape = '\\';
- control = 0..0x1f;
- simple = any - escape - double_quote - control;
-
- main := double_quote (
- (simple*)(
- (double_quote) @parse_simple_string |
- ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
- )
- );
-}%%
-
-static int
-match_i(VALUE regexp, VALUE klass, VALUE memo)
-{
- if (regexp == Qundef) return ST_STOP;
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
- rb_ary_push(memo, klass);
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE match_string;
-
- %% write init;
- state->memo = p;
- %% write exec;
-
- if (json->create_additions && RTEST(match_string = json->match_string)) {
- VALUE klass;
- VALUE memo = rb_ary_new2(2);
- rb_ary_push(memo, *result);
- rb_hash_foreach(match_string, match_i, memo);
- klass = rb_ary_entry(memo, 1);
- if (RTEST(klass)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
-
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-/*
- * Document-class: JSON::Ext::Parser
- *
- * This is the JSON parser implemented as a C extension. It can be configured
- * to be used by setting
- *
- * JSON.parser = JSON::Ext::Parser
- *
- * with the method parser= in JSON.
- *
- */
-
-static VALUE convert_encoding(VALUE source)
-{
- int encindex = RB_ENCODING_GET(source);
-
- if (RB_LIKELY(encindex == utf8_encindex)) {
- return source;
- }
-
- if (encindex == binary_encindex) {
- // For historical reason, we silently reinterpret binary strings as UTF-8
- return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
- }
-
- return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
-}
-
-static int configure_parser_i(VALUE key, VALUE val, VALUE data)
-{
- JSON_Parser *json = (JSON_Parser *)data;
-
- if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
- else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
- else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
- else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
- else if (key == sym_freeze) { json->freeze = RTEST(val); }
- else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
- else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
- else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
- else if (key == sym_create_additions) {
- if (NIL_P(val)) {
- json->create_additions = true;
- json->deprecated_create_additions = true;
- } else {
- json->create_additions = RTEST(val);
- json->deprecated_create_additions = false;
- }
- }
-
- return ST_CONTINUE;
-}
-
-static void parser_init(JSON_Parser *json, VALUE opts)
-{
- json->max_nesting = 100;
-
- if (!NIL_P(opts)) {
- Check_Type(opts, T_HASH);
- if (RHASH_SIZE(opts) > 0) {
- // We assume in most cases few keys are set so it's faster to go over
- // the provided keys than to check all possible keys.
- rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
-
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
-
- if (json->create_additions && !json->create_id) {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- }
-
- }
-}
-
-/*
- * call-seq: new(opts => {})
- *
- * Creates a new JSON::Ext::ParserConfig instance.
- *
- * It will be configured by the _opts_ hash. _opts_ can have the following
- * keys:
- *
- * _opts_ can have the following keys:
- * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
- * structures. Disable depth checking with :max_nesting => false|nil|0, it
- * defaults to 100.
- * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
- * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
- * false.
- * * *symbolize_names*: If set to true, returns symbols for the names
- * (keys) in a JSON object. Otherwise strings are returned, which is
- * also the default. It's not possible to use this option in
- * conjunction with the *create_additions* option.
- * * *create_additions*: If set to false, the Parser doesn't create
- * additions even if a matching class and create_id was found. This option
- * defaults to false.
- * * *object_class*: Defaults to Hash. If another type is provided, it will be used
- * instead of Hash to represent JSON objects. The type must respond to
- * +new+ without arguments, and return an object that respond to +[]=+.
- * * *array_class*: Defaults to Array If another type is provided, it will be used
- * instead of Hash to represent JSON arrays. The type must respond to
- * +new+ without arguments, and return an object that respond to +<<+.
- * * *decimal_class*: Specifies which class to use instead of the default
- * (Float) when parsing decimal numbers. This class must accept a single
- * string argument in its constructor.
- */
-static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
-{
- GET_PARSER;
-
- parser_init(json, opts);
- return self;
-}
-
-%%{
- machine JSON;
-
- write data;
-
- include JSON_common;
-
- action parse_value {
- char *np = JSON_parse_value(state, json, fpc, pe, &result, 0);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- main := ignore* (
- begin_value >parse_value
- ) ignore*;
-}%%
-
-static VALUE cParser_parse_safe(VALUE vstate)
-{
- JSON_ParserState *state = (JSON_ParserState *)vstate;
- VALUE result = Qnil;
- char *p, *pe;
- int cs = EVIL;
- JSON_Parser *json = state->json;
-
- %% write init;
- p = state->source;
- pe = p + state->len;
- %% write exec;
-
- if (state->stack_handle) {
- rvalue_stack_eagerly_release(state->stack_handle);
- }
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- raise_parse_error("unexpected token at '%s'", p);
- return Qnil;
- }
-}
-
-static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource)
-{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
-
- VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
- rvalue_stack stack = {
- .type = RVALUE_STACK_STACK_ALLOCATED,
- .ptr = rvalue_stack_buffer,
- .capa = RVALUE_STACK_INITIAL_CAPA,
- };
-
- JSON_ParserState _state = {
- .json = json,
- .len = RSTRING_LEN(Vsource),
- .source = RSTRING_PTR(Vsource),
- .Vsource = Vsource,
- .stack = &stack,
- };
- JSON_ParserState *state = &_state;
-
- char stack_buffer[FBUFFER_STACK_SIZE];
- fbuffer_stack_init(&state->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
-
- int interupted;
- VALUE result = rb_protect(cParser_parse_safe, (VALUE)state, &interupted);
-
- fbuffer_free(&state->fbuffer);
- if (interupted) {
- rb_jump_tag(interupted);
- }
-
- return result;
-}
-
-/*
- * call-seq: parse(source)
- *
- * Parses the current JSON text _source_ and returns the complete data
- * structure as a result.
- * It raises JSON::ParserError if fail to parse.
- */
-static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
-{
- GET_PARSER;
- return cParser_parse(json, Vsource);
-}
-
-static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
-{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
-
- JSON_Parser _parser = {0};
- JSON_Parser *json = &_parser;
- parser_init(json, opts);
-
- return cParser_parse(json, Vsource);
-}
-
-static void JSON_mark(void *ptr)
-{
- JSON_Parser *json = ptr;
- rb_gc_mark(json->create_id);
- rb_gc_mark(json->object_class);
- rb_gc_mark(json->array_class);
- rb_gc_mark(json->decimal_class);
- rb_gc_mark(json->match_string);
-}
-
-static void JSON_free(void *ptr)
-{
- JSON_Parser *json = ptr;
- ruby_xfree(json);
-}
-
-static size_t JSON_memsize(const void *ptr)
-{
- return sizeof(JSON_Parser);
-}
-
-static const rb_data_type_t JSON_Parser_type = {
- "JSON/Parser",
- {JSON_mark, JSON_free, JSON_memsize,},
- 0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY,
-};
-
-static VALUE cJSON_parser_s_allocate(VALUE klass)
-{
- JSON_Parser *json;
- return TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
-}
-
-void Init_parser(void)
-{
-#ifdef HAVE_RB_EXT_RACTOR_SAFE
- rb_ext_ractor_safe(true);
-#endif
-
-#undef rb_intern
- rb_require("json/common");
- mJSON = rb_define_module("JSON");
- VALUE mExt = rb_define_module_under(mJSON, "Ext");
- VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
- eNestingError = rb_path2class("JSON::NestingError");
- rb_gc_register_mark_object(eNestingError);
- rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
- rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
- rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
-
- VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
- rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
-
- CNaN = rb_const_get(mJSON, rb_intern("NaN"));
- rb_gc_register_mark_object(CNaN);
-
- CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
- rb_gc_register_mark_object(CInfinity);
-
- CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
- rb_gc_register_mark_object(CMinusInfinity);
-
- rb_global_variable(&Encoding_UTF_8);
- Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
-
- sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
- sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
- sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
- sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
- sym_freeze = ID2SYM(rb_intern("freeze"));
- sym_create_additions = ID2SYM(rb_intern("create_additions"));
- sym_create_id = ID2SYM(rb_intern("create_id"));
- sym_object_class = ID2SYM(rb_intern("object_class"));
- sym_array_class = ID2SYM(rb_intern("array_class"));
- sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
- sym_match_string = ID2SYM(rb_intern("match_string"));
-
- i_create_id = rb_intern("create_id");
- i_json_creatable_p = rb_intern("json_creatable?");
- i_json_create = rb_intern("json_create");
- i_chr = rb_intern("chr");
- i_match = rb_intern("match");
- i_deep_const_get = rb_intern("deep_const_get");
- i_aset = rb_intern("[]=");
- i_aref = rb_intern("[]");
- i_leftshift = rb_intern("<<");
- i_new = rb_intern("new");
- i_try_convert = rb_intern("try_convert");
- i_uminus = rb_intern("-@");
- i_encode = rb_intern("encode");
-
- binary_encindex = rb_ascii8bit_encindex();
- utf8_encindex = rb_utf8_encindex();
- enc_utf8 = rb_utf8_encoding();
-}
-
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index c01e28910f..5956200893 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -104,6 +104,11 @@ class JSONParserTest < Test::Unit::TestCase
assert_raise(JSON::ParserError) { parse('+23') }
assert_raise(JSON::ParserError) { parse('.23') }
assert_raise(JSON::ParserError) { parse('023') }
+ assert_raise(JSON::ParserError) { parse('-023') }
+ assert_raise(JSON::ParserError) { parse('023.12') }
+ assert_raise(JSON::ParserError) { parse('-023.12') }
+ assert_raise(JSON::ParserError) { parse('023e12') }
+ assert_raise(JSON::ParserError) { parse('-023e12') }
assert_equal(23, parse('23'))
assert_equal(-23, parse('-23'))
assert_equal_float(3.141, parse('3.141'))
@@ -620,7 +625,7 @@ class JSONParserTest < Test::Unit::TestCase
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"[email protected]"}')
end
if RUBY_ENGINE == "ruby"
- assert_equal %(unexpected token at '{"input":{"firstName":"Bob","las'), error.message
+ assert_equal %(expected ',' or '}' after object value, got: ''), error.message
end
end