summary refs log tree commit diff
path: root/ext/json/parser/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser/parser.c')
-rw-r--r-- ext/json/parser/parser.c 182
1 files changed, 98 insertions, 84 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 128f683e0f..b7428891a5 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -22,26 +22,28 @@ static const signed char digit_values[256] = {
-1, -1, -1, -1, -1, -1, -1
};
/*
 * Decode the four bytes p[0]..p[3] into a single code unit.
 *
 * Each byte is looked up in digit_values[]; a negative table entry marks
 * the byte as invalid and the function returns the Unicode replacement
 * character (U+FFFD) immediately.  Otherwise the four looked-up values are
 * packed 4 bits at a time, with p[0] ending up in the most significant
 * position.
 *
 * NOTE(review): digit_values[] presumably maps ASCII hex digits to 0..15
 * (only the tail of the table is visible here) -- confirm against the
 * table definition.
 *
 * No length is passed in, so the caller must guarantee that at least four
 * bytes are readable at p.
 *
 * In this diff the function only changes type and constant spelling:
 * UTF32 becomes uint32_t and UNI_REPLACEMENT_CHAR becomes a local
 * constant holding 0xFFFD.
 */
-static UTF32 unescape_unicode(const unsigned char *p)
+static uint32_t unescape_unicode(const unsigned char *p)
{
+ const uint32_t replacement_char = 0xFFFD;
+
signed char b;
- UTF32 result = 0;
+ uint32_t result = 0;
/* First byte: becomes the highest 4 bits of the result. */
b = digit_values[p[0]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
+ if (b < 0) return replacement_char;
/* b is known to be non-negative here, so the cast does not change its value. */
result = (result << 4) | (unsigned char)b;
/* Second byte. */
b = digit_values[p[1]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
+ if (b < 0) return replacement_char;
result = (result << 4) | (unsigned char)b;
/* Third byte. */
b = digit_values[p[2]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
+ if (b < 0) return replacement_char;
result = (result << 4) | (unsigned char)b;
/* Fourth byte: becomes the lowest 4 bits of the result. */
b = digit_values[p[3]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
+ if (b < 0) return replacement_char;
result = (result << 4) | (unsigned char)b;
return result;
}
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
+static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
int len = 1;
if (ch <= 0x7F) {
@@ -77,11 +79,11 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
-#line 125 "parser.rl"
+#line 105 "parser.rl"
-#line 107 "parser.c"
+#line 87 "parser.c"
enum {JSON_object_start = 1};
enum {JSON_object_first_final = 27};
enum {JSON_object_error = 0};
@@ -89,7 +91,7 @@ enum {JSON_object_error = 0};
enum {JSON_object_en_main = 1};
-#line 167 "parser.rl"
+#line 147 "parser.rl"
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -105,14 +107,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
*result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
-#line 131 "parser.c"
+#line 111 "parser.c"
{
cs = JSON_object_start;
}
-#line 182 "parser.rl"
+#line 162 "parser.rl"
-#line 138 "parser.c"
+#line 118 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -140,7 +142,7 @@ case 2:
goto st2;
goto st0;
tr2:
-#line 149 "parser.rl"
+#line 129 "parser.rl"
{
char *np;
json->parsing_name = 1;
@@ -153,7 +155,7 @@ st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 179 "parser.c"
+#line 159 "parser.c"
switch( (*p) ) {
case 13: goto st3;
case 32: goto st3;
@@ -220,7 +222,7 @@ case 8:
goto st8;
goto st0;
tr11:
-#line 133 "parser.rl"
+#line 113 "parser.rl"
{
VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -241,7 +243,7 @@ st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 267 "parser.c"
+#line 247 "parser.c"
switch( (*p) ) {
case 13: goto st9;
case 32: goto st9;
@@ -330,14 +332,14 @@ case 18:
goto st9;
goto st18;
tr4:
-#line 157 "parser.rl"
+#line 137 "parser.rl"
{ p--; {p++; cs = 27; goto _out;} }
goto st27;
st27:
if ( ++p == pe )
goto _test_eof27;
case 27:
-#line 363 "parser.c"
+#line 343 "parser.c"
goto st0;
st19:
if ( ++p == pe )
@@ -435,7 +437,7 @@ case 26:
_out: {}
}
-#line 183 "parser.rl"
+#line 163 "parser.rl"
if (cs >= JSON_object_first_final) {
if (json->create_additions) {
@@ -460,7 +462,7 @@ case 26:
-#line 486 "parser.c"
+#line 466 "parser.c"
enum {JSON_value_start = 1};
enum {JSON_value_first_final = 29};
enum {JSON_value_error = 0};
@@ -468,7 +470,7 @@ enum {JSON_value_error = 0};
enum {JSON_value_en_main = 1};
-#line 283 "parser.rl"
+#line 263 "parser.rl"
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -476,14 +478,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL;
-#line 502 "parser.c"
+#line 482 "parser.c"
{
cs = JSON_value_start;
}
-#line 290 "parser.rl"
+#line 270 "parser.rl"
-#line 509 "parser.c"
+#line 489 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -517,14 +519,14 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 235 "parser.rl"
+#line 215 "parser.rl"
{
char *np = JSON_parse_string(json, p, pe, result);
if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
}
goto st29;
tr3:
-#line 240 "parser.rl"
+#line 220 "parser.rl"
{
char *np;
if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
@@ -544,7 +546,7 @@ tr3:
}
goto st29;
tr7:
-#line 258 "parser.rl"
+#line 238 "parser.rl"
{
char *np;
np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
@@ -552,7 +554,7 @@ tr7:
}
goto st29;
tr11:
-#line 264 "parser.rl"
+#line 244 "parser.rl"
{
char *np;
np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
@@ -560,7 +562,7 @@ tr11:
}
goto st29;
tr25:
-#line 228 "parser.rl"
+#line 208 "parser.rl"
{
if (json->allow_nan) {
*result = CInfinity;
@@ -570,7 +572,7 @@ tr25:
}
goto st29;
tr27:
-#line 221 "parser.rl"
+#line 201 "parser.rl"
{
if (json->allow_nan) {
*result = CNaN;
@@ -580,19 +582,19 @@ tr27:
}
goto st29;
tr31:
-#line 215 "parser.rl"
+#line 195 "parser.rl"
{
*result = Qfalse;
}
goto st29;
tr34:
-#line 212 "parser.rl"
+#line 192 "parser.rl"
{
*result = Qnil;
}
goto st29;
tr37:
-#line 218 "parser.rl"
+#line 198 "parser.rl"
{
*result = Qtrue;
}
@@ -601,9 +603,9 @@ st29:
if ( ++p == pe )
goto _test_eof29;
case 29:
-#line 270 "parser.rl"
+#line 250 "parser.rl"
{ p--; {p++; cs = 29; goto _out;} }
-#line 629 "parser.c"
+#line 609 "parser.c"
switch( (*p) ) {
case 13: goto st29;
case 32: goto st29;
@@ -844,7 +846,7 @@ case 28:
_out: {}
}
-#line 291 "parser.rl"
+#line 271 "parser.rl"
if (json->freeze) {
OBJ_FREEZE(*result);
@@ -858,7 +860,7 @@ case 28:
}
-#line 884 "parser.c"
+#line 864 "parser.c"
enum {JSON_integer_start = 1};
enum {JSON_integer_first_final = 3};
enum {JSON_integer_error = 0};
@@ -866,7 +868,7 @@ enum {JSON_integer_error = 0};
enum {JSON_integer_en_main = 1};
-#line 311 "parser.rl"
+#line 291 "parser.rl"
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -874,15 +876,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
int cs = EVIL;
-#line 900 "parser.c"
+#line 880 "parser.c"
{
cs = JSON_integer_start;
}
-#line 318 "parser.rl"
+#line 298 "parser.rl"
json->memo = p;
-#line 908 "parser.c"
+#line 888 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -916,14 +918,14 @@ case 3:
goto st0;
goto tr4;
tr4:
-#line 308 "parser.rl"
+#line 288 "parser.rl"
{ p--; {p++; cs = 4; goto _out;} }
goto st4;
st4:
if ( ++p == pe )
goto _test_eof4;
case 4:
-#line 949 "parser.c"
+#line 929 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -942,7 +944,7 @@ case 5:
_out: {}
}
-#line 320 "parser.rl"
+#line 300 "parser.rl"
if (cs >= JSON_integer_first_final) {
long len = p - json->memo;
@@ -957,7 +959,7 @@ case 5:
}
-#line 983 "parser.c"
+#line 963 "parser.c"
enum {JSON_float_start = 1};
enum {JSON_float_first_final = 8};
enum {JSON_float_error = 0};
@@ -965,7 +967,7 @@ enum {JSON_float_error = 0};
enum {JSON_float_en_main = 1};
-#line 345 "parser.rl"
+#line 325 "parser.rl"
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -973,15 +975,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL;
-#line 999 "parser.c"
+#line 979 "parser.c"
{
cs = JSON_float_start;
}
-#line 352 "parser.rl"
+#line 332 "parser.rl"
json->memo = p;
-#line 1007 "parser.c"
+#line 987 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1039,14 +1041,14 @@ case 8:
goto st0;
goto tr9;
tr9:
-#line 339 "parser.rl"
+#line 319 "parser.rl"
{ p--; {p++; cs = 9; goto _out;} }
goto st9;
st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 1072 "parser.c"
+#line 1052 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -1107,7 +1109,7 @@ case 7:
_out: {}
}
-#line 354 "parser.rl"
+#line 334 "parser.rl"
if (cs >= JSON_float_first_final) {
VALUE mod = Qnil;
@@ -1158,7 +1160,7 @@ case 7:
-#line 1184 "parser.c"
+#line 1164 "parser.c"
enum {JSON_array_start = 1};
enum {JSON_array_first_final = 17};
enum {JSON_array_error = 0};
@@ -1166,7 +1168,7 @@ enum {JSON_array_error = 0};
enum {JSON_array_en_main = 1};
-#line 432 "parser.rl"
+#line 412 "parser.rl"
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -1180,14 +1182,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
*result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
-#line 1206 "parser.c"
+#line 1186 "parser.c"
{
cs = JSON_array_start;
}
-#line 445 "parser.rl"
+#line 425 "parser.rl"
-#line 1213 "parser.c"
+#line 1193 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1226,7 +1228,7 @@ case 2:
goto st2;
goto st0;
tr2:
-#line 409 "parser.rl"
+#line 389 "parser.rl"
{
VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -1246,7 +1248,7 @@ st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 1272 "parser.c"
+#line 1252 "parser.c"
switch( (*p) ) {
case 13: goto st3;
case 32: goto st3;
@@ -1346,14 +1348,14 @@ case 12:
goto st3;
goto st12;
tr4:
-#line 424 "parser.rl"
+#line 404 "parser.rl"
{ p--; {p++; cs = 17; goto _out;} }
goto st17;
st17:
if ( ++p == pe )
goto _test_eof17;
case 17:
-#line 1379 "parser.c"
+#line 1359 "parser.c"
goto st0;
st13:
if ( ++p == pe )
@@ -1409,7 +1411,7 @@ case 16:
_out: {}
}
-#line 446 "parser.rl"
+#line 426 "parser.rl"
if(cs >= JSON_array_first_final) {
return p + 1;
@@ -1482,9 +1484,19 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
"incomplete unicode character escape sequence at '%s'", p
);
} else {
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
pe += 3;
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
+ /* To handle values above U+FFFF, we take a pair of \uXXXX
+ * escapes, the first in U+D800..U+DBFF (high surrogate) and
+ * the second in U+DC00..U+DFFF (low surrogate), take the low
+ * 10 bits from each to make a 20-bit number, then add 0x10000
+ * to get the final codepoint.
+ *
+ * See Unicode 15: §3.8 "Surrogates", §5.3 "Handling
+ * Surrogate Pairs in UTF-16", and §23.6 "Surrogates
+ * Area".
+ */
+ if ((ch & 0xFC00) == 0xD800) {
pe++;
if (pe > stringEnd - 6) {
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
@@ -1496,7 +1508,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
);
}
if (pe[0] == '\\' && pe[1] == 'u') {
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
| (sur & 0x3FF));
pe += 5;
@@ -1566,7 +1578,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
}
-#line 1592 "parser.c"
+#line 1582 "parser.c"
enum {JSON_string_start = 1};
enum {JSON_string_first_final = 8};
enum {JSON_string_error = 0};
@@ -1574,7 +1586,7 @@ enum {JSON_string_error = 0};
enum {JSON_string_en_main = 1};
-#line 620 "parser.rl"
+#line 610 "parser.rl"
static int
@@ -1595,15 +1607,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string;
-#line 1621 "parser.c"
+#line 1611 "parser.c"
{
cs = JSON_string_start;
}
-#line 640 "parser.rl"
+#line 630 "parser.rl"
json->memo = p;
-#line 1629 "parser.c"
+#line 1619 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1628,7 +1640,7 @@ case 2:
goto st0;
goto st2;
tr2:
-#line 607 "parser.rl"
+#line 597 "parser.rl"
{
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
@@ -1638,14 +1650,14 @@ tr2:
{p = (( p + 1))-1;}
}
}
-#line 617 "parser.rl"
+#line 607 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} }
goto st8;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
-#line 1671 "parser.c"
+#line 1661 "parser.c"
goto st0;
st3:
if ( ++p == pe )
@@ -1721,7 +1733,7 @@ case 7:
_out: {}
}
-#line 642 "parser.rl"
+#line 632 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
@@ -1755,6 +1767,7 @@ case 7:
static VALUE convert_encoding(VALUE source)
{
+#ifdef HAVE_RUBY_ENCODING_H
rb_encoding *enc = rb_enc_get(source);
if (enc == rb_ascii8bit_encoding()) {
if (OBJ_FROZEN(source)) {
@@ -1764,7 +1777,8 @@ static VALUE convert_encoding(VALUE source)
} else {
source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
}
- return source;
+#endif
+ return source;
}
/*
@@ -1892,7 +1906,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}
-#line 1920 "parser.c"
+#line 1910 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};
@@ -1900,7 +1914,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1};
-#line 828 "parser.rl"
+#line 818 "parser.rl"
/*
@@ -1918,16 +1932,16 @@ static VALUE cParser_parse(VALUE self)
GET_PARSER;
-#line 1946 "parser.c"
+#line 1936 "parser.c"
{
cs = JSON_start;
}
-#line 845 "parser.rl"
+#line 835 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 1955 "parser.c"
+#line 1945 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1961,7 +1975,7 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 820 "parser.rl"
+#line 810 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -1971,7 +1985,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 1999 "parser.c"
+#line 1989 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -2060,7 +2074,7 @@ case 9:
_out: {}
}
-#line 848 "parser.rl"
+#line 838 "parser.rl"
if (cs >= JSON_first_final && p == pe) {
return result;