summary | refs | log | tree | commit | diff
path: root/ext/json/parser/parser.c
diff options
context:
space:
mode:
author: Benoit Daloze <[email protected]> 2024-10-30 21:39:24 +0100
committer: Hiroshi SHIBATA <[email protected]> 2024-11-01 13:04:24 +0900
commit: 6412e6f6c3c3078130007a902b1239bce70e71e1 (patch)
tree: 77fed2b9c6693f91d368f3d70e8ea16a08ba02c5 /ext/json/parser/parser.c
parent: 3782600f0f4f2f70a4d17f025fde6c096f82b424 (diff)
[ruby/json] Use String#encode instead of rb_str_conv_enc()
* rb_str_conv_enc() returns the source string unmodified if the conversion fails. Instead, we should be consistent with the generator here and only accept strings that are BINARY or convertible to UTF-8. https://github.com/ruby/json/commit/1344ad6f66
Diffstat (limited to 'ext/json/parser/parser.c')
-rw-r--r-- ext/json/parser/parser.c 35
1 files changed, 17 insertions, 18 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 9d2c1a5ea3..1c42ee3868 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -89,14 +89,14 @@ static void raise_parse_error(const char *format, const char *start)
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
}
-static VALUE mJSON, mExt, cParser, eNestingError;
+static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
static VALUE CNaN, CInfinity, CMinusInfinity;
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
i_object_class, i_array_class, i_decimal_class,
i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
+ i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
static int binary_encindex;
static int utf8_encindex;
@@ -1797,16 +1797,11 @@ static VALUE convert_encoding(VALUE source)
}
if (encindex == binary_encindex) {
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
- VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
- switch (rb_enc_str_coderange(utf8_string)) {
- case ENC_CODERANGE_7BIT:
- case ENC_CODERANGE_VALID:
- return utf8_string;
- }
+ // For historical reason, we silently reinterpret binary strings as UTF-8
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
}
- return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
}
/*
@@ -1958,7 +1953,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}
-#line 1962 "parser.c"
+#line 1957 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};
@@ -1966,7 +1961,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1};
-#line 870 "parser.rl"
+#line 865 "parser.rl"
/*
@@ -1984,16 +1979,16 @@ static VALUE cParser_parse(VALUE self)
GET_PARSER;
-#line 1988 "parser.c"
+#line 1983 "parser.c"
{
cs = JSON_start;
}
-#line 887 "parser.rl"
+#line 882 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 1997 "parser.c"
+#line 1992 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -2027,7 +2022,7 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 862 "parser.rl"
+#line 857 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2037,7 +2032,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 2041 "parser.c"
+#line 2036 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -2126,7 +2121,7 @@ case 9:
_out: {}
}
-#line 890 "parser.rl"
+#line 885 "parser.rl"
if (cs >= JSON_first_final && p == pe) {
return result;
@@ -2214,6 +2209,9 @@ void Init_parser(void)
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
rb_gc_register_mark_object(CMinusInfinity);
+ rb_global_variable(&Encoding_UTF_8);
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+
i_json_creatable_p = rb_intern("json_creatable?");
i_json_create = rb_intern("json_create");
i_create_id = rb_intern("create_id");
@@ -2235,6 +2233,7 @@ void Init_parser(void)
i_try_convert = rb_intern("try_convert");
i_freeze = rb_intern("freeze");
i_uminus = rb_intern("-@");
+ i_encode = rb_intern("encode");
binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();