summaryrefslogtreecommitdiff
path: root/ext/json/generator/generator.c
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2023-04-13 19:22:29 +0200
committerHiroshi SHIBATA <[email protected]>2023-12-01 16:47:06 +0900
commit0dfeb172968cdaefca2ab828c94d3e5f44d91f8f (patch)
tree2850662a485dd3f42068bf14ce4618d77b4b13d6 /ext/json/generator/generator.c
parent698cb8406298ff289ba7abadf34abc3d09a07ef0 (diff)
Rename escape_slash in script_safe and also escape E+2028 and E+2029
It is rather common to directly interpolate JSON string inside <script> tags in HTML as to provide configuration or parameters to a script. However this may lead to XSS vulnerabilities, to prevent that 3 characters need to be escaped: - `/` (forward slash) - `U+2028` (LINE SEPARATOR) - `U+2029` (PARAGRAPH SEPARATOR) The forward slash need to be escaped to prevent closing the script tag early, and the other two are valid JSON but invalid Javascript and can be used to break JS parsing. Given that the intent of escaping forward slash is the same than escaping U+2028 and U+2029, I chos to rename and repurpose the existing `escape_slash` option.
Diffstat (limited to 'ext/json/generator/generator.c')
-rw-r--r--ext/json/generator/generator.c62
1 files changed, 43 insertions, 19 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 8f7c57e042..efb6b4a0a2 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
i_pack, i_unpack, i_create_id, i_extend, i_key_p,
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
- i_buffer_initial_length, i_dup, i_escape_slash;
+ i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash;
/*
* Copyright 2001-2004 Unicode, Inc.
@@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
* and control characters are JSON escaped. */
-static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash)
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
{
const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
@@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
fbuffer_append(buffer, "\\\"", 2);
break;
case '/':
- if(escape_slash) {
+ if(script_safe) {
fbuffer_append(buffer, "\\/", 2);
break;
}
@@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
* characters required by the JSON standard are JSON escaped. The remaining
* characters (should be UTF8) are just passed through and appended to the
* result. */
-static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash)
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
{
const char *ptr = RSTRING_PTR(string), *p;
unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
@@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
escape_len = 2;
break;
case '/':
- if(escape_slash) {
+ if(script_safe) {
escape = "\\/";
escape_len = 2;
break;
@@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
rb_raise(rb_path2class("JSON::GeneratorError"),
"partial character in source, but hit end");
}
+
+ if (script_safe && c == 0xE2) {
+ unsigned char c2 = (unsigned char) *(p+1);
+ unsigned char c3 = (unsigned char) *(p+2);
+ if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
+ fbuffer_append(buffer, ptr + start, end - start);
+ start = end = (end + clen);
+ if (c3 == 0xA8) {
+ fbuffer_append(buffer, "\\u2028", 6);
+ } else {
+ fbuffer_append(buffer, "\\u2029", 6);
+ }
+ continue;
+ }
+ }
+
if (!isLegalUTF8((UTF8 *) p, clen)) {
rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
@@ -727,8 +743,12 @@ static VALUE cState_configure(VALUE self, VALUE opts)
state->allow_nan = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
state->ascii_only = RTEST(tmp);
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
- state->escape_slash = RTEST(tmp);
+ tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
+ state->script_safe = RTEST(tmp);
+ if (!state->script_safe) {
+ tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
+ state->script_safe = RTEST(tmp);
+ }
return self;
}
@@ -763,7 +783,7 @@ static VALUE cState_to_h(VALUE self)
rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
- rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse);
+ rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
return result;
@@ -948,9 +968,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
}
#endif
if (state->ascii_only) {
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash);
+ convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
} else {
- convert_UTF8_to_JSON(buffer, obj, state->escape_slash);
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe);
}
fbuffer_append_char(buffer, '"');
}
@@ -1391,27 +1411,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
}
/*
- * call-seq: escape_slash
+ * call-seq: script_safe
*
* If this boolean is true, the forward slashes will be escaped in
* the json output.
*/
-static VALUE cState_escape_slash(VALUE self)
+static VALUE cState_script_safe(VALUE self)
{
GET_STATE(self);
- return state->escape_slash ? Qtrue : Qfalse;
+ return state->script_safe ? Qtrue : Qfalse;
}
/*
- * call-seq: escape_slash=(depth)
+ * call-seq: script_safe=(depth)
*
* This sets whether or not the forward slashes will be escaped in
* the json output.
*/
-static VALUE cState_escape_slash_set(VALUE self, VALUE enable)
+static VALUE cState_script_safe_set(VALUE self, VALUE enable)
{
GET_STATE(self);
- state->escape_slash = RTEST(enable);
+ state->script_safe = RTEST(enable);
return Qnil;
}
@@ -1531,9 +1551,12 @@ void Init_generator(void)
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
- rb_define_method(cState, "escape_slash", cState_escape_slash, 0);
- rb_define_method(cState, "escape_slash?", cState_escape_slash, 0);
- rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1);
+ rb_define_method(cState, "script_safe", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe?", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
+ rb_define_alias(cState, "escape_slash", "script_safe");
+ rb_define_alias(cState, "escape_slash?", "script_safe?");
+ rb_define_alias(cState, "escape_slash=", "script_safe=");
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
@@ -1590,6 +1613,7 @@ void Init_generator(void)
i_object_nl = rb_intern("object_nl");
i_array_nl = rb_intern("array_nl");
i_max_nesting = rb_intern("max_nesting");
+ i_script_safe = rb_intern("script_safe");
i_escape_slash = rb_intern("escape_slash");
i_allow_nan = rb_intern("allow_nan");
i_ascii_only = rb_intern("ascii_only");