diff options
-rw-r--r-- | .github/workflows/prism.yml | 2 | ||||
-rw-r--r-- | prism_compile.c | 423 | ||||
-rw-r--r-- | test/.excludes-prism/TestRegexp.rb | 7 | ||||
-rw-r--r-- | test/.excludes-prism/TestUnicodeEscape.rb | 1 |
4 files changed, 233 insertions, 200 deletions
diff --git a/.github/workflows/prism.yml b/.github/workflows/prism.yml index 49058c232f..8b295f7f81 100644 --- a/.github/workflows/prism.yml +++ b/.github/workflows/prism.yml @@ -92,7 +92,7 @@ jobs: timeout-minutes: 40 env: GNUMAKEFLAGS: '' - RUBY_TESTOPTS: '-q --tty=no --excludes-dir="../src/test/.excludes-prism" --exclude="test_ast.rb" --exclude="test_regexp.rb" --exclude="error_highlight/test_error_highlight.rb" --exclude="prism/encoding_test.rb"' + RUBY_TESTOPTS: '-q --tty=no --excludes-dir="../src/test/.excludes-prism" --exclude="test_ast.rb" --exclude="error_highlight/test_error_highlight.rb" --exclude="prism/encoding_test.rb"' RUN_OPTS: ${{ matrix.run_opts }} - name: make test-prism-spec diff --git a/prism_compile.c b/prism_compile.c index db032e2a05..d7eada1538 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -289,131 +289,243 @@ pm_optimizable_range_item_p(pm_node_t *node) return (!node || PM_NODE_TYPE_P(node, PM_INTEGER_NODE) || PM_NODE_TYPE_P(node, PM_NIL_NODE)); } +static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); + +static int +pm_interpolated_node_compile(const pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + int number_of_items_pushed = 0; + size_t parts_size = parts->size; + + if (parts_size > 0) { + VALUE current_string = Qnil; + + for (size_t index = 0; index < parts_size; index++) { + const pm_node_t *part = parts->nodes[index]; + + if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { + const pm_string_node_t *string_node = (const pm_string_node_t *)part; + VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + } + } + else if (PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && + ((const pm_embedded_statements_node_t *) part)->statements != NULL && + ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && + PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE)) { + const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; + VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + } + } + else { + if (!RTEST(current_string)) { + current_string = rb_enc_str_new(NULL, 0, scope_node->encoding); + } + + ADD_INSN1(ret, &dummy_line_node, putobject, rb_fstring(current_string)); + + current_string = Qnil; + number_of_items_pushed++; + + PM_COMPILE_NOT_POPPED(part); + PM_DUP; + ADD_INSN1(ret, &dummy_line_node, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE)); + ADD_INSN(ret, &dummy_line_node, anytostring); + + number_of_items_pushed++; + } + } + + if (RTEST(current_string)) { + current_string = rb_fstring(current_string); + ADD_INSN1(ret, &dummy_line_node, putobject, current_string); + current_string = Qnil; + number_of_items_pushed++; + } + } + else { + PM_PUTNIL; + } + + return number_of_items_pushed; +} + +static VALUE +pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top) +{ + VALUE current = Qnil; + + for (size_t index = 0; index < nodes->size; index++) { + const pm_node_t *part = nodes->nodes[index]; + VALUE string; + + switch (PM_NODE_TYPE(part)) { + case PM_STRING_NODE: + string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped); + break; + case PM_INTERPOLATED_STRING_NODE: + string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false); + break; + default: + RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat"); + return Qnil; + } + + if (current != Qnil) { + current = rb_str_concat(current, string); + } + else { + current = string; + } + } + + return top ? rb_fstring(current) : current; +} + #define RE_OPTION_ENCODING_SHIFT 8 +#define RE_OPTION_ENCODING(encoding) (((encoding) & 0xFF) << RE_OPTION_ENCODING_SHIFT) +#define ARG_ENCODING_NONE 32 +#define ARG_ENCODING_FIXED 16 +#define ENC_ASCII8BIT 1 +#define ENC_EUC_JP 2 +#define ENC_Windows_31J 3 +#define ENC_UTF8 4 /** * Check the prism flags of a regular expression-like node and return the flags * that are expected by the CRuby VM. */ static int -pm_reg_flags(const pm_node_t *node) { +parse_regexp_flags(const pm_node_t *node) +{ int flags = 0; - int dummy = 0; // Check "no encoding" first so that flags don't get clobbered // We're calling `rb_char_to_option_kcode` in this case so that // we don't need to have access to `ARG_ENCODING_NONE` - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) { - rb_char_to_option_kcode('n', &flags, &dummy); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { + flags |= ARG_ENCODING_NONE; } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) { - rb_char_to_option_kcode('e', &flags, &dummy); - flags |= ('e' << RE_OPTION_ENCODING_SHIFT); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_EUC_JP)); } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) { - rb_char_to_option_kcode('s', &flags, &dummy); - flags |= ('s' << RE_OPTION_ENCODING_SHIFT); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { + flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_Windows_31J)); } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) { - rb_char_to_option_kcode('u', &flags, &dummy); - flags |= ('u' << RE_OPTION_ENCODING_SHIFT); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_UTF8)); } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) { + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) { flags |= ONIG_OPTION_IGNORECASE; } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) { + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) { flags |= ONIG_OPTION_MULTILINE; } - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) { + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) { flags |= ONIG_OPTION_EXTEND; } return flags; } +#undef RE_OPTION_ENCODING_SHIFT +#undef RE_OPTION_ENCODING +#undef ARG_ENCODING_FIXED +#undef ARG_ENCODING_NONE +#undef ENC_ASCII8BIT +#undef ENC_EUC_JP +#undef ENC_Windows_31J +#undef ENC_UTF8 + static rb_encoding * -pm_reg_enc(const pm_scope_node_t *scope_node, const pm_node_t *node) +parse_regexp_encoding(const pm_scope_node_t *scope_node, const pm_node_t *node) { if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { return rb_ascii8bit_encoding(); } - - if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + return rb_utf8_encoding(); + } + else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { return rb_enc_get_from_index(ENCINDEX_EUC_JP); } - - if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { + else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { return rb_enc_get_from_index(ENCINDEX_Windows_31J); } - - if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { - return rb_utf8_encoding(); + else { + return scope_node->encoding; } - - return scope_node->encoding; } -/** - * Certain nodes can be compiled literally, which can lead to further - * optimizations. These nodes will all have the PM_NODE_FLAG_STATIC_LITERAL flag - * set. - */ -static inline bool -pm_static_literal_p(const pm_node_t *node) +/** Raise an error corresponding to the invalid regular expression. */ +static VALUE +parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) { - return node->flags & PM_NODE_FLAG_STATIC_LITERAL; + va_list args; + va_start(args, fmt); + VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, "%" PRIsVALUE, args); + va_end(args); + rb_exc_raise(error); } static VALUE -pm_new_regex(const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) +parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string) { - VALUE regex_str = parse_string(scope_node, unescaped); - rb_encoding *enc = pm_reg_enc(scope_node, node); + VALUE errinfo = rb_errinfo(); - VALUE regex = rb_enc_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), enc, pm_reg_flags(node)); - RB_GC_GUARD(regex_str); + int32_t line_number = pm_node_line_number(scope_node->parser, node); + VALUE regexp = rb_reg_compile(string, parse_regexp_flags(node), (const char *) pm_string_source(&scope_node->parser->filepath), line_number); - rb_obj_freeze(regex); + if (NIL_P(regexp)) { + VALUE message = rb_attr_get(rb_errinfo(), idMesg); + rb_set_errinfo(errinfo); - return regex; + parse_regexp_error(iseq, line_number, "%" PRIsVALUE, message); + return Qnil; + } + + rb_obj_freeze(regexp); + return regexp; } -static VALUE -pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top) +static inline VALUE +parse_regexp_literal(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) { - VALUE current = Qnil; - - for (size_t index = 0; index < nodes->size; index++) { - const pm_node_t *part = nodes->nodes[index]; - VALUE string; - - switch (PM_NODE_TYPE(part)) { - case PM_STRING_NODE: - string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped); - break; - case PM_INTERPOLATED_STRING_NODE: - string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false); - break; - default: - RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat"); - return Qnil; - } + VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), parse_regexp_encoding(scope_node, node)); + return parse_regexp(iseq, scope_node, node, string); +} - if (current != Qnil) { - current = rb_str_concat(current, string); - } - else { - current = string; - } - } +static inline VALUE +parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts) +{ + VALUE string = pm_static_literal_concat(parts, scope_node, false); + rb_enc_associate(string, parse_regexp_encoding(scope_node, node)); + return parse_regexp(iseq, scope_node, node, string); +} - return top ? rb_fstring(current) : current; +static void +pm_compile_regexp_dynamic(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + NODE dummy_line_node = generate_dummy_line_node(node_location->line, node_location->column); + int length = pm_interpolated_node_compile(parts, iseq, dummy_line_node, ret, popped, scope_node); + PUSH_INSN2(ret, *node_location, toregexp, INT2FIX(parse_regexp_flags(node) & 0xFF), INT2FIX(length)); } /** @@ -422,11 +534,11 @@ pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *sco * literal values can be compiled into a literal array. */ static VALUE -pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node) +pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node) { // Every node that comes into this function should already be marked as // static literal. If it's not, then we have a bug somewhere. - RUBY_ASSERT(pm_static_literal_p(node)); + RUBY_ASSERT(PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)); switch (PM_NODE_TYPE(node)) { case PM_ARRAY_NODE: { @@ -435,7 +547,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node VALUE value = rb_ary_hidden_new(elements->size); for (size_t index = 0; index < elements->size; index++) { - rb_ary_push(value, pm_static_literal_value(elements->nodes[index], scope_node)); + rb_ary_push(value, pm_static_literal_value(iseq, elements->nodes[index], scope_node)); } OBJ_FREEZE(value); @@ -453,7 +565,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node for (size_t index = 0; index < elements->size; index++) { RUBY_ASSERT(PM_NODE_TYPE_P(elements->nodes[index], PM_ASSOC_NODE)); pm_assoc_node_t *cast = (pm_assoc_node_t *) elements->nodes[index]; - VALUE pair[2] = { pm_static_literal_value(cast->key, scope_node), pm_static_literal_value(cast->value, scope_node) }; + VALUE pair[2] = { pm_static_literal_value(iseq, cast->key, scope_node), pm_static_literal_value(iseq, cast->value, scope_node) }; rb_ary_cat(array, pair, 2); } @@ -470,17 +582,11 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node return parse_integer((const pm_integer_node_t *) node); case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: { const pm_interpolated_match_last_line_node_t *cast = (const pm_interpolated_match_last_line_node_t *) node; - VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true); - - rb_encoding *encoding = pm_reg_enc(scope_node, (const pm_node_t *) cast); - return rb_obj_freeze(rb_enc_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), encoding, pm_reg_flags((const pm_node_t *) cast))); + return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts); } case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: { const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) node; - VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true); - - rb_encoding *encoding = pm_reg_enc(scope_node, (const pm_node_t *) cast); - return rb_obj_freeze(rb_enc_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), encoding, pm_reg_flags((const pm_node_t *) cast))); + return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts); } case PM_INTERPOLATED_STRING_NODE: return pm_static_literal_concat(&((const pm_interpolated_string_node_t *) node)->parts, scope_node, true); @@ -492,7 +598,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node } case PM_MATCH_LAST_LINE_NODE: { const pm_match_last_line_node_t *cast = (const pm_match_last_line_node_t *) node; - return pm_new_regex(scope_node, (const pm_node_t *) cast, &cast->unescaped); + return parse_regexp_literal(iseq, scope_node, (const pm_node_t *) cast, &cast->unescaped); } case PM_NIL_NODE: return Qnil; @@ -500,13 +606,20 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node return parse_rational((const pm_rational_node_t *) node); case PM_REGULAR_EXPRESSION_NODE: { const pm_regular_expression_node_t *cast = (const pm_regular_expression_node_t *) node; - return pm_new_regex(scope_node, (const pm_node_t *) cast, &cast->unescaped); + return parse_regexp_literal(iseq, scope_node, (const pm_node_t *) cast, &cast->unescaped); } case PM_SOURCE_ENCODING_NODE: return rb_enc_from_encoding(scope_node->encoding); case PM_SOURCE_FILE_NODE: { - pm_source_file_node_t *cast = (pm_source_file_node_t *)node; - return cast->filepath.length ? parse_string(scope_node, &cast->filepath) : rb_fstring_lit("<compiled>"); + const pm_source_file_node_t *cast = (const pm_source_file_node_t *) node; + size_t length = pm_string_length(&cast->filepath); + + if (length > 0) { + return rb_enc_str_new((const char *) pm_string_source(&cast->filepath), length, scope_node->encoding); + } + else { + return rb_fstring_lit("<compiled>"); + } } case PM_SOURCE_LINE_NODE: return INT2FIX(pm_node_line_number(scope_node->parser, node)); @@ -601,8 +714,6 @@ pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LAB return; } -static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); - static void pm_compile_flip_flop_bound(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { @@ -842,76 +953,6 @@ pm_compile_loop(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_fl return; } -static int -pm_interpolated_node_compile(const pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) -{ - int number_of_items_pushed = 0; - size_t parts_size = parts->size; - - if (parts_size > 0) { - VALUE current_string = Qnil; - - for (size_t index = 0; index < parts_size; index++) { - const pm_node_t *part = parts->nodes[index]; - - if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { - const pm_string_node_t *string_node = (const pm_string_node_t *)part; - VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else if (PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && - ((const pm_embedded_statements_node_t *) part)->statements != NULL && - ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && - PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE)) { - const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; - VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else { - if (!RTEST(current_string)) { - current_string = rb_enc_str_new(NULL, 0, scope_node->encoding); - } - - ADD_INSN1(ret, &dummy_line_node, putobject, rb_fstring(current_string)); - - current_string = Qnil; - number_of_items_pushed++; - - PM_COMPILE_NOT_POPPED(part); - PM_DUP; - ADD_INSN1(ret, &dummy_line_node, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE)); - ADD_INSN(ret, &dummy_line_node, anytostring); - - number_of_items_pushed++; - } - } - - if (RTEST(current_string)) { - current_string = rb_fstring(current_string); - ADD_INSN1(ret, &dummy_line_node, putobject, current_string); - current_string = Qnil; - number_of_items_pushed++; - } - } - else { - PM_PUTNIL; - } - - return number_of_items_pushed; -} - // This recurses through scopes and finds the local index at any scope level // It also takes a pointer to depth, and increments depth appropriately // according to the depth of the local. @@ -1201,7 +1242,7 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b // Retrieve the stored index from the hash for this // keyword. - VALUE keyword = pm_static_literal_value(assoc->key, scope_node); + VALUE keyword = pm_static_literal_value(iseq, assoc->key, scope_node); VALUE stored_index = rb_hash_aref(stored_indices, keyword); // If this keyword was already seen in the hash, @@ -1233,7 +1274,7 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b bool popped = true; if (rb_ary_entry(keyword_indices, (long) element_index) == Qtrue) { - keywords[keyword_index++] = pm_static_literal_value(assoc->key, scope_node); + keywords[keyword_index++] = pm_static_literal_value(iseq, assoc->key, scope_node); popped = false; } @@ -4232,13 +4273,13 @@ pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *co * optimization entirely. */ static VALUE -pm_compile_case_node_dispatch(VALUE dispatch, const pm_node_t *node, LABEL *label, const pm_scope_node_t *scope_node) +pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t *node, LABEL *label, const pm_scope_node_t *scope_node) { VALUE key = Qundef; switch (PM_NODE_TYPE(node)) { case PM_FLOAT_NODE: { - key = pm_static_literal_value(node, scope_node); + key = pm_static_literal_value(iseq, node, scope_node); double intptr; if (modf(RFLOAT_VALUE(key), &intptr) == 0.0) { @@ -4254,7 +4295,7 @@ pm_compile_case_node_dispatch(VALUE dispatch, const pm_node_t *node, LABEL *labe case PM_SOURCE_LINE_NODE: case PM_SYMBOL_NODE: case PM_TRUE_NODE: - key = pm_static_literal_value(node, scope_node); + key = pm_static_literal_value(iseq, node, scope_node); break; case PM_STRING_NODE: { const pm_string_node_t *cast = (const pm_string_node_t *) node; @@ -4379,13 +4420,13 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // If every node in the array is static, then we can compile the entire // array now instead of later. - if (pm_static_literal_p(node)) { + if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { // We're only going to compile this node if it's not popped. If it // is popped, then we know we don't need to do anything since it's // statically known. if (!popped) { if (elements->size) { - VALUE value = pm_static_literal_value(node, scope_node); + VALUE value = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, duparray, value); } else { @@ -4868,7 +4909,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // we're going to try to compile the condition into the // dispatch hash. if (dispatch != Qundef) { - dispatch = pm_compile_case_node_dispatch(dispatch, condition, label, scope_node); + dispatch = pm_compile_case_node_dispatch(iseq, dispatch, condition, label, scope_node); } if (PM_NODE_TYPE_P(condition, PM_SPLAT_NODE)) { @@ -5886,12 +5927,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_HASH_NODE: { // If every node in the hash is static, then we can compile the entire // hash now instead of later. - if (pm_static_literal_p(node)) { + if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { // We're only going to compile this node if it's not popped. If it // is popped, then we know we don't need to do anything since it's // statically known. if (!popped) { - VALUE value = pm_static_literal_value(node, scope_node); + VALUE value = pm_static_literal_value(iseq, node, scope_node); ADD_INSN1(ret, &dummy_line_node, duphash, value); RB_OBJ_WRITTEN(iseq, Qundef, value); } @@ -6072,14 +6113,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^^^^^^^^^^^ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { if (!popped) { - VALUE regexp = pm_static_literal_value(node, scope_node); + VALUE regexp = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, regexp); } } else { - const pm_interpolated_match_last_line_node_t *cast = (const pm_interpolated_match_last_line_node_t *) node; - int length = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node); - PUSH_INSN2(ret, location, toregexp, INT2FIX(pm_reg_flags((const pm_node_t *) cast)), INT2FIX(length)); + pm_compile_regexp_dynamic(iseq, node, &((const pm_interpolated_match_last_line_node_t *) node)->parts, &location, ret, popped, scope_node); } PUSH_INSN1(ret, location, getglobal, rb_id2sym(idLASTLINE)); @@ -6106,20 +6145,18 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, once, block_iseq, INT2FIX(ise_index)); ISEQ_COMPILE_DATA(iseq)->current_block = prevblock; + if (popped) PUSH_INSN(ret, location, pop); return; } if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { if (!popped) { - VALUE regexp = pm_static_literal_value(node, scope_node); + VALUE regexp = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, regexp); } } else { - const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) node; - int length = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node); - - PUSH_INSN2(ret, location, toregexp, INT2FIX(pm_reg_flags((const pm_node_t *) cast)), INT2FIX(length)); + pm_compile_regexp_dynamic(iseq, node, &((const pm_interpolated_regular_expression_node_t *) node)->parts, &location, ret, popped, scope_node); if (popped) PUSH_INSN(ret, location, pop); } @@ -6130,7 +6167,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^^^^^^^^^^^ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { if (!popped) { - VALUE string = pm_static_literal_value(node, scope_node); + VALUE string = pm_static_literal_value(iseq, node, scope_node); if (PM_NODE_FLAG_P(node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN)) { PUSH_INSN1(ret, location, putobject, string); @@ -6161,7 +6198,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) { if (!popped) { - VALUE symbol = pm_static_literal_value(node, scope_node); + VALUE symbol = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, symbol); } } @@ -6323,7 +6360,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_MATCH_LAST_LINE_NODE: { // if /foo/ then end // ^^^^^ - VALUE regexp = pm_static_literal_value(node, scope_node); + VALUE regexp = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, regexp); PUSH_INSN2(ret, location, getspecial, INT2FIX(0), INT2FIX(0)); @@ -6895,8 +6932,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // /foo/ // ^^^^^ if (!popped) { - VALUE regex = pm_static_literal_value(node, scope_node); - PUSH_INSN1(ret, location, putobject, regex); + VALUE regexp = pm_static_literal_value(iseq, node, scope_node); + PUSH_INSN1(ret, location, putobject, regexp); } return; } @@ -7500,12 +7537,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_node_t *value = cast->value; name = cast->name; - if (pm_static_literal_p(value) && - !(PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || - PM_NODE_TYPE_P(value, PM_HASH_NODE) || - PM_NODE_TYPE_P(value, PM_RANGE_NODE))) { - - rb_ary_push(default_values, pm_static_literal_value(value, scope_node)); + if (PM_NODE_FLAG_P(value, PM_NODE_FLAG_STATIC_LITERAL) && !(PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || PM_NODE_TYPE_P(value, PM_HASH_NODE) || PM_NODE_TYPE_P(value, PM_RANGE_NODE))) { + rb_ary_push(default_values, pm_static_literal_value(iseq, value, scope_node)); } else { rb_ary_push(default_values, complex_mark); @@ -7814,10 +7847,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_node_t *value = cast->value; name = cast->name; - if (!(pm_static_literal_p(value)) || - PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || - PM_NODE_TYPE_P(value, PM_HASH_NODE) || - PM_NODE_TYPE_P(value, PM_RANGE_NODE)) { + if (!PM_NODE_FLAG_P(value, PM_NODE_FLAG_STATIC_LITERAL) || PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || PM_NODE_TYPE_P(value, PM_HASH_NODE) || PM_NODE_TYPE_P(value, PM_RANGE_NODE)) { LABEL *end_label = NEW_LABEL(nd_line(&dummy_line_node)); pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, name, 0); @@ -7910,11 +7940,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: { - pm_interpolated_regular_expression_node_t *cast = (pm_interpolated_regular_expression_node_t *) scope_node->ast_node; - - int parts_size = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node); - - ADD_INSN2(ret, &dummy_line_node, toregexp, INT2FIX(pm_reg_flags((pm_node_t *)cast)), INT2FIX(parts_size)); + const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) scope_node->ast_node; + pm_compile_regexp_dynamic(iseq, (const pm_node_t *) cast, &cast->parts, &location, ret, popped, scope_node); break; } default: @@ -8037,7 +8064,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // __ENCODING__ // ^^^^^^^^^^^^ if (!popped) { - VALUE value = pm_static_literal_value(node, scope_node); + VALUE value = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, value); } return; @@ -8065,7 +8092,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // __LINE__ // ^^^^^^^^ if (!popped) { - VALUE value = pm_static_literal_value(node, scope_node); + VALUE value = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, value); } return; @@ -8156,7 +8183,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // :foo // ^^^^ if (!popped) { - VALUE value = pm_static_literal_value(node, scope_node); + VALUE value = pm_static_literal_value(iseq, node, scope_node); PUSH_INSN1(ret, location, putobject, value); } return; diff --git a/test/.excludes-prism/TestRegexp.rb b/test/.excludes-prism/TestRegexp.rb new file mode 100644 index 0000000000..f2b817d79a --- /dev/null +++ b/test/.excludes-prism/TestRegexp.rb @@ -0,0 +1,7 @@ +exclude(:test_unicode_age_14_0, "unknown") +exclude(:test_invalid_fragment, "unknown") +exclude(:test_assign_named_capture_to_reserved_word, "unknown") +exclude(:test_unicode_age_15_0, "unknown") +exclude(:test_unescape, "unknown") +exclude(:test_invalid_escape_error, "unknown") +exclude(:test_unicode_age, "unknown") diff --git a/test/.excludes-prism/TestUnicodeEscape.rb b/test/.excludes-prism/TestUnicodeEscape.rb index 93ed9fcb45..add4911bc2 100644 --- a/test/.excludes-prism/TestUnicodeEscape.rb +++ b/test/.excludes-prism/TestUnicodeEscape.rb @@ -1,2 +1 @@ exclude(:test_fail, "unknown") -exclude(:test_regexp, "unknown") |