summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/prism.yml2
-rw-r--r--prism_compile.c423
-rw-r--r--test/.excludes-prism/TestRegexp.rb7
-rw-r--r--test/.excludes-prism/TestUnicodeEscape.rb1
4 files changed, 233 insertions, 200 deletions
diff --git a/.github/workflows/prism.yml b/.github/workflows/prism.yml
index 49058c232f..8b295f7f81 100644
--- a/.github/workflows/prism.yml
+++ b/.github/workflows/prism.yml
@@ -92,7 +92,7 @@ jobs:
timeout-minutes: 40
env:
GNUMAKEFLAGS: ''
- RUBY_TESTOPTS: '-q --tty=no --excludes-dir="../src/test/.excludes-prism" --exclude="test_ast.rb" --exclude="test_regexp.rb" --exclude="error_highlight/test_error_highlight.rb" --exclude="prism/encoding_test.rb"'
+ RUBY_TESTOPTS: '-q --tty=no --excludes-dir="../src/test/.excludes-prism" --exclude="test_ast.rb" --exclude="error_highlight/test_error_highlight.rb" --exclude="prism/encoding_test.rb"'
RUN_OPTS: ${{ matrix.run_opts }}
- name: make test-prism-spec
diff --git a/prism_compile.c b/prism_compile.c
index db032e2a05..d7eada1538 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -289,131 +289,243 @@ pm_optimizable_range_item_p(pm_node_t *node)
return (!node || PM_NODE_TYPE_P(node, PM_INTEGER_NODE) || PM_NODE_TYPE_P(node, PM_NIL_NODE));
}
+static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node);
+
+static int
+pm_interpolated_node_compile(const pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
+{
+ int number_of_items_pushed = 0;
+ size_t parts_size = parts->size;
+
+ if (parts_size > 0) {
+ VALUE current_string = Qnil;
+
+ for (size_t index = 0; index < parts_size; index++) {
+ const pm_node_t *part = parts->nodes[index];
+
+ if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
+ const pm_string_node_t *string_node = (const pm_string_node_t *)part;
+ VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped);
+
+ if (RTEST(current_string)) {
+ current_string = rb_str_concat(current_string, string_value);
+ }
+ else {
+ current_string = string_value;
+ }
+ }
+ else if (PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) &&
+ ((const pm_embedded_statements_node_t *) part)->statements != NULL &&
+ ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 &&
+ PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE)) {
+ const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0];
+ VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped);
+
+ if (RTEST(current_string)) {
+ current_string = rb_str_concat(current_string, string_value);
+ }
+ else {
+ current_string = string_value;
+ }
+ }
+ else {
+ if (!RTEST(current_string)) {
+ current_string = rb_enc_str_new(NULL, 0, scope_node->encoding);
+ }
+
+ ADD_INSN1(ret, &dummy_line_node, putobject, rb_fstring(current_string));
+
+ current_string = Qnil;
+ number_of_items_pushed++;
+
+ PM_COMPILE_NOT_POPPED(part);
+ PM_DUP;
+ ADD_INSN1(ret, &dummy_line_node, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE));
+ ADD_INSN(ret, &dummy_line_node, anytostring);
+
+ number_of_items_pushed++;
+ }
+ }
+
+ if (RTEST(current_string)) {
+ current_string = rb_fstring(current_string);
+ ADD_INSN1(ret, &dummy_line_node, putobject, current_string);
+ current_string = Qnil;
+ number_of_items_pushed++;
+ }
+ }
+ else {
+ PM_PUTNIL;
+ }
+
+ return number_of_items_pushed;
+}
+
+static VALUE
+pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top)
+{
+ VALUE current = Qnil;
+
+ for (size_t index = 0; index < nodes->size; index++) {
+ const pm_node_t *part = nodes->nodes[index];
+ VALUE string;
+
+ switch (PM_NODE_TYPE(part)) {
+ case PM_STRING_NODE:
+ string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped);
+ break;
+ case PM_INTERPOLATED_STRING_NODE:
+ string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false);
+ break;
+ default:
+ RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat");
+ return Qnil;
+ }
+
+ if (current != Qnil) {
+ current = rb_str_concat(current, string);
+ }
+ else {
+ current = string;
+ }
+ }
+
+ return top ? rb_fstring(current) : current;
+}
+
#define RE_OPTION_ENCODING_SHIFT 8
+#define RE_OPTION_ENCODING(encoding) (((encoding) & 0xFF) << RE_OPTION_ENCODING_SHIFT)
+#define ARG_ENCODING_NONE 32
+#define ARG_ENCODING_FIXED 16
+#define ENC_ASCII8BIT 1
+#define ENC_EUC_JP 2
+#define ENC_Windows_31J 3
+#define ENC_UTF8 4
/**
* Translate the prism flags of a regular expression-like node into the integer
* flags expected by the CRuby VM: an encoding marker OR'd with ONIG_OPTION_* bits.
*/
static int
-pm_reg_flags(const pm_node_t *node) {
+parse_regexp_flags(const pm_node_t *node)
+{
int flags = 0;
- int dummy = 0;
// Check "no encoding" first so that flags don't get clobbered.
// `ARG_ENCODING_NONE` is defined locally, just above this function, so the
// flag is set directly instead of going through `rb_char_to_option_kcode`.
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
- rb_char_to_option_kcode('n', &flags, &dummy);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) {
+ flags |= ARG_ENCODING_NONE;
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
- rb_char_to_option_kcode('e', &flags, &dummy);
- flags |= ('e' << RE_OPTION_ENCODING_SHIFT);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) {
+ flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_EUC_JP));
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
- rb_char_to_option_kcode('s', &flags, &dummy);
- flags |= ('s' << RE_OPTION_ENCODING_SHIFT);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) {
+ flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_Windows_31J));
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
- rb_char_to_option_kcode('u', &flags, &dummy);
- flags |= ('u' << RE_OPTION_ENCODING_SHIFT);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) {
+ flags |= (ARG_ENCODING_FIXED | RE_OPTION_ENCODING(ENC_UTF8));
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) {
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) {
flags |= ONIG_OPTION_IGNORECASE;
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) {
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) {
flags |= ONIG_OPTION_MULTILINE;
}
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) {
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) {
flags |= ONIG_OPTION_EXTEND;
}
return flags;
}
+#undef RE_OPTION_ENCODING_SHIFT
+#undef RE_OPTION_ENCODING
+#undef ARG_ENCODING_FIXED
+#undef ARG_ENCODING_NONE
+#undef ENC_ASCII8BIT
+#undef ENC_EUC_JP
+#undef ENC_Windows_31J
+#undef ENC_UTF8
+
static rb_encoding *
-pm_reg_enc(const pm_scope_node_t *scope_node, const pm_node_t *node)
+parse_regexp_encoding(const pm_scope_node_t *scope_node, const pm_node_t *node)
{
if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) {
return rb_ascii8bit_encoding();
}
-
- if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) {
+ else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) {
+ return rb_utf8_encoding();
+ }
+ else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) {
return rb_enc_get_from_index(ENCINDEX_EUC_JP);
}
-
- if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) {
+ else if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) {
return rb_enc_get_from_index(ENCINDEX_Windows_31J);
}
-
- if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) {
- return rb_utf8_encoding();
+ else {
+ return scope_node->encoding;
}
-
- return scope_node->encoding;
}
-/**
- * Certain nodes can be compiled literally, which can lead to further
- * optimizations. These nodes will all have the PM_NODE_FLAG_STATIC_LITERAL flag
- * set.
- */
-static inline bool
-pm_static_literal_p(const pm_node_t *node)
+/** Raise a syntax error for an invalid regular expression. The message is built from `fmt` and the variadic arguments that follow it, and the resulting error is raised with rb_exc_raise. */
+static VALUE
+parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...)
{
- return node->flags & PM_NODE_FLAG_STATIC_LITERAL;
+ va_list args;
+ va_start(args, fmt);
+ VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, fmt, args);
+ va_end(args);
+ rb_exc_raise(error);
}
static VALUE
-pm_new_regex(const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped)
+parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string)
{
- VALUE regex_str = parse_string(scope_node, unescaped);
- rb_encoding *enc = pm_reg_enc(scope_node, node);
+ VALUE errinfo = rb_errinfo();
- VALUE regex = rb_enc_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), enc, pm_reg_flags(node));
- RB_GC_GUARD(regex_str);
+ int32_t line_number = pm_node_line_number(scope_node->parser, node);
+ VALUE regexp = rb_reg_compile(string, parse_regexp_flags(node), (const char *) pm_string_source(&scope_node->parser->filepath), line_number);
- rb_obj_freeze(regex);
+ if (NIL_P(regexp)) {
+ VALUE message = rb_attr_get(rb_errinfo(), idMesg);
+ rb_set_errinfo(errinfo);
- return regex;
+ parse_regexp_error(iseq, line_number, "%" PRIsVALUE, message);
+ return Qnil;
+ }
+
+ rb_obj_freeze(regexp);
+ return regexp;
}
-static VALUE
-pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top)
+static inline VALUE
+parse_regexp_literal(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped)
{
- VALUE current = Qnil;
-
- for (size_t index = 0; index < nodes->size; index++) {
- const pm_node_t *part = nodes->nodes[index];
- VALUE string;
-
- switch (PM_NODE_TYPE(part)) {
- case PM_STRING_NODE:
- string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped);
- break;
- case PM_INTERPOLATED_STRING_NODE:
- string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false);
- break;
- default:
- RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat");
- return Qnil;
- }
+ VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), parse_regexp_encoding(scope_node, node));
+ return parse_regexp(iseq, scope_node, node, string);
+}
- if (current != Qnil) {
- current = rb_str_concat(current, string);
- }
- else {
- current = string;
- }
- }
+static inline VALUE
+parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts)
+{
+ VALUE string = pm_static_literal_concat(parts, scope_node, false);
+ rb_enc_associate(string, parse_regexp_encoding(scope_node, node));
+ return parse_regexp(iseq, scope_node, node, string);
+}
- return top ? rb_fstring(current) : current;
+static void
+pm_compile_regexp_dynamic(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
+{
+ NODE dummy_line_node = generate_dummy_line_node(node_location->line, node_location->column);
+ int length = pm_interpolated_node_compile(parts, iseq, dummy_line_node, ret, popped, scope_node);
+ PUSH_INSN2(ret, *node_location, toregexp, INT2FIX(parse_regexp_flags(node) & 0xFF), INT2FIX(length));
}
/**
@@ -422,11 +534,11 @@ pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *sco
* literal values can be compiled into a literal array.
*/
static VALUE
-pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node)
+pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node)
{
// Every node that comes into this function should already be marked as
// static literal. If it's not, then we have a bug somewhere.
- RUBY_ASSERT(pm_static_literal_p(node));
+ RUBY_ASSERT(PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL));
switch (PM_NODE_TYPE(node)) {
case PM_ARRAY_NODE: {
@@ -435,7 +547,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
VALUE value = rb_ary_hidden_new(elements->size);
for (size_t index = 0; index < elements->size; index++) {
- rb_ary_push(value, pm_static_literal_value(elements->nodes[index], scope_node));
+ rb_ary_push(value, pm_static_literal_value(iseq, elements->nodes[index], scope_node));
}
OBJ_FREEZE(value);
@@ -453,7 +565,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
for (size_t index = 0; index < elements->size; index++) {
RUBY_ASSERT(PM_NODE_TYPE_P(elements->nodes[index], PM_ASSOC_NODE));
pm_assoc_node_t *cast = (pm_assoc_node_t *) elements->nodes[index];
- VALUE pair[2] = { pm_static_literal_value(cast->key, scope_node), pm_static_literal_value(cast->value, scope_node) };
+ VALUE pair[2] = { pm_static_literal_value(iseq, cast->key, scope_node), pm_static_literal_value(iseq, cast->value, scope_node) };
rb_ary_cat(array, pair, 2);
}
@@ -470,17 +582,11 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
return parse_integer((const pm_integer_node_t *) node);
case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: {
const pm_interpolated_match_last_line_node_t *cast = (const pm_interpolated_match_last_line_node_t *) node;
- VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true);
-
- rb_encoding *encoding = pm_reg_enc(scope_node, (const pm_node_t *) cast);
- return rb_obj_freeze(rb_enc_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), encoding, pm_reg_flags((const pm_node_t *) cast)));
+ return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts);
}
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: {
const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) node;
- VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true);
-
- rb_encoding *encoding = pm_reg_enc(scope_node, (const pm_node_t *) cast);
- return rb_obj_freeze(rb_enc_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), encoding, pm_reg_flags((const pm_node_t *) cast)));
+ return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts);
}
case PM_INTERPOLATED_STRING_NODE:
return pm_static_literal_concat(&((const pm_interpolated_string_node_t *) node)->parts, scope_node, true);
@@ -492,7 +598,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
}
case PM_MATCH_LAST_LINE_NODE: {
const pm_match_last_line_node_t *cast = (const pm_match_last_line_node_t *) node;
- return pm_new_regex(scope_node, (const pm_node_t *) cast, &cast->unescaped);
+ return parse_regexp_literal(iseq, scope_node, (const pm_node_t *) cast, &cast->unescaped);
}
case PM_NIL_NODE:
return Qnil;
@@ -500,13 +606,20 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
return parse_rational((const pm_rational_node_t *) node);
case PM_REGULAR_EXPRESSION_NODE: {
const pm_regular_expression_node_t *cast = (const pm_regular_expression_node_t *) node;
- return pm_new_regex(scope_node, (const pm_node_t *) cast, &cast->unescaped);
+ return parse_regexp_literal(iseq, scope_node, (const pm_node_t *) cast, &cast->unescaped);
}
case PM_SOURCE_ENCODING_NODE:
return rb_enc_from_encoding(scope_node->encoding);
case PM_SOURCE_FILE_NODE: {
- pm_source_file_node_t *cast = (pm_source_file_node_t *)node;
- return cast->filepath.length ? parse_string(scope_node, &cast->filepath) : rb_fstring_lit("<compiled>");
+ const pm_source_file_node_t *cast = (const pm_source_file_node_t *) node;
+ size_t length = pm_string_length(&cast->filepath);
+
+ if (length > 0) {
+ return rb_enc_str_new((const char *) pm_string_source(&cast->filepath), length, scope_node->encoding);
+ }
+ else {
+ return rb_fstring_lit("<compiled>");
+ }
}
case PM_SOURCE_LINE_NODE:
return INT2FIX(pm_node_line_number(scope_node->parser, node));
@@ -601,8 +714,6 @@ pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LAB
return;
}
-static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node);
-
static void
pm_compile_flip_flop_bound(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
{
@@ -842,76 +953,6 @@ pm_compile_loop(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_fl
return;
}
-static int
-pm_interpolated_node_compile(const pm_node_list_t *parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
-{
- int number_of_items_pushed = 0;
- size_t parts_size = parts->size;
-
- if (parts_size > 0) {
- VALUE current_string = Qnil;
-
- for (size_t index = 0; index < parts_size; index++) {
- const pm_node_t *part = parts->nodes[index];
-
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
- const pm_string_node_t *string_node = (const pm_string_node_t *)part;
- VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped);
-
- if (RTEST(current_string)) {
- current_string = rb_str_concat(current_string, string_value);
- }
- else {
- current_string = string_value;
- }
- }
- else if (PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) &&
- ((const pm_embedded_statements_node_t *) part)->statements != NULL &&
- ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 &&
- PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE)) {
- const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0];
- VALUE string_value = parse_string_encoded(scope_node, (pm_node_t *)string_node, &string_node->unescaped);
-
- if (RTEST(current_string)) {
- current_string = rb_str_concat(current_string, string_value);
- }
- else {
- current_string = string_value;
- }
- }
- else {
- if (!RTEST(current_string)) {
- current_string = rb_enc_str_new(NULL, 0, scope_node->encoding);
- }
-
- ADD_INSN1(ret, &dummy_line_node, putobject, rb_fstring(current_string));
-
- current_string = Qnil;
- number_of_items_pushed++;
-
- PM_COMPILE_NOT_POPPED(part);
- PM_DUP;
- ADD_INSN1(ret, &dummy_line_node, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE));
- ADD_INSN(ret, &dummy_line_node, anytostring);
-
- number_of_items_pushed++;
- }
- }
-
- if (RTEST(current_string)) {
- current_string = rb_fstring(current_string);
- ADD_INSN1(ret, &dummy_line_node, putobject, current_string);
- current_string = Qnil;
- number_of_items_pushed++;
- }
- }
- else {
- PM_PUTNIL;
- }
-
- return number_of_items_pushed;
-}
-
// This recurses through scopes and finds the local index at any scope level
// It also takes a pointer to depth, and increments depth appropriately
// according to the depth of the local.
@@ -1201,7 +1242,7 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b
// Retrieve the stored index from the hash for this
// keyword.
- VALUE keyword = pm_static_literal_value(assoc->key, scope_node);
+ VALUE keyword = pm_static_literal_value(iseq, assoc->key, scope_node);
VALUE stored_index = rb_hash_aref(stored_indices, keyword);
// If this keyword was already seen in the hash,
@@ -1233,7 +1274,7 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b
bool popped = true;
if (rb_ary_entry(keyword_indices, (long) element_index) == Qtrue) {
- keywords[keyword_index++] = pm_static_literal_value(assoc->key, scope_node);
+ keywords[keyword_index++] = pm_static_literal_value(iseq, assoc->key, scope_node);
popped = false;
}
@@ -4232,13 +4273,13 @@ pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *co
* optimization entirely.
*/
static VALUE
-pm_compile_case_node_dispatch(VALUE dispatch, const pm_node_t *node, LABEL *label, const pm_scope_node_t *scope_node)
+pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t *node, LABEL *label, const pm_scope_node_t *scope_node)
{
VALUE key = Qundef;
switch (PM_NODE_TYPE(node)) {
case PM_FLOAT_NODE: {
- key = pm_static_literal_value(node, scope_node);
+ key = pm_static_literal_value(iseq, node, scope_node);
double intptr;
if (modf(RFLOAT_VALUE(key), &intptr) == 0.0) {
@@ -4254,7 +4295,7 @@ pm_compile_case_node_dispatch(VALUE dispatch, const pm_node_t *node, LABEL *labe
case PM_SOURCE_LINE_NODE:
case PM_SYMBOL_NODE:
case PM_TRUE_NODE:
- key = pm_static_literal_value(node, scope_node);
+ key = pm_static_literal_value(iseq, node, scope_node);
break;
case PM_STRING_NODE: {
const pm_string_node_t *cast = (const pm_string_node_t *) node;
@@ -4379,13 +4420,13 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// If every node in the array is static, then we can compile the entire
// array now instead of later.
- if (pm_static_literal_p(node)) {
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
// We're only going to compile this node if it's not popped. If it
// is popped, then we know we don't need to do anything since it's
// statically known.
if (!popped) {
if (elements->size) {
- VALUE value = pm_static_literal_value(node, scope_node);
+ VALUE value = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, duparray, value);
}
else {
@@ -4868,7 +4909,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// we're going to try to compile the condition into the
// dispatch hash.
if (dispatch != Qundef) {
- dispatch = pm_compile_case_node_dispatch(dispatch, condition, label, scope_node);
+ dispatch = pm_compile_case_node_dispatch(iseq, dispatch, condition, label, scope_node);
}
if (PM_NODE_TYPE_P(condition, PM_SPLAT_NODE)) {
@@ -5886,12 +5927,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
case PM_HASH_NODE: {
// If every node in the hash is static, then we can compile the entire
// hash now instead of later.
- if (pm_static_literal_p(node)) {
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
// We're only going to compile this node if it's not popped. If it
// is popped, then we know we don't need to do anything since it's
// statically known.
if (!popped) {
- VALUE value = pm_static_literal_value(node, scope_node);
+ VALUE value = pm_static_literal_value(iseq, node, scope_node);
ADD_INSN1(ret, &dummy_line_node, duphash, value);
RB_OBJ_WRITTEN(iseq, Qundef, value);
}
@@ -6072,14 +6113,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// ^^^^^^^^^^^^
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
if (!popped) {
- VALUE regexp = pm_static_literal_value(node, scope_node);
+ VALUE regexp = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, regexp);
}
}
else {
- const pm_interpolated_match_last_line_node_t *cast = (const pm_interpolated_match_last_line_node_t *) node;
- int length = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node);
- PUSH_INSN2(ret, location, toregexp, INT2FIX(pm_reg_flags((const pm_node_t *) cast)), INT2FIX(length));
+ pm_compile_regexp_dynamic(iseq, node, &((const pm_interpolated_match_last_line_node_t *) node)->parts, &location, ret, popped, scope_node);
}
PUSH_INSN1(ret, location, getglobal, rb_id2sym(idLASTLINE));
@@ -6106,20 +6145,18 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
PUSH_INSN2(ret, location, once, block_iseq, INT2FIX(ise_index));
ISEQ_COMPILE_DATA(iseq)->current_block = prevblock;
+ if (popped) PUSH_INSN(ret, location, pop);
return;
}
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
if (!popped) {
- VALUE regexp = pm_static_literal_value(node, scope_node);
+ VALUE regexp = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, regexp);
}
}
else {
- const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) node;
- int length = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node);
-
- PUSH_INSN2(ret, location, toregexp, INT2FIX(pm_reg_flags((const pm_node_t *) cast)), INT2FIX(length));
+ pm_compile_regexp_dynamic(iseq, node, &((const pm_interpolated_regular_expression_node_t *) node)->parts, &location, ret, popped, scope_node);
if (popped) PUSH_INSN(ret, location, pop);
}
@@ -6130,7 +6167,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// ^^^^^^^^^^^^
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
if (!popped) {
- VALUE string = pm_static_literal_value(node, scope_node);
+ VALUE string = pm_static_literal_value(iseq, node, scope_node);
if (PM_NODE_FLAG_P(node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN)) {
PUSH_INSN1(ret, location, putobject, string);
@@ -6161,7 +6198,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
if (!popped) {
- VALUE symbol = pm_static_literal_value(node, scope_node);
+ VALUE symbol = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, symbol);
}
}
@@ -6323,7 +6360,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
case PM_MATCH_LAST_LINE_NODE: {
// if /foo/ then end
// ^^^^^
- VALUE regexp = pm_static_literal_value(node, scope_node);
+ VALUE regexp = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, regexp);
PUSH_INSN2(ret, location, getspecial, INT2FIX(0), INT2FIX(0));
@@ -6895,8 +6932,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// /foo/
// ^^^^^
if (!popped) {
- VALUE regex = pm_static_literal_value(node, scope_node);
- PUSH_INSN1(ret, location, putobject, regex);
+ VALUE regexp = pm_static_literal_value(iseq, node, scope_node);
+ PUSH_INSN1(ret, location, putobject, regexp);
}
return;
}
@@ -7500,12 +7537,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
pm_node_t *value = cast->value;
name = cast->name;
- if (pm_static_literal_p(value) &&
- !(PM_NODE_TYPE_P(value, PM_ARRAY_NODE) ||
- PM_NODE_TYPE_P(value, PM_HASH_NODE) ||
- PM_NODE_TYPE_P(value, PM_RANGE_NODE))) {
-
- rb_ary_push(default_values, pm_static_literal_value(value, scope_node));
+ if (PM_NODE_FLAG_P(value, PM_NODE_FLAG_STATIC_LITERAL) && !(PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || PM_NODE_TYPE_P(value, PM_HASH_NODE) || PM_NODE_TYPE_P(value, PM_RANGE_NODE))) {
+ rb_ary_push(default_values, pm_static_literal_value(iseq, value, scope_node));
}
else {
rb_ary_push(default_values, complex_mark);
@@ -7814,10 +7847,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
pm_node_t *value = cast->value;
name = cast->name;
- if (!(pm_static_literal_p(value)) ||
- PM_NODE_TYPE_P(value, PM_ARRAY_NODE) ||
- PM_NODE_TYPE_P(value, PM_HASH_NODE) ||
- PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
+ if (!PM_NODE_FLAG_P(value, PM_NODE_FLAG_STATIC_LITERAL) || PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || PM_NODE_TYPE_P(value, PM_HASH_NODE) || PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
LABEL *end_label = NEW_LABEL(nd_line(&dummy_line_node));
pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, name, 0);
@@ -7910,11 +7940,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
break;
}
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: {
- pm_interpolated_regular_expression_node_t *cast = (pm_interpolated_regular_expression_node_t *) scope_node->ast_node;
-
- int parts_size = pm_interpolated_node_compile(&cast->parts, iseq, dummy_line_node, ret, popped, scope_node);
-
- ADD_INSN2(ret, &dummy_line_node, toregexp, INT2FIX(pm_reg_flags((pm_node_t *)cast)), INT2FIX(parts_size));
+ const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) scope_node->ast_node;
+ pm_compile_regexp_dynamic(iseq, (const pm_node_t *) cast, &cast->parts, &location, ret, popped, scope_node);
break;
}
default:
@@ -8037,7 +8064,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// __ENCODING__
// ^^^^^^^^^^^^
if (!popped) {
- VALUE value = pm_static_literal_value(node, scope_node);
+ VALUE value = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, value);
}
return;
@@ -8065,7 +8092,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// __LINE__
// ^^^^^^^^
if (!popped) {
- VALUE value = pm_static_literal_value(node, scope_node);
+ VALUE value = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, value);
}
return;
@@ -8156,7 +8183,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// :foo
// ^^^^
if (!popped) {
- VALUE value = pm_static_literal_value(node, scope_node);
+ VALUE value = pm_static_literal_value(iseq, node, scope_node);
PUSH_INSN1(ret, location, putobject, value);
}
return;
diff --git a/test/.excludes-prism/TestRegexp.rb b/test/.excludes-prism/TestRegexp.rb
new file mode 100644
index 0000000000..f2b817d79a
--- /dev/null
+++ b/test/.excludes-prism/TestRegexp.rb
@@ -0,0 +1,7 @@
+exclude(:test_unicode_age_14_0, "unknown")
+exclude(:test_invalid_fragment, "unknown")
+exclude(:test_assign_named_capture_to_reserved_word, "unknown")
+exclude(:test_unicode_age_15_0, "unknown")
+exclude(:test_unescape, "unknown")
+exclude(:test_invalid_escape_error, "unknown")
+exclude(:test_unicode_age, "unknown")
diff --git a/test/.excludes-prism/TestUnicodeEscape.rb b/test/.excludes-prism/TestUnicodeEscape.rb
index 93ed9fcb45..add4911bc2 100644
--- a/test/.excludes-prism/TestUnicodeEscape.rb
+++ b/test/.excludes-prism/TestUnicodeEscape.rb
@@ -1,2 +1 @@
exclude(:test_fail, "unknown")
-exclude(:test_regexp, "unknown")