summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYusuke Endoh <[email protected]>2021-06-17 23:43:08 +0900
committerYusuke Endoh <[email protected]>2021-06-18 02:34:27 +0900
commitacae5f363dfaedd9c2873cee68c9498da3c072f5 (patch)
tree8919487eefdc8610b2914366abe637ac34812331
parentc639b58823cd8cc62853acf00a49b67ac359ea73 (diff)
ast.rb: RubyVM::AST.parse and .of accept `save_script_lines: true`
This option makes the parser keep the original source as an array of the original code lines. This feature exploits the mechanism of `SCRIPT_LINES__` but records only the specified code that is passed to RubyVM::AST.of or .parse, instead of recording all parsed program texts.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/4581
-rw-r--r--ast.c48
-rw-r--r--ast.rb48
-rw-r--r--compile.c1
-rw-r--r--internal/parse.h1
-rw-r--r--node.c1
-rw-r--r--node.h1
-rw-r--r--parse.y17
-rw-r--r--test/ruby/test_ast.rb50
8 files changed, 145 insertions, 22 deletions
diff --git a/ast.c b/ast.c
index b7416ad203..3fec251bcc 100644
--- a/ast.c
+++ b/ast.c
@@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node)
return obj;
}
-static VALUE rb_ast_parse_str(VALUE str);
-static VALUE rb_ast_parse_file(VALUE path);
+static VALUE rb_ast_parse_str(VALUE str, VALUE save_script_lines);
+static VALUE rb_ast_parse_file(VALUE path, VALUE save_script_lines);
static VALUE
ast_parse_new(void)
@@ -85,29 +85,31 @@ ast_parse_done(rb_ast_t *ast)
}
static VALUE
-ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str)
+ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE save_script_lines)
{
- return rb_ast_parse_str(str);
+ return rb_ast_parse_str(str, save_script_lines);
}
static VALUE
-rb_ast_parse_str(VALUE str)
+rb_ast_parse_str(VALUE str, VALUE save_script_lines)
{
rb_ast_t *ast = 0;
StringValue(str);
- ast = rb_parser_compile_string_path(ast_parse_new(), Qnil, str, 1);
+ VALUE vparser = ast_parse_new();
+ if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
+ ast = rb_parser_compile_string_path(vparser, Qnil, str, 1);
return ast_parse_done(ast);
}
static VALUE
-ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path)
+ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE save_script_lines)
{
- return rb_ast_parse_file(path);
+ return rb_ast_parse_file(path, save_script_lines);
}
static VALUE
-rb_ast_parse_file(VALUE path)
+rb_ast_parse_file(VALUE path, VALUE save_script_lines)
{
VALUE f;
rb_ast_t *ast = 0;
@@ -116,7 +118,9 @@ rb_ast_parse_file(VALUE path)
FilePathValue(path);
f = rb_file_open_str(path, "r");
rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-"));
- ast = rb_parser_compile_file_path(ast_parse_new(), Qnil, f, 1);
+ VALUE vparser = ast_parse_new();
+ if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
+ ast = rb_parser_compile_file_path(vparser, Qnil, f, 1);
rb_io_close(f);
return ast_parse_done(ast);
}
@@ -135,12 +139,14 @@ lex_array(VALUE array, int index)
}
static VALUE
-rb_ast_parse_array(VALUE array)
+rb_ast_parse_array(VALUE array, VALUE save_script_lines)
{
rb_ast_t *ast = 0;
array = rb_check_array_type(array);
- ast = rb_parser_compile_generic(ast_parse_new(), lex_array, Qnil, array, 1);
+ VALUE vparser = ast_parse_new();
+ if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
+ ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
return ast_parse_done(ast);
}
@@ -187,7 +193,7 @@ script_lines(VALUE path)
}
static VALUE
-ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body)
+ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE save_script_lines)
{
VALUE path, node, lines;
int node_id;
@@ -209,13 +215,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body)
path = rb_iseq_path(iseq);
node_id = iseq->body->location.node_id;
if (!NIL_P(lines = script_lines(path))) {
- node = rb_ast_parse_array(lines);
+ node = rb_ast_parse_array(lines, save_script_lines);
}
else if (RSTRING_LEN(path) == 2 && memcmp(RSTRING_PTR(path), "-e", 2) == 0) {
- node = rb_ast_parse_str(rb_e_script);
+ node = rb_ast_parse_str(rb_e_script, save_script_lines);
}
else {
- node = rb_ast_parse_file(path);
+ node = rb_ast_parse_file(path, save_script_lines);
}
return node_find(node, node_id);
@@ -698,6 +704,16 @@ ast_node_inspect(rb_execution_context_t *ec, VALUE self)
return str;
}
+static VALUE
+ast_node_script_lines(rb_execution_context_t *ec, VALUE self)
+{
+ struct ASTNodeData *data;
+ TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
+ VALUE ret = data->ast->body.script_lines;
+ if (!ret) ret = Qnil;
+ return ret;
+}
+
#include "ast.rbinc"
void
diff --git a/ast.rb b/ast.rb
index 9d4b05bdf1..ce99f53c45 100644
--- a/ast.rb
+++ b/ast.rb
@@ -29,8 +29,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.parse("x = 1 + 2")
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-1:9>
- def self.parse string
- Primitive.ast_s_parse string
+ def self.parse string, save_script_lines: false
+ Primitive.ast_s_parse string, save_script_lines
end
# call-seq:
@@ -44,8 +44,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.parse_file("my-app/app.rb")
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-31:3>
- def self.parse_file pathname
- Primitive.ast_s_parse_file pathname
+ def self.parse_file pathname, save_script_lines: false
+ Primitive.ast_s_parse_file pathname, save_script_lines
end
# call-seq:
@@ -63,8 +63,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.of(method(:hello))
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-3:3>
- def self.of body
- Primitive.ast_s_of body
+ def self.of body, save_script_lines: false
+ Primitive.ast_s_of body, save_script_lines
end
# RubyVM::AbstractSyntaxTree::Node instances are created by parse methods in
@@ -139,5 +139,41 @@ module RubyVM::AbstractSyntaxTree
def inspect
Primitive.ast_node_inspect
end
+
+ # call-seq:
+ # node.script_lines -> array
+ #
+ # Returns the original source code as an array of lines.
+ #
+ # Note that this is an API for ruby internal use, debugging,
+ # and research. Do not use this for any other purpose.
+ # The compatibility is not guaranteed.
+ def script_lines
+ Primitive.ast_node_script_lines
+ end
+
+ # call-seq:
+ # node.source -> string
+ #
+ # Returns the code fragment that corresponds to this AST.
+ #
+ # Note that this is an API for ruby internal use, debugging,
+ # and research. Do not use this for any other purpose.
+ # The compatibility is not guaranteed.
+ #
+ # Also note that this API may return an incomplete code fragment
+ # that does not parse; for example, a here document following
+ # an expression may be dropped.
+ def source
+ lines = script_lines
+ if lines
+ lines = lines[first_lineno - 1 .. last_lineno - 1]
+ lines[-1] = lines[-1][0...last_column]
+ lines[0] = lines[0][first_column..-1]
+ lines.join
+ else
+ nil
+ end
+ end
end
end
diff --git a/compile.c b/compile.c
index c58a6c204a..30c75119b2 100644
--- a/compile.c
+++ b/compile.c
@@ -1329,6 +1329,7 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node,
ast.root = node;
ast.compile_option = 0;
ast.line_count = -1;
+ ast.script_lines = Qfalse;
debugs("[new_child_iseq]> ---------------------------------------\n");
int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth;
diff --git a/internal/parse.h b/internal/parse.h
index a37a39f84d..588b2b34da 100644
--- a/internal/parse.h
+++ b/internal/parse.h
@@ -15,6 +15,7 @@ struct rb_iseq_struct; /* in vm_core.h */
/* parse.y */
VALUE rb_parser_set_yydebug(VALUE, VALUE);
void *rb_parser_load_file(VALUE parser, VALUE name);
+void rb_parser_save_script_lines(VALUE vparser);
RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int);
diff --git a/node.c b/node.c
index bef9d7bcbd..f3dbf6e959 100644
--- a/node.c
+++ b/node.c
@@ -1407,6 +1407,7 @@ rb_ast_mark(rb_ast_t *ast)
iterate_node_values(&nb->markable, mark_ast_value, NULL);
}
+ if (ast->body.script_lines) rb_gc_mark(ast->body.script_lines);
}
void
diff --git a/node.h b/node.h
index 192e121fd7..592b285b83 100644
--- a/node.h
+++ b/node.h
@@ -399,6 +399,7 @@ typedef struct rb_ast_body_struct {
const NODE *root;
VALUE compile_option;
int line_count;
+ VALUE script_lines;
} rb_ast_body_t;
typedef struct rb_ast_struct {
VALUE flags;
diff --git a/parse.y b/parse.y
index 6b42b6b31b..47b63e810d 100644
--- a/parse.y
+++ b/parse.y
@@ -337,6 +337,7 @@ struct parser_params {
unsigned int do_loop: 1;
unsigned int do_chomp: 1;
unsigned int do_split: 1;
+ unsigned int save_script_lines: 1;
NODE *eval_tree_begin;
NODE *eval_tree;
@@ -6241,6 +6242,13 @@ yycompile0(VALUE arg)
cov = Qtrue;
}
}
+ if (p->save_script_lines) {
+ if (!p->debug_lines) {
+ p->debug_lines = rb_ary_new();
+ }
+
+ RB_OBJ_WRITE(p->ast, &p->ast->body.script_lines, p->debug_lines);
+ }
parser_prepare(p);
#define RUBY_DTRACE_PARSE_HOOK(name) \
@@ -13186,6 +13194,15 @@ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main
p->parent_iseq = base;
return vparser;
}
+
+void
+rb_parser_save_script_lines(VALUE vparser)
+{
+ struct parser_params *p;
+
+ TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p);
+ p->save_script_lines = 1;
+}
#endif
#ifdef RIPPER
diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb
index b039911f3a..5a229eabd4 100644
--- a/test/ruby/test_ast.rb
+++ b/test/ruby/test_ast.rb
@@ -372,4 +372,54 @@ class TestAst < Test::Unit::TestCase
_, args = *node.children.last.children[1].children
assert_equal(:a, args.children[rest])
end
+
+ def test_save_script_lines_for_parse
+ node = RubyVM::AbstractSyntaxTree.parse(<<~END, save_script_lines: true)
+1.times do
+ 2.times do
+ end
+end
+__END__
+dummy
+ END
+
+ expected = [
+ "1.times do\n",
+ " 2.times do\n",
+ " end\n",
+ "end\n",
+ "__END__\n",
+ ]
+ assert_equal(expected, node.script_lines)
+
+ expected =
+ "1.times do\n" +
+ " 2.times do\n" +
+ " end\n" +
+ "end"
+ assert_equal(expected, node.source)
+
+ expected =
+ "do\n" +
+ " 2.times do\n" +
+ " end\n" +
+ "end"
+ assert_equal(expected, node.children.last.children.last.source)
+
+ expected =
+ "2.times do\n" +
+ " end"
+ assert_equal(expected, node.children.last.children.last.children.last.source)
+ end
+
+ def test_save_script_lines_for_of
+ proc = Proc.new { 1 + 2 }
+ method = self.method(__method__)
+
+ node_proc = RubyVM::AbstractSyntaxTree.of(proc, save_script_lines: true)
+ node_method = RubyVM::AbstractSyntaxTree.of(method, save_script_lines: true)
+
+ assert_equal("{ 1 + 2 }", node_proc.source)
+ assert_equal("def test_save_script_lines_for_of\n", node_method.source.lines.first)
+ end
end