[ruby/psych] Add support for ruby 3.2 Data objects
[ruby.git] / ast.c
blob b98fba6fabcb1fdfb8ffcde1d3066bd3a17a194f
1 /* indent-tabs-mode: nil */
2 #include "internal.h"
3 #include "internal/ruby_parser.h"
4 #include "internal/symbol.h"
5 #include "internal/warnings.h"
6 #include "iseq.h"
7 #include "node.h"
8 #include "ruby.h"
9 #include "ruby/encoding.h"
10 #include "ruby/util.h"
11 #include "vm_core.h"
13 #include "builtin.h"
15 static VALUE rb_mAST;
16 static VALUE rb_cNode;
17 static VALUE rb_cLocation;
19 struct ASTNodeData {
20 VALUE ast_value;
21 const NODE *node;
24 static void
25 node_gc_mark(void *ptr)
27 struct ASTNodeData *data = (struct ASTNodeData *)ptr;
28 rb_gc_mark(data->ast_value);
31 static size_t
32 node_memsize(const void *ptr)
34 struct ASTNodeData *data = (struct ASTNodeData *)ptr;
35 rb_ast_t *ast = rb_ruby_ast_data_get(data->ast_value);
37 return sizeof(struct ASTNodeData) + rb_ast_memsize(ast);
40 static const rb_data_type_t rb_node_type = {
41 "AST/node",
42 {node_gc_mark, RUBY_TYPED_DEFAULT_FREE, node_memsize,},
43 0, 0,
44 RUBY_TYPED_FREE_IMMEDIATELY,
/*
 * Payload whose size location_memsize() reports for "AST/location" objects:
 * four plain integers, so it holds nothing the GC has to trace.
 */
struct ASTLocationData {
    int first_lineno;
    int first_column;
    int last_lineno;
    int last_column;
};
/* GC mark callback of rb_location_type; deliberately does nothing. */
static void
location_gc_mark(void *ptr)
{
    /* struct ASTLocationData holds only ints, so there is nothing to mark. */
    (void)ptr;
}
59 static size_t
60 location_memsize(const void *ptr)
62 return sizeof(struct ASTLocationData);
65 static const rb_data_type_t rb_location_type = {
66 "AST/location",
67 {location_gc_mark, RUBY_TYPED_DEFAULT_FREE, location_memsize,},
68 0, 0,
69 RUBY_TYPED_FREE_IMMEDIATELY,
73 static VALUE rb_ast_node_alloc(VALUE klass);
75 static void
76 setup_node(VALUE obj, VALUE ast_value, const NODE *node)
78 struct ASTNodeData *data;
80 TypedData_Get_Struct(obj, struct ASTNodeData, &rb_node_type, data);
81 data->ast_value = ast_value;
82 data->node = node;
85 static VALUE
86 ast_new_internal(VALUE ast_value, const NODE *node)
88 VALUE obj;
90 obj = rb_ast_node_alloc(rb_cNode);
91 setup_node(obj, ast_value, node);
93 return obj;
96 static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens);
97 static VALUE rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens);
99 static VALUE
100 ast_parse_new(void)
102 return rb_parser_set_context(rb_parser_new(), NULL, 0);
105 static VALUE
106 ast_parse_done(VALUE ast_value)
108 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
110 if (!ast->body.root) {
111 rb_ast_dispose(ast);
112 rb_exc_raise(GET_EC()->errinfo);
115 return ast_new_internal(ast_value, (NODE *)ast->body.root);
118 static VALUE
119 setup_vparser(VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
121 VALUE vparser = ast_parse_new();
122 if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
123 if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
124 if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
125 return vparser;
128 static VALUE
129 ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
131 return rb_ast_parse_str(str, keep_script_lines, error_tolerant, keep_tokens);
134 static VALUE
135 rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
137 VALUE ast_value = Qnil;
138 StringValue(str);
139 VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens);
140 ast_value = rb_parser_compile_string_path(vparser, Qnil, str, 1);
141 return ast_parse_done(ast_value);
144 static VALUE
145 ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
147 return rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens);
150 static VALUE
151 rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
153 VALUE f;
154 VALUE ast_value = Qnil;
155 rb_encoding *enc = rb_utf8_encoding();
157 f = rb_file_open_str(path, "r");
158 rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-"));
159 VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens);
160 ast_value = rb_parser_compile_file_path(vparser, Qnil, f, 1);
161 rb_io_close(f);
162 return ast_parse_done(ast_value);
165 static VALUE
166 rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
168 VALUE ast_value = Qnil;
170 array = rb_check_array_type(array);
171 VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens);
172 ast_value = rb_parser_compile_array(vparser, Qnil, array, 1);
173 return ast_parse_done(ast_value);
176 static VALUE node_children(VALUE, const NODE*);
178 static VALUE
179 node_find(VALUE self, const int node_id)
181 VALUE ary;
182 long i;
183 struct ASTNodeData *data;
184 TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
186 if (nd_node_id(data->node) == node_id) return self;
188 ary = node_children(data->ast_value, data->node);
190 for (i = 0; i < RARRAY_LEN(ary); i++) {
191 VALUE child = RARRAY_AREF(ary, i);
193 if (CLASS_OF(child) == rb_cNode) {
194 VALUE result = node_find(child, node_id);
195 if (RTEST(result)) return result;
199 return Qnil;
202 extern VALUE rb_e_script;
204 static VALUE
205 node_id_for_backtrace_location(rb_execution_context_t *ec, VALUE module, VALUE location)
207 int node_id;
209 if (!rb_frame_info_p(location)) {
210 rb_raise(rb_eTypeError, "Thread::Backtrace::Location object expected");
213 node_id = rb_get_node_id_from_frame_info(location);
214 if (node_id == -1) {
215 return Qnil;
218 return INT2NUM(node_id);
221 static VALUE
222 ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
224 VALUE node, lines = Qnil;
225 const rb_iseq_t *iseq;
226 int node_id;
228 if (rb_frame_info_p(body)) {
229 iseq = rb_get_iseq_from_frame_info(body);
230 node_id = rb_get_node_id_from_frame_info(body);
232 else {
233 iseq = NULL;
235 if (rb_obj_is_proc(body)) {
236 iseq = vm_proc_iseq(body);
238 if (!rb_obj_is_iseq((VALUE)iseq)) return Qnil;
240 else {
241 iseq = rb_method_iseq(body);
243 if (iseq) {
244 node_id = ISEQ_BODY(iseq)->location.node_id;
248 if (!iseq) {
249 return Qnil;
252 if (ISEQ_BODY(iseq)->prism) {
253 rb_raise(rb_eRuntimeError, "cannot get AST for ISEQ compiled by prism");
256 lines = ISEQ_BODY(iseq)->variable.script_lines;
258 VALUE path = rb_iseq_path(iseq);
259 int e_option = RSTRING_LEN(path) == 2 && memcmp(RSTRING_PTR(path), "-e", 2) == 0;
261 if (NIL_P(lines) && rb_iseq_from_eval_p(iseq) && !e_option) {
262 rb_raise(rb_eArgError, "cannot get AST for method defined in eval");
265 if (!NIL_P(lines)) {
266 node = rb_ast_parse_array(lines, keep_script_lines, error_tolerant, keep_tokens);
268 else if (e_option) {
269 node = rb_ast_parse_str(rb_e_script, keep_script_lines, error_tolerant, keep_tokens);
271 else {
272 node = rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens);
275 return node_find(node, node_id);
278 static VALUE
279 rb_ast_node_alloc(VALUE klass)
281 struct ASTNodeData *data;
282 VALUE obj = TypedData_Make_Struct(klass, struct ASTNodeData, &rb_node_type, data);
284 return obj;
287 static const char*
288 node_type_to_str(const NODE *node)
290 return (ruby_node_name(nd_type(node)) + rb_strlen_lit("NODE_"));
293 static VALUE
294 ast_node_type(rb_execution_context_t *ec, VALUE self)
296 struct ASTNodeData *data;
297 TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
299 return rb_sym_intern_ascii_cstr(node_type_to_str(data->node));
302 static VALUE
303 ast_node_node_id(rb_execution_context_t *ec, VALUE self)
305 struct ASTNodeData *data;
306 TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
308 return INT2FIX(nd_node_id(data->node));
/*
 * Wrap +node+ in a node object belonging to +ast_value+, or yield Qnil when
 * +node+ is NULL.  Arguments are parenthesized so that expression arguments
 * expand correctly; note that +node+ is still evaluated twice.
 */
#define NEW_CHILD(ast_value, node) ((node) ? ast_new_internal((ast_value), (node)) : Qnil)
313 static VALUE
314 rb_ary_new_from_node_args(VALUE ast_value, long n, ...)
316 va_list ar;
317 VALUE ary;
318 long i;
320 ary = rb_ary_new2(n);
322 va_start(ar, n);
323 for (i=0; i<n; i++) {
324 NODE *node;
325 node = va_arg(ar, NODE *);
326 rb_ary_push(ary, NEW_CHILD(ast_value, node));
328 va_end(ar);
329 return ary;
332 static VALUE
333 dump_block(VALUE ast_value, const struct RNode_BLOCK *node)
335 VALUE ary = rb_ary_new();
336 do {
337 rb_ary_push(ary, NEW_CHILD(ast_value, node->nd_head));
338 } while (node->nd_next &&
339 nd_type_p(node->nd_next, NODE_BLOCK) &&
340 (node = RNODE_BLOCK(node->nd_next), 1));
341 if (node->nd_next) {
342 rb_ary_push(ary, NEW_CHILD(ast_value, node->nd_next));
345 return ary;
348 static VALUE
349 dump_array(VALUE ast_value, const struct RNode_LIST *node)
351 VALUE ary = rb_ary_new();
352 rb_ary_push(ary, NEW_CHILD(ast_value, node->nd_head));
354 while (node->nd_next && nd_type_p(node->nd_next, NODE_LIST)) {
355 node = RNODE_LIST(node->nd_next);
356 rb_ary_push(ary, NEW_CHILD(ast_value, node->nd_head));
358 rb_ary_push(ary, NEW_CHILD(ast_value, node->nd_next));
360 return ary;
363 static VALUE
364 dump_parser_array(VALUE ast_value, rb_parser_ary_t *p_ary)
366 VALUE ary;
368 if (p_ary->data_type != PARSER_ARY_DATA_NODE) {
369 rb_bug("unexpected rb_parser_ary_data_type: %d", p_ary->data_type);
372 ary = rb_ary_new();
374 for (long i = 0; i < p_ary->len; i++) {
375 rb_ary_push(ary, NEW_CHILD(ast_value, p_ary->data[i]));
378 return ary;
381 static VALUE
382 var_name(ID id)
384 if (!id) return Qnil;
385 if (!rb_id2str(id)) return Qnil;
386 return ID2SYM(id);
389 static VALUE
390 no_name_rest(void)
392 ID rest;
393 CONST_ID(rest, "NODE_SPECIAL_NO_NAME_REST");
394 return ID2SYM(rest);
397 static VALUE
398 rest_arg(VALUE ast_value, const NODE *rest_arg)
400 return NODE_NAMED_REST_P(rest_arg) ? NEW_CHILD(ast_value, rest_arg) : no_name_rest();
403 static VALUE
404 node_children(VALUE ast_value, const NODE *node)
406 char name[sizeof("$") + DECIMAL_SIZE_OF(long)];
408 enum node_type type = nd_type(node);
409 switch (type) {
410 case NODE_BLOCK:
411 return dump_block(ast_value, RNODE_BLOCK(node));
412 case NODE_IF:
413 return rb_ary_new_from_node_args(ast_value, 3, RNODE_IF(node)->nd_cond, RNODE_IF(node)->nd_body, RNODE_IF(node)->nd_else);
414 case NODE_UNLESS:
415 return rb_ary_new_from_node_args(ast_value, 3, RNODE_UNLESS(node)->nd_cond, RNODE_UNLESS(node)->nd_body, RNODE_UNLESS(node)->nd_else);
416 case NODE_CASE:
417 return rb_ary_new_from_node_args(ast_value, 2, RNODE_CASE(node)->nd_head, RNODE_CASE(node)->nd_body);
418 case NODE_CASE2:
419 return rb_ary_new_from_node_args(ast_value, 2, RNODE_CASE2(node)->nd_head, RNODE_CASE2(node)->nd_body);
420 case NODE_CASE3:
421 return rb_ary_new_from_node_args(ast_value, 2, RNODE_CASE3(node)->nd_head, RNODE_CASE3(node)->nd_body);
422 case NODE_WHEN:
423 return rb_ary_new_from_node_args(ast_value, 3, RNODE_WHEN(node)->nd_head, RNODE_WHEN(node)->nd_body, RNODE_WHEN(node)->nd_next);
424 case NODE_IN:
425 return rb_ary_new_from_node_args(ast_value, 3, RNODE_IN(node)->nd_head, RNODE_IN(node)->nd_body, RNODE_IN(node)->nd_next);
426 case NODE_WHILE:
427 case NODE_UNTIL:
428 return rb_ary_push(rb_ary_new_from_node_args(ast_value, 2, RNODE_WHILE(node)->nd_cond, RNODE_WHILE(node)->nd_body),
429 RBOOL(RNODE_WHILE(node)->nd_state));
430 case NODE_ITER:
431 case NODE_FOR:
432 return rb_ary_new_from_node_args(ast_value, 2, RNODE_ITER(node)->nd_iter, RNODE_ITER(node)->nd_body);
433 case NODE_FOR_MASGN:
434 return rb_ary_new_from_node_args(ast_value, 1, RNODE_FOR_MASGN(node)->nd_var);
435 case NODE_BREAK:
436 return rb_ary_new_from_node_args(ast_value, 1, RNODE_BREAK(node)->nd_stts);
437 case NODE_NEXT:
438 return rb_ary_new_from_node_args(ast_value, 1, RNODE_NEXT(node)->nd_stts);
439 case NODE_RETURN:
440 return rb_ary_new_from_node_args(ast_value, 1, RNODE_RETURN(node)->nd_stts);
441 case NODE_REDO:
442 return rb_ary_new_from_node_args(ast_value, 0);
443 case NODE_RETRY:
444 return rb_ary_new_from_node_args(ast_value, 0);
445 case NODE_BEGIN:
446 return rb_ary_new_from_node_args(ast_value, 1, RNODE_BEGIN(node)->nd_body);
447 case NODE_RESCUE:
448 return rb_ary_new_from_node_args(ast_value, 3, RNODE_RESCUE(node)->nd_head, RNODE_RESCUE(node)->nd_resq, RNODE_RESCUE(node)->nd_else);
449 case NODE_RESBODY:
450 return rb_ary_new_from_node_args(ast_value, 4, RNODE_RESBODY(node)->nd_args, RNODE_RESBODY(node)->nd_exc_var, RNODE_RESBODY(node)->nd_body, RNODE_RESBODY(node)->nd_next);
451 case NODE_ENSURE:
452 return rb_ary_new_from_node_args(ast_value, 2, RNODE_ENSURE(node)->nd_head, RNODE_ENSURE(node)->nd_ensr);
453 case NODE_AND:
454 case NODE_OR:
456 VALUE ary = rb_ary_new();
458 while (1) {
459 rb_ary_push(ary, NEW_CHILD(ast_value, RNODE_AND(node)->nd_1st));
460 if (!RNODE_AND(node)->nd_2nd || !nd_type_p(RNODE_AND(node)->nd_2nd, type))
461 break;
462 node = RNODE_AND(node)->nd_2nd;
464 rb_ary_push(ary, NEW_CHILD(ast_value, RNODE_AND(node)->nd_2nd));
465 return ary;
467 case NODE_MASGN:
468 if (NODE_NAMED_REST_P(RNODE_MASGN(node)->nd_args)) {
469 return rb_ary_new_from_node_args(ast_value, 3, RNODE_MASGN(node)->nd_value, RNODE_MASGN(node)->nd_head, RNODE_MASGN(node)->nd_args);
471 else {
472 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_MASGN(node)->nd_value),
473 NEW_CHILD(ast_value, RNODE_MASGN(node)->nd_head),
474 no_name_rest());
476 case NODE_LASGN:
477 if (NODE_REQUIRED_KEYWORD_P(RNODE_LASGN(node)->nd_value)) {
478 return rb_ary_new_from_args(2, var_name(RNODE_LASGN(node)->nd_vid), ID2SYM(rb_intern("NODE_SPECIAL_REQUIRED_KEYWORD")));
480 return rb_ary_new_from_args(2, var_name(RNODE_LASGN(node)->nd_vid), NEW_CHILD(ast_value, RNODE_LASGN(node)->nd_value));
481 case NODE_DASGN:
482 if (NODE_REQUIRED_KEYWORD_P(RNODE_DASGN(node)->nd_value)) {
483 return rb_ary_new_from_args(2, var_name(RNODE_DASGN(node)->nd_vid), ID2SYM(rb_intern("NODE_SPECIAL_REQUIRED_KEYWORD")));
485 return rb_ary_new_from_args(2, var_name(RNODE_DASGN(node)->nd_vid), NEW_CHILD(ast_value, RNODE_DASGN(node)->nd_value));
486 case NODE_IASGN:
487 return rb_ary_new_from_args(2, var_name(RNODE_IASGN(node)->nd_vid), NEW_CHILD(ast_value, RNODE_IASGN(node)->nd_value));
488 case NODE_CVASGN:
489 return rb_ary_new_from_args(2, var_name(RNODE_CVASGN(node)->nd_vid), NEW_CHILD(ast_value, RNODE_CVASGN(node)->nd_value));
490 case NODE_GASGN:
491 return rb_ary_new_from_args(2, var_name(RNODE_GASGN(node)->nd_vid), NEW_CHILD(ast_value, RNODE_GASGN(node)->nd_value));
492 case NODE_CDECL:
493 if (RNODE_CDECL(node)->nd_vid) {
494 return rb_ary_new_from_args(2, ID2SYM(RNODE_CDECL(node)->nd_vid), NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_value));
496 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_else), ID2SYM(RNODE_COLON2(RNODE_CDECL(node)->nd_else)->nd_mid), NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_value));
497 case NODE_OP_ASGN1:
498 return rb_ary_new_from_args(4, NEW_CHILD(ast_value, RNODE_OP_ASGN1(node)->nd_recv),
499 ID2SYM(RNODE_OP_ASGN1(node)->nd_mid),
500 NEW_CHILD(ast_value, RNODE_OP_ASGN1(node)->nd_index),
501 NEW_CHILD(ast_value, RNODE_OP_ASGN1(node)->nd_rvalue));
502 case NODE_OP_ASGN2:
503 return rb_ary_new_from_args(5, NEW_CHILD(ast_value, RNODE_OP_ASGN2(node)->nd_recv),
504 RBOOL(RNODE_OP_ASGN2(node)->nd_aid),
505 ID2SYM(RNODE_OP_ASGN2(node)->nd_vid),
506 ID2SYM(RNODE_OP_ASGN2(node)->nd_mid),
507 NEW_CHILD(ast_value, RNODE_OP_ASGN2(node)->nd_value));
508 case NODE_OP_ASGN_AND:
509 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_OP_ASGN_AND(node)->nd_head), ID2SYM(idANDOP),
510 NEW_CHILD(ast_value, RNODE_OP_ASGN_AND(node)->nd_value));
511 case NODE_OP_ASGN_OR:
512 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_OP_ASGN_OR(node)->nd_head), ID2SYM(idOROP),
513 NEW_CHILD(ast_value, RNODE_OP_ASGN_OR(node)->nd_value));
514 case NODE_OP_CDECL:
515 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_OP_CDECL(node)->nd_head),
516 ID2SYM(RNODE_OP_CDECL(node)->nd_aid),
517 NEW_CHILD(ast_value, RNODE_OP_CDECL(node)->nd_value));
518 case NODE_CALL:
519 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_CALL(node)->nd_recv),
520 ID2SYM(RNODE_CALL(node)->nd_mid),
521 NEW_CHILD(ast_value, RNODE_CALL(node)->nd_args));
522 case NODE_OPCALL:
523 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_OPCALL(node)->nd_recv),
524 ID2SYM(RNODE_OPCALL(node)->nd_mid),
525 NEW_CHILD(ast_value, RNODE_OPCALL(node)->nd_args));
526 case NODE_QCALL:
527 return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_QCALL(node)->nd_recv),
528 ID2SYM(RNODE_QCALL(node)->nd_mid),
529 NEW_CHILD(ast_value, RNODE_QCALL(node)->nd_args));
530 case NODE_FCALL:
531 return rb_ary_new_from_args(2, ID2SYM(RNODE_FCALL(node)->nd_mid),
532 NEW_CHILD(ast_value, RNODE_FCALL(node)->nd_args));
533 case NODE_VCALL:
534 return rb_ary_new_from_args(1, ID2SYM(RNODE_VCALL(node)->nd_mid));
535 case NODE_SUPER:
536 return rb_ary_new_from_node_args(ast_value, 1, RNODE_SUPER(node)->nd_args);
537 case NODE_ZSUPER:
538 return rb_ary_new_from_node_args(ast_value, 0);
539 case NODE_LIST:
540 return dump_array(ast_value, RNODE_LIST(node));
541 case NODE_ZLIST:
542 return rb_ary_new_from_node_args(ast_value, 0);
543 case NODE_HASH:
544 return rb_ary_new_from_node_args(ast_value, 1, RNODE_HASH(node)->nd_head);
545 case NODE_YIELD:
546 return rb_ary_new_from_node_args(ast_value, 1, RNODE_YIELD(node)->nd_head);
547 case NODE_LVAR:
548 return rb_ary_new_from_args(1, var_name(RNODE_LVAR(node)->nd_vid));
549 case NODE_DVAR:
550 return rb_ary_new_from_args(1, var_name(RNODE_DVAR(node)->nd_vid));
551 case NODE_IVAR:
552 return rb_ary_new_from_args(1, ID2SYM(RNODE_IVAR(node)->nd_vid));
553 case NODE_CONST: