diff options
author | Kevin Newton <[email protected]> | 2024-03-11 12:13:12 -0400 |
---|---|---|
committer | git <[email protected]> | 2024-03-12 03:30:50 +0000 |
commit | 21ea290b34e310371b5a8c2569c161a67467981c (patch) | |
tree | a894282d4dbc201f6c61ce16ddfe42a076e08709 | |
parent | cb4bc4d03ca41bf367d1fe1ae47865a8b80092ed (diff) |
[ruby/prism] Static literals inspect
https://github.com/ruby/prism/commit/4913d112da
-rw-r--r-- | prism/extension.c | 35 | ||||
-rw-r--r-- | prism/prism.c | 9 | ||||
-rw-r--r-- | prism/static_literals.c | 179 | ||||
-rw-r--r-- | prism/static_literals.h | 9 | ||||
-rw-r--r-- | prism/util/pm_buffer.c | 23 | ||||
-rw-r--r-- | prism/util/pm_buffer.h | 20 | ||||
-rw-r--r-- | test/prism/static_inspect_test.rb | 90 | ||||
-rw-r--r-- | test/prism/static_literals_test.rb | 38 |
8 files changed, 382 insertions, 21 deletions
diff --git a/prism/extension.c b/prism/extension.c index 8c9d7944b5..9dbebdf6f4 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -1190,6 +1190,40 @@ format_errors(VALUE self, VALUE source, VALUE colorize) { } /** + * call-seq: + * Debug::static_inspect(source) -> String + * + * Inspect the node as it would be inspected by the warnings used in static + * literal sets. + */ +static VALUE +static_inspect(int argc, VALUE *argv, VALUE self) { + pm_string_t input; + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); + + pm_parser_t parser; + pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options); + + pm_node_t *program = pm_parse(&parser); + pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0]; + + pm_buffer_t buffer = { 0 }; + pm_static_literal_inspect(&buffer, &parser, node); + + rb_encoding *encoding = rb_enc_find(parser.encoding->name); + VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding); + + pm_buffer_free(&buffer); + pm_node_destroy(&parser, program); + pm_parser_free(&parser); + pm_string_free(&input); + pm_options_free(&options); + + return result; +} + +/** * call-seq: Debug::Encoding.all -> Array[Debug::Encoding] * * Return an array of all of the encodings that prism knows about. @@ -1338,6 +1372,7 @@ Init_prism(void) { rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1); rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1); rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2); + rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1); // Next, define the functions that are exposed through the private // Debug::Encoding class. 
diff --git a/prism/prism.c b/prism/prism.c index 98c18407b8..739ca4671a 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -12129,15 +12129,20 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node); if (duplicated != NULL) { + pm_buffer_t buffer = { 0 }; + pm_static_literal_inspect(&buffer, parser, duplicated); + pm_diagnostic_list_append_format( &parser->warning_list, duplicated->location.start, duplicated->location.end, PM_WARN_DUPLICATED_HASH_KEY, - (int) (duplicated->location.end - duplicated->location.start), - duplicated->location.start, + (int) pm_buffer_length(&buffer), + pm_buffer_value(&buffer), pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line ); + + pm_buffer_free(&buffer); } } diff --git a/prism/static_literals.c b/prism/static_literals.c index 81231692f6..af612f5e97 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -371,3 +371,182 @@ pm_static_literals_free(pm_static_literals_t *literals) { pm_node_hash_free(&literals->regexp_nodes); pm_node_hash_free(&literals->symbol_nodes); } + +/** + * A helper to determine if the given node is a static literal that is positive. + * This is used for formatting imaginary nodes. + */ +static bool +pm_static_literal_positive_p(const pm_node_t *node) { + switch (PM_NODE_TYPE(node)) { + case PM_FLOAT_NODE: + return ((const pm_float_node_t *) node)->value > 0; + case PM_INTEGER_NODE: + return !((const pm_integer_node_t *) node)->value.negative; + case PM_RATIONAL_NODE: + return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric); + case PM_IMAGINARY_NODE: + return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric); + default: + assert(false && "unreachable"); + return false; + } +} + +/** + * Inspect a rational node that wraps a float node. 
This is going to be a + * poor-man's version of the Ruby `Rational#to_s` method, because we're not + * going to try to reduce the rational by finding the GCD. We'll leave that for + * a future improvement. + */ +static void +pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) { + const uint8_t *start = node->base.location.start; + const uint8_t *end = node->base.location.end - 1; // r + + while (start < end && *start == '0') start++; // 0.1 -> .1 + while (end > start && end[-1] == '0') end--; // 1.0 -> 1. + size_t length = (size_t) (end - start); + + const uint8_t *point = memchr(start, '.', length); + assert(point && "should have a decimal point"); + + uint8_t *digits = malloc(length - 1); + if (digits == NULL) return; + + memcpy(digits, start, (unsigned long) (point - start)); + memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1)); + + pm_integer_t numerator = { 0 }; + pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1); + + pm_buffer_append_byte(buffer, '('); + pm_integer_string(buffer, &numerator); + pm_buffer_append_string(buffer, "/1", 2); + for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0'); + pm_buffer_append_byte(buffer, ')'); + + pm_integer_free(&numerator); + free(digits); +} + +/** + * Create a string-based representation of the given static literal. 
+ */ +PRISM_EXPORTED_FUNCTION void +pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { + assert(PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)); + + switch (PM_NODE_TYPE(node)) { + case PM_FALSE_NODE: + pm_buffer_append_string(buffer, "false", 5); + break; + case PM_FLOAT_NODE: { + const double value = ((const pm_float_node_t *) node)->value; + + if (isinf(value)) { + if (*node->location.start == '-') { + pm_buffer_append_byte(buffer, '-'); + } + pm_buffer_append_string(buffer, "Infinity", 8); + } else if (value == 0.0) { + if (*node->location.start == '-') { + pm_buffer_append_byte(buffer, '-'); + } + pm_buffer_append_string(buffer, "0.0", 3); + } else { + pm_buffer_append_format(buffer, "%g", value); + + // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So + // we check for the decimal point and add it in here if it's not + // present. + if (pm_buffer_index(buffer, '.') == -1) { + ssize_t exponent_index = pm_buffer_index(buffer, 'e'); + size_t index = exponent_index == -1 ? 
pm_buffer_length(buffer) : (size_t) exponent_index; + pm_buffer_insert(buffer, index, ".0", 2); + } + } + + break; + } + case PM_IMAGINARY_NODE: { + const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric; + pm_buffer_append_string(buffer, "(0", 2); + if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+'); + pm_static_literal_inspect(buffer, parser, numeric); + if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*'); + pm_buffer_append_string(buffer, "i)", 2); + break; + } + case PM_INTEGER_NODE: + pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value); + break; + case PM_NIL_NODE: + pm_buffer_append_string(buffer, "nil", 3); + break; + case PM_RATIONAL_NODE: { + const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric; + + switch (PM_NODE_TYPE(numeric)) { + case PM_INTEGER_NODE: + pm_buffer_append_byte(buffer, '('); + pm_static_literal_inspect(buffer, parser, numeric); + pm_buffer_append_string(buffer, "/1)", 3); + break; + case PM_FLOAT_NODE: + pm_rational_inspect(buffer, (pm_rational_node_t *) node); + break; + default: + assert(false && "unreachable"); + break; + } + + break; + } + case PM_REGULAR_EXPRESSION_NODE: { + const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped; + pm_buffer_append_byte(buffer, '/'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_byte(buffer, '/'); + + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1); + if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1); + + break; + } + case 
PM_SOURCE_ENCODING_NODE: + pm_buffer_append_format(buffer, "#<Encoding:%s>", parser->encoding->name); + break; + case PM_SOURCE_FILE_NODE: { + const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_byte(buffer, '"'); + break; + } + case PM_SOURCE_LINE_NODE: + pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line); + break; + case PM_STRING_NODE: { + const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_byte(buffer, '"'); + break; + } + case PM_SYMBOL_NODE: { + const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped; + pm_buffer_append_byte(buffer, ':'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY); + break; + } + case PM_TRUE_NODE: + pm_buffer_append_string(buffer, "true", 4); + break; + default: + assert(false && "unreachable"); + break; + } +} diff --git a/prism/static_literals.h b/prism/static_literals.h index 2a3d815fa9..dd1f2e7f84 100644 --- a/prism/static_literals.h +++ b/prism/static_literals.h @@ -106,4 +106,13 @@ pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals */ void pm_static_literals_free(pm_static_literals_t *literals); +/** + * Create a string-based representation of the given static literal. + * + * @param buffer The buffer to write the string to. + * @param parser The parser that created the node. + * @param node The node to create a string representation of. 
+ */ +PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node); + #endif diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c index 87f79ddd2c..f150a22cee 100644 --- a/prism/util/pm_buffer.c +++ b/prism/util/pm_buffer.c @@ -284,6 +284,29 @@ pm_buffer_rstrip(pm_buffer_t *buffer) { } /** + * Checks if the buffer includes the given value. + */ +ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value) { + const char *first = memchr(buffer->value, value, buffer->length); + return (first == NULL) ? -1 : (ssize_t) (first - buffer->value); +} + +/** + * Insert the given string into the buffer at the given index. + */ +void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) { + assert(index <= buffer->length); + + if (index == buffer->length) { + pm_buffer_append_string(buffer, value, length); + } else { + pm_buffer_append_zeroes(buffer, length); + memmove(buffer->value + index + length, buffer->value + index, buffer->length - length - index); + memcpy(buffer->value + index, value, length); + } +} + +/** * Free the memory associated with the buffer. */ void diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h index d8ec8180e7..f80b0e7b82 100644 --- a/prism/util/pm_buffer.h +++ b/prism/util/pm_buffer.h @@ -189,6 +189,26 @@ void pm_buffer_clear(pm_buffer_t *buffer); void pm_buffer_rstrip(pm_buffer_t *buffer); /** + * Checks if the buffer includes the given value. + * + * @param buffer The buffer to check. + * @param value The value to check for. + * @returns The index of the first occurrence of the value in the buffer, or -1 + * if the value is not found. + */ +ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value); + +/** + * Insert the given string into the buffer at the given index. + * + * @param buffer The buffer to insert into. + * @param index The index to insert at. + * @param value The string to insert. 
# frozen_string_literal: true

require_relative "test_helper"

# These tests are skipped entirely when Prism runs on the FFI backend.
return if Prism::BACKEND == :FFI

module Prism
  # Checks the strings produced by Debug.static_inspect for each kind of
  # static literal. Every case pairs the expected inspect output with the
  # source snippet that is parsed.
  class StaticInspectTest < TestCase
    def test_false
      assert_static_inspect "false", "false"
    end

    def test_float
      [
        ["0.25", "0.25"],
        ["5.125", "5.125"],
        ["0.0", "0.0"],
        ["-0.0", "-0.0"],
        ["1.0e+100", "1e100"],
        ["-1.0e+100", "-1e100"],
        ["Infinity", "1e1000"],
        ["-Infinity", "-1e1000"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_imaginary
      [
        ["(0+1i)", "1i"],
        ["(0-1i)", "-1i"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_integer
      [
        ["1000", "1_0_0_0"],
        ["10000000000000000000000000000", "1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_nil
      assert_static_inspect "nil", "nil"
    end

    def test_rational
      [
        ["(0/1)", "0r"],
        ["(1/1)", "1r"],
        ["(1/1)", "1.0r"],
        ["(77777/1000)", "77.777r"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_regular_expression
      [
        ["/.*/", "/.*/"],
        ["/.*/i", "/.*/i"],
        ["/.*/", "/.*/u"],
        ["/.*/n", "/.*/un"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_source_encoding
      assert_static_inspect "#<Encoding:UTF-8>", "__ENCODING__"
      assert_static_inspect "#<Encoding:Shift_JIS>", "__ENCODING__", encoding: "Shift_JIS"
    end

    def test_source_file
      assert_static_inspect __FILE__.inspect, "__FILE__", filepath: __FILE__
    end

    def test_source_line
      assert_static_inspect "1", "__LINE__"
      assert_static_inspect "5", "__LINE__", line: 5
    end

    def test_string
      # Strings are parsed with frozen_string_literal enabled in every case.
      [
        ["\"\"", '""'],
        ["\"Hello, World!\"", '"Hello, World!"'],
        ["\"\\a\"", "\"\\a\""]
      ].each do |(expected, source)|
        assert_static_inspect expected, source, frozen_string_literal: true
      end
    end

    def test_symbol
      [
        [":foo", ":foo"],
        [":foo", "%s[foo]"]
      ].each do |(expected, source)|
        assert_static_inspect expected, source
      end
    end

    def test_true
      assert_static_inspect "true", "true"
    end

    private

    # Parses +source+ with the given options through Debug.static_inspect and
    # asserts that the result equals +expected+.
    def assert_static_inspect(expected, source, **options)
      assert_equal expected, Debug.static_inspect(source, **options)
    end
  end
end
assert_warning("__LINE__", "2") - assert_warning("3", "__LINE__") + assert_warning("__LINE__", "2", "2") + assert_warning("3", "__LINE__", "3") assert_warning("1.0") - assert_warning("1e2", "100.0") + assert_warning("1e2", "100.0", "100.0") - assert_warning("1r") - assert_warning("1.0r") + assert_warning("1r", "1r", "(1/1)") + assert_warning("1.0r", "1.0r", "(1/1)") - assert_warning("1i") - assert_warning("1.0i") + assert_warning("1i", "1i", "(0+1i)") + assert_warning("1.0i", "1.0i", "(0+1.0i)") - assert_warning("1ri") - assert_warning("1.0ri") + assert_warning("1ri", "1ri", "(0+(1/1)*i)") + assert_warning("1.0ri", "1.0ri", "(0+(1/1)*i)") assert_warning("\"#{__FILE__}\"") assert_warning("\"foo\"") @@ -41,12 +41,12 @@ module Prism refute_warning("/foo/", "/foo/i") assert_warning(":foo") - assert_warning("%s[foo]") + assert_warning("%s[foo]", ":foo", ":foo") assert_warning("true") assert_warning("false") assert_warning("nil") - assert_warning("__ENCODING__") + assert_warning("__ENCODING__", "__ENCODING__", "#<Encoding:UTF-8>") end private @@ -71,10 +71,10 @@ module Prism warnings end - def assert_warning(left, right = left) + def assert_warning(left, right = left, message = left) hash_keys, when_clauses = parse_warnings(left, right) - assert_include hash_keys.message, left + assert_include hash_keys.message, message assert_include hash_keys.message, "line 3" assert_include when_clauses.message, "line 3" end |