summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <[email protected]> 2024-03-11 12:13:12 -0400
committer: git <[email protected]> 2024-03-12 03:30:50 +0000
commit: 21ea290b34e310371b5a8c2569c161a67467981c (patch)
tree: a894282d4dbc201f6c61ce16ddfe42a076e08709
parent: cb4bc4d03ca41bf367d1fe1ae47865a8b80092ed (diff)
[ruby/prism] Static literals inspect
https://github.com/ruby/prism/commit/4913d112da
-rw-r--r-- prism/extension.c 35
-rw-r--r-- prism/prism.c 9
-rw-r--r-- prism/static_literals.c 179
-rw-r--r-- prism/static_literals.h 9
-rw-r--r-- prism/util/pm_buffer.c 23
-rw-r--r-- prism/util/pm_buffer.h 20
-rw-r--r-- test/prism/static_inspect_test.rb 90
-rw-r--r-- test/prism/static_literals_test.rb 38
8 files changed, 382 insertions, 21 deletions
diff --git a/prism/extension.c b/prism/extension.c
index 8c9d7944b5..9dbebdf6f4 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -1190,6 +1190,40 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
}
/**
+ * call-seq:
+ *   Debug::static_inspect(source, **options) -> String
+ *
+ * Parse the given source and inspect its first top-level statement the same
+ * way it would be inspected by the warnings used in static literal sets.
+ *
+ * The returned string is tagged with the encoding the parser used for the
+ * source. Raises ArgumentError if the source contains no statements, because
+ * there is then no node to inspect.
+ */
+static VALUE
+static_inspect(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+    string_options(argc, argv, &input, &options);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
+
+    pm_node_t *program = pm_parse(&parser);
+
+    // An empty source produces an empty statement list. Indexing nodes[0] in
+    // that case would read past the end of the list, so count first.
+    const size_t statement_count = ((pm_program_node_t *) program)->statements->body.size;
+    VALUE result = Qnil;
+
+    if (statement_count > 0) {
+        pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0];
+
+        pm_buffer_t buffer = { 0 };
+        pm_static_literal_inspect(&buffer, &parser, node);
+
+        rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+        result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
+
+        pm_buffer_free(&buffer);
+    }
+
+    pm_node_destroy(&parser, program);
+    pm_parser_free(&parser);
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    // Raise only once every native resource has been released: rb_raise does
+    // not return to this function.
+    if (statement_count == 0) {
+        rb_raise(rb_eArgError, "expected the source to contain at least one statement");
+    }
+
+    return result;
+}
+
+/**
* call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
*
* Return an array of all of the encodings that prism knows about.
@@ -1338,6 +1372,7 @@ Init_prism(void) {
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
+ rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1);
// Next, define the functions that are exposed through the private
// Debug::Encoding class.
diff --git a/prism/prism.c b/prism/prism.c
index 98c18407b8..739ca4671a 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -12129,15 +12129,20 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
if (duplicated != NULL) {
+ pm_buffer_t buffer = { 0 };
+ pm_static_literal_inspect(&buffer, parser, duplicated);
+
pm_diagnostic_list_append_format(
&parser->warning_list,
duplicated->location.start,
duplicated->location.end,
PM_WARN_DUPLICATED_HASH_KEY,
- (int) (duplicated->location.end - duplicated->location.start),
- duplicated->location.start,
+ (int) pm_buffer_length(&buffer),
+ pm_buffer_value(&buffer),
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
);
+
+ pm_buffer_free(&buffer);
}
}
diff --git a/prism/static_literals.c b/prism/static_literals.c
index 81231692f6..af612f5e97 100644
--- a/prism/static_literals.c
+++ b/prism/static_literals.c
@@ -371,3 +371,182 @@ pm_static_literals_free(pm_static_literals_t *literals) {
pm_node_hash_free(&literals->regexp_nodes);
pm_node_hash_free(&literals->symbol_nodes);
}
+
+/**
+ * A helper to determine if the given numeric static literal is rendered
+ * without a leading minus sign. This is used for formatting imaginary nodes,
+ * where a "+" has to be written between the real part and a non-negative
+ * imaginary part (e.g. "(0+1i)" versus "(0-1i)").
+ *
+ * Only float, integer, rational and imaginary nodes are expected here.
+ */
+static bool
+pm_static_literal_positive_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FLOAT_NODE: {
+            const double value = ((const pm_float_node_t *) node)->value;
+
+            // pm_static_literal_inspect decides the sign of zero and of
+            // infinity from the source text rather than from the value. Do
+            // the same here so that the "+" separator is present exactly when
+            // the rendered float has no "-" of its own. Without this, 0.0i
+            // would be rendered as "(00.0i)".
+            if (value == 0.0 || isinf(value)) {
+                return *node->location.start != '-';
+            }
+
+            return value > 0;
+        }
+        case PM_INTEGER_NODE:
+            return !((const pm_integer_node_t *) node)->value.negative;
+        case PM_RATIONAL_NODE:
+            return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
+        case PM_IMAGINARY_NODE:
+            return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
+        default:
+            assert(false && "unreachable");
+            return false;
+    }
+}
+
+/**
+ * Inspect a rational node that wraps a float node. This is going to be a
+ * poor-man's version of the Ruby `Rational#to_s` method, because we're not
+ * going to try to reduce the rational by finding the GCD. We'll leave that for
+ * a future improvement.
+ *
+ * The digits are taken from the source text of the node: the decimal point is
+ * removed to form the numerator and the denominator is a 1 followed by one
+ * zero per remaining fractional digit, so `77.777r` becomes "(77777/1000)".
+ * Nothing is written if the temporary digit buffer cannot be allocated.
+ */
+static void
+pm_rational_inspect(pm_buffer_t *buffer, const pm_rational_node_t *node) {
+    const uint8_t *start = node->base.location.start;
+    const uint8_t *end = node->base.location.end - 1; // r
+
+    // A leading minus sign is not a digit. Emit it separately instead of
+    // handing it to the integer parser along with the digits.
+    const bool negative = (start < end && *start == '-');
+    if (negative) start++;
+
+    while (start < end && *start == '0') start++; // 0.1 -> .1
+    while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
+    const size_t length = (size_t) (end - start);
+
+    const uint8_t *point = memchr(start, '.', length);
+    assert(point && "should have a decimal point");
+
+    const size_t integral_length = (size_t) (point - start);
+    const size_t fractional_length = (size_t) (end - point - 1);
+    const size_t digits_length = integral_length + fractional_length;
+
+    // Something like 0.0r has no significant digits left at all. Avoid a
+    // zero-sized allocation and parsing an empty range; the value is zero.
+    if (digits_length == 0) {
+        pm_buffer_append_string(buffer, "(0/1)", 5);
+        return;
+    }
+
+    uint8_t *digits = malloc(digits_length);
+    if (digits == NULL) return;
+
+    // Join the digits on both sides of the decimal point into one run.
+    memcpy(digits, start, integral_length);
+    memcpy(digits + integral_length, point + 1, fractional_length);
+
+    pm_integer_t numerator = { 0 };
+    pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + digits_length);
+
+    pm_buffer_append_byte(buffer, '(');
+    if (negative) pm_buffer_append_byte(buffer, '-');
+    pm_integer_string(buffer, &numerator);
+    pm_buffer_append_string(buffer, "/1", 2);
+    for (size_t index = 0; index < fractional_length; index++) pm_buffer_append_byte(buffer, '0');
+    pm_buffer_append_byte(buffer, ')');
+
+    pm_integer_free(&numerator);
+    free(digits);
+}
+
+/**
+ * Create a string-based representation of the given static literal and append
+ * it to the given buffer. Content that is already in the buffer is left
+ * untouched.
+ *
+ * The node is expected to be flagged as a static literal. The parser is
+ * consulted for the source encoding name and for line numbers.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+    assert(PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL));
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FALSE_NODE:
+            pm_buffer_append_string(buffer, "false", 5);
+            break;
+        case PM_FLOAT_NODE: {
+            const double value = ((const pm_float_node_t *) node)->value;
+
+            if (isinf(value)) {
+                // The sign is taken from the source text of the literal.
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "Infinity", 8);
+            } else if (value == 0.0) {
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "0.0", 3);
+            } else {
+                // Remember where this float begins. The buffer may already
+                // hold text from the caller (or the "(0+" prefix of an
+                // imaginary literal), and only the freshly formatted float
+                // may be searched and patched below.
+                const size_t offset = pm_buffer_length(buffer);
+
+                // NOTE(review): %g keeps 6 significant digits, so a literal
+                // such as 1.23456789 is shown as 1.23457 - confirm whether
+                // that is acceptable for warning messages.
+                pm_buffer_append_format(buffer, "%g", value);
+
+                const size_t formatted_length = pm_buffer_length(buffer) - offset;
+
+                // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
+                // we check for the decimal point and add it in here if it's not
+                // present.
+                if (formatted_length > 0) {
+                    const char *formatted = pm_buffer_value(buffer) + offset;
+
+                    if (memchr(formatted, '.', formatted_length) == NULL) {
+                        const char *exponent = memchr(formatted, 'e', formatted_length);
+                        const size_t index = (exponent == NULL) ? pm_buffer_length(buffer) : offset + (size_t) (exponent - formatted);
+                        pm_buffer_insert(buffer, index, ".0", 2);
+                    }
+                }
+            }
+
+            break;
+        }
+        case PM_IMAGINARY_NODE: {
+            // Rendered as "(0" + sign + numeric + "i)", with a "*" before the
+            // "i" when the numeric part is a rational.
+            const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+            pm_buffer_append_string(buffer, "(0", 2);
+            if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
+            pm_static_literal_inspect(buffer, parser, numeric);
+            if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
+            pm_buffer_append_string(buffer, "i)", 2);
+            break;
+        }
+        case PM_INTEGER_NODE:
+            pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
+            break;
+        case PM_NIL_NODE:
+            pm_buffer_append_string(buffer, "nil", 3);
+            break;
+        case PM_RATIONAL_NODE: {
+            const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
+
+            switch (PM_NODE_TYPE(numeric)) {
+                case PM_INTEGER_NODE:
+                    // An integer rational is always written over 1.
+                    pm_buffer_append_byte(buffer, '(');
+                    pm_static_literal_inspect(buffer, parser, numeric);
+                    pm_buffer_append_string(buffer, "/1)", 3);
+                    break;
+                case PM_FLOAT_NODE:
+                    pm_rational_inspect(buffer, (pm_rational_node_t *) node);
+                    break;
+                default:
+                    assert(false && "unreachable");
+                    break;
+            }
+
+            break;
+        }
+        case PM_REGULAR_EXPRESSION_NODE: {
+            const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '/');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '/');
+
+            // Option letters follow the closing slash in a fixed order.
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
+
+            break;
+        }
+        case PM_SOURCE_ENCODING_NODE:
+            pm_buffer_append_format(buffer, "#<Encoding:%s>", parser->encoding->name);
+            break;
+        case PM_SOURCE_FILE_NODE: {
+            const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SOURCE_LINE_NODE:
+            pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line);
+            break;
+        case PM_STRING_NODE: {
+            const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SYMBOL_NODE: {
+            const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, ':');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            break;
+        }
+        case PM_TRUE_NODE:
+            pm_buffer_append_string(buffer, "true", 4);
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
index 2a3d815fa9..dd1f2e7f84 100644
--- a/prism/static_literals.h
+++ b/prism/static_literals.h
@@ -106,4 +106,13 @@ pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals
*/
void pm_static_literals_free(pm_static_literals_t *literals);
+/**
+ * Create a string-based representation of the given static literal and append
+ * it to the given buffer. The node is expected to carry the static literal
+ * flag; the implementation asserts this.
+ *
+ * @param buffer The buffer to write the string to.
+ * @param parser The parser that created the node. It supplies the source
+ *     encoding name and the line information used for some literals.
+ * @param node The node to create a string representation of.
+ */
+PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
+
#endif
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
index 87f79ddd2c..f150a22cee 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/util/pm_buffer.c
@@ -284,6 +284,29 @@ pm_buffer_rstrip(pm_buffer_t *buffer) {
}
/**
+ * Find the index of the first occurrence of the given value in the buffer.
+ * Returns -1 if the value does not occur in the buffer, which is always the
+ * case for an empty buffer.
+ */
+ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value) {
+    // A zero-initialized buffer has a NULL value pointer, and memchr must not
+    // be handed a null pointer even for a length of zero.
+    if (buffer->length == 0) return -1;
+
+    const char *first = memchr(buffer->value, value, buffer->length);
+    return (first == NULL) ? -1 : (ssize_t) (first - buffer->value);
+}
+
+/**
+ * Splice `length` bytes taken from `value` into the buffer so that they begin
+ * at `index`. Content that was at or after `index` moves toward the end. The
+ * index must not be larger than the current length of the buffer.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
+    assert(index <= buffer->length);
+
+    // Inserting at the very end is a plain append.
+    if (index == buffer->length) {
+        pm_buffer_append_string(buffer, value, length);
+        return;
+    }
+
+    // Grow the buffer by the size of the insertion first. Pointers into the
+    // buffer are only taken once it has its final size.
+    pm_buffer_append_zeroes(buffer, length);
+
+    char *gap = buffer->value + index;
+    const size_t displaced = buffer->length - length - index;
+
+    // Shift the tail out of the way, then fill the gap that opened up.
+    memmove(gap + length, gap, displaced);
+    memcpy(gap, value, length);
+}
+
+/**
* Free the memory associated with the buffer.
*/
void
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index d8ec8180e7..f80b0e7b82 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -189,6 +189,26 @@ void pm_buffer_clear(pm_buffer_t *buffer);
void pm_buffer_rstrip(pm_buffer_t *buffer);
/**
+ * Find the index of the first occurrence of the given value in the buffer.
+ *
+ * @param buffer The buffer to search.
+ * @param value The value to search for.
+ * @returns The index of the first occurrence of the value in the buffer, or -1
+ *     if the value is not found.
+ */
+ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/**
+ * Insert the given string into the buffer at the given index. Content that is
+ * at or after the index is moved toward the end of the buffer to make room.
+ *
+ * @param buffer The buffer to insert into.
+ * @param index The index to insert at. It must not be larger than the current
+ *     length of the buffer; inserting at the length appends.
+ * @param value The string to insert.
+ * @param length The length of the string to insert.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+/**
* Free the memory associated with the buffer.
*
* @param buffer The buffer to free.
diff --git a/test/prism/static_inspect_test.rb b/test/prism/static_inspect_test.rb
new file mode 100644
index 0000000000..07b7ac3244
--- /dev/null
+++ b/test/prism/static_inspect_test.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+return if Prism::BACKEND == :FFI
+
+module Prism
+ class StaticInspectTest < TestCase
+ def test_false
+ assert_equal "false", static_inspect("false")
+ end
+
+ def test_float
+ assert_equal "0.25", static_inspect("0.25")
+ assert_equal "5.125", static_inspect("5.125")
+
+ assert_equal "0.0", static_inspect("0.0")
+ assert_equal "-0.0", static_inspect("-0.0")
+
+ assert_equal "1.0e+100", static_inspect("1e100")
+ assert_equal "-1.0e+100", static_inspect("-1e100")
+
+ assert_equal "Infinity", static_inspect("1e1000")
+ assert_equal "-Infinity", static_inspect("-1e1000")
+ end
+
+ def test_imaginary
+ assert_equal "(0+1i)", static_inspect("1i")
+ assert_equal "(0-1i)", static_inspect("-1i")
+ end
+
+ def test_integer
+ assert_equal "1000", static_inspect("1_0_0_0")
+ assert_equal "10000000000000000000000000000", static_inspect("1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0")
+ end
+
+ def test_nil
+ assert_equal "nil", static_inspect("nil")
+ end
+
+ def test_rational
+ assert_equal "(0/1)", static_inspect("0r")
+ assert_equal "(1/1)", static_inspect("1r")
+ assert_equal "(1/1)", static_inspect("1.0r")
+ assert_equal "(77777/1000)", static_inspect("77.777r")
+ end
+
+ def test_regular_expression
+ assert_equal "/.*/", static_inspect("/.*/")
+ assert_equal "/.*/i", static_inspect("/.*/i")
+ assert_equal "/.*/", static_inspect("/.*/u")
+ assert_equal "/.*/n", static_inspect("/.*/un")
+ end
+
+ def test_source_encoding
+ assert_equal "#<Encoding:UTF-8>", static_inspect("__ENCODING__")
+ assert_equal "#<Encoding:Shift_JIS>", static_inspect("__ENCODING__", encoding: "Shift_JIS")
+ end
+
+ def test_source_file
+ assert_equal __FILE__.inspect, static_inspect("__FILE__", filepath: __FILE__)
+ end
+
+ def test_source_line
+ assert_equal "1", static_inspect("__LINE__")
+ assert_equal "5", static_inspect("__LINE__", line: 5)
+ end
+
+ def test_string
+ assert_equal "\"\"", static_inspect('""', frozen_string_literal: true)
+ assert_equal "\"Hello, World!\"", static_inspect('"Hello, World!"', frozen_string_literal: true)
+ assert_equal "\"\\a\"", static_inspect("\"\\a\"", frozen_string_literal: true)
+ end
+
+ def test_symbol
+ assert_equal ":foo", static_inspect(":foo")
+ assert_equal ":foo", static_inspect("%s[foo]")
+ end
+
+ def test_true
+ assert_equal "true", static_inspect("true")
+ end
+
+ private
+
+ def static_inspect(source, **options)
+ Debug.static_inspect(source, **options)
+ end
+ end
+end
diff --git a/test/prism/static_literals_test.rb b/test/prism/static_literals_test.rb
index 6108e2ddbc..4e98c15701 100644
--- a/test/prism/static_literals_test.rb
+++ b/test/prism/static_literals_test.rb
@@ -6,31 +6,31 @@ module Prism
class StaticLiteralsTest < TestCase
def test_static_literals
assert_warning("1")
- assert_warning("0xA", "10")
- assert_warning("0o10", "8")
- assert_warning("0b10", "2")
- assert_warning("1_000")
- assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}")
- assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}")
+ assert_warning("0xA", "10", "10")
+ assert_warning("0o10", "8", "8")
+ assert_warning("0b10", "2", "2")
+ assert_warning("1_000", "1000", "1000")
+ assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}", (2**32).to_s(10))
+ assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}", (2**64).to_s(10))
refute_warning("1", "-1")
refute_warning((2**32).to_s(10), "-0x#{(2**32).to_s(16)}")
refute_warning((2**64).to_s(10), "-0x#{(2**64).to_s(16)}")
- assert_warning("__LINE__", "2")
- assert_warning("3", "__LINE__")
+ assert_warning("__LINE__", "2", "2")
+ assert_warning("3", "__LINE__", "3")
assert_warning("1.0")
- assert_warning("1e2", "100.0")
+ assert_warning("1e2", "100.0", "100.0")
- assert_warning("1r")
- assert_warning("1.0r")
+ assert_warning("1r", "1r", "(1/1)")
+ assert_warning("1.0r", "1.0r", "(1/1)")
- assert_warning("1i")
- assert_warning("1.0i")
+ assert_warning("1i", "1i", "(0+1i)")
+ assert_warning("1.0i", "1.0i", "(0+1.0i)")
- assert_warning("1ri")
- assert_warning("1.0ri")
+ assert_warning("1ri", "1ri", "(0+(1/1)*i)")
+ assert_warning("1.0ri", "1.0ri", "(0+(1/1)*i)")
assert_warning("\"#{__FILE__}\"")
assert_warning("\"foo\"")
@@ -41,12 +41,12 @@ module Prism
refute_warning("/foo/", "/foo/i")
assert_warning(":foo")
- assert_warning("%s[foo]")
+ assert_warning("%s[foo]", ":foo", ":foo")
assert_warning("true")
assert_warning("false")
assert_warning("nil")
- assert_warning("__ENCODING__")
+ assert_warning("__ENCODING__", "__ENCODING__", "#<Encoding:UTF-8>")
end
private
@@ -71,10 +71,10 @@ module Prism
warnings
end
- def assert_warning(left, right = left)
+ def assert_warning(left, right = left, message = left)
hash_keys, when_clauses = parse_warnings(left, right)
- assert_include hash_keys.message, left
+ assert_include hash_keys.message, message
assert_include hash_keys.message, "line 3"
assert_include when_clauses.message, "line 3"
end