[ruby/prism] Static literals inspect

https://github.com/ruby/prism/commit/4913d112da
author: Kevin Newton <[email protected]> 2024-03-11 12:13:12 -0400
committer: git <[email protected]> 2024-03-12 03:30:50 +0000
commit: 21ea290b34e310371b5a8c2569c161a67467981c (patch)
tree: a894282d4dbc201f6c61ce16ddfe42a076e08709
parent: cb4bc4d03ca41bf367d1fe1ae47865a8b80092ed (diff)
8 files changed, 382 insertions, 21 deletions
diff --git a/prism/extension.c b/prism/extension.c
index 8c9d7944b5..9dbebdf6f4 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -1190,6 +1190,40 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
 }
 
 /**
+ * call-seq:
+ *   Debug::static_inspect(source, **options) -> String
+ *
+ * Parse source and inspect its first statement the way the duplicated-literal
+ * warnings do. NOTE(review): nodes[0] is read unchecked; confirm empty input.
+ */
+static VALUE
+static_inspect(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+    string_options(argc, argv, &input, &options);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
+
+    pm_node_t *program = pm_parse(&parser);
+    pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0]; // first statement only
+
+    pm_buffer_t buffer = { 0 };
+    pm_static_literal_inspect(&buffer, &parser, node);
+
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name); // result is tagged with the parser's encoding
+    VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
+
+    pm_buffer_free(&buffer);
+    pm_node_destroy(&parser, program);
+    pm_parser_free(&parser);
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    return result;
+}
+
+/**
  * call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
  *
  * Return an array of all of the encodings that prism knows about.
@@ -1338,6 +1372,7 @@ Init_prism(void) {
     rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
     rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
     rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
+    rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1);
 
     // Next, define the functions that are exposed through the private
     // Debug::Encoding class.
diff --git a/prism/prism.c b/prism/prism.c
index 98c18407b8..739ca4671a 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -12129,15 +12129,20 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
     const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
 
     if (duplicated != NULL) {
+        pm_buffer_t buffer = { 0 };
+        pm_static_literal_inspect(&buffer, parser, duplicated);
+
         pm_diagnostic_list_append_format(
             &parser->warning_list,
             duplicated->location.start,
             duplicated->location.end,
             PM_WARN_DUPLICATED_HASH_KEY,
-            (int) (duplicated->location.end - duplicated->location.start),
-            duplicated->location.start,
+            (int) pm_buffer_length(&buffer),
+            pm_buffer_value(&buffer),
             pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
         );
+
+        pm_buffer_free(&buffer);
     }
 }
 
diff --git a/prism/static_literals.c b/prism/static_literals.c
index 81231692f6..af612f5e97 100644
--- a/prism/static_literals.c
+++ b/prism/static_literals.c
@@ -371,3 +371,182 @@ pm_static_literals_free(pm_static_literals_t *literals) {
     pm_node_hash_free(&literals->regexp_nodes);
     pm_node_hash_free(&literals->symbol_nodes);
 }
+
+/**
+ * A helper to determine if the given numeric static literal is not negative.
+ * Zero counts as positive so 0.0i is written (0+0.0i). Used for imaginary nodes.
+ */
+static bool
+pm_static_literal_positive_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FLOAT_NODE:
+            return ((const pm_float_node_t *) node)->value >= 0 && *node->location.start != '-'; // -0.0 keeps its sign
+        case PM_INTEGER_NODE:
+            return !((const pm_integer_node_t *) node)->value.negative;
+        case PM_RATIONAL_NODE:
+            return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
+        case PM_IMAGINARY_NODE:
+            return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
+        default:
+            assert(false && "unreachable");
+            return false;
+    }
+}
+
+/**
+ * Inspect a rational node that wraps a float node, e.g. 77.777r is written as
+ * (77777/1000) and 0.0r as (0/1). This is a poor-man's `Rational#to_s`: the
+ * fraction is not reduced by its GCD. NOTE(review): a leading '-' is not
+ * skipped before the digits are parsed; confirm how negatives reach here.
+ */
+static void
+pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
+    const uint8_t *start = node->base.location.start;
+    const uint8_t *end = node->base.location.end - 1; // r
+
+    while (start < end && *start == '0') start++; // 0.1 -> .1
+    while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
+    size_t length = (size_t) (end - start);
+
+    const uint8_t *point = memchr(start, '.', length);
+    assert(point && "should have a decimal point");
+
+    // 0.0r strips down to a bare "."; no digits remain, so skip malloc(0).
+    if (length == 1) { pm_buffer_append_string(buffer, "(0/1)", 5); return; }
+
+    uint8_t *digits = malloc(length - 1);
+    if (digits == NULL) return;
+    memcpy(digits, start, (size_t) (point - start));
+    memcpy(digits + (point - start), point + 1, (size_t) (end - point - 1));
+
+    pm_integer_t numerator = { 0 };
+    pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
+    pm_buffer_append_byte(buffer, '(');
+    pm_integer_string(buffer, &numerator);
+    pm_buffer_append_string(buffer, "/1", 2);
+    for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
+    pm_buffer_append_byte(buffer, ')');
+    pm_integer_free(&numerator);
+    free(digits);
+}
+
+/**
+ * Append an inspect-style representation of the given static literal to buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+    assert(PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL));
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FALSE_NODE:
+            pm_buffer_append_string(buffer, "false", 5);
+            break;
+        case PM_FLOAT_NODE: {
+            const double value = ((const pm_float_node_t *) node)->value;
+
+            if (isinf(value)) {
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "Infinity", 8);
+            } else if (value == 0.0) {
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "0.0", 3);
+            } else {
+                pm_buffer_append_format(buffer, "%g", value);
+
+                // %g prints 1e100 as 1e+100 with no ".0", so when no decimal
+                // point is present one is inserted before the exponent (or at
+                // the end). NOTE(review): %g presumably rounds to 6 digits.
+                if (pm_buffer_index(buffer, '.') == -1) {
+                    ssize_t exponent_index = pm_buffer_index(buffer, 'e');
+                    size_t index = exponent_index == -1 ? pm_buffer_length(buffer) : (size_t) exponent_index;
+                    pm_buffer_insert(buffer, index, ".0", 2);
+                }
+            }
+
+            break;
+        }
+        case PM_IMAGINARY_NODE: {
+            const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+            pm_buffer_append_string(buffer, "(0", 2);
+            if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
+            pm_static_literal_inspect(buffer, parser, numeric);
+            if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*'); // e.g. (0+(1/1)*i)
+            pm_buffer_append_string(buffer, "i)", 2);
+            break;
+        }
+        case PM_INTEGER_NODE:
+            pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
+            break;
+        case PM_NIL_NODE:
+            pm_buffer_append_string(buffer, "nil", 3);
+            break;
+        case PM_RATIONAL_NODE: {
+            const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
+
+            switch (PM_NODE_TYPE(numeric)) {
+                case PM_INTEGER_NODE:
+                    pm_buffer_append_byte(buffer, '(');
+                    pm_static_literal_inspect(buffer, parser, numeric);
+                    pm_buffer_append_string(buffer, "/1)", 3);
+                    break;
+                case PM_FLOAT_NODE:
+                    pm_rational_inspect(buffer, (pm_rational_node_t *) node);
+                    break;
+                default:
+                    assert(false && "unreachable");
+                    break;
+            }
+
+            break;
+        }
+        case PM_REGULAR_EXPRESSION_NODE: {
+            const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '/');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '/');
+
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
+
+            break;
+        }
+        case PM_SOURCE_ENCODING_NODE:
+            pm_buffer_append_format(buffer, "#<Encoding:%s>", parser->encoding->name);
+            break;
+        case PM_SOURCE_FILE_NODE: {
+            const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SOURCE_LINE_NODE:
+            pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line);
+            break;
+        case PM_STRING_NODE: {
+            const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SYMBOL_NODE: {
+            const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, ':');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            break;
+        }
+        case PM_TRUE_NODE:
+            pm_buffer_append_string(buffer, "true", 4);
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
index 2a3d815fa9..dd1f2e7f84 100644
--- a/prism/static_literals.h
+++ b/prism/static_literals.h
@@ -106,4 +106,13 @@ pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals
  */
 void pm_static_literals_free(pm_static_literals_t *literals);
 
+/**
+ * Append a string-based representation of the given static literal to buffer.
+ *
+ * @param buffer The buffer to append the string to.
+ * @param parser The parser that created the node.
+ * @param node The static literal node to inspect (the flag is asserted).
+ */
+PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
+
 #endif
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
index 87f79ddd2c..f150a22cee 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/util/pm_buffer.c
@@ -284,6 +284,29 @@ pm_buffer_rstrip(pm_buffer_t *buffer) {
 }
 
 /**
+ * Index of the first value byte, or -1; an empty buffer is never scanned.
+ */
+ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value) {
+    const char *first = (buffer->length == 0) ? NULL : memchr(buffer->value, value, buffer->length);
+    return (first == NULL) ? -1 : (ssize_t) (first - buffer->value);
+}
+
+/**
+ * Insert length bytes of value at index, moving the existing tail to the right.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
+    assert(index <= buffer->length);
+    if (index == buffer->length) {
+        pm_buffer_append_string(buffer, value, length);
+        return;
+    }
+    pm_buffer_append_zeroes(buffer, length);
+    char *gap = buffer->value + index;
+    memmove(gap + length, gap, buffer->length - length - index);
+    memcpy(gap, value, length);
+}
+
+/**
  * Free the memory associated with the buffer.
  */
 void
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index d8ec8180e7..f80b0e7b82 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -189,6 +189,26 @@ void pm_buffer_clear(pm_buffer_t *buffer);
 void pm_buffer_rstrip(pm_buffer_t *buffer);
 
 /**
+ * Find the first occurrence of the given value in the buffer.
+ *
+ * @param buffer The buffer to search.
+ * @param value The byte to search for.
+ * @returns The index of the first occurrence of the value in the buffer, or -1
+ *   if the value is not found.
+ */
+ssize_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/**
+ * Insert the given string into the buffer at the given index.
+ *
+ * @param buffer The buffer to insert into.
+ * @param index The index to insert at; asserted to be at most the buffer length.
+ * @param value The string to insert.
+ * @param length The length of the string to insert.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+/**
  * Free the memory associated with the buffer.
  *
  * @param buffer The buffer to free.
diff --git a/test/prism/static_inspect_test.rb b/test/prism/static_inspect_test.rb
new file mode 100644
index 0000000000..07b7ac3244
--- /dev/null
+++ b/test/prism/static_inspect_test.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+return if Prism::BACKEND == :FFI # Debug.static_inspect is bound in the C extension
+
+module Prism
+  class StaticInspectTest < TestCase # expected Debug.static_inspect output per literal kind
+    def test_false
+      assert_equal "false", static_inspect("false")
+    end
+
+    def test_float
+      assert_equal "0.25", static_inspect("0.25")
+      assert_equal "5.125", static_inspect("5.125")
+
+      assert_equal "0.0", static_inspect("0.0")
+      assert_equal "-0.0", static_inspect("-0.0")
+
+      assert_equal "1.0e+100", static_inspect("1e100") # ".0" is inserted before the exponent
+      assert_equal "-1.0e+100", static_inspect("-1e100")
+
+      assert_equal "Infinity", static_inspect("1e1000")
+      assert_equal "-Infinity", static_inspect("-1e1000")
+    end
+
+    def test_imaginary
+      assert_equal "(0+1i)", static_inspect("1i")
+      assert_equal "(0-1i)", static_inspect("-1i")
+    end
+
+    def test_integer
+      assert_equal "1000", static_inspect("1_0_0_0")
+      assert_equal "10000000000000000000000000000", static_inspect("1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0")
+    end
+
+    def test_nil
+      assert_equal "nil", static_inspect("nil")
+    end
+
+    def test_rational
+      assert_equal "(0/1)", static_inspect("0r")
+      assert_equal "(1/1)", static_inspect("1r")
+      assert_equal "(1/1)", static_inspect("1.0r") # trailing zeros are dropped
+      assert_equal "(77777/1000)", static_inspect("77.777r")
+    end
+
+    def test_regular_expression
+      assert_equal "/.*/", static_inspect("/.*/")
+      assert_equal "/.*/i", static_inspect("/.*/i")
+      assert_equal "/.*/", static_inspect("/.*/u") # the u flag is not printed
+      assert_equal "/.*/n", static_inspect("/.*/un")
+    end
+
+    def test_source_encoding
+      assert_equal "#<Encoding:UTF-8>", static_inspect("__ENCODING__")
+      assert_equal "#<Encoding:Shift_JIS>", static_inspect("__ENCODING__", encoding: "Shift_JIS")
+    end
+
+    def test_source_file
+      assert_equal __FILE__.inspect, static_inspect("__FILE__", filepath: __FILE__)
+    end
+
+    def test_source_line
+      assert_equal "1", static_inspect("__LINE__")
+      assert_equal "5", static_inspect("__LINE__", line: 5)
+    end
+
+    def test_string
+      assert_equal "\"\"", static_inspect('""', frozen_string_literal: true)
+      assert_equal "\"Hello, World!\"", static_inspect('"Hello, World!"', frozen_string_literal: true)
+      assert_equal "\"\\a\"", static_inspect("\"\\a\"", frozen_string_literal: true)
+    end
+
+    def test_symbol
+      assert_equal ":foo", static_inspect(":foo")
+      assert_equal ":foo", static_inspect("%s[foo]")
+    end
+
+    def test_true
+      assert_equal "true", static_inspect("true")
+    end
+
+    private
+
+    def static_inspect(source, **options) # thin wrapper over Debug.static_inspect
+      Debug.static_inspect(source, **options)
+    end
+  end
+end
diff --git a/test/prism/static_literals_test.rb b/test/prism/static_literals_test.rb
index 6108e2ddbc..4e98c15701 100644
--- a/test/prism/static_literals_test.rb
+++ b/test/prism/static_literals_test.rb
@@ -6,31 +6,31 @@ module Prism
   class StaticLiteralsTest < TestCase
     def test_static_literals
       assert_warning("1")
-      assert_warning("0xA", "10")
-      assert_warning("0o10", "8")
-      assert_warning("0b10", "2")
-      assert_warning("1_000")
-      assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}")
-      assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}")
+      assert_warning("0xA", "10", "10")
+      assert_warning("0o10", "8", "8")
+      assert_warning("0b10", "2", "2")
+      assert_warning("1_000", "1000", "1000")
+      assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}", (2**32).to_s(10))
+      assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}", (2**64).to_s(10))
 
       refute_warning("1", "-1")
       refute_warning((2**32).to_s(10), "-0x#{(2**32).to_s(16)}")
       refute_warning((2**64).to_s(10), "-0x#{(2**64).to_s(16)}")
 
-      assert_warning("__LINE__", "2")
-      assert_warning("3", "__LINE__")
+      assert_warning("__LINE__", "2", "2")
+      assert_warning("3", "__LINE__", "3")
 
       assert_warning("1.0")
-      assert_warning("1e2", "100.0")
+      assert_warning("1e2", "100.0", "100.0")
 
-      assert_warning("1r")
-      assert_warning("1.0r")
+      assert_warning("1r", "1r", "(1/1)")
+      assert_warning("1.0r", "1.0r", "(1/1)")
 
-      assert_warning("1i")
-      assert_warning("1.0i")
+      assert_warning("1i", "1i", "(0+1i)")
+      assert_warning("1.0i", "1.0i", "(0+1.0i)")
 
-      assert_warning("1ri")
-      assert_warning("1.0ri")
+      assert_warning("1ri", "1ri", "(0+(1/1)*i)")
+      assert_warning("1.0ri", "1.0ri", "(0+(1/1)*i)")
 
       assert_warning("\"#{__FILE__}\"")
       assert_warning("\"foo\"")
@@ -41,12 +41,12 @@ module Prism
       refute_warning("/foo/", "/foo/i")
 
       assert_warning(":foo")
-      assert_warning("%s[foo]")
+      assert_warning("%s[foo]", ":foo", ":foo")
 
       assert_warning("true")
       assert_warning("false")
       assert_warning("nil")
-      assert_warning("__ENCODING__")
+      assert_warning("__ENCODING__", "__ENCODING__", "#<Encoding:UTF-8>")
     end
 
     private
@@ -71,10 +71,10 @@ module Prism
       warnings
     end
 
-    def assert_warning(left, right = left)
+    def assert_warning(left, right = left, message = left) # message: text expected in the hash-key warning
       hash_keys, when_clauses = parse_warnings(left, right)
 
-      assert_include hash_keys.message, left
+      assert_include hash_keys.message, message
       assert_include hash_keys.message, "line 3"
       assert_include when_clauses.message, "line 3"
     end
author	Kevin Newton <[email protected]>	2024-03-11 12:13:12 -0400
committer	git <[email protected]>	2024-03-12 03:30:50 +0000
commit	21ea290b34e310371b5a8c2569c161a67467981c (patch)
tree	a894282d4dbc201f6c61ce16ddfe42a076e08709
parent	cb4bc4d03ca41bf367d1fe1ae47865a8b80092ed (diff)