summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Earlopain <[email protected]> 2025-01-15 23:24:05 +0100
committer: Kevin Newton <[email protected]> 2025-03-18 13:36:53 -0400
commit: a8adf5e006da03b8ccaa2bf900f4f077ca9888cf (patch)
tree: e7e6994efa7806d708033ef13fbe81dcd5c36c37 /lib
parent: fc14d3ac7d4fa14f568d2428b846f391ebdf0d62 (diff)
[ruby/prism] Further refine string handling in the parser translator
Mostly around newlines and line continuation.

* percent arrays need special backslash handling in the ast
* Fix offset issue for heredocs with many line continuations (used wrong variable as index access)
* More refined rules on when to simplify string tokens
* Handle line continuations in squiggly heredocs
* Correctly dedent squiggly heredocs with interpolation
* Consider `':foo:` and `%s[foo]` to not be interpolation

https://github.com/ruby/prism/commit/4edfe9d981
Diffstat (limited to 'lib')
-rw-r--r-- lib/prism/translation/parser/compiler.rb 13
-rw-r--r-- lib/prism/translation/parser/lexer.rb 29
2 files changed, 42 insertions, 0 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 8f6a602c42..6a169600b5 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1120,6 +1120,7 @@ module Prism
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
+<<<<<<< HEAD
=======
parts = if node.parts.one? { |part| part.type == :string_node }
node.parts.flat_map do |node|
@@ -1153,6 +1154,8 @@ module Prism
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
builder.string_compose(
token(node.opening_loc),
string_nodes_from_interpolation(node, node.opening),
@@ -2218,12 +2221,16 @@ module Prism
escaped = escaped.lines
<<<<<<< HEAD
<<<<<<< HEAD
+<<<<<<< HEAD
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
=======
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
=======
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+=======
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# Non-interpolating strings
if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
@@ -2232,6 +2239,9 @@ module Prism
<<<<<<< HEAD
<<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
escaped.filter_map.with_index do |escaped_line, index|
@@ -2249,6 +2259,7 @@ module Prism
current_length = 0
s
<<<<<<< HEAD
+<<<<<<< HEAD
=======
if opening&.end_with?("'")
escaped.each do |line|
@@ -2258,6 +2269,8 @@ module Prism
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
end
else
escaped_lengths = []
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 98fd7aaa02..39eb9943d7 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -440,6 +440,7 @@ module Prism
if (lines = token.value.lines).one?
<<<<<<< HEAD
+<<<<<<< HEAD
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
# The parser gem only removes indentation when the heredoc is not nested
@@ -489,6 +490,25 @@ module Prism
end
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+=======
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
+
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
current_string << unescape_string(value, quote_stack.last)
if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
@@ -817,6 +837,9 @@ module Prism
<<<<<<< HEAD
<<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# Certain strings are merged into a single string token.
@@ -838,6 +861,7 @@ module Prism
<<<<<<< HEAD
<<<<<<< HEAD
+<<<<<<< HEAD
=======
>>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
# Escape a byte value, given the control and meta flags.
@@ -903,6 +927,8 @@ module Prism
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# In a percent array, certain whitespace can be preceeded with a backslash,
# causing the following characters to be part of the previous element.
def percent_array_unescape(string)
@@ -928,6 +954,7 @@ module Prism
def interpolation?(quote)
!quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
<<<<<<< HEAD
+<<<<<<< HEAD
end
# Regexp allow interpolation but are handled differently during unescaping
@@ -935,6 +962,8 @@ module Prism
quote == "/" || quote.start_with?("%r")
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+=======
+>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
end
# Regexp allow interpolation but are handled differently during unescaping