summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEarlopain <[email protected]>2025-01-06 20:18:03 +0100
committerKevin Newton <[email protected]>2025-01-11 19:09:05 -0500
commita234fd516f82702f9efd67e3f67de129702a6801 (patch)
tree5cb7b38794458953bcf4cd70887833eb734356f4 /lib
parentd1a70014f9a1ee411c41338d0929443bab004cda (diff)
[ruby/prism] Fix parser translator ast for regex with line continuation
Turns out, the vast majority of the work was already done when handling the same case for heredocs. I'm confident this should also apply to actual string nodes (there's even a todo for it), but no tests change if I apply it there too, so I can't say for sure whether the logic would be correct. The individual test files are a bit too large; maybe something else would break that currently passes. Leaving it for later to look more closely into that. https://github.com/ruby/prism/commit/6bba1c54e1
Diffstat (limited to 'lib')
-rw-r--r--lib/prism/translation/parser/compiler.rb111
1 files changed, 56 insertions, 55 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 54e08eb991..c6a7154625 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1511,13 +1511,9 @@ module Prism
# /foo/
# ^^^^^
def visit_regular_expression_node(node)
- content = node.content
parts =
- if content.include?("\n")
- offset = node.content_loc.start_offset
- content.lines.map do |line|
- builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
- end
+ if node.content.include?("\n")
+ string_nodes_from_line_continuations(node, node.content_loc.start_offset, node.opening)
else
[builder.string_internal(token(node.content_loc))]
end
@@ -2074,55 +2070,7 @@ module Prism
node.parts.each do |part|
pushing =
if part.is_a?(StringNode) && part.unescaped.include?("\n")
- unescaped = part.unescaped.lines
- escaped = part.content.lines
-
- escaped_lengths = []
- normalized_lengths = []
- # Keeps track of where an unescaped line should start a new token. An unescaped
- # \n would otherwise be indistinguishable from the actual newline at the end of
- # of the line. The parser gem only emits a new string node at "real" newlines,
- # line continuations don't start a new node as well.
- do_next_tokens = []
-
- if node.opening.end_with?("'")
- escaped.each do |line|
- escaped_lengths << line.bytesize
- normalized_lengths << chomped_bytesize(line)
- do_next_tokens << true
- end
- else
- escaped
- .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
- .each do |lines|
- escaped_lengths << lines.sum(&:bytesize)
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
- unescaped_lines_count = lines.sum do |line|
- line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
- end
- do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
- do_next_tokens[-1] = true
- end
- end
-
- start_offset = part.location.start_offset
- current_line = +""
- current_normalized_length = 0
-
- unescaped.filter_map.with_index do |unescaped_line, index|
- current_line << unescaped_line
- current_normalized_length += normalized_lengths.fetch(index, 0)
-
- if do_next_tokens[index]
- inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
- start_offset += escaped_lengths.fetch(index, 0)
- current_line = +""
- current_normalized_length = 0
- inner_part
- else
- nil
- end
- end
+ string_nodes_from_line_continuations(part, part.location.start_offset, node.opening)
else
[visit(part)]
end
@@ -2172,6 +2120,59 @@ module Prism
parser.pattern_variables.pop
end
end
+
+ # Create parser string nodes from a single prism node. The parser gem
+ # "glues" strings together when a line continuation is encountered.
+ #
+ # node         - object responding to #unescaped and #content; both are
+ #                split into lines here.
+ # start_offset - offset of the first emitted range; it is advanced by the
+ #                byte size of the escaped source consumed for each node.
+ # opening      - opening delimiter of the literal; only checked for a
+ #                trailing single quote.
+ #
+ # Returns an array holding the results of builder.string_internal, one per
+ # emitted token.
+ def string_nodes_from_line_continuations(node, start_offset, opening)
+ unescaped = node.unescaped.lines
+ escaped = node.content.lines
+
+ # Per token: bytes of escaped source consumed (used to advance start_offset).
+ escaped_lengths = []
+ # Per token: summed chomped_bytesize of its lines (used as the range length).
+ # NOTE(review): chomped_bytesize is defined outside this view — presumably
+ # the byte size without the trailing line terminator; confirm.
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end
+ # of the line. The parser gem only emits a new string node at "real" newlines,
+ # line continuations don't start a new node either.
+ do_next_tokens = []
+
+ if opening.end_with?("'")
+ # Opening ends in a single quote: every source line becomes its own token.
+ # Presumably because such literals have no line continuations or \n escapes.
+ escaped.each do |line|
+ escaped_lengths << line.bytesize
+ normalized_lengths << chomped_bytesize(line)
+ do_next_tokens << true
+ end
+ else
+ # Group consecutive source lines while the earlier line ends in an odd number
+ # of backslashes before its newline (i.e. the newline itself is escaped).
+ escaped
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
+ .each do |lines|
+ escaped_lengths << lines.sum(&:bytesize)
+ normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
+ # Count the "n" characters preceded by an odd number of backslashes in this
+ # group; each one accounts for an extra entry in do_next_tokens below.
+ unescaped_lines_count = lines.sum do |line|
+ line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
+ end
+ # Only the last entry of the group is marked as emitting a token.
+ do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
+ do_next_tokens[-1] = true
+ end
+ end
+
+ current_line = +""
+ current_normalized_length = 0
+
+ # Accumulate unescaped lines until a token boundary is reached, then emit one
+ # string node for the accumulated text; filter_map drops the nil results.
+ # NOTE(review): the length arrays get one entry per group above, while index
+ # counts unescaped lines; fetch(index, 0) tolerates indices past the end —
+ # confirm the alignment when \n escapes are present.
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ # Move past the escaped source just consumed and reset the accumulators.
+ start_offset += escaped_lengths.fetch(index, 0)
+ current_line = +""
+ current_normalized_length = 0
+ inner_part
+ else
+ nil
+ end
+ end
+ end
end
end
end