summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Earlopain <[email protected]> 2025-01-15 23:24:05 +0100
committer: Kevin Newton <[email protected]> 2025-03-18 13:36:53 -0400
commit: bc506295a30a5806b3346ed09cd679f3b8ee6f64 (patch)
tree: d37cb1f1b816eaa63be60f55b99eebe0a970e9eb
parent: 9e5e3f1bede46ed499a809975c663ba32c34ffff (diff)
[ruby/prism] Further refine string handling in the parser translator
Mostly around newlines and line continuation.

* percent arrays need special backslash handling in the ast
* Fix offset issue for heredocs with many line continuations (used wrong variable as index access)
* More refined rules on when to simplify string tokens
* Handle line continuations in squiggly heredocs
* Correctly dedent squiggly heredocs with interpolation
* Consider `':foo:` and `%s[foo]` to not be interpolation

https://github.com/ruby/prism/commit/4edfe9d981
-rw-r--r--lib/prism/translation/parser/compiler.rb152
-rw-r--r--lib/prism/translation/parser/lexer.rb179
-rw-r--r--test/prism/fixtures/heredocs_with_fake_newlines.txt55
-rw-r--r--test/prism/fixtures/regex_with_fake_newlines.txt41
-rw-r--r--test/prism/fixtures/strings.txt34
-rw-r--r--test/prism/lex_test.rb6
-rw-r--r--test/prism/ruby/parser_test.rb8
-rw-r--r--test/prism/ruby/ripper_test.rb1
-rw-r--r--test/prism/ruby/ruby_parser_test.rb1
9 files changed, 362 insertions, 115 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 4eec8205c8..1459b53c48 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -74,7 +74,29 @@ module Prism
# []
# ^^
def visit_array_node(node)
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
+ elements = node.elements.flat_map do |element|
+ if element.is_a?(StringNode)
+ if element.content.include?("\n")
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+ end
+ elsif element.is_a?(InterpolatedStringNode)
+ builder.string_compose(
+ token(element.opening_loc),
+ string_nodes_from_interpolation(element, node.opening),
+ token(element.closing_loc)
+ )
+ else
+ [visit(element)]
+ end
+ end
+ else
+ elements = visit_all(node.elements)
+ end
+
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
end
# foo => [bar]
@@ -1088,19 +1110,9 @@ module Prism
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
- parts = node.parts.flat_map do |part|
- # When the content of a string node is split across multiple lines, the
- # parser gem creates individual string nodes for each line the content is part of.
- if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
- string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
- else
- visit(part)
- end
- end
-
builder.string_compose(
token(node.opening_loc),
- parts,
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -1119,14 +1131,14 @@ module Prism
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
if node.heredoc?
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
- else
- builder.xstring_compose(
- token(node.opening_loc),
- visit_all(node.parts),
- token(node.closing_loc)
- )
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
end
# -> { it }
@@ -2024,13 +2036,6 @@ module Prism
end
end
- # The parser gem automatically converts \r\n to \n, meaning our offsets
- # need to be adjusted to always subtract 1 from the length.
- def chomped_bytesize(line)
- chomped = line.chomp
- chomped.bytesize + (chomped == line ? 0 : 1)
- end
-
# Visit a heredoc that can be either a string or an xstring.
def visit_heredoc(node)
children = Array.new
@@ -2099,55 +2104,88 @@ module Prism
end
end
+ # When the content of a string node is split across multiple lines, the
+ # parser gem creates individual string nodes for each line the content is part of.
+ def string_nodes_from_interpolation(node, opening)
+ node.parts.flat_map do |part|
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+ else
+ visit(part)
+ end
+ end
+ end
+
# Create parser string nodes from a single prism node. The parser gem
# "glues" strings together when a line continuation is encountered.
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
unescaped = unescaped.lines
escaped = escaped.lines
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+
+ # Non-interpolating strings
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+ current_length = 0
+ current_line = +""
+
+ escaped.filter_map.with_index do |escaped_line, index|
+ unescaped_line = unescaped.fetch(index, "")
+ current_length += escaped_line.bytesize
+ current_line << unescaped_line
- escaped_lengths = []
- normalized_lengths = []
- # Keeps track of where an unescaped line should start a new token. An unescaped
- # \n would otherwise be indistinguishable from the actual newline at the end of
- # of the line. The parser gem only emits a new string node at "real" newlines,
- # line continuations don't start a new node as well.
- do_next_tokens = []
-
- if opening&.end_with?("'")
- escaped.each do |line|
- escaped_lengths << line.bytesize
- normalized_lengths << chomped_bytesize(line)
- do_next_tokens << true
+ # Glue line continuations together. Only %w and %i arrays can contain these.
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+ next unless index == escaped.count - 1
+ end
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+ start_offset += escaped_line.bytesize
+ current_line = +""
+ current_length = 0
+ s
end
else
+ escaped_lengths = []
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # of the line. The parser gem only emits a new string node at "real" newlines,
+ # line continuations don't start a new node as well.
+ do_next_tokens = []
+
escaped
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
.each do |lines|
escaped_lengths << lines.sum(&:bytesize)
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
unescaped_lines_count = lines.sum do |line|
line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
end
- do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
+ extra = 1
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
+
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
do_next_tokens[-1] = true
end
- end
-
- current_line = +""
- current_normalized_length = 0
- unescaped.filter_map.with_index do |unescaped_line, index|
- current_line << unescaped_line
- current_normalized_length += normalized_lengths.fetch(index, 0)
-
- if do_next_tokens[index]
- inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
- start_offset += escaped_lengths.fetch(index, 0)
- current_line = +""
- current_normalized_length = 0
- inner_part
- else
- nil
+ current_line = +""
+ current_normalized_length = 0
+
+ emitted_count = 0
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
+ current_line = +""
+ current_normalized_length = 0
+ emitted_count += 1
+ inner_part
+ else
+ nil
+ end
end
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index f7187b1724..7db519499f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -341,6 +341,7 @@ module Prism
when :tRATIONAL
value = parse_rational(value)
when :tSPACE
+ location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
next_token = lexed[index][0]
@@ -354,11 +355,15 @@ module Prism
location = range(next_location.start_offset, next_location.end_offset)
index += 1
elsif value.start_with?("'", '"', "%")
- if next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END
- # the parser gem doesn't simplify strings when its value ends in a newline
- if !(string_value = next_token.value).end_with?("\n") && basic_quotes
+ if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
+ string_value = next_token.value
+ if simplify_string?(string_value, value)
next_location = token.location.join(next_next_token.location)
- value = unescape_string(string_value, value)
+ if percent_array?(value)
+ value = percent_array_unescape(string_value)
+ else
+ value = unescape_string(string_value, value)
+ end
type = :tSTRING
location = range(next_location.start_offset, next_location.end_offset)
index += 2
@@ -397,16 +402,34 @@ module Prism
quote_stack.push(value)
end
when :tSTRING_CONTENT
+ is_percent_array = percent_array?(quote_stack.last)
+
if (lines = token.value.lines).one?
- # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
- # The parser gem only removes indentation when the heredoc is not nested
- not_nested = heredoc_stack.size == 1
- if is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
- value = trim_heredoc_whitespace(value, current_heredoc)
- end
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
- value = unescape_string(value, quote_stack.last)
+ current_string << unescape_string(value, quote_stack.last)
+ if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
+ tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
+ break
+ end
+ token = lexed[index][0]
+ index += 1
+ end
else
# When the parser gem encounters a line continuation inside of a multiline string,
# it emits a single string node. The backslash (and remaining newline) is removed.
@@ -419,12 +442,10 @@ module Prism
chomped_line = line.chomp
backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
is_interpolation = interpolation?(quote_stack.last)
- is_percent_array = percent_array?(quote_stack.last)
if backslash_count.odd? && (is_interpolation || is_percent_array)
if is_percent_array
- # Remove the last backslash, keep potential newlines
- current_line << line.sub(/(\\)(\r?\n)\z/, '\2')
+ current_line << percent_array_unescape(line)
adjustment += 1
else
chomped_line.delete_suffix!("\\")
@@ -446,8 +467,8 @@ module Prism
adjustment = 0
end
end
- next
end
+ next
when :tSTRING_DVAR
value = nil
when :tSTRING_END
@@ -570,12 +591,13 @@ module Prism
while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
next_token_index += 1
next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
+ first_token_on_line = next_token.location.start_column == 0
# String content inside nested heredocs and interpolation is ignored
if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN
# When interpolation is the first token of a line there is no string
# content to check against. There will be no common whitespace.
- if nesting_level == 0 && next_token.location.start_column == 0
+ if nesting_level == 0 && first_token_on_line
result = 0
end
nesting_level += 1
@@ -583,7 +605,7 @@ module Prism
nesting_level -= 1
# When we encountered the matching heredoc end, we can exit
break if nesting_level == -1
- elsif next_token.type == :STRING_CONTENT && nesting_level == 0
+ elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line
common_whitespace = 0
next_token.value[/^\s*/].each_char do |char|
if char == "\t"
@@ -672,50 +694,107 @@ module Prism
while (skipped = scanner.skip_until(/\\/))
# Append what was just skipped over, excluding the found backslash.
result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
-
- # Simple single-character escape sequences like \n
- if (replacement = ESCAPES[scanner.peek(1)])
- result.append_as_bytes(replacement)
- scanner.pos += 1
- elsif (octal = scanner.check(/[0-7]{1,3}/))
- # \nnn
- result.append_as_bytes(octal.to_i(8).chr)
- scanner.pos += octal.bytesize
- elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
- # \xnn
- result.append_as_bytes(hex[1..].to_i(16).chr)
- scanner.pos += hex.bytesize
- elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
- # \unnnn
- result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8))
- scanner.pos += unicode.bytesize
- elsif scanner.peek(3) == "u{}"
- # https://github.com/whitequark/parser/issues/856
- scanner.pos += 3
- elsif (unicode_parts = scanner.check(/u{.*}/))
- # \u{nnnn ...}
- unicode_parts[2..-2].split.each do |unicode|
- result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
- end
- scanner.pos += unicode_parts.bytesize
- end
+ escape_read(result, scanner, false, false)
end
- # Add remainging chars
+ # Add remaining chars
result.append_as_bytes(string.byteslice(scanner.pos..))
-
result.force_encoding(source_buffer.source.encoding)
-
- result
else
delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}")
string.gsub(/\\([\\#{delimiters}])/, '\1')
end
end
+ # Certain strings are merged into a single string token.
+ def simplify_string?(value, quote)
+ case quote
+ when "'"
+ # Only simplify 'foo'
+ !value.include?("\n")
+ when '"'
+ # Simplify when every line ends with a line continuation, or it is the last line
+ value.lines.all? do |line|
+ !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd?
+ end
+ else
+ # %q and similar are never simplified
+ false
+ end
+ end
+
+ # Escape a byte value, given the control and meta flags.
+ def escape_build(value, control, meta)
+ value &= 0x9f if control
+ value |= 0x80 if meta
+ value
+ end
+
+ # Read an escape out of the string scanner, given the control and meta
+ # flags, and push the unescaped value into the result.
+ def escape_read(result, scanner, control, meta)
+ if scanner.skip("\n")
+ # Line continuation
+ elsif (value = ESCAPES[scanner.peek(1)])
+ # Simple single-character escape sequences like \n
+ result.append_as_bytes(value)
+ scanner.pos += 1
+ elsif (value = scanner.scan(/[0-7]{1,3}/))
+ # \nnn
+ result.append_as_bytes(escape_build(value.to_i(8), control, meta))
+ elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/))
+ # \xnn
+ result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta))
+ elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/))
+ # \unnnn
+ result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8))
+ elsif scanner.skip("u{}")
+ # https://github.com/whitequark/parser/issues/856
+ elsif (value = scanner.scan(/u{.*?}/))
+ # \u{nnnn ...}
+ value[2..-2].split.each do |unicode|
+ result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
+ end
+ elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/))
+ # \cx or \C-x where x is an ASCII printable character
+ escape_read(result, scanner, true, meta)
+ elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
+ # \M-x where x is an ASCII printable character
+ escape_read(result, scanner, control, true)
+ elsif (byte = scanner.get_byte)
+ # Something else after an escape.
+ if control && byte == "?"
+ result.append_as_bytes(escape_build(0x7f, false, meta))
+ else
+ result.append_as_bytes(escape_build(byte.ord, control, meta))
+ end
+ end
+ end
+
+ # In a percent array, certain whitespace can be preceeded with a backslash,
+ # causing the following characters to be part of the previous element.
+ def percent_array_unescape(string)
+ string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match|
+ full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd?
+ full_match
+ end
+ end
+
+ # For %-arrays whitespace, the parser gem only considers whitespace before the newline.
+ def percent_array_leading_whitespace(string)
+ return 1 if string.start_with?("\n")
+
+ leading_whitespace = 0
+ string.each_char do |c|
+ break if c == "\n"
+ leading_whitespace += 1
+ end
+ leading_whitespace
+ end
+
# Determine if characters preceeded by a backslash should be escaped or not
def interpolation?(quote)
- quote != "'" && !quote.start_with?("%q", "%w", "%i")
+ !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
end
# Regexp allow interpolation but are handled differently during unescaping
diff --git a/test/prism/fixtures/heredocs_with_fake_newlines.txt b/test/prism/fixtures/heredocs_with_fake_newlines.txt
new file mode 100644
index 0000000000..887b7ab5e7
--- /dev/null
+++ b/test/prism/fixtures/heredocs_with_fake_newlines.txt
@@ -0,0 +1,55 @@
+<<-RUBY
+ \n
+ \n
+ exit
+ \\n
+ \n\n\n\n
+ argh
+ \\
+ \\\
+ foo\nbar
+ \f
+ ok
+RUBY
+
+<<~RUBY
+ \n
+ \n
+ exit
+ \\n
+ \n\n\n\n
+ argh
+ \\
+ \\\
+ foo\nbar
+ \f
+ ok
+RUBY
+
+<<~RUBY
+ #{123}\n
+ \n
+ exit
+ \\#{123}n
+ \n#{123}\n\n\n
+ argh
+ \\#{123}baz
+ \\\
+ foo\nbar
+ \f
+ ok
+RUBY
+
+<<'RUBY'
+ \n
+ \n
+ exit
+ \n
+ \n\n\n\n
+ argh
+ \
+ \
+ foo\nbar
+ \f
+ ok
+RUBY
diff --git a/test/prism/fixtures/regex_with_fake_newlines.txt b/test/prism/fixtures/regex_with_fake_newlines.txt
new file mode 100644
index 0000000000..d92a2e4ade
--- /dev/null
+++ b/test/prism/fixtures/regex_with_fake_newlines.txt
@@ -0,0 +1,41 @@
+/
+ \n
+ \n
+ exit
+ \\n
+ \n\n\n\n
+ argh
+ \\
+ \\\
+ foo\nbar
+ \f
+ ok
+/
+
+%r{
+ \n
+ \n
+ exit
+ \\n
+ \n\n\n\n
+ argh
+ \\
+ \\\
+ foo\nbar
+ \f
+ ok
+}
+
+%r{
+ #{123}\n
+ \n
+ exit\\\
+ \\#{123}n
+ \n#{123}\n\n\n
+ argh\
+ \\#{123}baz\\
+ \\\
+ foo\nbar
+ \f
+ ok
+}
diff --git a/test/prism/fixtures/strings.txt b/test/prism/fixtures/strings.txt
index 030f15a2c9..0787152786 100644
--- a/test/prism/fixtures/strings.txt
+++ b/test/prism/fixtures/strings.txt
@@ -45,6 +45,10 @@ foo\
b\nar
"
+"foo
+\nbar\n\n
+a\nb\n\nc\n"
+
%q{abc}
%s[abc]
@@ -69,6 +73,34 @@ b\nar
%w[foo\ bar baz]
+%w[foo\ bar\\ baz\\\
+ bat]
+
+%W[#{foo}\
+bar
+baz #{bat}
+]
+
+%w(foo\n)
+
+%w(foo\
+)
+
+%w(foo \n)
+
+%W(foo\
+bar)
+
+%w[foo bar]
+
+%w[
+ a
+ b c
+ d
+]
+
+%W[f\u{006f 006f}]
+
%W[a b#{c}d e]
%W[a b c]
@@ -98,6 +130,8 @@ baz
"ち\xE3\x81\xFF"
+"\777"
+
%[abc]
%(abc)
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 0e03874a15..2786c45a22 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -15,7 +15,11 @@ module Prism
# the heredoc are combined into a single token. See
# https://bugs.ruby-lang.org/issues/19838.
"spanning_heredoc.txt",
- "spanning_heredoc_newlines.txt"
+ "spanning_heredoc_newlines.txt",
+ # Prism emits a single :on_tstring_content in <<- style heredocs when there
+ # is a line continuation preceeded by escaped backslashes. It should emit two, same
+ # as if the backslashes are not present.
+ "heredocs_with_fake_newlines.txt",
]
if RUBY_VERSION < "3.3.0"
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
index 274730d641..5469e610fa 100644
--- a/test/prism/ruby/parser_test.rb
+++ b/test/prism/ruby/parser_test.rb
@@ -88,10 +88,7 @@ module Prism
# skip them for now.
skip_all = skip_incorrect | [
"unescaping.txt",
- "seattlerb/pctW_lineno.txt",
"seattlerb/regexp_esc_C_slash.txt",
- "unparser/corpus/literal/literal.txt",
- "whitequark/parser_slash_slash_n_escaping_in_literals.txt",
]
# Not sure why these files are failing on JRuby, but skipping them for now.
@@ -104,7 +101,6 @@ module Prism
skip_tokens = [
"dash_heredocs.txt",
"embdoc_no_newline_at_end.txt",
- "heredocs_with_ignored_newlines.txt",
"methods.txt",
"strings.txt",
"seattlerb/bug169.txt",
@@ -116,9 +112,9 @@ module Prism
"seattlerb/parse_line_heredoc.txt",
"seattlerb/pct_w_heredoc_interp_nested.txt",
"seattlerb/required_kwarg_no_value.txt",
- "seattlerb/slashy_newlines_within_string.txt",
"seattlerb/TestRubyParserShared.txt",
"unparser/corpus/literal/assignment.txt",
+ "unparser/corpus/literal/literal.txt",
"whitequark/args.txt",
"whitequark/beginless_erange_after_newline.txt",
"whitequark/beginless_irange_after_newline.txt",
@@ -127,13 +123,11 @@ module Prism
"whitequark/lbrace_arg_after_command_args.txt",
"whitequark/multiple_pattern_matches.txt",
"whitequark/newline_in_hash_argument.txt",
- "whitequark/parser_bug_640.txt",
"whitequark/pattern_matching_expr_in_paren.txt",
"whitequark/pattern_matching_hash.txt",
"whitequark/pin_expr.txt",
"whitequark/ruby_bug_14690.txt",
"whitequark/ruby_bug_9669.txt",
- "whitequark/slash_newline_in_heredocs.txt",
"whitequark/space_args_arg_block.txt",
"whitequark/space_args_block.txt"
]
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 7ed32ed216..4afe377038 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -32,6 +32,7 @@ module Prism
# Skip these tests that we haven't implemented yet.
omitted = [
"dos_endings.txt",
+ "heredocs_with_fake_newlines.txt",
"heredocs_with_ignored_newlines.txt",
"seattlerb/block_call_dot_op2_brace_block.txt",
"seattlerb/block_command_operation_colon.txt",
diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb
index fd1dbf1ac8..a92e8080de 100644
--- a/test/prism/ruby/ruby_parser_test.rb
+++ b/test/prism/ruby/ruby_parser_test.rb
@@ -53,6 +53,7 @@ module Prism
"alias.txt",
"dsym_str.txt",
"dos_endings.txt",
+ "heredocs_with_fake_newlines.txt",
"heredocs_with_ignored_newlines.txt",
"method_calls.txt",
"methods.txt",