summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/irb/ruby-lex.rb 37
-rw-r--r-- test/irb/test_ruby_lex.rb 21
2 files changed, 57 insertions, 1 deletions
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index 77c5b07ae9..333d4ac452 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -124,9 +124,42 @@ class RubyLex
"#{local_variables.join('=')}=nil;" unless local_variables.empty?
end
+ # Ripper's token list can skip parts of the code (examples: the DATA part, or the text after
+ # heredoc_beg when a heredoc has an unclosed embexpr). This fills every skipped byte range with
+ # a synthetic :on_ignored_by_ripper element so that tokens.map(&:tok).join is equal to code.
+ def self.interpolate_ripper_ignored_tokens(code, tokens) # returns a new Array; tokens itself is not modified
+ line_positions = [0] # line_positions[i] is the byte offset at which line i + 1 of code starts
+ code.lines.each do |line|
+ line_positions << line_positions.last + line.bytesize
+ end
+ prev_byte_pos = 0 # byte offset in code just past the last token handled so far
+ interpolated = []
+ prev_line = 1 # 1-based line number on which prev_byte_pos lies
+ tokens.each do |t|
+ line, col = t.pos # used below as a 1-based line number and a byte column within that line
+ byte_pos = line_positions[line - 1] + col
+ if prev_byte_pos < byte_pos # some bytes between the previous token and this one are not covered
+ tok = code.byteslice(prev_byte_pos...byte_pos)
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] # [line, byte column] where the gap starts
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
+ prev_line += tok.count("\n") # the gap may span several lines
+ end
+ interpolated << t
+ prev_byte_pos = byte_pos + t.tok.bytesize
+ prev_line += t.tok.count("\n") # keep prev_line in step with prev_byte_pos for multi-line tokens
+ end
+ if prev_byte_pos < code.bytesize # bytes remain after the last token
+ tok = code.byteslice(prev_byte_pos..)
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
+ end
+ interpolated
+ end
+
def self.ripper_lex_without_warning(code, context: nil)
verbose, $VERBOSE = $VERBOSE, nil
lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
+ original_code = code
if lvars_code
code = "#{lvars_code}\n#{code}"
line_no = 0
@@ -136,7 +169,8 @@ class RubyLex
compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
lexer = Ripper::Lexer.new(inner_code, '-', line_no)
- lexer.scan.each_with_object([]) do |t, tokens|
+ tokens = []
+ lexer.scan.each do |t|
next if t.pos.first == 0
prev_tk = tokens.last
position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
@@ -146,6 +180,7 @@ class RubyLex
tokens << t
end
end
+ interpolate_ripper_ignored_tokens(original_code, tokens)
end
ensure
$VERBOSE = verbose
diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb
index 9d7910cca6..5630dd2953 100644
--- a/test/irb/test_ruby_lex.rb
+++ b/test/irb/test_ruby_lex.rb
@@ -103,6 +103,27 @@ module TestIRB
[indent, code_block_open]
end
+ def test_interpolate_token_with_heredoc_and_unclosed_embexpr
+ code = <<~'EOC'
+ ①+<<A-②
+ #{③*<<B/④
+ #{⑤&<<C|⑥
+ EOC
+ ripper_tokens = Ripper.tokenize(code)
+ rubylex_tokens = RubyLex.ripper_lex_without_warning(code)
+ # Assert no missing part
+ assert_equal(code, rubylex_tokens.map(&:tok).join)
+ # Assert ripper tokens are not removed
+ ripper_tokens.each do |tok|
+ assert(rubylex_tokens.any? { |t| t.tok == tok && t.tok != :on_ignored_by_ripper })
+ end
+ # Assert interpolated token position
+ rubylex_tokens.each do |t|
+ row, col = t.pos
+ assert_equal t.tok, code.lines[row - 1].byteslice(col, t.tok.bytesize)
+ end
+ end
+
def test_auto_indent
input_with_correct_indents = [
Row.new(%q(def each_top_level_statement), nil, 2),