diff options
author | Earlopain <[email protected]> | 2025-01-11 22:25:09 +0100 |
---|---|---|
committer | git <[email protected]> | 2025-01-12 00:49:54 +0000 |
commit | 723f31cf6ba307df0058426b515a2b578340b37b (patch) | |
tree | f30c44bda11a4e30af90cc5776ed43d9a2f26184 /lib | |
parent | 8e56d9e4156f2c609636603ca0ccbcb8db6340b1 (diff) |
[ruby/prism] Fix binary encoding for the parser translator
Skipping encoding detection is almost always right; it should only actually happen when the source is binary.
Previously, a symbol containing escapes that are invalid
in UTF-8 would fail to parse, since symbols must be valid in the script encoding.
Additionally, the parser gem would raise an exception somewhere during string handling.
https://github.com/ruby/prism/commit/fa0154d9e4
Diffstat (limited to 'lib')
-rw-r--r-- | lib/prism/translation/parser.rb | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb index 4f28afa86f..6b417be423 100644 --- a/lib/prism/translation/parser.rb +++ b/lib/prism/translation/parser.rb @@ -51,7 +51,7 @@ module Prism source = source_buffer.source offset_cache = build_offset_cache(source) - result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache) + result = unwrap(Prism.parse(source, **prism_options), offset_cache) build_ast(result.value, offset_cache) ensure @@ -64,7 +64,7 @@ module Prism source = source_buffer.source offset_cache = build_offset_cache(source) - result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache) + result = unwrap(Prism.parse(source, **prism_options), offset_cache) [ build_ast(result.value, offset_cache), @@ -83,7 +83,7 @@ module Prism offset_cache = build_offset_cache(source) result = begin - unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache) + unwrap(Prism.parse_lex(source, **prism_options), offset_cache) rescue ::Parser::SyntaxError raise if !recover end @@ -285,6 +285,20 @@ module Prism ) end + # Options for how prism should parse/lex the source. + def prism_options + options = { + filepath: @source_buffer.name, + version: convert_for_prism(version), + partial_script: true, + } + # The parser gem always encodes to UTF-8, unless it is binary. + # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107 + options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY + + options + end + # Converts the version format handled by Parser to the format handled by Prism. def convert_for_prism(version) case version |