summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEarlopain <[email protected]>2025-01-11 22:25:09 +0100
committergit <[email protected]>2025-01-12 00:49:54 +0000
commit723f31cf6ba307df0058426b515a2b578340b37b (patch)
treef30c44bda11a4e30af90cc5776ed43d9a2f26184 /lib
parent8e56d9e4156f2c609636603ca0ccbcb8db6340b1 (diff)
[ruby/prism] Fix binary encoding for the parser translator
Skipping encoding detection is almost always right; only for binary sources should it actually happen. A symbol containing escapes that are invalid in UTF-8 would fail to parse, since symbols must be valid in the script encoding. Additionally, the parser gem would raise an exception somewhere during string handling. https://github.com/ruby/prism/commit/fa0154d9e4
Diffstat (limited to 'lib')
-rw-r--r--lib/prism/translation/parser.rb20
1 files changed, 17 insertions, 3 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 4f28afa86f..6b417be423 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -51,7 +51,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ result = unwrap(Prism.parse(source, **prism_options), offset_cache)
build_ast(result.value, offset_cache)
ensure
@@ -64,7 +64,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ result = unwrap(Prism.parse(source, **prism_options), offset_cache)
[
build_ast(result.value, offset_cache),
@@ -83,7 +83,7 @@ module Prism
offset_cache = build_offset_cache(source)
result =
begin
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ unwrap(Prism.parse_lex(source, **prism_options), offset_cache)
rescue ::Parser::SyntaxError
raise if !recover
end
@@ -285,6 +285,20 @@ module Prism
)
end
+ # Builds the keyword options that are splatted into Prism.parse and Prism.parse_lex: filepath, version, partial_script, and (for non-binary sources) encoding: false.
+ def prism_options
+ options = {
+ filepath: @source_buffer.name,
+ version: convert_for_prism(version),
+ partial_script: true,
+ }
+ # The parser gem always encodes to UTF-8, unless it is binary, so encoding detection is disabled for every source except binary ones.
+ # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107
+ options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY
+
+ options
+ end
+
# Converts the version format handled by Parser to the format handled by Prism.
def convert_for_prism(version)
case version