summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Newton <[email protected]> 2024-02-09 11:16:03 -0500
committer git <[email protected]> 2024-02-09 16:27:05 +0000
commit cf1cd215c0a057da123ec9753091154230b3dc97 (patch)
tree 68957677cae8e9df76d5d8691265e71cfc6741e1
parent 5c2d96df194abcb7d9d6f154635c30f7d8811c13 (diff)
[ruby/prism] Significantly faster offset cache for parser
https://github.com/ruby/prism/commit/8cd92eef79
-rw-r--r-- lib/prism/translation/parser.rb 19
1 file changed, 10 insertions, 9 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 6723216d00..6e678dde6b 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -124,20 +124,21 @@ module Prism
# build the parser gem AST.
#
# If the bytesize of the source is the same as the length, then we can
- # just use the offset directly. Otherwise, we build a hash that functions
- # as a cache for the conversion.
- #
- # This is a good opportunity for some optimizations. If the source file
- # has any multi-byte characters, this can tank the performance of the
- # translator. We could make this significantly faster by using a
- # different data structure for the cache.
+ # just use the offset directly. Otherwise, we build an array where the
+ # index is the byte offset and the value is the character offset.
def build_offset_cache(source)
if source.bytesize == source.length
-> (offset) { offset }
else
- Hash.new do |hash, offset|
- hash[offset] = source.byteslice(0, offset).length
+ offset_cache = []
+ offset = 0
+
+ source.each_char do |char|
+ char.bytesize.times { offset_cache << offset }
+ offset += 1
end
+
+ offset_cache << offset
end
end