diff options
author | Kevin Newton <[email protected]> | 2024-02-09 11:16:03 -0500 |
---|---|---|
committer | git <[email protected]> | 2024-02-09 16:27:05 +0000 |
commit | cf1cd215c0a057da123ec9753091154230b3dc97 (patch) | |
tree | 68957677cae8e9df76d5d8691265e71cfc6741e1 | |
parent | 5c2d96df194abcb7d9d6f154635c30f7d8811c13 (diff) |
[ruby/prism] Significantly faster offset cache for parser
https://github.com/ruby/prism/commit/8cd92eef79
-rw-r--r-- | lib/prism/translation/parser.rb | 19 |
1 files changed, 10 insertions, 9 deletions
```diff
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 6723216d00..6e678dde6b 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -124,20 +124,21 @@ module Prism
       # build the parser gem AST.
       #
       # If the bytesize of the source is the same as the length, then we can
-      # just use the offset directly. Otherwise, we build a hash that functions
-      # as a cache for the conversion.
-      #
-      # This is a good opportunity for some optimizations. If the source file
-      # has any multi-byte characters, this can tank the performance of the
-      # translator. We could make this significantly faster by using a
-      # different data structure for the cache.
+      # just use the offset directly. Otherwise, we build an array where the
+      # index is the byte offset and the value is the character offset.
       def build_offset_cache(source)
         if source.bytesize == source.length
           -> (offset) { offset }
         else
-          Hash.new do |hash, offset|
-            hash[offset] = source.byteslice(0, offset).length
+          offset_cache = []
+          offset = 0
+
+          source.each_char do |char|
+            char.bytesize.times { offset_cache << offset }
+            offset += 1
           end
+
+          offset_cache << offset
         end
       end
 
```