diff options
author | Jean Boussier <[email protected]> | 2025-02-10 17:30:34 +0100 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2025-02-12 10:23:50 +0100 |
commit | f32d5071b7b01f258eb45cf533496d82d5c0f6a1 (patch) | |
tree | 8d06f73cf4b023e1a990708add70118f47994829 /string.c | |
parent | b8db606d2c721b4c3a439b9602771623ac01b288 (diff) |
Elide string allocation when using `String#gsub` in MAP mode
If the provided Hash doesn't have a default proc (the code also checks
`rb_hash_default_unredefined(hash)`), we know for sure that the lookup
will never call into user-provided code, so the string we allocate to
access the Hash can't possibly escape.
That means we don't actually have to allocate it: we can use a fake_str,
AKA a stack-allocated string, as the lookup key instead.
```
compare-ruby: ruby 3.5.0dev (2025-02-10T13:47:44Z master 3fb455adab) +PRISM [arm64-darwin23]
built-ruby: ruby 3.5.0dev (2025-02-10T17:09:52Z opt-gsub-alloc ea5c28958f) +PRISM [arm64-darwin23]
warming up....
| |compare-ruby|built-ruby|
|:----------------|-----------:|---------:|
|escape | 3.374k| 3.722k|
| | -| 1.10x|
|escape_bin | 5.469k| 6.587k|
| | -| 1.20x|
|escape_utf8 | 3.465k| 3.734k|
| | -| 1.08x|
|escape_utf8_bin | 5.752k| 7.283k|
| | -| 1.27x|
```
Notes:
Merged: https://github.com/ruby/ruby/pull/12730
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 19 |
1 files changed, 17 insertions, 2 deletions
@@ -31,6 +31,7 @@ #include "internal/encoding.h" #include "internal/error.h" #include "internal/gc.h" +#include "internal/hash.h" #include "internal/numeric.h" #include "internal/object.h" #include "internal/proc.h" @@ -6295,7 +6296,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) VALUE pat, val = Qnil, repl, match0 = Qnil, dest, hash = Qnil; long beg, beg0, end0; long offset, blen, slen, len, last; - enum {STR, ITER, MAP} mode = STR; + enum {STR, ITER, FAST_MAP, MAP} mode = STR; char *sp, *cp; int need_backref = -1; rb_encoding *str_enc; @@ -6311,6 +6312,9 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) if (NIL_P(hash)) { StringValue(repl); } + else if (rb_hash_default_unredefined(hash) && !FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) { + mode = FAST_MAP; + } else { mode = MAP; } @@ -6355,7 +6359,18 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) val = rb_obj_as_string(rb_yield(match0)); } else { - val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0)); + struct RString fake_str; + VALUE key; + if (mode == FAST_MAP) { + // It is safe to use a fake_str here because we established that it won't escape, + // as it's only used for `rb_hash_aref` and we checked the hash doesn't have a + // default proc. + key = setup_fake_str(&fake_str, sp + beg0, end0 - beg0, ENCODING_GET_INLINED(str)); + } + else { + key = rb_str_subseq(str, beg0, end0 - beg0); + } + val = rb_hash_aref(hash, key); val = rb_obj_as_string(val); } str_mod_check(str, sp, slen); |