summaryrefslogtreecommitdiff
path: root/encoding.c
diff options
context:
space:
mode:
author: Jean Boussier <[email protected]> 2024-11-13 09:34:45 +0100
committer: Jean Boussier <[email protected]> 2024-11-13 13:32:32 +0100
commit: fae86a701edf9afef6b05199fe8f6651b1e155ea (patch)
tree: c78e7ffdcc5c612d31b816f0fb31d298a5ece468 /encoding.c
parent: bfb4783c0133e2b3b39093c439100dbe1b546c4e (diff)
string.c: Directly create strings with the correct encoding
While profiling msgpack-ruby I noticed a very substantial amount of time spent in `rb_enc_associate_index`, called by `rb_utf8_str_new`. On that benchmark, `rb_utf8_str_new` is 33% of the total runtime, in large part because it causes GC to trigger often, but even then `5.3%` of the total runtime is spent in `rb_enc_associate_index` called by `rb_utf8_str_new`. After closer inspection, it appears that it's performing a lot of safety checks we can assert we don't need, and other useless extra operations, because strings are first created and filled as ASCII-8BIT and then later reassociated to the desired encoding. Directly allocating the string with the right encoding allows us to skip a lot of duplicated and useless operations. After this change, the time spent in `rb_utf8_str_new` is down to `28.4%` of total runtime, and most of that is GC.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/12076
Diffstat (limited to 'encoding.c')
-rw-r--r--encoding.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c
index d4fe6ea124..e6b49ef145 100644
--- a/encoding.c
+++ b/encoding.c
@@ -968,6 +968,21 @@ enc_set_index(VALUE obj, int idx)
}
void
+rb_enc_raw_set(VALUE obj, rb_encoding *enc) /* Record enc's index on obj; unlike rb_enc_set_index, no rb_check_frozen() call is made here. */
+{
+ RUBY_ASSERT(enc_capable(obj)); /* obj is asserted to be encoding-capable */
+
+ int idx = enc ? ENC_TO_ENCINDEX(enc) : 0; /* a NULL enc is treated as index 0 */
+
+ if (idx < ENCODING_INLINE_MAX) { /* index below ENCODING_INLINE_MAX: set it via ENCODING_SET_INLINED and stop */
+ ENCODING_SET_INLINED(obj, idx);
+ return;
+ }
+ ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); /* otherwise ENCODING_INLINE_MAX itself is what gets set inline ... */
+ rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); /* ... and the actual index is stored in an instance variable keyed by rb_id_encoding() */
+}
+
+void
rb_enc_set_index(VALUE obj, int idx)
{
rb_check_frozen(obj);