summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jean Boussier <[email protected]> 2024-09-03 09:38:18 +0200
committer: Jean Boussier <[email protected]> 2024-09-03 12:21:36 +0200
commit: b7fa2dd0d0b7328f8f713b7cf0332d1e34c20fe4 (patch)
tree: a4ac16a74fbe895602302b3950efaaa8db4464ed
parent: 245ed2fc890f7f33526e1c0f42b79c30366493e6 (diff)
rb_enc_str_asciionly_p: avoid always fetching the encoding
Profiling of `JSON.dump` shows a significant amount of time spent in `rb_enc_str_asciionly_p`, in large part because it fetches the encoding. It can be made twice as fast in this scenario by first checking the coderange and only falling back to fetching the encoding if the coderange is unknown. Additionally, we can skip fetching the encoding for the most common encodings.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/11533
-rw-r--r-- string.c 32
1 files changed, 23 insertions, 9 deletions
diff --git a/string.c b/string.c
index 025382c0f4..0dd02f8253 100644
--- a/string.c
+++ b/string.c
@@ -137,10 +137,10 @@ VALUE rb_cSymbol;
} while (0)
static inline bool
-str_enc_fastpath(VALUE str)
+str_encindex_fastpath(int encindex)
{
// The overwhelming majority of strings are in one of these 3 encodings.
- switch (ENCODING_GET_INLINED(str)) {
+ switch (encindex) {
case ENCINDEX_ASCII_8BIT:
case ENCINDEX_UTF_8:
case ENCINDEX_US_ASCII:
@@ -150,6 +150,12 @@ str_enc_fastpath(VALUE str)
}
}
+static inline bool
+str_enc_fastpath(VALUE str)
+{
+ return str_encindex_fastpath(ENCODING_GET_INLINED(str));
+}
+
#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
@@ -862,16 +868,24 @@ rb_enc_str_coderange(VALUE str)
return cr;
}
+static inline bool
+rb_enc_str_asciicompat(VALUE str)
+{
+ int encindex = ENCODING_GET_INLINED(str);
+ return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
+}
+
int
rb_enc_str_asciionly_p(VALUE str)
{
- rb_encoding *enc = STR_ENC_GET(str);
-
- if (!rb_enc_asciicompat(enc))
- return FALSE;
- else if (is_ascii_string(str))
- return TRUE;
- return FALSE;
+ switch(ENC_CODERANGE(str)) {
+ case ENC_CODERANGE_UNKNOWN:
+ return rb_enc_str_asciicompat(str) && is_ascii_string(str);
+ case ENC_CODERANGE_7BIT:
+ return true;
+ default:
+ return false;
+ }
}
static inline void