diff options
author | Jean Boussier <[email protected]> | 2024-08-09 15:24:49 +0200 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2024-08-09 22:06:44 +0200 |
commit | a332367dad3172e8a8c12efed3913c7fde684b06 (patch) | |
tree | 9b150e58a54cf5d07b56b2a98ec01430e5f07855 | |
parent | 5a570421a5a773945aa8f9497b5d64bac809a196 (diff) |
string.c: Add fastpath to single_byte_optimizable
`rb_enc_from_index` is a costly operation, so it is worth avoiding
calling it for the common encodings.
Also, in the case of UTF-8, it's more efficient to scan the
coderange when it is unknown than to fall back to the slower
algorithms.
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/11353
-rw-r--r-- | string.c | 26 |
1 files changed, 17 insertions, 9 deletions
@@ -594,22 +594,30 @@ fstring_cmp(VALUE a, VALUE b)
             memcmp(aptr, bptr, alen) != 0);
 }
 
-static inline int
+static inline bool
 single_byte_optimizable(VALUE str)
 {
-    rb_encoding *enc;
-
+    int encindex = ENCODING_GET(str);
+    switch (encindex) {
+      case ENCINDEX_ASCII_8BIT:
+      case ENCINDEX_US_ASCII:
+        return true;
+      case ENCINDEX_UTF_8:
+        // For UTF-8 it's worth scanning the string coderange when unknown.
+        return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
+    }
     /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
-    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
-        return 1;
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+        return true;
+    }
 
-    enc = STR_ENC_GET(str);
-    if (rb_enc_mbmaxlen(enc) == 1)
-        return 1;
+    if (rb_enc_mbmaxlen(rb_enc_from_index(encindex)) == 1) {
+        return true;
+    }
 
     /* Conservative. Possibly single byte.
      * "\xa1" in Shift_JIS for example. */
-    return 0;
+    return false;
 }
 
 VALUE rb_fs;