string.c: Add fastpath to single_byte_optimizable

`rb_enc_from_index` is a costly operation, so it is worth avoiding the call for the common encodings. Also, in the case of UTF-8, it is more efficient to scan the coderange when it is unknown than to fall back to the slower algorithms.
author: Jean Boussier <[email protected]> 2024-08-09 15:24:49 +0200
committer: Jean Boussier <[email protected]> 2024-08-09 22:06:44 +0200
commit: a332367dad3172e8a8c12efed3913c7fde684b06 (patch)
tree: 9b150e58a54cf5d07b56b2a98ec01430e5f07855
parent: 5a570421a5a773945aa8f9497b5d64bac809a196 (diff)
1 files changed, 17 insertions, 9 deletions
diff --git a/string.c b/string.c
index 5dd0496ca8..5db35724e1 100644
--- a/string.c
+++ b/string.c
@@ -594,22 +594,30 @@ fstring_cmp(VALUE a, VALUE b)
             memcmp(aptr, bptr, alen) != 0);
 }
 
-static inline int
+static inline bool
 single_byte_optimizable(VALUE str)
 {
-    rb_encoding *enc;
-
+    int encindex = ENCODING_GET(str);
+    switch (encindex) {
+      case ENCINDEX_ASCII_8BIT:
+      case ENCINDEX_US_ASCII:
+        return true;
+      case ENCINDEX_UTF_8:
+        // For UTF-8 it's worth scanning the string coderange when unknown.
+        return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
+    }
     /* Conservative.  It may be ENC_CODERANGE_UNKNOWN. */
-    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
-        return 1;
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+        return true;
+    }
 
-    enc = STR_ENC_GET(str);
-    if (rb_enc_mbmaxlen(enc) == 1)
-        return 1;
+    if (rb_enc_mbmaxlen(rb_enc_from_index(encindex)) == 1) {
+        return true;
+    }
 
     /* Conservative.  Possibly single byte.
      * "\xa1" in Shift_JIS for example. */
-    return 0;
+    return false;
 }
 
 VALUE rb_fs;
author	Jean Boussier <[email protected]>	2024-08-09 15:24:49 +0200
committer	Jean Boussier <[email protected]>	2024-08-09 22:06:44 +0200
commit	a332367dad3172e8a8c12efed3913c7fde684b06 (patch)
tree	9b150e58a54cf5d07b56b2a98ec01430e5f07855
parent	5a570421a5a773945aa8f9497b5d64bac809a196 (diff)