summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'string.c')
-rw-r--r--string.c106
1 files changed, 90 insertions, 16 deletions
diff --git a/string.c b/string.c
index d43a6391be..98dbf1b706 100644
--- a/string.c
+++ b/string.c
@@ -89,6 +89,9 @@ VALUE rb_cSymbol;
* another string (the shared root).
* 3: STR_CHILLED (will be frozen in a future version)
* The string appears frozen but can be mutated with a warning.
+ * 4: STR_PRECOMPUTED_HASH
+ * The string is embedded and has its precomputed hash code stored
+ * after the terminator.
* 5: STR_SHARED_ROOT
* Other strings may point to the contents of this string. When this
* flag is set, STR_SHARED must not be set.
@@ -116,6 +119,7 @@ VALUE rb_cSymbol;
*/
#define RUBY_MAX_CHAR_LEN 16
+#define STR_PRECOMPUTED_HASH FL_USER4
#define STR_SHARED_ROOT FL_USER5
#define STR_BORROWED FL_USER6
#define STR_TMPLOCK FL_USER7
@@ -240,6 +244,11 @@ rb_str_size_as_embedded(VALUE str)
else {
real_size = sizeof(struct RString);
}
+
+ if (FL_TEST_RAW(str, STR_PRECOMPUTED_HASH)) {
+ real_size += sizeof(st_index_t);
+ }
+
return real_size;
}
@@ -257,6 +266,7 @@ static VALUE str_new(VALUE klass, const char *ptr, long len);
static void str_make_independent_expand(VALUE str, long len, long expand, const int termlen);
static inline void str_modifiable(VALUE str);
static VALUE rb_str_downcase(int argc, VALUE *argv, VALUE str);
+static inline VALUE str_alloc_embed(VALUE klass, size_t capa);
static inline void
str_make_independent(VALUE str)
@@ -334,7 +344,7 @@ mustnot_wchar(VALUE str)
static int fstring_cmp(VALUE a, VALUE b);
-static VALUE register_fstring(VALUE str, bool copy);
+static VALUE register_fstring(VALUE str, bool copy, bool precompute_hash);
const struct st_hash_type rb_fstring_hash_type = {
fstring_cmp,
@@ -343,9 +353,42 @@ const struct st_hash_type rb_fstring_hash_type = {
#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
+/*
+ * Computes the hash value of a string from its bytes.
+ *
+ * The bytes are hashed with rb_memhash(). For a non-empty string whose
+ * encoding index is non-zero and for which is_ascii_string() is false,
+ * the encoding index is additionally mixed into the result.
+ */
+static inline st_index_t
+str_do_hash(VALUE str)
+{
+    const long len = RSTRING_LEN(str);
+    st_index_t hash = rb_memhash((const void *)RSTRING_PTR(str), len);
+
+    /* An empty string never contributes its encoding to the hash. */
+    if (len == 0) {
+        return hash;
+    }
+
+    const int enc_index = ENCODING_GET(str);
+    if (enc_index != 0 && !is_ascii_string(str)) {
+        hash = rb_hash_end(rb_hash_uint32(hash, (uint32_t)enc_index));
+    }
+    return hash;
+}
+
+/*
+ * Computes the hash of an embedded string, stores it in the bytes that
+ * immediately follow the string's terminator, and sets the
+ * STR_PRECOMPUTED_HASH flag on the string.
+ *
+ * The string must be embedded and must not already carry the flag.
+ * When RUBY_DEBUG is enabled, the spare embed capacity is also asserted
+ * to be at least sizeof(st_index_t).
+ *
+ * Returns str.
+ */
+static VALUE
+str_precompute_hash(VALUE str)
+{
+    RUBY_ASSERT(!FL_TEST_RAW(str, STR_PRECOMPUTED_HASH));
+    RUBY_ASSERT(STR_EMBED_P(str));
+
+#if RUBY_DEBUG
+    size_t occupied = (RSTRING_LEN(str) + TERM_LEN(str));
+    size_t spare = str_embed_capa(str) - occupied;
+    RUBY_ASSERT(spare >= sizeof(st_index_t));
+#endif
+
+    /* The slot after the terminator has no alignment guarantee, so the
+     * value is copied byte-wise rather than stored through an
+     * st_index_t pointer. */
+    const st_index_t hash = str_do_hash(str);
+    memcpy(RSTRING_END(str) + TERM_LEN(str), &hash, sizeof(hash));
+
+    FL_SET(str, STR_PRECOMPUTED_HASH);
+
+    return str;
+}
+
struct fstr_update_arg { /* filled in by register_fstring(); copy and precompute_hash are read by fstr_update_callback() */
VALUE fstr; /* NOTE(review): use not visible in this excerpt; presumably the resulting interned string -- confirm */
bool copy; /* when set and the key is a fake string, the callback builds a newly allocated String from its bytes */
+ bool precompute_hash; /* with copy: if it fits, the new String is allocated embedded and its hash is stored after the terminator */
};
static int
@@ -370,8 +413,23 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data, int exist
else {
if (FL_TEST_RAW(str, STR_FAKESTR)) {
if (arg->copy) {
- VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len);
- rb_enc_copy(new_str, str);
+ VALUE new_str;
+ long len = RSTRING_LEN(str);
+ long capa = len + sizeof(st_index_t);
+ int term_len = TERM_LEN(str);
+
+ if (arg->precompute_hash && STR_EMBEDDABLE_P(capa, term_len)) {
+ new_str = str_alloc_embed(rb_cString, capa + term_len);
+ memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str), len);
+ STR_SET_LEN(new_str, RSTRING_LEN(str));
+ TERM_FILL(RSTRING_END(new_str), TERM_LEN(str));
+ rb_enc_copy(new_str, str);
+ str_precompute_hash(new_str);
+ }
+ else {
+ new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len);
+ rb_enc_copy(new_str, str);
+ }
str = new_str;
}
else {
@@ -428,7 +486,7 @@ rb_fstring(VALUE str)
if (!FL_TEST_RAW(str, FL_FREEZE | STR_NOFREE | STR_CHILLED))
rb_str_resize(str, RSTRING_LEN(str));
- fstr = register_fstring(str, FALSE);
+ fstr = register_fstring(str, false, false);
if (!bare) {
str_replace_shared_without_enc(str, fstr);
@@ -439,10 +497,12 @@ rb_fstring(VALUE str)
}
static VALUE
-register_fstring(VALUE str, bool copy)
+register_fstring(VALUE str, bool copy, bool precompute_hash)
{
- struct fstr_update_arg args;
- args.copy = copy;
+ struct fstr_update_arg args = {
+ .copy = copy,
+ .precompute_hash = precompute_hash
+ };
RB_VM_LOCK_ENTER();
{
@@ -500,14 +560,14 @@ VALUE
rb_fstring_new(const char *ptr, long len)
{
struct RString fake_str;
- return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), FALSE);
+ return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), false, false);
}
VALUE
rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc)
{
struct RString fake_str;
- return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), FALSE);
+    /* Build the lookup key from the local fake_str, then register it
+     * with both copy and precompute_hash turned off. */
+    VALUE key = rb_setup_fake_str(&fake_str, ptr, len, enc);
+    return register_fstring(key, false, false);
}
VALUE
@@ -3658,12 +3718,15 @@ rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
+/*
+ * Returns the hash value of str.
+ *
+ * When the string carries STR_PRECOMPUTED_HASH, the value stored right
+ * after its terminator is returned instead of being recomputed (and is
+ * cross-checked against str_do_hash() when assertions are enabled).
+ * Otherwise the hash is computed by str_do_hash().
+ */
st_index_t
rb_str_hash(VALUE str)
{
- st_index_t h = rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str));
- int e = RSTRING_LEN(str) ? ENCODING_GET(str) : 0;
- if (e && !is_ascii_string(str)) {
- h = rb_hash_end(rb_hash_uint32(h, (uint32_t)e));
+    if (FL_TEST_RAW(str, STR_PRECOMPUTED_HASH)) {
+        /* The stored hash sits at an arbitrary byte offset, so it must
+         * not be read through an st_index_t-aligned lvalue: that is a
+         * misaligned access. Copy the bytes out instead. */
+        st_index_t precomputed_hash;
+        memcpy(&precomputed_hash, RSTRING_END(str) + TERM_LEN(str), sizeof(precomputed_hash));
+
+        RUBY_ASSERT(precomputed_hash == str_do_hash(str));
+        return precomputed_hash;
}
- return h;
+
+    return str_do_hash(str);
}
int
@@ -12133,7 +12196,7 @@ VALUE
rb_interned_str(const char *ptr, long len)
{
struct RString fake_str;
- return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), TRUE);
+ return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), true, false);
}
VALUE
@@ -12150,7 +12213,18 @@ rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
}
struct RString fake_str;
- return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), TRUE);
+ return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), true, false);
+}
+
+/*
+ * Registers the bytes ptr[0, len) with encoding enc through
+ * register_fstring(), requesting both a copy of a fake-string key and a
+ * precomputed hash.
+ *
+ * If enc is non-NULL and rb_enc_autoload_p() reports true for it,
+ * rb_enc_autoload() is called first.
+ */
+VALUE
+rb_enc_literal_str(const char *ptr, long len, rb_encoding *enc)
+{
+    struct RString fake_str;
+
+    if (enc != NULL) {
+        if (UNLIKELY(rb_enc_autoload_p(enc))) {
+            rb_enc_autoload(enc);
+        }
+    }
+
+    VALUE key = rb_setup_fake_str(&fake_str, ptr, len, enc);
+    return register_fstring(key, true, true);
}
VALUE