summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Zhu <[email protected]> 2023-06-06 10:19:20 -0400
committer: GitHub <[email protected]> 2023-06-06 10:19:20 -0400
commit: 7577c101ed6452de3e72fadb43db595946acc701 (patch)
tree: deed85a09fc431132145897d7e1982fd61c74e0c
parent: fae2f80d06f5058b40e91f62ba27fb01f2463d12 (diff)
Unify length field for embedded and heap strings (#7908)
* Unify length field for embedded and heap strings

  The length field is of the same type and position in RString for both
  embedded and heap-allocated strings, so we can unify it.

* Remove RSTRING_EMBED_LEN
Notes
Notes: Merged-By: maximecb <[email protected]>
-rw-r--r-- ext/-test-/string/cstr.c | 5
-rw-r--r-- include/ruby/internal/core/rstring.h | 56
-rw-r--r-- lib/ruby_vm/rjit/insn_compiler.rb | 5
-rw-r--r-- rjit_c.rb | 3
-rw-r--r-- string.c | 104
-rw-r--r-- test/-ext-/string/test_capacity.rb | 2
-rw-r--r-- yjit.c | 3
-rw-r--r-- yjit/src/codegen.rs | 7
-rw-r--r-- yjit/src/cruby_bindings.inc.rs | 3
9 files changed, 69 insertions, 119 deletions
diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c
index ecca793145..b0b1ef5374 100644
--- a/ext/-test-/string/cstr.c
+++ b/ext/-test-/string/cstr.c
@@ -61,13 +61,12 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen)
if (RSTRING_LEN(str) < beg) rb_raise(rb_eIndexError, "beg: %ld", beg);
if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len);
str = rb_str_new_shared(str);
+ RSTRING(str)->len = len;
if (STR_EMBED_P(str)) {
- RSTRING(str)->as.embed.len = (short)len;
memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len);
}
else {
RSTRING(str)->as.heap.ptr += beg;
- RSTRING(str)->as.heap.len = len;
}
return str;
}
@@ -114,7 +113,7 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str)
RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6);
RSTRING(str2)->as.heap.aux.capa = capacity;
RSTRING(str2)->as.heap.ptr = buf;
- RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
+ RSTRING(str2)->len = RSTRING_LEN(str);
TERM_FILL(RSTRING_END(str2), TERM_LEN(str));
return str2;
}
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index 777fe6dab1..a03ad5b2ec 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -43,7 +43,6 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
#define RSTRING_FSTR RSTRING_FSTR
-#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
#define RSTRING_LENINT RSTRING_LENINT
#define RSTRING_PTR RSTRING_PTR
@@ -199,6 +198,13 @@ struct RString {
/** Basic part, including flags and class. */
struct RBasic basic;
+ /**
+ * Length of the string, not including terminating NUL character.
+ *
+ * @note This is in bytes.
+ */
+ long len;
+
/** String's specific fields. */
union {
@@ -207,14 +213,6 @@ struct RString {
* pattern.
*/
struct {
-
- /**
- * Length of the string, not including terminating NUL character.
- *
- * @note This is in bytes.
- */
- long len;
-
/**
* Pointer to the contents of the string. In the old days each
* string had dedicated memory regions. That is no longer true
@@ -245,7 +243,6 @@ struct RString {
/** Embedded contents. */
struct {
- long len;
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
@@ -364,24 +361,12 @@ RBIMPL_ATTR_ARTIFICIAL()
*
* @param[in] str String in question.
* @return Its length, in bytes.
- * @pre `str` must be an instance of ::RString, and must has its
- * ::RSTRING_NOEMBED flag off.
- *
- * @internal
- *
- * This was a macro before. It was inevitable to be public, since macros are
- * global constructs. But should it be forever? Now that it is a function,
- * @shyouhei thinks it could just be eliminated, hidden into implementation
- * details.
+ * @pre `str` must be an instance of ::RString.
*/
static inline long
-RSTRING_EMBED_LEN(VALUE str)
+RSTRING_LEN(VALUE str)
{
- RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
- RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
-
- long f = RSTRING(str)->as.embed.len;
- return f;
+ return RSTRING(str)->len;
}
RBIMPL_WARNING_PUSH()
@@ -411,7 +396,7 @@ rbimpl_rstring_getmem(VALUE str)
else {
/* Expecting compilers to optimize this on-stack struct away. */
struct RString retval;
- retval.as.heap.len = RSTRING_EMBED_LEN(str);
+ retval.len = RSTRING_LEN(str);
retval.as.heap.ptr = RSTRING(str)->as.embed.ary;
return retval;
}
@@ -419,21 +404,6 @@ rbimpl_rstring_getmem(VALUE str)
RBIMPL_WARNING_POP()
-RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_ARTIFICIAL()
-/**
- * Queries the length of the string.
- *
- * @param[in] str String in question.
- * @return Its length, in bytes.
- * @pre `str` must be an instance of ::RString.
- */
-static inline long
-RSTRING_LEN(VALUE str)
-{
- return rbimpl_rstring_getmem(str).as.heap.len;
-}
-
RBIMPL_ATTR_ARTIFICIAL()
/**
* Queries the contents pointer of the string.
@@ -482,7 +452,7 @@ RSTRING_END(VALUE str)
rb_debug_rstring_null_ptr("RSTRING_END");
}
- return &buf.as.heap.ptr[buf.as.heap.len];
+ return &buf.as.heap.ptr[buf.len];
}
RBIMPL_ATTR_ARTIFICIAL()
@@ -516,7 +486,7 @@ RSTRING_LENINT(VALUE str)
__extension__ ({ \
struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
(ptrvar) = rbimpl_str.as.heap.ptr; \
- (lenvar) = rbimpl_str.as.heap.len; \
+ (lenvar) = rbimpl_str.len; \
})
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
diff --git a/lib/ruby_vm/rjit/insn_compiler.rb b/lib/ruby_vm/rjit/insn_compiler.rb
index 619f5078dc..d9a2ab52ae 100644
--- a/lib/ruby_vm/rjit/insn_compiler.rb
+++ b/lib/ruby_vm/rjit/insn_compiler.rb
@@ -2994,15 +2994,12 @@ module RubyVM::RJIT
# @param ctx [RubyVM::RJIT::Context]
# @param asm [RubyVM::RJIT::Assembler]
def jit_rb_str_empty_p(jit, ctx, asm, argc, known_recv_class)
- # Assume same offset to len embedded or not so we can use one code path to read the length
- #assert_equal(C.RString.offsetof(:as, :heap, :len), C.RString.offsetof(:as, :embed, :len))
-
recv_opnd = ctx.stack_pop(1)
out_opnd = ctx.stack_push(Type::UnknownImm)
asm.comment('get string length')
asm.mov(:rax, recv_opnd)
- str_len_opnd = [:rax, C.RString.offsetof(:as, :heap, :len)]
+ str_len_opnd = [:rax, C.RString.offsetof(:len)]
asm.cmp(str_len_opnd, 0)
asm.mov(:rax, Qfalse)
diff --git a/rjit_c.rb b/rjit_c.rb
index 9373507275..000fc0a5cc 100644
--- a/rjit_c.rb
+++ b/rjit_c.rb
@@ -857,11 +857,11 @@ module RubyVM::RJIT # :nodoc: all
@RString ||= CType::Struct.new(
"RString", Primitive.cexpr!("SIZEOF(struct RString)"),
basic: [self.RBasic, Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), basic)")],
+ len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), len)")],
as: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as)"),
heap: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap)"),
- len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, len)")],
ptr: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.heap, ptr)")],
aux: [CType::Union.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.heap.aux)"),
@@ -871,7 +871,6 @@ module RubyVM::RJIT # :nodoc: all
),
embed: CType::Struct.new(
"", Primitive.cexpr!("SIZEOF(((struct RString *)NULL)->as.embed)"),
- len: [CType::Immediate.parse("long"), Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, len)")],
ary: [CType::Pointer.new { CType::Immediate.parse("char") }, Primitive.cexpr!("OFFSETOF(((struct RString *)NULL)->as.embed, ary)")],
),
), Primitive.cexpr!("OFFSETOF((*((struct RString *)NULL)), as)")],
diff --git a/string.c b/string.c
index 470bc59b76..a7d05693f1 100644
--- a/string.c
+++ b/string.c
@@ -110,18 +110,9 @@ VALUE rb_cSymbol;
FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
-# define STR_SET_EMBED_LEN(str, n) do { \
- assert(str_embed_capa(str) > (n));\
- RSTRING(str)->as.embed.len = (n);\
-} while (0)
#define STR_SET_LEN(str, n) do { \
- if (STR_EMBED_P(str)) {\
- STR_SET_EMBED_LEN((str), (n));\
- }\
- else {\
- RSTRING(str)->as.heap.len = (n);\
- }\
+ RSTRING(str)->len = (n); \
} while (0)
static inline bool
@@ -158,7 +149,7 @@ str_enc_fastpath(VALUE str)
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
RSTRING(str)->as.heap.ptr = tmp;\
- RSTRING(str)->as.heap.len = tlen;\
+ RSTRING(str)->len = tlen;\
STR_SET_NOEMBED(str);\
RSTRING(str)->as.heap.aux.capa = (capacity);\
}\
@@ -222,7 +213,7 @@ rb_str_size_as_embedded(VALUE str)
{
size_t real_size;
if (STR_EMBED_P(str)) {
- real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+ real_size = rb_str_embed_size(RSTRING(str)->len) + TERM_LEN(str);
}
/* if the string is not currently embedded, but it can be embedded, how
* much space would it require */
@@ -275,10 +266,10 @@ rb_str_make_embedded(VALUE str)
RUBY_ASSERT(!STR_EMBED_P(str));
char *buf = RSTRING(str)->as.heap.ptr;
- long len = RSTRING(str)->as.heap.len;
+ long len = RSTRING(str)->len;
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
if (len > 0) {
memcpy(RSTRING_PTR(str), buf, len);
@@ -382,13 +373,13 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data, int exist
else {
if (FL_TEST_RAW(str, STR_FAKESTR)) {
if (arg->copy) {
- VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->as.heap.len);
+ VALUE new_str = str_new(rb_cString, RSTRING(str)->as.heap.ptr, RSTRING(str)->len);
rb_enc_copy(new_str, str);
str = new_str;
}
else {
str = str_new_static(rb_cString, RSTRING(str)->as.heap.ptr,
- RSTRING(str)->as.heap.len,
+ RSTRING(str)->len,
ENCODING_GET(str));
}
OBJ_FREEZE_RAW(str);
@@ -486,7 +477,7 @@ setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
ENCODING_SET_INLINED((VALUE)fake_str, encidx);
RBASIC_SET_CLASS_RAW((VALUE)fake_str, rb_cString);
- fake_str->as.heap.len = len;
+ fake_str->len = len;
fake_str->as.heap.ptr = (char *)name;
fake_str->as.heap.aux.capa = len;
return (VALUE)fake_str;
@@ -832,7 +823,7 @@ str_capacity(VALUE str, const int termlen)
return str_embed_capa(str) - termlen;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
- return RSTRING(str)->as.heap.len;
+ return RSTRING(str)->len;
}
else {
return RSTRING(str)->as.heap.aux.capa;
@@ -1012,7 +1003,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc_heap(klass);
- RSTRING(str)->as.heap.len = len;
+ RSTRING(str)->len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
RBASIC(str)->flags |= STR_NOFREE;
@@ -1296,7 +1287,6 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
- STR_SET_EMBED_LEN(str2, len);
TERM_FILL(ptr2+len, termlen);
}
else {
@@ -1320,10 +1310,12 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
}
}
FL_SET(str2, STR_NOEMBED);
- RSTRING(str2)->as.heap.len = len;
RSTRING(str2)->as.heap.ptr = ptr;
STR_SET_SHARED(str2, root);
}
+
+ STR_SET_LEN(str2, len);
+
return str2;
}
@@ -1383,7 +1375,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
if (shared == tmp && !FL_TEST_RAW(tmp, STR_BORROWED)) {
assert(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr);
- assert(RSTRING(orig)->as.heap.len == RSTRING(tmp)->as.heap.len);
+ assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
/* Unshare orig since the root (tmp) only has this one child. */
FL_UNSET_RAW(orig, STR_SHARED);
@@ -1393,7 +1385,7 @@ rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
/* Make tmp embedded and empty so it is safe for sweeping. */
STR_SET_EMBED(tmp);
- STR_SET_EMBED_LEN(tmp, 0);
+ STR_SET_LEN(tmp, 0);
}
}
}
@@ -1411,7 +1403,7 @@ heap_str_make_shared(VALUE klass, VALUE orig)
assert(!STR_SHARED_P(orig));
VALUE str = str_alloc_heap(klass);
- RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
+ STR_SET_LEN(str, RSTRING_LEN(orig));
RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
@@ -1438,7 +1430,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
- long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
+ long rest = RSTRING_LEN(shared) - ofs - RSTRING_LEN(orig);
assert(ofs >= 0);
assert(rest >= 0);
assert(ofs + rest <= RSTRING_LEN(shared));
@@ -1450,7 +1442,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_new_shared(klass, shared);
assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
- RSTRING(str)->as.heap.len -= ofs + rest;
+ STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
}
else {
if (RBASIC_CLASS(shared) == 0)
@@ -1462,7 +1454,7 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
- STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
+ STR_SET_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
@@ -1591,23 +1583,24 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
+ STR_SET_LEN(str, RSTRING_LEN(str2));
+
if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
- STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
else {
if (STR_EMBED_P(str2)) {
assert(!FL_TEST(str2, STR_SHARED));
- long len = RSTRING(str2)->as.embed.len;
+ long len = RSTRING_LEN(str2);
assert(len + termlen <= str_embed_capa(str2));
char *new_ptr = ALLOC_N(char, len + termlen);
memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
RSTRING(str2)->as.heap.ptr = new_ptr;
- RSTRING(str2)->as.heap.len = len;
+ STR_SET_LEN(str2, len);
RSTRING(str2)->as.heap.aux.capa = len;
STR_SET_NOEMBED(str2);
}
@@ -1615,7 +1608,6 @@ str_shared_replace(VALUE str, VALUE str2)
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
- RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
if (FL_TEST(str2, STR_SHARED)) {
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
@@ -1628,7 +1620,7 @@ str_shared_replace(VALUE str, VALUE str2)
/* abandon str2 */
STR_SET_EMBED(str2);
RSTRING_PTR(str2)[0] = 0;
- STR_SET_EMBED_LEN(str2, 0);
+ STR_SET_LEN(str2, 0);
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
}
@@ -1664,7 +1656,7 @@ str_replace(VALUE str, VALUE str2)
VALUE shared = RSTRING(str2)->as.heap.aux.shared;
assert(OBJ_FROZEN(shared));
STR_SET_NOEMBED(str);
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
STR_SET_SHARED(str, shared);
rb_enc_cr_str_exact_copy(str, str2);
@@ -1708,11 +1700,10 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
if (STR_EMBED_P(str)) {
- long len = RSTRING_EMBED_LEN(str);
+ long len = RSTRING_LEN(str);
assert(STR_EMBED_P(dup));
assert(str_embed_capa(dup) >= len + 1);
- STR_SET_EMBED_LEN(dup, len);
MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + 1);
}
else {
@@ -1727,13 +1718,14 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
assert(!STR_SHARED_P(root));
assert(RB_OBJ_FROZEN_RAW(root));
- RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
FL_SET(root, STR_SHARED_ROOT);
RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
flags |= RSTRING_NOEMBED | STR_SHARED;
}
+ STR_SET_LEN(dup, RSTRING_LEN(str));
+
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@@ -1751,7 +1743,7 @@ ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
dup = ec_str_alloc_heap(ec, klass);
}
else {
- dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@@ -1765,7 +1757,7 @@ str_duplicate(VALUE klass, VALUE str)
dup = str_alloc_heap(klass);
}
else {
- dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
}
return str_duplicate_setup(klass, str, dup);
@@ -1841,14 +1833,14 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
- assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
- memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
+ assert(RSTRING_LEN(str) + 1 <= str_embed_capa(str));
+ memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_LEN(str) + 1);
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
const size_t size = (size_t)capa + termlen;
const char *const old_ptr = RSTRING_PTR(str);
- const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str);
+ const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
memcpy(new_ptr, old_ptr, osize < size ? osize : size);
FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
@@ -1858,7 +1850,7 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
(size_t)capa + termlen, STR_HEAP_SIZE(str));
}
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
if (n == 1) {
memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
@@ -2350,7 +2342,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
STR_SET_EMBED(str);
memcpy(RSTRING(str)->as.embed.ary, ptr, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
return;
}
@@ -2366,7 +2358,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
FL_UNSET(str, STR_SHARED|STR_NOFREE);
TERM_FILL(ptr + len, termlen);
RSTRING(str)->as.heap.ptr = ptr;
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
RSTRING(str)->as.heap.aux.capa = capa;
}
@@ -2418,7 +2410,7 @@ str_discard(VALUE str)
if (!STR_EMBED_P(str) && !FL_TEST(str, STR_SHARED|STR_NOFREE)) {
ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
RSTRING(str)->as.heap.ptr = 0;
- RSTRING(str)->as.heap.len = 0;
+ STR_SET_LEN(str, 0);
}
}
@@ -2751,8 +2743,8 @@ str_subseq(VALUE str, long beg, long len)
str2 = str_new_shared(rb_cString, str);
ENC_CODERANGE_CLEAR(str2);
RSTRING(str2)->as.heap.ptr += beg;
- if (RSTRING(str2)->as.heap.len > len) {
- RSTRING(str2)->as.heap.len = len;
+ if (RSTRING_LEN(str2) > len) {
+ STR_SET_LEN(str2, len);
}
}
@@ -3001,7 +2993,7 @@ rb_str_resize(VALUE str, long len)
if (STR_EMBED_P(str)) {
if (len == slen) return str;
if (str_embed_capa(str) >= len + termlen) {
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
@@ -3013,7 +3005,7 @@ rb_str_resize(VALUE str, long len)
if (slen > len) slen = len;
if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
- STR_SET_EMBED_LEN(str, len);
+ STR_SET_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
}
@@ -3028,7 +3020,7 @@ rb_str_resize(VALUE str, long len)
RSTRING(str)->as.heap.aux.capa = len;
}
else if (len == slen) return str;
- RSTRING(str)->as.heap.len = len;
+ STR_SET_LEN(str, len);
TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
}
return str;
@@ -5158,7 +5150,6 @@ rb_str_drop_bytes(VALUE str, long len)
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, nlen);
ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
@@ -5170,8 +5161,9 @@ rb_str_drop_bytes(VALUE str, long len)
OBJ_FREEZE(shared);
}
ptr = RSTRING(str)->as.heap.ptr += len;
- RSTRING(str)->as.heap.len = nlen;
}
+ STR_SET_LEN(str, nlen);
+
ptr[nlen] = 0;
ENC_CODERANGE_CLEAR(str);
return str;
@@ -5946,7 +5938,7 @@ rb_str_clear(VALUE str)
{
str_discard(str);
STR_SET_EMBED(str);
- STR_SET_EMBED_LEN(str, 0);
+ STR_SET_LEN(str, 0);
RSTRING_PTR(str)[0] = 0;
if (rb_enc_asciicompat(STR_ENC_GET(str)))
ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
@@ -7926,7 +7918,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
- RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@@ -8002,7 +7994,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
TERM_FILL((char *)t, termlen);
RSTRING(str)->as.heap.ptr = (char *)buf;
- RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_LEN(str, t - buf);
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
@@ -10722,7 +10714,7 @@ rb_str_b(VALUE str)
str2 = str_alloc_heap(rb_cString);
}
else {
- str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ str2 = str_alloc_embed(rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
}
str_replace_shared_without_enc(str2, str);
diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb
index 50f8c10f58..71f91918e7 100644
--- a/test/-ext-/string/test_capacity.rb
+++ b/test/-ext-/string/test_capacity.rb
@@ -66,7 +66,7 @@ class Test_StringCapacity < Test::Unit::TestCase
end
def embed_header_size
- 2 * RbConfig::SIZEOF['void*'] + RbConfig::SIZEOF['long']
+ 3 * RbConfig::SIZEOF['void*']
end
def max_embed_len
diff --git a/yjit.c b/yjit.c
index 2f8abd4567..8e0c43f1bd 100644
--- a/yjit.c
+++ b/yjit.c
@@ -40,8 +40,7 @@
// Field offsets for the RString struct
enum rstring_offsets {
- RUBY_OFFSET_RSTRING_AS_HEAP_LEN = offsetof(struct RString, as.heap.len),
- RUBY_OFFSET_RSTRING_EMBED_LEN = offsetof(struct RString, as.embed.len),
+ RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len)
};
// We need size_t to have a known size to simplify code generation and FFI.
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 6bee890f4e..b64bdc0258 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -4422,18 +4422,13 @@ fn jit_rb_str_empty_p(
_argc: i32,
_known_recv_class: *const VALUE,
) -> bool {
- const _: () = assert!(
- RUBY_OFFSET_RSTRING_AS_HEAP_LEN == RUBY_OFFSET_RSTRING_EMBED_LEN,
- "same offset to len embedded or not so we can use one code path to read the length",
- );
-
let recv_opnd = asm.stack_pop(1);
asm.comment("get string length");
let str_len_opnd = Opnd::mem(
std::os::raw::c_long::BITS as u8,
asm.load(recv_opnd),
- RUBY_OFFSET_RSTRING_AS_HEAP_LEN as i32,
+ RUBY_OFFSET_RSTRING_LEN as i32,
);
asm.cmp(str_len_opnd, Opnd::UImm(0));
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 16bb6e1feb..8415b444ba 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1063,8 +1063,7 @@ pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
>;
-pub const RUBY_OFFSET_RSTRING_AS_HEAP_LEN: rstring_offsets = 16;
-pub const RUBY_OFFSET_RSTRING_EMBED_LEN: rstring_offsets = 16;
+pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
pub type rstring_offsets = u32;
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {