summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: Nobuyoshi Nakada <[email protected]> 2020-01-31 16:53:04 +0900
committer: Nobuyoshi Nakada <[email protected]> 2020-01-31 17:12:05 +0900
commit: 05229cef45fe04ed2bd0effd35eef271f7599879 (patch)
tree: 83e7730e7a64926b532b33f13f26776fdff79fbf /string.c
parent: 0dd6f020fcffd26cb89ee9eda59b15483e160f45 (diff)
Improve `String#slice!` performance
Instead of searching twice (once to extract and once to delete), extract and delete at the position found by the first search. This makes `String#slice!` nearly twice as fast, for both regexp and string arguments.

|              |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|regexp-short  |      2.143M|    3.918M|
|regexp-long   |    105.162k|  205.410k|
|string-short  |      3.789M|    7.964M|
|string-long   |      1.301M|    2.457M|
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/2871
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 84
1 files changed, 69 insertions, 15 deletions
diff --git a/string.c b/string.c
index 9088cec03d..12f5d014f4 100644
--- a/string.c
+++ b/string.c
@@ -215,14 +215,6 @@ str_make_independent(VALUE str)
/* symbols for [up|down|swap]case/capitalize options */
static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
-static const struct RString empty_fake_string = {
- {
- T_STRING | STR_FAKESTR |
- ENC_CODERANGE_7BIT | (ENCINDEX_US_ASCII << ENCODING_SHIFT)
- }
-};
-#define empty_string ((VALUE)&empty_fake_string)
-
static rb_encoding *
get_actual_encoding(const int encidx, VALUE str)
{
@@ -4889,16 +4881,78 @@ rb_str_insert(VALUE str, VALUE idx, VALUE str2)
static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
- VALUE result;
- VALUE buf[3];
+ VALUE result = Qnil;
+ VALUE indx;
+ long beg, len = 1;
+ char *p;
rb_check_arity(argc, 1, 2);
- MEMCPY(buf, argv, VALUE, argc);
str_modify_keep_cr(str);
- result = rb_str_aref_m(argc, buf, str);
- if (!NIL_P(result)) {
- buf[argc] = empty_string;
- rb_str_aset_m(argc+1, buf, str);
+ indx = argv[0];
+ if (RB_TYPE_P(indx, T_REGEXP)) {
+ if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
+ VALUE match = rb_backref_get();
+ struct re_registers *regs = RMATCH_REGS(match);
+ int nth = 0;
+ if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
+ if ((nth += regs->num_regs) <= 0) return Qnil;
+ }
+ else if (nth >= regs->num_regs) return Qnil;
+ beg = BEG(nth);
+ len = END(nth) - beg;
+ subseq:
+ result = rb_str_new_with_class(str, RSTRING_PTR(str)+beg, len);
+ rb_enc_cr_str_copy_for_substr(result, str);
+ }
+ else if (argc == 2) {
+ beg = NUM2LONG(indx);
+ len = NUM2LONG(argv[1]);
+ num_index:
+ if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
+ beg = p - RSTRING_PTR(str);
+ goto subseq;
+ }
+ else if (FIXNUM_P(indx)) {
+ beg = FIX2LONG(indx);
+ if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
+ if (!len) return Qnil;
+ beg = p - RSTRING_PTR(str);
+ goto subseq;
+ }
+ else if (RB_TYPE_P(indx, T_STRING)) {
+ beg = rb_str_index(str, indx, 0);
+ if (beg == -1) return Qnil;
+ len = RSTRING_LEN(indx);
+ result = rb_str_dup(indx);
+ }
+ else {
+ switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
+ case Qnil:
+ return Qnil;
+ case Qfalse:
+ beg = NUM2LONG(indx);
+ goto num_index;
+ default:
+ goto num_index;
+ }
+ }
+
+ if (len > 0) {
+ if (beg == 0) {
+ rb_str_drop_bytes(str, len);
+ }
+ else {
+ char *sptr = RSTRING_PTR(str);
+ long slen = RSTRING_LEN(str);
+ if (beg + len > slen) /* pathological check */
+ len = slen - beg;
+ memmove(sptr + beg,
+ sptr + beg + len,
+ slen - (beg + len));
+ slen -= len;
+ STR_SET_LEN(str, slen);
+ TERM_FILL(&sptr[slen], TERM_LEN(str));
+ }
}
return result;
}