summary | refs | log | tree | commit | diff
path: root/re.c
diff options
context:
space:
mode:
author: Peter Zhu <[email protected]> 2024-07-29 20:45:26 -0400
committer: GitHub <[email protected]> 2024-07-29 17:45:26 -0700
commit: ce565cd4b851977bf37a470bee54e441bb60486d (patch)
tree: 1a7fea867b14c3a8a88dc07c3fc3c29fa3ebcb4a /re.c
parent: 6d744837abc3f7f71a1f10c7ca399201f6f05e43 (diff)
[Bug #20653] Fix memory leak in String#start_with? when regexp times out (#11255)
Fix memory leak in String#start_with? when regexp times out

[Bug #20653]

This commit refactors how Onigmo handles timeouts. Instead of raising a timeout error itself, onig_search will return ONIGERR_TIMEOUT, so that the caller can free memory and then raise the timeout error.

This fixes a memory leak in String#start_with? when the regexp times out.

For example:

    regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
    str = "a" * 1000000 + "x"

    10.times do
      100.times do
        str.start_with?(regex)
      rescue
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    33216
    51936
    71152
    81728
    97152
    103248
    120384
    133392
    133520
    133616

After:

    14912
    15376
    15824
    15824
    16128
    16128
    16144
    16144
    16160
    16160
Diffstat (limited to 're.c')
-rw-r--r-- re.c 63
1 files changed, 15 insertions, 48 deletions
diff --git a/re.c b/re.c
index a19dcb920d..a1d0ca3400 100644
--- a/re.c
+++ b/re.c
@@ -1671,10 +1671,16 @@ rb_reg_onig_match(VALUE re, VALUE str,
if (result < 0) {
onig_region_free(regs, 0);
- if (result != ONIG_MISMATCH) {
+ switch (result) {
+ case ONIG_MISMATCH:
+ break;
+ case ONIGERR_TIMEOUT:
+ rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
+ default: {
onig_errmsg_buffer err = "";
onig_error_code_to_str((UChar*)err, (int)result);
rb_reg_raise(err, re);
+ }
}
}
@@ -1735,23 +1741,6 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
ONIG_OPTION_NONE);
}
-struct rb_reg_onig_match_args {
- VALUE re;
- VALUE str;
- struct reg_onig_search_args args;
- struct re_registers regs;
-
- OnigPosition result;
-};
-
-static VALUE
-rb_reg_onig_match_try(VALUE value_args)
-{
- struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
- args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
- return Qnil;
-}
-
/* returns byte offset */
static long
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
@@ -1762,38 +1751,22 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return -1;
}
- struct rb_reg_onig_match_args args = {
- .re = re,
- .str = str,
- .args = {
- .pos = pos,
- .range = reverse ? 0 : len,
- },
- .regs = {0}
+ struct reg_onig_search_args args = {
+ .pos = pos,
+ .range = reverse ? 0 : len,
};
+ struct re_registers regs = {0};
- /* If there is a timeout set, then rb_reg_onig_match could raise a
- * Regexp::TimeoutError so we want to protect it from leaking memory. */
- if (rb_reg_match_time_limit) {
- int state;
- rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
- if (state) {
- onig_region_free(&args.regs, false);
- rb_jump_tag(state);
- }
- }
- else {
- rb_reg_onig_match_try((VALUE)&args);
- }
+ OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
- if (args.result == ONIG_MISMATCH) {
+ if (result == ONIG_MISMATCH) {
rb_backref_set(Qnil);
return ONIG_MISMATCH;
}
VALUE match = match_alloc(rb_cMatch);
rb_matchext_t *rm = RMATCH_EXT(match);
- rm->regs = args.regs;
+ rm->regs = regs;
if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
@@ -1810,7 +1783,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
rb_backref_set(match);
if (set_match) *set_match = match;
- return args.result;
+ return result;
}
long
@@ -4672,12 +4645,6 @@ rb_reg_timeout_p(regex_t *reg, void *end_time_)
return false;
}
-void
-rb_reg_raise_timeout(void)
-{
- rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
-}
-
/*
* call-seq:
* Regexp.timeout -> float or nil