diff options
author | Nobuyoshi Nakada <[email protected]> | 2024-02-27 12:20:35 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <[email protected]> | 2024-02-27 13:58:03 +0900 |
commit | 3a04ea2d0379dd8c6623c2d5563e6b4e23986fae (patch) | |
tree | 380483783eb22f8f451ca09e46fe5d42a8b6af90 /regexec.c | |
parent | 7b56353d97d38719c8dfa0255c9ca46dacd7ffad (diff) |
[Bug #20305] Fix matching against an incomplete character
When matching against an incomplete character, some `enclen` calls are
expected to return a length that does not exceed the limit, while others
are expected to return the required length, which the caller then checks
against the limit.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 37 |
1 files changed, 25 insertions, 12 deletions
```diff
@@ -1943,6 +1943,19 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
 # define ABSENT_END_POS end
 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
 
+int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
+
+static inline int
+enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
+{
+    if (enc->max_enc_len == enc->min_enc_len) {
+        return (p < e ? enc->min_enc_len : 0);
+    }
+    else {
+        return onigenc_mbclen_approximate(p, e, enc);
+    }
+}
+
 
 #ifdef USE_CAPTURE_HISTORY
 static int
@@ -2923,7 +2936,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
         int mb_len;
 
         DATA_ENSURE(1);
-        mb_len = enclen(encode, s, end);
+        mb_len = enclen_approx(encode, s, end);
         DATA_ENSURE(mb_len);
         ss = s;
         s += mb_len;
@@ -3028,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
 
     CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
       DATA_ENSURE(1);
-      n = enclen(encode, s, end);
+      n = enclen_approx(encode, s, end);
       DATA_ENSURE(n);
       if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
       s += n;
@@ -3037,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
 
     CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
       DATA_ENSURE(1);
-      n = enclen(encode, s, end);
+      n = enclen_approx(encode, s, end);
       DATA_ENSURE(n);
       s += n;
       MOP_OUT;
@@ -3047,7 +3060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       while (DATA_ENSURE_CHECK1) {
         CHECK_MATCH_CACHE;
         STACK_PUSH_ALT(p, s, sprev, pkeep);
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
         sprev = s;
@@ -3060,7 +3073,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
       while (DATA_ENSURE_CHECK1) {
         CHECK_MATCH_CACHE;
         STACK_PUSH_ALT(p, s, sprev, pkeep);
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         if (n > 1) {
           DATA_ENSURE(n);
           sprev = s;
@@ -3086,7 +3099,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
           msa->num_fails++;
 #endif
         }
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
         sprev = s;
@@ -3108,7 +3121,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
           msa->num_fails++;
 #endif
         }
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         if (n > 1) {
           DATA_ENSURE(n);
           sprev = s;
@@ -3131,7 +3144,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
         if (scv) goto fail;
 
         STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
         sprev = s;
@@ -3149,7 +3162,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
         if (scv) goto fail;
 
         STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
-        n = enclen(encode, s, end);
+        n = enclen_approx(encode, s, end);
         if (n > 1) {
           DATA_ENSURE(n);
           sprev = s;
@@ -3491,7 +3504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
         DATA_ENSURE(n);
         sprev = s;
         STRING_CMP(pstart, s, n);
-        while (sprev + (len = enclen(encode, sprev, end)) < s)
+        while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
           sprev += len;
 
         MOP_OUT;
@@ -3522,7 +3535,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
         DATA_ENSURE(n);
         sprev = s;
         STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
-        while (sprev + (len = enclen(encode, sprev, end)) < s)
+        while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
           sprev += len;
 
         MOP_OUT;
@@ -3557,7 +3570,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
           STRING_CMP_VALUE(pstart, swork, n, is_fail);
           if (is_fail) continue;
           s = swork;
-          while (sprev + (len = enclen(encode, sprev, end)) < s)
+          while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
             sprev += len;
 
           p += (SIZE_MEMNUM * (tlen - i - 1));
```