1 /**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36 # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39 #ifndef USE_TOKEN_THREADED_VM
41 # define USE_TOKEN_THREADED_VM 1
43 # define USE_TOKEN_THREADED_VM 0
48 # define ENC_DUMMY_FLAG (1<<24)
50 rb_enc_asciicompat(OnigEncoding enc
)
52 return ONIGENC_MBC_MINLEN(enc
)==1 && !((enc
)->ruby_encoding_index
& ENC_DUMMY_FLAG
);
54 # undef ONIGENC_IS_MBC_ASCII_WORD
55 # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61 #ifdef USE_CRNL_AS_LINE_TERMINATOR
62 # define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
68 is_mbc_newline_ex(OnigEncoding enc
, const UChar
*p
, const UChar
*start
,
69 const UChar
*end
, OnigOptionType option
, int check_prev
)
71 if (IS_NEWLINE_CRLF(option
)) {
72 if (ONIGENC_MBC_TO_CODE(enc
, p
, end
) == 0x0a) {
74 const UChar
*prev
= onigenc_get_prev_char_head(enc
, start
, p
, end
);
75 if ((prev
!= NULL
) && ONIGENC_MBC_TO_CODE(enc
, prev
, end
) == 0x0d)
84 const UChar
*pnext
= p
+ enclen(enc
, p
, end
);
86 ONIGENC_MBC_TO_CODE(enc
, p
, end
) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc
, pnext
, end
) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc
, p
, end
))
95 return ONIGENC_IS_MBC_NEWLINE(enc
, p
, end
);
98 #else /* USE_CRNL_AS_LINE_TERMINATOR */
99 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101 #endif /* USE_CRNL_AS_LINE_TERMINATOR */
103 #ifdef USE_CAPTURE_HISTORY
104 static void history_tree_free(OnigCaptureTreeNode
* node
);
107 history_tree_clear(OnigCaptureTreeNode
* node
)
111 if (IS_NOT_NULL(node
)) {
112 for (i
= 0; i
< node
->num_childs
; i
++) {
113 if (IS_NOT_NULL(node
->childs
[i
])) {
114 history_tree_free(node
->childs
[i
]);
117 for (i
= 0; i
< node
->allocated
; i
++) {
118 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
120 node
->num_childs
= 0;
121 node
->beg
= ONIG_REGION_NOTPOS
;
122 node
->end
= ONIG_REGION_NOTPOS
;
125 node
->childs
= (OnigCaptureTreeNode
** )0;
130 history_tree_free(OnigCaptureTreeNode
* node
)
132 history_tree_clear(node
);
137 history_root_free(OnigRegion
* r
)
139 if (IS_NOT_NULL(r
->history_root
)) {
140 history_tree_free(r
->history_root
);
141 r
->history_root
= (OnigCaptureTreeNode
* )0;
145 static OnigCaptureTreeNode
*
146 history_node_new(void)
148 OnigCaptureTreeNode
* node
;
150 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
151 CHECK_NULL_RETURN(node
);
152 node
->childs
= (OnigCaptureTreeNode
** )0;
154 node
->num_childs
= 0;
156 node
->beg
= ONIG_REGION_NOTPOS
;
157 node
->end
= ONIG_REGION_NOTPOS
;
163 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
165 # define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent
->num_childs
>= parent
->allocated
) {
170 if (IS_NULL(parent
->childs
)) {
171 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
173 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
174 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
177 OnigCaptureTreeNode
** tmp
;
178 n
= parent
->allocated
* 2;
180 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
181 sizeof(OnigCaptureTreeNode
*) * n
);
183 history_tree_clear(parent
);
184 return ONIGERR_MEMORY
;
186 parent
->childs
= tmp
;
188 for (i
= parent
->allocated
; i
< n
; i
++) {
189 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
191 parent
->allocated
= n
;
194 parent
->childs
[parent
->num_childs
] = child
;
195 parent
->num_childs
++;
199 static OnigCaptureTreeNode
*
200 history_tree_clone(OnigCaptureTreeNode
* node
)
203 OnigCaptureTreeNode
*clone
, *child
;
205 clone
= history_node_new();
206 CHECK_NULL_RETURN(clone
);
208 clone
->beg
= node
->beg
;
209 clone
->end
= node
->end
;
210 for (i
= 0; i
< node
->num_childs
; i
++) {
211 child
= history_tree_clone(node
->childs
[i
]);
212 if (IS_NULL(child
)) {
213 history_tree_free(clone
);
214 return (OnigCaptureTreeNode
* )0;
216 r
= history_tree_add_child(clone
, child
);
218 history_tree_free(child
);
219 history_tree_free(clone
);
220 return (OnigCaptureTreeNode
* )0;
227 extern OnigCaptureTreeNode
*
228 onig_get_capture_tree(OnigRegion
* region
)
230 return region
->history_root
;
232 #endif /* USE_CAPTURE_HISTORY */
234 #ifdef USE_MATCH_CACHE
237 Glossary for "match cache"
239 "match cache" or "match cache optimization"
240 The `Regexp#match` optimization by using a cache.
243 A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
244 It is corresponding to some cache points.
247 A cacheable point on matching.
248 Usually, one-to-one corresponding between a cache opcode and a cache point exists,
249 but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
250 cache points depending on repetition counts.
253 A pair of a cache point and a position on an input string.
254 We encode a match cache point to an integer value by the following equation:
255 "match cache point" = "position on input string" * "total number of cache points" + "cache point"
258 A bit-array for memoizing (recording) match cache points once backtracked.
261 static OnigPosition
count_num_cache_opcodes_inner(
263 MemNumType current_repeat_mem
, int lookaround_nesting
,
264 UChar
** pp
, long* num_cache_opcodes_ptr
268 UChar
* pend
= reg
->p
+ reg
->used
;
270 MemNumType repeat_mem
;
271 OnigEncoding enc
= reg
->enc
;
272 long num_cache_opcodes
= *num_cache_opcodes_ptr
;
281 case OP_EXACT1
: p
++; break;
282 case OP_EXACT2
: p
+= 2; break;
283 case OP_EXACT3
: p
+= 3; break;
284 case OP_EXACT4
: p
+= 4; break;
285 case OP_EXACT5
: p
+= 5; break;
287 GET_LENGTH_INC(len
, p
); p
+= len
; break;
288 case OP_EXACTMB2N1
: p
+= 2; break;
289 case OP_EXACTMB2N2
: p
+= 4; break;
290 case OP_EXACTMB2N3
: p
+= 6; break;
292 GET_LENGTH_INC(len
, p
); p
+= len
* 2; break;
294 GET_LENGTH_INC(len
, p
); p
+= len
* 3; break;
298 GET_LENGTH_INC(mb_len
, p
);
299 GET_LENGTH_INC(len
, p
);
305 len
= enclen(enc
, p
, pend
); p
+= len
; break;
307 GET_LENGTH_INC(len
, p
); p
+= len
; break;
311 p
+= SIZE_BITSET
; break;
313 case OP_CCLASS_MB_NOT
:
314 GET_LENGTH_INC(len
, p
); p
+= len
; break;
316 case OP_CCLASS_MIX_NOT
:
318 GET_LENGTH_INC(len
, p
);
325 case OP_ANYCHAR_STAR
:
326 case OP_ANYCHAR_ML_STAR
:
327 num_cache_opcodes
++; break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT
:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
330 p
++; num_cache_opcodes
++; break;
335 case OP_NOT_WORD_BOUND
:
341 case OP_NOT_ASCII_WORD
:
342 case OP_ASCII_WORD_BOUND
:
343 case OP_NOT_ASCII_WORD_BOUND
:
344 case OP_ASCII_WORD_BEGIN
:
345 case OP_ASCII_WORD_END
:
352 case OP_SEMI_END_BUF
:
353 case OP_BEGIN_POSITION
:
360 case OP_BACKREF_MULTI
:
361 case OP_BACKREF_MULTI_IC
:
362 case OP_BACKREF_WITH_LEVEL
:
365 case OP_MEMORY_START
:
366 case OP_MEMORY_START_PUSH
:
367 case OP_MEMORY_END_PUSH
:
368 case OP_MEMORY_END_PUSH_REC
:
370 case OP_MEMORY_END_REC
:
372 // A memory (capture) in look-around is found.
373 if (lookaround_nesting
!= 0) {
392 case OP_PUSH_OR_JUMP_EXACT1
:
393 case OP_PUSH_IF_PEEK_NEXT
:
394 p
+= SIZE_RELADDR
+ 1; num_cache_opcodes
++; break;
397 if (current_repeat_mem
!= -1) {
398 // A nested OP_REPEAT is not yet supported.
401 GET_MEMNUM_INC(repeat_mem
, p
);
403 if (reg
->repeat_range
[repeat_mem
].lower
== 0 && reg
->repeat_range
[repeat_mem
].upper
== 0) {
404 long dummy_num_cache_opcodes
= 0;
405 result
= count_num_cache_opcodes_inner(reg
, repeat_mem
, lookaround_nesting
, &p
, &dummy_num_cache_opcodes
);
406 if (result
< 0 || dummy_num_cache_opcodes
< 0) {
410 if (reg
->repeat_range
[repeat_mem
].lower
== 0) {
413 result
= count_num_cache_opcodes_inner(reg
, repeat_mem
, lookaround_nesting
, &p
, &num_cache_opcodes
);
414 if (result
< 0 || num_cache_opcodes
< 0) {
417 OnigRepeatRange
*repeat_range
= ®
->repeat_range
[repeat_mem
];
418 if (repeat_range
->lower
< repeat_range
->upper
) {
424 case OP_REPEAT_INC_NG
:
425 GET_MEMNUM_INC(repeat_mem
, p
);
426 if (repeat_mem
!= current_repeat_mem
) {
427 // A lone or invalid OP_REPEAT_INC is found.
431 case OP_REPEAT_INC_SG
:
432 case OP_REPEAT_INC_NG_SG
:
434 case OP_NULL_CHECK_START
:
437 case OP_NULL_CHECK_END
:
438 case OP_NULL_CHECK_END_MEMST_PUSH
:
441 case OP_NULL_CHECK_END_MEMST
:
446 if (lookaround_nesting
< 0) {
447 // A look-around nested in a atomic grouping is found.
450 result
= count_num_cache_opcodes_inner(reg
, current_repeat_mem
, lookaround_nesting
+ 1, &p
, &num_cache_opcodes
);
451 if (result
< 0 || num_cache_opcodes
< 0) {
455 case OP_PUSH_POS_NOT
:
456 if (lookaround_nesting
< 0) {
457 // A look-around nested in a atomic grouping is found.
461 result
= count_num_cache_opcodes_inner(reg
, current_repeat_mem
, lookaround_nesting
+ 1, &p
, &num_cache_opcodes
);
462 if (result
< 0 || num_cache_opcodes
< 0) {
466 case OP_PUSH_LOOK_BEHIND_NOT
:
467 if (lookaround_nesting
< 0) {
468 // A look-around nested in a atomic grouping is found.
473 result
= count_num_cache_opcodes_inner(reg
, current_repeat_mem
, lookaround_nesting
+ 1, &p
, &num_cache_opcodes
);
474 if (result
< 0 || num_cache_opcodes
< 0) {
478 case OP_PUSH_STOP_BT
:
479 if (lookaround_nesting
!= 0) {
480 // A nested atomic grouping is found.
483 result
= count_num_cache_opcodes_inner(reg
, current_repeat_mem
, -1, &p
, &num_cache_opcodes
);
484 if (result
< 0 || num_cache_opcodes
< 0) {
490 case OP_FAIL_LOOK_BEHIND_NOT
:
497 case OP_PUSH_ABSENT_POS
:
509 case OP_STATE_CHECK_PUSH
:
510 case OP_STATE_CHECK_PUSH_OR_JUMP
:
512 case OP_STATE_CHECK_ANYCHAR_STAR
:
513 case OP_STATE_CHECK_ANYCHAR_ML_STAR
:
516 case OP_SET_OPTION_PUSH
:
528 *num_cache_opcodes_ptr
= num_cache_opcodes
;
532 *num_cache_opcodes_ptr
= num_cache_opcodes
;
536 *num_cache_opcodes_ptr
= NUM_CACHE_OPCODES_IMPOSSIBLE
;
540 return ONIGERR_UNDEFINED_BYTECODE
;
543 /* count the total number of cache opcodes for allocating a match cache buffer. */
545 count_num_cache_opcodes(const regex_t
* reg
, long* num_cache_opcodes_ptr
)
548 *num_cache_opcodes_ptr
= 0;
549 OnigPosition result
= count_num_cache_opcodes_inner(reg
, -1, 0, &p
, num_cache_opcodes_ptr
);
550 if (result
== 0 && *num_cache_opcodes_ptr
>= 0 && p
!= reg
->p
+ reg
->used
) {
551 return ONIGERR_UNDEFINED_BYTECODE
;
558 init_cache_opcodes_inner(
560 MemNumType current_repeat_mem
, int lookaround_nesting
,
561 OnigCacheOpcode
** cache_opcodes_ptr
, UChar
** pp
, long* num_cache_points_ptr
565 UChar
* pend
= reg
->p
+ reg
->used
;
568 MemNumType repeat_mem
;
569 OnigEncoding enc
= reg
->enc
;
570 long cache_point
= *num_cache_points_ptr
;
571 OnigCacheOpcode
*cache_opcodes
= *cache_opcodes_ptr
;
574 # define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575 cache_opcodes->addr = pbegin;\
576 cache_opcodes->cache_point = cache_point;\
577 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580 cache_opcodes->lookaround_nesting = lookaround_nesting;\
581 cache_opcodes->match_addr = NULL;\
582 cache_point += lookaround_nesting != 0 ? 2 : 1;\
593 case OP_EXACT1
: p
++; break;
594 case OP_EXACT2
: p
+= 2; break;
595 case OP_EXACT3
: p
+= 3; break;
596 case OP_EXACT4
: p
+= 4; break;
597 case OP_EXACT5
: p
+= 5; break;
599 GET_LENGTH_INC(len
, p
); p
+= len
; break;
600 case OP_EXACTMB2N1
: p
+= 2; break;
601 case OP_EXACTMB2N2
: p
+= 4; break;
602 case OP_EXACTMB2N3
: p
+= 6; break;
604 GET_LENGTH_INC(len
, p
); p
+= len
* 2; break;
606 GET_LENGTH_INC(len
, p
); p
+= len
* 3; break;
610 GET_LENGTH_INC(mb_len
, p
);
611 GET_LENGTH_INC(len
, p
);
617 len
= enclen(enc
, p
, pend
); p
+= len
; break;
619 GET_LENGTH_INC(len
, p
); p
+= len
; break;
623 p
+= SIZE_BITSET
; break;
625 case OP_CCLASS_MB_NOT
:
626 GET_LENGTH_INC(len
, p
); p
+= len
; break;
628 case OP_CCLASS_MIX_NOT
:
630 GET_LENGTH_INC(len
, p
);
637 case OP_ANYCHAR_STAR
:
638 case OP_ANYCHAR_ML_STAR
:
641 case OP_ANYCHAR_STAR_PEEK_NEXT
:
642 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
650 case OP_NOT_WORD_BOUND
:
656 case OP_NOT_ASCII_WORD
:
657 case OP_ASCII_WORD_BOUND
:
658 case OP_NOT_ASCII_WORD_BOUND
:
659 case OP_ASCII_WORD_BEGIN
:
660 case OP_ASCII_WORD_END
:
667 case OP_SEMI_END_BUF
:
668 case OP_BEGIN_POSITION
:
675 case OP_BACKREF_MULTI
:
676 case OP_BACKREF_MULTI_IC
:
677 case OP_BACKREF_WITH_LEVEL
:
678 goto unexpected_bytecode_error
;
680 case OP_MEMORY_START
:
681 case OP_MEMORY_START_PUSH
:
682 case OP_MEMORY_END_PUSH
:
683 case OP_MEMORY_END_PUSH_REC
:
685 case OP_MEMORY_END_REC
:
687 if (lookaround_nesting
!= 0) {
688 goto unexpected_bytecode_error
;
706 case OP_PUSH_OR_JUMP_EXACT1
:
707 case OP_PUSH_IF_PEEK_NEXT
:
708 p
+= SIZE_RELADDR
+ 1;
713 GET_MEMNUM_INC(repeat_mem
, p
);
715 if (reg
->repeat_range
[repeat_mem
].lower
== 0 && reg
->repeat_range
[repeat_mem
].upper
== 0) {
716 long dummy_num_cache_points
= 0;
717 OnigCacheOpcode
* dummy_cache_opcodes
= NULL
;
718 result
= init_cache_opcodes_inner(reg
, repeat_mem
, lookaround_nesting
, &dummy_cache_opcodes
, &p
, &dummy_num_cache_points
);
723 if (reg
->repeat_range
[repeat_mem
].lower
== 0) {
727 long num_cache_points_in_repeat
= 0;
728 long num_cache_points_at_repeat
= cache_point
;
729 OnigCacheOpcode
* cache_opcodes_in_repeat
= cache_opcodes
;
730 result
= init_cache_opcodes_inner(reg
, repeat_mem
, lookaround_nesting
, &cache_opcodes
, &p
, &num_cache_points_in_repeat
);
734 OnigRepeatRange
*repeat_range
= ®
->repeat_range
[repeat_mem
];
735 if (repeat_range
->lower
< repeat_range
->upper
) {
737 cache_point
-= lookaround_nesting
!= 0 ? 2 : 1;
739 int repeat_bounds
= repeat_range
->upper
== 0x7fffffff ? 1 : repeat_range
->upper
- repeat_range
->lower
;
740 cache_point
+= num_cache_points_in_repeat
* repeat_range
->lower
+ (num_cache_points_in_repeat
+ (lookaround_nesting
!= 0 ? 2 : 1)) * repeat_bounds
;
741 for (; cache_opcodes_in_repeat
< cache_opcodes
; cache_opcodes_in_repeat
++) {
742 cache_opcodes_in_repeat
->num_cache_points_at_outer_repeat
= num_cache_points_at_repeat
;
743 cache_opcodes_in_repeat
->num_cache_points_in_outer_repeat
= num_cache_points_in_repeat
;
749 case OP_REPEAT_INC_NG
:
752 case OP_REPEAT_INC_SG
:
753 case OP_REPEAT_INC_NG_SG
:
754 goto unexpected_bytecode_error
;
755 case OP_NULL_CHECK_START
:
758 case OP_NULL_CHECK_END
:
759 case OP_NULL_CHECK_END_MEMST_PUSH
:
762 case OP_NULL_CHECK_END_MEMST
:
769 OnigCacheOpcode
* cache_opcodes_in_lookaround
= cache_opcodes
;
770 result
= init_cache_opcodes_inner(reg
, current_repeat_mem
, lookaround_nesting
+ 1, &cache_opcodes
, &p
, &cache_point
);
774 UChar
* match_addr
= p
- 1;
775 for (; cache_opcodes_in_lookaround
< cache_opcodes
; cache_opcodes_in_lookaround
++) {
776 if (cache_opcodes_in_lookaround
->match_addr
== NULL
) {
777 cache_opcodes_in_lookaround
->match_addr
= match_addr
;
782 case OP_PUSH_POS_NOT
:
785 case OP_PUSH_LOOK_BEHIND_NOT
:
789 case OP_PUSH_STOP_BT
:
791 OnigCacheOpcode
* cache_opcodes_in_atomic
= cache_opcodes
;
792 result
= init_cache_opcodes_inner(reg
, current_repeat_mem
, -1, &cache_opcodes
, &p
, &cache_point
);
796 UChar
* match_addr
= p
- 1;
797 for (; cache_opcodes_in_atomic
< cache_opcodes
; cache_opcodes_in_atomic
++) {
798 if (cache_opcodes_in_atomic
->match_addr
== NULL
) {
799 cache_opcodes_in_atomic
->match_addr
= match_addr
;
806 case OP_FAIL_LOOK_BEHIND_NOT
:
815 goto unexpected_bytecode_error
;
819 goto unexpected_bytecode_error
;
822 goto unexpected_bytecode_error
;
824 case OP_STATE_CHECK_PUSH
:
825 case OP_STATE_CHECK_PUSH_OR_JUMP
:
827 case OP_STATE_CHECK_ANYCHAR_STAR
:
828 case OP_STATE_CHECK_ANYCHAR_ML_STAR
:
829 goto unexpected_bytecode_error
;
831 case OP_SET_OPTION_PUSH
:
842 *cache_opcodes_ptr
= cache_opcodes
;
844 *num_cache_points_ptr
= cache_point
;
850 unexpected_bytecode_error
:
851 return ONIGERR_UNEXPECTED_BYTECODE
;
854 return ONIGERR_UNDEFINED_BYTECODE
;
857 /* collect cache opcodes from the given regex program, and compute the total number of cache points. */
859 init_cache_opcodes(const regex_t
* reg
, OnigCacheOpcode
* cache_opcodes_ptr
, long* num_cache_points_ptr
)
862 *num_cache_points_ptr
= 0;
863 OnigPosition result
= init_cache_opcodes_inner(reg
, -1, 0, &cache_opcodes_ptr
, &p
, num_cache_points_ptr
);
864 if (result
== 0 && p
!= reg
->p
+ reg
->used
) {
865 return ONIGERR_UNDEFINED_BYTECODE
;
872 count_num_cache_opcodes(regex_t
* reg
, long* num_cache_opcodes
)
874 *num_cache_opcodes
= NUM_CACHE_OPCODES_IMPOSSIBLE
;
877 #endif /* USE_MATCH_CACHE */
880 onig_check_linear_time(OnigRegexType
* reg
)
882 long num_cache_opcodes
= 0;
883 count_num_cache_opcodes(reg
, &num_cache_opcodes
);
884 return num_cache_opcodes
!= NUM_CACHE_OPCODES_IMPOSSIBLE
;
888 onig_region_clear(OnigRegion
* region
)
892 for (i
= 0; i
< region
->num_regs
; i
++) {
893 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
895 #ifdef USE_CAPTURE_HISTORY
896 history_root_free(region
);
901 onig_region_resize(OnigRegion
* region
, int n
)
903 region
->num_regs
= n
;
905 if (n
< ONIG_NREGION
)
908 if (region
->allocated
== 0) {
909 region
->beg
= (OnigPosition
* )xmalloc(n
* sizeof(OnigPosition
));
910 if (region
->beg
== 0)
911 return ONIGERR_MEMORY
;
913 region
->end
= (OnigPosition
* )xmalloc(n
* sizeof(OnigPosition
));
914 if (region
->end
== 0) {
916 return ONIGERR_MEMORY
;
919 region
->allocated
= n
;
921 else if (region
->allocated
< n
) {
924 region
->allocated
= 0;
925 tmp
= (OnigPosition
* )xrealloc(region
->beg
, n
* sizeof(OnigPosition
));
929 return ONIGERR_MEMORY
;
932 tmp
= (OnigPosition
* )xrealloc(region
->end
, n
* sizeof(OnigPosition
));
936 return ONIGERR_MEMORY
;
940 region
->allocated
= n
;
947 onig_region_resize_clear(OnigRegion
* region
, int n
)
951 r
= onig_region_resize(region
, n
);
952 if (r
!= 0) return r
;
953 onig_region_clear(region
);
958 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
960 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
962 if (at
>= region
->allocated
) {
963 int r
= onig_region_resize(region
, at
+ 1);
967 region
->beg
[at
] = beg
;
968 region
->end
[at
] = end
;
973 onig_region_init(OnigRegion
* region
)
975 region
->num_regs
= 0;
976 region
->allocated
= 0;
977 region
->beg
= (OnigPosition
* )0;
978 region
->end
= (OnigPosition
* )0;
979 #ifdef USE_CAPTURE_HISTORY
980 region
->history_root
= (OnigCaptureTreeNode
* )0;
985 onig_region_new(void)
989 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
996 onig_region_free(OnigRegion
* r
, int free_self
)
999 if (r
->allocated
> 0) {
1003 #ifdef USE_CAPTURE_HISTORY
1004 history_root_free(r
);
1010 memset(r
, 0, sizeof(OnigRegion
));
1016 onig_region_copy(OnigRegion
* to
, const OnigRegion
* from
)
1018 #define RREGC_SIZE (sizeof(int) * from->num_regs)
1021 if (to
== from
) return;
1023 r
= onig_region_resize(to
, from
->num_regs
);
1026 for (i
= 0; i
< from
->num_regs
; i
++) {
1027 to
->beg
[i
] = from
->beg
[i
];
1028 to
->end
[i
] = from
->end
[i
];
1030 to
->num_regs
= from
->num_regs
;
1032 #ifdef USE_CAPTURE_HISTORY
1033 history_root_free(to
);
1035 if (IS_NOT_NULL(from
->history_root
)) {
1036 to
->history_root
= history_tree_clone(from
->history_root
);
1043 #define INVALID_STACK_INDEX -1
1046 /* used by normal-POP */
1047 #define STK_ALT 0x0001
1048 #define STK_LOOK_BEHIND_NOT 0x0002
1049 #define STK_POS_NOT 0x0003
1050 /* handled by normal-POP */
1051 #define STK_MEM_START 0x0100
1052 #define STK_MEM_END 0x8200
1053 #define STK_REPEAT_INC 0x0300
1054 #define STK_STATE_CHECK_MARK 0x1000
1055 /* avoided by normal-POP */
1056 #define STK_NULL_CHECK_START 0x3000
1057 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
1058 #define STK_MEM_END_MARK 0x8400
1059 #define STK_POS 0x0500 /* used when POP-POS */
1060 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
1061 #define STK_REPEAT 0x0700
1062 #define STK_CALL_FRAME 0x0800
1063 #define STK_RETURN 0x0900
1064 #define STK_VOID 0x0a00 /* for fill a blank */
1065 #define STK_ABSENT_POS 0x0b00 /* for absent */
1066 #define STK_ABSENT 0x0c00 /* absent inner loop marker */
1067 #define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
1068 #define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
1070 /* stack type check mask */
1071 #define STK_MASK_POP_USED 0x00ff
1072 #define STK_MASK_TO_VOID_TARGET 0x10ff
1073 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
1075 #ifdef USE_MATCH_CACHE
1076 #define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1077 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1078 (msa).num_fails = 0;\
1079 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1080 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1081 (msa).num_cache_points = 0;\
1082 (msa).match_cache_buf = (uint8_t*)NULL;\
1084 #define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1085 xfree((msa).cache_opcodes);\
1086 xfree((msa).match_cache_buf);\
1087 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1088 (msa).match_cache_buf = (uint8_t*)NULL;\
1091 #define MATCH_ARG_INIT_MATCH_CACHE(msa)
1092 #define MATCH_ARG_FREE_MATCH_CACHE(msa)
1095 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1096 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1097 (msa).stack_p = (void* )0;\
1098 (msa).options = (arg_option);\
1099 (msa).region = (arg_region);\
1100 (msa).start = (arg_start);\
1101 (msa).gpos = (arg_gpos);\
1102 (msa).best_len = ONIG_MISMATCH;\
1104 (msa).end_time = 0;\
1105 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1108 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1109 (msa).stack_p = (void* )0;\
1110 (msa).options = (arg_option);\
1111 (msa).region = (arg_region);\
1112 (msa).start = (arg_start);\
1113 (msa).gpos = (arg_gpos);\
1115 (msa).end_time = 0;\
1116 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1120 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1122 # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
1124 # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1125 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1126 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1127 offset = ((offset) * (state_num)) >> 3;\
1128 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1129 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1130 (msa).state_check_buff = (void* )xmalloc(size);\
1131 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1134 (msa).state_check_buff = (void* )xalloca(size);\
1135 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1136 (size_t )(size - (offset))); \
1137 (msa).state_check_buff_size = size;\
1140 (msa).state_check_buff = (void* )0;\
1141 (msa).state_check_buff_size = 0;\
1145 (msa).state_check_buff = (void* )0;\
1146 (msa).state_check_buff_size = 0;\
1150 # define MATCH_ARG_FREE(msa) do {\
1151 xfree((msa).stack_p);\
1152 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1153 xfree((msa).state_check_buff);\
1155 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1157 #else /* USE_COMBINATION_EXPLOSION_CHECK */
1158 # define MATCH_ARG_FREE(msa) do {\
1159 xfree((msa).stack_p);\
1160 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1162 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1166 #define MAX_PTR_NUM 100
1168 #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1169 if (ptr_num > MAX_PTR_NUM) {\
1170 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1171 heap_addr = alloc_addr;\
1172 if (msa->stack_p) {\
1173 stk_alloc = (OnigStackType* )(msa->stack_p);\
1174 stk_base = stk_alloc;\
1176 stk_end = stk_base + msa->stack_n;\
1178 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1179 stk_base = stk_alloc;\
1181 stk_end = stk_base + (stack_num);\
1183 } else if (msa->stack_p) {\
1184 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1186 stk_alloc = (OnigStackType* )(msa->stack_p);\
1187 stk_base = stk_alloc;\
1189 stk_end = stk_base + msa->stack_n;\
1192 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1193 + sizeof(OnigStackType) * (stack_num));\
1195 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1196 stk_base = stk_alloc;\
1198 stk_end = stk_base + (stack_num);\
1202 #define STACK_SAVE do{\
1203 if (stk_base != stk_alloc) {\
1204 msa->stack_p = stk_base;\
1205 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
1209 static unsigned int MatchStackLimitSize
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
1212 onig_get_match_stack_limit_size(void)
1214 return MatchStackLimitSize
;
1218 onig_set_match_stack_limit_size(unsigned int size
)
1220 MatchStackLimitSize
= size
;
1225 stack_double(OnigStackType
** arg_stk_base
, OnigStackType
** arg_stk_end
,
1226 OnigStackType
** arg_stk
, OnigStackType
* stk_alloc
, OnigMatchArg
* msa
)
1229 OnigStackType
*x
, *stk_base
, *stk_end
, *stk
;
1231 stk_base
= *arg_stk_base
;
1232 stk_end
= *arg_stk_end
;
1235 n
= stk_end
- stk_base
;
1236 if (stk_base
== stk_alloc
&& IS_NULL(msa
->stack_p
)) {
1237 x
= (OnigStackType
* )xmalloc(sizeof(OnigStackType
) * n
* 2);
1240 return ONIGERR_MEMORY
;
1242 xmemcpy(x
, stk_base
, n
* sizeof(OnigStackType
));
1246 unsigned int limit_size
= MatchStackLimitSize
;
1248 if (limit_size
!= 0 && n
> limit_size
) {
1249 if ((unsigned int )(stk_end
- stk_base
) == limit_size
)
1250 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
1254 x
= (OnigStackType
* )xrealloc(stk_base
, sizeof(OnigStackType
) * n
);
1257 return ONIGERR_MEMORY
;
1260 *arg_stk
= x
+ (stk
- stk_base
);
1262 *arg_stk_end
= x
+ n
;
1266 #define STACK_ENSURE(n) do {\
1267 if (stk_end - stk < (n)) {\
1268 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1271 xfree(xmalloc_base);\
1277 #define STACK_AT(index) (stk_base + (index))
1278 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
1280 #define STACK_PUSH_TYPE(stack_type) do {\
1282 stk->type = (stack_type);\
1283 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1287 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1289 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1290 # define STATE_CHECK_POS(s,snum) \
1291 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1292 # define STATE_CHECK_VAL(v,snum) do {\
1293 if (state_check_buff != NULL) {\
1294 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1295 (v) = state_check_buff[x/8] & (1<<(x%8));\
1301 # define ELSE_IF_STATE_CHECK_MARK(stk) \
1302 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1303 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1304 state_check_buff[x/8] |= (1<<(x%8)); \
1307 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1309 stk->type = (stack_type);\
1310 stk->u.state.pcode = (pat);\
1311 stk->u.state.pstr = (s);\
1312 stk->u.state.pstr_prev = (sprev);\
1313 stk->u.state.state_check = 0;\
1314 stk->u.state.pkeep = (keep);\
1318 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
1319 stk->type = (stack_type);\
1320 stk->u.state.pcode = (pat);\
1321 stk->u.state.state_check = 0;\
1325 # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1327 stk->type = STK_ALT;\
1328 stk->u.state.pcode = (pat);\
1329 stk->u.state.pstr = (s);\
1330 stk->u.state.pstr_prev = (sprev);\
1331 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1332 stk->u.state.pkeep = (keep);\
1336 # define STACK_PUSH_STATE_CHECK(s,snum) do {\
1337 if (state_check_buff != NULL) {\
1339 stk->type = STK_STATE_CHECK_MARK;\
1340 stk->u.state.pstr = (s);\
1341 stk->u.state.state_check = (snum);\
1346 #else /* USE_COMBINATION_EXPLOSION_CHECK */
1348 # define ELSE_IF_STATE_CHECK_MARK(stk)
1350 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1352 stk->type = (stack_type);\
1353 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1354 stk->u.state.pcode = (pat);\
1355 stk->u.state.pstr = (s);\
1356 stk->u.state.pstr_prev = (sprev);\
1357 stk->u.state.pkeep = (keep);\
1361 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
1362 stk->type = (stack_type);\
1363 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1364 stk->u.state.pcode = (pat);\
1367 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1369 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1370 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1371 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1372 #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1373 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1374 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1375 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1377 #define STACK_PUSH_REPEAT(id, pat) do {\
1379 stk->type = STK_REPEAT;\
1380 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1381 stk->u.repeat.num = (id);\
1382 stk->u.repeat.pcode = (pat);\
1383 stk->u.repeat.count = 0;\
1387 #define STACK_PUSH_REPEAT_INC(sindex) do {\
1389 stk->type = STK_REPEAT_INC;\
1390 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1391 stk->u.repeat_inc.si = (sindex);\
1395 #define STACK_PUSH_MEM_START(mnum, s) do {\
1397 stk->type = STK_MEM_START;\
1398 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1399 stk->u.mem.num = (mnum);\
1400 stk->u.mem.pstr = (s);\
1401 stk->u.mem.start = mem_start_stk[mnum];\
1402 stk->u.mem.end = mem_end_stk[mnum];\
1403 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1404 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1408 #define STACK_PUSH_MEM_END(mnum, s) do {\
1410 stk->type = STK_MEM_END;\
1411 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1412 stk->u.mem.num = (mnum);\
1413 stk->u.mem.pstr = (s);\
1414 stk->u.mem.start = mem_start_stk[mnum];\
1415 stk->u.mem.end = mem_end_stk[mnum];\
1416 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1420 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1422 stk->type = STK_MEM_END_MARK;\
1423 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1424 stk->u.mem.num = (mnum);\
1428 #define STACK_GET_MEM_START(mnum, k) do {\
1431 while (k > stk_base) {\
1433 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1434 && k->u.mem.num == (mnum)) {\
1437 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1438 if (level == 0) break;\
1444 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1447 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1448 if (level == 0) (start) = k->u.mem.pstr;\
1451 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1454 (end) = k->u.mem.pstr;\
1462 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1464 stk->type = STK_NULL_CHECK_START;\
1465 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1466 stk->u.null_check.num = (cnum);\
1467 stk->u.null_check.pstr = (s);\
1471 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1473 stk->type = STK_NULL_CHECK_END;\
1474 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1475 stk->u.null_check.num = (cnum);\
1479 #define STACK_PUSH_CALL_FRAME(pat) do {\
1481 stk->type = STK_CALL_FRAME;\
1482 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1483 stk->u.call_frame.ret_addr = (pat);\
1487 #define STACK_PUSH_RETURN do {\
1489 stk->type = STK_RETURN;\
1490 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1494 #define STACK_PUSH_ABSENT_POS(start, end) do {\
1496 stk->type = STK_ABSENT_POS;\
1497 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1498 stk->u.absent_pos.abs_pstr = (start);\
1499 stk->u.absent_pos.end_pstr = (end);\
1503 #define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
1505 stk->type = STK_MATCH_CACHE_POINT;\
1506 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1507 stk->u.match_cache_point.index = (match_cache_point_index);\
1508 stk->u.match_cache_point.mask = (match_cache_point_mask);\
1514 # define STACK_BASE_CHECK(p, at) \
1515 if ((p) < stk_base) {\
1516 fprintf(stderr, "at %s\n", at);\
1520 # define STACK_BASE_CHECK(p, at)
1523 #ifdef ONIG_DEBUG_MATCH_CACHE
1524 # define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);
1526 # define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
1529 #ifdef USE_MATCH_CACHE
1530 # define INC_NUM_FAILS msa->num_fails++
1531 # define MEMOIZE_MATCH_CACHE_POINT do {\
1532 if (stk->type == STK_MATCH_CACHE_POINT) {\
1533 msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
1534 MATCH_CACHE_DEBUG_MEMOIZE(stk);\
1535 } else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
1536 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1537 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1540 # define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
1541 if (stkp->type == STK_MATCH_CACHE_POINT) {\
1542 stkp->type = STK_VOID;\
1543 memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
1544 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1547 # define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
1548 if (stk->type == STK_MATCH_CACHE_POINT) {\
1549 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1550 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1554 # define INC_NUM_FAILS ((void) 0)
1555 # define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
1556 # define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
1559 #define STACK_POP_ONE do {\
1561 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1564 #define STACK_POP do {\
1565 switch (pop_level) {\
1566 case STACK_POP_LEVEL_FREE:\
1569 STACK_BASE_CHECK(stk, "STACK_POP"); \
1570 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1571 ELSE_IF_STATE_CHECK_MARK(stk);\
1572 MEMOIZE_MATCH_CACHE_POINT;\
1575 case STACK_POP_LEVEL_MEM_START:\
1578 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1579 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1580 else if (stk->type == STK_MEM_START) {\
1581 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1582 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1584 ELSE_IF_STATE_CHECK_MARK(stk);\
1585 MEMOIZE_MATCH_CACHE_POINT;\
1591 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1592 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1593 else if (stk->type == STK_MEM_START) {\
1594 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1595 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1597 else if (stk->type == STK_REPEAT_INC) {\
1598 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1600 else if (stk->type == STK_MEM_END) {\
1601 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1602 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1604 ELSE_IF_STATE_CHECK_MARK(stk);\
1605 MEMOIZE_MATCH_CACHE_POINT;\
1611 #define STACK_POP_TIL_POS_NOT do {\
1614 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1615 if (stk->type == STK_POS_NOT) break;\
1616 else if (stk->type == STK_MEM_START) {\
1617 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1618 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1620 else if (stk->type == STK_REPEAT_INC) {\
1621 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1623 else if (stk->type == STK_MEM_END) {\
1624 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1625 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1627 else if (IS_TO_VOID_TARGET(stk)) {\
1630 ELSE_IF_STATE_CHECK_MARK(stk);\
1631 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
1635 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1638 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1639 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1640 else if (stk->type == STK_MEM_START) {\
1641 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1642 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1644 else if (stk->type == STK_REPEAT_INC) {\
1645 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1647 else if (stk->type == STK_MEM_END) {\
1648 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1649 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1651 ELSE_IF_STATE_CHECK_MARK(stk);\
1655 #define STACK_POP_TIL_ABSENT do {\
1658 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1659 if (stk->type == STK_ABSENT) break;\
1660 else if (stk->type == STK_MEM_START) {\
1661 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1662 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1664 else if (stk->type == STK_REPEAT_INC) {\
1665 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1667 else if (stk->type == STK_MEM_END) {\
1668 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1669 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1671 ELSE_IF_STATE_CHECK_MARK(stk);\
1675 #define STACK_POP_ABSENT_POS(start, end) do {\
1677 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1678 (start) = stk->u.absent_pos.abs_pstr;\
1679 (end) = stk->u.absent_pos.end_pstr;\
1682 #define STACK_POS_END(k) do {\
1686 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1687 if (IS_TO_VOID_TARGET(k)) {\
1689 k->type = STK_VOID;\
1691 else if (k->type == STK_POS) {\
1692 k->type = STK_VOID;\
1695 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
1699 #define STACK_STOP_BT_END do {\
1700 OnigStackType *k = stk;\
1703 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1704 if (IS_TO_VOID_TARGET(k)) {\
1706 k->type = STK_VOID;\
1708 else if (k->type == STK_STOP_BT) {\
1709 k->type = STK_VOID;\
1712 else if (k->type == STK_MATCH_CACHE_POINT) {\
1713 k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
1718 #define STACK_STOP_BT_FAIL do {\
1721 STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \
1722 if (stk->type == STK_STOP_BT) {\
1723 stk->type = STK_VOID;\
1726 MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
1730 #define STACK_NULL_CHECK(isnull,id,s) do {\
1731 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1734 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1735 if (k->type == STK_NULL_CHECK_START) {\
1736 if (k->u.null_check.num == (id)) {\
1737 (isnull) = (k->u.null_check.pstr == (s));\
1744 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1746 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1749 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1750 if (k->type == STK_NULL_CHECK_START) {\
1751 if (k->u.null_check.num == (id)) {\
1753 (isnull) = (k->u.null_check.pstr == (s));\
1759 else if (k->type == STK_NULL_CHECK_END) {\
1765 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
1766 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1769 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1770 if (k->type == STK_NULL_CHECK_START) {\
1771 if (k->u.null_check.num == (id)) {\
1772 if (k->u.null_check.pstr != (s)) {\
1780 if (k->type == STK_MEM_START) {\
1781 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1782 (isnull) = 0; break;\
1784 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1785 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1787 endp = (UChar* )k->u.mem.end;\
1788 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1789 (isnull) = 0; break;\
1791 else if (endp != s) {\
1792 (isnull) = -1; /* empty, but position changed */ \
1804 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1806 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1809 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1810 if (k->type == STK_NULL_CHECK_START) {\
1811 if (k->u.null_check.num == (id)) {\
1813 if (k->u.null_check.pstr != (s)) {\
1821 if (k->type == STK_MEM_START) {\
1822 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1823 (isnull) = 0; break;\
1825 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1826 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1828 endp = (UChar* )k->u.mem.end;\
1829 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1830 (isnull) = 0; break;\
1832 else if (endp != s) {\
1833 (isnull) = -1; /* empty, but position changed */ \
1846 else if (k->type == STK_NULL_CHECK_END) {\
1847 if (k->u.null_check.num == (id)) level++;\
1852 #define STACK_GET_REPEAT(id, k) do {\
1857 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1858 if (k->type == STK_REPEAT) {\
1860 if (k->u.repeat.num == (id)) {\
1865 else if (k->type == STK_CALL_FRAME) level--;\
1866 else if (k->type == STK_RETURN) level++;\
1870 #define STACK_RETURN(addr) do {\
1872 OnigStackType* k = stk;\
1875 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1876 if (k->type == STK_CALL_FRAME) {\
1878 (addr) = k->u.call_frame.ret_addr;\
1883 else if (k->type == STK_RETURN)\
1889 #define STRING_CMP(s1,s2,len) do {\
1890 while (len-- > 0) {\
1891 if (*s1++ != *s2++) goto fail;\
1895 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1896 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1900 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
1901 UChar
* s1
, UChar
** ps2
, OnigDistance mblen
, const UChar
* text_end
)
1903 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1904 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1905 UChar
*p1
, *p2
, *end1
, *s2
;
1911 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, text_end
, buf1
);
1912 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, text_end
, buf2
);
1913 if (len1
!= len2
) return 0;
1916 while (len1
-- > 0) {
1917 if (*p1
!= *p2
) return 0;
1927 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1929 while (len-- > 0) {\
1930 if (*s1++ != *s2++) {\
1931 is_fail = 1; break;\
1936 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1937 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1944 #define IS_EMPTY_STR (str == end)
1945 #define ON_STR_BEGIN(s) ((s) == str)
1946 #define ON_STR_END(s) ((s) == end)
1947 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1948 # define DATA_ENSURE_CHECK1 (s < right_range)
1949 # define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1950 # define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1951 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1952 # define ABSENT_END_POS right_range
1954 # define DATA_ENSURE_CHECK1 (s < end)
1955 # define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1956 # define DATA_ENSURE(n) if (s + (n) > end) goto fail
1957 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1958 # define ABSENT_END_POS end
1959 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1961 int onigenc_mbclen_approximate(const OnigUChar
* p
,const OnigUChar
* e
, const struct OnigEncodingTypeST
* enc
);
1964 enclen_approx(OnigEncoding enc
, const OnigUChar
* p
, const OnigUChar
* e
)
1966 if (enc
->max_enc_len
== enc
->min_enc_len
) {
1967 return (p
< e
? enc
->min_enc_len
: 0);
1970 return onigenc_mbclen_approximate(p
, e
, enc
);
1975 #ifdef USE_CAPTURE_HISTORY
1977 make_capture_history_tree(OnigCaptureTreeNode
* node
, OnigStackType
** kp
,
1978 OnigStackType
* stk_top
, UChar
* str
, regex_t
* reg
)
1981 OnigCaptureTreeNode
* child
;
1982 OnigStackType
* k
= *kp
;
1984 while (k
< stk_top
) {
1985 if (k
->type
== STK_MEM_START
) {
1987 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
1988 BIT_STATUS_AT(reg
->capture_history
, n
) != 0) {
1989 child
= history_node_new();
1990 CHECK_NULL_RETURN_MEMERR(child
);
1992 child
->beg
= k
->u
.mem
.pstr
- str
;
1993 r
= history_tree_add_child(node
, child
);
1995 history_tree_free(child
);
1999 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
2000 if (r
!= 0) return r
;
2003 child
->end
= k
->u
.mem
.pstr
- str
;
2006 else if (k
->type
== STK_MEM_END
) {
2007 if (k
->u
.mem
.num
== node
->group
) {
2008 node
->end
= k
->u
.mem
.pstr
- str
;
2016 return 1; /* 1: root node ending. */
2018 #endif /* USE_CAPTURE_HISTORY */
2020 #ifdef USE_BACKREF_WITH_LEVEL
2022 mem_is_in_memp(int mem
, int num
, UChar
* memp
)
2027 for (i
= 0; i
< num
; i
++) {
2028 GET_MEMNUM_INC(m
, memp
);
2029 if (mem
== (int )m
) return 1;
2034 static int backref_match_at_nested_level(regex_t
* reg
,
2035 OnigStackType
* top
, OnigStackType
* stk_base
,
2036 int ignore_case
, int case_fold_flag
,
2037 int nest
, int mem_num
, UChar
* memp
, UChar
** s
, const UChar
* send
)
2039 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
2046 while (k
>= stk_base
) {
2047 if (k
->type
== STK_CALL_FRAME
) {
2050 else if (k
->type
== STK_RETURN
) {
2053 else if (level
== nest
) {
2054 if (k
->type
== STK_MEM_START
) {
2055 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
2056 pstart
= k
->u
.mem
.pstr
;
2057 if (pend
!= NULL_UCHARP
) {
2058 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
2062 if (ignore_case
!= 0) {
2063 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
2064 pstart
, &ss
, pend
- pstart
, send
) == 0)
2065 return 0; /* or goto next_mem; */
2069 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
2078 else if (k
->type
== STK_MEM_END
) {
2079 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
2080 pend
= k
->u
.mem
.pstr
;
2089 #endif /* USE_BACKREF_WITH_LEVEL */
2092 #ifdef ONIG_DEBUG_STATISTICS
2095 # include <windows.h>
2096 static LARGE_INTEGER ts
, te
, freq
;
2097 # define GETTIME(t) QueryPerformanceCounter(&(t))
2098 # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2099 * 1000000 / freq.QuadPart)
2102 # define USE_TIMEOFDAY
2104 # ifdef USE_TIMEOFDAY
2105 # ifdef HAVE_SYS_TIME_H
2106 # include <sys/time.h>
2108 # ifdef HAVE_UNISTD_H
2109 # include <unistd.h>
2111 static struct timeval ts
, te
;
2112 # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2113 # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2114 (((te).tv_sec - (ts).tv_sec)*1000000))
2115 # else /* USE_TIMEOFDAY */
2116 # ifdef HAVE_SYS_TIMES_H
2117 # include <sys/times.h>
2119 static struct tms ts
, te
;
2120 # define GETTIME(t) times(&(t))
2121 # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2122 # endif /* USE_TIMEOFDAY */
2124 # endif /* _WIN32 */
2126 static int OpCounter
[256];
2127 static int OpPrevCounter
[256];
2128 static unsigned long OpTime
[256];
2129 static int OpCurr
= OP_FINISH
;
2130 static int OpPrevTarget
= OP_FAIL
;
2131 static int MaxStackDepth
= 0;
2133 # define MOP_IN(opcode) do {\
2134 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2136 OpCounter[opcode]++;\
2140 # define MOP_OUT do {\
2142 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2146 onig_statistics_init(void)
2149 for (i
= 0; i
< 256; i
++) {
2150 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
2154 QueryPerformanceFrequency(&freq
);
2159 onig_print_statistics(FILE* f
)
2162 fprintf(f
, " count prev time\n");
2163 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
2164 fprintf(f
, "%8d: %8d: %10lu: %s\n",
2165 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OnigOpInfo
[i
].name
);
2167 fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
2170 # define STACK_INC do {\
2172 if (stk - stk_base > MaxStackDepth) \
2173 MaxStackDepth = stk - stk_base;\
2176 #else /* ONIG_DEBUG_STATISTICS */
2177 # define STACK_INC stk++
2179 # define MOP_IN(opcode)
2181 #endif /* ONIG_DEBUG_STATISTICS */
2184 #ifdef ONIG_DEBUG_MATCH
2186 stack_type_str(int stack_type
)
2188 switch (stack_type
) {
2189 case STK_ALT
: return "Alt ";
2190 case STK_LOOK_BEHIND_NOT
: return "LBNot ";
2191 case STK_POS_NOT
: return "PosNot";
2192 case STK_MEM_START
: return "MemS ";
2193 case STK_MEM_END
: return "MemE ";
2194 case STK_REPEAT_INC
: return "RepInc";
2195 case STK_STATE_CHECK_MARK
: return "StChMk";
2196 case STK_NULL_CHECK_START
: return "NulChS";
2197 case STK_NULL_CHECK_END
: return "NulChE";
2198 case STK_MEM_END_MARK
: return "MemEMk";
2199 case STK_POS
: return "Pos ";
2200 case STK_STOP_BT
: return "StopBt";
2201 case STK_REPEAT
: return "Rep ";
2202 case STK_CALL_FRAME
: return "Call ";
2203 case STK_RETURN
: return "Ret ";
2204 case STK_VOID
: return "Void ";
2205 case STK_ABSENT_POS
: return "AbsPos";
2206 case STK_ABSENT
: return "Absent";
2207 case STK_MATCH_CACHE_POINT
: return "MCache";
2208 default: return " ";
2212 #ifdef USE_MATCH_CACHE
2215 bsearch_cache_opcodes(const OnigCacheOpcode
*cache_opcodes
, long num_cache_opcodes
, const UChar
* p
)
2217 long l
= 0, r
= num_cache_opcodes
- 1, m
= 0;
2221 if (cache_opcodes
[m
].addr
== p
) break;
2222 if (cache_opcodes
[m
].addr
< p
) l
= m
+ 1;
2229 find_cache_point(regex_t
* reg
, const OnigCacheOpcode
* cache_opcodes
, long num_cache_opcodes
, const UChar
* p
, const OnigStackType
*stk
, const OnigStackIndex
*repeat_stk
, const OnigCacheOpcode
**cache_opcode_ptr
)
2232 const OnigCacheOpcode
* cache_opcode
;
2233 const OnigRepeatRange
* range
;
2234 const OnigStackType
*stkp
;
2236 int is_inc
= *p
== OP_REPEAT_INC
|| *p
== OP_REPEAT_INC_NG
;
2238 long num_cache_points_at_outer_repeat
;
2239 long num_cache_points_in_outer_repeat
;
2241 m
= bsearch_cache_opcodes(cache_opcodes
, num_cache_opcodes
, p
);
2243 if (!(0 <= m
&& m
< num_cache_opcodes
&& cache_opcodes
[m
].addr
== p
)) {
2247 cache_opcode
= &cache_opcodes
[m
];
2248 *cache_opcode_ptr
= &cache_opcodes
[m
];
2249 cache_point
= cache_opcode
->cache_point
;
2250 if (cache_opcode
->outer_repeat_mem
== -1) {
2254 num_cache_points_at_outer_repeat
= cache_opcode
->num_cache_points_at_outer_repeat
;
2255 num_cache_points_in_outer_repeat
= cache_opcode
->num_cache_points_in_outer_repeat
;
2257 range
= ®
->repeat_range
[cache_opcode
->outer_repeat_mem
];
2259 stkp
= &stk
[repeat_stk
[cache_opcode
->outer_repeat_mem
]];
2260 count
= is_inc
? stkp
->u
.repeat
.count
- 1 : stkp
->u
.repeat
.count
;
2262 if (count
< range
->lower
) {
2263 return num_cache_points_at_outer_repeat
+
2264 num_cache_points_in_outer_repeat
* count
+
2268 if (range
->upper
== 0x7fffffff) {
2269 return num_cache_points_at_outer_repeat
+
2270 num_cache_points_in_outer_repeat
* (range
->lower
- (is_inc
? 1 : 0)) + (is_inc
? 0 : 1) +
2274 return num_cache_points_at_outer_repeat
+
2275 num_cache_points_in_outer_repeat
* (range
->lower
- 1) +
2276 (num_cache_points_in_outer_repeat
+ 1) * (count
- range
->lower
+ 1) +
2280 static int check_extended_match_cache_point(uint8_t *match_cache_buf
, long match_cache_point_index
, uint8_t match_cache_point_mask
) {
2281 if (match_cache_point_mask
& 0x80) {
2282 return (match_cache_buf
[match_cache_point_index
+ 1] & 0x01) > 0;
2284 return (match_cache_buf
[match_cache_point_index
] & (match_cache_point_mask
<< 1)) > 0;
2288 static void memoize_extended_match_cache_point(uint8_t *match_cache_buf
, long match_cache_point_index
, uint8_t match_cache_point_mask
) {
2289 match_cache_buf
[match_cache_point_index
] |= match_cache_point_mask
;
2290 if (match_cache_point_mask
& 0x80) {
2291 match_cache_buf
[match_cache_point_index
+ 1] |= 0x01;
2293 match_cache_buf
[match_cache_point_index
] |= match_cache_point_mask
<< 1;
2297 #endif /* USE_MATCH_CACHE */
2299 /* match data(str - end) from position (sstart). */
2300 /* if sstart == str then set sprev to NULL. */
2302 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
2303 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2304 const UChar
* right_range
,
2306 const UChar
* sstart
, UChar
* sprev
, OnigMatchArg
* msa
)
2308 static const UChar FinishCode
[] = { OP_FINISH
};
2310 int i
, num_mem
, pop_level
;
2311 ptrdiff_t n
, best_len
;
2312 LengthType tlen
, tlen2
;
2315 OnigOptionType option
= reg
->options
;
2316 OnigEncoding encode
= reg
->enc
;
2317 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
2318 UChar
*s
, *q
, *sbegin
;
2323 char *xmalloc_base
= NULL
;
2324 OnigStackType
*stk_alloc
, *stk_base
= NULL
, *stk
, *stk_end
;
2325 OnigStackType
*stkp
; /* used as any purpose. */
2327 OnigStackIndex
*repeat_stk
;
2328 OnigStackIndex
*mem_start_stk
, *mem_end_stk
;
2329 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2331 unsigned char* state_check_buff
= msa
->state_check_buff
;
2332 int num_comb_exp_check
= reg
->num_comb_exp_check
;
2335 #if USE_TOKEN_THREADED_VM
2336 # define OP_OFFSET 1
2337 # define VM_LOOP JUMP;
2338 # define VM_LOOP_END
2339 # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2340 # define DEFAULT L_DEFAULT:
2341 # define NEXT sprev = sbegin; JUMP
2342 # define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2344 RB_GNUC_EXTENSION
static const void *oplabels
[] = {
2345 &&L_OP_FINISH
, /* matching process terminator (no more alternative) */
2346 &&L_OP_END
, /* pattern code terminator (success end) */
2348 &&L_OP_EXACT1
, /* single byte, N = 1 */
2349 &&L_OP_EXACT2
, /* single byte, N = 2 */
2350 &&L_OP_EXACT3
, /* single byte, N = 3 */
2351 &&L_OP_EXACT4
, /* single byte, N = 4 */
2352 &&L_OP_EXACT5
, /* single byte, N = 5 */
2353 &&L_OP_EXACTN
, /* single byte */
2354 &&L_OP_EXACTMB2N1
, /* mb-length = 2 N = 1 */
2355 &&L_OP_EXACTMB2N2
, /* mb-length = 2 N = 2 */
2356 &&L_OP_EXACTMB2N3
, /* mb-length = 2 N = 3 */
2357 &&L_OP_EXACTMB2N
, /* mb-length = 2 */
2358 &&L_OP_EXACTMB3N
, /* mb-length = 3 */
2359 &&L_OP_EXACTMBN
, /* other length */
2361 &&L_OP_EXACT1_IC
, /* single byte, N = 1, ignore case */
2362 &&L_OP_EXACTN_IC
, /* single byte, ignore case */
2368 &&L_OP_CCLASS_MB_NOT
,
2369 &&L_OP_CCLASS_MIX_NOT
,
2371 &&L_OP_ANYCHAR
, /* "." */
2372 &&L_OP_ANYCHAR_ML
, /* "." multi-line */
2373 &&L_OP_ANYCHAR_STAR
, /* ".*" */
2374 &&L_OP_ANYCHAR_ML_STAR
, /* ".*" multi-line */
2375 &&L_OP_ANYCHAR_STAR_PEEK_NEXT
,
2376 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT
,
2381 &&L_OP_NOT_WORD_BOUND
,
2382 # ifdef USE_WORD_BEGIN_END
2390 &&L_OP_NOT_ASCII_WORD
,
2391 &&L_OP_ASCII_WORD_BOUND
,
2392 &&L_OP_NOT_ASCII_WORD_BOUND
,
2393 # ifdef USE_WORD_BEGIN_END
2394 &&L_OP_ASCII_WORD_BEGIN
,
2395 &&L_OP_ASCII_WORD_END
,
2405 &&L_OP_SEMI_END_BUF
,
2406 &&L_OP_BEGIN_POSITION
,
2412 &&L_OP_BACKREF_MULTI
,
2413 &&L_OP_BACKREF_MULTI_IC
,
2414 # ifdef USE_BACKREF_WITH_LEVEL
2415 &&L_OP_BACKREF_WITH_LEVEL
, /* \k<xxx+n>, \k<xxx-n> */
2419 &&L_OP_MEMORY_START
,
2420 &&L_OP_MEMORY_START_PUSH
, /* push back-tracker to stack */
2421 &&L_OP_MEMORY_END_PUSH
, /* push back-tracker to stack */
2422 # ifdef USE_SUBEXP_CALL
2423 &&L_OP_MEMORY_END_PUSH_REC
, /* push back-tracker to stack */
2428 # ifdef USE_SUBEXP_CALL
2429 &&L_OP_MEMORY_END_REC
, /* push marker to stack */
2436 &&L_OP_FAIL
, /* pop stack and move */
2440 # ifdef USE_OP_PUSH_OR_JUMP_EXACT
2441 &&L_OP_PUSH_OR_JUMP_EXACT1
, /* if match exact then push, else jump. */
2445 &&L_OP_PUSH_IF_PEEK_NEXT
, /* if match exact then push, else none. */
2446 &&L_OP_REPEAT
, /* {n,m} */
2447 &&L_OP_REPEAT_NG
, /* {n,m}? (non greedy) */
2449 &&L_OP_REPEAT_INC_NG
, /* non greedy */
2450 &&L_OP_REPEAT_INC_SG
, /* search and get in stack */
2451 &&L_OP_REPEAT_INC_NG_SG
, /* search and get in stack (non greedy) */
2452 &&L_OP_NULL_CHECK_START
, /* null loop checker start */
2453 &&L_OP_NULL_CHECK_END
, /* null loop checker end */
2454 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2455 &&L_OP_NULL_CHECK_END_MEMST
, /* null loop checker end (with capture status) */
2459 # ifdef USE_SUBEXP_CALL
2460 &&L_OP_NULL_CHECK_END_MEMST_PUSH
, /* with capture status and push check-end */
2465 &&L_OP_PUSH_POS
, /* (?=...) start */
2466 &&L_OP_POP_POS
, /* (?=...) end */
2467 &&L_OP_PUSH_POS_NOT
, /* (?!...) start */
2468 &&L_OP_FAIL_POS
, /* (?!...) end */
2469 &&L_OP_PUSH_STOP_BT
, /* (?>...) start */
2470 &&L_OP_POP_STOP_BT
, /* (?>...) end */
2471 &&L_OP_LOOK_BEHIND
, /* (?<=...) start (no needs end opcode) */
2472 &&L_OP_PUSH_LOOK_BEHIND_NOT
, /* (?<!...) start */
2473 &&L_OP_FAIL_LOOK_BEHIND_NOT
, /* (?<!...) end */
2474 &&L_OP_PUSH_ABSENT_POS
, /* (?~...) start */
2475 &&L_OP_ABSENT
, /* (?~...) start of inner loop */
2476 &&L_OP_ABSENT_END
, /* (?~...) end */
2478 # ifdef USE_SUBEXP_CALL
2479 &&L_OP_CALL
, /* \g<name> */
2487 # ifdef USE_COMBINATION_EXPLOSION_CHECK
2488 &&L_OP_STATE_CHECK_PUSH
, /* combination explosion check and push */
2489 &&L_OP_STATE_CHECK_PUSH_OR_JUMP
, /* check ok -> push, else jump */
2490 &&L_OP_STATE_CHECK
, /* check only */
2496 # ifdef USE_COMBINATION_EXPLOSION_CHECK
2497 &&L_OP_STATE_CHECK_ANYCHAR_STAR
,
2498 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR
,
2503 /* no need: IS_DYNAMIC_OPTION() == 0 */
2504 # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2505 &&L_OP_SET_OPTION_PUSH
, /* set option and push recover option */
2506 &&L_OP_SET_OPTION
/* set option */
2512 #else /* USE_TOKEN_THREADED_VM */
2514 # define OP_OFFSET 0
2521 # define VM_LOOP_END } sprev = sbegin; }
2522 # define CASE(x) case x:
2523 # define DEFAULT default:
2525 # define JUMP continue; break
2526 #endif /* USE_TOKEN_THREADED_VM */
2529 #ifdef USE_SUBEXP_CALL
2530 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2531 etc. Additional space is required. */
2532 # define ADD_NUMMEM 1
2534 /* Stack #0 not is used. */
2535 # define ADD_NUMMEM 0
2538 n
= reg
->num_repeat
+ (reg
->num_mem
+ ADD_NUMMEM
) * 2;
2540 STACK_INIT(alloca_base
, xmalloc_base
, n
, INIT_MATCH_STACK_SIZE
);
2541 pop_level
= reg
->stack_pop_level
;
2542 num_mem
= reg
->num_mem
;
2543 repeat_stk
= (OnigStackIndex
* )alloca_base
;
2545 mem_start_stk
= (OnigStackIndex
* )(repeat_stk
+ reg
->num_repeat
);
2546 mem_end_stk
= mem_start_stk
+ (num_mem
+ ADD_NUMMEM
);
2548 OnigStackIndex
*pp
= mem_start_stk
;
2549 for (; pp
< repeat_stk
+ n
; pp
+= 2) {
2550 pp
[0] = INVALID_STACK_INDEX
;
2551 pp
[1] = INVALID_STACK_INDEX
;
2554 #ifndef USE_SUBEXP_CALL
2555 mem_start_stk
--; /* for index start from 1,
2556 mem_start_stk[1]..mem_start_stk[num_mem] */
2557 mem_end_stk
--; /* for index start from 1,
2558 mem_end_stk[1]..mem_end_stk[num_mem] */
2561 #ifdef ONIG_DEBUG_MATCH
2562 fprintf(stderr
, "match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
2563 (uintptr_t )str
, str
, (uintptr_t )end
, end
, (uintptr_t )sstart
, sstart
, (uintptr_t )sprev
, sprev
);
2564 fprintf(stderr
, "size: %d, start offset: %d\n",
2565 (int )(end
- str
), (int )(sstart
- str
));
2566 fprintf(stderr
, "\n ofs> str stk:type addr:opcode\n");
2569 STACK_PUSH_ENSURED(STK_ALT
, (UChar
* )FinishCode
); /* bottom stack */
2570 best_len
= ONIG_MISMATCH
;
2571 s
= (UChar
* )sstart
;
2572 pkeep
= (UChar
* )sstart
;
2575 #ifdef ONIG_DEBUG_MATCH
2576 # define OPCODE_EXEC_HOOK \
2578 UChar *op, *q, *bp, buf[50]; \
2580 op = p - OP_OFFSET; \
2581 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2584 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2585 for (i = 0; i < 7 && q < end; i++) { \
2586 len = enclen(encode, q, end); \
2587 while (len-- > 0) *bp++ = *q++; \
2589 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2591 xmemcpy(bp, "\"", 1); bp += 1; \
2593 fputs((char* )buf, stderr); \
2594 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2595 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2596 stk - stk_base - 1, \
2597 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2598 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2599 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2600 fprintf(stderr, "\n"); \
2603 # define OPCODE_EXEC_HOOK ((void) 0)
2606 #ifdef USE_MATCH_CACHE
2607 #ifdef ONIG_DEBUG_MATCH_CACHE
2608 #define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2609 #define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2611 #define MATCH_CACHE_DEBUG ((void) 0)
2612 #define MATCH_CACHE_DEBUG_HIT ((void) 0)
2615 #define MATCH_CACHE_HIT ((void) 0)
2617 # define CHECK_MATCH_CACHE do {\
2618 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2619 const OnigCacheOpcode *cache_opcode;\
2620 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2621 if (cache_point >= 0) {\
2622 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2623 long match_cache_point_index = match_cache_point >> 3;\
2624 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2626 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2627 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2628 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2629 else if (cache_opcode->lookaround_nesting < 0) {\
2630 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2631 STACK_STOP_BT_FAIL;\
2635 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2636 p = cache_opcode->match_addr;\
2642 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2647 # define CHECK_MATCH_CACHE ((void) 0)
2651 CASE(OP_END) MOP_IN(OP_END);
2655 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2656 if (IS_FIND_LONGEST(option)) {
2657 if (n > msa->best_len) {
2659 msa->best_s = (UChar* )sstart;
2666 region = msa->region;
2668 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2669 region->end[0] = s - str;
2670 for (i = 1; i <= num_mem; i++) {
2671 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2672 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2673 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2675 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2677 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2678 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2679 : (UChar* )((void* )mem_end_stk[i])) - str;
2682 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2686 #ifdef USE_CAPTURE_HISTORY
2687 if (reg->capture_history != 0) {
2689 OnigCaptureTreeNode* node;
2691 if (IS_NULL(region->history_root)) {
2692 region->history_root = node = history_node_new();
2693 CHECK_NULL_RETURN_MEMERR(node);
2696 node = region->history_root;
2697 history_tree_clear(node);
2701 node->beg = ((pkeep > s) ? s : pkeep) - str;
2702 node->end = s - str;
2705 r = make_capture_history_tree(region->history_root, &stkp,
2706 stk, (UChar* )str, reg);
2708 best_len = r; /* error code */
2712 #endif /* USE_CAPTURE_HISTORY */
2714 } /* n > best_len */
2716 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2721 if (IS_FIND_CONDITION(option)) {
2722 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2723 best_len = ONIG_MISMATCH;
2724 goto fail; /* for retry */
2726 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2727 goto fail; /* for retry */
2731 /* default behavior: return first-matching result. */
2735 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2737 if (*p != *s) goto fail;
2742 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2745 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2748 len = ONIGENC_MBC_CASE_FOLD(encode,
2749 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2764 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2766 if (*p != *s) goto fail;
2768 if (*p != *s) goto fail;
2774 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2776 if (*p != *s) goto fail;
2778 if (*p != *s) goto fail;
2780 if (*p != *s) goto fail;
2786 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2788 if (*p != *s) goto fail;
2790 if (*p != *s) goto fail;
2792 if (*p != *s) goto fail;
2794 if (*p != *s) goto fail;
2800 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2802 if (*p != *s) goto fail;
2804 if (*p != *s) goto fail;
2806 if (*p != *s) goto fail;
2808 if (*p != *s) goto fail;
2810 if (*p != *s) goto fail;
2816 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2817 GET_LENGTH_INC(tlen, p);
2819 while (tlen-- > 0) {
2820 if (*p++ != *s++) goto fail;
2826 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2829 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2831 GET_LENGTH_INC(tlen, p);
2837 len = ONIGENC_MBC_CASE_FOLD(encode,
2838 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2844 if (*p != *q) goto fail;
2853 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2855 if (*p != *s) goto fail;
2857 if (*p != *s) goto fail;
2862 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2864 if (*p != *s) goto fail;
2866 if (*p != *s) goto fail;
2869 if (*p != *s) goto fail;
2871 if (*p != *s) goto fail;
2876 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2878 if (*p != *s) goto fail;
2880 if (*p != *s) goto fail;
2882 if (*p != *s) goto fail;
2884 if (*p != *s) goto fail;
2887 if (*p != *s) goto fail;
2889 if (*p != *s) goto fail;
2894 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2895 GET_LENGTH_INC(tlen, p);
2896 DATA_ENSURE(tlen * 2);
2897 while (tlen-- > 0) {
2898 if (*p != *s) goto fail;
2900 if (*p != *s) goto fail;
2907 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2908 GET_LENGTH_INC(tlen, p);
2909 DATA_ENSURE(tlen * 3);
2910 while (tlen-- > 0) {
2911 if (*p != *s) goto fail;
2913 if (*p != *s) goto fail;
2915 if (*p != *s) goto fail;
2922 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2923 GET_LENGTH_INC(tlen, p); /* mb-len */
2924 GET_LENGTH_INC(tlen2, p); /* string len */
2927 while (tlen2-- > 0) {
2928 if (*p != *s) goto fail;
2935 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2937 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2939 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2943 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2944 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2947 GET_LENGTH_INC(tlen, p);
2954 mb_len = enclen_approx(encode, s, end);
2955 DATA_ENSURE(mb_len);
2958 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2960 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2961 if (! onig_is_in_code_range(p, code)) goto fail;
2965 if (! onig_is_in_code_range(q, code)) goto fail;
2972 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2974 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2979 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2983 GET_LENGTH_INC(tlen, p);
2990 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2992 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2994 s += enclen(encode, s, end);
2998 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
3000 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3002 GET_LENGTH_INC(tlen, p);
3004 goto cc_mb_not_success;
3008 GET_LENGTH_INC(tlen, p);
3012 int mb_len = enclen(encode, s, end);
3014 if (! DATA_ENSURE_CHECK(mb_len)) {
3018 goto cc_mb_not_success;
3023 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3025 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3026 if (onig_is_in_code_range(p, code)) goto fail;
3030 if (onig_is_in_code_range(q, code)) goto fail;
3039 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3041 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3046 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3050 GET_LENGTH_INC(tlen, p);
3057 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3059 n = enclen_approx(encode, s, end);
3061 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3066 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3068 n = enclen_approx(encode, s, end);
3074 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3075 while (DATA_ENSURE_CHECK1) {
3077 STACK_PUSH_ALT(p, s, sprev, pkeep);
3078 n = enclen_approx(encode, s, end);
3080 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3087 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3088 while (DATA_ENSURE_CHECK1) {
3090 STACK_PUSH_ALT(p, s, sprev, pkeep);
3091 n = enclen_approx(encode, s, end);
3105 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3106 while (DATA_ENSURE_CHECK1) {
3109 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3111 #ifdef USE_MATCH_CACHE
3112 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3113 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
3117 n = enclen_approx(encode, s, end);
3119 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3127 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3128 while (DATA_ENSURE_CHECK1) {
3131 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3133 #ifdef USE_MATCH_CACHE
3134 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3135 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
3139 n = enclen_approx(encode, s, end);
3154 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3155 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3156 GET_STATE_CHECK_NUM_INC(mem, p);
3157 while (DATA_ENSURE_CHECK1) {
3158 STATE_CHECK_VAL(scv, mem);
3161 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3162 n = enclen_approx(encode, s, end);
3164 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3171 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3172 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3174 GET_STATE_CHECK_NUM_INC(mem, p);
3175 while (DATA_ENSURE_CHECK1) {
3176 STATE_CHECK_VAL(scv, mem);
3179 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3180 n = enclen_approx(encode, s, end);
3193 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
3195 CASE(OP_WORD) MOP_IN(OP_WORD);
3197 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3200 s += enclen(encode, s, end);
3204 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3206 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3209 s += enclen(encode, s, end);
3213 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3215 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3218 s += enclen(encode, s, end);
3222 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3224 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3227 s += enclen(encode, s, end);
3231 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3232 if (ON_STR_BEGIN(s)) {
3234 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3237 else if (ON_STR_END(s)) {
3238 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3242 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3243 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3249 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3250 if (ON_STR_BEGIN(s)) {
3252 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3255 else if (ON_STR_END(s)) {
3256 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3260 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3261 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3267 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3268 if (ON_STR_BEGIN(s)) {
3269 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3272 else if (ON_STR_END(s)) {
3273 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3277 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3278 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3284 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3285 if (ON_STR_BEGIN(s)) {
3286 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3289 else if (ON_STR_END(s)) {
3290 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3294 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3295 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3301 #ifdef USE_WORD_BEGIN_END
3302 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3303 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3304 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3312 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3313 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3314 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3322 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3323 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3324 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3332 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3333 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3334 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3343 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3344 if (! ON_STR_BEGIN(s)) goto fail;
3345 if (IS_NOTBOS(msa->options)) goto fail;
3350 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3351 if (! ON_STR_END(s)) goto fail;
3352 if (IS_NOTEOS(msa->options)) goto fail;
3357 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3358 if (ON_STR_BEGIN(s)) {
3359 if (IS_NOTBOL(msa->options)) goto fail;
3363 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3364 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3365 && !(IS_NEWLINE_CRLF(option)
3366 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3368 && !ON_STR_END(s)) {
3375 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3376 if (ON_STR_END(s)) {
3377 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3378 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3380 if (IS_NOTEOL(msa->options)) goto fail;
3383 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3387 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3394 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3395 if (ON_STR_END(s)) {
3396 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3397 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3399 if (IS_NOTEOL(msa->options)) goto fail;
3402 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3406 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3407 UChar* ss = s + enclen(encode, s, end);
3408 if (ON_STR_END(ss)) {
3412 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3413 else if (IS_NEWLINE_CRLF(option)
3414 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3415 ss += enclen(encode, ss, end);
3416 if (ON_STR_END(ss)) {
3426 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3433 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3434 GET_MEMNUM_INC(mem, p);
3435 STACK_PUSH_MEM_START(mem, s);
3439 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3440 GET_MEMNUM_INC(mem, p);
3441 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3442 mem_end_stk[mem] = INVALID_STACK_INDEX;
3446 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3447 GET_MEMNUM_INC(mem, p);
3448 STACK_PUSH_MEM_END(mem, s);
3452 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3453 GET_MEMNUM_INC(mem, p);
3454 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3458 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3463 #ifdef USE_SUBEXP_CALL
3464 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3465 GET_MEMNUM_INC(mem, p);
3466 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3467 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3468 STACK_PUSH_MEM_END(mem, s);
3472 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3473 GET_MEMNUM_INC(mem, p);
3474 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3475 STACK_GET_MEM_START(mem, stkp);
3477 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3478 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3480 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3482 STACK_PUSH_MEM_END_MARK(mem);
3487 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3492 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3497 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3498 GET_MEMNUM_INC(mem, p);
3502 UChar *pstart, *pend;
3504 /* if you want to remove following line,
3505 you should check in parse and compile time. */
3506 if (mem > num_mem) goto fail;
3507 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3508 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3510 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3511 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3513 pstart = (UChar* )((void* )mem_start_stk[mem]);
3515 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3516 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3517 : (UChar* )((void* )mem_end_stk[mem]));
3521 STRING_CMP(pstart, s, n);
3522 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3529 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3530 GET_MEMNUM_INC(mem, p);
3533 UChar *pstart, *pend;
3535 /* if you want to remove following line,
3536 you should check in parse and compile time. */
3537 if (mem > num_mem) goto fail;
3538 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3539 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3541 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3542 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3544 pstart = (UChar* )((void* )mem_start_stk[mem]);
3546 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3547 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3548 : (UChar* )((void* )mem_end_stk[mem]));
3552 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3553 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3561 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3564 UChar *pstart, *pend, *swork;
3566 GET_LENGTH_INC(tlen, p);
3567 for (i = 0; i < tlen; i++) {
3568 GET_MEMNUM_INC(mem, p);
3570 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3571 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3573 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3574 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3576 pstart = (UChar* )((void* )mem_start_stk[mem]);
3578 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3579 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3580 : (UChar* )((void* )mem_end_stk[mem]));
3582 DATA_ENSURE_CONTINUE(n);
3585 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3586 if (is_fail) continue;
3588 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3591 p += (SIZE_MEMNUM * (tlen - i - 1));
3592 break; /* success */
3594 if (i == tlen) goto fail;
3600 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3603 UChar *pstart, *pend, *swork;
3605 GET_LENGTH_INC(tlen, p);
3606 for (i = 0; i < tlen; i++) {
3607 GET_MEMNUM_INC(mem, p);
3609 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3610 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3612 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3613 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3615 pstart = (UChar* )((void* )mem_start_stk[mem]);
3617 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3618 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3619 : (UChar* )((void* )mem_end_stk[mem]));
3621 DATA_ENSURE_CONTINUE(n);
3624 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3625 if (is_fail) continue;
3627 while (sprev + (len = enclen(encode, sprev, end)) < s)
3630 p += (SIZE_MEMNUM * (tlen - i - 1));
3631 break; /* success */
3633 if (i == tlen) goto fail;
3638 #ifdef USE_BACKREF_WITH_LEVEL
3639 CASE(OP_BACKREF_WITH_LEVEL)
3645 GET_OPTION_INC(ic, p);
3646 GET_LENGTH_INC(level, p);
3647 GET_LENGTH_INC(tlen, p);
3650 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3651 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3652 while (sprev + (len = enclen(encode, sprev, end)) < s)
3655 p += (SIZE_MEMNUM * tlen);
3666 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3667 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3668 GET_OPTION_INC(option, p);
3669 STACK_PUSH_ALT(p, s, sprev, pkeep);
3670 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3674 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3675 GET_OPTION_INC(option, p);
3680 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3681 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3682 STACK_PUSH_NULL_CHECK_START(mem, s);
3686 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3690 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3691 STACK_NULL_CHECK(isnull, mem, s);
3693 #ifdef ONIG_DEBUG_MATCH
3694 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3695 (int )mem, (uintptr_t )s, s);
3698 /* empty loop founded, skip next instruction */
3705 case OP_REPEAT_INC_NG:
3706 case OP_REPEAT_INC_SG:
3707 case OP_REPEAT_INC_NG_SG:
3711 goto unexpected_bytecode_error;
3719 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3720 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3724 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3725 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3727 # ifdef ONIG_DEBUG_MATCH
3728 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3729 (int )mem, (uintptr_t )s, s);
3731 if (isnull == -1) goto fail;
3732 goto null_check_found;
3739 #ifdef USE_SUBEXP_CALL
3740 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3741 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3745 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3746 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3747 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3749 STACK_NULL_CHECK_REC(isnull, mem, s);
3752 # ifdef ONIG_DEBUG_MATCH
3753 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3754 (int )mem, (uintptr_t )s, s);
3756 if (isnull == -1) goto fail;
3757 goto null_check_found;
3760 STACK_PUSH_NULL_CHECK_END(mem);
3767 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3768 GET_RELADDR_INC(addr, p);
3771 CHECK_INTERRUPT_IN_MATCH_AT;
3774 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3775 GET_RELADDR_INC(addr, p);
3777 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3781 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3782 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3783 GET_STATE_CHECK_NUM_INC(mem, p);
3784 STATE_CHECK_VAL(scv, mem);
3787 GET_RELADDR_INC(addr, p);
3788 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3792 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3793 GET_STATE_CHECK_NUM_INC(mem, p);
3794 GET_RELADDR_INC(addr, p);
3795 STATE_CHECK_VAL(scv, mem);
3800 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3805 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3806 GET_STATE_CHECK_NUM_INC(mem, p);
3807 STATE_CHECK_VAL(scv, mem);
3810 STACK_PUSH_STATE_CHECK(s, mem);
3813 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
3815 CASE(OP_POP) MOP_IN(OP_POP);
3817 #ifdef USE_MATCH_CACHE
3818 /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3819 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3825 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3826 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3827 GET_RELADDR_INC(addr, p);
3828 if (*p == *s && DATA_ENSURE_CHECK1) {
3831 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3840 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3841 GET_RELADDR_INC(addr, p);
3845 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3854 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3856 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3857 GET_RELADDR_INC(addr, p);
3860 repeat_stk[mem] = GET_STACK_INDEX(stk);
3861 STACK_PUSH_REPEAT(mem, p);
3863 if (reg->repeat_range[mem].lower == 0) {
3865 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3871 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3873 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3874 GET_RELADDR_INC(addr, p);
3877 repeat_stk[mem] = GET_STACK_INDEX(stk);
3878 STACK_PUSH_REPEAT(mem, p);
3880 if (reg->repeat_range[mem].lower == 0) {
3882 STACK_PUSH_ALT(p, s, sprev, pkeep);
3889 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3890 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3891 si = repeat_stk[mem];
3892 stkp = STACK_AT(si);
3895 stkp->u.repeat.count++;
3896 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3897 /* end of repeat. Nothing to do. */
3899 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3900 #ifdef USE_MATCH_CACHE
3901 if (*pbegin == OP_REPEAT_INC) {
3902 #undef MATCH_CACHE_HIT
3903 #define MATCH_CACHE_HIT stkp->u.repeat.count--;
3905 #undef MATCH_CACHE_HIT
3906 #define MATCH_CACHE_HIT ((void) 0)
3909 STACK_PUSH_ALT(p, s, sprev, pkeep);
3910 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3913 p = stkp->u.repeat.pcode;
3915 STACK_PUSH_REPEAT_INC(si);
3917 CHECK_INTERRUPT_IN_MATCH_AT;
3920 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3921 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3922 STACK_GET_REPEAT(mem, stkp);
3923 si = GET_STACK_INDEX(stkp);
3927 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3928 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3929 si = repeat_stk[mem];
3930 stkp = STACK_AT(si);
3933 stkp->u.repeat.count++;
3934 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3935 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3936 UChar* pcode = stkp->u.repeat.pcode;
3938 STACK_PUSH_REPEAT_INC(si);
3939 if (*pbegin == OP_REPEAT_INC_NG) {
3942 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3945 p = stkp->u.repeat.pcode;
3946 STACK_PUSH_REPEAT_INC(si);
3949 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3950 STACK_PUSH_REPEAT_INC(si);
3953 CHECK_INTERRUPT_IN_MATCH_AT;
3956 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3957 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3958 STACK_GET_REPEAT(mem, stkp);
3959 si = GET_STACK_INDEX(stkp);
3963 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3964 STACK_PUSH_POS(s, sprev, pkeep);
3968 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3970 STACK_POS_END(stkp);
3971 s = stkp->u.state.pstr;
3972 sprev = stkp->u.state.pstr_prev;
3977 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3978 GET_RELADDR_INC(addr, p);
3979 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3983 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3984 STACK_POP_TIL_POS_NOT;
3988 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3993 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3998 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3999 GET_LENGTH_INC(tlen, p);
4000 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4001 if (IS_NULL(s)) goto fail;
4002 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4006 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4007 GET_RELADDR_INC(addr, p);
4008 GET_LENGTH_INC(tlen, p);
4009 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4011 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
4012 If you want to change to fail, replace following line. */
4017 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4019 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4024 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4025 STACK_POP_TIL_LOOK_BEHIND_NOT;
4029 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4030 /* Save the absent-start-pos and the original end-pos. */
4031 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4035 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4037 const UChar* aend = ABSENT_END_POS;
4039 UChar* selfp = p - 1;
4041 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
4042 GET_RELADDR_INC(addr, p);
4043 #ifdef ONIG_DEBUG_MATCH
4044 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4046 if ((absent > aend) && (s > absent)) {
4047 /* An empty match occurred in (?~...) at the start point.
4052 else if ((s >= aend) && (s > absent)) {
4054 /* Only one (or less) character matched in the last iteration.
4055 * This is not a possible point. */
4058 /* All possible points were found. Try matching after (?~...). */
4062 else if (s == end) {
4063 /* At the end of the string, just match with it */
4068 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4069 n = enclen(encode, s, end);
4070 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4071 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4073 ABSENT_END_POS = aend;
4079 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4080 /* The pattern inside (?~...) was matched.
4081 * Set the end-pos temporary and go to next iteration. */
4082 if (sprev < ABSENT_END_POS)
4083 ABSENT_END_POS = sprev;
4084 #ifdef ONIG_DEBUG_MATCH
4085 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4087 STACK_POP_TIL_ABSENT;
4091 #ifdef USE_SUBEXP_CALL
4092 CASE(OP_CALL) MOP_IN(OP_CALL);
4093 GET_ABSADDR_INC(addr, p);
4094 STACK_PUSH_CALL_FRAME(p);
4099 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4106 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4107 GET_MEMNUM_INC(mem, p);
4108 GET_RELADDR_INC(addr, p);
4109 if ((mem > num_mem) ||
4110 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4111 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4129 p = stk->u.state.pcode;
4130 s = stk->u.state.pstr;
4131 sprev = stk->u.state.pstr_prev;
4132 pkeep = stk->u.state.pkeep;
4134 #ifdef USE_MATCH_CACHE
4136 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4137 ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4139 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4140 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4141 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4142 if (r < 0) goto bytecode_error;
4144 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4145 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4146 goto fail_match_cache;
4148 if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4149 goto fail_match_cache;
4151 if (msa->cache_opcodes == NULL) {
4152 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4153 OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4154 if (cache_opcodes == NULL) {
4155 return ONIGERR_MEMORY;
4157 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4159 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4160 else goto bytecode_error;
4162 msa->cache_opcodes = cache_opcodes;
4163 #ifdef ONIG_DEBUG_MATCH_CACHE
4164 fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4165 fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4166 fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4167 for (int i = 0; i < msa->num_cache_opcodes; i++) {
4168 fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4172 if (msa->match_cache_buf == NULL) {
4173 size_t length = (end - str) + 1;
4174 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4175 #ifdef ONIG_DEBUG_MATCH_CACHE
4176 fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4178 /* Overflow check */
4179 if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4180 return ONIGERR_MEMORY;
4182 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4183 return ONIGERR_MEMORY;
4185 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4186 uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4187 if (match_cache_buf == NULL) {
4188 return ONIGERR_MEMORY;
4190 xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4191 msa->match_cache_buf = match_cache_buf;
4197 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4198 if (stk->u.state.state_check != 0) {
4199 stk->type = STK_STATE_CHECK_MARK;
4205 CHECK_INTERRUPT_IN_MATCH_AT;
4209 goto bytecode_error;
4214 xfree(xmalloc_base);
4220 xfree(xmalloc_base);
4221 return ONIGERR_STACK_BUG;
4226 xfree(xmalloc_base);
4227 return ONIGERR_UNDEFINED_BYTECODE;
4229 unexpected_bytecode_error:
4231 xfree(xmalloc_base);
4232 return ONIGERR_UNEXPECTED_BYTECODE;
4236 xfree(xmalloc_base);
4237 return ONIGERR_TIMEOUT;
4242 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4243 const UChar* text, const UChar* text_end, UChar* text_range)
4245 UChar *t, *p, *s, *end;
4247 end = (UChar* )text_end;
4248 end -= target_end - target - 1;
4249 if (end > text_range)
4254 if (enc->max_enc_len == enc->min_enc_len) {
4255 int n = enc->max_enc_len;
4258 if (*s == *target) {
4261 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4266 return (UChar* )NULL;
4269 if (*s == *target) {
4272 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4275 s += enclen(enc, s, text_end);
4278 return (UChar* )NULL;
4282 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4283 const UChar* t, const UChar* tend,
4284 const UChar* p, const UChar* end)
4287 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4290 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4292 while (lowlen > 0) {
4293 if (*t++ != *q++) return 0;
4302 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4303 UChar* target, UChar* target_end,
4304 const UChar* text, const UChar* text_end, UChar* text_range)
4308 end = (UChar* )text_end;
4309 end -= target_end - target - 1;
4310 if (end > text_range)
4316 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4320 s += enclen(enc, s, text_end);
4323 return (UChar* )NULL;
4327 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4328 const UChar* text, const UChar* adjust_text,
4329 const UChar* text_end, const UChar* text_start)
4333 s = (UChar* )text_end;
4334 s -= (target_end - target);
4336 s = (UChar* )text_start;
4338 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4341 if (*s == *target) {
4344 while (t < target_end) {
4349 if (t == target_end)
4352 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4355 return (UChar* )NULL;
4359 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4360 UChar* target, UChar* target_end,
4361 const UChar* text, const UChar* adjust_text,
4362 const UChar* text_end, const UChar* text_start)
4366 s = (UChar* )text_end;
4367 s -= (target_end - target);
4369 s = (UChar* )text_start;
4371 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4374 if (str_lower_case_match(enc, case_fold_flag,
4375 target, target_end, s, text_end))
4378 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4381 return (UChar* )NULL;
4384 #ifndef USE_SUNDAY_QUICK_SEARCH
4385 /* Boyer-Moore-Horspool search applied to a multibyte string */
4387 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4388 const UChar* text, const UChar* text_end,
4389 const UChar* text_range)
4391 const UChar *s, *se, *t, *p, *end;
4393 ptrdiff_t skip, tlen1;
4395 # ifdef ONIG_DEBUG_SEARCH
4396 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4397 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4400 tail = target_end - 1;
4401 tlen1 = tail - target;
4403 if (end + tlen1 > text_end)
4404 end = text_end - tlen1;
4408 if (IS_NULL(reg->int_map)) {
4413 if (t == target) return (UChar* )s;
4416 skip = reg->map[*se];
4419 s += enclen(reg->enc, s, end);
4420 } while ((s - t) < skip && s < end);
4424 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4429 if (t == target) return (UChar* )s;
4432 skip = reg->int_map[*se];
4435 s += enclen(reg->enc, s, end);
4436 } while ((s - t) < skip && s < end);
4441 return (UChar* )NULL;
4444 /* Boyer-Moore-Horspool search */
4446 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4447 const UChar* text, const UChar* text_end, const UChar* text_range)
4449 const UChar *s, *t, *p, *end;
4452 # ifdef ONIG_DEBUG_SEARCH
4453 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4454 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4457 end = text_range + (target_end - target) - 1;
4461 tail = target_end - 1;
4462 s = text + (target_end - target) - 1;
4463 if (IS_NULL(reg->int_map)) {
4467 # ifdef ONIG_DEBUG_SEARCH
4468 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4469 (intptr_t )(s - text), s);
4472 if (t == target) return (UChar* )p;
4478 else { /* see int_map[] */
4479 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4484 if (t == target) return (UChar* )p;
4487 s += reg->int_map[*s];
4491 return (UChar* )NULL;
4494 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4496 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4497 const UChar* text, const UChar* text_end,
4498 const UChar* text_range)
4500 const UChar *s, *se, *t, *end;
4502 ptrdiff_t skip, tlen1;
4503 OnigEncoding enc = reg->enc;
4504 int case_fold_flag = reg->case_fold_flag;
4506 # ifdef ONIG_DEBUG_SEARCH
4507 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4508 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4511 tail = target_end - 1;
4512 tlen1 = tail - target;
4514 if (end + tlen1 > text_end)
4515 end = text_end - tlen1;
4519 if (IS_NULL(reg->int_map)) {
4522 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4525 skip = reg->map[*se];
4528 s += enclen(reg->enc, s, end);
4529 } while ((s - t) < skip && s < end);
4533 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4536 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4539 skip = reg->int_map[*se];
4542 s += enclen(reg->enc, s, end);
4543 } while ((s - t) < skip && s < end);
4548 return (UChar* )NULL;
4551 /* Boyer-Moore-Horspool search (ignore case) */
4553 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4554 const UChar* text, const UChar* text_end, const UChar* text_range)
4556 const UChar *s, *p, *end;
4558 OnigEncoding enc = reg->enc;
4559 int case_fold_flag = reg->case_fold_flag;
4561 # ifdef ONIG_DEBUG_SEARCH
4562 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4563 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4566 end = text_range + (target_end - target) - 1;
4570 tail = target_end - 1;
4571 s = text + (target_end - target) - 1;
4572 if (IS_NULL(reg->int_map)) {
4574 p = s - (target_end - target) + 1;
4575 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4581 else { /* see int_map[] */
4582 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4584 p = s - (target_end - target) + 1;
4585 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4588 s += reg->int_map[*s];
4592 return (UChar* )NULL;
4595 #else /* USE_SUNDAY_QUICK_SEARCH */
4597 /* Sunday's quick search applied to a multibyte string */
4599 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4600 const UChar* text, const UChar* text_end,
4601 const UChar* text_range)
4603 const UChar *s, *se, *t, *p, *end;
4605 ptrdiff_t skip, tlen1;
4606 OnigEncoding enc = reg->enc;
4608 # ifdef ONIG_DEBUG_SEARCH
4609 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4610 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4613 tail = target_end - 1;
4614 tlen1 = tail - target;
4616 if (end + tlen1 > text_end)
4617 end = text_end - tlen1;
4621 if (IS_NULL(reg->int_map)) {
4626 if (t == target) return (UChar* )s;
4629 if (s + 1 >= end) break;
4630 skip = reg->map[se[1]];
4633 s += enclen(enc, s, end);
4634 } while ((s - t) < skip && s < end);
4638 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4643 if (t == target) return (UChar* )s;
4646 if (s + 1 >= end) break;
4647 skip = reg->int_map[se[1]];
4650 s += enclen(enc, s, end);
4651 } while ((s - t) < skip && s < end);
4656 return (UChar* )NULL;
4659 /* Sunday's quick search */
4661 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4662 const UChar* text, const UChar* text_end, const UChar* text_range)
4664 const UChar *s, *t, *p, *end;
4668 # ifdef ONIG_DEBUG_SEARCH
4669 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4670 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4673 tail = target_end - 1;
4674 tlen1 = tail - target;
4675 end = text_range + tlen1;
4680 if (IS_NULL(reg->int_map)) {
4685 if (t == target) return (UChar* )p;
4688 if (s + 1 >= end) break;
4689 s += reg->map[s[1]];
4692 else { /* see int_map[] */
4693 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4698 if (t == target) return (UChar* )p;
4701 if (s + 1 >= end) break;
4702 s += reg->int_map[s[1]];
4706 return (UChar* )NULL;
4709 /* Sunday's quick search applied to a multibyte string (ignore case) */
4711 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4712 const UChar* text, const UChar* text_end,
4713 const UChar* text_range)
4715 const UChar *s, *se, *t, *end;
4717 ptrdiff_t skip, tlen1;
4718 OnigEncoding enc = reg->enc;
4719 int case_fold_flag = reg->case_fold_flag;
4721 # ifdef ONIG_DEBUG_SEARCH
4722 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4723 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4726 tail = target_end - 1;
4727 tlen1 = tail - target;
4729 if (end + tlen1 > text_end)
4730 end = text_end - tlen1;
4734 if (IS_NULL(reg->int_map)) {
4737 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4740 if (s + 1 >= end) break;
4741 skip = reg->map[se[1]];
4744 s += enclen(enc, s, end);
4745 } while ((s - t) < skip && s < end);
4749 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4752 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4755 if (s + 1 >= end) break;
4756 skip = reg->int_map[se[1]];
4759 s += enclen(enc, s, end);
4760 } while ((s - t) < skip && s < end);
4765 return (UChar* )NULL;
4768 /* Sunday's quick search (ignore case) */
4770 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4771 const UChar* text, const UChar* text_end, const UChar* text_range)
4773 const UChar *s, *p, *end;
4776 OnigEncoding enc = reg->enc;
4777 int case_fold_flag = reg->case_fold_flag;
4779 # ifdef ONIG_DEBUG_SEARCH
4780 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4781 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4784 tail = target_end - 1;
4785 tlen1 = tail - target;
4786 end = text_range + tlen1;
4791 if (IS_NULL(reg->int_map)) {
4794 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4797 if (s + 1 >= end) break;
4798 s += reg->map[s[1]];
4801 else { /* see int_map[] */
4802 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4805 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4808 if (s + 1 >= end) break;
4809 s += reg->int_map[s[1]];
4813 return (UChar* )NULL;
4815 #endif /* USE_SUNDAY_QUICK_SEARCH */
4817 #ifdef USE_INT_MAP_BACKWARD
4819 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4824 if (IS_NULL(*skip)) {
4825 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4826 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4829 len = (int )(end - s);
4830 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4833 for (i = len - 1; i > 0; i--)
4840 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4841 const UChar* text, const UChar* adjust_text,
4842 const UChar* text_end, const UChar* text_start)
4844 const UChar *s, *t, *p;
4846 s = text_end - (target_end - target);
4850 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4855 while (t < target_end && *p == *t) {
4858 if (t == target_end)
4861 s -= reg->int_map_backward[*s];
4862 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4865 return (UChar* )NULL;
4870 map_search(OnigEncoding enc, UChar map[],
4871 const UChar* text, const UChar* text_range, const UChar* text_end)
4873 const UChar *s = text;
4875 while (s < text_range) {
4876 if (map[*s]) return (UChar* )s;
4878 s += enclen(enc, s, text_end);
4880 return (UChar* )NULL;
4884 map_search_backward(OnigEncoding enc, UChar map[],
4885 const UChar* text, const UChar* adjust_text,
4886 const UChar* text_start, const UChar* text_end)
4888 const UChar *s = text_start;
4891 if (map[*s]) return (UChar* )s;
4893 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4895 return (UChar* )NULL;
4899 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4900 OnigOptionType option)
4906 MATCH_ARG_INIT(msa, option, region, at, at);
4907 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4909 ptrdiff_t offset = at - str;
4910 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4915 r = onig_region_resize_clear(region, reg->num_mem + 1);
4921 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4922 r = match_at(reg, str, end,
4923 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4929 MATCH_ARG_FREE(msa);
4934 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4935 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4937 UChar *p, *pprev = (UChar* )NULL;
4938 size_t input_len = end - str;
4940 #ifdef ONIG_DEBUG_SEARCH
4941 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4942 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4945 if (reg->dmin > input_len) {
4950 if (reg->dmin > 0) {
4951 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4955 UChar *q = p + reg->dmin;
4957 if (q >= end) return 0; /* fail */
4958 while (p < q) p += enclen(reg->enc, p, end);
4963 switch (reg->optimize) {
4964 case ONIG_OPTIMIZE_EXACT:
4965 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4967 case ONIG_OPTIMIZE_EXACT_IC:
4968 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4969 reg->exact, reg->exact_end, p, end, range);
4972 case ONIG_OPTIMIZE_EXACT_BM:
4973 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4976 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4977 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4980 case ONIG_OPTIMIZE_EXACT_BM_IC:
4981 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4984 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4985 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4988 case ONIG_OPTIMIZE_MAP:
4989 p = map_search(reg->enc, reg->map, p, range, end);
4993 if (p && p < range) {
4994 if (p - reg->dmin < s) {
4997 p += enclen(reg->enc, p, end);
5001 if (reg->sub_anchor) {
5004 switch (reg->sub_anchor) {
5005 case ANCHOR_BEGIN_LINE:
5006 if (!ON_STR_BEGIN(p)) {
5007 prev = onigenc_get_prev_char_head(reg->enc,
5008 (pprev ? pprev : str), p, end);
5009 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
5014 case ANCHOR_END_LINE:
5015 if (ON_STR_END(p)) {
5016 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5017 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5018 (pprev ? pprev : str), p);
5019 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
5023 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
5029 if (reg->dmax == 0) {
5033 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
5035 *low_prev = onigenc_get_prev_char_head(reg->enc,
5036 (pprev ? pprev : str), p, end);
5040 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5041 if (p < str + reg->dmax) {
5042 *low = (UChar* )str;
5044 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
5047 *low = p - reg->dmax;
5049 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
5050 *low, end, (const UChar** )low_prev);
5051 if (low_prev && IS_NULL(*low_prev))
5052 *low_prev = onigenc_get_prev_char_head(reg->enc,
5053 (pprev ? pprev : s), *low, end);
5057 *low_prev = onigenc_get_prev_char_head(reg->enc,
5058 (pprev ? pprev : str), *low, end);
5063 /* no needs to adjust *high, *high is used as range check only */
5064 *high = p - reg->dmin;
5066 #ifdef ONIG_DEBUG_SEARCH
5068 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
5069 *low - str, *high - str, reg->dmin, reg->dmax);
5071 return 1; /* success */
5074 return 0; /* fail */
5077 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
5080 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
5081 UChar* s, const UChar* range, UChar* adjrange,
5082 UChar** low, UChar** high)
5085 size_t input_len = end - str;
5087 if (reg->dmin > input_len) {
5095 switch (reg->optimize) {
5096 case ONIG_OPTIMIZE_EXACT:
5098 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5099 range, adjrange, end, p);
5102 case ONIG_OPTIMIZE_EXACT_IC:
5103 case ONIG_OPTIMIZE_EXACT_BM_IC:
5104 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
5105 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5106 reg->exact, reg->exact_end,
5107 range, adjrange, end, p);
5110 case ONIG_OPTIMIZE_EXACT_BM:
5111 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
5112 #ifdef USE_INT_MAP_BACKWARD
5113 if (IS_NULL(reg->int_map_backward)) {
5115 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
5118 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
5119 &(reg->int_map_backward));
5122 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
5129 case ONIG_OPTIMIZE_MAP:
5130 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
5135 if (reg->sub_anchor) {
5138 switch (reg->sub_anchor) {
5139 case ANCHOR_BEGIN_LINE:
5140 if (!ON_STR_BEGIN(p)) {
5141 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
5142 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
5149 case ANCHOR_END_LINE:
5150 if (ON_STR_END(p)) {
5151 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5152 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5153 if (IS_NULL(prev)) goto fail;
5154 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
5160 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
5161 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5162 if (IS_NULL(p)) goto fail;
5169 /* no needs to adjust *high, *high is used as range check only */
5170 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5171 *low = p - reg->dmax;
5172 *high = p - reg->dmin;
5173 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
5176 #ifdef ONIG_DEBUG_SEARCH
5177 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
5178 (int )(*low - str), (int )(*high - str));
5180 return 1; /* success */
5184 #ifdef ONIG_DEBUG_SEARCH
5185 fprintf(stderr, "backward_search_range: fail.\n");
5187 return 0; /* fail */
5192 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5193 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5195 return onig_search_gpos(reg, str, end, start, start, range, region, option);
5199 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
5200 const UChar* global_pos,
5201 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5206 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5207 const UChar *orig_start = start;
5208 const UChar *orig_range = range;
5211 #ifdef ONIG_DEBUG_SEARCH
5213 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
5214 (uintptr_t )str, str, end - str, start - str, range - str);
5218 r = onig_region_resize_clear(region, reg->num_mem + 1);
5219 if (r) goto finish_no_msa;
5222 if (start > end || start < str) goto mismatch_no_msa;
5225 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5226 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5227 # define MATCH_AND_RETURN_CHECK(upper_range) \
5228 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5230 case ONIG_MISMATCH: \
5232 case ONIGERR_TIMEOUT: \
5236 if (! IS_FIND_LONGEST(reg->options)) { \
5240 else goto finish; /* error */ \
5243 # define MATCH_AND_RETURN_CHECK(upper_range) \
5244 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5246 case ONIG_MISMATCH: \
5248 case ONIGERR_TIMEOUT: \
5254 else goto finish; /* error */ \
5256 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5258 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5259 # define MATCH_AND_RETURN_CHECK(none) \
5260 r = match_at(reg, str, end, s, prev, &msa);\
5262 case ONIG_MISMATCH: \
5264 case ONIGERR_TIMEOUT: \
5268 if (! IS_FIND_LONGEST(reg->options)) { \
5272 else goto finish; /* error */ \
5275 # define MATCH_AND_RETURN_CHECK(none) \
5276 r = match_at(reg, str, end, s, prev, &msa);\
5278 case ONIG_MISMATCH: \
5280 case ONIGERR_TIMEOUT: \
5286 else goto finish; /* error */ \
5288 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5289 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5292 /* anchor optimize: resume search range */
5293 if (reg->anchor != 0 && str < end) {
5294 UChar *min_semi_end, *max_semi_end;
5296 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5297 /* search start-position only */
5301 if (global_pos > start)
5303 if (global_pos < range)
5304 range = global_pos + 1;
5312 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5313 /* search str-position only */
5314 if (range > start) {
5315 if (start != str) goto mismatch_no_msa;
5324 goto mismatch_no_msa;
5327 else if (reg->anchor & ANCHOR_END_BUF) {
5328 min_semi_end = max_semi_end = (UChar* )end;
5331 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5332 goto mismatch_no_msa;
5334 if (range > start) {
5335 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5336 start = min_semi_end - reg->anchor_dmax;
5338 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5340 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5341 range = max_semi_end - reg->anchor_dmin + 1;
5344 if (start > range) goto mismatch_no_msa;
5345 /* If start == range, match with empty at end.
5346 Backward search is used. */
5349 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5350 range = min_semi_end - reg->anchor_dmax;
5352 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5353 start = max_semi_end - reg->anchor_dmin;
5354 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5356 if (range > start) goto mismatch_no_msa;
5359 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5360 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5362 max_semi_end = (UChar* )end;
5363 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5364 min_semi_end = pre_end;
5366 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5367 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5368 if (IS_NOT_NULL(pre_end) &&
5369 IS_NEWLINE_CRLF(reg->options) &&
5370 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5371 min_semi_end = pre_end;
5374 if (min_semi_end > str && start <= min_semi_end) {
5379 min_semi_end = (UChar* )end;
5383 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5384 goto begin_position;
5387 else if (str == end) { /* empty string */
5388 static const UChar address_for_empty_string[] = "";
5390 #ifdef ONIG_DEBUG_SEARCH
5391 fprintf(stderr, "onig_search: empty string.\n");
5394 if (reg->threshold_len == 0) {
5395 start = end = str = address_for_empty_string;
5397 prev = (UChar* )NULL;
5399 MATCH_ARG_INIT(msa, option, region, start, start);
5400 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5401 msa.state_check_buff = (void* )0;
5402 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5404 MATCH_AND_RETURN_CHECK(end);
5407 goto mismatch_no_msa;
5410 #ifdef ONIG_DEBUG_SEARCH
5411 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5412 (int )(end - str), (int )(start - str), (int )(range - str));
5415 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5416 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5418 ptrdiff_t offset = (MIN(start, range) - str);
5419 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5424 if (range > start) { /* forward search */
5426 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5428 prev = (UChar* )NULL;
5430 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5431 UChar *sch_range, *low, *high, *low_prev;
5433 sch_range = (UChar* )range;
5434 if (reg->dmax != 0) {
5435 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5436 sch_range = (UChar* )end;
5438 sch_range += reg->dmax;
5439 if (sch_range > end) sch_range = (UChar* )end;
5443 if ((end - start) < reg->threshold_len)
5446 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5448 if (! forward_search_range(reg, str, end, s, sch_range,
5449 &low, &high, &low_prev)) goto mismatch;
5455 MATCH_AND_RETURN_CHECK(orig_range);
5457 s += enclen(reg->enc, s, end);
5459 } while (s < range);
5462 else { /* check only. */
5463 if (! forward_search_range(reg, str, end, s, sch_range,
5464 &low, &high, (UChar** )NULL)) goto mismatch;
5466 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5468 MATCH_AND_RETURN_CHECK(orig_range);
5470 s += enclen(reg->enc, s, end);
5472 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5473 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5476 s += enclen(reg->enc, s, end);
5479 } while (s < range);
5486 MATCH_AND_RETURN_CHECK(orig_range);
5488 s += enclen(reg->enc, s, end);
5489 } while (s < range);
5491 if (s == range) { /* because empty match with /$/. */
5492 MATCH_AND_RETURN_CHECK(orig_range);
5495 else { /* backward search */
5496 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5497 UChar *low, *high, *adjrange, *sch_start;
5500 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5502 adjrange = (UChar* )end;
5504 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5505 (end - range) >= reg->threshold_len) {
5507 sch_start = s + reg->dmax;
5508 if (sch_start > end) sch_start = (UChar* )end;
5509 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5517 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5518 MATCH_AND_RETURN_CHECK(orig_start);
5521 } while (s >= range);
5524 else { /* check only. */
5525 if ((end - range) < reg->threshold_len) goto mismatch;
5528 if (reg->dmax != 0) {
5529 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5530 sch_start = (UChar* )end;
5532 sch_start += reg->dmax;
5533 if (sch_start > end) sch_start = (UChar* )end;
5535 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5536 start, sch_start, end);
5539 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5540 &low, &high) <= 0) goto mismatch;
5545 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5546 MATCH_AND_RETURN_CHECK(orig_start);
5548 } while (s >= range);
5552 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5553 if (IS_FIND_LONGEST(reg->options)) {
5554 if (msa.best_len >= 0) {
5563 MATCH_ARG_FREE(msa);
5565 /* If result is mismatch and no FIND_NOT_EMPTY option,
5566 then the region is not set in match_at(). */
5567 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5568 onig_region_clear(region);
5572 if (r != ONIG_MISMATCH)
5573 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5581 if (r != ONIG_MISMATCH)
5582 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5587 MATCH_ARG_FREE(msa);
5591 MATCH_ARG_FREE(msa);
5592 return ONIGERR_TIMEOUT;
5596 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5597 OnigRegion* region, OnigOptionType option,
5598 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5609 r = onig_search(reg, str, end, start, end, region, option);
5611 rs = scan_callback(n, r, region, callback_arg);
5616 if (region->end[0] == start - str) {
5617 if (start >= end) break;
5618 start += enclen(reg->enc, start, end);
5621 start = str + region->end[0];
5626 else if (r == ONIG_MISMATCH) {
5638 onig_get_encoding(const regex_t* reg)
5643 extern OnigOptionType
5644 onig_get_options(const regex_t* reg)
5646 return reg->options;
5649 extern OnigCaseFoldType
5650 onig_get_case_fold_flag(const regex_t* reg)
5652 return reg->case_fold_flag;
5655 extern const OnigSyntaxType*
5656 onig_get_syntax(const regex_t* reg)
5662 onig_number_of_captures(const regex_t* reg)
5664 return reg->num_mem;
5668 onig_number_of_capture_histories(const regex_t* reg)
5670 #ifdef USE_CAPTURE_HISTORY
5674 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5675 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5685 onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)