[ruby/psych] Add support for ruby 3.2 Data objects
[ruby.git] / regexec.c
blobd200a3cc2898bbdd7647162e423940a488bca4ef
1 /**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
31 #include "regint.h"
33 #ifdef RUBY
34 # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35 #else
36 # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37 #endif
39 #ifndef USE_TOKEN_THREADED_VM
40 # ifdef __GNUC__
41 # define USE_TOKEN_THREADED_VM 1
42 # else
43 # define USE_TOKEN_THREADED_VM 0
44 # endif
45 #endif
47 #ifdef RUBY
48 # define ENC_DUMMY_FLAG (1<<24)
49 static inline int
50 rb_enc_asciicompat(OnigEncoding enc)
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
54 # undef ONIGENC_IS_MBC_ASCII_WORD
55 # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59 #endif /* RUBY */
61 #ifdef USE_CRNL_AS_LINE_TERMINATOR
62 # define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67 static int
68 is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
80 else
81 return 1;
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
98 #else /* USE_CRNL_AS_LINE_TERMINATOR */
99 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101 #endif /* USE_CRNL_AS_LINE_TERMINATOR */
103 #ifdef USE_CAPTURE_HISTORY
104 static void history_tree_free(OnigCaptureTreeNode* node);
106 static void
107 history_tree_clear(OnigCaptureTreeNode* node)
109 int i;
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
129 static void
130 history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
133 xfree(node);
136 static void
137 history_root_free(OnigRegion* r)
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
145 static OnigCaptureTreeNode*
146 history_node_new(void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
159 return node;
162 static int
163 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165 # define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
199 static OnigCaptureTreeNode*
200 history_tree_clone(OnigCaptureTreeNode* node)
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
224 return clone;
227 extern OnigCaptureTreeNode*
228 onig_get_capture_tree(OnigRegion* region)
230 return region->history_root;
232 #endif /* USE_CAPTURE_HISTORY */
234 #ifdef USE_MATCH_CACHE
237 Glossary for "match cache"
239 "match cache" or "match cache optimization"
240 The `Regexp#match` optimization by using a cache.
242 "cache opcode"
243 A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
244 It is corresponding to some cache points.
246 "cache point"
247 A cacheable point on matching.
248 Usually, one-to-one corresponding between a cache opcode and a cache point exists,
249 but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
250 cache points depending on repetition counts.
252 "match cache point"
253 A pair of a cache point and a position on an input string.
254 We encode a match cache point to an integer value by the following equation:
255 "match cache point" = "position on input string" * "total number of cache points" + "cache point"
257 "match cache buffer"
258 A bit-array for memoizing (recording) match cache points once backtracked.
261 static OnigPosition count_num_cache_opcodes_inner(
262 const regex_t* reg,
263 MemNumType current_repeat_mem, int lookaround_nesting,
264 UChar** pp, long* num_cache_opcodes_ptr
267 UChar* p = *pp;
268 UChar* pend = reg->p + reg->used;
269 LengthType len;
270 MemNumType repeat_mem;
271 OnigEncoding enc = reg->enc;
272 long num_cache_opcodes = *num_cache_opcodes_ptr;
273 OnigPosition result;
275 while (p < pend) {
276 switch (*p++) {
277 case OP_FINISH:
278 case OP_END:
279 break;
281 case OP_EXACT1: p++; break;
282 case OP_EXACT2: p += 2; break;
283 case OP_EXACT3: p += 3; break;
284 case OP_EXACT4: p += 4; break;
285 case OP_EXACT5: p += 5; break;
286 case OP_EXACTN:
287 GET_LENGTH_INC(len, p); p += len; break;
288 case OP_EXACTMB2N1: p += 2; break;
289 case OP_EXACTMB2N2: p += 4; break;
290 case OP_EXACTMB2N3: p += 6; break;
291 case OP_EXACTMB2N:
292 GET_LENGTH_INC(len, p); p += len * 2; break;
293 case OP_EXACTMB3N:
294 GET_LENGTH_INC(len, p); p += len * 3; break;
295 case OP_EXACTMBN:
297 int mb_len;
298 GET_LENGTH_INC(mb_len, p);
299 GET_LENGTH_INC(len, p);
300 p += mb_len * len;
302 break;
304 case OP_EXACT1_IC:
305 len = enclen(enc, p, pend); p += len; break;
306 case OP_EXACTN_IC:
307 GET_LENGTH_INC(len, p); p += len; break;
309 case OP_CCLASS:
310 case OP_CCLASS_NOT:
311 p += SIZE_BITSET; break;
312 case OP_CCLASS_MB:
313 case OP_CCLASS_MB_NOT:
314 GET_LENGTH_INC(len, p); p += len; break;
315 case OP_CCLASS_MIX:
316 case OP_CCLASS_MIX_NOT:
317 p += SIZE_BITSET;
318 GET_LENGTH_INC(len, p);
319 p += len;
320 break;
322 case OP_ANYCHAR:
323 case OP_ANYCHAR_ML:
324 break;
325 case OP_ANYCHAR_STAR:
326 case OP_ANYCHAR_ML_STAR:
327 num_cache_opcodes++; break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330 p++; num_cache_opcodes++; break;
332 case OP_WORD:
333 case OP_NOT_WORD:
334 case OP_WORD_BOUND:
335 case OP_NOT_WORD_BOUND:
336 case OP_WORD_BEGIN:
337 case OP_WORD_END:
338 break;
340 case OP_ASCII_WORD:
341 case OP_NOT_ASCII_WORD:
342 case OP_ASCII_WORD_BOUND:
343 case OP_NOT_ASCII_WORD_BOUND:
344 case OP_ASCII_WORD_BEGIN:
345 case OP_ASCII_WORD_END:
346 break;
348 case OP_BEGIN_BUF:
349 case OP_END_BUF:
350 case OP_BEGIN_LINE:
351 case OP_END_LINE:
352 case OP_SEMI_END_BUF:
353 case OP_BEGIN_POSITION:
354 break;
356 case OP_BACKREF1:
357 case OP_BACKREF2:
358 case OP_BACKREFN:
359 case OP_BACKREFN_IC:
360 case OP_BACKREF_MULTI:
361 case OP_BACKREF_MULTI_IC:
362 case OP_BACKREF_WITH_LEVEL:
363 goto impossible;
365 case OP_MEMORY_START:
366 case OP_MEMORY_START_PUSH:
367 case OP_MEMORY_END_PUSH:
368 case OP_MEMORY_END_PUSH_REC:
369 case OP_MEMORY_END:
370 case OP_MEMORY_END_REC:
371 p += SIZE_MEMNUM;
372 // A memory (capture) in look-around is found.
373 if (lookaround_nesting != 0) {
374 goto impossible;
376 break;
378 case OP_KEEP:
379 break;
381 case OP_FAIL:
382 break;
383 case OP_JUMP:
384 p += SIZE_RELADDR;
385 break;
386 case OP_PUSH:
387 p += SIZE_RELADDR;
388 num_cache_opcodes++;
389 break;
390 case OP_POP:
391 break;
392 case OP_PUSH_OR_JUMP_EXACT1:
393 case OP_PUSH_IF_PEEK_NEXT:
394 p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
395 case OP_REPEAT:
396 case OP_REPEAT_NG:
397 if (current_repeat_mem != -1) {
398 // A nested OP_REPEAT is not yet supported.
399 goto impossible;
401 GET_MEMNUM_INC(repeat_mem, p);
402 p += SIZE_RELADDR;
403 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
404 long dummy_num_cache_opcodes = 0;
405 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
406 if (result < 0 || dummy_num_cache_opcodes < 0) {
407 goto fail;
409 } else {
410 if (reg->repeat_range[repeat_mem].lower == 0) {
411 num_cache_opcodes++;
413 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
414 if (result < 0 || num_cache_opcodes < 0) {
415 goto fail;
417 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
418 if (repeat_range->lower < repeat_range->upper) {
419 num_cache_opcodes++;
422 break;
423 case OP_REPEAT_INC:
424 case OP_REPEAT_INC_NG:
425 GET_MEMNUM_INC(repeat_mem, p);
426 if (repeat_mem != current_repeat_mem) {
427 // A lone or invalid OP_REPEAT_INC is found.
428 goto impossible;
430 goto exit;
431 case OP_REPEAT_INC_SG:
432 case OP_REPEAT_INC_NG_SG:
433 goto impossible;
434 case OP_NULL_CHECK_START:
435 p += SIZE_MEMNUM;
436 break;
437 case OP_NULL_CHECK_END:
438 case OP_NULL_CHECK_END_MEMST_PUSH:
439 p += SIZE_MEMNUM;
440 break;
441 case OP_NULL_CHECK_END_MEMST:
442 p += SIZE_MEMNUM;
443 break;
445 case OP_PUSH_POS:
446 if (lookaround_nesting < 0) {
447 // A look-around nested in a atomic grouping is found.
448 goto impossible;
450 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
451 if (result < 0 || num_cache_opcodes < 0) {
452 goto fail;
454 break;
455 case OP_PUSH_POS_NOT:
456 if (lookaround_nesting < 0) {
457 // A look-around nested in a atomic grouping is found.
458 goto impossible;
460 p += SIZE_RELADDR;
461 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
462 if (result < 0 || num_cache_opcodes < 0) {
463 goto fail;
465 break;
466 case OP_PUSH_LOOK_BEHIND_NOT:
467 if (lookaround_nesting < 0) {
468 // A look-around nested in a atomic grouping is found.
469 goto impossible;
471 p += SIZE_RELADDR;
472 p += SIZE_LENGTH;
473 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
474 if (result < 0 || num_cache_opcodes < 0) {
475 goto fail;
477 break;
478 case OP_PUSH_STOP_BT:
479 if (lookaround_nesting != 0) {
480 // A nested atomic grouping is found.
481 goto impossible;
483 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
484 if (result < 0 || num_cache_opcodes < 0) {
485 goto fail;
487 break;
488 case OP_POP_POS:
489 case OP_FAIL_POS:
490 case OP_FAIL_LOOK_BEHIND_NOT:
491 case OP_POP_STOP_BT:
492 goto exit;
493 case OP_LOOK_BEHIND:
494 p += SIZE_LENGTH;
495 break;
497 case OP_PUSH_ABSENT_POS:
498 case OP_ABSENT_END:
499 case OP_ABSENT:
500 goto impossible;
502 case OP_CALL:
503 case OP_RETURN:
504 goto impossible;
506 case OP_CONDITION:
507 goto impossible;
509 case OP_STATE_CHECK_PUSH:
510 case OP_STATE_CHECK_PUSH_OR_JUMP:
511 case OP_STATE_CHECK:
512 case OP_STATE_CHECK_ANYCHAR_STAR:
513 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
514 goto impossible;
516 case OP_SET_OPTION_PUSH:
517 case OP_SET_OPTION:
518 p += SIZE_OPTION;
519 break;
521 default:
522 goto bytecode_error;
526 exit:
527 *pp = p;
528 *num_cache_opcodes_ptr = num_cache_opcodes;
529 return 0;
531 fail:
532 *num_cache_opcodes_ptr = num_cache_opcodes;
533 return result;
535 impossible:
536 *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
537 return 0;
539 bytecode_error:
540 return ONIGERR_UNDEFINED_BYTECODE;
543 /* count the total number of cache opcodes for allocating a match cache buffer. */
544 static OnigPosition
545 count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
547 UChar* p = reg->p;
548 *num_cache_opcodes_ptr = 0;
549 OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
550 if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
551 return ONIGERR_UNDEFINED_BYTECODE;
554 return result;
557 static OnigPosition
558 init_cache_opcodes_inner(
559 const regex_t* reg,
560 MemNumType current_repeat_mem, int lookaround_nesting,
561 OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
564 UChar* p = *pp;
565 UChar* pend = reg->p + reg->used;
566 UChar* pbegin;
567 LengthType len;
568 MemNumType repeat_mem;
569 OnigEncoding enc = reg->enc;
570 long cache_point = *num_cache_points_ptr;
571 OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
572 OnigPosition result;
574 # define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575 cache_opcodes->addr = pbegin;\
576 cache_opcodes->cache_point = cache_point;\
577 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580 cache_opcodes->lookaround_nesting = lookaround_nesting;\
581 cache_opcodes->match_addr = NULL;\
582 cache_point += lookaround_nesting != 0 ? 2 : 1;\
583 cache_opcodes++;\
586 while (p < pend) {
587 pbegin = p;
588 switch (*p++) {
589 case OP_FINISH:
590 case OP_END:
591 break;
593 case OP_EXACT1: p++; break;
594 case OP_EXACT2: p += 2; break;
595 case OP_EXACT3: p += 3; break;
596 case OP_EXACT4: p += 4; break;
597 case OP_EXACT5: p += 5; break;
598 case OP_EXACTN:
599 GET_LENGTH_INC(len, p); p += len; break;
600 case OP_EXACTMB2N1: p += 2; break;
601 case OP_EXACTMB2N2: p += 4; break;
602 case OP_EXACTMB2N3: p += 6; break;
603 case OP_EXACTMB2N:
604 GET_LENGTH_INC(len, p); p += len * 2; break;
605 case OP_EXACTMB3N:
606 GET_LENGTH_INC(len, p); p += len * 3; break;
607 case OP_EXACTMBN:
609 int mb_len;
610 GET_LENGTH_INC(mb_len, p);
611 GET_LENGTH_INC(len, p);
612 p += mb_len * len;
614 break;
616 case OP_EXACT1_IC:
617 len = enclen(enc, p, pend); p += len; break;
618 case OP_EXACTN_IC:
619 GET_LENGTH_INC(len, p); p += len; break;
621 case OP_CCLASS:
622 case OP_CCLASS_NOT:
623 p += SIZE_BITSET; break;
624 case OP_CCLASS_MB:
625 case OP_CCLASS_MB_NOT:
626 GET_LENGTH_INC(len, p); p += len; break;
627 case OP_CCLASS_MIX:
628 case OP_CCLASS_MIX_NOT:
629 p += SIZE_BITSET;
630 GET_LENGTH_INC(len, p);
631 p += len;
632 break;
634 case OP_ANYCHAR:
635 case OP_ANYCHAR_ML:
636 break;
637 case OP_ANYCHAR_STAR:
638 case OP_ANYCHAR_ML_STAR:
639 INC_CACHE_OPCODES;
640 break;
641 case OP_ANYCHAR_STAR_PEEK_NEXT:
642 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
643 p++;
644 INC_CACHE_OPCODES;
645 break;
647 case OP_WORD:
648 case OP_NOT_WORD:
649 case OP_WORD_BOUND:
650 case OP_NOT_WORD_BOUND:
651 case OP_WORD_BEGIN:
652 case OP_WORD_END:
653 break;
655 case OP_ASCII_WORD:
656 case OP_NOT_ASCII_WORD:
657 case OP_ASCII_WORD_BOUND:
658 case OP_NOT_ASCII_WORD_BOUND:
659 case OP_ASCII_WORD_BEGIN:
660 case OP_ASCII_WORD_END:
661 break;
663 case OP_BEGIN_BUF:
664 case OP_END_BUF:
665 case OP_BEGIN_LINE:
666 case OP_END_LINE:
667 case OP_SEMI_END_BUF:
668 case OP_BEGIN_POSITION:
669 break;
671 case OP_BACKREF1:
672 case OP_BACKREF2:
673 case OP_BACKREFN:
674 case OP_BACKREFN_IC:
675 case OP_BACKREF_MULTI:
676 case OP_BACKREF_MULTI_IC:
677 case OP_BACKREF_WITH_LEVEL:
678 goto unexpected_bytecode_error;
680 case OP_MEMORY_START:
681 case OP_MEMORY_START_PUSH:
682 case OP_MEMORY_END_PUSH:
683 case OP_MEMORY_END_PUSH_REC:
684 case OP_MEMORY_END:
685 case OP_MEMORY_END_REC:
686 p += SIZE_MEMNUM;
687 if (lookaround_nesting != 0) {
688 goto unexpected_bytecode_error;
690 break;
692 case OP_KEEP:
693 break;
695 case OP_FAIL:
696 break;
697 case OP_JUMP:
698 p += SIZE_RELADDR;
699 break;
700 case OP_PUSH:
701 p += SIZE_RELADDR;
702 INC_CACHE_OPCODES;
703 break;
704 case OP_POP:
705 break;
706 case OP_PUSH_OR_JUMP_EXACT1:
707 case OP_PUSH_IF_PEEK_NEXT:
708 p += SIZE_RELADDR + 1;
709 INC_CACHE_OPCODES;
710 break;
711 case OP_REPEAT:
712 case OP_REPEAT_NG:
713 GET_MEMNUM_INC(repeat_mem, p);
714 p += SIZE_RELADDR;
715 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
716 long dummy_num_cache_points = 0;
717 OnigCacheOpcode* dummy_cache_opcodes = NULL;
718 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
719 if (result != 0) {
720 goto fail;
722 } else {
723 if (reg->repeat_range[repeat_mem].lower == 0) {
724 INC_CACHE_OPCODES;
727 long num_cache_points_in_repeat = 0;
728 long num_cache_points_at_repeat = cache_point;
729 OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
730 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
731 if (result != 0) {
732 goto fail;
734 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
735 if (repeat_range->lower < repeat_range->upper) {
736 INC_CACHE_OPCODES;
737 cache_point -= lookaround_nesting != 0 ? 2 : 1;
739 int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
740 cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
741 for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
742 cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
743 cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
747 break;
748 case OP_REPEAT_INC:
749 case OP_REPEAT_INC_NG:
750 p += SIZE_MEMNUM;
751 goto exit;
752 case OP_REPEAT_INC_SG:
753 case OP_REPEAT_INC_NG_SG:
754 goto unexpected_bytecode_error;
755 case OP_NULL_CHECK_START:
756 p += SIZE_MEMNUM;
757 break;
758 case OP_NULL_CHECK_END:
759 case OP_NULL_CHECK_END_MEMST_PUSH:
760 p += SIZE_MEMNUM;
761 break;
762 case OP_NULL_CHECK_END_MEMST:
763 p += SIZE_MEMNUM;
764 break;
766 case OP_PUSH_POS:
767 lookaround:
769 OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
770 result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
771 if (result != 0) {
772 goto fail;
774 UChar* match_addr = p - 1;
775 for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
776 if (cache_opcodes_in_lookaround->match_addr == NULL) {
777 cache_opcodes_in_lookaround->match_addr = match_addr;
781 break;
782 case OP_PUSH_POS_NOT:
783 p += SIZE_RELADDR;
784 goto lookaround;
785 case OP_PUSH_LOOK_BEHIND_NOT:
786 p += SIZE_RELADDR;
787 p += SIZE_LENGTH;
788 goto lookaround;
789 case OP_PUSH_STOP_BT:
791 OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
792 result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
793 if (result != 0) {
794 goto fail;
796 UChar* match_addr = p - 1;
797 for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
798 if (cache_opcodes_in_atomic->match_addr == NULL) {
799 cache_opcodes_in_atomic->match_addr = match_addr;
803 break;
804 case OP_POP_POS:
805 case OP_FAIL_POS:
806 case OP_FAIL_LOOK_BEHIND_NOT:
807 case OP_POP_STOP_BT:
808 goto exit;
809 case OP_LOOK_BEHIND:
810 p += SIZE_LENGTH;
811 break;
813 case OP_ABSENT_END:
814 case OP_ABSENT:
815 goto unexpected_bytecode_error;
817 case OP_CALL:
818 case OP_RETURN:
819 goto unexpected_bytecode_error;
821 case OP_CONDITION:
822 goto unexpected_bytecode_error;
824 case OP_STATE_CHECK_PUSH:
825 case OP_STATE_CHECK_PUSH_OR_JUMP:
826 case OP_STATE_CHECK:
827 case OP_STATE_CHECK_ANYCHAR_STAR:
828 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
829 goto unexpected_bytecode_error;
831 case OP_SET_OPTION_PUSH:
832 case OP_SET_OPTION:
833 p += SIZE_OPTION;
834 break;
836 default:
837 goto bytecode_error;
841 exit:
842 *cache_opcodes_ptr = cache_opcodes;
843 *pp = p;
844 *num_cache_points_ptr = cache_point;
845 return 0;
847 fail:
848 return result;
850 unexpected_bytecode_error:
851 return ONIGERR_UNEXPECTED_BYTECODE;
853 bytecode_error:
854 return ONIGERR_UNDEFINED_BYTECODE;
857 /* collect cache opcodes from the given regex program, and compute the total number of cache points. */
858 static OnigPosition
859 init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
861 UChar* p = reg->p;
862 *num_cache_points_ptr = 0;
863 OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
864 if (result == 0 && p != reg->p + reg->used) {
865 return ONIGERR_UNDEFINED_BYTECODE;
868 return result;
870 #else
871 static OnigPosition
872 count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
874 *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
875 return 0;
877 #endif /* USE_MATCH_CACHE */
879 extern int
880 onig_check_linear_time(OnigRegexType* reg)
882 long num_cache_opcodes = 0;
883 count_num_cache_opcodes(reg, &num_cache_opcodes);
884 return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
887 extern void
888 onig_region_clear(OnigRegion* region)
890 int i;
892 for (i = 0; i < region->num_regs; i++) {
893 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
895 #ifdef USE_CAPTURE_HISTORY
896 history_root_free(region);
897 #endif
900 extern int
901 onig_region_resize(OnigRegion* region, int n)
903 region->num_regs = n;
905 if (n < ONIG_NREGION)
906 n = ONIG_NREGION;
908 if (region->allocated == 0) {
909 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
910 if (region->beg == 0)
911 return ONIGERR_MEMORY;
913 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
914 if (region->end == 0) {
915 xfree(region->beg);
916 return ONIGERR_MEMORY;
919 region->allocated = n;
921 else if (region->allocated < n) {
922 OnigPosition *tmp;
924 region->allocated = 0;
925 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
926 if (tmp == 0) {
927 xfree(region->beg);
928 xfree(region->end);
929 return ONIGERR_MEMORY;
931 region->beg = tmp;
932 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
933 if (tmp == 0) {
934 xfree(region->beg);
935 xfree(region->end);
936 return ONIGERR_MEMORY;
938 region->end = tmp;
940 region->allocated = n;
943 return 0;
946 static int
947 onig_region_resize_clear(OnigRegion* region, int n)
949 int r;
951 r = onig_region_resize(region, n);
952 if (r != 0) return r;
953 onig_region_clear(region);
954 return 0;
957 extern int
958 onig_region_set(OnigRegion* region, int at, int beg, int end)
960 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
962 if (at >= region->allocated) {
963 int r = onig_region_resize(region, at + 1);
964 if (r < 0) return r;
967 region->beg[at] = beg;
968 region->end[at] = end;
969 return 0;
972 extern void
973 onig_region_init(OnigRegion* region)
975 region->num_regs = 0;
976 region->allocated = 0;
977 region->beg = (OnigPosition* )0;
978 region->end = (OnigPosition* )0;
979 #ifdef USE_CAPTURE_HISTORY
980 region->history_root = (OnigCaptureTreeNode* )0;
981 #endif
984 extern OnigRegion*
985 onig_region_new(void)
987 OnigRegion* r;
989 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
990 if (r)
991 onig_region_init(r);
992 return r;
995 extern void
996 onig_region_free(OnigRegion* r, int free_self)
998 if (r) {
999 if (r->allocated > 0) {
1000 xfree(r->beg);
1001 xfree(r->end);
1003 #ifdef USE_CAPTURE_HISTORY
1004 history_root_free(r);
1005 #endif
1006 if (free_self) {
1007 xfree(r);
1009 else {
1010 memset(r, 0, sizeof(OnigRegion));
1015 extern void
1016 onig_region_copy(OnigRegion* to, const OnigRegion* from)
1018 #define RREGC_SIZE (sizeof(int) * from->num_regs)
1019 int i, r;
1021 if (to == from) return;
1023 r = onig_region_resize(to, from->num_regs);
1024 if (r) return;
1026 for (i = 0; i < from->num_regs; i++) {
1027 to->beg[i] = from->beg[i];
1028 to->end[i] = from->end[i];
1030 to->num_regs = from->num_regs;
1032 #ifdef USE_CAPTURE_HISTORY
1033 history_root_free(to);
1035 if (IS_NOT_NULL(from->history_root)) {
1036 to->history_root = history_tree_clone(from->history_root);
1038 #endif
1042 /** stack **/
1043 #define INVALID_STACK_INDEX -1
1045 /* stack type */
1046 /* used by normal-POP */
1047 #define STK_ALT 0x0001
1048 #define STK_LOOK_BEHIND_NOT 0x0002
1049 #define STK_POS_NOT 0x0003
1050 /* handled by normal-POP */
1051 #define STK_MEM_START 0x0100
1052 #define STK_MEM_END 0x8200
1053 #define STK_REPEAT_INC 0x0300
1054 #define STK_STATE_CHECK_MARK 0x1000
1055 /* avoided by normal-POP */
1056 #define STK_NULL_CHECK_START 0x3000
1057 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
1058 #define STK_MEM_END_MARK 0x8400
1059 #define STK_POS 0x0500 /* used when POP-POS */
1060 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
1061 #define STK_REPEAT 0x0700
1062 #define STK_CALL_FRAME 0x0800
1063 #define STK_RETURN 0x0900
1064 #define STK_VOID 0x0a00 /* for fill a blank */
1065 #define STK_ABSENT_POS 0x0b00 /* for absent */
1066 #define STK_ABSENT 0x0c00 /* absent inner loop marker */
1067 #define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
1068 #define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
1070 /* stack type check mask */
1071 #define STK_MASK_POP_USED 0x00ff
1072 #define STK_MASK_TO_VOID_TARGET 0x10ff
1073 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
1075 #ifdef USE_MATCH_CACHE
1076 #define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1077 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1078 (msa).num_fails = 0;\
1079 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1080 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1081 (msa).num_cache_points = 0;\
1082 (msa).match_cache_buf = (uint8_t*)NULL;\
1083 } while(0)
1084 #define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1085 xfree((msa).cache_opcodes);\
1086 xfree((msa).match_cache_buf);\
1087 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1088 (msa).match_cache_buf = (uint8_t*)NULL;\
1089 } while(0)
1090 #else
1091 #define MATCH_ARG_INIT_MATCH_CACHE(msa)
1092 #define MATCH_ARG_FREE_MATCH_CACHE(msa)
1093 #endif
1095 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1096 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1097 (msa).stack_p = (void* )0;\
1098 (msa).options = (arg_option);\
1099 (msa).region = (arg_region);\
1100 (msa).start = (arg_start);\
1101 (msa).gpos = (arg_gpos);\
1102 (msa).best_len = ONIG_MISMATCH;\
1103 (msa).counter = 0;\
1104 (msa).end_time = 0;\
1105 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1106 } while(0)
1107 #else
1108 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1109 (msa).stack_p = (void* )0;\
1110 (msa).options = (arg_option);\
1111 (msa).region = (arg_region);\
1112 (msa).start = (arg_start);\
1113 (msa).gpos = (arg_gpos);\
1114 (msa).counter = 0;\
1115 (msa).end_time = 0;\
1116 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1117 } while(0)
1118 #endif
1120 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1122 # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
1124 # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1125 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1126 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1127 offset = ((offset) * (state_num)) >> 3;\
1128 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1129 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1130 (msa).state_check_buff = (void* )xmalloc(size);\
1131 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1133 else \
1134 (msa).state_check_buff = (void* )xalloca(size);\
1135 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1136 (size_t )(size - (offset))); \
1137 (msa).state_check_buff_size = size;\
1139 else {\
1140 (msa).state_check_buff = (void* )0;\
1141 (msa).state_check_buff_size = 0;\
1144 else {\
1145 (msa).state_check_buff = (void* )0;\
1146 (msa).state_check_buff_size = 0;\
1148 } while(0)
1150 # define MATCH_ARG_FREE(msa) do {\
1151 xfree((msa).stack_p);\
1152 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1153 xfree((msa).state_check_buff);\
1155 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1156 } while(0)
1157 #else /* USE_COMBINATION_EXPLOSION_CHECK */
1158 # define MATCH_ARG_FREE(msa) do {\
1159 xfree((msa).stack_p);\
1160 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1161 } while (0)
1162 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1166 #define MAX_PTR_NUM 100
1168 #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1169 if (ptr_num > MAX_PTR_NUM) {\
1170 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1171 heap_addr = alloc_addr;\
1172 if (msa->stack_p) {\
1173 stk_alloc = (OnigStackType* )(msa->stack_p);\
1174 stk_base = stk_alloc;\
1175 stk = stk_base;\
1176 stk_end = stk_base + msa->stack_n;\
1177 } else {\
1178 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1179 stk_base = stk_alloc;\
1180 stk = stk_base;\
1181 stk_end = stk_base + (stack_num);\
1183 } else if (msa->stack_p) {\
1184 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1185 heap_addr = NULL;\
1186 stk_alloc = (OnigStackType* )(msa->stack_p);\
1187 stk_base = stk_alloc;\
1188 stk = stk_base;\
1189 stk_end = stk_base + msa->stack_n;\
1191 else {\
1192 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1193 + sizeof(OnigStackType) * (stack_num));\
1194 heap_addr = NULL;\
1195 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1196 stk_base = stk_alloc;\
1197 stk = stk_base;\
1198 stk_end = stk_base + (stack_num);\
1200 } while(0)
1202 #define STACK_SAVE do{\
1203 if (stk_base != stk_alloc) {\
1204 msa->stack_p = stk_base;\
1205 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
1207 } while(0)
1209 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1211 extern unsigned int
1212 onig_get_match_stack_limit_size(void)
1214 return MatchStackLimitSize;
1217 extern int
1218 onig_set_match_stack_limit_size(unsigned int size)
1220 MatchStackLimitSize = size;
1221 return 0;
1224 static int
1225 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1226 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1228 size_t n;
1229 OnigStackType *x, *stk_base, *stk_end, *stk;
1231 stk_base = *arg_stk_base;
1232 stk_end = *arg_stk_end;
1233 stk = *arg_stk;
1235 n = stk_end - stk_base;
1236 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1237 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1238 if (IS_NULL(x)) {
1239 STACK_SAVE;
1240 return ONIGERR_MEMORY;
1242 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1243 n *= 2;
1245 else {
1246 unsigned int limit_size = MatchStackLimitSize;
1247 n *= 2;
1248 if (limit_size != 0 && n > limit_size) {
1249 if ((unsigned int )(stk_end - stk_base) == limit_size)
1250 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1251 else
1252 n = limit_size;
1254 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1255 if (IS_NULL(x)) {
1256 STACK_SAVE;
1257 return ONIGERR_MEMORY;
1260 *arg_stk = x + (stk - stk_base);
1261 *arg_stk_base = x;
1262 *arg_stk_end = x + n;
1263 return 0;
1266 #define STACK_ENSURE(n) do {\
1267 if (stk_end - stk < (n)) {\
1268 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1269 if (r != 0) {\
1270 STACK_SAVE;\
1271 xfree(xmalloc_base);\
1272 return r;\
1275 } while(0)
1277 #define STACK_AT(index) (stk_base + (index))
1278 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
1280 #define STACK_PUSH_TYPE(stack_type) do {\
1281 STACK_ENSURE(1);\
1282 stk->type = (stack_type);\
1283 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1284 STACK_INC;\
1285 } while(0)
1287 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1289 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1290 # define STATE_CHECK_POS(s,snum) \
1291 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1292 # define STATE_CHECK_VAL(v,snum) do {\
1293 if (state_check_buff != NULL) {\
1294 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1295 (v) = state_check_buff[x/8] & (1<<(x%8));\
1297 else (v) = 0;\
1298 } while(0)
1301 # define ELSE_IF_STATE_CHECK_MARK(stk) \
1302 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1303 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1304 state_check_buff[x/8] |= (1<<(x%8)); \
1307 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1308 STACK_ENSURE(1);\
1309 stk->type = (stack_type);\
1310 stk->u.state.pcode = (pat);\
1311 stk->u.state.pstr = (s);\
1312 stk->u.state.pstr_prev = (sprev);\
1313 stk->u.state.state_check = 0;\
1314 stk->u.state.pkeep = (keep);\
1315 STACK_INC;\
1316 } while(0)
1318 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
1319 stk->type = (stack_type);\
1320 stk->u.state.pcode = (pat);\
1321 stk->u.state.state_check = 0;\
1322 STACK_INC;\
1323 } while(0)
1325 # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1326 STACK_ENSURE(1);\
1327 stk->type = STK_ALT;\
1328 stk->u.state.pcode = (pat);\
1329 stk->u.state.pstr = (s);\
1330 stk->u.state.pstr_prev = (sprev);\
1331 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1332 stk->u.state.pkeep = (keep);\
1333 STACK_INC;\
1334 } while(0)
1336 # define STACK_PUSH_STATE_CHECK(s,snum) do {\
1337 if (state_check_buff != NULL) {\
1338 STACK_ENSURE(1);\
1339 stk->type = STK_STATE_CHECK_MARK;\
1340 stk->u.state.pstr = (s);\
1341 stk->u.state.state_check = (snum);\
1342 STACK_INC;\
1344 } while(0)
1346 #else /* USE_COMBINATION_EXPLOSION_CHECK */
1348 # define ELSE_IF_STATE_CHECK_MARK(stk)
1350 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1351 STACK_ENSURE(1);\
1352 stk->type = (stack_type);\
1353 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1354 stk->u.state.pcode = (pat);\
1355 stk->u.state.pstr = (s);\
1356 stk->u.state.pstr_prev = (sprev);\
1357 stk->u.state.pkeep = (keep);\
1358 STACK_INC;\
1359 } while(0)
1361 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
1362 stk->type = (stack_type);\
1363 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1364 stk->u.state.pcode = (pat);\
1365 STACK_INC;\
1366 } while(0)
1367 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1369 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1370 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1371 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1372 #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1373 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1374 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1375 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1377 #define STACK_PUSH_REPEAT(id, pat) do {\
1378 STACK_ENSURE(1);\
1379 stk->type = STK_REPEAT;\
1380 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1381 stk->u.repeat.num = (id);\
1382 stk->u.repeat.pcode = (pat);\
1383 stk->u.repeat.count = 0;\
1384 STACK_INC;\
1385 } while(0)
1387 #define STACK_PUSH_REPEAT_INC(sindex) do {\
1388 STACK_ENSURE(1);\
1389 stk->type = STK_REPEAT_INC;\
1390 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1391 stk->u.repeat_inc.si = (sindex);\
1392 STACK_INC;\
1393 } while(0)
1395 #define STACK_PUSH_MEM_START(mnum, s) do {\
1396 STACK_ENSURE(1);\
1397 stk->type = STK_MEM_START;\
1398 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1399 stk->u.mem.num = (mnum);\
1400 stk->u.mem.pstr = (s);\
1401 stk->u.mem.start = mem_start_stk[mnum];\
1402 stk->u.mem.end = mem_end_stk[mnum];\
1403 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1404 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1405 STACK_INC;\
1406 } while(0)
1408 #define STACK_PUSH_MEM_END(mnum, s) do {\
1409 STACK_ENSURE(1);\
1410 stk->type = STK_MEM_END;\
1411 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1412 stk->u.mem.num = (mnum);\
1413 stk->u.mem.pstr = (s);\
1414 stk->u.mem.start = mem_start_stk[mnum];\
1415 stk->u.mem.end = mem_end_stk[mnum];\
1416 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1417 STACK_INC;\
1418 } while(0)
1420 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1421 STACK_ENSURE(1);\
1422 stk->type = STK_MEM_END_MARK;\
1423 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1424 stk->u.mem.num = (mnum);\
1425 STACK_INC;\
1426 } while(0)
1428 #define STACK_GET_MEM_START(mnum, k) do {\
1429 int level = 0;\
1430 k = stk;\
1431 while (k > stk_base) {\
1432 k--;\
1433 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1434 && k->u.mem.num == (mnum)) {\
1435 level++;\
1437 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1438 if (level == 0) break;\
1439 level--;\
1442 } while(0)
1444 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1445 int level = 0;\
1446 while (k < stk) {\
1447 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1448 if (level == 0) (start) = k->u.mem.pstr;\
1449 level++;\
1451 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1452 level--;\
1453 if (level == 0) {\
1454 (end) = k->u.mem.pstr;\
1455 break;\
1458 k++;\
1460 } while(0)
1462 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1463 STACK_ENSURE(1);\
1464 stk->type = STK_NULL_CHECK_START;\
1465 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1466 stk->u.null_check.num = (cnum);\
1467 stk->u.null_check.pstr = (s);\
1468 STACK_INC;\
1469 } while(0)
1471 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1472 STACK_ENSURE(1);\
1473 stk->type = STK_NULL_CHECK_END;\
1474 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1475 stk->u.null_check.num = (cnum);\
1476 STACK_INC;\
1477 } while(0)
1479 #define STACK_PUSH_CALL_FRAME(pat) do {\
1480 STACK_ENSURE(1);\
1481 stk->type = STK_CALL_FRAME;\
1482 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1483 stk->u.call_frame.ret_addr = (pat);\
1484 STACK_INC;\
1485 } while(0)
1487 #define STACK_PUSH_RETURN do {\
1488 STACK_ENSURE(1);\
1489 stk->type = STK_RETURN;\
1490 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1491 STACK_INC;\
1492 } while(0)
1494 #define STACK_PUSH_ABSENT_POS(start, end) do {\
1495 STACK_ENSURE(1);\
1496 stk->type = STK_ABSENT_POS;\
1497 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1498 stk->u.absent_pos.abs_pstr = (start);\
1499 stk->u.absent_pos.end_pstr = (end);\
1500 STACK_INC;\
1501 } while(0)
1503 #define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
1504 STACK_ENSURE(1);\
1505 stk->type = STK_MATCH_CACHE_POINT;\
1506 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1507 stk->u.match_cache_point.index = (match_cache_point_index);\
1508 stk->u.match_cache_point.mask = (match_cache_point_mask);\
1509 STACK_INC;\
1510 } while(0)
1513 #ifdef ONIG_DEBUG
1514 # define STACK_BASE_CHECK(p, at) \
1515 if ((p) < stk_base) {\
1516 fprintf(stderr, "at %s\n", at);\
1517 goto stack_error;\
1519 #else
1520 # define STACK_BASE_CHECK(p, at)
1521 #endif
1523 #ifdef ONIG_DEBUG_MATCH_CACHE
1524 # define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);
1525 #else
1526 # define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
1527 #endif
1529 #ifdef USE_MATCH_CACHE
1530 # define INC_NUM_FAILS msa->num_fails++
1531 # define MEMOIZE_MATCH_CACHE_POINT do {\
1532 if (stk->type == STK_MATCH_CACHE_POINT) {\
1533 msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
1534 MATCH_CACHE_DEBUG_MEMOIZE(stk);\
1535 } else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
1536 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1537 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1539 } while(0)
1540 # define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
1541 if (stkp->type == STK_MATCH_CACHE_POINT) {\
1542 stkp->type = STK_VOID;\
1543 memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
1544 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1546 } while(0)
1547 # define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
1548 if (stk->type == STK_MATCH_CACHE_POINT) {\
1549 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1550 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1552 } while(0)
1553 #else
1554 # define INC_NUM_FAILS ((void) 0)
1555 # define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
1556 # define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
1557 #endif
1559 #define STACK_POP_ONE do {\
1560 stk--;\
1561 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1562 } while(0)
1564 #define STACK_POP do {\
1565 switch (pop_level) {\
1566 case STACK_POP_LEVEL_FREE:\
1567 while (1) {\
1568 stk--;\
1569 STACK_BASE_CHECK(stk, "STACK_POP"); \
1570 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1571 ELSE_IF_STATE_CHECK_MARK(stk);\
1572 MEMOIZE_MATCH_CACHE_POINT;\
1574 break;\
1575 case STACK_POP_LEVEL_MEM_START:\
1576 while (1) {\
1577 stk--;\
1578 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1579 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1580 else if (stk->type == STK_MEM_START) {\
1581 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1582 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1584 ELSE_IF_STATE_CHECK_MARK(stk);\
1585 MEMOIZE_MATCH_CACHE_POINT;\
1587 break;\
1588 default:\
1589 while (1) {\
1590 stk--;\
1591 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1592 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1593 else if (stk->type == STK_MEM_START) {\
1594 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1595 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1597 else if (stk->type == STK_REPEAT_INC) {\
1598 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1600 else if (stk->type == STK_MEM_END) {\
1601 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1602 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1604 ELSE_IF_STATE_CHECK_MARK(stk);\
1605 MEMOIZE_MATCH_CACHE_POINT;\
1607 break;\
1609 } while(0)
1611 #define STACK_POP_TIL_POS_NOT do {\
1612 while (1) {\
1613 stk--;\
1614 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1615 if (stk->type == STK_POS_NOT) break;\
1616 else if (stk->type == STK_MEM_START) {\
1617 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1618 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1620 else if (stk->type == STK_REPEAT_INC) {\
1621 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1623 else if (stk->type == STK_MEM_END) {\
1624 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1625 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1627 else if (IS_TO_VOID_TARGET(stk)) {\
1628 INC_NUM_FAILS;\
1630 ELSE_IF_STATE_CHECK_MARK(stk);\
1631 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
1633 } while(0)
1635 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1636 while (1) {\
1637 stk--;\
1638 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1639 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1640 else if (stk->type == STK_MEM_START) {\
1641 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1642 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1644 else if (stk->type == STK_REPEAT_INC) {\
1645 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1647 else if (stk->type == STK_MEM_END) {\
1648 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1649 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1651 ELSE_IF_STATE_CHECK_MARK(stk);\
1653 } while(0)
1655 #define STACK_POP_TIL_ABSENT do {\
1656 while (1) {\
1657 stk--;\
1658 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1659 if (stk->type == STK_ABSENT) break;\
1660 else if (stk->type == STK_MEM_START) {\
1661 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1662 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1664 else if (stk->type == STK_REPEAT_INC) {\
1665 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1667 else if (stk->type == STK_MEM_END) {\
1668 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1669 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1671 ELSE_IF_STATE_CHECK_MARK(stk);\
1673 } while(0)
1675 #define STACK_POP_ABSENT_POS(start, end) do {\
1676 stk--;\
1677 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1678 (start) = stk->u.absent_pos.abs_pstr;\
1679 (end) = stk->u.absent_pos.end_pstr;\
1680 } while(0)
1682 #define STACK_POS_END(k) do {\
1683 k = stk;\
1684 while (1) {\
1685 k--;\
1686 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1687 if (IS_TO_VOID_TARGET(k)) {\
1688 INC_NUM_FAILS;\
1689 k->type = STK_VOID;\
1691 else if (k->type == STK_POS) {\
1692 k->type = STK_VOID;\
1693 break;\
1695 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
1697 } while(0)
1699 #define STACK_STOP_BT_END do {\
1700 OnigStackType *k = stk;\
1701 while (1) {\
1702 k--;\
1703 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1704 if (IS_TO_VOID_TARGET(k)) {\
1705 INC_NUM_FAILS;\
1706 k->type = STK_VOID;\
1708 else if (k->type == STK_STOP_BT) {\
1709 k->type = STK_VOID;\
1710 break;\
1712 else if (k->type == STK_MATCH_CACHE_POINT) {\
1713 k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
1716 } while(0)
1718 #define STACK_STOP_BT_FAIL do {\
1719 while (1) {\
1720 stk--;\
1721 STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \
1722 if (stk->type == STK_STOP_BT) {\
1723 stk->type = STK_VOID;\
1724 break;\
1726 MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
1728 } while(0)
1730 #define STACK_NULL_CHECK(isnull,id,s) do {\
1731 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1732 while (1) {\
1733 k--;\
1734 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1735 if (k->type == STK_NULL_CHECK_START) {\
1736 if (k->u.null_check.num == (id)) {\
1737 (isnull) = (k->u.null_check.pstr == (s));\
1738 break;\
1742 } while(0)
1744 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1745 int level = 0;\
1746 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1747 while (1) {\
1748 k--;\
1749 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1750 if (k->type == STK_NULL_CHECK_START) {\
1751 if (k->u.null_check.num == (id)) {\
1752 if (level == 0) {\
1753 (isnull) = (k->u.null_check.pstr == (s));\
1754 break;\
1756 else level--;\
1759 else if (k->type == STK_NULL_CHECK_END) {\
1760 level++;\
1763 } while(0)
1765 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
1766 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1767 while (1) {\
1768 k--;\
1769 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1770 if (k->type == STK_NULL_CHECK_START) {\
1771 if (k->u.null_check.num == (id)) {\
1772 if (k->u.null_check.pstr != (s)) {\
1773 (isnull) = 0;\
1774 break;\
1776 else {\
1777 UChar* endp;\
1778 (isnull) = 1;\
1779 while (k < stk) {\
1780 if (k->type == STK_MEM_START) {\
1781 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1782 (isnull) = 0; break;\
1784 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1785 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1786 else\
1787 endp = (UChar* )k->u.mem.end;\
1788 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1789 (isnull) = 0; break;\
1791 else if (endp != s) {\
1792 (isnull) = -1; /* empty, but position changed */ \
1795 k++;\
1797 break;\
1802 } while(0)
1804 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1805 int level = 0;\
1806 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1807 while (1) {\
1808 k--;\
1809 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1810 if (k->type == STK_NULL_CHECK_START) {\
1811 if (k->u.null_check.num == (id)) {\
1812 if (level == 0) {\
1813 if (k->u.null_check.pstr != (s)) {\
1814 (isnull) = 0;\
1815 break;\
1817 else {\
1818 UChar* endp;\
1819 (isnull) = 1;\
1820 while (k < stk) {\
1821 if (k->type == STK_MEM_START) {\
1822 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1823 (isnull) = 0; break;\
1825 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1826 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1827 else\
1828 endp = (UChar* )k->u.mem.end;\
1829 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1830 (isnull) = 0; break;\
1832 else if (endp != s) {\
1833 (isnull) = -1; /* empty, but position changed */ \
1836 k++;\
1838 break;\
1841 else {\
1842 level--;\
1846 else if (k->type == STK_NULL_CHECK_END) {\
1847 if (k->u.null_check.num == (id)) level++;\
1850 } while(0)
1852 #define STACK_GET_REPEAT(id, k) do {\
1853 int level = 0;\
1854 k = stk;\
1855 while (1) {\
1856 k--;\
1857 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1858 if (k->type == STK_REPEAT) {\
1859 if (level == 0) {\
1860 if (k->u.repeat.num == (id)) {\
1861 break;\
1865 else if (k->type == STK_CALL_FRAME) level--;\
1866 else if (k->type == STK_RETURN) level++;\
1868 } while(0)
1870 #define STACK_RETURN(addr) do {\
1871 int level = 0;\
1872 OnigStackType* k = stk;\
1873 while (1) {\
1874 k--;\
1875 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1876 if (k->type == STK_CALL_FRAME) {\
1877 if (level == 0) {\
1878 (addr) = k->u.call_frame.ret_addr;\
1879 break;\
1881 else level--;\
1883 else if (k->type == STK_RETURN)\
1884 level++;\
1886 } while(0)
1889 #define STRING_CMP(s1,s2,len) do {\
1890 while (len-- > 0) {\
1891 if (*s1++ != *s2++) goto fail;\
1893 } while(0)
1895 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1896 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1897 goto fail; \
1898 } while(0)
1900 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1901 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1903 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1904 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1905 UChar *p1, *p2, *end1, *s2;
1906 int len1, len2;
1908 s2 = *ps2;
1909 end1 = s1 + mblen;
1910 while (s1 < end1) {
1911 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1912 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1913 if (len1 != len2) return 0;
1914 p1 = buf1;
1915 p2 = buf2;
1916 while (len1-- > 0) {
1917 if (*p1 != *p2) return 0;
1918 p1++;
1919 p2++;
1923 *ps2 = s2;
1924 return 1;
1927 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1928 is_fail = 0;\
1929 while (len-- > 0) {\
1930 if (*s1++ != *s2++) {\
1931 is_fail = 1; break;\
1934 } while(0)
1936 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1937 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1938 is_fail = 1; \
1939 else \
1940 is_fail = 0; \
1941 } while(0)
1944 #define IS_EMPTY_STR (str == end)
1945 #define ON_STR_BEGIN(s) ((s) == str)
1946 #define ON_STR_END(s) ((s) == end)
1947 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1948 # define DATA_ENSURE_CHECK1 (s < right_range)
1949 # define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1950 # define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1951 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1952 # define ABSENT_END_POS right_range
1953 #else
1954 # define DATA_ENSURE_CHECK1 (s < end)
1955 # define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1956 # define DATA_ENSURE(n) if (s + (n) > end) goto fail
1957 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1958 # define ABSENT_END_POS end
1959 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1961 int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1963 static inline int
1964 enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1966 if (enc->max_enc_len == enc->min_enc_len) {
1967 return (p < e ? enc->min_enc_len : 0);
1969 else {
1970 return onigenc_mbclen_approximate(p, e, enc);
1975 #ifdef USE_CAPTURE_HISTORY
1976 static int
1977 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1978 OnigStackType* stk_top, UChar* str, regex_t* reg)
1980 int n, r;
1981 OnigCaptureTreeNode* child;
1982 OnigStackType* k = *kp;
1984 while (k < stk_top) {
1985 if (k->type == STK_MEM_START) {
1986 n = k->u.mem.num;
1987 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1988 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1989 child = history_node_new();
1990 CHECK_NULL_RETURN_MEMERR(child);
1991 child->group = n;
1992 child->beg = k->u.mem.pstr - str;
1993 r = history_tree_add_child(node, child);
1994 if (r != 0) {
1995 history_tree_free(child);
1996 return r;
1998 *kp = (k + 1);
1999 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2000 if (r != 0) return r;
2002 k = *kp;
2003 child->end = k->u.mem.pstr - str;
2006 else if (k->type == STK_MEM_END) {
2007 if (k->u.mem.num == node->group) {
2008 node->end = k->u.mem.pstr - str;
2009 *kp = k;
2010 return 0;
2013 k++;
2016 return 1; /* 1: root node ending. */
2018 #endif /* USE_CAPTURE_HISTORY */
2020 #ifdef USE_BACKREF_WITH_LEVEL
2021 static int
2022 mem_is_in_memp(int mem, int num, UChar* memp)
2024 int i;
2025 MemNumType m;
2027 for (i = 0; i < num; i++) {
2028 GET_MEMNUM_INC(m, memp);
2029 if (mem == (int )m) return 1;
2031 return 0;
2034 static int backref_match_at_nested_level(regex_t* reg,
2035 OnigStackType* top, OnigStackType* stk_base,
2036 int ignore_case, int case_fold_flag,
2037 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
2039 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2040 int level;
2041 OnigStackType* k;
2043 level = 0;
2044 k = top;
2045 k--;
2046 while (k >= stk_base) {
2047 if (k->type == STK_CALL_FRAME) {
2048 level--;
2050 else if (k->type == STK_RETURN) {
2051 level++;
2053 else if (level == nest) {
2054 if (k->type == STK_MEM_START) {
2055 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2056 pstart = k->u.mem.pstr;
2057 if (pend != NULL_UCHARP) {
2058 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2059 p = pstart;
2060 ss = *s;
2062 if (ignore_case != 0) {
2063 if (string_cmp_ic(reg->enc, case_fold_flag,
2064 pstart, &ss, pend - pstart, send) == 0)
2065 return 0; /* or goto next_mem; */
2067 else {
2068 while (p < pend) {
2069 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2073 *s = ss;
2074 return 1;
2078 else if (k->type == STK_MEM_END) {
2079 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2080 pend = k->u.mem.pstr;
2084 k--;
2087 return 0;
2089 #endif /* USE_BACKREF_WITH_LEVEL */
2092 #ifdef ONIG_DEBUG_STATISTICS
2094 # ifdef _WIN32
2095 # include <windows.h>
2096 static LARGE_INTEGER ts, te, freq;
2097 # define GETTIME(t) QueryPerformanceCounter(&(t))
2098 # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2099 * 1000000 / freq.QuadPart)
2100 # else /* _WIN32 */
2102 # define USE_TIMEOFDAY
2104 # ifdef USE_TIMEOFDAY
2105 # ifdef HAVE_SYS_TIME_H
2106 # include <sys/time.h>
2107 # endif
2108 # ifdef HAVE_UNISTD_H
2109 # include <unistd.h>
2110 # endif
2111 static struct timeval ts, te;
2112 # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2113 # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2114 (((te).tv_sec - (ts).tv_sec)*1000000))
2115 # else /* USE_TIMEOFDAY */
2116 # ifdef HAVE_SYS_TIMES_H
2117 # include <sys/times.h>
2118 # endif
2119 static struct tms ts, te;
2120 # define GETTIME(t) times(&(t))
2121 # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2122 # endif /* USE_TIMEOFDAY */
2124 # endif /* _WIN32 */
2126 static int OpCounter[256];
2127 static int OpPrevCounter[256];
2128 static unsigned long OpTime[256];
2129 static int OpCurr = OP_FINISH;
2130 static int OpPrevTarget = OP_FAIL;
2131 static int MaxStackDepth = 0;
2133 # define MOP_IN(opcode) do {\
2134 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2135 OpCurr = opcode;\
2136 OpCounter[opcode]++;\
2137 GETTIME(ts);\
2138 } while(0)
2140 # define MOP_OUT do {\
2141 GETTIME(te);\
2142 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2143 } while(0)
2145 extern void
2146 onig_statistics_init(void)
2148 int i;
2149 for (i = 0; i < 256; i++) {
2150 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2152 MaxStackDepth = 0;
2153 # ifdef _WIN32
2154 QueryPerformanceFrequency(&freq);
2155 # endif
2158 extern void
2159 onig_print_statistics(FILE* f)
2161 int i;
2162 fprintf(f, " count prev time\n");
2163 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2164 fprintf(f, "%8d: %8d: %10lu: %s\n",
2165 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2167 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2170 # define STACK_INC do {\
2171 stk++;\
2172 if (stk - stk_base > MaxStackDepth) \
2173 MaxStackDepth = stk - stk_base;\
2174 } while(0)
2176 #else /* ONIG_DEBUG_STATISTICS */
2177 # define STACK_INC stk++
2179 # define MOP_IN(opcode)
2180 # define MOP_OUT
2181 #endif /* ONIG_DEBUG_STATISTICS */
2184 #ifdef ONIG_DEBUG_MATCH
2185 static const char *
2186 stack_type_str(int stack_type)
2188 switch (stack_type) {
2189 case STK_ALT: return "Alt ";
2190 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2191 case STK_POS_NOT: return "PosNot";
2192 case STK_MEM_START: return "MemS ";
2193 case STK_MEM_END: return "MemE ";
2194 case STK_REPEAT_INC: return "RepInc";
2195 case STK_STATE_CHECK_MARK: return "StChMk";
2196 case STK_NULL_CHECK_START: return "NulChS";
2197 case STK_NULL_CHECK_END: return "NulChE";
2198 case STK_MEM_END_MARK: return "MemEMk";
2199 case STK_POS: return "Pos ";
2200 case STK_STOP_BT: return "StopBt";
2201 case STK_REPEAT: return "Rep ";
2202 case STK_CALL_FRAME: return "Call ";
2203 case STK_RETURN: return "Ret ";
2204 case STK_VOID: return "Void ";
2205 case STK_ABSENT_POS: return "AbsPos";
2206 case STK_ABSENT: return "Absent";
2207 case STK_MATCH_CACHE_POINT: return "MCache";
2208 default: return " ";
2211 #endif
2212 #ifdef USE_MATCH_CACHE
2214 static long
2215 bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
2217 long l = 0, r = num_cache_opcodes - 1, m = 0;
2219 while (l <= r) {
2220 m = (l + r) / 2;
2221 if (cache_opcodes[m].addr == p) break;
2222 if (cache_opcodes[m].addr < p) l = m + 1;
2223 else r = m - 1;
2225 return m;
2228 static long
2229 find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
2231 long m;
2232 const OnigCacheOpcode* cache_opcode;
2233 const OnigRepeatRange* range;
2234 const OnigStackType *stkp;
2235 int count = 0;
2236 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2237 long cache_point;
2238 long num_cache_points_at_outer_repeat;
2239 long num_cache_points_in_outer_repeat;
2241 m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
2243 if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2244 return -1;
2247 cache_opcode = &cache_opcodes[m];
2248 *cache_opcode_ptr = &cache_opcodes[m];
2249 cache_point = cache_opcode->cache_point;
2250 if (cache_opcode->outer_repeat_mem == -1) {
2251 return cache_point;
2254 num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2255 num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
2257 range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2259 stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2260 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2262 if (count < range->lower) {
2263 return num_cache_points_at_outer_repeat +
2264 num_cache_points_in_outer_repeat * count +
2265 cache_point;
2268 if (range->upper == 0x7fffffff) {
2269 return num_cache_points_at_outer_repeat +
2270 num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2271 cache_point;
2274 return num_cache_points_at_outer_repeat +
2275 num_cache_points_in_outer_repeat * (range->lower - 1) +
2276 (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
2277 cache_point;
2280 static int check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) {
2281 if (match_cache_point_mask & 0x80) {
2282 return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0;
2283 } else {
2284 return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0;
2288 static void memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) {
2289 match_cache_buf[match_cache_point_index] |= match_cache_point_mask;
2290 if (match_cache_point_mask & 0x80) {
2291 match_cache_buf[match_cache_point_index + 1] |= 0x01;
2292 } else {
2293 match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1;
2297 #endif /* USE_MATCH_CACHE */
2299 /* match data(str - end) from position (sstart). */
2300 /* if sstart == str then set sprev to NULL. */
2301 static OnigPosition
2302 match_at(regex_t* reg, const UChar* str, const UChar* end,
2303 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2304 const UChar* right_range,
2305 #endif
2306 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2308 static const UChar FinishCode[] = { OP_FINISH };
2310 int i, num_mem, pop_level;
2311 ptrdiff_t n, best_len;
2312 LengthType tlen, tlen2;
2313 MemNumType mem;
2314 RelAddrType addr;
2315 OnigOptionType option = reg->options;
2316 OnigEncoding encode = reg->enc;
2317 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2318 UChar *s, *q, *sbegin;
2319 UChar *p = reg->p;
2320 UChar *pbegin = p;
2321 UChar *pkeep;
2322 char *alloca_base;
2323 char *xmalloc_base = NULL;
2324 OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
2325 OnigStackType *stkp; /* used as any purpose. */
2326 OnigStackIndex si;
2327 OnigStackIndex *repeat_stk;
2328 OnigStackIndex *mem_start_stk, *mem_end_stk;
2329 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2330 int scv;
2331 unsigned char* state_check_buff = msa->state_check_buff;
2332 int num_comb_exp_check = reg->num_comb_exp_check;
2333 #endif
2335 #if USE_TOKEN_THREADED_VM
2336 # define OP_OFFSET 1
2337 # define VM_LOOP JUMP;
2338 # define VM_LOOP_END
2339 # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2340 # define DEFAULT L_DEFAULT:
2341 # define NEXT sprev = sbegin; JUMP
2342 # define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2344 RB_GNUC_EXTENSION static const void *oplabels[] = {
2345 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2346 &&L_OP_END, /* pattern code terminator (success end) */
2348 &&L_OP_EXACT1, /* single byte, N = 1 */
2349 &&L_OP_EXACT2, /* single byte, N = 2 */
2350 &&L_OP_EXACT3, /* single byte, N = 3 */
2351 &&L_OP_EXACT4, /* single byte, N = 4 */
2352 &&L_OP_EXACT5, /* single byte, N = 5 */
2353 &&L_OP_EXACTN, /* single byte */
2354 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2355 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2356 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2357 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2358 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2359 &&L_OP_EXACTMBN, /* other length */
2361 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2362 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2364 &&L_OP_CCLASS,
2365 &&L_OP_CCLASS_MB,
2366 &&L_OP_CCLASS_MIX,
2367 &&L_OP_CCLASS_NOT,
2368 &&L_OP_CCLASS_MB_NOT,
2369 &&L_OP_CCLASS_MIX_NOT,
2371 &&L_OP_ANYCHAR, /* "." */
2372 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2373 &&L_OP_ANYCHAR_STAR, /* ".*" */
2374 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2375 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2376 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2378 &&L_OP_WORD,
2379 &&L_OP_NOT_WORD,
2380 &&L_OP_WORD_BOUND,
2381 &&L_OP_NOT_WORD_BOUND,
2382 # ifdef USE_WORD_BEGIN_END
2383 &&L_OP_WORD_BEGIN,
2384 &&L_OP_WORD_END,
2385 # else
2386 &&L_DEFAULT,
2387 &&L_DEFAULT,
2388 # endif
2389 &&L_OP_ASCII_WORD,
2390 &&L_OP_NOT_ASCII_WORD,
2391 &&L_OP_ASCII_WORD_BOUND,
2392 &&L_OP_NOT_ASCII_WORD_BOUND,
2393 # ifdef USE_WORD_BEGIN_END
2394 &&L_OP_ASCII_WORD_BEGIN,
2395 &&L_OP_ASCII_WORD_END,
2396 # else
2397 &&L_DEFAULT,
2398 &&L_DEFAULT,
2399 # endif
2401 &&L_OP_BEGIN_BUF,
2402 &&L_OP_END_BUF,
2403 &&L_OP_BEGIN_LINE,
2404 &&L_OP_END_LINE,
2405 &&L_OP_SEMI_END_BUF,
2406 &&L_OP_BEGIN_POSITION,
2408 &&L_OP_BACKREF1,
2409 &&L_OP_BACKREF2,
2410 &&L_OP_BACKREFN,
2411 &&L_OP_BACKREFN_IC,
2412 &&L_OP_BACKREF_MULTI,
2413 &&L_OP_BACKREF_MULTI_IC,
2414 # ifdef USE_BACKREF_WITH_LEVEL
2415 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2416 # else
2417 &&L_DEFAULT,
2418 # endif
2419 &&L_OP_MEMORY_START,
2420 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2421 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2422 # ifdef USE_SUBEXP_CALL
2423 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2424 # else
2425 &&L_DEFAULT,
2426 # endif
2427 &&L_OP_MEMORY_END,
2428 # ifdef USE_SUBEXP_CALL
2429 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2430 # else
2431 &&L_DEFAULT,
2432 # endif
2434 &&L_OP_KEEP,
2436 &&L_OP_FAIL, /* pop stack and move */
2437 &&L_OP_JUMP,
2438 &&L_OP_PUSH,
2439 &&L_OP_POP,
2440 # ifdef USE_OP_PUSH_OR_JUMP_EXACT
2441 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2442 # else
2443 &&L_DEFAULT,
2444 # endif
2445 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2446 &&L_OP_REPEAT, /* {n,m} */
2447 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2448 &&L_OP_REPEAT_INC,
2449 &&L_OP_REPEAT_INC_NG, /* non greedy */
2450 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2451 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2452 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2453 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2454 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2455 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2456 # else
2457 &&L_DEFAULT,
2458 # endif
2459 # ifdef USE_SUBEXP_CALL
2460 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2461 # else
2462 &&L_DEFAULT,
2463 # endif
2465 &&L_OP_PUSH_POS, /* (?=...) start */
2466 &&L_OP_POP_POS, /* (?=...) end */
2467 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2468 &&L_OP_FAIL_POS, /* (?!...) end */
2469 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2470 &&L_OP_POP_STOP_BT, /* (?>...) end */
2471 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2472 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2473 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2474 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2475 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2476 &&L_OP_ABSENT_END, /* (?~...) end */
2478 # ifdef USE_SUBEXP_CALL
2479 &&L_OP_CALL, /* \g<name> */
2480 &&L_OP_RETURN,
2481 # else
2482 &&L_DEFAULT,
2483 &&L_DEFAULT,
2484 # endif
2485 &&L_OP_CONDITION,
2487 # ifdef USE_COMBINATION_EXPLOSION_CHECK
2488 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2489 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2490 &&L_OP_STATE_CHECK, /* check only */
2491 # else
2492 &&L_DEFAULT,
2493 &&L_DEFAULT,
2494 &&L_DEFAULT,
2495 # endif
2496 # ifdef USE_COMBINATION_EXPLOSION_CHECK
2497 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2498 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2499 # else
2500 &&L_DEFAULT,
2501 &&L_DEFAULT,
2502 # endif
2503 /* no need: IS_DYNAMIC_OPTION() == 0 */
2504 # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2505 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2506 &&L_OP_SET_OPTION /* set option */
2507 # else
2508 &&L_DEFAULT,
2509 &&L_DEFAULT
2510 # endif
2512 #else /* USE_TOKEN_THREADED_VM */
2514 # define OP_OFFSET 0
2515 # define VM_LOOP \
2516 while (1) { \
2517 OPCODE_EXEC_HOOK; \
2518 pbegin = p; \
2519 sbegin = s; \
2520 switch (*p++) {
2521 # define VM_LOOP_END } sprev = sbegin; }
2522 # define CASE(x) case x:
2523 # define DEFAULT default:
2524 # define NEXT break
2525 # define JUMP continue; break
2526 #endif /* USE_TOKEN_THREADED_VM */
2529 #ifdef USE_SUBEXP_CALL
2530 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2531 etc. Additional space is required. */
2532 # define ADD_NUMMEM 1
2533 #else
2534 /* Stack #0 not is used. */
2535 # define ADD_NUMMEM 0
2536 #endif
2538 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2540 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2541 pop_level = reg->stack_pop_level;
2542 num_mem = reg->num_mem;
2543 repeat_stk = (OnigStackIndex* )alloca_base;
2545 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2546 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2548 OnigStackIndex *pp = mem_start_stk;
2549 for (; pp < repeat_stk + n; pp += 2) {
2550 pp[0] = INVALID_STACK_INDEX;
2551 pp[1] = INVALID_STACK_INDEX;
2554 #ifndef USE_SUBEXP_CALL
2555 mem_start_stk--; /* for index start from 1,
2556 mem_start_stk[1]..mem_start_stk[num_mem] */
2557 mem_end_stk--; /* for index start from 1,
2558 mem_end_stk[1]..mem_end_stk[num_mem] */
2559 #endif
2561 #ifdef ONIG_DEBUG_MATCH
2562 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2563 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2564 fprintf(stderr, "size: %d, start offset: %d\n",
2565 (int )(end - str), (int )(sstart - str));
2566 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2567 #endif
2569 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2570 best_len = ONIG_MISMATCH;
2571 s = (UChar* )sstart;
2572 pkeep = (UChar* )sstart;
2575 #ifdef ONIG_DEBUG_MATCH
2576 # define OPCODE_EXEC_HOOK \
2577 if (s) { \
2578 UChar *op, *q, *bp, buf[50]; \
2579 int len; \
2580 op = p - OP_OFFSET; \
2581 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2582 bp = buf; \
2583 q = s; \
2584 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2585 for (i = 0; i < 7 && q < end; i++) { \
2586 len = enclen(encode, q, end); \
2587 while (len-- > 0) *bp++ = *q++; \
2589 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2591 xmemcpy(bp, "\"", 1); bp += 1; \
2592 *bp = 0; \
2593 fputs((char* )buf, stderr); \
2594 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2595 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2596 stk - stk_base - 1, \
2597 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2598 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2599 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2600 fprintf(stderr, "\n"); \
2602 #else
2603 # define OPCODE_EXEC_HOOK ((void) 0)
2604 #endif
2606 #ifdef USE_MATCH_CACHE
2607 #ifdef ONIG_DEBUG_MATCH_CACHE
2608 #define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2609 #define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2610 #else
2611 #define MATCH_CACHE_DEBUG ((void) 0)
2612 #define MATCH_CACHE_DEBUG_HIT ((void) 0)
2613 #endif
2615 #define MATCH_CACHE_HIT ((void) 0)
2617 # define CHECK_MATCH_CACHE do {\
2618 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2619 const OnigCacheOpcode *cache_opcode;\
2620 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2621 if (cache_point >= 0) {\
2622 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2623 long match_cache_point_index = match_cache_point >> 3;\
2624 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2625 MATCH_CACHE_DEBUG;\
2626 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2627 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2628 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2629 else if (cache_opcode->lookaround_nesting < 0) {\
2630 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2631 STACK_STOP_BT_FAIL;\
2632 goto fail;\
2633 } else goto fail;\
2634 } else {\
2635 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2636 p = cache_opcode->match_addr;\
2637 MOP_OUT;\
2638 JUMP;\
2639 } else goto fail;\
2642 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2645 } while (0)
2646 #else
2647 # define CHECK_MATCH_CACHE ((void) 0)
2648 #endif
2650 VM_LOOP {
2651 CASE(OP_END) MOP_IN(OP_END);
2652 n = s - sstart;
2653 if (n > best_len) {
2654 OnigRegion* region;
2655 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2656 if (IS_FIND_LONGEST(option)) {
2657 if (n > msa->best_len) {
2658 msa->best_len = n;
2659 msa->best_s = (UChar* )sstart;
2661 else
2662 goto end_best_len;
2664 #endif
2665 best_len = n;
2666 region = msa->region;
2667 if (region) {
2668 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2669 region->end[0] = s - str;
2670 for (i = 1; i <= num_mem; i++) {
2671 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2672 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2673 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2674 else
2675 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2677 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2678 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2679 : (UChar* )((void* )mem_end_stk[i])) - str;
2681 else {
2682 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2686 #ifdef USE_CAPTURE_HISTORY
2687 if (reg->capture_history != 0) {
2688 int r;
2689 OnigCaptureTreeNode* node;
2691 if (IS_NULL(region->history_root)) {
2692 region->history_root = node = history_node_new();
2693 CHECK_NULL_RETURN_MEMERR(node);
2695 else {
2696 node = region->history_root;
2697 history_tree_clear(node);
2700 node->group = 0;
2701 node->beg = ((pkeep > s) ? s : pkeep) - str;
2702 node->end = s - str;
2704 stkp = stk_base;
2705 r = make_capture_history_tree(region->history_root, &stkp,
2706 stk, (UChar* )str, reg);
2707 if (r < 0) {
2708 best_len = r; /* error code */
2709 goto finish;
2712 #endif /* USE_CAPTURE_HISTORY */
2713 } /* if (region) */
2714 } /* n > best_len */
2716 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2717 end_best_len:
2718 #endif
2719 MOP_OUT;
2721 if (IS_FIND_CONDITION(option)) {
2722 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2723 best_len = ONIG_MISMATCH;
2724 goto fail; /* for retry */
2726 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2727 goto fail; /* for retry */
2731 /* default behavior: return first-matching result. */
2732 goto finish;
2733 NEXT;
2735 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2736 DATA_ENSURE(1);
2737 if (*p != *s) goto fail;
2738 p++; s++;
2739 MOP_OUT;
2740 NEXT;
2742 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2744 int len;
2745 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2747 DATA_ENSURE(1);
2748 len = ONIGENC_MBC_CASE_FOLD(encode,
2749 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2750 case_fold_flag,
2751 &s, end, lowbuf);
2752 DATA_ENSURE(0);
2753 q = lowbuf;
2754 while (len-- > 0) {
2755 if (*p != *q) {
2756 goto fail;
2758 p++; q++;
2761 MOP_OUT;
2762 NEXT;
2764 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2765 DATA_ENSURE(2);
2766 if (*p != *s) goto fail;
2767 p++; s++;
2768 if (*p != *s) goto fail;
2769 sprev = s;
2770 p++; s++;
2771 MOP_OUT;
2772 JUMP;
2774 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2775 DATA_ENSURE(3);
2776 if (*p != *s) goto fail;
2777 p++; s++;
2778 if (*p != *s) goto fail;
2779 p++; s++;
2780 if (*p != *s) goto fail;
2781 sprev = s;
2782 p++; s++;
2783 MOP_OUT;
2784 JUMP;
2786 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2787 DATA_ENSURE(4);
2788 if (*p != *s) goto fail;
2789 p++; s++;
2790 if (*p != *s) goto fail;
2791 p++; s++;
2792 if (*p != *s) goto fail;
2793 p++; s++;
2794 if (*p != *s) goto fail;
2795 sprev = s;
2796 p++; s++;
2797 MOP_OUT;
2798 JUMP;
2800 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2801 DATA_ENSURE(5);
2802 if (*p != *s) goto fail;
2803 p++; s++;
2804 if (*p != *s) goto fail;
2805 p++; s++;
2806 if (*p != *s) goto fail;
2807 p++; s++;
2808 if (*p != *s) goto fail;
2809 p++; s++;
2810 if (*p != *s) goto fail;
2811 sprev = s;
2812 p++; s++;
2813 MOP_OUT;
2814 JUMP;
2816 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2817 GET_LENGTH_INC(tlen, p);
2818 DATA_ENSURE(tlen);
2819 while (tlen-- > 0) {
2820 if (*p++ != *s++) goto fail;
2822 sprev = s - 1;
2823 MOP_OUT;
2824 JUMP;
2826 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2828 int len;
2829 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2831 GET_LENGTH_INC(tlen, p);
2832 endp = p + tlen;
2834 while (p < endp) {
2835 sprev = s;
2836 DATA_ENSURE(1);
2837 len = ONIGENC_MBC_CASE_FOLD(encode,
2838 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2839 case_fold_flag,
2840 &s, end, lowbuf);
2841 DATA_ENSURE(0);
2842 q = lowbuf;
2843 while (len-- > 0) {
2844 if (*p != *q) goto fail;
2845 p++; q++;
2850 MOP_OUT;
2851 JUMP;
2853 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2854 DATA_ENSURE(2);
2855 if (*p != *s) goto fail;
2856 p++; s++;
2857 if (*p != *s) goto fail;
2858 p++; s++;
2859 MOP_OUT;
2860 NEXT;
2862 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2863 DATA_ENSURE(4);
2864 if (*p != *s) goto fail;
2865 p++; s++;
2866 if (*p != *s) goto fail;
2867 p++; s++;
2868 sprev = s;
2869 if (*p != *s) goto fail;
2870 p++; s++;
2871 if (*p != *s) goto fail;
2872 p++; s++;
2873 MOP_OUT;
2874 JUMP;
2876 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2877 DATA_ENSURE(6);
2878 if (*p != *s) goto fail;
2879 p++; s++;
2880 if (*p != *s) goto fail;
2881 p++; s++;
2882 if (*p != *s) goto fail;
2883 p++; s++;
2884 if (*p != *s) goto fail;
2885 p++; s++;
2886 sprev = s;
2887 if (*p != *s) goto fail;
2888 p++; s++;
2889 if (*p != *s) goto fail;
2890 p++; s++;
2891 MOP_OUT;
2892 JUMP;
2894 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2895 GET_LENGTH_INC(tlen, p);
2896 DATA_ENSURE(tlen * 2);
2897 while (tlen-- > 0) {
2898 if (*p != *s) goto fail;
2899 p++; s++;
2900 if (*p != *s) goto fail;
2901 p++; s++;
2903 sprev = s - 2;
2904 MOP_OUT;
2905 JUMP;
2907 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2908 GET_LENGTH_INC(tlen, p);
2909 DATA_ENSURE(tlen * 3);
2910 while (tlen-- > 0) {
2911 if (*p != *s) goto fail;
2912 p++; s++;
2913 if (*p != *s) goto fail;
2914 p++; s++;
2915 if (*p != *s) goto fail;
2916 p++; s++;
2918 sprev = s - 3;
2919 MOP_OUT;
2920 JUMP;
2922 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2923 GET_LENGTH_INC(tlen, p); /* mb-len */
2924 GET_LENGTH_INC(tlen2, p); /* string len */
2925 tlen2 *= tlen;
2926 DATA_ENSURE(tlen2);
2927 while (tlen2-- > 0) {
2928 if (*p != *s) goto fail;
2929 p++; s++;
2931 sprev = s - tlen;
2932 MOP_OUT;
2933 JUMP;
2935 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2936 DATA_ENSURE(1);
2937 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2938 p += SIZE_BITSET;
2939 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2940 MOP_OUT;
2941 NEXT;
2943 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2944 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2946 cclass_mb:
2947 GET_LENGTH_INC(tlen, p);
2949 OnigCodePoint code;
2950 UChar *ss;
2951 int mb_len;
2953 DATA_ENSURE(1);
2954 mb_len = enclen_approx(encode, s, end);
2955 DATA_ENSURE(mb_len);
2956 ss = s;
2957 s += mb_len;
2958 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2960 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2961 if (! onig_is_in_code_range(p, code)) goto fail;
2962 #else
2963 q = p;
2964 ALIGNMENT_RIGHT(q);
2965 if (! onig_is_in_code_range(q, code)) goto fail;
2966 #endif
2968 p += tlen;
2969 MOP_OUT;
2970 NEXT;
2972 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2973 DATA_ENSURE(1);
2974 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2975 p += SIZE_BITSET;
2976 goto cclass_mb;
2978 else {
2979 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2980 goto fail;
2982 p += SIZE_BITSET;
2983 GET_LENGTH_INC(tlen, p);
2984 p += tlen;
2985 s++;
2987 MOP_OUT;
2988 NEXT;
2990 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2991 DATA_ENSURE(1);
2992 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2993 p += SIZE_BITSET;
2994 s += enclen(encode, s, end);
2995 MOP_OUT;
2996 NEXT;
2998 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2999 DATA_ENSURE(1);
3000 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3001 s++;
3002 GET_LENGTH_INC(tlen, p);
3003 p += tlen;
3004 goto cc_mb_not_success;
3007 cclass_mb_not:
3008 GET_LENGTH_INC(tlen, p);
3010 OnigCodePoint code;
3011 UChar *ss;
3012 int mb_len = enclen(encode, s, end);
3014 if (! DATA_ENSURE_CHECK(mb_len)) {
3015 DATA_ENSURE(1);
3016 s = (UChar* )end;
3017 p += tlen;
3018 goto cc_mb_not_success;
3021 ss = s;
3022 s += mb_len;
3023 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3025 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3026 if (onig_is_in_code_range(p, code)) goto fail;
3027 #else
3028 q = p;
3029 ALIGNMENT_RIGHT(q);
3030 if (onig_is_in_code_range(q, code)) goto fail;
3031 #endif
3033 p += tlen;
3035 cc_mb_not_success:
3036 MOP_OUT;
3037 NEXT;
3039 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3040 DATA_ENSURE(1);
3041 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3042 p += SIZE_BITSET;
3043 goto cclass_mb_not;
3045 else {
3046 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3047 goto fail;
3049 p += SIZE_BITSET;
3050 GET_LENGTH_INC(tlen, p);
3051 p += tlen;
3052 s++;
3054 MOP_OUT;
3055 NEXT;
3057 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3058 DATA_ENSURE(1);
3059 n = enclen_approx(encode, s, end);
3060 DATA_ENSURE(n);
3061 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3062 s += n;
3063 MOP_OUT;
3064 NEXT;
3066 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3067 DATA_ENSURE(1);
3068 n = enclen_approx(encode, s, end);
3069 DATA_ENSURE(n);
3070 s += n;
3071 MOP_OUT;
3072 NEXT;
3074 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3075 while (DATA_ENSURE_CHECK1) {
3076 CHECK_MATCH_CACHE;
3077 STACK_PUSH_ALT(p, s, sprev, pkeep);
3078 n = enclen_approx(encode, s, end);
3079 DATA_ENSURE(n);
3080 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3081 sprev = s;
3082 s += n;
3084 MOP_OUT;
3085 JUMP;
3087 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3088 while (DATA_ENSURE_CHECK1) {
3089 CHECK_MATCH_CACHE;
3090 STACK_PUSH_ALT(p, s, sprev, pkeep);
3091 n = enclen_approx(encode, s, end);
3092 if (n > 1) {
3093 DATA_ENSURE(n);
3094 sprev = s;
3095 s += n;
3097 else {
3098 sprev = s;
3099 s++;
3102 MOP_OUT;
3103 JUMP;
3105 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3106 while (DATA_ENSURE_CHECK1) {
3107 CHECK_MATCH_CACHE;
3108 if (*p == *s) {
3109 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3110 } else {
3111 #ifdef USE_MATCH_CACHE
3112 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3113 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
3114 msa->num_fails++;
3115 #endif
3117 n = enclen_approx(encode, s, end);
3118 DATA_ENSURE(n);
3119 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3120 sprev = s;
3121 s += n;
3123 p++;
3124 MOP_OUT;
3125 NEXT;
3127 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3128 while (DATA_ENSURE_CHECK1) {
3129 CHECK_MATCH_CACHE;
3130 if (*p == *s) {
3131 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3132 } else {
3133 #ifdef USE_MATCH_CACHE
3134 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3135 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
3136 msa->num_fails++;
3137 #endif
3139 n = enclen_approx(encode, s, end);
3140 if (n > 1) {
3141 DATA_ENSURE(n);
3142 sprev = s;
3143 s += n;
3145 else {
3146 sprev = s;
3147 s++;
3150 p++;
3151 MOP_OUT;
3152 NEXT;
3154 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3155 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3156 GET_STATE_CHECK_NUM_INC(mem, p);
3157 while (DATA_ENSURE_CHECK1) {
3158 STATE_CHECK_VAL(scv, mem);
3159 if (scv) goto fail;
3161 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3162 n = enclen_approx(encode, s, end);
3163 DATA_ENSURE(n);
3164 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3165 sprev = s;
3166 s += n;
3168 MOP_OUT;
3169 NEXT;
3171 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3172 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3174 GET_STATE_CHECK_NUM_INC(mem, p);
3175 while (DATA_ENSURE_CHECK1) {
3176 STATE_CHECK_VAL(scv, mem);
3177 if (scv) goto fail;
3179 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3180 n = enclen_approx(encode, s, end);
3181 if (n > 1) {
3182 DATA_ENSURE(n);
3183 sprev = s;
3184 s += n;
3186 else {
3187 sprev = s;
3188 s++;
3191 MOP_OUT;
3192 NEXT;
3193 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
3195 CASE(OP_WORD) MOP_IN(OP_WORD);
3196 DATA_ENSURE(1);
3197 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3198 goto fail;
3200 s += enclen(encode, s, end);
3201 MOP_OUT;
3202 NEXT;
3204 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3205 DATA_ENSURE(1);
3206 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3207 goto fail;
3209 s += enclen(encode, s, end);
3210 MOP_OUT;
3211 NEXT;
3213 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3214 DATA_ENSURE(1);
3215 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3216 goto fail;
3218 s += enclen(encode, s, end);
3219 MOP_OUT;
3220 NEXT;
3222 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3223 DATA_ENSURE(1);
3224 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3225 goto fail;
3227 s += enclen(encode, s, end);
3228 MOP_OUT;
3229 NEXT;
3231 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3232 if (ON_STR_BEGIN(s)) {
3233 DATA_ENSURE(1);
3234 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3235 goto fail;
3237 else if (ON_STR_END(s)) {
3238 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3239 goto fail;
3241 else {
3242 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3243 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3244 goto fail;
3246 MOP_OUT;
3247 JUMP;
3249 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3250 if (ON_STR_BEGIN(s)) {
3251 DATA_ENSURE(1);
3252 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3253 goto fail;
3255 else if (ON_STR_END(s)) {
3256 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3257 goto fail;
3259 else {
3260 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3261 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3262 goto fail;
3264 MOP_OUT;
3265 JUMP;
3267 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3268 if (ON_STR_BEGIN(s)) {
3269 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3270 goto fail;
3272 else if (ON_STR_END(s)) {
3273 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3274 goto fail;
3276 else {
3277 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3278 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3279 goto fail;
3281 MOP_OUT;
3282 JUMP;
3284 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3285 if (ON_STR_BEGIN(s)) {
3286 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3287 goto fail;
3289 else if (ON_STR_END(s)) {
3290 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3291 goto fail;
3293 else {
3294 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3295 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3296 goto fail;
3298 MOP_OUT;
3299 JUMP;
3301 #ifdef USE_WORD_BEGIN_END
3302 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3303 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3304 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3305 MOP_OUT;
3306 JUMP;
3309 goto fail;
3310 NEXT;
3312 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3313 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3314 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3315 MOP_OUT;
3316 JUMP;
3319 goto fail;
3320 NEXT;
3322 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3323 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3324 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3325 MOP_OUT;
3326 JUMP;
3329 goto fail;
3330 NEXT;
3332 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3333 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3334 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3335 MOP_OUT;
3336 JUMP;
3339 goto fail;
3340 NEXT;
3341 #endif
3343 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3344 if (! ON_STR_BEGIN(s)) goto fail;
3345 if (IS_NOTBOS(msa->options)) goto fail;
3347 MOP_OUT;
3348 JUMP;
3350 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3351 if (! ON_STR_END(s)) goto fail;
3352 if (IS_NOTEOS(msa->options)) goto fail;
3354 MOP_OUT;
3355 JUMP;
3357 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3358 if (ON_STR_BEGIN(s)) {
3359 if (IS_NOTBOL(msa->options)) goto fail;
3360 MOP_OUT;
3361 JUMP;
3363 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3364 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3365 && !(IS_NEWLINE_CRLF(option)
3366 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3367 #endif
3368 && !ON_STR_END(s)) {
3369 MOP_OUT;
3370 JUMP;
3372 goto fail;
3373 NEXT;
3375 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3376 if (ON_STR_END(s)) {
3377 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3378 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3379 #endif
3380 if (IS_NOTEOL(msa->options)) goto fail;
3381 MOP_OUT;
3382 JUMP;
3383 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3385 #endif
3387 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3388 MOP_OUT;
3389 JUMP;
3391 goto fail;
3392 NEXT;
3394 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3395 if (ON_STR_END(s)) {
3396 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3397 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3398 #endif
3399 if (IS_NOTEOL(msa->options)) goto fail;
3400 MOP_OUT;
3401 JUMP;
3402 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3404 #endif
3406 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3407 UChar* ss = s + enclen(encode, s, end);
3408 if (ON_STR_END(ss)) {
3409 MOP_OUT;
3410 JUMP;
3412 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3413 else if (IS_NEWLINE_CRLF(option)
3414 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3415 ss += enclen(encode, ss, end);
3416 if (ON_STR_END(ss)) {
3417 MOP_OUT;
3418 JUMP;
3421 #endif
3423 goto fail;
3424 NEXT;
3426 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3427 if (s != msa->gpos)
3428 goto fail;
3430 MOP_OUT;
3431 JUMP;
3433 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3434 GET_MEMNUM_INC(mem, p);
3435 STACK_PUSH_MEM_START(mem, s);
3436 MOP_OUT;
3437 JUMP;
3439 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3440 GET_MEMNUM_INC(mem, p);
3441 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3442 mem_end_stk[mem] = INVALID_STACK_INDEX;
3443 MOP_OUT;
3444 JUMP;
3446 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3447 GET_MEMNUM_INC(mem, p);
3448 STACK_PUSH_MEM_END(mem, s);
3449 MOP_OUT;
3450 JUMP;
3452 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3453 GET_MEMNUM_INC(mem, p);
3454 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3455 MOP_OUT;
3456 JUMP;
3458 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3459 pkeep = s;
3460 MOP_OUT;
3461 JUMP;
3463 #ifdef USE_SUBEXP_CALL
3464 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3465 GET_MEMNUM_INC(mem, p);
3466 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3467 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3468 STACK_PUSH_MEM_END(mem, s);
3469 MOP_OUT;
3470 JUMP;
3472 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3473 GET_MEMNUM_INC(mem, p);
3474 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3475 STACK_GET_MEM_START(mem, stkp);
3477 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3478 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3479 else
3480 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3482 STACK_PUSH_MEM_END_MARK(mem);
3483 MOP_OUT;
3484 JUMP;
3485 #endif
3487 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3488 mem = 1;
3489 goto backref;
3490 NEXT;
3492 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3493 mem = 2;
3494 goto backref;
3495 NEXT;
3497 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3498 GET_MEMNUM_INC(mem, p);
3499 backref:
3501 int len;
3502 UChar *pstart, *pend;
3504 /* if you want to remove following line,
3505 you should check in parse and compile time. */
3506 if (mem > num_mem) goto fail;
3507 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3508 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3510 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3511 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3512 else
3513 pstart = (UChar* )((void* )mem_start_stk[mem]);
3515 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3516 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3517 : (UChar* )((void* )mem_end_stk[mem]));
3518 n = pend - pstart;
3519 DATA_ENSURE(n);
3520 sprev = s;
3521 STRING_CMP(pstart, s, n);
3522 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3523 sprev += len;
3525 MOP_OUT;
3526 JUMP;
3529 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3530 GET_MEMNUM_INC(mem, p);
3532 int len;
3533 UChar *pstart, *pend;
3535 /* if you want to remove following line,
3536 you should check in parse and compile time. */
3537 if (mem > num_mem) goto fail;
3538 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3539 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3541 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3542 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3543 else
3544 pstart = (UChar* )((void* )mem_start_stk[mem]);
3546 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3547 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3548 : (UChar* )((void* )mem_end_stk[mem]));
3549 n = pend - pstart;
3550 DATA_ENSURE(n);
3551 sprev = s;
3552 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3553 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3554 sprev += len;
3556 MOP_OUT;
3557 JUMP;
3559 NEXT;
3561 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3563 int len, is_fail;
3564 UChar *pstart, *pend, *swork;
3566 GET_LENGTH_INC(tlen, p);
3567 for (i = 0; i < tlen; i++) {
3568 GET_MEMNUM_INC(mem, p);
3570 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3571 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3573 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3574 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3575 else
3576 pstart = (UChar* )((void* )mem_start_stk[mem]);
3578 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3579 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3580 : (UChar* )((void* )mem_end_stk[mem]));
3581 n = pend - pstart;
3582 DATA_ENSURE_CONTINUE(n);
3583 sprev = s;
3584 swork = s;
3585 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3586 if (is_fail) continue;
3587 s = swork;
3588 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3589 sprev += len;
3591 p += (SIZE_MEMNUM * (tlen - i - 1));
3592 break; /* success */
3594 if (i == tlen) goto fail;
3595 MOP_OUT;
3596 JUMP;
3598 NEXT;
3600 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3602 int len, is_fail;
3603 UChar *pstart, *pend, *swork;
3605 GET_LENGTH_INC(tlen, p);
3606 for (i = 0; i < tlen; i++) {
3607 GET_MEMNUM_INC(mem, p);
3609 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3610 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3612 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3613 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3614 else
3615 pstart = (UChar* )((void* )mem_start_stk[mem]);
3617 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3618 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3619 : (UChar* )((void* )mem_end_stk[mem]));
3620 n = pend - pstart;
3621 DATA_ENSURE_CONTINUE(n);
3622 sprev = s;
3623 swork = s;
3624 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3625 if (is_fail) continue;
3626 s = swork;
3627 while (sprev + (len = enclen(encode, sprev, end)) < s)
3628 sprev += len;
3630 p += (SIZE_MEMNUM * (tlen - i - 1));
3631 break; /* success */
3633 if (i == tlen) goto fail;
3634 MOP_OUT;
3635 JUMP;
3638 #ifdef USE_BACKREF_WITH_LEVEL
3639 CASE(OP_BACKREF_WITH_LEVEL)
3641 int len;
3642 OnigOptionType ic;
3643 LengthType level;
3645 GET_OPTION_INC(ic, p);
3646 GET_LENGTH_INC(level, p);
3647 GET_LENGTH_INC(tlen, p);
3649 sprev = s;
3650 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3651 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3652 while (sprev + (len = enclen(encode, sprev, end)) < s)
3653 sprev += len;
3655 p += (SIZE_MEMNUM * tlen);
3657 else
3658 goto fail;
3660 MOP_OUT;
3661 JUMP;
3664 #endif
3666 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3667 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3668 GET_OPTION_INC(option, p);
3669 STACK_PUSH_ALT(p, s, sprev, pkeep);
3670 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3671 MOP_OUT;
3672 JUMP;
3674 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3675 GET_OPTION_INC(option, p);
3676 MOP_OUT;
3677 JUMP;
3678 #endif
3680 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3681 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3682 STACK_PUSH_NULL_CHECK_START(mem, s);
3683 MOP_OUT;
3684 JUMP;
3686 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3688 int isnull;
3690 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3691 STACK_NULL_CHECK(isnull, mem, s);
3692 if (isnull) {
3693 #ifdef ONIG_DEBUG_MATCH
3694 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3695 (int )mem, (uintptr_t )s, s);
3696 #endif
3697 null_check_found:
3698 /* empty loop founded, skip next instruction */
3699 switch (*p++) {
3700 case OP_JUMP:
3701 case OP_PUSH:
3702 p += SIZE_RELADDR;
3703 break;
3704 case OP_REPEAT_INC:
3705 case OP_REPEAT_INC_NG:
3706 case OP_REPEAT_INC_SG:
3707 case OP_REPEAT_INC_NG_SG:
3708 p += SIZE_MEMNUM;
3709 break;
3710 default:
3711 goto unexpected_bytecode_error;
3712 break;
3716 MOP_OUT;
3717 JUMP;
3719 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3720 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3722 int isnull;
3724 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3725 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3726 if (isnull) {
3727 # ifdef ONIG_DEBUG_MATCH
3728 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3729 (int )mem, (uintptr_t )s, s);
3730 # endif
3731 if (isnull == -1) goto fail;
3732 goto null_check_found;
3735 MOP_OUT;
3736 JUMP;
3737 #endif
3739 #ifdef USE_SUBEXP_CALL
3740 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3741 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3743 int isnull;
3745 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3746 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3747 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3748 # else
3749 STACK_NULL_CHECK_REC(isnull, mem, s);
3750 # endif
3751 if (isnull) {
3752 # ifdef ONIG_DEBUG_MATCH
3753 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3754 (int )mem, (uintptr_t )s, s);
3755 # endif
3756 if (isnull == -1) goto fail;
3757 goto null_check_found;
3759 else {
3760 STACK_PUSH_NULL_CHECK_END(mem);
3763 MOP_OUT;
3764 JUMP;
3765 #endif
3767 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3768 GET_RELADDR_INC(addr, p);
3769 p += addr;
3770 MOP_OUT;
3771 CHECK_INTERRUPT_IN_MATCH_AT;
3772 JUMP;
3774 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3775 GET_RELADDR_INC(addr, p);
3776 CHECK_MATCH_CACHE;
3777 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3778 MOP_OUT;
3779 JUMP;
3781 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3782 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3783 GET_STATE_CHECK_NUM_INC(mem, p);
3784 STATE_CHECK_VAL(scv, mem);
3785 if (scv) goto fail;
3787 GET_RELADDR_INC(addr, p);
3788 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3789 MOP_OUT;
3790 JUMP;
3792 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3793 GET_STATE_CHECK_NUM_INC(mem, p);
3794 GET_RELADDR_INC(addr, p);
3795 STATE_CHECK_VAL(scv, mem);
3796 if (scv) {
3797 p += addr;
3799 else {
3800 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3802 MOP_OUT;
3803 JUMP;
3805 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3806 GET_STATE_CHECK_NUM_INC(mem, p);
3807 STATE_CHECK_VAL(scv, mem);
3808 if (scv) goto fail;
3810 STACK_PUSH_STATE_CHECK(s, mem);
3811 MOP_OUT;
3812 JUMP;
3813 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
3815 CASE(OP_POP) MOP_IN(OP_POP);
3816 STACK_POP_ONE;
3817 #ifdef USE_MATCH_CACHE
3818 /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3819 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3820 msa->num_fails++;
3821 #endif
3822 MOP_OUT;
3823 JUMP;
3825 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3826 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3827 GET_RELADDR_INC(addr, p);
3828 if (*p == *s && DATA_ENSURE_CHECK1) {
3829 p++;
3830 CHECK_MATCH_CACHE;
3831 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3832 MOP_OUT;
3833 JUMP;
3835 p += (addr + 1);
3836 MOP_OUT;
3837 JUMP;
3838 #endif
3840 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3841 GET_RELADDR_INC(addr, p);
3842 CHECK_MATCH_CACHE;
3843 if (*p == *s) {
3844 p++;
3845 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3846 MOP_OUT;
3847 JUMP;
3849 p++;
3850 INC_NUM_FAILS;
3851 MOP_OUT;
3852 JUMP;
3854 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3856 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3857 GET_RELADDR_INC(addr, p);
3859 STACK_ENSURE(1);
3860 repeat_stk[mem] = GET_STACK_INDEX(stk);
3861 STACK_PUSH_REPEAT(mem, p);
3863 if (reg->repeat_range[mem].lower == 0) {
3864 CHECK_MATCH_CACHE;
3865 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3868 MOP_OUT;
3869 JUMP;
3871 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3873 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3874 GET_RELADDR_INC(addr, p);
3876 STACK_ENSURE(1);
3877 repeat_stk[mem] = GET_STACK_INDEX(stk);
3878 STACK_PUSH_REPEAT(mem, p);
3880 if (reg->repeat_range[mem].lower == 0) {
3881 CHECK_MATCH_CACHE;
3882 STACK_PUSH_ALT(p, s, sprev, pkeep);
3883 p += addr;
3886 MOP_OUT;
3887 JUMP;
3889 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3890 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3891 si = repeat_stk[mem];
3892 stkp = STACK_AT(si);
3894 repeat_inc:
3895 stkp->u.repeat.count++;
3896 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3897 /* end of repeat. Nothing to do. */
3899 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3900 #ifdef USE_MATCH_CACHE
3901 if (*pbegin == OP_REPEAT_INC) {
3902 #undef MATCH_CACHE_HIT
3903 #define MATCH_CACHE_HIT stkp->u.repeat.count--;
3904 CHECK_MATCH_CACHE;
3905 #undef MATCH_CACHE_HIT
3906 #define MATCH_CACHE_HIT ((void) 0)
3908 #endif
3909 STACK_PUSH_ALT(p, s, sprev, pkeep);
3910 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3912 else {
3913 p = stkp->u.repeat.pcode;
3915 STACK_PUSH_REPEAT_INC(si);
3916 MOP_OUT;
3917 CHECK_INTERRUPT_IN_MATCH_AT;
3918 JUMP;
3920 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3921 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3922 STACK_GET_REPEAT(mem, stkp);
3923 si = GET_STACK_INDEX(stkp);
3924 goto repeat_inc;
3925 NEXT;
3927 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3928 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3929 si = repeat_stk[mem];
3930 stkp = STACK_AT(si);
3932 repeat_inc_ng:
3933 stkp->u.repeat.count++;
3934 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3935 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3936 UChar* pcode = stkp->u.repeat.pcode;
3938 STACK_PUSH_REPEAT_INC(si);
3939 if (*pbegin == OP_REPEAT_INC_NG) {
3940 CHECK_MATCH_CACHE;
3942 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3944 else {
3945 p = stkp->u.repeat.pcode;
3946 STACK_PUSH_REPEAT_INC(si);
3949 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3950 STACK_PUSH_REPEAT_INC(si);
3952 MOP_OUT;
3953 CHECK_INTERRUPT_IN_MATCH_AT;
3954 JUMP;
3956 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3957 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3958 STACK_GET_REPEAT(mem, stkp);
3959 si = GET_STACK_INDEX(stkp);
3960 goto repeat_inc_ng;
3961 NEXT;
3963 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3964 STACK_PUSH_POS(s, sprev, pkeep);
3965 MOP_OUT;
3966 JUMP;
3968 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3970 STACK_POS_END(stkp);
3971 s = stkp->u.state.pstr;
3972 sprev = stkp->u.state.pstr_prev;
3974 MOP_OUT;
3975 JUMP;
3977 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3978 GET_RELADDR_INC(addr, p);
3979 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3980 MOP_OUT;
3981 JUMP;
3983 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3984 STACK_POP_TIL_POS_NOT;
3985 goto fail;
3986 NEXT;
3988 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3989 STACK_PUSH_STOP_BT;
3990 MOP_OUT;
3991 JUMP;
3993 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3994 STACK_STOP_BT_END;
3995 MOP_OUT;
3996 JUMP;
3998 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3999 GET_LENGTH_INC(tlen, p);
4000 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4001 if (IS_NULL(s)) goto fail;
4002 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4003 MOP_OUT;
4004 JUMP;
4006 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4007 GET_RELADDR_INC(addr, p);
4008 GET_LENGTH_INC(tlen, p);
4009 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4010 if (IS_NULL(q)) {
4011 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
4012 If you want to change to fail, replace following line. */
4013 p += addr;
4014 /* goto fail; */
4016 else {
4017 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4018 s = q;
4019 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4021 MOP_OUT;
4022 JUMP;
4024 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4025 STACK_POP_TIL_LOOK_BEHIND_NOT;
4026 goto fail;
4027 NEXT;
4029 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4030 /* Save the absent-start-pos and the original end-pos. */
4031 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4032 MOP_OUT;
4033 JUMP;
4035 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4037 const UChar* aend = ABSENT_END_POS;
4038 UChar* absent;
4039 UChar* selfp = p - 1;
4041 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
4042 GET_RELADDR_INC(addr, p);
4043 #ifdef ONIG_DEBUG_MATCH
4044 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4045 #endif
4046 if ((absent > aend) && (s > absent)) {
4047 /* An empty match occurred in (?~...) at the start point.
4048 * Never match. */
4049 STACK_POP;
4050 goto fail;
4052 else if ((s >= aend) && (s > absent)) {
4053 if (s > aend) {
4054 /* Only one (or less) character matched in the last iteration.
4055 * This is not a possible point. */
4056 goto fail;
4058 /* All possible points were found. Try matching after (?~...). */
4059 DATA_ENSURE(0);
4060 p += addr;
4062 else if (s == end) {
4063 /* At the end of the string, just match with it */
4064 DATA_ENSURE(0);
4065 p += addr;
4067 else {
4068 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4069 n = enclen(encode, s, end);
4070 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4071 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4072 STACK_PUSH_ABSENT;
4073 ABSENT_END_POS = aend;
4076 MOP_OUT;
4077 JUMP;
4079 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4080 /* The pattern inside (?~...) was matched.
4081 * Set the end-pos temporary and go to next iteration. */
4082 if (sprev < ABSENT_END_POS)
4083 ABSENT_END_POS = sprev;
4084 #ifdef ONIG_DEBUG_MATCH
4085 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4086 #endif
4087 STACK_POP_TIL_ABSENT;
4088 goto fail;
4089 NEXT;
4091 #ifdef USE_SUBEXP_CALL
4092 CASE(OP_CALL) MOP_IN(OP_CALL);
4093 GET_ABSADDR_INC(addr, p);
4094 STACK_PUSH_CALL_FRAME(p);
4095 p = reg->p + addr;
4096 MOP_OUT;
4097 JUMP;
4099 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4100 STACK_RETURN(p);
4101 STACK_PUSH_RETURN;
4102 MOP_OUT;
4103 JUMP;
4104 #endif
4106 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4107 GET_MEMNUM_INC(mem, p);
4108 GET_RELADDR_INC(addr, p);
4109 if ((mem > num_mem) ||
4110 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4111 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4112 p += addr;
4114 MOP_OUT;
4115 JUMP;
4117 CASE(OP_FINISH)
4118 goto finish;
4119 NEXT;
4121 CASE(OP_FAIL)
4122 if (0) {
4123 /* fall */
4124 fail:
4125 MOP_OUT;
4127 MOP_IN(OP_FAIL);
4128 STACK_POP;
4129 p = stk->u.state.pcode;
4130 s = stk->u.state.pstr;
4131 sprev = stk->u.state.pstr_prev;
4132 pkeep = stk->u.state.pkeep;
4134 #ifdef USE_MATCH_CACHE
4135 if (
4136 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4137 ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4139 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4140 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4141 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4142 if (r < 0) goto bytecode_error;
4144 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4145 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4146 goto fail_match_cache;
4148 if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4149 goto fail_match_cache;
4151 if (msa->cache_opcodes == NULL) {
4152 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4153 OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4154 if (cache_opcodes == NULL) {
4155 return ONIGERR_MEMORY;
4157 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4158 if (r < 0) {
4159 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4160 else goto bytecode_error;
4162 msa->cache_opcodes = cache_opcodes;
4163 #ifdef ONIG_DEBUG_MATCH_CACHE
4164 fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4165 fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4166 fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4167 for (int i = 0; i < msa->num_cache_opcodes; i++) {
4168 fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4170 #endif
4172 if (msa->match_cache_buf == NULL) {
4173 size_t length = (end - str) + 1;
4174 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4175 #ifdef ONIG_DEBUG_MATCH_CACHE
4176 fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4177 #endif
4178 /* Overflow check */
4179 if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4180 return ONIGERR_MEMORY;
4182 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4183 return ONIGERR_MEMORY;
4185 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4186 uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4187 if (match_cache_buf == NULL) {
4188 return ONIGERR_MEMORY;
4190 xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4191 msa->match_cache_buf = match_cache_buf;
4194 fail_match_cache:
4195 #endif
4197 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4198 if (stk->u.state.state_check != 0) {
4199 stk->type = STK_STATE_CHECK_MARK;
4200 stk++;
4202 #endif
4204 MOP_OUT;
4205 CHECK_INTERRUPT_IN_MATCH_AT;
4206 JUMP;
4208 DEFAULT
4209 goto bytecode_error;
4210 } VM_LOOP_END
4212 finish:
4213 STACK_SAVE;
4214 xfree(xmalloc_base);
4215 return best_len;
4217 #ifdef ONIG_DEBUG
4218 stack_error:
4219 STACK_SAVE;
4220 xfree(xmalloc_base);
4221 return ONIGERR_STACK_BUG;
4222 #endif
4224 bytecode_error:
4225 STACK_SAVE;
4226 xfree(xmalloc_base);
4227 return ONIGERR_UNDEFINED_BYTECODE;
4229 unexpected_bytecode_error:
4230 STACK_SAVE;
4231 xfree(xmalloc_base);
4232 return ONIGERR_UNEXPECTED_BYTECODE;
4234 timeout:
4235 STACK_SAVE;
4236 xfree(xmalloc_base);
4237 return ONIGERR_TIMEOUT;
4241 static UChar*
4242 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4243 const UChar* text, const UChar* text_end, UChar* text_range)
4245 UChar *t, *p, *s, *end;
4247 end = (UChar* )text_end;
4248 end -= target_end - target - 1;
4249 if (end > text_range)
4250 end = text_range;
4252 s = (UChar* )text;
4254 if (enc->max_enc_len == enc->min_enc_len) {
4255 int n = enc->max_enc_len;
4257 while (s < end) {
4258 if (*s == *target) {
4259 p = s + 1;
4260 t = target + 1;
4261 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4262 return s;
4264 s += n;
4266 return (UChar* )NULL;
4268 while (s < end) {
4269 if (*s == *target) {
4270 p = s + 1;
4271 t = target + 1;
4272 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4273 return s;
4275 s += enclen(enc, s, text_end);
4278 return (UChar* )NULL;
4281 static int
4282 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4283 const UChar* t, const UChar* tend,
4284 const UChar* p, const UChar* end)
4286 int lowlen;
4287 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4289 while (t < tend) {
4290 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4291 q = lowbuf;
4292 while (lowlen > 0) {
4293 if (*t++ != *q++) return 0;
4294 lowlen--;
4298 return 1;
4301 static UChar*
4302 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4303 UChar* target, UChar* target_end,
4304 const UChar* text, const UChar* text_end, UChar* text_range)
4306 UChar *s, *end;
4308 end = (UChar* )text_end;
4309 end -= target_end - target - 1;
4310 if (end > text_range)
4311 end = text_range;
4313 s = (UChar* )text;
4315 while (s < end) {
4316 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4317 s, text_end))
4318 return s;
4320 s += enclen(enc, s, text_end);
4323 return (UChar* )NULL;
4326 static UChar*
4327 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4328 const UChar* text, const UChar* adjust_text,
4329 const UChar* text_end, const UChar* text_start)
4331 UChar *t, *p, *s;
4333 s = (UChar* )text_end;
4334 s -= (target_end - target);
4335 if (s > text_start)
4336 s = (UChar* )text_start;
4337 else
4338 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4340 while (s >= text) {
4341 if (*s == *target) {
4342 p = s + 1;
4343 t = target + 1;
4344 while (t < target_end) {
4345 if (*t != *p++)
4346 break;
4347 t++;
4349 if (t == target_end)
4350 return s;
4352 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4355 return (UChar* )NULL;
4358 static UChar*
4359 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4360 UChar* target, UChar* target_end,
4361 const UChar* text, const UChar* adjust_text,
4362 const UChar* text_end, const UChar* text_start)
4364 UChar *s;
4366 s = (UChar* )text_end;
4367 s -= (target_end - target);
4368 if (s > text_start)
4369 s = (UChar* )text_start;
4370 else
4371 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4373 while (s >= text) {
4374 if (str_lower_case_match(enc, case_fold_flag,
4375 target, target_end, s, text_end))
4376 return s;
4378 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4381 return (UChar* )NULL;
4384 #ifndef USE_SUNDAY_QUICK_SEARCH
4385 /* Boyer-Moore-Horspool search applied to a multibyte string */
4386 static UChar*
4387 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4388 const UChar* text, const UChar* text_end,
4389 const UChar* text_range)
4391 const UChar *s, *se, *t, *p, *end;
4392 const UChar *tail;
4393 ptrdiff_t skip, tlen1;
4395 # ifdef ONIG_DEBUG_SEARCH
4396 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4397 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4398 # endif
4400 tail = target_end - 1;
4401 tlen1 = tail - target;
4402 end = text_range;
4403 if (end + tlen1 > text_end)
4404 end = text_end - tlen1;
4406 s = text;
4408 if (IS_NULL(reg->int_map)) {
4409 while (s < end) {
4410 p = se = s + tlen1;
4411 t = tail;
4412 while (*p == *t) {
4413 if (t == target) return (UChar* )s;
4414 p--; t--;
4416 skip = reg->map[*se];
4417 t = s;
4418 do {
4419 s += enclen(reg->enc, s, end);
4420 } while ((s - t) < skip && s < end);
4423 else {
4424 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4425 while (s < end) {
4426 p = se = s + tlen1;
4427 t = tail;
4428 while (*p == *t) {
4429 if (t == target) return (UChar* )s;
4430 p--; t--;
4432 skip = reg->int_map[*se];
4433 t = s;
4434 do {
4435 s += enclen(reg->enc, s, end);
4436 } while ((s - t) < skip && s < end);
4438 # endif
4441 return (UChar* )NULL;
4444 /* Boyer-Moore-Horspool search */
4445 static UChar*
4446 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4447 const UChar* text, const UChar* text_end, const UChar* text_range)
4449 const UChar *s, *t, *p, *end;
4450 const UChar *tail;
4452 # ifdef ONIG_DEBUG_SEARCH
4453 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4454 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4455 # endif
4457 end = text_range + (target_end - target) - 1;
4458 if (end > text_end)
4459 end = text_end;
4461 tail = target_end - 1;
4462 s = text + (target_end - target) - 1;
4463 if (IS_NULL(reg->int_map)) {
4464 while (s < end) {
4465 p = s;
4466 t = tail;
4467 # ifdef ONIG_DEBUG_SEARCH
4468 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4469 (intptr_t )(s - text), s);
4470 # endif
4471 while (*p == *t) {
4472 if (t == target) return (UChar* )p;
4473 p--; t--;
4475 s += reg->map[*s];
4478 else { /* see int_map[] */
4479 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4480 while (s < end) {
4481 p = s;
4482 t = tail;
4483 while (*p == *t) {
4484 if (t == target) return (UChar* )p;
4485 p--; t--;
4487 s += reg->int_map[*s];
4489 # endif
4491 return (UChar* )NULL;
4494 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4495 static UChar*
4496 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4497 const UChar* text, const UChar* text_end,
4498 const UChar* text_range)
4500 const UChar *s, *se, *t, *end;
4501 const UChar *tail;
4502 ptrdiff_t skip, tlen1;
4503 OnigEncoding enc = reg->enc;
4504 int case_fold_flag = reg->case_fold_flag;
4506 # ifdef ONIG_DEBUG_SEARCH
4507 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4508 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4509 # endif
4511 tail = target_end - 1;
4512 tlen1 = tail - target;
4513 end = text_range;
4514 if (end + tlen1 > text_end)
4515 end = text_end - tlen1;
4517 s = text;
4519 if (IS_NULL(reg->int_map)) {
4520 while (s < end) {
4521 se = s + tlen1;
4522 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4523 s, se + 1))
4524 return (UChar* )s;
4525 skip = reg->map[*se];
4526 t = s;
4527 do {
4528 s += enclen(reg->enc, s, end);
4529 } while ((s - t) < skip && s < end);
4532 else {
4533 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4534 while (s < end) {
4535 se = s + tlen1;
4536 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4537 s, se + 1))
4538 return (UChar* )s;
4539 skip = reg->int_map[*se];
4540 t = s;
4541 do {
4542 s += enclen(reg->enc, s, end);
4543 } while ((s - t) < skip && s < end);
4545 # endif
4548 return (UChar* )NULL;
4551 /* Boyer-Moore-Horspool search (ignore case) */
4552 static UChar*
4553 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4554 const UChar* text, const UChar* text_end, const UChar* text_range)
4556 const UChar *s, *p, *end;
4557 const UChar *tail;
4558 OnigEncoding enc = reg->enc;
4559 int case_fold_flag = reg->case_fold_flag;
4561 # ifdef ONIG_DEBUG_SEARCH
4562 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4563 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4564 # endif
4566 end = text_range + (target_end - target) - 1;
4567 if (end > text_end)
4568 end = text_end;
4570 tail = target_end - 1;
4571 s = text + (target_end - target) - 1;
4572 if (IS_NULL(reg->int_map)) {
4573 while (s < end) {
4574 p = s - (target_end - target) + 1;
4575 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4576 p, s + 1))
4577 return (UChar* )p;
4578 s += reg->map[*s];
4581 else { /* see int_map[] */
4582 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4583 while (s < end) {
4584 p = s - (target_end - target) + 1;
4585 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4586 p, s + 1))
4587 return (UChar* )p;
4588 s += reg->int_map[*s];
4590 # endif
4592 return (UChar* )NULL;
4595 #else /* USE_SUNDAY_QUICK_SEARCH */
4597 /* Sunday's quick search applied to a multibyte string */
4598 static UChar*
4599 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4600 const UChar* text, const UChar* text_end,
4601 const UChar* text_range)
4603 const UChar *s, *se, *t, *p, *end;
4604 const UChar *tail;
4605 ptrdiff_t skip, tlen1;
4606 OnigEncoding enc = reg->enc;
4608 # ifdef ONIG_DEBUG_SEARCH
4609 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4610 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4611 # endif
4613 tail = target_end - 1;
4614 tlen1 = tail - target;
4615 end = text_range;
4616 if (end + tlen1 > text_end)
4617 end = text_end - tlen1;
4619 s = text;
4621 if (IS_NULL(reg->int_map)) {
4622 while (s < end) {
4623 p = se = s + tlen1;
4624 t = tail;
4625 while (*p == *t) {
4626 if (t == target) return (UChar* )s;
4627 p--; t--;
4629 if (s + 1 >= end) break;
4630 skip = reg->map[se[1]];
4631 t = s;
4632 do {
4633 s += enclen(enc, s, end);
4634 } while ((s - t) < skip && s < end);
4637 else {
4638 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4639 while (s < end) {
4640 p = se = s + tlen1;
4641 t = tail;
4642 while (*p == *t) {
4643 if (t == target) return (UChar* )s;
4644 p--; t--;
4646 if (s + 1 >= end) break;
4647 skip = reg->int_map[se[1]];
4648 t = s;
4649 do {
4650 s += enclen(enc, s, end);
4651 } while ((s - t) < skip && s < end);
4653 # endif
4656 return (UChar* )NULL;
4659 /* Sunday's quick search */
4660 static UChar*
4661 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4662 const UChar* text, const UChar* text_end, const UChar* text_range)
4664 const UChar *s, *t, *p, *end;
4665 const UChar *tail;
4666 ptrdiff_t tlen1;
4668 # ifdef ONIG_DEBUG_SEARCH
4669 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4670 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4671 # endif
4673 tail = target_end - 1;
4674 tlen1 = tail - target;
4675 end = text_range + tlen1;
4676 if (end > text_end)
4677 end = text_end;
4679 s = text + tlen1;
4680 if (IS_NULL(reg->int_map)) {
4681 while (s < end) {
4682 p = s;
4683 t = tail;
4684 while (*p == *t) {
4685 if (t == target) return (UChar* )p;
4686 p--; t--;
4688 if (s + 1 >= end) break;
4689 s += reg->map[s[1]];
4692 else { /* see int_map[] */
4693 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4694 while (s < end) {
4695 p = s;
4696 t = tail;
4697 while (*p == *t) {
4698 if (t == target) return (UChar* )p;
4699 p--; t--;
4701 if (s + 1 >= end) break;
4702 s += reg->int_map[s[1]];
4704 # endif
4706 return (UChar* )NULL;
4709 /* Sunday's quick search applied to a multibyte string (ignore case) */
4710 static UChar*
4711 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4712 const UChar* text, const UChar* text_end,
4713 const UChar* text_range)
4715 const UChar *s, *se, *t, *end;
4716 const UChar *tail;
4717 ptrdiff_t skip, tlen1;
4718 OnigEncoding enc = reg->enc;
4719 int case_fold_flag = reg->case_fold_flag;
4721 # ifdef ONIG_DEBUG_SEARCH
4722 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4723 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4724 # endif
4726 tail = target_end - 1;
4727 tlen1 = tail - target;
4728 end = text_range;
4729 if (end + tlen1 > text_end)
4730 end = text_end - tlen1;
4732 s = text;
4734 if (IS_NULL(reg->int_map)) {
4735 while (s < end) {
4736 se = s + tlen1;
4737 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4738 s, se + 1))
4739 return (UChar* )s;
4740 if (s + 1 >= end) break;
4741 skip = reg->map[se[1]];
4742 t = s;
4743 do {
4744 s += enclen(enc, s, end);
4745 } while ((s - t) < skip && s < end);
4748 else {
4749 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4750 while (s < end) {
4751 se = s + tlen1;
4752 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4753 s, se + 1))
4754 return (UChar* )s;
4755 if (s + 1 >= end) break;
4756 skip = reg->int_map[se[1]];
4757 t = s;
4758 do {
4759 s += enclen(enc, s, end);
4760 } while ((s - t) < skip && s < end);
4762 # endif
4765 return (UChar* )NULL;
4768 /* Sunday's quick search (ignore case) */
4769 static UChar*
4770 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4771 const UChar* text, const UChar* text_end, const UChar* text_range)
4773 const UChar *s, *p, *end;
4774 const UChar *tail;
4775 ptrdiff_t tlen1;
4776 OnigEncoding enc = reg->enc;
4777 int case_fold_flag = reg->case_fold_flag;
4779 # ifdef ONIG_DEBUG_SEARCH
4780 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4781 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4782 # endif
4784 tail = target_end - 1;
4785 tlen1 = tail - target;
4786 end = text_range + tlen1;
4787 if (end > text_end)
4788 end = text_end;
4790 s = text + tlen1;
4791 if (IS_NULL(reg->int_map)) {
4792 while (s < end) {
4793 p = s - tlen1;
4794 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4795 p, s + 1))
4796 return (UChar* )p;
4797 if (s + 1 >= end) break;
4798 s += reg->map[s[1]];
4801 else { /* see int_map[] */
4802 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4803 while (s < end) {
4804 p = s - tlen1;
4805 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4806 p, s + 1))
4807 return (UChar* )p;
4808 if (s + 1 >= end) break;
4809 s += reg->int_map[s[1]];
4811 # endif
4813 return (UChar* )NULL;
4815 #endif /* USE_SUNDAY_QUICK_SEARCH */
4817 #ifdef USE_INT_MAP_BACKWARD
4818 static int
4819 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4820 int** skip)
4822 int i, len;
4824 if (IS_NULL(*skip)) {
4825 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4826 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4829 len = (int )(end - s);
4830 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4831 (*skip)[i] = len;
4833 for (i = len - 1; i > 0; i--)
4834 (*skip)[s[i]] = i;
4836 return 0;
4839 static UChar*
4840 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4841 const UChar* text, const UChar* adjust_text,
4842 const UChar* text_end, const UChar* text_start)
4844 const UChar *s, *t, *p;
4846 s = text_end - (target_end - target);
4847 if (text_start < s)
4848 s = text_start;
4849 else
4850 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4852 while (s >= text) {
4853 p = s;
4854 t = target;
4855 while (t < target_end && *p == *t) {
4856 p++; t++;
4858 if (t == target_end)
4859 return (UChar* )s;
4861 s -= reg->int_map_backward[*s];
4862 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4865 return (UChar* )NULL;
4867 #endif
4869 static UChar*
4870 map_search(OnigEncoding enc, UChar map[],
4871 const UChar* text, const UChar* text_range, const UChar* text_end)
4873 const UChar *s = text;
4875 while (s < text_range) {
4876 if (map[*s]) return (UChar* )s;
4878 s += enclen(enc, s, text_end);
4880 return (UChar* )NULL;
4883 static UChar*
4884 map_search_backward(OnigEncoding enc, UChar map[],
4885 const UChar* text, const UChar* adjust_text,
4886 const UChar* text_start, const UChar* text_end)
4888 const UChar *s = text_start;
4890 while (s >= text) {
4891 if (map[*s]) return (UChar* )s;
4893 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4895 return (UChar* )NULL;
4898 extern OnigPosition
4899 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4900 OnigOptionType option)
4902 ptrdiff_t r;
4903 UChar *prev;
4904 OnigMatchArg msa;
4906 MATCH_ARG_INIT(msa, option, region, at, at);
4907 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4909 ptrdiff_t offset = at - str;
4910 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4912 #endif
4914 if (region) {
4915 r = onig_region_resize_clear(region, reg->num_mem + 1);
4917 else
4918 r = 0;
4920 if (r == 0) {
4921 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4922 r = match_at(reg, str, end,
4923 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4924 end,
4925 #endif
4926 at, prev, &msa);
4929 MATCH_ARG_FREE(msa);
4930 return r;
4933 static int
4934 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4935 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4937 UChar *p, *pprev = (UChar* )NULL;
4938 size_t input_len = end - str;
4940 #ifdef ONIG_DEBUG_SEARCH
4941 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4942 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4943 #endif
4945 if (reg->dmin > input_len) {
4946 return 0;
4949 p = s;
4950 if (reg->dmin > 0) {
4951 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4952 p += reg->dmin;
4954 else {
4955 UChar *q = p + reg->dmin;
4957 if (q >= end) return 0; /* fail */
4958 while (p < q) p += enclen(reg->enc, p, end);
4962 retry:
4963 switch (reg->optimize) {
4964 case ONIG_OPTIMIZE_EXACT:
4965 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4966 break;
4967 case ONIG_OPTIMIZE_EXACT_IC:
4968 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4969 reg->exact, reg->exact_end, p, end, range);
4970 break;
4972 case ONIG_OPTIMIZE_EXACT_BM:
4973 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4974 break;
4976 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4977 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4978 break;
4980 case ONIG_OPTIMIZE_EXACT_BM_IC:
4981 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4982 break;
4984 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4985 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4986 break;
4988 case ONIG_OPTIMIZE_MAP:
4989 p = map_search(reg->enc, reg->map, p, range, end);
4990 break;
4993 if (p && p < range) {
4994 if (p - reg->dmin < s) {
4995 retry_gate:
4996 pprev = p;
4997 p += enclen(reg->enc, p, end);
4998 goto retry;
5001 if (reg->sub_anchor) {
5002 UChar* prev;
5004 switch (reg->sub_anchor) {
5005 case ANCHOR_BEGIN_LINE:
5006 if (!ON_STR_BEGIN(p)) {
5007 prev = onigenc_get_prev_char_head(reg->enc,
5008 (pprev ? pprev : str), p, end);
5009 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
5010 goto retry_gate;
5012 break;
5014 case ANCHOR_END_LINE:
5015 if (ON_STR_END(p)) {
5016 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5017 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5018 (pprev ? pprev : str), p);
5019 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
5020 goto retry_gate;
5021 #endif
5023 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
5024 goto retry_gate;
5025 break;
5029 if (reg->dmax == 0) {
5030 *low = p;
5031 if (low_prev) {
5032 if (*low > s)
5033 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
5034 else
5035 *low_prev = onigenc_get_prev_char_head(reg->enc,
5036 (pprev ? pprev : str), p, end);
5039 else {
5040 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5041 if (p < str + reg->dmax) {
5042 *low = (UChar* )str;
5043 if (low_prev)
5044 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
5046 else {
5047 *low = p - reg->dmax;
5048 if (*low > s) {
5049 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
5050 *low, end, (const UChar** )low_prev);
5051 if (low_prev && IS_NULL(*low_prev))
5052 *low_prev = onigenc_get_prev_char_head(reg->enc,
5053 (pprev ? pprev : s), *low, end);
5055 else {
5056 if (low_prev)
5057 *low_prev = onigenc_get_prev_char_head(reg->enc,
5058 (pprev ? pprev : str), *low, end);
5063 /* no needs to adjust *high, *high is used as range check only */
5064 *high = p - reg->dmin;
5066 #ifdef ONIG_DEBUG_SEARCH
5067 fprintf(stderr,
5068 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
5069 *low - str, *high - str, reg->dmin, reg->dmax);
5070 #endif
5071 return 1; /* success */
5074 return 0; /* fail */
5077 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
5079 static int
5080 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
5081 UChar* s, const UChar* range, UChar* adjrange,
5082 UChar** low, UChar** high)
5084 UChar *p;
5085 size_t input_len = end - str;
5087 if (reg->dmin > input_len) {
5088 return 0;
5091 range += reg->dmin;
5092 p = s;
5094 retry:
5095 switch (reg->optimize) {
5096 case ONIG_OPTIMIZE_EXACT:
5097 exact_method:
5098 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5099 range, adjrange, end, p);
5100 break;
5102 case ONIG_OPTIMIZE_EXACT_IC:
5103 case ONIG_OPTIMIZE_EXACT_BM_IC:
5104 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
5105 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5106 reg->exact, reg->exact_end,
5107 range, adjrange, end, p);
5108 break;
5110 case ONIG_OPTIMIZE_EXACT_BM:
5111 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
5112 #ifdef USE_INT_MAP_BACKWARD
5113 if (IS_NULL(reg->int_map_backward)) {
5114 int r;
5115 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
5116 goto exact_method;
5118 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
5119 &(reg->int_map_backward));
5120 if (r) return r;
5122 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
5123 end, p);
5124 #else
5125 goto exact_method;
5126 #endif
5127 break;
5129 case ONIG_OPTIMIZE_MAP:
5130 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
5131 break;
5134 if (p) {
5135 if (reg->sub_anchor) {
5136 UChar* prev;
5138 switch (reg->sub_anchor) {
5139 case ANCHOR_BEGIN_LINE:
5140 if (!ON_STR_BEGIN(p)) {
5141 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
5142 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
5143 p = prev;
5144 goto retry;
5147 break;
5149 case ANCHOR_END_LINE:
5150 if (ON_STR_END(p)) {
5151 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5152 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5153 if (IS_NULL(prev)) goto fail;
5154 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
5155 p = prev;
5156 goto retry;
5158 #endif
5160 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
5161 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5162 if (IS_NULL(p)) goto fail;
5163 goto retry;
5165 break;
5169 /* no needs to adjust *high, *high is used as range check only */
5170 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5171 *low = p - reg->dmax;
5172 *high = p - reg->dmin;
5173 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
5176 #ifdef ONIG_DEBUG_SEARCH
5177 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
5178 (int )(*low - str), (int )(*high - str));
5179 #endif
5180 return 1; /* success */
5183 fail:
5184 #ifdef ONIG_DEBUG_SEARCH
5185 fprintf(stderr, "backward_search_range: fail.\n");
5186 #endif
5187 return 0; /* fail */
5191 extern OnigPosition
5192 onig_search(regex_t* reg, const UChar* str, const UChar* end,
5193 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5195 return onig_search_gpos(reg, str, end, start, start, range, region, option);
5198 extern OnigPosition
5199 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
5200 const UChar* global_pos,
5201 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5203 ptrdiff_t r;
5204 UChar *s, *prev;
5205 OnigMatchArg msa;
5206 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5207 const UChar *orig_start = start;
5208 const UChar *orig_range = range;
5209 #endif
5211 #ifdef ONIG_DEBUG_SEARCH
5212 fprintf(stderr,
5213 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
5214 (uintptr_t )str, str, end - str, start - str, range - str);
5215 #endif
5217 if (region) {
5218 r = onig_region_resize_clear(region, reg->num_mem + 1);
5219 if (r) goto finish_no_msa;
5222 if (start > end || start < str) goto mismatch_no_msa;
5225 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5226 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5227 # define MATCH_AND_RETURN_CHECK(upper_range) \
5228 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5229 switch (r) { \
5230 case ONIG_MISMATCH: \
5231 break; \
5232 case ONIGERR_TIMEOUT: \
5233 goto timeout; \
5234 default: \
5235 if (r >= 0) { \
5236 if (! IS_FIND_LONGEST(reg->options)) { \
5237 goto match; \
5240 else goto finish; /* error */ \
5242 # else
5243 # define MATCH_AND_RETURN_CHECK(upper_range) \
5244 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5245 switch (r) { \
5246 case ONIG_MISMATCH: \
5247 break; \
5248 case ONIGERR_TIMEOUT: \
5249 goto timeout; \
5250 default: \
5251 if (r >= 0) { \
5252 goto match; \
5254 else goto finish; /* error */ \
5256 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5257 #else
5258 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5259 # define MATCH_AND_RETURN_CHECK(none) \
5260 r = match_at(reg, str, end, s, prev, &msa);\
5261 switch (r) { \
5262 case ONIG_MISMATCH: \
5263 break; \
5264 case ONIGERR_TIMEOUT: \
5265 goto timeout; \
5266 default: \
5267 if (r >= 0) { \
5268 if (! IS_FIND_LONGEST(reg->options)) { \
5269 goto match; \
5272 else goto finish; /* error */ \
5274 # else
5275 # define MATCH_AND_RETURN_CHECK(none) \
5276 r = match_at(reg, str, end, s, prev, &msa);\
5277 switch (r) { \
5278 case ONIG_MISMATCH: \
5279 break; \
5280 case ONIGERR_TIMEOUT: \
5281 goto timeout; \
5282 default: \
5283 if (r >= 0) { \
5284 goto match; \
5286 else goto finish; /* error */ \
5288 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5289 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5292 /* anchor optimize: resume search range */
5293 if (reg->anchor != 0 && str < end) {
5294 UChar *min_semi_end, *max_semi_end;
5296 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5297 /* search start-position only */
5298 begin_position:
5299 if (range > start)
5301 if (global_pos > start)
5303 if (global_pos < range)
5304 range = global_pos + 1;
5306 else
5307 range = start + 1;
5309 else
5310 range = start;
5312 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5313 /* search str-position only */
5314 if (range > start) {
5315 if (start != str) goto mismatch_no_msa;
5316 range = str + 1;
5318 else {
5319 if (range <= str) {
5320 start = str;
5321 range = str;
5323 else
5324 goto mismatch_no_msa;
5327 else if (reg->anchor & ANCHOR_END_BUF) {
5328 min_semi_end = max_semi_end = (UChar* )end;
5330 end_buf:
5331 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5332 goto mismatch_no_msa;
5334 if (range > start) {
5335 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5336 start = min_semi_end - reg->anchor_dmax;
5337 if (start < end)
5338 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5340 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5341 range = max_semi_end - reg->anchor_dmin + 1;
5344 if (start > range) goto mismatch_no_msa;
5345 /* If start == range, match with empty at end.
5346 Backward search is used. */
5348 else {
5349 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5350 range = min_semi_end - reg->anchor_dmax;
5352 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5353 start = max_semi_end - reg->anchor_dmin;
5354 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5356 if (range > start) goto mismatch_no_msa;
5359 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5360 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5362 max_semi_end = (UChar* )end;
5363 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5364 min_semi_end = pre_end;
5366 #ifdef USE_CRNL_AS_LINE_TERMINATOR
5367 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5368 if (IS_NOT_NULL(pre_end) &&
5369 IS_NEWLINE_CRLF(reg->options) &&
5370 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5371 min_semi_end = pre_end;
5373 #endif
5374 if (min_semi_end > str && start <= min_semi_end) {
5375 goto end_buf;
5378 else {
5379 min_semi_end = (UChar* )end;
5380 goto end_buf;
5383 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5384 goto begin_position;
5387 else if (str == end) { /* empty string */
5388 static const UChar address_for_empty_string[] = "";
5390 #ifdef ONIG_DEBUG_SEARCH
5391 fprintf(stderr, "onig_search: empty string.\n");
5392 #endif
5394 if (reg->threshold_len == 0) {
5395 start = end = str = address_for_empty_string;
5396 s = (UChar* )start;
5397 prev = (UChar* )NULL;
5399 MATCH_ARG_INIT(msa, option, region, start, start);
5400 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5401 msa.state_check_buff = (void* )0;
5402 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5403 #endif
5404 MATCH_AND_RETURN_CHECK(end);
5405 goto mismatch;
5407 goto mismatch_no_msa;
5410 #ifdef ONIG_DEBUG_SEARCH
5411 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5412 (int )(end - str), (int )(start - str), (int )(range - str));
5413 #endif
5415 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5416 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5418 ptrdiff_t offset = (MIN(start, range) - str);
5419 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5421 #endif
5423 s = (UChar* )start;
5424 if (range > start) { /* forward search */
5425 if (s > str)
5426 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5427 else
5428 prev = (UChar* )NULL;
5430 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5431 UChar *sch_range, *low, *high, *low_prev;
5433 sch_range = (UChar* )range;
5434 if (reg->dmax != 0) {
5435 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5436 sch_range = (UChar* )end;
5437 else {
5438 sch_range += reg->dmax;
5439 if (sch_range > end) sch_range = (UChar* )end;
5443 if ((end - start) < reg->threshold_len)
5444 goto mismatch;
5446 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5447 do {
5448 if (! forward_search_range(reg, str, end, s, sch_range,
5449 &low, &high, &low_prev)) goto mismatch;
5450 if (s < low) {
5451 s = low;
5452 prev = low_prev;
5454 while (s <= high) {
5455 MATCH_AND_RETURN_CHECK(orig_range);
5456 prev = s;
5457 s += enclen(reg->enc, s, end);
5459 } while (s < range);
5460 goto mismatch;
5462 else { /* check only. */
5463 if (! forward_search_range(reg, str, end, s, sch_range,
5464 &low, &high, (UChar** )NULL)) goto mismatch;
5466 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5467 do {
5468 MATCH_AND_RETURN_CHECK(orig_range);
5469 prev = s;
5470 s += enclen(reg->enc, s, end);
5472 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5473 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5474 && s < range) {
5475 prev = s;
5476 s += enclen(reg->enc, s, end);
5479 } while (s < range);
5480 goto mismatch;
5485 do {
5486 MATCH_AND_RETURN_CHECK(orig_range);
5487 prev = s;
5488 s += enclen(reg->enc, s, end);
5489 } while (s < range);
5491 if (s == range) { /* because empty match with /$/. */
5492 MATCH_AND_RETURN_CHECK(orig_range);
5495 else { /* backward search */
5496 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5497 UChar *low, *high, *adjrange, *sch_start;
5499 if (range < end)
5500 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5501 else
5502 adjrange = (UChar* )end;
5504 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5505 (end - range) >= reg->threshold_len) {
5506 do {
5507 sch_start = s + reg->dmax;
5508 if (sch_start > end) sch_start = (UChar* )end;
5509 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5510 &low, &high) <= 0)
5511 goto mismatch;
5513 if (s > high)
5514 s = high;
5516 while (s >= low) {
5517 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5518 MATCH_AND_RETURN_CHECK(orig_start);
5519 s = prev;
5521 } while (s >= range);
5522 goto mismatch;
5524 else { /* check only. */
5525 if ((end - range) < reg->threshold_len) goto mismatch;
5527 sch_start = s;
5528 if (reg->dmax != 0) {
5529 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5530 sch_start = (UChar* )end;
5531 else {
5532 sch_start += reg->dmax;
5533 if (sch_start > end) sch_start = (UChar* )end;
5534 else
5535 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5536 start, sch_start, end);
5539 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5540 &low, &high) <= 0) goto mismatch;
5544 do {
5545 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5546 MATCH_AND_RETURN_CHECK(orig_start);
5547 s = prev;
5548 } while (s >= range);
5551 mismatch:
5552 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5553 if (IS_FIND_LONGEST(reg->options)) {
5554 if (msa.best_len >= 0) {
5555 s = msa.best_s;
5556 goto match;
5559 #endif
5560 r = ONIG_MISMATCH;
5562 finish:
5563 MATCH_ARG_FREE(msa);
5565 /* If result is mismatch and no FIND_NOT_EMPTY option,
5566 then the region is not set in match_at(). */
5567 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5568 onig_region_clear(region);
5571 #ifdef ONIG_DEBUG
5572 if (r != ONIG_MISMATCH)
5573 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5574 #endif
5575 return r;
5577 mismatch_no_msa:
5578 r = ONIG_MISMATCH;
5579 finish_no_msa:
5580 #ifdef ONIG_DEBUG
5581 if (r != ONIG_MISMATCH)
5582 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5583 #endif
5584 return r;
5586 match:
5587 MATCH_ARG_FREE(msa);
5588 return s - str;
5590 timeout:
5591 MATCH_ARG_FREE(msa);
5592 return ONIGERR_TIMEOUT;
5595 extern OnigPosition
5596 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5597 OnigRegion* region, OnigOptionType option,
5598 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5599 void* callback_arg)
5601 OnigPosition r;
5602 OnigPosition n;
5603 int rs;
5604 const UChar* start;
5606 n = 0;
5607 start = str;
5608 while (1) {
5609 r = onig_search(reg, str, end, start, end, region, option);
5610 if (r >= 0) {
5611 rs = scan_callback(n, r, region, callback_arg);
5612 n++;
5613 if (rs != 0)
5614 return rs;
5616 if (region->end[0] == start - str) {
5617 if (start >= end) break;
5618 start += enclen(reg->enc, start, end);
5620 else
5621 start = str + region->end[0];
5623 if (start > end)
5624 break;
5626 else if (r == ONIG_MISMATCH) {
5627 break;
5629 else { /* error */
5630 return r;
5634 return n;
5637 extern OnigEncoding
5638 onig_get_encoding(const regex_t* reg)
5640 return reg->enc;
5643 extern OnigOptionType
5644 onig_get_options(const regex_t* reg)
5646 return reg->options;
5649 extern OnigCaseFoldType
5650 onig_get_case_fold_flag(const regex_t* reg)
5652 return reg->case_fold_flag;
5655 extern const OnigSyntaxType*
5656 onig_get_syntax(const regex_t* reg)
5658 return reg->syntax;
5661 extern int
5662 onig_number_of_captures(const regex_t* reg)
5664 return reg->num_mem;
5667 extern int
5668 onig_number_of_capture_histories(const regex_t* reg)
5670 #ifdef USE_CAPTURE_HISTORY
5671 int i, n;
5673 n = 0;
5674 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5675 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5676 n++;
5678 return n;
5679 #else
5680 return 0;
5681 #endif
5684 extern void
5685 onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5687 *to = *from;