Ruby  2.0.0p353(2013-11-22revision43784)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
34 
35 extern OnigCaseFoldType
37 {
39 }
40 
41 extern int
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
97 {
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 onig_is_prelude(void)
130 {
131  return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
132 }
133 
134 static int
135 bitset_on_num(BitSetRef bs)
136 {
137  int i, n;
138 
139  n = 0;
140  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
141  if (BITSET_AT(bs, i)) n++;
142  }
143  return n;
144 }
145 #endif
146 
147 extern int
149 {
150  if (size <= 0) {
151  size = 0;
152  buf->p = NULL;
153  }
154  else {
155  buf->p = (UChar* )xmalloc(size);
156  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
157  }
158 
159  buf->alloc = (unsigned int )size;
160  buf->used = 0;
161  return 0;
162 }
163 
164 
165 #ifdef USE_SUBEXP_CALL
166 
167 static int
169 {
170  UnsetAddr* p;
171 
172  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
174  uslist->num = 0;
175  uslist->alloc = size;
176  uslist->us = p;
177  return 0;
178 }
179 
180 static void
182 {
183  if (IS_NOT_NULL(uslist->us))
184  xfree(uslist->us);
185 }
186 
187 static int
188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
189 {
190  UnsetAddr* p;
191  int size;
192 
193  if (uslist->num >= uslist->alloc) {
194  size = uslist->alloc * 2;
195  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
197  uslist->alloc = size;
198  uslist->us = p;
199  }
200 
201  uslist->us[uslist->num].offset = offset;
202  uslist->us[uslist->num].target = node;
203  uslist->num++;
204  return 0;
205 }
206 #endif /* USE_SUBEXP_CALL */
207 
208 
209 static int
210 add_opcode(regex_t* reg, int opcode)
211 {
212  BBUF_ADD1(reg, opcode);
213  return 0;
214 }
215 
216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
217 static int
218 add_state_check_num(regex_t* reg, int num)
219 {
221 
222  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
223  return 0;
224 }
225 #endif
226 
227 static int
228 add_rel_addr(regex_t* reg, int addr)
229 {
230  RelAddrType ra = (RelAddrType )addr;
231 
232  BBUF_ADD(reg, &ra, SIZE_RELADDR);
233  return 0;
234 }
235 
236 static int
237 add_abs_addr(regex_t* reg, int addr)
238 {
239  AbsAddrType ra = (AbsAddrType )addr;
240 
241  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
242  return 0;
243 }
244 
245 static int
247 {
248  LengthType l = (LengthType )len;
249 
250  BBUF_ADD(reg, &l, SIZE_LENGTH);
251  return 0;
252 }
253 
254 static int
255 add_mem_num(regex_t* reg, int num)
256 {
257  MemNumType n = (MemNumType )num;
258 
259  BBUF_ADD(reg, &n, SIZE_MEMNUM);
260  return 0;
261 }
262 
263 static int
264 add_pointer(regex_t* reg, void* addr)
265 {
266  PointerType ptr = (PointerType )addr;
267 
268  BBUF_ADD(reg, &ptr, SIZE_POINTER);
269  return 0;
270 }
271 
272 static int
274 {
275  BBUF_ADD(reg, &option, SIZE_OPTION);
276  return 0;
277 }
278 
279 static int
280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
281 {
282  int r;
283 
284  r = add_opcode(reg, opcode);
285  if (r) return r;
286  r = add_rel_addr(reg, addr);
287  return r;
288 }
289 
290 static int
292 {
293  BBUF_ADD(reg, bytes, len);
294  return 0;
295 }
296 
297 static int
299 {
300  BBUF_ADD(reg, bs, SIZE_BITSET);
301  return 0;
302 }
303 
304 static int
305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
306 {
307  int r;
308 
309  r = add_opcode(reg, opcode);
310  if (r) return r;
311  r = add_option(reg, option);
312  return r;
313 }
314 
315 static int compile_length_tree(Node* node, regex_t* reg);
316 static int compile_tree(Node* node, regex_t* reg);
317 
318 
319 #define IS_NEED_STR_LEN_OP_EXACT(op) \
320  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
321  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
322 
323 static int
324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
325 {
326  int op;
327 
328  if (ignore_case) {
329  switch (str_len) {
330  case 1: op = OP_EXACT1_IC; break;
331  default: op = OP_EXACTN_IC; break;
332  }
333  }
334  else {
335  switch (mb_len) {
336  case 1:
337  switch (str_len) {
338  case 1: op = OP_EXACT1; break;
339  case 2: op = OP_EXACT2; break;
340  case 3: op = OP_EXACT3; break;
341  case 4: op = OP_EXACT4; break;
342  case 5: op = OP_EXACT5; break;
343  default: op = OP_EXACTN; break;
344  }
345  break;
346 
347  case 2:
348  switch (str_len) {
349  case 1: op = OP_EXACTMB2N1; break;
350  case 2: op = OP_EXACTMB2N2; break;
351  case 3: op = OP_EXACTMB2N3; break;
352  default: op = OP_EXACTMB2N; break;
353  }
354  break;
355 
356  case 3:
357  op = OP_EXACTMB3N;
358  break;
359 
360  default:
361  op = OP_EXACTMBN;
362  break;
363  }
364  }
365  return op;
366 }
367 
368 static int
369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
370 {
371  int r;
372  int saved_num_null_check = reg->num_null_check;
373 
374  if (empty_info != 0) {
376  if (r) return r;
377  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
378  if (r) return r;
379  reg->num_null_check++;
380  }
381 
382  r = compile_tree(node, reg);
383  if (r) return r;
384 
385  if (empty_info != 0) {
386  if (empty_info == NQ_TARGET_IS_EMPTY)
387  r = add_opcode(reg, OP_NULL_CHECK_END);
388  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
390  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
392 
393  if (r) return r;
394  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
395  }
396  return r;
397 }
398 
399 #ifdef USE_SUBEXP_CALL
400 static int
402 {
403  int r;
404 
405  r = add_opcode(reg, OP_CALL);
406  if (r) return r;
408  node->target);
409  if (r) return r;
410  r = add_abs_addr(reg, 0 /*dummy addr.*/);
411  return r;
412 }
413 #endif
414 
415 static int
416 compile_tree_n_times(Node* node, int n, regex_t* reg)
417 {
418  int i, r;
419 
420  for (i = 0; i < n; i++) {
421  r = compile_tree(node, reg);
422  if (r) return r;
423  }
424  return 0;
425 }
426 
427 static int
429  regex_t* reg ARG_UNUSED, int ignore_case)
430 {
431  int len;
432  int op = select_str_opcode(mb_len, str_len, ignore_case);
433 
434  len = SIZE_OPCODE;
435 
436  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
437  if (IS_NEED_STR_LEN_OP_EXACT(op))
438  len += SIZE_LENGTH;
439 
440  len += mb_len * (int )str_len;
441  return len;
442 }
443 
444 static int
445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
446  regex_t* reg, int ignore_case)
447 {
448  int op = select_str_opcode(mb_len, str_len, ignore_case);
449  add_opcode(reg, op);
450 
451  if (op == OP_EXACTMBN)
452  add_length(reg, mb_len);
453 
454  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
455  if (op == OP_EXACTN_IC)
456  add_length(reg, mb_len * str_len);
457  else
458  add_length(reg, str_len);
459  }
460 
461  add_bytes(reg, s, mb_len * str_len);
462  return 0;
463 }
464 
465 
466 static int
468 {
469  int rlen, r, len, prev_len, slen, ambig;
470  OnigEncoding enc = reg->enc;
471  UChar *p, *prev;
472  StrNode* sn;
473 
474  sn = NSTR(node);
475  if (sn->end <= sn->s)
476  return 0;
477 
478  ambig = NSTRING_IS_AMBIG(node);
479 
480  p = prev = sn->s;
481  prev_len = enclen(enc, p, sn->end);
482  p += prev_len;
483  slen = 1;
484  rlen = 0;
485 
486  for (; p < sn->end; ) {
487  len = enclen(enc, p, sn->end);
488  if (len == prev_len) {
489  slen++;
490  }
491  else {
492  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
493  rlen += r;
494  prev = p;
495  slen = 1;
496  prev_len = len;
497  }
498  p += len;
499  }
500  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
501  rlen += r;
502  return rlen;
503 }
504 
505 static int
507 {
508  if (sn->end <= sn->s)
509  return 0;
510 
511  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
512 }
513 
514 static int
516 {
517  int r, len, prev_len, slen, ambig;
518  OnigEncoding enc = reg->enc;
519  UChar *p, *prev, *end;
520  StrNode* sn;
521 
522  sn = NSTR(node);
523  if (sn->end <= sn->s)
524  return 0;
525 
526  end = sn->end;
527  ambig = NSTRING_IS_AMBIG(node);
528 
529  p = prev = sn->s;
530  prev_len = enclen(enc, p, end);
531  p += prev_len;
532  slen = 1;
533 
534  for (; p < end; ) {
535  len = enclen(enc, p, end);
536  if (len == prev_len) {
537  slen++;
538  }
539  else {
540  r = add_compile_string(prev, prev_len, slen, reg, ambig);
541  if (r) return r;
542 
543  prev = p;
544  slen = 1;
545  prev_len = len;
546  }
547 
548  p += len;
549  }
550  return add_compile_string(prev, prev_len, slen, reg, ambig);
551 }
552 
553 static int
555 {
556  if (sn->end <= sn->s)
557  return 0;
558 
559  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560 }
561 
562 static int
564 {
565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566  add_length(reg, mbuf->used);
567  return add_bytes(reg, mbuf->p, mbuf->used);
568 #else
569  int r, pad_size;
571 
572  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575 
576  r = add_bytes(reg, mbuf->p, mbuf->used);
577 
578  /* padding for return value from compile_length_cclass_node() to be fix. */
579  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581  return r;
582 #endif
583 }
584 
585 static int
587 {
588  int len;
589 
590  if (IS_NCCLASS_SHARE(cc)) {
591  len = SIZE_OPCODE + SIZE_POINTER;
592  return len;
593  }
594 
595  if (IS_NULL(cc->mbuf)) {
596  len = SIZE_OPCODE + SIZE_BITSET;
597  }
598  else {
599  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
600  len = SIZE_OPCODE;
601  }
602  else {
603  len = SIZE_OPCODE + SIZE_BITSET;
604  }
605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
606  len += SIZE_LENGTH + cc->mbuf->used;
607 #else
608  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
609 #endif
610  }
611 
612  return len;
613 }
614 
615 static int
617 {
618  int r;
619 
620  if (IS_NCCLASS_SHARE(cc)) {
622  r = add_pointer(reg, cc);
623  return r;
624  }
625 
626  if (IS_NULL(cc->mbuf)) {
627  if (IS_NCCLASS_NOT(cc))
629  else
630  add_opcode(reg, OP_CCLASS);
631 
632  r = add_bitset(reg, cc->bs);
633  }
634  else {
635  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
636  if (IS_NCCLASS_NOT(cc))
638  else
639  add_opcode(reg, OP_CCLASS_MB);
640 
641  r = add_multi_byte_cclass(cc->mbuf, reg);
642  }
643  else {
644  if (IS_NCCLASS_NOT(cc))
646  else
648 
649  r = add_bitset(reg, cc->bs);
650  if (r) return r;
651  r = add_multi_byte_cclass(cc->mbuf, reg);
652  }
653  }
654 
655  return r;
656 }
657 
658 static int
659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
660 {
661 #define REPEAT_RANGE_ALLOC 4
662 
664 
665  if (reg->repeat_range_alloc == 0) {
668  reg->repeat_range = p;
670  }
671  else if (reg->repeat_range_alloc <= id) {
672  int n;
675  sizeof(OnigRepeatRange) * n);
677  reg->repeat_range = p;
678  reg->repeat_range_alloc = n;
679  }
680  else {
681  p = reg->repeat_range;
682  }
683 
684  p[id].lower = lower;
685  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
686  return 0;
687 }
688 
689 static int
690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
691  regex_t* reg)
692 {
693  int r;
694  int num_repeat = reg->num_repeat;
695 
696  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
697  if (r) return r;
698  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
699  reg->num_repeat++;
700  if (r) return r;
701  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
702  if (r) return r;
703 
704  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
705  if (r) return r;
706 
707  r = compile_tree_empty_check(qn->target, reg, empty_info);
708  if (r) return r;
709 
710  if (
711 #ifdef USE_SUBEXP_CALL
712  reg->num_call > 0 ||
713 #endif
716  }
717  else {
719  }
720  if (r) return r;
721  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
722  return r;
723 }
724 
725 static int
727 {
728  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
729  NTYPE(qn->target) == NT_CANY)
730  return 1;
731  else
732  return 0;
733 }
734 
735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
736 #define CKN_ON (ckn > 0)
737 
738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
739 
740 static int
742 {
743  int len, mod_tlen, cklen;
744  int ckn;
745  int infinite = IS_REPEAT_INFINITE(qn->upper);
746  int empty_info = qn->target_empty_info;
747  int tlen = compile_length_tree(qn->target, reg);
748 
749  if (tlen < 0) return tlen;
750 
751  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
752 
753  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
754 
755  /* anychar repeat */
756  if (NTYPE(qn->target) == NT_CANY) {
757  if (qn->greedy && infinite) {
758  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
759  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
760  else
761  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
762  }
763  }
764 
765  if (empty_info != 0)
766  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
767  else
768  mod_tlen = tlen;
769 
770  if (infinite && qn->lower <= 1) {
771  if (qn->greedy) {
772  if (qn->lower == 1)
773  len = SIZE_OP_JUMP;
774  else
775  len = 0;
776 
777  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
778  }
779  else {
780  if (qn->lower == 0)
781  len = SIZE_OP_JUMP;
782  else
783  len = 0;
784 
785  len += mod_tlen + SIZE_OP_PUSH + cklen;
786  }
787  }
788  else if (qn->upper == 0) {
789  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
790  len = SIZE_OP_JUMP + tlen;
791  else
792  len = 0;
793  }
794  else if (qn->upper == 1 && qn->greedy) {
795  if (qn->lower == 0) {
796  if (CKN_ON) {
797  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
798  }
799  else {
800  len = SIZE_OP_PUSH + tlen;
801  }
802  }
803  else {
804  len = tlen;
805  }
806  }
807  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
808  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
809  }
810  else {
811  len = SIZE_OP_REPEAT_INC
812  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
813  if (CKN_ON)
814  len += SIZE_OP_STATE_CHECK;
815  }
816 
817  return len;
818 }
819 
820 static int
822 {
823  int r, mod_tlen;
824  int ckn;
825  int infinite = IS_REPEAT_INFINITE(qn->upper);
826  int empty_info = qn->target_empty_info;
827  int tlen = compile_length_tree(qn->target, reg);
828 
829  if (tlen < 0) return tlen;
830 
831  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
832 
833  if (is_anychar_star_quantifier(qn)) {
834  r = compile_tree_n_times(qn->target, qn->lower, reg);
835  if (r) return r;
836  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
837  if (IS_MULTILINE(reg->options))
839  else
841  if (r) return r;
842  if (CKN_ON) {
843  r = add_state_check_num(reg, ckn);
844  if (r) return r;
845  }
846 
847  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
848  }
849  else {
850  if (IS_MULTILINE(reg->options)) {
851  r = add_opcode(reg, (CKN_ON ?
853  : OP_ANYCHAR_ML_STAR));
854  }
855  else {
856  r = add_opcode(reg, (CKN_ON ?
858  : OP_ANYCHAR_STAR));
859  }
860  if (r) return r;
861  if (CKN_ON)
862  r = add_state_check_num(reg, ckn);
863 
864  return r;
865  }
866  }
867 
868  if (empty_info != 0)
869  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
870  else
871  mod_tlen = tlen;
872 
873  if (infinite && qn->lower <= 1) {
874  if (qn->greedy) {
875  if (qn->lower == 1) {
876  r = add_opcode_rel_addr(reg, OP_JUMP,
877  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
878  if (r) return r;
879  }
880 
881  if (CKN_ON) {
883  if (r) return r;
884  r = add_state_check_num(reg, ckn);
885  if (r) return r;
886  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
887  }
888  else {
889  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
890  }
891  if (r) return r;
892  r = compile_tree_empty_check(qn->target, reg, empty_info);
893  if (r) return r;
894  r = add_opcode_rel_addr(reg, OP_JUMP,
895  -(mod_tlen + (int )SIZE_OP_JUMP
896  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
897  }
898  else {
899  if (qn->lower == 0) {
900  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
901  if (r) return r;
902  }
903  r = compile_tree_empty_check(qn->target, reg, empty_info);
904  if (r) return r;
905  if (CKN_ON) {
907  if (r) return r;
908  r = add_state_check_num(reg, ckn);
909  if (r) return r;
910  r = add_rel_addr(reg,
911  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
912  }
913  else
914  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
915  }
916  }
917  else if (qn->upper == 0) {
918  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
919  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
920  if (r) return r;
921  r = compile_tree(qn->target, reg);
922  }
923  else
924  r = 0;
925  }
926  else if (qn->upper == 1 && qn->greedy) {
927  if (qn->lower == 0) {
928  if (CKN_ON) {
930  if (r) return r;
931  r = add_state_check_num(reg, ckn);
932  if (r) return r;
933  r = add_rel_addr(reg, tlen);
934  }
935  else {
936  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
937  }
938  if (r) return r;
939  }
940 
941  r = compile_tree(qn->target, reg);
942  }
943  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
944  if (CKN_ON) {
946  if (r) return r;
947  r = add_state_check_num(reg, ckn);
948  if (r) return r;
949  r = add_rel_addr(reg, SIZE_OP_JUMP);
950  }
951  else {
953  }
954 
955  if (r) return r;
956  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
957  if (r) return r;
958  r = compile_tree(qn->target, reg);
959  }
960  else {
961  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
962  if (CKN_ON) {
963  if (r) return r;
964  r = add_opcode(reg, OP_STATE_CHECK);
965  if (r) return r;
966  r = add_state_check_num(reg, ckn);
967  }
968  }
969  return r;
970 }
971 
972 #else /* USE_COMBINATION_EXPLOSION_CHECK */
973 
974 static int
976 {
977  int len, mod_tlen;
978  int infinite = IS_REPEAT_INFINITE(qn->upper);
979  int empty_info = qn->target_empty_info;
980  int tlen = compile_length_tree(qn->target, reg);
981 
982  if (tlen < 0) return tlen;
983 
984  /* anychar repeat */
985  if (NTYPE(qn->target) == NT_CANY) {
986  if (qn->greedy && infinite) {
987  if (IS_NOT_NULL(qn->next_head_exact))
988  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
989  else
990  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
991  }
992  }
993 
994  if (empty_info != 0)
995  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
996  else
997  mod_tlen = tlen;
998 
999  if (infinite &&
1000  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1001  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1002  len = SIZE_OP_JUMP;
1003  }
1004  else {
1005  len = tlen * qn->lower;
1006  }
1007 
1008  if (qn->greedy) {
1009  if (IS_NOT_NULL(qn->head_exact))
1010  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1011  else if (IS_NOT_NULL(qn->next_head_exact))
1012  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1013  else
1014  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1015  }
1016  else
1017  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1018  }
1019  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1020  len = SIZE_OP_JUMP + tlen;
1021  }
1022  else if (!infinite && qn->greedy &&
1023  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1025  len = tlen * qn->lower;
1026  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1027  }
1028  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1029  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1030  }
1031  else {
1032  len = SIZE_OP_REPEAT_INC
1033  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1034  }
1035 
1036  return len;
1037 }
1038 
1039 static int
1041 {
1042  int i, r, mod_tlen;
1043  int infinite = IS_REPEAT_INFINITE(qn->upper);
1044  int empty_info = qn->target_empty_info;
1045  int tlen = compile_length_tree(qn->target, reg);
1046 
1047  if (tlen < 0) return tlen;
1048 
1049  if (is_anychar_star_quantifier(qn)) {
1050  r = compile_tree_n_times(qn->target, qn->lower, reg);
1051  if (r) return r;
1052  if (IS_NOT_NULL(qn->next_head_exact)) {
1053  if (IS_MULTILINE(reg->options))
1055  else
1057  if (r) return r;
1058  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1059  }
1060  else {
1061  if (IS_MULTILINE(reg->options))
1062  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1063  else
1064  return add_opcode(reg, OP_ANYCHAR_STAR);
1065  }
1066  }
1067 
1068  if (empty_info != 0)
1069  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1070  else
1071  mod_tlen = tlen;
1072 
1073  if (infinite &&
1074  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1075  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1076  if (qn->greedy) {
1077  if (IS_NOT_NULL(qn->head_exact))
1079  else if (IS_NOT_NULL(qn->next_head_exact))
1081  else
1083  }
1084  else {
1086  }
1087  if (r) return r;
1088  }
1089  else {
1090  r = compile_tree_n_times(qn->target, qn->lower, reg);
1091  if (r) return r;
1092  }
1093 
1094  if (qn->greedy) {
1095  if (IS_NOT_NULL(qn->head_exact)) {
1097  mod_tlen + SIZE_OP_JUMP);
1098  if (r) return r;
1099  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1100  r = compile_tree_empty_check(qn->target, reg, empty_info);
1101  if (r) return r;
1102  r = add_opcode_rel_addr(reg, OP_JUMP,
1103  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1104  }
1105  else if (IS_NOT_NULL(qn->next_head_exact)) {
1107  mod_tlen + SIZE_OP_JUMP);
1108  if (r) return r;
1109  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1110  r = compile_tree_empty_check(qn->target, reg, empty_info);
1111  if (r) return r;
1112  r = add_opcode_rel_addr(reg, OP_JUMP,
1113  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1114  }
1115  else {
1116  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1117  if (r) return r;
1118  r = compile_tree_empty_check(qn->target, reg, empty_info);
1119  if (r) return r;
1120  r = add_opcode_rel_addr(reg, OP_JUMP,
1121  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1122  }
1123  }
1124  else {
1125  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1126  if (r) return r;
1127  r = compile_tree_empty_check(qn->target, reg, empty_info);
1128  if (r) return r;
1129  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1130  }
1131  }
1132  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1133  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1134  if (r) return r;
1135  r = compile_tree(qn->target, reg);
1136  }
1137  else if (!infinite && qn->greedy &&
1138  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1140  int n = qn->upper - qn->lower;
1141 
1142  r = compile_tree_n_times(qn->target, qn->lower, reg);
1143  if (r) return r;
1144 
1145  for (i = 0; i < n; i++) {
1146  r = add_opcode_rel_addr(reg, OP_PUSH,
1147  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1148  if (r) return r;
1149  r = compile_tree(qn->target, reg);
1150  if (r) return r;
1151  }
1152  }
1153  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1155  if (r) return r;
1156  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1157  if (r) return r;
1158  r = compile_tree(qn->target, reg);
1159  }
1160  else {
1161  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1162  }
1163  return r;
1164 }
1165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1166 
1167 static int
1169 {
1170  int tlen;
1171  OnigOptionType prev = reg->options;
1172 
1173  reg->options = node->option;
1174  tlen = compile_length_tree(node->target, reg);
1175  reg->options = prev;
1176 
1177  if (tlen < 0) return tlen;
1178 
1179  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1181  + tlen + SIZE_OP_SET_OPTION;
1182  }
1183  else
1184  return tlen;
1185 }
1186 
1187 static int
1189 {
1190  int r;
1191  OnigOptionType prev = reg->options;
1192 
1193  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1194  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1195  if (r) return r;
1196  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1197  if (r) return r;
1198  r = add_opcode(reg, OP_FAIL);
1199  if (r) return r;
1200  }
1201 
1202  reg->options = node->option;
1203  r = compile_tree(node->target, reg);
1204  reg->options = prev;
1205 
1206  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1207  if (r) return r;
1208  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1209  }
1210  return r;
1211 }
1212 
1213 static int
1215 {
1216  int len;
1217  int tlen;
1218 
1219  if (node->type == ENCLOSE_OPTION)
1220  return compile_length_option_node(node, reg);
1221 
1222  if (node->target) {
1223  tlen = compile_length_tree(node->target, reg);
1224  if (tlen < 0) return tlen;
1225  }
1226  else
1227  tlen = 0;
1228 
1229  switch (node->type) {
1230  case ENCLOSE_MEMORY:
1231 #ifdef USE_SUBEXP_CALL
1232  if (IS_ENCLOSE_CALLED(node)) {
1233  len = SIZE_OP_MEMORY_START_PUSH + tlen
1235  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1236  len += (IS_ENCLOSE_RECURSION(node)
1238  else
1239  len += (IS_ENCLOSE_RECURSION(node)
1241  }
1242  else
1243 #endif
1244  {
1245  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247  else
1248  len = SIZE_OP_MEMORY_START;
1249 
1250  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252  }
1253  break;
1254 
1257  QtfrNode* qn = NQTFR(node->target);
1258  tlen = compile_length_tree(qn->target, reg);
1259  if (tlen < 0) return tlen;
1260 
1261  len = tlen * qn->lower
1262  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263  }
1264  else {
1266  }
1267  break;
1268 
1269  case ENCLOSE_CONDITION:
1270  len = SIZE_OP_CONDITION;
1271  if (NTYPE(node->target) == NT_ALT) {
1272  Node* x = node->target;
1273 
1274  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275  if (tlen < 0) return tlen;
1276  len += tlen + SIZE_OP_JUMP;
1277  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278  x = NCDR(x);
1279  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280  if (tlen < 0) return tlen;
1281  len += tlen;
1282  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283  }
1284  else {
1285  return ONIGERR_PARSER_BUG;
1286  }
1287  break;
1288 
1289  default:
1290  return ONIGERR_TYPE_BUG;
1291  break;
1292  }
1293 
1294  return len;
1295 }
1296 
1297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1298 
1299 static int
1301 {
1302  int r, len;
1303 
1304  if (node->type == ENCLOSE_OPTION)
1305  return compile_option_node(node, reg);
1306 
1307  switch (node->type) {
1308  case ENCLOSE_MEMORY:
1309 #ifdef USE_SUBEXP_CALL
1310  if (IS_ENCLOSE_CALLED(node)) {
1311  r = add_opcode(reg, OP_CALL);
1312  if (r) return r;
1314  node->state |= NST_ADDR_FIXED;
1315  r = add_abs_addr(reg, (int )node->call_addr);
1316  if (r) return r;
1317  len = compile_length_tree(node->target, reg);
1319  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1320  len += (IS_ENCLOSE_RECURSION(node)
1322  else
1323  len += (IS_ENCLOSE_RECURSION(node)
1325 
1326  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1327  if (r) return r;
1328  }
1329 #endif
1330  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1331  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1332  else
1333  r = add_opcode(reg, OP_MEMORY_START);
1334  if (r) return r;
1335  r = add_mem_num(reg, node->regnum);
1336  if (r) return r;
1337  r = compile_tree(node->target, reg);
1338  if (r) return r;
1339 #ifdef USE_SUBEXP_CALL
1340  if (IS_ENCLOSE_CALLED(node)) {
1341  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1342  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1344  else
1345  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1347 
1348  if (r) return r;
1349  r = add_mem_num(reg, node->regnum);
1350  if (r) return r;
1351  r = add_opcode(reg, OP_RETURN);
1352  }
1353  else
1354 #endif
1355  {
1356  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1357  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1358  else
1359  r = add_opcode(reg, OP_MEMORY_END);
1360  if (r) return r;
1361  r = add_mem_num(reg, node->regnum);
1362  }
1363  break;
1364 
1367  QtfrNode* qn = NQTFR(node->target);
1368  r = compile_tree_n_times(qn->target, qn->lower, reg);
1369  if (r) return r;
1370 
1371  len = compile_length_tree(qn->target, reg);
1372  if (len < 0) return len;
1373 
1375  if (r) return r;
1376  r = compile_tree(qn->target, reg);
1377  if (r) return r;
1378  r = add_opcode(reg, OP_POP);
1379  if (r) return r;
1380  r = add_opcode_rel_addr(reg, OP_JUMP,
1381  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1382  }
1383  else {
1384  r = add_opcode(reg, OP_PUSH_STOP_BT);
1385  if (r) return r;
1386  r = compile_tree(node->target, reg);
1387  if (r) return r;
1388  r = add_opcode(reg, OP_POP_STOP_BT);
1389  }
1390  break;
1391 
1392  case ENCLOSE_CONDITION:
1393  r = add_opcode(reg, OP_CONDITION);
1394  if (r) return r;
1395  r = add_mem_num(reg, node->regnum);
1396  if (r) return r;
1397 
1398  if (NTYPE(node->target) == NT_ALT) {
1399  Node* x = node->target;
1400  int len2;
1401 
1402  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1403  if (len < 0) return len;
1404  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1405  x = NCDR(x);
1406  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1407  if (len2 < 0) return len2;
1408  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1409 
1410  x = node->target;
1411  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1412  if (r) return r;
1413  r = compile_tree(NCAR(x), reg); /* yes-node */
1414  if (r) return r;
1415  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1416  if (r) return r;
1417  x = NCDR(x);
1418  r = compile_tree(NCAR(x), reg); /* no-node */
1419  }
1420  else {
1421  return ONIGERR_PARSER_BUG;
1422  }
1423  break;
1424 
1425  default:
1426  return ONIGERR_TYPE_BUG;
1427  break;
1428  }
1429 
1430  return r;
1431 }
1432 
1433 static int
1435 {
1436  int len;
1437  int tlen = 0;
1438 
1439  if (node->target) {
1440  tlen = compile_length_tree(node->target, reg);
1441  if (tlen < 0) return tlen;
1442  }
1443 
1444  switch (node->type) {
1445  case ANCHOR_PREC_READ:
1446  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1447  break;
1448  case ANCHOR_PREC_READ_NOT:
1449  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1450  break;
1451  case ANCHOR_LOOK_BEHIND:
1452  len = SIZE_OP_LOOK_BEHIND + tlen;
1453  break;
1456  break;
1457 
1458  default:
1459  len = SIZE_OPCODE;
1460  break;
1461  }
1462 
1463  return len;
1464 }
1465 
1466 static int
1468 {
1469  int r, len;
1470 
1471  switch (node->type) {
1472  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1473  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1474  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1475  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1476  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1477  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1478 
1479  /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
1480  case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
1481 
1482  case ANCHOR_WORD_BOUND:
1483  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1484  else r = add_opcode(reg, OP_WORD_BOUND);
1485  break;
1486  case ANCHOR_NOT_WORD_BOUND:
1487  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1488  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1489  break;
1490 #ifdef USE_WORD_BEGIN_END
1491  case ANCHOR_WORD_BEGIN:
1492  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1493  else r = add_opcode(reg, OP_WORD_BEGIN);
1494  break;
1495  case ANCHOR_WORD_END:
1496  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1497  else r = add_opcode(reg, OP_WORD_END);
1498  break;
1499 #endif
1500  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1501 
1502  case ANCHOR_PREC_READ:
1503  r = add_opcode(reg, OP_PUSH_POS);
1504  if (r) return r;
1505  r = compile_tree(node->target, reg);
1506  if (r) return r;
1507  r = add_opcode(reg, OP_POP_POS);
1508  break;
1509 
1510  case ANCHOR_PREC_READ_NOT:
1511  len = compile_length_tree(node->target, reg);
1512  if (len < 0) return len;
1514  if (r) return r;
1515  r = compile_tree(node->target, reg);
1516  if (r) return r;
1517  r = add_opcode(reg, OP_FAIL_POS);
1518  break;
1519 
1520  case ANCHOR_LOOK_BEHIND:
1521  {
1522  int n;
1523  r = add_opcode(reg, OP_LOOK_BEHIND);
1524  if (r) return r;
1525  if (node->char_len < 0) {
1526  r = get_char_length_tree(node->target, reg, &n);
1528  }
1529  else
1530  n = node->char_len;
1531  r = add_length(reg, n);
1532  if (r) return r;
1533  r = compile_tree(node->target, reg);
1534  }
1535  break;
1536 
1538  {
1539  int n;
1540  len = compile_length_tree(node->target, reg);
1543  if (r) return r;
1544  if (node->char_len < 0) {
1545  r = get_char_length_tree(node->target, reg, &n);
1547  }
1548  else
1549  n = node->char_len;
1550  r = add_length(reg, n);
1551  if (r) return r;
1552  r = compile_tree(node->target, reg);
1553  if (r) return r;
1555  }
1556  break;
1557 
1558  default:
1559  return ONIGERR_TYPE_BUG;
1560  break;
1561  }
1562 
1563  return r;
1564 }
1565 
1566 static int
1568 {
1569  int len, type, r;
1570 
1571  type = NTYPE(node);
1572  switch (type) {
1573  case NT_LIST:
1574  len = 0;
1575  do {
1576  r = compile_length_tree(NCAR(node), reg);
1577  if (r < 0) return r;
1578  len += r;
1579  } while (IS_NOT_NULL(node = NCDR(node)));
1580  r = len;
1581  break;
1582 
1583  case NT_ALT:
1584  {
1585  int n;
1586 
1587  n = r = 0;
1588  do {
1589  r += compile_length_tree(NCAR(node), reg);
1590  n++;
1591  } while (IS_NOT_NULL(node = NCDR(node)));
1592  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1593  }
1594  break;
1595 
1596  case NT_STR:
1597  if (NSTRING_IS_RAW(node))
1598  r = compile_length_string_raw_node(NSTR(node), reg);
1599  else
1600  r = compile_length_string_node(node, reg);
1601  break;
1602 
1603  case NT_CCLASS:
1604  r = compile_length_cclass_node(NCCLASS(node), reg);
1605  break;
1606 
1607  case NT_CTYPE:
1608  case NT_CANY:
1609  r = SIZE_OPCODE;
1610  break;
1611 
1612  case NT_BREF:
1613  {
1614  BRefNode* br = NBREF(node);
1615 
1616 #ifdef USE_BACKREF_WITH_LEVEL
1617  if (IS_BACKREF_NEST_LEVEL(br)) {
1619  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1620  }
1621  else
1622 #endif
1623  if (br->back_num == 1) {
1624  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1626  }
1627  else {
1628  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1629  }
1630  }
1631  break;
1632 
1633 #ifdef USE_SUBEXP_CALL
1634  case NT_CALL:
1635  r = SIZE_OP_CALL;
1636  break;
1637 #endif
1638 
1639  case NT_QTFR:
1640  r = compile_length_quantifier_node(NQTFR(node), reg);
1641  break;
1642 
1643  case NT_ENCLOSE:
1644  r = compile_length_enclose_node(NENCLOSE(node), reg);
1645  break;
1646 
1647  case NT_ANCHOR:
1648  r = compile_length_anchor_node(NANCHOR(node), reg);
1649  break;
1650 
1651  default:
1652  return ONIGERR_TYPE_BUG;
1653  break;
1654  }
1655 
1656  return r;
1657 }
1658 
1659 static int
1661 {
1662  int n, type, len, pos, r = 0;
1663 
1664  type = NTYPE(node);
1665  switch (type) {
1666  case NT_LIST:
1667  do {
1668  r = compile_tree(NCAR(node), reg);
1669  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1670  break;
1671 
1672  case NT_ALT:
1673  {
1674  Node* x = node;
1675  len = 0;
1676  do {
1677  len += compile_length_tree(NCAR(x), reg);
1678  if (NCDR(x) != NULL) {
1679  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1680  }
1681  } while (IS_NOT_NULL(x = NCDR(x)));
1682  pos = reg->used + len; /* goal position */
1683 
1684  do {
1685  len = compile_length_tree(NCAR(node), reg);
1686  if (IS_NOT_NULL(NCDR(node))) {
1687  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1688  if (r) break;
1689  }
1690  r = compile_tree(NCAR(node), reg);
1691  if (r) break;
1692  if (IS_NOT_NULL(NCDR(node))) {
1693  len = pos - (reg->used + SIZE_OP_JUMP);
1694  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695  if (r) break;
1696  }
1697  } while (IS_NOT_NULL(node = NCDR(node)));
1698  }
1699  break;
1700 
1701  case NT_STR:
1702  if (NSTRING_IS_RAW(node))
1703  r = compile_string_raw_node(NSTR(node), reg);
1704  else
1705  r = compile_string_node(node, reg);
1706  break;
1707 
1708  case NT_CCLASS:
1709  r = compile_cclass_node(NCCLASS(node), reg);
1710  break;
1711 
1712  case NT_CTYPE:
1713  {
1714  int op;
1715 
1716  switch (NCTYPE(node)->ctype) {
1717  case ONIGENC_CTYPE_WORD:
1718  if (NCTYPE(node)->ascii_range != 0) {
1719  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1720  else op = OP_ASCII_WORD;
1721  }
1722  else {
1723  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1724  else op = OP_WORD;
1725  }
1726  break;
1727  default:
1728  return ONIGERR_TYPE_BUG;
1729  break;
1730  }
1731  r = add_opcode(reg, op);
1732  }
1733  break;
1734 
1735  case NT_CANY:
1736  if (IS_MULTILINE(reg->options))
1737  r = add_opcode(reg, OP_ANYCHAR_ML);
1738  else
1739  r = add_opcode(reg, OP_ANYCHAR);
1740  break;
1741 
1742  case NT_BREF:
1743  {
1744  BRefNode* br = NBREF(node);
1745 
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747  if (IS_BACKREF_NEST_LEVEL(br)) {
1749  if (r) return r;
1750  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1751  if (r) return r;
1752  r = add_length(reg, br->nest_level);
1753  if (r) return r;
1754 
1755  goto add_bacref_mems;
1756  }
1757  else
1758 #endif
1759  if (br->back_num == 1) {
1760  n = br->back_static[0];
1761  if (IS_IGNORECASE(reg->options)) {
1762  r = add_opcode(reg, OP_BACKREFN_IC);
1763  if (r) return r;
1764  r = add_mem_num(reg, n);
1765  }
1766  else {
1767  switch (n) {
1768  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1769  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1770  default:
1771  r = add_opcode(reg, OP_BACKREFN);
1772  if (r) return r;
1773  r = add_mem_num(reg, n);
1774  break;
1775  }
1776  }
1777  }
1778  else {
1779  int i;
1780  int* p;
1781 
1782  if (IS_IGNORECASE(reg->options)) {
1783  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1784  }
1785  else {
1786  r = add_opcode(reg, OP_BACKREF_MULTI);
1787  }
1788  if (r) return r;
1789 
1790 #ifdef USE_BACKREF_WITH_LEVEL
1791  add_bacref_mems:
1792 #endif
1793  r = add_length(reg, br->back_num);
1794  if (r) return r;
1795  p = BACKREFS_P(br);
1796  for (i = br->back_num - 1; i >= 0; i--) {
1797  r = add_mem_num(reg, p[i]);
1798  if (r) return r;
1799  }
1800  }
1801  }
1802  break;
1803 
1804 #ifdef USE_SUBEXP_CALL
1805  case NT_CALL:
1806  r = compile_call(NCALL(node), reg);
1807  break;
1808 #endif
1809 
1810  case NT_QTFR:
1811  r = compile_quantifier_node(NQTFR(node), reg);
1812  break;
1813 
1814  case NT_ENCLOSE:
1815  r = compile_enclose_node(NENCLOSE(node), reg);
1816  break;
1817 
1818  case NT_ANCHOR:
1819  r = compile_anchor_node(NANCHOR(node), reg);
1820  break;
1821 
1822  default:
1823 #ifdef ONIG_DEBUG
1824  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1825 #endif
1826  break;
1827  }
1828 
1829  return r;
1830 }
1831 
1832 #ifdef USE_NAMED_GROUP
1833 
1834 static int
1835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1836 {
1837  int r = 0;
1838  Node* node = *plink;
1839 
1840  switch (NTYPE(node)) {
1841  case NT_LIST:
1842  case NT_ALT:
1843  do {
1844  r = noname_disable_map(&(NCAR(node)), map, counter);
1845  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1846  break;
1847 
1848  case NT_QTFR:
1849  {
1850  Node** ptarget = &(NQTFR(node)->target);
1851  Node* old = *ptarget;
1852  r = noname_disable_map(ptarget, map, counter);
1853  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1854  onig_reduce_nested_quantifier(node, *ptarget);
1855  }
1856  }
1857  break;
1858 
1859  case NT_ENCLOSE:
1860  {
1861  EncloseNode* en = NENCLOSE(node);
1862  if (en->type == ENCLOSE_MEMORY) {
1863  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1864  (*counter)++;
1865  map[en->regnum].new_val = *counter;
1866  en->regnum = *counter;
1867  r = noname_disable_map(&(en->target), map, counter);
1868  }
1869  else {
1870  *plink = en->target;
1871  en->target = NULL_NODE;
1872  onig_node_free(node);
1873  r = noname_disable_map(plink, map, counter);
1874  }
1875  }
1876  else
1877  r = noname_disable_map(&(en->target), map, counter);
1878  }
1879  break;
1880 
1881  case NT_ANCHOR:
1882  {
1883  AnchorNode* an = NANCHOR(node);
1884  switch (an->type) {
1885  case ANCHOR_PREC_READ:
1886  case ANCHOR_PREC_READ_NOT:
1887  case ANCHOR_LOOK_BEHIND:
1889  r = noname_disable_map(&(an->target), map, counter);
1890  break;
1891  }
1892  }
1893  break;
1894 
1895  default:
1896  break;
1897  }
1898 
1899  return r;
1900 }
1901 
1902 static int
1904 {
1905  int i, pos, n, old_num;
1906  int *backs;
1907  BRefNode* bn = NBREF(node);
1908 
1909  if (! IS_BACKREF_NAME_REF(bn))
1911 
1912  old_num = bn->back_num;
1913  if (IS_NULL(bn->back_dynamic))
1914  backs = bn->back_static;
1915  else
1916  backs = bn->back_dynamic;
1917 
1918  for (i = 0, pos = 0; i < old_num; i++) {
1919  n = map[backs[i]].new_val;
1920  if (n > 0) {
1921  backs[pos] = n;
1922  pos++;
1923  }
1924  }
1925 
1926  bn->back_num = pos;
1927  return 0;
1928 }
1929 
1930 static int
1932 {
1933  int r = 0;
1934 
1935  switch (NTYPE(node)) {
1936  case NT_LIST:
1937  case NT_ALT:
1938  do {
1939  r = renumber_by_map(NCAR(node), map);
1940  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1941  break;
1942  case NT_QTFR:
1943  r = renumber_by_map(NQTFR(node)->target, map);
1944  break;
1945  case NT_ENCLOSE:
1946  {
1947  EncloseNode* en = NENCLOSE(node);
1948  if (en->type == ENCLOSE_CONDITION)
1949  en->regnum = map[en->regnum].new_val;
1950  r = renumber_by_map(en->target, map);
1951  }
1952  break;
1953 
1954  case NT_BREF:
1955  r = renumber_node_backref(node, map);
1956  break;
1957 
1958  case NT_ANCHOR:
1959  {
1960  AnchorNode* an = NANCHOR(node);
1961  switch (an->type) {
1962  case ANCHOR_PREC_READ:
1963  case ANCHOR_PREC_READ_NOT:
1964  case ANCHOR_LOOK_BEHIND:
1966  r = renumber_by_map(an->target, map);
1967  break;
1968  }
1969  }
1970  break;
1971 
1972  default:
1973  break;
1974  }
1975 
1976  return r;
1977 }
1978 
1979 static int
1981 {
1982  int r = 0;
1983 
1984  switch (NTYPE(node)) {
1985  case NT_LIST:
1986  case NT_ALT:
1987  do {
1988  r = numbered_ref_check(NCAR(node));
1989  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990  break;
1991  case NT_QTFR:
1992  r = numbered_ref_check(NQTFR(node)->target);
1993  break;
1994  case NT_ENCLOSE:
1995  r = numbered_ref_check(NENCLOSE(node)->target);
1996  break;
1997 
1998  case NT_BREF:
1999  if (! IS_BACKREF_NAME_REF(NBREF(node)))
2001  break;
2002 
2003  default:
2004  break;
2005  }
2006 
2007  return r;
2008 }
2009 
2010 static int
2012 {
2013  int r, i, pos, counter;
2014  BitStatusType loc;
2015  GroupNumRemap* map;
2016 
2017  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2019  for (i = 1; i <= env->num_mem; i++) {
2020  map[i].new_val = 0;
2021  }
2022  counter = 0;
2023  r = noname_disable_map(root, map, &counter);
2024  if (r != 0) return r;
2025 
2026  r = renumber_by_map(*root, map);
2027  if (r != 0) return r;
2028 
2029  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2030  if (map[i].new_val > 0) {
2031  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2032  pos++;
2033  }
2034  }
2035 
2036  loc = env->capture_history;
2038  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2039  if (BIT_STATUS_AT(loc, i)) {
2041  }
2042  }
2043 
2044  env->num_mem = env->num_named;
2045  reg->num_mem = env->num_named;
2046 
2047  return onig_renumber_name_table(reg, map);
2048 }
2049 #endif /* USE_NAMED_GROUP */
2050 
2051 #ifdef USE_SUBEXP_CALL
2052 static int
2054 {
2055  int i, offset;
2056  EncloseNode* en;
2057  AbsAddrType addr;
2058 
2059  for (i = 0; i < uslist->num; i++) {
2060  en = NENCLOSE(uslist->us[i].target);
2061  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2062  addr = en->call_addr;
2063  offset = uslist->us[i].offset;
2064 
2065  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2066  }
2067  return 0;
2068 }
2069 #endif
2070 
2071 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2072 static int
2074 {
2075  int r = 0;
2076 
2077  switch (NTYPE(node)) {
2078  case NT_LIST:
2079  case NT_ALT:
2080  {
2081  int v;
2082  do {
2084  if (v > r) r = v;
2085  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2086  }
2087  break;
2088 
2089 #ifdef USE_SUBEXP_CALL
2090  case NT_CALL:
2091  if (IS_CALL_RECURSION(NCALL(node))) {
2092  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2093  }
2094  else
2095  r = quantifiers_memory_node_info(NCALL(node)->target);
2096  break;
2097 #endif
2098 
2099  case NT_QTFR:
2100  {
2101  QtfrNode* qn = NQTFR(node);
2102  if (qn->upper != 0) {
2104  }
2105  }
2106  break;
2107 
2108  case NT_ENCLOSE:
2109  {
2110  EncloseNode* en = NENCLOSE(node);
2111  switch (en->type) {
2112  case ENCLOSE_MEMORY:
2113  return NQ_TARGET_IS_EMPTY_MEM;
2114  break;
2115 
2116  case ENCLOSE_OPTION:
2118  case ENCLOSE_CONDITION:
2120  break;
2121  default:
2122  break;
2123  }
2124  }
2125  break;
2126 
2127  case NT_BREF:
2128  case NT_STR:
2129  case NT_CTYPE:
2130  case NT_CCLASS:
2131  case NT_CANY:
2132  case NT_ANCHOR:
2133  default:
2134  break;
2135  }
2136 
2137  return r;
2138 }
2139 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2140 
2141 static int
2143 {
2144  OnigDistance tmin;
2145  int r = 0;
2146 
2147  *min = 0;
2148  switch (NTYPE(node)) {
2149  case NT_BREF:
2150  {
2151  int i;
2152  int* backs;
2153  Node** nodes = SCANENV_MEM_NODES(env);
2154  BRefNode* br = NBREF(node);
2155  if (br->state & NST_RECURSION) break;
2156 
2157  backs = BACKREFS_P(br);
2158  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2159  r = get_min_match_length(nodes[backs[0]], min, env);
2160  if (r != 0) break;
2161  for (i = 1; i < br->back_num; i++) {
2162  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2163  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2164  if (r != 0) break;
2165  if (*min > tmin) *min = tmin;
2166  }
2167  }
2168  break;
2169 
2170 #ifdef USE_SUBEXP_CALL
2171  case NT_CALL:
2172  if (IS_CALL_RECURSION(NCALL(node))) {
2173  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2174  if (IS_ENCLOSE_MIN_FIXED(en))
2175  *min = en->min_len;
2176  }
2177  else
2178  r = get_min_match_length(NCALL(node)->target, min, env);
2179  break;
2180 #endif
2181 
2182  case NT_LIST:
2183  do {
2184  r = get_min_match_length(NCAR(node), &tmin, env);
2185  if (r == 0) *min += tmin;
2186  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2187  break;
2188 
2189  case NT_ALT:
2190  {
2191  Node *x, *y;
2192  y = node;
2193  do {
2194  x = NCAR(y);
2195  r = get_min_match_length(x, &tmin, env);
2196  if (r != 0) break;
2197  if (y == node) *min = tmin;
2198  else if (*min > tmin) *min = tmin;
2199  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2200  }
2201  break;
2202 
2203  case NT_STR:
2204  {
2205  StrNode* sn = NSTR(node);
2206  *min = sn->end - sn->s;
2207  }
2208  break;
2209 
2210  case NT_CTYPE:
2211  *min = 1;
2212  break;
2213 
2214  case NT_CCLASS:
2215  case NT_CANY:
2216  *min = 1;
2217  break;
2218 
2219  case NT_QTFR:
2220  {
2221  QtfrNode* qn = NQTFR(node);
2222 
2223  if (qn->lower > 0) {
2224  r = get_min_match_length(qn->target, min, env);
2225  if (r == 0)
2226  *min = distance_multiply(*min, qn->lower);
2227  }
2228  }
2229  break;
2230 
2231  case NT_ENCLOSE:
2232  {
2233  EncloseNode* en = NENCLOSE(node);
2234  switch (en->type) {
2235  case ENCLOSE_MEMORY:
2236 #ifdef USE_SUBEXP_CALL
2237  if (IS_ENCLOSE_MIN_FIXED(en))
2238  *min = en->min_len;
2239  else {
2240  r = get_min_match_length(en->target, min, env);
2241  if (r == 0) {
2242  en->min_len = *min;
2244  }
2245  }
2246  break;
2247 #endif
2248  case ENCLOSE_OPTION:
2250  case ENCLOSE_CONDITION:
2251  r = get_min_match_length(en->target, min, env);
2252  break;
2253  }
2254  }
2255  break;
2256 
2257  case NT_ANCHOR:
2258  default:
2259  break;
2260  }
2261 
2262  return r;
2263 }
2264 
2265 static int
2267 {
2268  OnigDistance tmax;
2269  int r = 0;
2270 
2271  *max = 0;
2272  switch (NTYPE(node)) {
2273  case NT_LIST:
2274  do {
2275  r = get_max_match_length(NCAR(node), &tmax, env);
2276  if (r == 0)
2277  *max = distance_add(*max, tmax);
2278  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2279  break;
2280 
2281  case NT_ALT:
2282  do {
2283  r = get_max_match_length(NCAR(node), &tmax, env);
2284  if (r == 0 && *max < tmax) *max = tmax;
2285  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2286  break;
2287 
2288  case NT_STR:
2289  {
2290  StrNode* sn = NSTR(node);
2291  *max = sn->end - sn->s;
2292  }
2293  break;
2294 
2295  case NT_CTYPE:
2296  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2297  break;
2298 
2299  case NT_CCLASS:
2300  case NT_CANY:
2301  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2302  break;
2303 
2304  case NT_BREF:
2305  {
2306  int i;
2307  int* backs;
2308  Node** nodes = SCANENV_MEM_NODES(env);
2309  BRefNode* br = NBREF(node);
2310  if (br->state & NST_RECURSION) {
2311  *max = ONIG_INFINITE_DISTANCE;
2312  break;
2313  }
2314  backs = BACKREFS_P(br);
2315  for (i = 0; i < br->back_num; i++) {
2316  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2317  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2318  if (r != 0) break;
2319  if (*max < tmax) *max = tmax;
2320  }
2321  }
2322  break;
2323 
2324 #ifdef USE_SUBEXP_CALL
2325  case NT_CALL:
2326  if (! IS_CALL_RECURSION(NCALL(node)))
2327  r = get_max_match_length(NCALL(node)->target, max, env);
2328  else
2329  *max = ONIG_INFINITE_DISTANCE;
2330  break;
2331 #endif
2332 
2333  case NT_QTFR:
2334  {
2335  QtfrNode* qn = NQTFR(node);
2336 
2337  if (qn->upper != 0) {
2338  r = get_max_match_length(qn->target, max, env);
2339  if (r == 0 && *max != 0) {
2340  if (! IS_REPEAT_INFINITE(qn->upper))
2341  *max = distance_multiply(*max, qn->upper);
2342  else
2343  *max = ONIG_INFINITE_DISTANCE;
2344  }
2345  }
2346  }
2347  break;
2348 
2349  case NT_ENCLOSE:
2350  {
2351  EncloseNode* en = NENCLOSE(node);
2352  switch (en->type) {
2353  case ENCLOSE_MEMORY:
2354 #ifdef USE_SUBEXP_CALL
2355  if (IS_ENCLOSE_MAX_FIXED(en))
2356  *max = en->max_len;
2357  else {
2358  r = get_max_match_length(en->target, max, env);
2359  if (r == 0) {
2360  en->max_len = *max;
2362  }
2363  }
2364  break;
2365 #endif
2366  case ENCLOSE_OPTION:
2368  case ENCLOSE_CONDITION:
2369  r = get_max_match_length(en->target, max, env);
2370  break;
2371  }
2372  }
2373  break;
2374 
2375  case NT_ANCHOR:
2376  default:
2377  break;
2378  }
2379 
2380  return r;
2381 }
2382 
2383 #define GET_CHAR_LEN_VARLEN -1
2384 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2385 
2386 /* fixed size pattern node only */
2387 static int
2388 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2389 {
2390  int tlen;
2391  int r = 0;
2392 
2393  level++;
2394  *len = 0;
2395  switch (NTYPE(node)) {
2396  case NT_LIST:
2397  do {
2398  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2399  if (r == 0)
2400  *len = (int )distance_add(*len, tlen);
2401  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2402  break;
2403 
2404  case NT_ALT:
2405  {
2406  int tlen2;
2407  int varlen = 0;
2408 
2409  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2410  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2411  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2412  if (r == 0) {
2413  if (tlen != tlen2)
2414  varlen = 1;
2415  }
2416  }
2417  if (r == 0) {
2418  if (varlen != 0) {
2419  if (level == 1)
2421  else
2422  r = GET_CHAR_LEN_VARLEN;
2423  }
2424  else
2425  *len = tlen;
2426  }
2427  }
2428  break;
2429 
2430  case NT_STR:
2431  {
2432  StrNode* sn = NSTR(node);
2433  UChar *s = sn->s;
2434  while (s < sn->end) {
2435  s += enclen(reg->enc, s, sn->end);
2436  (*len)++;
2437  }
2438  }
2439  break;
2440 
2441  case NT_QTFR:
2442  {
2443  QtfrNode* qn = NQTFR(node);
2444  if (qn->lower == qn->upper) {
2445  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2446  if (r == 0)
2447  *len = (int )distance_multiply(tlen, qn->lower);
2448  }
2449  else
2450  r = GET_CHAR_LEN_VARLEN;
2451  }
2452  break;
2453 
2454 #ifdef USE_SUBEXP_CALL
2455  case NT_CALL:
2456  if (! IS_CALL_RECURSION(NCALL(node)))
2457  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2458  else
2459  r = GET_CHAR_LEN_VARLEN;
2460  break;
2461 #endif
2462 
2463  case NT_CTYPE:
2464  *len = 1;
2465  break;
2466 
2467  case NT_CCLASS:
2468  case NT_CANY:
2469  *len = 1;
2470  break;
2471 
2472  case NT_ENCLOSE:
2473  {
2474  EncloseNode* en = NENCLOSE(node);
2475  switch (en->type) {
2476  case ENCLOSE_MEMORY:
2477 #ifdef USE_SUBEXP_CALL
2478  if (IS_ENCLOSE_CLEN_FIXED(en))
2479  *len = en->char_len;
2480  else {
2481  r = get_char_length_tree1(en->target, reg, len, level);
2482  if (r == 0) {
2483  en->char_len = *len;
2485  }
2486  }
2487  break;
2488 #endif
2489  case ENCLOSE_OPTION:
2491  case ENCLOSE_CONDITION:
2492  r = get_char_length_tree1(en->target, reg, len, level);
2493  break;
2494  default:
2495  break;
2496  }
2497  }
2498  break;
2499 
2500  case NT_ANCHOR:
2501  break;
2502 
2503  default:
2504  r = GET_CHAR_LEN_VARLEN;
2505  break;
2506  }
2507 
2508  return r;
2509 }
2510 
2511 static int
2512 get_char_length_tree(Node* node, regex_t* reg, int* len)
2513 {
2514  return get_char_length_tree1(node, reg, len, 0);
2515 }
2516 
2517 /* x is not included y ==> 1 : 0 */
2518 static int
2520 {
2521  int i;
2522  OnigDistance len;
2523  OnigCodePoint code;
2524  UChar *p;
2525  int ytype;
2526 
2527  retry:
2528  ytype = NTYPE(y);
2529  switch (NTYPE(x)) {
2530  case NT_CTYPE:
2531  {
2532  switch (ytype) {
2533  case NT_CTYPE:
2534  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2535  NCTYPE(y)->not != NCTYPE(x)->not &&
2536  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2537  return 1;
2538  else
2539  return 0;
2540  break;
2541 
2542  case NT_CCLASS:
2543  swap:
2544  {
2545  Node* tmp;
2546  tmp = x; x = y; y = tmp;
2547  goto retry;
2548  }
2549  break;
2550 
2551  case NT_STR:
2552  goto swap;
2553  break;
2554 
2555  default:
2556  break;
2557  }
2558  }
2559  break;
2560 
2561  case NT_CCLASS:
2562  {
2563  CClassNode* xc = NCCLASS(x);
2564  switch (ytype) {
2565  case NT_CTYPE:
2566  switch (NCTYPE(y)->ctype) {
2567  case ONIGENC_CTYPE_WORD:
2568  if (NCTYPE(y)->not == 0) {
2569  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2570  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2571  if (BITSET_AT(xc->bs, i)) {
2572  if (NCTYPE(y)->ascii_range) {
2573  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2574  }
2575  else {
2576  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2577  }
2578  }
2579  }
2580  return 1;
2581  }
2582  return 0;
2583  }
2584  else {
2585  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2586  int is_word;
2587  if (NCTYPE(y)->ascii_range)
2588  is_word = IS_CODE_SB_WORD(reg->enc, i);
2589  else
2590  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2591  if (! is_word) {
2592  if (!IS_NCCLASS_NOT(xc)) {
2593  if (BITSET_AT(xc->bs, i))
2594  return 0;
2595  }
2596  else {
2597  if (! BITSET_AT(xc->bs, i))
2598  return 0;
2599  }
2600  }
2601  }
2602  return 1;
2603  }
2604  break;
2605 
2606  default:
2607  break;
2608  }
2609  break;
2610 
2611  case NT_CCLASS:
2612  {
2613  int v;
2614  CClassNode* yc = NCCLASS(y);
2615 
2616  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2617  v = BITSET_AT(xc->bs, i);
2618  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2619  (v == 0 && IS_NCCLASS_NOT(xc))) {
2620  v = BITSET_AT(yc->bs, i);
2621  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2622  (v == 0 && IS_NCCLASS_NOT(yc)))
2623  return 0;
2624  }
2625  }
2626  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2627  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2628  return 1;
2629  return 0;
2630  }
2631  break;
2632 
2633  case NT_STR:
2634  goto swap;
2635  break;
2636 
2637  default:
2638  break;
2639  }
2640  }
2641  break;
2642 
2643  case NT_STR:
2644  {
2645  StrNode* xs = NSTR(x);
2646  if (NSTRING_LEN(x) == 0)
2647  break;
2648 
2649  switch (ytype) {
2650  case NT_CTYPE:
2651  switch (NCTYPE(y)->ctype) {
2652  case ONIGENC_CTYPE_WORD:
2653  if (NCTYPE(y)->ascii_range) {
2654  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2655  return NCTYPE(y)->not;
2656  else
2657  return !(NCTYPE(y)->not);
2658  }
2659  else {
2660  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2661  return NCTYPE(y)->not;
2662  else
2663  return !(NCTYPE(y)->not);
2664  }
2665  break;
2666  default:
2667  break;
2668  }
2669  break;
2670 
2671  case NT_CCLASS:
2672  {
2673  CClassNode* cc = NCCLASS(y);
2674 
2675  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2676  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2677  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2678  }
2679  break;
2680 
2681  case NT_STR:
2682  {
2683  UChar *q;
2684  StrNode* ys = NSTR(y);
2685  len = NSTRING_LEN(x);
2686  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2687  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2688  /* tiny version */
2689  return 0;
2690  }
2691  else {
2692  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2693  if (*p != *q) return 1;
2694  }
2695  }
2696  }
2697  break;
2698 
2699  default:
2700  break;
2701  }
2702  }
2703  break;
2704 
2705  default:
2706  break;
2707  }
2708 
2709  return 0;
2710 }
2711 
2712 static Node*
2713 get_head_value_node(Node* node, int exact, regex_t* reg)
2714 {
2715  Node* n = NULL_NODE;
2716 
2717  switch (NTYPE(node)) {
2718  case NT_BREF:
2719  case NT_ALT:
2720  case NT_CANY:
2721 #ifdef USE_SUBEXP_CALL
2722  case NT_CALL:
2723 #endif
2724  break;
2725 
2726  case NT_CTYPE:
2727  case NT_CCLASS:
2728  if (exact == 0) {
2729  n = node;
2730  }
2731  break;
2732 
2733  case NT_LIST:
2734  n = get_head_value_node(NCAR(node), exact, reg);
2735  break;
2736 
2737  case NT_STR:
2738  {
2739  StrNode* sn = NSTR(node);
2740 
2741  if (sn->end <= sn->s)
2742  break;
2743 
2744  if (exact != 0 &&
2745  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2746  }
2747  else {
2748  n = node;
2749  }
2750  }
2751  break;
2752 
2753  case NT_QTFR:
2754  {
2755  QtfrNode* qn = NQTFR(node);
2756  if (qn->lower > 0) {
2757  if (IS_NOT_NULL(qn->head_exact))
2758  n = qn->head_exact;
2759  else
2760  n = get_head_value_node(qn->target, exact, reg);
2761  }
2762  }
2763  break;
2764 
2765  case NT_ENCLOSE:
2766  {
2767  EncloseNode* en = NENCLOSE(node);
2768  switch (en->type) {
2769  case ENCLOSE_OPTION:
2770  {
2772 
2773  reg->options = NENCLOSE(node)->option;
2774  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2775  reg->options = options;
2776  }
2777  break;
2778 
2779  case ENCLOSE_MEMORY:
2781  case ENCLOSE_CONDITION:
2782  n = get_head_value_node(en->target, exact, reg);
2783  break;
2784  }
2785  }
2786  break;
2787 
2788  case NT_ANCHOR:
2789  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2790  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2791  break;
2792 
2793  default:
2794  break;
2795  }
2796 
2797  return n;
2798 }
2799 
2800 static int
2801 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2802 {
2803  int type, r = 0;
2804 
2805  type = NTYPE(node);
2806  if ((NTYPE2BIT(type) & type_mask) == 0)
2807  return 1;
2808 
2809  switch (type) {
2810  case NT_LIST:
2811  case NT_ALT:
2812  do {
2813  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2814  anchor_mask);
2815  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2816  break;
2817 
2818  case NT_QTFR:
2819  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2820  anchor_mask);
2821  break;
2822 
2823  case NT_ENCLOSE:
2824  {
2825  EncloseNode* en = NENCLOSE(node);
2826  if ((en->type & enclose_mask) == 0)
2827  return 1;
2828 
2829  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2830  }
2831  break;
2832 
2833  case NT_ANCHOR:
2834  type = NANCHOR(node)->type;
2835  if ((type & anchor_mask) == 0)
2836  return 1;
2837 
2838  if (NANCHOR(node)->target)
2839  r = check_type_tree(NANCHOR(node)->target,
2840  type_mask, enclose_mask, anchor_mask);
2841  break;
2842 
2843  default:
2844  break;
2845  }
2846  return r;
2847 }
2848 
2849 #ifdef USE_SUBEXP_CALL
2850 
2851 #define RECURSION_EXIST 1
2852 #define RECURSION_INFINITE 2
2853 
2854 static int
2856 {
2857  int type;
2858  int r = 0;
2859 
2860  type = NTYPE(node);
2861  switch (type) {
2862  case NT_LIST:
2863  {
2864  Node *x;
2865  OnigDistance min;
2866  int ret;
2867 
2868  x = node;
2869  do {
2870  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2871  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2872  r |= ret;
2873  if (head) {
2874  ret = get_min_match_length(NCAR(x), &min, env);
2875  if (ret != 0) return ret;
2876  if (min != 0) head = 0;
2877  }
2878  } while (IS_NOT_NULL(x = NCDR(x)));
2879  }
2880  break;
2881 
2882  case NT_ALT:
2883  {
2884  int ret;
2885  r = RECURSION_EXIST;
2886  do {
2887  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2888  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2889  r &= ret;
2890  } while (IS_NOT_NULL(node = NCDR(node)));
2891  }
2892  break;
2893 
2894  case NT_QTFR:
2895  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2896  if (r == RECURSION_EXIST) {
2897  if (NQTFR(node)->lower == 0) r = 0;
2898  }
2899  break;
2900 
2901  case NT_ANCHOR:
2902  {
2903  AnchorNode* an = NANCHOR(node);
2904  switch (an->type) {
2905  case ANCHOR_PREC_READ:
2906  case ANCHOR_PREC_READ_NOT:
2907  case ANCHOR_LOOK_BEHIND:
2909  r = subexp_inf_recursive_check(an->target, env, head);
2910  break;
2911  }
2912  }
2913  break;
2914 
2915  case NT_CALL:
2916  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2917  break;
2918 
2919  case NT_ENCLOSE:
2920  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2921  return 0;
2922  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2923  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2924  else {
2926  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2928  }
2929  break;
2930 
2931  default:
2932  break;
2933  }
2934 
2935  return r;
2936 }
2937 
2938 static int
2940 {
2941  int type;
2942  int r = 0;
2943 
2944  type = NTYPE(node);
2945  switch (type) {
2946  case NT_LIST:
2947  case NT_ALT:
2948  do {
2949  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2950  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2951  break;
2952 
2953  case NT_QTFR:
2954  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2955  break;
2956 
2957  case NT_ANCHOR:
2958  {
2959  AnchorNode* an = NANCHOR(node);
2960  switch (an->type) {
2961  case ANCHOR_PREC_READ:
2962  case ANCHOR_PREC_READ_NOT:
2963  case ANCHOR_LOOK_BEHIND:
2966  break;
2967  }
2968  }
2969  break;
2970 
2971  case NT_ENCLOSE:
2972  {
2973  EncloseNode* en = NENCLOSE(node);
2974 
2975  if (IS_ENCLOSE_RECURSION(en)) {
2977  r = subexp_inf_recursive_check(en->target, env, 1);
2978  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2980  }
2982  }
2983 
2984  break;
2985 
2986  default:
2987  break;
2988  }
2989 
2990  return r;
2991 }
2992 
2993 static int
2995 {
2996  int r = 0;
2997 
2998  switch (NTYPE(node)) {
2999  case NT_LIST:
3000  case NT_ALT:
3001  do {
3002  r |= subexp_recursive_check(NCAR(node));
3003  } while (IS_NOT_NULL(node = NCDR(node)));
3004  break;
3005 
3006  case NT_QTFR:
3007  r = subexp_recursive_check(NQTFR(node)->target);
3008  break;
3009 
3010  case NT_ANCHOR:
3011  {
3012  AnchorNode* an = NANCHOR(node);
3013  switch (an->type) {
3014  case ANCHOR_PREC_READ:
3015  case ANCHOR_PREC_READ_NOT:
3016  case ANCHOR_LOOK_BEHIND:
3018  r = subexp_recursive_check(an->target);
3019  break;
3020  }
3021  }
3022  break;
3023 
3024  case NT_CALL:
3025  r = subexp_recursive_check(NCALL(node)->target);
3026  if (r != 0) SET_CALL_RECURSION(node);
3027  break;
3028 
3029  case NT_ENCLOSE:
3030  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3031  return 0;
3032  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3033  return 1; /* recursion */
3034  else {
3036  r = subexp_recursive_check(NENCLOSE(node)->target);
3038  }
3039  break;
3040 
3041  default:
3042  break;
3043  }
3044 
3045  return r;
3046 }
3047 
3048 
3049 static int
3051 {
3052 #define FOUND_CALLED_NODE 1
3053 
3054  int type;
3055  int r = 0;
3056 
3057  type = NTYPE(node);
3058  switch (type) {
3059  case NT_LIST:
3060  case NT_ALT:
3061  {
3062  int ret;
3063  do {
3064  ret = subexp_recursive_check_trav(NCAR(node), env);
3065  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3066  else if (ret < 0) return ret;
3067  } while (IS_NOT_NULL(node = NCDR(node)));
3068  }
3069  break;
3070 
3071  case NT_QTFR:
3072  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3073  if (NQTFR(node)->upper == 0) {
3074  if (r == FOUND_CALLED_NODE)
3075  NQTFR(node)->is_refered = 1;
3076  }
3077  break;
3078 
3079  case NT_ANCHOR:
3080  {
3081  AnchorNode* an = NANCHOR(node);
3082  switch (an->type) {
3083  case ANCHOR_PREC_READ:
3084  case ANCHOR_PREC_READ_NOT:
3085  case ANCHOR_LOOK_BEHIND:
3087  r = subexp_recursive_check_trav(an->target, env);
3088  break;
3089  }
3090  }
3091  break;
3092 
3093  case NT_ENCLOSE:
3094  {
3095  EncloseNode* en = NENCLOSE(node);
3096 
3097  if (! IS_ENCLOSE_RECURSION(en)) {
3098  if (IS_ENCLOSE_CALLED(en)) {
3100  r = subexp_recursive_check(en->target);
3101  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3103  }
3104  }
3105  r = subexp_recursive_check_trav(en->target, env);
3106  if (IS_ENCLOSE_CALLED(en))
3107  r |= FOUND_CALLED_NODE;
3108  }
3109  break;
3110 
3111  default:
3112  break;
3113  }
3114 
3115  return r;
3116 }
3117 
3118 static int
3120 {
3121  int type;
3122  int r = 0;
3123 
3124  type = NTYPE(node);
3125  switch (type) {
3126  case NT_LIST:
3127  do {
3128  r = setup_subexp_call(NCAR(node), env);
3129  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3130  break;
3131 
3132  case NT_ALT:
3133  do {
3134  r = setup_subexp_call(NCAR(node), env);
3135  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3136  break;
3137 
3138  case NT_QTFR:
3139  r = setup_subexp_call(NQTFR(node)->target, env);
3140  break;
3141  case NT_ENCLOSE:
3142  r = setup_subexp_call(NENCLOSE(node)->target, env);
3143  break;
3144 
3145  case NT_CALL:
3146  {
3147  CallNode* cn = NCALL(node);
3148  Node** nodes = SCANENV_MEM_NODES(env);
3149 
3150  if (cn->group_num != 0) {
3151  int gnum = cn->group_num;
3152 
3153 #ifdef USE_NAMED_GROUP
3154  if (env->num_named > 0 &&
3158  }
3159 #endif
3160  if (gnum > env->num_mem) {
3164  }
3165 
3166 #ifdef USE_NAMED_GROUP
3167  set_call_attr:
3168 #endif
3169  cn->target = nodes[cn->group_num];
3170  if (IS_NULL(cn->target)) {
3174  }
3177  cn->unset_addr_list = env->unset_addr_list;
3178  }
3179 #ifdef USE_NAMED_GROUP
3180 #ifdef USE_PERL_SUBEXP_CALL
3181  else if (cn->name == cn->name_end) {
3182  goto set_call_attr;
3183  }
3184 #endif
3185  else {
3186  int *refs;
3187 
3188  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3189  &refs);
3190  if (n <= 0) {
3194  }
3195  else if (n > 1 &&
3200  }
3201  else {
3202  cn->group_num = refs[0];
3203  goto set_call_attr;
3204  }
3205  }
3206 #endif
3207  }
3208  break;
3209 
3210  case NT_ANCHOR:
3211  {
3212  AnchorNode* an = NANCHOR(node);
3213 
3214  switch (an->type) {
3215  case ANCHOR_PREC_READ:
3216  case ANCHOR_PREC_READ_NOT:
3217  case ANCHOR_LOOK_BEHIND:
3219  r = setup_subexp_call(an->target, env);
3220  break;
3221  }
3222  }
3223  break;
3224 
3225  default:
3226  break;
3227  }
3228 
3229  return r;
3230 }
3231 #endif
3232 
3233 /* divide different length alternatives in look-behind.
3234  (?<=A|B) ==> (?<=A)|(?<=B)
3235  (?<!A|B) ==> (?<!A)(?<!B)
3236 */
3237 static int
3239 {
3240  Node *head, *np, *insert_node;
3241  AnchorNode* an = NANCHOR(node);
3242  int anc_type = an->type;
3243 
3244  head = an->target;
3245  np = NCAR(head);
3246  swap_node(node, head);
3247  NCAR(node) = head;
3248  NANCHOR(head)->target = np;
3249 
3250  np = node;
3251  while ((np = NCDR(np)) != NULL_NODE) {
3252  insert_node = onig_node_new_anchor(anc_type);
3253  CHECK_NULL_RETURN_MEMERR(insert_node);
3254  NANCHOR(insert_node)->target = NCAR(np);
3255  NCAR(np) = insert_node;
3256  }
3257 
3258  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3259  np = node;
3260  do {
3261  SET_NTYPE(np, NT_LIST); /* alt -> list */
3262  } while ((np = NCDR(np)) != NULL_NODE);
3263  }
3264  return 0;
3265 }
3266 
3267 static int
3269 {
3270  int r, len;
3271  AnchorNode* an = NANCHOR(node);
3272 
3273  r = get_char_length_tree(an->target, reg, &len);
3274  if (r == 0)
3275  an->char_len = len;
3276  else if (r == GET_CHAR_LEN_VARLEN)
3278  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3281  else
3283  }
3284 
3285  return r;
3286 }
3287 
3288 static int
3289 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
3290 {
3291  int type;
3292 
3293  retry:
3294  type = NTYPE(node);
3295  if (type == NT_QTFR) {
3296  QtfrNode* qn = NQTFR(node);
3297  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3298 #ifdef USE_QTFR_PEEK_NEXT
3299  Node* n = get_head_value_node(next_node, 1, reg);
3300  /* '\0': for UTF-16BE etc... */
3301  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3302  qn->next_head_exact = n;
3303  }
3304 #endif
3305  /* automatic possessivation a*b ==> (?>a*)b */
3306  if (qn->lower <= 1) {
3307  int ttype = NTYPE(qn->target);
3308  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3309  Node *x, *y;
3310  x = get_head_value_node(qn->target, 0, reg);
3311  if (IS_NOT_NULL(x)) {
3312  y = get_head_value_node(next_node, 0, reg);
3313  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3317  swap_node(node, en);
3318  NENCLOSE(node)->target = en;
3319  }
3320  }
3321  }
3322  }
3323 
3324 #ifndef ONIG_DONT_OPTIMIZE
3325  if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
3326  in_root && /* qn->lower == 0 && */
3327  NTYPE(qn->target) == NT_CANY &&
3328  ! IS_MULTILINE(reg->options)) {
3329  /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
3330  Node *np;
3333  swap_node(node, np);
3334  NCDR(node) = onig_node_new_list(np, NULL_NODE);
3335  if (IS_NULL(NCDR(node))) {
3336  onig_node_free(np);
3337  return ONIGERR_MEMORY;
3338  }
3339  np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
3341  NCAR(node) = np;
3342  }
3343 #endif
3344  }
3345  }
3346  else if (type == NT_ENCLOSE) {
3347  EncloseNode* en = NENCLOSE(node);
3348  in_root = 0;
3349  if (en->type == ENCLOSE_MEMORY) {
3350  node = en->target;
3351  goto retry;
3352  }
3353  }
3354  return 0;
3355 }
3356 
3357 
3358 static int
3360 {
3362  UChar *sbuf, *ebuf, *sp;
3363  int r, i, len;
3364  OnigDistance sbuf_size;
3365  StrNode* sn = NSTR(node);
3366 
3367  end = sn->end;
3368  sbuf_size = (end - sn->s) * 2;
3369  sbuf = (UChar* )xmalloc(sbuf_size);
3371  ebuf = sbuf + sbuf_size;
3372 
3373  sp = sbuf;
3374  p = sn->s;
3375  while (p < end) {
3376  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3377  for (i = 0; i < len; i++) {
3378  if (sp >= ebuf) {
3379  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3380  if (IS_NULL(p)) {
3381  xfree(sbuf);
3382  return ONIGERR_MEMORY;
3383  }
3384  sbuf = p;
3385  sp = sbuf + sbuf_size;
3386  sbuf_size *= 2;
3387  ebuf = sbuf + sbuf_size;
3388  }
3389 
3390  *sp++ = buf[i];
3391  }
3392  }
3393 
3394  r = onig_node_str_set(node, sbuf, sp);
3395  if (r != 0) {
3396  xfree(sbuf);
3397  return r;
3398  }
3399 
3400  xfree(sbuf);
3401  return 0;
3402 }
3403 
3404 static int
3406  regex_t* reg)
3407 {
3408  int r;
3409  Node *node;
3410 
3411  node = onig_node_new_str(s, end);
3412  if (IS_NULL(node)) return ONIGERR_MEMORY;
3413 
3414  r = update_string_node_case_fold(reg, node);
3415  if (r != 0) {
3416  onig_node_free(node);
3417  return r;
3418  }
3419 
3420  NSTRING_SET_AMBIG(node);
3422  *rnode = node;
3423  return 0;
3424 }
3425 
3426 static int
3428  UChar *p, int slen, UChar *end,
3429  regex_t* reg, Node **rnode)
3430 {
3431  int r, i, j, len, varlen, varclen;
3432  Node *anode, *var_anode, *snode, *xnode, *an;
3434 
3435  *rnode = var_anode = NULL_NODE;
3436 
3437  varlen = 0;
3438  varclen = 0;
3439  for (i = 0; i < item_num; i++) {
3440  if (items[i].byte_len != slen) {
3441  varlen = 1;
3442  break;
3443  }
3444  if (items[i].code_len != 1) {
3445  varclen = 1;
3446  }
3447  }
3448 
3449  if (varlen != 0) {
3450  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3451  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3452 
3453  xnode = onig_node_new_list(NULL, NULL);
3454  if (IS_NULL(xnode)) goto mem_err;
3455  NCAR(var_anode) = xnode;
3456 
3458  if (IS_NULL(anode)) goto mem_err;
3459  NCAR(xnode) = anode;
3460  }
3461  else {
3462  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3463  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3464  }
3465 
3466  snode = onig_node_new_str(p, p + slen);
3467  if (IS_NULL(snode)) goto mem_err;
3468 
3469  NCAR(anode) = snode;
3470 
3471  for (i = 0; i < item_num; i++) {
3472  snode = onig_node_new_str(NULL, NULL);
3473  if (IS_NULL(snode)) goto mem_err;
3474 
3475  for (j = 0; j < items[i].code_len; j++) {
3476  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3477  if (len < 0) {
3478  r = len;
3479  goto mem_err2;
3480  }
3481 
3482  r = onig_node_str_cat(snode, buf, buf + len);
3483  if (r != 0) goto mem_err2;
3484  }
3485 
3487  if (IS_NULL(an)) {
3488  goto mem_err2;
3489  }
3490 
3491  if (items[i].byte_len != slen) {
3492  Node *rem;
3493  UChar *q = p + items[i].byte_len;
3494 
3495  if (q < end) {
3496  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3497  if (r != 0) {
3498  onig_node_free(an);
3499  goto mem_err2;
3500  }
3501 
3502  xnode = onig_node_list_add(NULL_NODE, snode);
3503  if (IS_NULL(xnode)) {
3504  onig_node_free(an);
3505  onig_node_free(rem);
3506  goto mem_err2;
3507  }
3508  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3509  onig_node_free(an);
3510  onig_node_free(xnode);
3511  onig_node_free(rem);
3512  goto mem_err;
3513  }
3514 
3515  NCAR(an) = xnode;
3516  }
3517  else {
3518  NCAR(an) = snode;
3519  }
3520 
3521  NCDR(var_anode) = an;
3522  var_anode = an;
3523  }
3524  else {
3525  NCAR(an) = snode;
3526  NCDR(anode) = an;
3527  anode = an;
3528  }
3529  }
3530 
3531  if (varclen && !varlen)
3532  return 2;
3533  return varlen;
3534 
3535  mem_err2:
3536  onig_node_free(snode);
3537 
3538  mem_err:
3539  onig_node_free(*rnode);
3540 
3541  return ONIGERR_MEMORY;
3542 }
3543 
3544 static int
3546 {
3547 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3548 
3549  int r, n, len, alt_num;
3550  int varlen = 0;
3551  UChar *start, *end, *p;
3552  Node *top_root, *root, *snode, *prev_node;
3554  StrNode* sn = NSTR(node);
3555 
3556  if (NSTRING_IS_AMBIG(node)) return 0;
3557 
3558  start = sn->s;
3559  end = sn->end;
3560  if (start >= end) return 0;
3561 
3562  r = 0;
3563  top_root = root = prev_node = snode = NULL_NODE;
3564  alt_num = 1;
3565  p = start;
3566  while (p < end) {
3568  p, end, items);
3569  if (n < 0) {
3570  r = n;
3571  goto err;
3572  }
3573 
3574  len = enclen(reg->enc, p, end);
3575 
3576  if (n == 0) {
3577  if (IS_NULL(snode)) {
3578  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3579  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3580  if (IS_NULL(root)) {
3581  onig_node_free(prev_node);
3582  goto mem_err;
3583  }
3584  }
3585 
3586  prev_node = snode = onig_node_new_str(NULL, NULL);
3587  if (IS_NULL(snode)) goto mem_err;
3588  if (IS_NOT_NULL(root)) {
3589  if (IS_NULL(onig_node_list_add(root, snode))) {
3590  onig_node_free(snode);
3591  goto mem_err;
3592  }
3593  }
3594  }
3595 
3596  r = onig_node_str_cat(snode, p, p + len);
3597  if (r != 0) goto err;
3598  }
3599  else {
3600  alt_num *= (n + 1);
3601  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3602 
3603  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3604  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3605  if (IS_NULL(root)) {
3606  onig_node_free(prev_node);
3607  goto mem_err;
3608  }
3609  }
3610 
3611  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3612  if (r < 0) goto mem_err;
3613  if (r > 0) varlen = 1;
3614  if (r == 1) {
3615  if (IS_NULL(root)) {
3616  top_root = prev_node;
3617  }
3618  else {
3619  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3620  onig_node_free(prev_node);
3621  goto mem_err;
3622  }
3623  }
3624 
3625  root = NCAR(prev_node);
3626  }
3627  else { /* r == 0 || r == 2 */
3628  if (IS_NOT_NULL(root)) {
3629  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3630  onig_node_free(prev_node);
3631  goto mem_err;
3632  }
3633  }
3634  }
3635 
3636  snode = NULL_NODE;
3637  }
3638 
3639  p += len;
3640  }
3641 
3642  if (p < end) {
3643  Node *srem;
3644 
3645  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3646  if (r != 0) goto mem_err;
3647 
3648  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3649  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3650  if (IS_NULL(root)) {
3651  onig_node_free(srem);
3652  onig_node_free(prev_node);
3653  goto mem_err;
3654  }
3655  }
3656 
3657  if (IS_NULL(root)) {
3658  prev_node = srem;
3659  }
3660  else {
3661  if (IS_NULL(onig_node_list_add(root, srem))) {
3662  onig_node_free(srem);
3663  goto mem_err;
3664  }
3665  }
3666  }
3667 
3668  /* ending */
3669  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3670  if (!varlen) {
3671  /* When all expanded strings are same length, case-insensitive
3672  BM search will be used. */
3673  r = update_string_node_case_fold(reg, node);
3674  if (r == 0) {
3675  NSTRING_SET_AMBIG(node);
3676  }
3677  }
3678  else {
3679  swap_node(node, top_root);
3680  r = 0;
3681  }
3682  onig_node_free(top_root);
3683  return r;
3684 
3685  mem_err:
3686  r = ONIGERR_MEMORY;
3687 
3688  err:
3689  onig_node_free(top_root);
3690  return r;
3691 }
3692 
3693 
3694 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3695 
3696 #define CEC_THRES_NUM_BIG_REPEAT 512
3697 #define CEC_INFINITE_NUM 0x7fffffff
3698 
3699 #define CEC_IN_INFINITE_REPEAT (1<<0)
3700 #define CEC_IN_FINITE_REPEAT (1<<1)
3701 #define CEC_CONT_BIG_REPEAT (1<<2)
3702 
3703 static int
3704 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3705 {
3706  int type;
3707  int r = state;
3708 
3709  type = NTYPE(node);
3710  switch (type) {
3711  case NT_LIST:
3712  {
3713  Node* prev = NULL_NODE;
3714  do {
3715  r = setup_comb_exp_check(NCAR(node), r, env);
3716  prev = NCAR(node);
3717  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3718  }
3719  break;
3720 
3721  case NT_ALT:
3722  {
3723  int ret;
3724  do {
3725  ret = setup_comb_exp_check(NCAR(node), state, env);
3726  r |= ret;
3727  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3728  }
3729  break;
3730 
3731  case NT_QTFR:
3732  {
3733  int child_state = state;
3734  int add_state = 0;
3735  QtfrNode* qn = NQTFR(node);
3736  Node* target = qn->target;
3737  int var_num;
3738 
3739  if (! IS_REPEAT_INFINITE(qn->upper)) {
3740  if (qn->upper > 1) {
3741  /* {0,1}, {1,1} are allowed */
3742  child_state |= CEC_IN_FINITE_REPEAT;
3743 
3744  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3745  if (env->backrefed_mem == 0) {
3746  if (NTYPE(qn->target) == NT_ENCLOSE) {
3747  EncloseNode* en = NENCLOSE(qn->target);
3748  if (en->type == ENCLOSE_MEMORY) {
3749  if (NTYPE(en->target) == NT_QTFR) {
3750  QtfrNode* q = NQTFR(en->target);
3751  if (IS_REPEAT_INFINITE(q->upper)
3752  && q->greedy == qn->greedy) {
3753  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3754  if (qn->upper == 1)
3755  child_state = state;
3756  }
3757  }
3758  }
3759  }
3760  }
3761  }
3762  }
3763 
3764  if (state & CEC_IN_FINITE_REPEAT) {
3765  qn->comb_exp_check_num = -1;
3766  }
3767  else {
3768  if (IS_REPEAT_INFINITE(qn->upper)) {
3769  var_num = CEC_INFINITE_NUM;
3770  child_state |= CEC_IN_INFINITE_REPEAT;
3771  }
3772  else {
3773  var_num = qn->upper - qn->lower;
3774  }
3775 
3776  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3777  add_state |= CEC_CONT_BIG_REPEAT;
3778 
3779  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3780  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3781  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3782  if (qn->comb_exp_check_num == 0) {
3783  env->num_comb_exp_check++;
3784  qn->comb_exp_check_num = env->num_comb_exp_check;
3785  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3786  env->comb_exp_max_regnum = env->curr_max_regnum;
3787  }
3788  }
3789  }
3790 
3791  r = setup_comb_exp_check(target, child_state, env);
3792  r |= add_state;
3793  }
3794  break;
3795 
3796  case NT_ENCLOSE:
3797  {
3798  EncloseNode* en = NENCLOSE(node);
3799 
3800  switch (en->type) {
3801  case ENCLOSE_MEMORY:
3802  {
3803  if (env->curr_max_regnum < en->regnum)
3804  env->curr_max_regnum = en->regnum;
3805 
3806  r = setup_comb_exp_check(en->target, state, env);
3807  }
3808  break;
3809 
3810  default:
3811  r = setup_comb_exp_check(en->target, state, env);
3812  break;
3813  }
3814  }
3815  break;
3816 
3817 #ifdef USE_SUBEXP_CALL
3818  case NT_CALL:
3819  if (IS_CALL_RECURSION(NCALL(node)))
3820  env->has_recursion = 1;
3821  else
3822  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3823  break;
3824 #endif
3825 
3826  default:
3827  break;
3828  }
3829 
3830  return r;
3831 }
3832 #endif
3833 
3834 #define IN_ALT (1<<0)
3835 #define IN_NOT (1<<1)
3836 #define IN_REPEAT (1<<2)
3837 #define IN_VAR_REPEAT (1<<3)
3838 #define IN_ROOT (1<<4)
3839 
3840 /* setup_tree does the following work.
3841  1. check empty loop. (set qn->target_empty_info)
3842  2. expand ignore-case in char class.
3843  3. set memory status bit flags. (reg->mem_stats)
3844  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3845  5. find invalid patterns in look-behind.
3846  6. expand repeated string.
3847  */
3848 static int
3849 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3850 {
3851  int type;
3852  int r = 0;
3853  int in_root = state & IN_ROOT;
3854 
3855  state &= ~IN_ROOT;
3856 restart:
3857  type = NTYPE(node);
3858  switch (type) {
3859  case NT_LIST:
3860  {
3861  Node* prev = NULL_NODE;
3862  int prev_in_root = 0;
3863  state |= in_root;
3864  do {
3865  r = setup_tree(NCAR(node), reg, state, env);
3866  if (IS_NOT_NULL(prev) && r == 0) {
3867  r = next_setup(prev, NCAR(node), prev_in_root, reg);
3868  }
3869  prev = NCAR(node);
3870  prev_in_root = state & IN_ROOT;
3871  state &= ~IN_ROOT;
3872  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3873  }
3874  break;
3875 
3876  case NT_ALT:
3877  do {
3878  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3879  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3880  break;
3881 
3882  case NT_CCLASS:
3883  break;
3884 
3885  case NT_STR:
3886  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3887  r = expand_case_fold_string(node, reg);
3888  }
3889  break;
3890 
3891  case NT_CTYPE:
3892  case NT_CANY:
3893  break;
3894 
3895 #ifdef USE_SUBEXP_CALL
3896  case NT_CALL:
3897  break;
3898 #endif
3899 
3900  case NT_BREF:
3901  {
3902  int i;
3903  int* p;
3904  Node** nodes = SCANENV_MEM_NODES(env);
3905  BRefNode* br = NBREF(node);
3906  p = BACKREFS_P(br);
3907  for (i = 0; i < br->back_num; i++) {
3908  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3909  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3910  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3911 #ifdef USE_BACKREF_WITH_LEVEL
3912  if (IS_BACKREF_NEST_LEVEL(br)) {
3913  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3914  }
3915 #endif
3916  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3917  }
3918  }
3919  break;
3920 
3921  case NT_QTFR:
3922  {
3923  OnigDistance d;
3924  QtfrNode* qn = NQTFR(node);
3925  Node* target = qn->target;
3926 
3927  if ((state & IN_REPEAT) != 0) {
3928  qn->state |= NST_IN_REPEAT;
3929  }
3930 
3931  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3932  r = get_min_match_length(target, &d, env);
3933  if (r) break;
3934  if (d == 0) {
3936 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3937  r = quantifiers_memory_node_info(target);
3938  if (r < 0) break;
3939  if (r > 0) {
3940  qn->target_empty_info = r;
3941  }
3942 #endif
3943 #if 0
3944  r = get_max_match_length(target, &d, env);
3945  if (r == 0 && d == 0) {
3946  /* ()* ==> ()?, ()+ ==> () */
3947  qn->upper = 1;
3948  if (qn->lower > 1) qn->lower = 1;
3949  if (NTYPE(target) == NT_STR) {
3950  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3951  }
3952  }
3953 #endif
3954  }
3955  }
3956 
3957  state |= IN_REPEAT;
3958  if (qn->lower != qn->upper)
3959  state |= IN_VAR_REPEAT;
3960  r = setup_tree(target, reg, state, env);
3961  if (r) break;
3962 
3963  /* expand string */
3964 #define EXPAND_STRING_MAX_LENGTH 100
3965  if (NTYPE(target) == NT_STR) {
3966  if (qn->lower > 1) {
3967  int i, n = qn->lower;
3968  OnigDistance len = NSTRING_LEN(target);
3969  StrNode* sn = NSTR(target);
3970  Node* np;
3971 
3972  np = onig_node_new_str(sn->s, sn->end);
3973  if (IS_NULL(np)) return ONIGERR_MEMORY;
3974  NSTR(np)->flag = sn->flag;
3975 
3976  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3977  r = onig_node_str_cat(np, sn->s, sn->end);
3978  if (r) {
3979  onig_node_free(np);
3980  return r;
3981  }
3982  }
3983  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
3984  Node *np1, *np2;
3985 
3986  qn->lower -= i;
3987  if (! IS_REPEAT_INFINITE(qn->upper))
3988  qn->upper -= i;
3989 
3990  np1 = onig_node_new_list(np, NULL);
3991  if (IS_NULL(np1)) {
3992  onig_node_free(np);
3993  return ONIGERR_MEMORY;
3994  }
3995  swap_node(np1, node);
3996  np2 = onig_node_list_add(node, np1);
3997  if (IS_NULL(np2)) {
3998  onig_node_free(np1);
3999  return ONIGERR_MEMORY;
4000  }
4001  }
4002  else {
4003  swap_node(np, node);
4004  onig_node_free(np);
4005  }
4006  break; /* break case NT_QTFR: */
4007  }
4008  }
4009 
4010 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4011  if (qn->greedy && (qn->target_empty_info != 0)) {
4012  if (NTYPE(target) == NT_QTFR) {
4013  QtfrNode* tqn = NQTFR(target);
4014  if (IS_NOT_NULL(tqn->head_exact)) {
4015  qn->head_exact = tqn->head_exact;
4016  tqn->head_exact = NULL;
4017  }
4018  }
4019  else {
4020  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4021  }
4022  }
4023 #endif
4024  }
4025  break;
4026 
4027  case NT_ENCLOSE:
4028  {
4029  EncloseNode* en = NENCLOSE(node);
4030 
4031  switch (en->type) {
4032  case ENCLOSE_OPTION:
4033  {
4035  state |= in_root;
4036  reg->options = NENCLOSE(node)->option;
4037  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4038  reg->options = options;
4039  }
4040  break;
4041 
4042  case ENCLOSE_MEMORY:
4043  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
4045  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4046  }
4047  r = setup_tree(en->target, reg, state, env);
4048  break;
4049 
4051  {
4052  Node* target = en->target;
4053  r = setup_tree(target, reg, state, env);
4054  if (NTYPE(target) == NT_QTFR) {
4055  QtfrNode* tqn = NQTFR(target);
4056  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4057  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4058  int qtype = NTYPE(tqn->target);
4059  if (IS_NODE_TYPE_SIMPLE(qtype))
4061  }
4062  }
4063  }
4064  break;
4065 
4066  case ENCLOSE_CONDITION:
4067 #ifdef USE_NAMED_GROUP
4068  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4069  env->num_named > 0 &&
4073  }
4074 #endif
4075  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4076  break;
4077  }
4078  }
4079  break;
4080 
4081  case NT_ANCHOR:
4082  {
4083  AnchorNode* an = NANCHOR(node);
4084 
4085  switch (an->type) {
4086  case ANCHOR_PREC_READ:
4087  r = setup_tree(an->target, reg, state, env);
4088  break;
4089  case ANCHOR_PREC_READ_NOT:
4090  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4091  break;
4092 
4093 /* allowed node types in look-behind */
4094 #define ALLOWED_TYPE_IN_LB \
4095  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4096  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4097 
4098 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4099 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4100 
4101 #define ALLOWED_ANCHOR_IN_LB \
4102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4103  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4104  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4105  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4106 #define ALLOWED_ANCHOR_IN_LB_NOT \
4107 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4108  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4109  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4110  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4111 
4112  case ANCHOR_LOOK_BEHIND:
4113  {
4116  if (r < 0) return r;
4117  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4118  r = setup_look_behind(node, reg, env);
4119  if (r != 0) return r;
4120  if (NTYPE(node) != NT_ANCHOR) goto restart;
4121  r = setup_tree(an->target, reg, state, env);
4122  }
4123  break;
4124 
4126  {
4129  if (r < 0) return r;
4130  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4131  r = setup_look_behind(node, reg, env);
4132  if (r != 0) return r;
4133  if (NTYPE(node) != NT_ANCHOR) goto restart;
4134  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4135  }
4136  break;
4137  }
4138  }
4139  break;
4140 
4141  default:
4142  break;
4143  }
4144 
4145  return r;
4146 }
4147 
4148 #ifndef USE_SUNDAY_QUICK_SEARCH
4149 /* set skip map for Boyer-Moore search */
4150 static int
4151 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4152  UChar skip[], int** int_skip, int ignore_case)
4153 {
4154  OnigDistance i, len;
4155  int clen, flen, n, j, k;
4158  OnigEncoding enc = reg->enc;
4159 
4160  len = end - s;
4161  if (len < ONIG_CHAR_TABLE_SIZE) {
4162  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4163 
4164  n = 0;
4165  for (i = 0; i < len - 1; i += clen) {
4166  p = s + i;
4167  if (ignore_case)
4169  p, end, items);
4170  clen = enclen(enc, p, end);
4171 
4172  for (j = 0; j < n; j++) {
4173  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4174  return 1; /* different length isn't supported. */
4175  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4176  if (flen != clen)
4177  return 1; /* different length isn't supported. */
4178  }
4179  for (j = 0; j < clen; j++) {
4180  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4181  for (k = 0; k < n; k++) {
4182  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4183  }
4184  }
4185  }
4186  }
4187  else {
4188  if (IS_NULL(*int_skip)) {
4189  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4190  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4191  }
4192  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4193 
4194  n = 0;
4195  for (i = 0; i < len - 1; i += clen) {
4196  p = s + i;
4197  if (ignore_case)
4199  p, end, items);
4200  clen = enclen(enc, p, end);
4201 
4202  for (j = 0; j < n; j++) {
4203  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4204  return 1; /* different length isn't supported. */
4205  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4206  if (flen != clen)
4207  return 1; /* different length isn't supported. */
4208  }
4209  for (j = 0; j < clen; j++) {
4210  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4211  for (k = 0; k < n; k++) {
4212  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4213  }
4214  }
4215  }
4216  }
4217  return 0;
4218 }
4219 
4220 #else /* USE_SUNDAY_QUICK_SEARCH */
4221 
4222 /* set skip map for Sunday's quick search */
4223 static int
4225  UChar skip[], int** int_skip, int ignore_case)
4226 {
4227  OnigDistance i, len;
4228  int clen, flen, n, j, k;
4231  OnigEncoding enc = reg->enc;
4232 
4233  len = end - s;
4234  if (len < ONIG_CHAR_TABLE_SIZE) {
4235  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4236 
4237  n = 0;
4238  for (i = 0; i < len; i += clen) {
4239  p = s + i;
4240  if (ignore_case)
4242  p, end, items);
4243  clen = enclen(enc, p, end);
4244 
4245  for (j = 0; j < n; j++) {
4246  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4247  return 1; /* different length isn't supported. */
4248  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4249  if (flen != clen)
4250  return 1; /* different length isn't supported. */
4251  }
4252  for (j = 0; j < clen; j++) {
4253  skip[s[i + j]] = (UChar )(len - i - j);
4254  for (k = 0; k < n; k++) {
4255  skip[buf[k][j]] = (UChar )(len - i - j);
4256  }
4257  }
4258  }
4259  }
4260  else {
4261  if (IS_NULL(*int_skip)) {
4262  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4263  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4264  }
4265  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4266 
4267  n = 0;
4268  for (i = 0; i < len; i += clen) {
4269  p = s + i;
4270  if (ignore_case)
4272  p, end, items);
4273  clen = enclen(enc, p, end);
4274 
4275  for (j = 0; j < n; j++) {
4276  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4277  return 1; /* different length isn't supported. */
4278  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4279  if (flen != clen)
4280  return 1; /* different length isn't supported. */
4281  }
4282  for (j = 0; j < clen; j++) {
4283  (*int_skip)[s[i + j]] = (int )(len - i - j);
4284  for (k = 0; k < n; k++) {
4285  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4286  }
4287  }
4288  }
4289  }
4290  return 0;
4291 }
4292 #endif /* USE_SUNDAY_QUICK_SEARCH */
4293 
4294 #define OPT_EXACT_MAXLEN 24
4295 
4296 typedef struct {
4297  OnigDistance min; /* min byte length */
4298  OnigDistance max; /* max byte length */
4299 } MinMaxLen;
4300 
4301 typedef struct {
4307 } OptEnv;
4308 
4309 typedef struct {
4312 } OptAncInfo;
4313 
4314 typedef struct {
4315  MinMaxLen mmd; /* info position */
4317 
4319  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4320  int len;
4322 } OptExactInfo;
4323 
4324 typedef struct {
4325  MinMaxLen mmd; /* info position */
4327 
4328  int value; /* weighted value */
4330 } OptMapInfo;
4331 
4332 typedef struct {
4334 
4336  OptExactInfo exb; /* boundary */
4337  OptExactInfo exm; /* middle */
4338  OptExactInfo expr; /* prec read (?=...) */
4339 
4340  OptMapInfo map; /* boundary */
4341 } NodeOptInfo;
4342 
4343 
4344 static int
4346 {
4347  static const short int ByteValTable[] = {
4348  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4349  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4350  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4351  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4352  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4353  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4354  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4355  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4356  };
4357 
4358  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4359  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4360  return 20;
4361  else
4362  return (int )ByteValTable[i];
4363  }
4364  else
4365  return 4; /* Take it easy. */
4366 }
4367 
4368 static int
4370 {
4371  /* 1000 / (min-max-dist + 1) */
4372  static const short int dist_vals[] = {
4373  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4374  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4375  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4376  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4377  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4378  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4379  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4380  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4381  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4382  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4383  };
4384 
4385  OnigDistance d;
4386 
4387  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4388 
4389  d = mm->max - mm->min;
4390  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
4391  /* return dist_vals[d] * 16 / (mm->min + 12); */
4392  return (int )dist_vals[d];
4393  else
4394  return 1;
4395 }
4396 
4397 static int
4399 {
4400  if (v2 <= 0) return -1;
4401  if (v1 <= 0) return 1;
4402 
4403  v1 *= distance_value(d1);
4404  v2 *= distance_value(d2);
4405 
4406  if (v2 > v1) return 1;
4407  if (v2 < v1) return -1;
4408 
4409  if (d2->min < d1->min) return 1;
4410  if (d2->min > d1->min) return -1;
4411  return 0;
4412 }
4413 
4414 static int
4416 {
4417  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4418 }
4419 
4420 
4421 static void
4423 {
4424  mml->min = min;
4425  mml->max = max;
4426 }
4427 
4428 static void
4430 {
4431  mml->min = mml->max = 0;
4432 }
4433 
4434 static void
4436 {
4437  to->min = from->min;
4438  to->max = from->max;
4439 }
4440 
4441 static void
4443 {
4444  to->min = distance_add(to->min, from->min);
4445  to->max = distance_add(to->max, from->max);
4446 }
4447 
4448 #if 0
4449 static void
4450 add_len_mml(MinMaxLen* to, OnigDistance len)
4451 {
4452  to->min = distance_add(to->min, len);
4453  to->max = distance_add(to->max, len);
4454 }
4455 #endif
4456 
4457 static void
4459 {
4460  if (to->min > from->min) to->min = from->min;
4461  if (to->max < from->max) to->max = from->max;
4462 }
4463 
4464 static void
4466 {
4467  *to = *from;
4468 }
4469 
4470 static void
4472 {
4473  anc->left_anchor = 0;
4474  anc->right_anchor = 0;
4475 }
4476 
4477 static void
4479 {
4480  *to = *from;
4481 }
4482 
4483 static void
4485  OnigDistance left_len, OnigDistance right_len)
4486 {
4487  clear_opt_anc_info(to);
4488 
4489  to->left_anchor = left->left_anchor;
4490  if (left_len == 0) {
4491  to->left_anchor |= right->left_anchor;
4492  }
4493 
4494  to->right_anchor = right->right_anchor;
4495  if (right_len == 0) {
4496  to->right_anchor |= left->right_anchor;
4497  }
4498 }
4499 
4500 static int
4502 {
4503  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4504  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4505  anc == ANCHOR_PREC_READ_NOT)
4506  return 0;
4507 
4508  return 1;
4509 }
4510 
4511 static int
4513 {
4514  if ((to->left_anchor & anc) != 0) return 1;
4515 
4516  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4517 }
4518 
4519 static void
4521 {
4522  if (is_left_anchor(anc))
4523  to->left_anchor |= anc;
4524  else
4525  to->right_anchor |= anc;
4526 }
4527 
4528 static void
4530 {
4531  if (is_left_anchor(anc))
4532  to->left_anchor &= ~anc;
4533  else
4534  to->right_anchor &= ~anc;
4535 }
4536 
4537 static void
4539 {
4540  to->left_anchor &= add->left_anchor;
4541  to->right_anchor &= add->right_anchor;
4542 }
4543 
4544 static int
4546 {
4547  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4548 }
4549 
4550 static void
4552 {
4553  clear_mml(&ex->mmd);
4554  clear_opt_anc_info(&ex->anc);
4555  ex->reach_end = 0;
4556  ex->ignore_case = -1; /* unset */
4557  ex->len = 0;
4558  ex->s[0] = '\0';
4559 }
4560 
4561 static void
4563 {
4564  *to = *from;
4565 }
4566 
4567 static void
4569 {
4570  int i, j, len;
4571  UChar *p, *end;
4572  OptAncInfo tanc;
4573 
4574  if (to->ignore_case < 0)
4575  to->ignore_case = add->ignore_case;
4576  else if (to->ignore_case != add->ignore_case)
4577  return ; /* avoid */
4578 
4579  p = add->s;
4580  end = p + add->len;
4581  for (i = to->len; p < end; ) {
4582  len = enclen(enc, p, end);
4583  if (i + len > OPT_EXACT_MAXLEN) break;
4584  for (j = 0; j < len && p < end; j++)
4585  to->s[i++] = *p++;
4586  }
4587 
4588  to->len = i;
4589  to->reach_end = (p == end ? add->reach_end : 0);
4590 
4591  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4592  if (! to->reach_end) tanc.right_anchor = 0;
4593  copy_opt_anc_info(&to->anc, &tanc);
4594 }
4595 
4596 static void
4598  int raw ARG_UNUSED, OnigEncoding enc)
4599 {
4600  int i, j, len;
4601  UChar *p;
4602 
4603  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4604  len = enclen(enc, p, end);
4605  if (i + len > OPT_EXACT_MAXLEN) break;
4606  for (j = 0; j < len && p < end; j++)
4607  to->s[i++] = *p++;
4608  }
4609 
4610  to->len = i;
4611 }
4612 
4613 static void
4615 {
4616  int i, j, len;
4617 
4618  if (add->len == 0 || to->len == 0) {
4620  return ;
4621  }
4622 
4623  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4625  return ;
4626  }
4627 
4628  for (i = 0; i < to->len && i < add->len; ) {
4629  if (to->s[i] != add->s[i]) break;
4630  len = enclen(env->enc, to->s + i, to->s + to->len);
4631 
4632  for (j = 1; j < len; j++) {
4633  if (to->s[i+j] != add->s[i+j]) break;
4634  }
4635  if (j < len) break;
4636  i += len;
4637  }
4638 
4639  if (! add->reach_end || i < add->len || i < to->len) {
4640  to->reach_end = 0;
4641  }
4642  to->len = i;
4643  if (to->ignore_case < 0)
4644  to->ignore_case = add->ignore_case;
4645  else if (add->ignore_case >= 0)
4646  to->ignore_case |= add->ignore_case;
4647 
4648  alt_merge_opt_anc_info(&to->anc, &add->anc);
4649  if (! to->reach_end) to->anc.right_anchor = 0;
4650 }
4651 
4652 static void
4654 {
4655  int v1, v2;
4656 
4657  v1 = now->len;
4658  v2 = alt->len;
4659 
4660  if (v2 == 0) {
4661  return ;
4662  }
4663  else if (v1 == 0) {
4664  copy_opt_exact_info(now, alt);
4665  return ;
4666  }
4667  else if (v1 <= 2 && v2 <= 2) {
4668  /* ByteValTable[x] is big value --> low price */
4669  v2 = map_position_value(enc, now->s[0]);
4670  v1 = map_position_value(enc, alt->s[0]);
4671 
4672  if (now->len > 1) v1 += 5;
4673  if (alt->len > 1) v2 += 5;
4674  }
4675 
4676  if (now->ignore_case <= 0) v1 *= 2;
4677  if (alt->ignore_case <= 0) v2 *= 2;
4678 
4679  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4680  copy_opt_exact_info(now, alt);
4681 }
4682 
4683 static void
4685 {
4686  static const OptMapInfo clean_info = {
4687  {0, 0}, {0, 0}, 0,
4688  {
4689  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4690  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4691  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4692  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4693  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4694  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4695  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4696  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4697  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4698  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4699  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4700  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4701  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4702  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4703  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4704  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4705  }
4706  };
4707 
4708  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4709 }
4710 
4711 static void
4713 {
4714  *to = *from;
4715 }
4716 
4717 static void
4719 {
4720  if (map->map[c] == 0) {
4721  map->map[c] = 1;
4722  map->value += map_position_value(enc, c);
4723  }
4724 }
4725 
4726 static int
4728  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4729 {
4732  int i, n;
4733 
4734  add_char_opt_map_info(map, p[0], enc);
4735 
4736  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4737  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4738  if (n < 0) return n;
4739 
4740  for (i = 0; i < n; i++) {
4741  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4742  add_char_opt_map_info(map, buf[0], enc);
4743  }
4744 
4745  return 0;
4746 }
4747 
4748 static void
4750 {
4751  const int z = 1<<15; /* 32768: something big value */
4752 
4753  int v1, v2;
4754 
4755  if (alt->value == 0) return ;
4756  if (now->value == 0) {
4757  copy_opt_map_info(now, alt);
4758  return ;
4759  }
4760 
4761  v1 = z / now->value;
4762  v2 = z / alt->value;
4763  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4764  copy_opt_map_info(now, alt);
4765 }
4766 
4767 static int
4769 {
4770 #define COMP_EM_BASE 20
4771  int ve, vm;
4772 
4773  if (m->value <= 0) return -1;
4774 
4775  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4776  vm = COMP_EM_BASE * 5 * 2 / m->value;
4777  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4778 }
4779 
4780 static void
4782 {
4783  int i, val;
4784 
4785  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4786  if (to->value == 0) return ;
4787  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4788  clear_opt_map_info(to);
4789  return ;
4790  }
4791 
4792  alt_merge_mml(&to->mmd, &add->mmd);
4793 
4794  val = 0;
4795  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4796  if (add->map[i])
4797  to->map[i] = 1;
4798 
4799  if (to->map[i])
4800  val += map_position_value(enc, i);
4801  }
4802  to->value = val;
4803 
4804  alt_merge_opt_anc_info(&to->anc, &add->anc);
4805 }
4806 
4807 static void
4809 {
4810  copy_mml(&(opt->exb.mmd), mmd);
4811  copy_mml(&(opt->expr.mmd), mmd);
4812  copy_mml(&(opt->map.mmd), mmd);
4813 }
4814 
4815 static void
4817 {
4818  clear_mml(&opt->len);
4819  clear_opt_anc_info(&opt->anc);
4820  clear_opt_exact_info(&opt->exb);
4821  clear_opt_exact_info(&opt->exm);
4822  clear_opt_exact_info(&opt->expr);
4823  clear_opt_map_info(&opt->map);
4824 }
4825 
4826 static void
4828 {
4829  *to = *from;
4830 }
4831 
4832 static void
4834 {
4835  int exb_reach, exm_reach;
4836  OptAncInfo tanc;
4837 
4838  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4839  copy_opt_anc_info(&to->anc, &tanc);
4840 
4841  if (add->exb.len > 0 && to->len.max == 0) {
4842  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4843  to->len.max, add->len.max);
4844  copy_opt_anc_info(&add->exb.anc, &tanc);
4845  }
4846 
4847  if (add->map.value > 0 && to->len.max == 0) {
4848  if (add->map.mmd.max == 0)
4849  add->map.anc.left_anchor |= to->anc.left_anchor;
4850  }
4851 
4852  exb_reach = to->exb.reach_end;
4853  exm_reach = to->exm.reach_end;
4854 
4855  if (add->len.max != 0)
4856  to->exb.reach_end = to->exm.reach_end = 0;
4857 
4858  if (add->exb.len > 0) {
4859  if (exb_reach) {
4860  concat_opt_exact_info(&to->exb, &add->exb, enc);
4861  clear_opt_exact_info(&add->exb);
4862  }
4863  else if (exm_reach) {
4864  concat_opt_exact_info(&to->exm, &add->exb, enc);
4865  clear_opt_exact_info(&add->exb);
4866  }
4867  }
4868  select_opt_exact_info(enc, &to->exm, &add->exb);
4869  select_opt_exact_info(enc, &to->exm, &add->exm);
4870 
4871  if (to->expr.len > 0) {
4872  if (add->len.max > 0) {
4873  if (to->expr.len > (int )add->len.max)
4874  to->expr.len = (int )add->len.max;
4875 
4876  if (to->expr.mmd.max == 0)
4877  select_opt_exact_info(enc, &to->exb, &to->expr);
4878  else
4879  select_opt_exact_info(enc, &to->exm, &to->expr);
4880  }
4881  }
4882  else if (add->expr.len > 0) {
4883  copy_opt_exact_info(&to->expr, &add->expr);
4884  }
4885 
4886  select_opt_map_info(&to->map, &add->map);
4887 
4888  add_mml(&to->len, &add->len);
4889 }
4890 
4891 static void
4893 {
4894  alt_merge_opt_anc_info (&to->anc, &add->anc);
4895  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4896  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4897  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4898  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4899 
4900  alt_merge_mml(&to->len, &add->len);
4901 }
4902 
4903 
4904 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4905 
4906 static int
4908 {
4909  int type;
4910  int r = 0;
4911 
4912  clear_node_opt_info(opt);
4913  set_bound_node_opt_info(opt, &env->mmd);
4914 
4915  type = NTYPE(node);
4916  switch (type) {
4917  case NT_LIST:
4918  {
4919  OptEnv nenv;
4920  NodeOptInfo nopt;
4921  Node* nd = node;
4922 
4923  copy_opt_env(&nenv, env);
4924  do {
4925  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4926  if (r == 0) {
4927  add_mml(&nenv.mmd, &nopt.len);
4928  concat_left_node_opt_info(env->enc, opt, &nopt);
4929  }
4930  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4931  }
4932  break;
4933 
4934  case NT_ALT:
4935  {
4936  NodeOptInfo nopt;
4937  Node* nd = node;
4938 
4939  do {
4940  r = optimize_node_left(NCAR(nd), &nopt, env);
4941  if (r == 0) {
4942  if (nd == node) copy_node_opt_info(opt, &nopt);
4943  else alt_merge_node_opt_info(opt, &nopt, env);
4944  }
4945  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4946  }
4947  break;
4948 
4949  case NT_STR:
4950  {
4951  StrNode* sn = NSTR(node);
4952  OnigDistance slen = sn->end - sn->s;
4953  int is_raw = NSTRING_IS_RAW(node);
4954 
4955  if (! NSTRING_IS_AMBIG(node)) {
4956  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4957  is_raw, env->enc);
4958  opt->exb.ignore_case = 0;
4959  if (slen > 0) {
4960  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4961  }
4962  set_mml(&opt->len, slen, slen);
4963  }
4964  else {
4965  OnigDistance max;
4966 
4967  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4968  int n = onigenc_strlen(env->enc, sn->s, sn->end);
4969  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4970  }
4971  else {
4972  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4973  is_raw, env->enc);
4974  opt->exb.ignore_case = 1;
4975 
4976  if (slen > 0) {
4977  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4978  env->enc, env->case_fold_flag);
4979  if (r != 0) break;
4980  }
4981 
4982  max = slen;
4983  }
4984 
4985  set_mml(&opt->len, slen, max);
4986  }
4987 
4988  if ((OnigDistance )opt->exb.len == slen)
4989  opt->exb.reach_end = 1;
4990  }
4991  break;
4992 
4993  case NT_CCLASS:
4994  {
4995  int i, z;
4996  CClassNode* cc = NCCLASS(node);
4997 
4998  /* no need to check ignore case. (set in setup_tree()) */
4999 
5000  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5001  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5003 
5004  set_mml(&opt->len, min, max);
5005  }
5006  else {
5007  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5008  z = BITSET_AT(cc->bs, i);
5009  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5010  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5011  }
5012  }
5013  set_mml(&opt->len, 1, 1);
5014  }
5015  }
5016  break;
5017 
5018  case NT_CTYPE:
5019  {
5020  int i, min, max;
5021  int maxcode;
5022 
5023  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5024 
5025  if (max == 1) {
5026  min = 1;
5027 
5028  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5029  switch (NCTYPE(node)->ctype) {
5030  case ONIGENC_CTYPE_WORD:
5031  if (NCTYPE(node)->not != 0) {
5032  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5033  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5034  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5035  }
5036  }
5037  }
5038  else {
5039  for (i = 0; i < maxcode; i++) {
5040  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5041  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5042  }
5043  }
5044  }
5045  break;
5046  }
5047  }
5048  else {
5049  min = ONIGENC_MBC_MINLEN(env->enc);
5050  }
5051  set_mml(&opt->len, min, max);
5052  }
5053  break;
5054 
5055  case NT_CANY:
5056  {
5057  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5059  set_mml(&opt->len, min, max);
5060  }
5061  break;
5062 
5063  case NT_ANCHOR:
5064  switch (NANCHOR(node)->type) {
5065  case ANCHOR_BEGIN_BUF:
5066  case ANCHOR_BEGIN_POSITION:
5067  case ANCHOR_BEGIN_LINE:
5068  case ANCHOR_END_BUF:
5069  case ANCHOR_SEMI_END_BUF:
5070  case ANCHOR_END_LINE:
5071  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5072  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5073  break;
5074 
5075  case ANCHOR_PREC_READ:
5076  {
5077  NodeOptInfo nopt;
5078 
5079  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5080  if (r == 0) {
5081  if (nopt.exb.len > 0)
5082  copy_opt_exact_info(&opt->expr, &nopt.exb);
5083  else if (nopt.exm.len > 0)
5084  copy_opt_exact_info(&opt->expr, &nopt.exm);
5085 
5086  opt->expr.reach_end = 0;
5087 
5088  if (nopt.map.value > 0)
5089  copy_opt_map_info(&opt->map, &nopt.map);
5090  }
5091  }
5092  break;
5093 
5094  case ANCHOR_PREC_READ_NOT:
5096  break;
5097  }
5098  break;
5099 
5100  case NT_BREF:
5101  {
5102  int i;
5103  int* backs;
5104  OnigDistance min, max, tmin, tmax;
5105  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5106  BRefNode* br = NBREF(node);
5107 
5108  if (br->state & NST_RECURSION) {
5109  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5110  break;
5111  }
5112  backs = BACKREFS_P(br);
5113  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5114  if (r != 0) break;
5115  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5116  if (r != 0) break;
5117  for (i = 1; i < br->back_num; i++) {
5118  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5119  if (r != 0) break;
5120  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5121  if (r != 0) break;
5122  if (min > tmin) min = tmin;
5123  if (max < tmax) max = tmax;
5124  }
5125  if (r == 0) set_mml(&opt->len, min, max);
5126  }
5127  break;
5128 
5129 #ifdef USE_SUBEXP_CALL
5130  case NT_CALL:
5131  if (IS_CALL_RECURSION(NCALL(node)))
5132  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5133  else {
5134  OnigOptionType save = env->options;
5135  env->options = NENCLOSE(NCALL(node)->target)->option;
5136  r = optimize_node_left(NCALL(node)->target, opt, env);
5137  env->options = save;
5138  }
5139  break;
5140 #endif
5141 
5142  case NT_QTFR:
5143  {
5144  int i;
5145  OnigDistance min, max;
5146  NodeOptInfo nopt;
5147  QtfrNode* qn = NQTFR(node);
5148 
5149  r = optimize_node_left(qn->target, &nopt, env);
5150  if (r) break;
5151 
5152  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5153  if (env->mmd.max == 0 &&
5154  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5155  if (IS_MULTILINE(env->options))
5156  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5158  else
5160  }
5161  }
5162  else {
5163  if (qn->lower > 0) {
5164  copy_node_opt_info(opt, &nopt);
5165  if (nopt.exb.len > 0) {
5166  if (nopt.exb.reach_end) {
5167  for (i = 2; i <= qn->lower &&
5168  ! is_full_opt_exact_info(&opt->exb); i++) {
5169  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5170  }
5171  if (i < qn->lower) {
5172  opt->exb.reach_end = 0;
5173  }
5174  }
5175  }
5176 
5177  if (qn->lower != qn->upper) {
5178  opt->exb.reach_end = 0;
5179  opt->exm.reach_end = 0;
5180  }
5181  if (qn->lower > 1)
5182  opt->exm.reach_end = 0;
5183  }
5184  }
5185 
5186  min = distance_multiply(nopt.len.min, qn->lower);
5187  if (IS_REPEAT_INFINITE(qn->upper))
5188  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5189  else
5190  max = distance_multiply(nopt.len.max, qn->upper);
5191 
5192  set_mml(&opt->len, min, max);
5193  }
5194  break;
5195 
5196  case NT_ENCLOSE:
5197  {
5198  EncloseNode* en = NENCLOSE(node);
5199 
5200  switch (en->type) {
5201  case ENCLOSE_OPTION:
5202  {
5203  OnigOptionType save = env->options;
5204 
5205  env->options = en->option;
5206  r = optimize_node_left(en->target, opt, env);
5207  env->options = save;
5208  }
5209  break;
5210 
5211  case ENCLOSE_MEMORY:
5212 #ifdef USE_SUBEXP_CALL
5213  en->opt_count++;
5215  OnigDistance min, max;
5216 
5217  min = 0;
5218  max = ONIG_INFINITE_DISTANCE;
5219  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5220  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5221  set_mml(&opt->len, min, max);
5222  }
5223  else
5224 #endif
5225  {
5226  r = optimize_node_left(en->target, opt, env);
5227 
5229  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5231  }
5232  }
5233  break;
5234 
5236  case ENCLOSE_CONDITION:
5237  r = optimize_node_left(en->target, opt, env);
5238  break;
5239  }
5240  }
5241  break;
5242 
5243  default:
5244 #ifdef ONIG_DEBUG
5245  if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5246  NTYPE(node));
5247 #endif
5248  r = ONIGERR_TYPE_BUG;
5249  break;
5250  }
5251 
5252  return r;
5253 }
5254 
5255 static int
5257 {
5258  int r;
5259  int allow_reverse;
5260 
5261  if (e->len == 0) return 0;
5262 
5263  reg->exact = (UChar* )xmalloc(e->len);
5265  xmemcpy(reg->exact, e->s, e->len);
5266  reg->exact_end = reg->exact + e->len;
5267 
5268  allow_reverse =
5270 
5271  if (e->ignore_case > 0) {
5272  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5273  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5274  reg->map, &(reg->int_map), 1);
5275  if (r == 0) {
5276  reg->optimize = (allow_reverse != 0
5278  }
5279  else {
5281  }
5282  }
5283  else {
5285  }
5286  }
5287  else {
5288  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5289  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5290  reg->map, &(reg->int_map), 0);
5291  if (r) return r;
5292 
5293  reg->optimize = (allow_reverse != 0
5295  }
5296  else {
5298  }
5299  }
5300 
5301  reg->dmin = e->mmd.min;
5302  reg->dmax = e->mmd.max;
5303 
5304  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5305  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5306  }
5307 
5308  return 0;
5309 }
5310 
5311 static void
5313 {
5314  int i;
5315 
5316  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5317  reg->map[i] = m->map[i];
5318 
5319  reg->optimize = ONIG_OPTIMIZE_MAP;
5320  reg->dmin = m->mmd.min;
5321  reg->dmax = m->mmd.max;
5322 
5323  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5324  reg->threshold_len = (int )(reg->dmin + 1);
5325  }
5326 }
5327 
5328 static void
5330 {
5331  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5332  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5333 }
5334 
5335 #ifdef ONIG_DEBUG
5336 static void print_optimize_info(FILE* f, regex_t* reg);
5337 #endif
5338 
5339 static int
5341 {
5342 
5343  int r;
5344  NodeOptInfo opt;
5345  OptEnv env;
5346 
5347  env.enc = reg->enc;
5348  env.options = reg->options;
5349  env.case_fold_flag = reg->case_fold_flag;
5350  env.scan_env = scan_env;
5351  clear_mml(&env.mmd);
5352 
5353  r = optimize_node_left(node, &opt, &env);
5354  if (r) return r;
5355 
5356  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5359 
5361 
5362  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5363  reg->anchor_dmin = opt.len.min;
5364  reg->anchor_dmax = opt.len.max;
5365  }
5366 
5367  if (opt.exb.len > 0 || opt.exm.len > 0) {
5368  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5369  if (opt.map.value > 0 &&
5370  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5371  goto set_map;
5372  }
5373  else {
5374  r = set_optimize_exact_info(reg, &opt.exb);
5375  set_sub_anchor(reg, &opt.exb.anc);
5376  }
5377  }
5378  else if (opt.map.value > 0) {
5379  set_map:
5380  set_optimize_map_info(reg, &opt.map);
5381  set_sub_anchor(reg, &opt.map.anc);
5382  }
5383  else {
5385  if (opt.len.max == 0)
5387  }
5388 
5389 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5390  if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5391 #endif
5392  return r;
5393 }
5394 
5395 static void
5397 {
5399  reg->anchor = 0;
5400  reg->anchor_dmin = 0;
5401  reg->anchor_dmax = 0;
5402  reg->sub_anchor = 0;
5403  reg->exact_end = (UChar* )NULL;
5404  reg->threshold_len = 0;
5405  if (IS_NOT_NULL(reg->exact)) {
5406  xfree(reg->exact);
5407  reg->exact = (UChar* )NULL;
5408  }
5409 }
5410 
5411 #ifdef ONIG_DEBUG
5412 
5413 static void print_enc_string(FILE* fp, OnigEncoding enc,
5414  const UChar *s, const UChar *end)
5415 {
5416  fprintf(fp, "\nPATTERN: /");
5417 
5418  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5419  const UChar *p;
5420  OnigCodePoint code;
5421 
5422  p = s;
5423  while (p < end) {
5424  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5425  if (code >= 0x80) {
5426  fprintf(fp, " 0x%04x ", (int )code);
5427  }
5428  else {
5429  fputc((int )code, fp);
5430  }
5431 
5432  p += enclen(enc, p, end);
5433  }
5434  }
5435  else {
5436  while (s < end) {
5437  fputc((int )*s, fp);
5438  s++;
5439  }
5440  }
5441 
5442  fprintf(fp, "/ (%s)\n", enc->name);
5443 }
5444 
5445 static void
5446 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5447 {
5448  if (a == ONIG_INFINITE_DISTANCE)
5449  fputs("inf", f);
5450  else
5451  fprintf(f, "(%"PRIuSIZE")", a);
5452 
5453  fputs("-", f);
5454 
5455  if (b == ONIG_INFINITE_DISTANCE)
5456  fputs("inf", f);
5457  else
5458  fprintf(f, "(%"PRIuSIZE")", b);
5459 }
5460 
5461 static void
5462 print_anchor(FILE* f, int anchor)
5463 {
5464  int q = 0;
5465 
5466  fprintf(f, "[");
5467 
5468  if (anchor & ANCHOR_BEGIN_BUF) {
5469  fprintf(f, "begin-buf");
5470  q = 1;
5471  }
5472  if (anchor & ANCHOR_BEGIN_LINE) {
5473  if (q) fprintf(f, ", ");
5474  q = 1;
5475  fprintf(f, "begin-line");
5476  }
5477  if (anchor & ANCHOR_BEGIN_POSITION) {
5478  if (q) fprintf(f, ", ");
5479  q = 1;
5480  fprintf(f, "begin-pos");
5481  }
5482  if (anchor & ANCHOR_END_BUF) {
5483  if (q) fprintf(f, ", ");
5484  q = 1;
5485  fprintf(f, "end-buf");
5486  }
5487  if (anchor & ANCHOR_SEMI_END_BUF) {
5488  if (q) fprintf(f, ", ");
5489  q = 1;
5490  fprintf(f, "semi-end-buf");
5491  }
5492  if (anchor & ANCHOR_END_LINE) {
5493  if (q) fprintf(f, ", ");
5494  q = 1;
5495  fprintf(f, "end-line");
5496  }
5497  if (anchor & ANCHOR_ANYCHAR_STAR) {
5498  if (q) fprintf(f, ", ");
5499  q = 1;
5500  fprintf(f, "anychar-star");
5501  }
5502  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5503  if (q) fprintf(f, ", ");
5504  fprintf(f, "anychar-star-ml");
5505  }
5506 
5507  fprintf(f, "]");
5508 }
5509 
5510 static void
5511 print_optimize_info(FILE* f, regex_t* reg)
5512 {
5513  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5514  "EXACT_IC", "MAP",
5515  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5516 
5517  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5518  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5519  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5520  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5521  fprintf(f, "\n");
5522 
5523  if (reg->optimize) {
5524  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5525  fprintf(f, "\n");
5526  }
5527  fprintf(f, "\n");
5528 
5529  if (reg->exact) {
5530  UChar *p;
5531  fprintf(f, "exact: [");
5532  for (p = reg->exact; p < reg->exact_end; p++) {
5533  fputc(*p, f);
5534  }
5535  fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5536  }
5537  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5538  int c, i, n = 0;
5539 
5540  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5541  if (reg->map[i]) n++;
5542 
5543  fprintf(f, "map: n=%d\n", n);
5544  if (n > 0) {
5545  c = 0;
5546  fputc('[', f);
5547  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5548  if (reg->map[i] != 0) {
5549  if (c > 0) fputs(", ", f);
5550  c++;
5551  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5553  fputc(i, f);
5554  else
5555  fprintf(f, "%d", i);
5556  }
5557  }
5558  fprintf(f, "]\n");
5559  }
5560  }
5561 }
5562 #endif /* ONIG_DEBUG */
5563 
5564 
5565 extern void
5567 {
5568  if (IS_NOT_NULL(reg)) {
5569  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5570  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5571  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5573  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5574  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5575 
5576 #ifdef USE_NAMED_GROUP
5577  onig_names_free(reg);
5578 #endif
5579  }
5580 }
5581 
5582 extern void
5584 {
5585  if (IS_NOT_NULL(reg)) {
5586  onig_free_body(reg);
5587  xfree(reg);
5588  }
5589 }
5590 
5591 size_t
5593 {
5594  size_t size = sizeof(regex_t);
5595  if (IS_NULL(reg)) return 0;
5596  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5597  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5598  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5599  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5600  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5601  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5602 
5603  return size;
5604 }
5605 
5606 size_t
5608 {
5609  size_t size = sizeof(*regs);
5610  if (IS_NULL(regs)) return 0;
5611  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5612  return size;
5613 }
5614 
5615 #define REGEX_TRANSFER(to,from) do {\
5616  (to)->state = ONIG_STATE_MODIFY;\
5617  onig_free_body(to);\
5618  xmemcpy(to, from, sizeof(regex_t));\
5619  xfree(from);\
5620 } while (0)
5621 
5622 extern void
5624 {
5626  REGEX_TRANSFER(to, from);
5628 }
5629 
5630 #define REGEX_CHAIN_HEAD(reg) do {\
5631  while (IS_NOT_NULL((reg)->chain)) {\
5632  (reg) = (reg)->chain;\
5633  }\
5634 } while (0)
5635 
5636 extern void
5638 {
5640  REGEX_CHAIN_HEAD(to);
5641  to->chain = add;
5643 }
5644 
5645 extern void
5647 {
5648  regex_t *head, *prev;
5649 
5650  prev = reg;
5651  head = prev->chain;
5652  if (IS_NOT_NULL(head)) {
5653  reg->state = ONIG_STATE_MODIFY;
5654  while (IS_NOT_NULL(head->chain)) {
5655  prev = head;
5656  head = head->chain;
5657  }
5658  prev->chain = (regex_t* )NULL;
5659  REGEX_TRANSFER(reg, head);
5660  }
5661 }
5662 
5663 #ifdef ONIG_DEBUG
5664 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5665 #endif
5666 #ifdef ONIG_DEBUG_PARSE_TREE
5667 static void print_tree P_((FILE* f, Node* node));
5668 #endif
5669 
5670 extern int
5671 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5672  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5673 {
5674 #define COMPILE_INIT_SIZE 20
5675 
5676  int r;
5677  OnigDistance init_size;
5678  Node* root;
5679  ScanEnv scan_env = {0};
5680 #ifdef USE_SUBEXP_CALL
5681  UnsetAddrList uslist;
5682 #endif
5683 
5684  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5685 
5686  scan_env.sourcefile = sourcefile;
5687  scan_env.sourceline = sourceline;
5688  reg->state = ONIG_STATE_COMPILING;
5689 
5690 #ifdef ONIG_DEBUG
5691  if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
5692 #endif
5693 
5694  if (reg->alloc == 0) {
5695  init_size = (pattern_end - pattern) * 2;
5696  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5697  r = BBUF_INIT(reg, init_size);
5698  if (r != 0) goto end;
5699  }
5700  else
5701  reg->used = 0;
5702 
5703  reg->num_mem = 0;
5704  reg->num_repeat = 0;
5705  reg->num_null_check = 0;
5706  reg->repeat_range_alloc = 0;
5707  reg->repeat_range = (OnigRepeatRange* )NULL;
5708 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5709  reg->num_comb_exp_check = 0;
5710 #endif
5711 
5712  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5713  if (r != 0) goto err;
5714 
5715 #ifdef ONIG_DEBUG_PARSE_TREE
5716 # if 0
5717  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5718  if (!onig_is_prelude()) {
5719  print_tree(stderr, root);
5720  }
5721 # endif
5722 #endif
5723 
5724 #ifdef USE_NAMED_GROUP
5725  /* mixed use named group and no-named group */
5726  if (scan_env.num_named > 0 &&
5729  if (scan_env.num_named != scan_env.num_mem)
5730  r = disable_noname_group_capture(&root, reg, &scan_env);
5731  else
5732  r = numbered_ref_check(root);
5733 
5734  if (r != 0) goto err;
5735  }
5736 #endif
5737 
5738 #ifdef USE_SUBEXP_CALL
5739  if (scan_env.num_call > 0) {
5740  r = unset_addr_list_init(&uslist, scan_env.num_call);
5741  if (r != 0) goto err;
5742  scan_env.unset_addr_list = &uslist;
5743  r = setup_subexp_call(root, &scan_env);
5744  if (r != 0) goto err_unset;
5745  r = subexp_recursive_check_trav(root, &scan_env);
5746  if (r < 0) goto err_unset;
5747  r = subexp_inf_recursive_check_trav(root, &scan_env);
5748  if (r != 0) goto err_unset;
5749 
5750  reg->num_call = scan_env.num_call;
5751  }
5752  else
5753  reg->num_call = 0;
5754 #endif
5755 
5756  r = setup_tree(root, reg, IN_ROOT, &scan_env);
5757  if (r != 0) goto err_unset;
5758 
5759 #ifdef ONIG_DEBUG_PARSE_TREE
5760  if (!onig_is_prelude()) print_tree(stderr, root);
5761 #endif
5762 
5763  reg->capture_history = scan_env.capture_history;
5764  reg->bt_mem_start = scan_env.bt_mem_start;
5765  reg->bt_mem_start |= reg->capture_history;
5766  if (IS_FIND_CONDITION(reg->options))
5768  else {
5769  reg->bt_mem_end = scan_env.bt_mem_end;
5770  reg->bt_mem_end |= reg->capture_history;
5771  }
5772 
5773 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5774  if (scan_env.backrefed_mem == 0
5775 #ifdef USE_SUBEXP_CALL
5776  || scan_env.num_call == 0
5777 #endif
5778  ) {
5779  setup_comb_exp_check(root, 0, &scan_env);
5780 #ifdef USE_SUBEXP_CALL
5781  if (scan_env.has_recursion != 0) {
5782  scan_env.num_comb_exp_check = 0;
5783  }
5784  else
5785 #endif
5786  if (scan_env.comb_exp_max_regnum > 0) {
5787  int i;
5788  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5789  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5790  scan_env.num_comb_exp_check = 0;
5791  break;
5792  }
5793  }
5794  }
5795  }
5796 
5797  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5798 #endif
5799 
5800  clear_optimize_info(reg);
5801 #ifndef ONIG_DONT_OPTIMIZE
5802  r = set_optimize_info_from_tree(root, reg, &scan_env);
5803  if (r != 0) goto err_unset;
5804 #endif
5805 
5806  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5807  xfree(scan_env.mem_nodes_dynamic);
5808  scan_env.mem_nodes_dynamic = (Node** )NULL;
5809  }
5810 
5811  r = compile_tree(root, reg);
5812  if (r == 0) {
5813  r = add_opcode(reg, OP_END);
5814 #ifdef USE_SUBEXP_CALL
5815  if (scan_env.num_call > 0) {
5816  r = unset_addr_list_fix(&uslist, reg);
5817  unset_addr_list_end(&uslist);
5818  if (r) goto err;
5819  }
5820 #endif
5821 
5822  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5824  else {
5825  if (reg->bt_mem_start != 0)
5827  else
5829  }
5830  }
5831 #ifdef USE_SUBEXP_CALL
5832  else if (scan_env.num_call > 0) {
5833  unset_addr_list_end(&uslist);
5834  }
5835 #endif
5836  onig_node_free(root);
5837 
5838 #ifdef ONIG_DEBUG_COMPILE
5839 #ifdef USE_NAMED_GROUP
5840  if (!onig_is_prelude()) onig_print_names(stderr, reg);
5841 #endif
5842  if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5843 #endif
5844 
5845  end:
5846  reg->state = ONIG_STATE_NORMAL;
5847  return r;
5848 
5849  err_unset:
5850 #ifdef USE_SUBEXP_CALL
5851  if (scan_env.num_call > 0) {
5852  unset_addr_list_end(&uslist);
5853  }
5854 #endif
5855  err:
5856  if (IS_NOT_NULL(scan_env.error)) {
5857  if (IS_NOT_NULL(einfo)) {
5858  einfo->enc = scan_env.enc;
5859  einfo->par = scan_env.error;
5860  einfo->par_end = scan_env.error_end;
5861  }
5862  }
5863 
5864  onig_node_free(root);
5865  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5866  xfree(scan_env.mem_nodes_dynamic);
5867  return r;
5868 }
5869 
5870 #ifdef USE_RECOMPILE_API
5871 extern int
5872 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5873  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5874  OnigErrorInfo* einfo)
5875 {
5876  int r;
5877  regex_t *new_reg;
5878 
5879  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5880  if (r) return r;
5881  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5882  onig_transfer(reg, new_reg);
5883  }
5884  else {
5885  onig_chain_link_add(reg, new_reg);
5886  }
5887  return 0;
5888 }
5889 #endif
5890 
5891 static int onig_inited = 0;
5892 
5893 extern int
5895  OnigCaseFoldType case_fold_flag,
5896  OnigEncoding enc, const OnigSyntaxType* syntax)
5897 {
5898  if (! onig_inited)
5899  onig_init();
5900 
5901  if (IS_NULL(reg))
5902  return ONIGERR_INVALID_ARGUMENT;
5903 
5904  if (ONIGENC_IS_UNDEF(enc))
5906 
5910  }
5911 
5912  (reg)->state = ONIG_STATE_MODIFY;
5913 
5914  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5915  option |= syntax->options;
5916  option &= ~ONIG_OPTION_SINGLELINE;
5917  }
5918  else
5919  option |= syntax->options;
5920 
5921  (reg)->enc = enc;
5922  (reg)->options = option;
5923  (reg)->syntax = syntax;
5924  (reg)->optimize = 0;
5925  (reg)->exact = (UChar* )NULL;
5926  (reg)->int_map = (int* )NULL;
5927  (reg)->int_map_backward = (int* )NULL;
5928  (reg)->chain = (regex_t* )NULL;
5929 
5930  (reg)->p = (UChar* )NULL;
5931  (reg)->alloc = 0;
5932  (reg)->used = 0;
5933  (reg)->name_table = (void* )NULL;
5934 
5935  (reg)->case_fold_flag = case_fold_flag;
5936  return 0;
5937 }
5938 
5939 extern int
5940 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5941  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5942  OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5943 {
5944  int r;
5945 
5946  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5947  if (r) return r;
5948 
5949  r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
5950  return r;
5951 }
5952 
5953 extern int
5954 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5955  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5956  OnigErrorInfo* einfo)
5957 {
5958  int r;
5959 
5960  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5961  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5962 
5963  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5964  if (r) goto err;
5965 
5966  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
5967  if (r) {
5968  err:
5969  onig_free(*reg);
5970  *reg = NULL;
5971  }
5972  return r;
5973 }
5974 
5975 
5976 extern int
5978 {
5979  if (onig_inited != 0)
5980  return 0;
5981 
5984 
5985  onig_inited = 1;
5986 
5987  onigenc_init();
5988  /* onigenc_set_default_caseconv_table((UChar* )0); */
5989 
5990 #ifdef ONIG_DEBUG_STATISTICS
5991  onig_statistics_init();
5992 #endif
5993 
5995  return 0;
5996 }
5997 
5998 
5999 extern int
6001 {
6003 
6004 #ifdef ONIG_DEBUG_STATISTICS
6005  if (!onig_is_prelude()) onig_print_statistics(stderr);
6006 #endif
6007 
6008 #ifdef USE_SHARED_CCLASS_TABLE
6010 #endif
6011 
6012 #ifdef USE_PARSE_TREE_NODE_RECYCLE
6014 #endif
6015 
6016  onig_inited = 0;
6017 
6020  return 0;
6021 }
6022 
6023 extern int
6025 {
6026  OnigCodePoint n, *data;
6027  OnigCodePoint low, high, x;
6028 
6029  GET_CODE_POINT(n, p);
6030  data = (OnigCodePoint* )p;
6031  data++;
6032 
6033  for (low = 0, high = n; low < high; ) {
6034  x = (low + high) >> 1;
6035  if (code > data[x * 2 + 1])
6036  low = x + 1;
6037  else
6038  high = x;
6039  }
6040 
6041  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6042 }
6043 
6044 extern int
6046 {
6047  int found;
6048 
6049  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6050  if (IS_NULL(cc->mbuf)) {
6051  found = 0;
6052  }
6053  else {
6054  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6055  }
6056  }
6057  else {
6058  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6059  }
6060 
6061  if (IS_NCCLASS_NOT(cc))
6062  return !found;
6063  else
6064  return found;
6065 }
6066 
6067 extern int
6069 {
6070  int len;
6071 
6072  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6073  len = 2;
6074  }
6075  else {
6076  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6077  }
6078  return onig_is_code_in_cc_len(len, code, cc);
6079 }
6080 
6081 
6082 #ifdef ONIG_DEBUG
6083 
6084 /* arguments type */
6085 #define ARG_SPECIAL -1
6086 #define ARG_NON 0
6087 #define ARG_RELADDR 1
6088 #define ARG_ABSADDR 2
6089 #define ARG_LENGTH 3
6090 #define ARG_MEMNUM 4
6091 #define ARG_OPTION 5
6092 #define ARG_STATE_CHECK 6
6093 
6094 OnigOpInfoType OnigOpInfo[] = {
6095  { OP_FINISH, "finish", ARG_NON },
6096  { OP_END, "end", ARG_NON },
6097  { OP_EXACT1, "exact1", ARG_SPECIAL },
6098  { OP_EXACT2, "exact2", ARG_SPECIAL },
6099  { OP_EXACT3, "exact3", ARG_SPECIAL },
6100  { OP_EXACT4, "exact4", ARG_SPECIAL },
6101  { OP_EXACT5, "exact5", ARG_SPECIAL },
6102  { OP_EXACTN, "exactn", ARG_SPECIAL },
6103  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6104  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6105  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6106  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6107  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6108  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6109  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6110  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6111  { OP_CCLASS, "cclass", ARG_SPECIAL },
6112  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6113  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6114  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6115  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6116  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6117  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
6118  { OP_ANYCHAR, "anychar", ARG_NON },
6119  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6120  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6121  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6122  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6123  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6124  { OP_WORD, "word", ARG_NON },
6125  { OP_NOT_WORD, "not-word", ARG_NON },
6126  { OP_WORD_BOUND, "word-bound", ARG_NON },
6127  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6128  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6129  { OP_WORD_END, "word-end", ARG_NON },
6130  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6131  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6132  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6133  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6134  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6135  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6136  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6137  { OP_END_BUF, "end-buf", ARG_NON },
6138  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6139  { OP_END_LINE, "end-line", ARG_NON },
6140  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6141  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6142  { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
6143  { OP_BACKREF1, "backref1", ARG_NON },
6144  { OP_BACKREF2, "backref2", ARG_NON },
6145  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6146  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6147  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6148  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6149  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6150  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6151  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6152  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6153  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6154  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6155  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6156  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6157  { OP_SET_OPTION, "set-option", ARG_OPTION },
6158  { OP_KEEP, "keep", ARG_NON },
6159  { OP_FAIL, "fail", ARG_NON },
6160  { OP_JUMP, "jump", ARG_RELADDR },
6161  { OP_PUSH, "push", ARG_RELADDR },
6162  { OP_POP, "pop", ARG_NON },
6163  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6164  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6165  { OP_REPEAT, "repeat", ARG_SPECIAL },
6166  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6167  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6168  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6169  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6170  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6171  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6172  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6173  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6174  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6175  { OP_PUSH_POS, "push-pos", ARG_NON },
6176  { OP_POP_POS, "pop-pos", ARG_NON },
6177  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6178  { OP_FAIL_POS, "fail-pos", ARG_NON },
6179  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6180  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6181  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6182  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6183  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6184  { OP_CALL, "call", ARG_ABSADDR },
6185  { OP_RETURN, "return", ARG_NON },
6186  { OP_CONDITION, "condition", ARG_SPECIAL },
6187  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6188  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6189  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6190  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6192  "state-check-anychar-ml*", ARG_STATE_CHECK },
6193  { -1, "", ARG_NON }
6194 };
6195 
6196 static const char*
6197 op2name(int opcode)
6198 {
6199  int i;
6200 
6201  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6202  if (opcode == OnigOpInfo[i].opcode)
6203  return OnigOpInfo[i].name;
6204  }
6205  return "";
6206 }
6207 
6208 static int
6209 op2arg_type(int opcode)
6210 {
6211  int i;
6212 
6213  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6214  if (opcode == OnigOpInfo[i].opcode)
6215  return OnigOpInfo[i].arg_type;
6216  }
6217  return ARG_SPECIAL;
6218 }
6219 
6220 static void
6221 Indent(FILE* f, int indent)
6222 {
6223  int i;
6224  for (i = 0; i < indent; i++) putc(' ', f);
6225 }
6226 
6227 static void
6228 p_string(FILE* f, int len, UChar* s)
6229 {
6230  fputs(":", f);
6231  while (len-- > 0) { fputc(*s++, f); }
6232 }
6233 
6234 static void
6235 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6236 {
6237  int x = len * mb_len;
6238 
6239  fprintf(f, ":%d:", len);
6240  while (x-- > 0) { fputc(*s++, f); }
6241 }
6242 
6243 extern void
6244 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6245  OnigEncoding enc)
6246 {
6247  int i, n, arg_type;
6248  RelAddrType addr;
6249  LengthType len;
6250  MemNumType mem;
6251  StateCheckNumType scn;
6252  OnigCodePoint code;
6253  UChar *q;
6254 
6255  fprintf(f, "[%s", op2name(*bp));
6256  arg_type = op2arg_type(*bp);
6257  if (arg_type != ARG_SPECIAL) {
6258  bp++;
6259  switch (arg_type) {
6260  case ARG_NON:
6261  break;
6262  case ARG_RELADDR:
6263  GET_RELADDR_INC(addr, bp);
6264  fprintf(f, ":(%d)", addr);
6265  break;
6266  case ARG_ABSADDR:
6267  GET_ABSADDR_INC(addr, bp);
6268  fprintf(f, ":(%d)", addr);
6269  break;
6270  case ARG_LENGTH:
6271  GET_LENGTH_INC(len, bp);
6272  fprintf(f, ":%d", len);
6273  break;
6274  case ARG_MEMNUM:
6275  mem = *((MemNumType* )bp);
6276  bp += SIZE_MEMNUM;
6277  fprintf(f, ":%d", mem);
6278  break;
6279  case ARG_OPTION:
6280  {
6281  OnigOptionType option = *((OnigOptionType* )bp);
6282  bp += SIZE_OPTION;
6283  fprintf(f, ":%d", option);
6284  }
6285  break;
6286 
6287  case ARG_STATE_CHECK:
6288  scn = *((StateCheckNumType* )bp);
6289  bp += SIZE_STATE_CHECK_NUM;
6290  fprintf(f, ":%d", scn);
6291  break;
6292  }
6293  }
6294  else {
6295  switch (*bp++) {
6296  case OP_EXACT1:
6299  p_string(f, 1, bp++); break;
6300  case OP_EXACT2:
6301  p_string(f, 2, bp); bp += 2; break;
6302  case OP_EXACT3:
6303  p_string(f, 3, bp); bp += 3; break;
6304  case OP_EXACT4:
6305  p_string(f, 4, bp); bp += 4; break;
6306  case OP_EXACT5:
6307  p_string(f, 5, bp); bp += 5; break;
6308  case OP_EXACTN:
6309  GET_LENGTH_INC(len, bp);
6310  p_len_string(f, len, 1, bp);
6311  bp += len;
6312  break;
6313 
6314  case OP_EXACTMB2N1:
6315  p_string(f, 2, bp); bp += 2; break;
6316  case OP_EXACTMB2N2:
6317  p_string(f, 4, bp); bp += 4; break;
6318  case OP_EXACTMB2N3:
6319  p_string(f, 6, bp); bp += 6; break;
6320  case OP_EXACTMB2N:
6321  GET_LENGTH_INC(len, bp);
6322  p_len_string(f, len, 2, bp);
6323  bp += len * 2;
6324  break;
6325  case OP_EXACTMB3N:
6326  GET_LENGTH_INC(len, bp);
6327  p_len_string(f, len, 3, bp);
6328  bp += len * 3;
6329  break;
6330  case OP_EXACTMBN:
6331  {
6332  int mb_len;
6333 
6334  GET_LENGTH_INC(mb_len, bp);
6335  GET_LENGTH_INC(len, bp);
6336  fprintf(f, ":%d:%d:", mb_len, len);
6337  n = len * mb_len;
6338  while (n-- > 0) { fputc(*bp++, f); }
6339  }
6340  break;
6341 
6342  case OP_EXACT1_IC:
6343  len = enclen(enc, bp, bpend);
6344  p_string(f, len, bp);
6345  bp += len;
6346  break;
6347  case OP_EXACTN_IC:
6348  GET_LENGTH_INC(len, bp);
6349  p_len_string(f, len, 1, bp);
6350  bp += len;
6351  break;
6352 
6353  case OP_CCLASS:
6354  n = bitset_on_num((BitSetRef )bp);
6355  bp += SIZE_BITSET;
6356  fprintf(f, ":%d", n);
6357  break;
6358 
6359  case OP_CCLASS_NOT:
6360  n = bitset_on_num((BitSetRef )bp);
6361  bp += SIZE_BITSET;
6362  fprintf(f, ":%d", n);
6363  break;
6364 
6365  case OP_CCLASS_MB:
6366  case OP_CCLASS_MB_NOT:
6367  GET_LENGTH_INC(len, bp);
6368  q = bp;
6369 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6370  ALIGNMENT_RIGHT(q);
6371 #endif
6372  GET_CODE_POINT(code, q);
6373  bp += len;
6374  fprintf(f, ":%d:%d", (int )code, len);
6375  break;
6376 
6377  case OP_CCLASS_MIX:
6378  case OP_CCLASS_MIX_NOT:
6379  n = bitset_on_num((BitSetRef )bp);
6380  bp += SIZE_BITSET;
6381  GET_LENGTH_INC(len, bp);
6382  q = bp;
6383 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6384  ALIGNMENT_RIGHT(q);
6385 #endif
6386  GET_CODE_POINT(code, q);
6387  bp += len;
6388  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6389  break;
6390 
6391  case OP_CCLASS_NODE:
6392  {
6393  CClassNode *cc;
6394 
6395  GET_POINTER_INC(cc, bp);
6396  n = bitset_on_num(cc->bs);
6397  fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
6398  }
6399  break;
6400 
6401  case OP_BACKREFN_IC:
6402  mem = *((MemNumType* )bp);
6403  bp += SIZE_MEMNUM;
6404  fprintf(f, ":%d", mem);
6405  break;
6406 
6407  case OP_BACKREF_MULTI_IC:
6408  case OP_BACKREF_MULTI:
6409  fputs(" ", f);
6410  GET_LENGTH_INC(len, bp);
6411  for (i = 0; i < len; i++) {
6412  GET_MEMNUM_INC(mem, bp);
6413  if (i > 0) fputs(", ", f);
6414  fprintf(f, "%d", mem);
6415  }
6416  break;
6417 
6418  case OP_BACKREF_WITH_LEVEL:
6419  {
6420  OnigOptionType option;
6421  LengthType level;
6422 
6423  GET_OPTION_INC(option, bp);
6424  fprintf(f, ":%d", option);
6425  GET_LENGTH_INC(level, bp);
6426  fprintf(f, ":%d", level);
6427 
6428  fputs(" ", f);
6429  GET_LENGTH_INC(len, bp);
6430  for (i = 0; i < len; i++) {
6431  GET_MEMNUM_INC(mem, bp);
6432  if (i > 0) fputs(", ", f);
6433  fprintf(f, "%d", mem);
6434  }
6435  }
6436  break;
6437 
6438  case OP_REPEAT:
6439  case OP_REPEAT_NG:
6440  {
6441  mem = *((MemNumType* )bp);
6442  bp += SIZE_MEMNUM;
6443  addr = *((RelAddrType* )bp);
6444  bp += SIZE_RELADDR;
6445  fprintf(f, ":%d:%d", mem, addr);
6446  }
6447  break;
6448 
6450  case OP_PUSH_IF_PEEK_NEXT:
6451  addr = *((RelAddrType* )bp);
6452  bp += SIZE_RELADDR;
6453  fprintf(f, ":(%d)", addr);
6454  p_string(f, 1, bp);
6455  bp += 1;
6456  break;
6457 
6458  case OP_LOOK_BEHIND:
6459  GET_LENGTH_INC(len, bp);
6460  fprintf(f, ":%d", len);
6461  break;
6462 
6464  GET_RELADDR_INC(addr, bp);
6465  GET_LENGTH_INC(len, bp);
6466  fprintf(f, ":%d:(%d)", len, addr);
6467  break;
6468 
6469  case OP_STATE_CHECK_PUSH:
6471  scn = *((StateCheckNumType* )bp);
6472  bp += SIZE_STATE_CHECK_NUM;
6473  addr = *((RelAddrType* )bp);
6474  bp += SIZE_RELADDR;
6475  fprintf(f, ":%d:(%d)", scn, addr);
6476  break;
6477 
6478  case OP_CONDITION:
6479  GET_MEMNUM_INC(mem, bp);
6480  GET_RELADDR_INC(addr, bp);
6481  fprintf(f, ":%d:(%d)", mem, addr);
6482  break;
6483 
6484  default:
6485  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6486  *--bp);
6487  }
6488  }
6489  fputs("]", f);
6490  if (nextp) *nextp = bp;
6491 }
6492 
6493 static void
6494 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6495 {
6496  int ncode;
6497  UChar* bp = reg->p;
6498  UChar* end = reg->p + reg->used;
6499 
6500  fprintf(f, "code length: %d", reg->used);
6501 
6502  ncode = -1;
6503  while (bp < end) {
6504  ncode++;
6505  if (ncode % 5 == 0)
6506  fprintf(f, "\n%ld:", bp - reg->p);
6507  else
6508  fprintf(f, " %ld:", bp - reg->p);
6509  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6510  }
6511 
6512  fprintf(f, "\n");
6513 }
6514 
6515 static void
6516 print_indent_tree(FILE* f, Node* node, int indent)
6517 {
6518  int i, type, container_p = 0;
6519  int add = 3;
6520  UChar* p;
6521 
6522  Indent(f, indent);
6523  if (IS_NULL(node)) {
6524  fprintf(f, "ERROR: null node!!!\n");
6525  exit (0);
6526  }
6527 
6528  type = NTYPE(node);
6529  switch (type) {
6530  case NT_LIST:
6531  case NT_ALT:
6532  if (NTYPE(node) == NT_LIST)
6533  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
6534  else
6535  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
6536 
6537  print_indent_tree(f, NCAR(node), indent + add);
6538  while (IS_NOT_NULL(node = NCDR(node))) {
6539  if (NTYPE(node) != type) {
6540  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6541  exit(0);
6542  }
6543  print_indent_tree(f, NCAR(node), indent + add);
6544  }
6545  break;
6546 
6547  case NT_STR:
6548  fprintf(f, "<string%s:%"PRIxPTR">",
6549  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
6550  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6551  if (*p >= 0x20 && *p < 0x7f)
6552  fputc(*p, f);
6553  else {
6554  fprintf(f, " 0x%02x", *p);
6555  }
6556  }
6557  break;
6558 
6559  case NT_CCLASS:
6560  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
6561  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6562  if (NCCLASS(node)->mbuf) {
6563  BBuf* bbuf = NCCLASS(node)->mbuf;
6564  for (i = 0; i < (int )bbuf->used; i++) {
6565  if (i > 0) fprintf(f, ",");
6566  fprintf(f, "%0x", bbuf->p[i]);
6567  }
6568  }
6569  break;
6570 
6571  case NT_CTYPE:
6572  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
6573  switch (NCTYPE(node)->ctype) {
6574  case ONIGENC_CTYPE_WORD:
6575  if (NCTYPE(node)->not != 0)
6576  fputs("not word", f);
6577  else
6578  fputs("word", f);
6579  break;
6580 
6581  default:
6582  fprintf(f, "ERROR: undefined ctype.\n");
6583  exit(0);
6584  }
6585  break;
6586 
6587  case NT_CANY:
6588  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
6589  break;
6590 
6591  case NT_ANCHOR:
6592  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
6593  switch (NANCHOR(node)->type) {
6594  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6595  case ANCHOR_END_BUF: fputs("end buf", f); break;
6596  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6597  case ANCHOR_END_LINE: fputs("end line", f); break;
6598  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6599  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6600  case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
6601 
6602  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6603  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6604 #ifdef USE_WORD_BEGIN_END
6605  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6606  case ANCHOR_WORD_END: fputs("word end", f); break;
6607 #endif
6608  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6609  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6610  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6611  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6612  case ANCHOR_KEEP: fputs("keep",f); break;
6613 
6614  default:
6615  fprintf(f, "ERROR: undefined anchor type.\n");
6616  break;
6617  }
6618  break;
6619 
6620  case NT_BREF:
6621  {
6622  int* p;
6623  BRefNode* br = NBREF(node);
6624  p = BACKREFS_P(br);
6625  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
6626  for (i = 0; i < br->back_num; i++) {
6627  if (i > 0) fputs(", ", f);
6628  fprintf(f, "%d", p[i]);
6629  }
6630  }
6631  break;
6632 
6633 #ifdef USE_SUBEXP_CALL
6634  case NT_CALL:
6635  {
6636  CallNode* cn = NCALL(node);
6637  fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
6638  p_string(f, cn->name_end - cn->name, cn->name);
6639  }
6640  break;
6641 #endif
6642 
6643  case NT_QTFR:
6644  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
6645  NQTFR(node)->lower, NQTFR(node)->upper,
6646  (NQTFR(node)->greedy ? "" : "?"));
6647  print_indent_tree(f, NQTFR(node)->target, indent + add);
6648  break;
6649 
6650  case NT_ENCLOSE:
6651  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
6652  switch (NENCLOSE(node)->type) {
6653  case ENCLOSE_OPTION:
6654  fprintf(f, "option:%d", NENCLOSE(node)->option);
6655  break;
6656  case ENCLOSE_MEMORY:
6657  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6658  break;
6660  fprintf(f, "stop-bt");
6661  break;
6662  case ENCLOSE_CONDITION:
6663  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6664  break;
6665 
6666  default:
6667  break;
6668  }
6669  fprintf(f, "\n");
6670  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6671  break;
6672 
6673  default:
6674  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6675  break;
6676  }
6677 
6678  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6679  type != NT_ENCLOSE)
6680  fprintf(f, "\n");
6681 
6682  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6683 
6684  fflush(f);
6685 }
6686 #endif /* ONIG_DEBUG */
6687 
6688 #ifdef ONIG_DEBUG_PARSE_TREE
6689 static void
6690 print_tree(FILE* f, Node* node)
6691 {
6692  print_indent_tree(f, node, 0);
6693 }
6694 #endif
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:691
void onig_transfer(regex_t *to, regex_t *from)
Definition: regcomp.c:5623
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:517
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:696
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:382
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:108
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:337
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:144
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6323
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5940
static int add_bitset(regex_t *reg, BitSetRef bs)
Definition: regcomp.c:298
static void concat_opt_exact_info_str(OptExactInfo *to, UChar *s, UChar *end, int raw ARG_UNUSED, OnigEncoding enc)
Definition: regcomp.c:4597
int is_refered
Definition: regparse.h:186
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: oniguruma.h:563
unsigned int OnigCodePoint
Definition: oniguruma.h:114
#define IS_NULL(p)
Definition: regint.h:278
#define ENCLOSE_MEMORY
Definition: regparse.h:92
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: oniguruma.h:131
unsigned int alloc
Definition: regint.h:423
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5954
#define OPT_EXACT_MAXLEN
Definition: regcomp.c:4294
int * int_map_backward
Definition: oniguruma.h:694
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:388
UChar * end
Definition: regparse.h:170
int onig_free_node_list(void)
Definition: regparse.c:1110
#define ALLOWED_ANCHOR_IN_LB_NOT
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: oniguruma.h:583
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1443
int regnum
Definition: regparse.h:196
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1259
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:326
static void alt_merge_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OptEnv *env)
Definition: regcomp.c:4614
static int get_char_length_tree(Node *node, regex_t *reg, int *len)
Definition: regcomp.c:2512
static void concat_opt_anc_info(OptAncInfo *to, OptAncInfo *left, OptAncInfo *right, OnigDistance left_len, OnigDistance right_len)
Definition: regcomp.c:4484
struct _Node * target
Definition: regparse.h:226
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: oniguruma.h:302
#define NSTRING_IS_RAW(node)
Definition: regparse.h:111
void onig_print_compiled_byte_code(FILE *f, UChar *bp, UChar *bpend, UChar **nextp, OnigEncoding enc)
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:301
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: oniguruma.h:135
int i
Definition: win32ole.c:784
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:340
#define ONIG_OPTION_SINGLELINE
Definition: oniguruma.h:358
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:323
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define ANCHOR_END_BUF
Definition: regint.h:503
static int compile_length_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:975
#define SIZE_OPCODE
Definition: regint.h:647
static int max(int a, int b)
Definition: strftime.c:141
#define BBUF_ADD1(buf, byte)
Definition: regint.h:465
#define d1
if(dispIdMember==DISPID_VALUE)
Definition: win32ole.c:791
#define ANCHOR_WORD_BEGIN
Definition: regint.h:509
#define SINGLE_BYTE_SIZE
Definition: regint.h:392
#define SIZE_MEMNUM
Definition: regint.h:651
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:146
#define SIZE_OP_REPEAT_INC
Definition: regint.h:684
int num_call
Definition: regparse.h:301
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:112
static int compile_call(CallNode *node, regex_t *reg)
Definition: regcomp.c:401
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:450
static int add_multi_byte_cclass(BBuf *mbuf, regex_t *reg)
Definition: regcomp.c:563
static int get_char_length_tree1(Node *node, regex_t *reg, int *len, int level)
Definition: regcomp.c:2388
MinMaxLen len
Definition: regcomp.c:4333
OnigUChar * par_end
Definition: oniguruma.h:635
unsigned int OnigCaseFoldType
Definition: oniguruma.h:121
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: oniguruma.h:234
static void swap_node(Node *a, Node *b)
Definition: regcomp.c:71
int sourceline
Definition: regparse.h:320
unsigned int bt_mem_end
Definition: oniguruma.h:672
#define REGEX_CHAIN_HEAD(reg)
Definition: regcomp.c:5630
#define NST_CALLED
Definition: regparse.h:133
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:150
static int comp_distance_value(MinMaxLen *d1, MinMaxLen *d2, int v1, int v2)
Definition: regcomp.c:4398
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:283
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5592
#define THREAD_SYSTEM_END
Definition: regint.h:117
static int add_bytes(regex_t *reg, UChar *bytes, OnigDistance len)
Definition: regcomp.c:291
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:844
int onig_names_free(regex_t *reg)
Definition: regparse.c:486
#define ANCHOR_BEGIN_LINE
Definition: regint.h:501
OnigPosition * end
Definition: oniguruma.h:616
const int id
Definition: nkf.c:209
#define SIZE_OP_CALL
Definition: regint.h:706
static void select_opt_exact_info(OnigEncoding enc, OptExactInfo *now, OptExactInfo *alt)
Definition: regcomp.c:4653
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6024
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:735
int nest_level
Definition: regparse.h:238
UChar * error
Definition: regparse.h:298
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1479
MinMaxLen mmd
Definition: regcomp.c:4325
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define SIZE_OP_POP_POS
Definition: regint.h:688
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:767
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: oniguruma.h:280
int reach_end
Definition: regcomp.c:4318
static void concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo *to, NodeOptInfo *add)
Definition: regcomp.c:4833
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2383
static OnigDistance distance_add(OnigDistance d1, OnigDistance d2)
Definition: regcomp.c:96
#define NENCLOSE(node)
Definition: regparse.h:79
static int subexp_recursive_check(Node *node)
Definition: regcomp.c:2994
OptExactInfo exm
Definition: regcomp.c:4337
static int compile_length_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:506
int target_empty_info
Definition: regparse.h:183
UChar * s
Definition: regparse.h:169
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:149
OnigDistance anchor_dmax
Definition: oniguruma.h:688
#define ANCHOR_END_LINE
Definition: regint.h:505
#define ANCHOR_PREC_READ
Definition: regint.h:511
int opt_count
Definition: regparse.h:204
UnsetAddrList * unset_addr_list
Definition: regparse.h:303
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:161
#define NT_QTFR
Definition: regparse.h:45
#define SIZE_ABSADDR
Definition: regint.h:649
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: oniguruma.h:591
static int map_position_value(OnigEncoding enc, int i)
Definition: regcomp.c:4345
struct _Node * next_head_exact
Definition: regparse.h:185
int onigenc_strlen(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:123
static void set_optimize_map_info(regex_t *reg, OptMapInfo *m)
Definition: regcomp.c:5312
#define CKN_ON
Definition: regcomp.c:736
#define GET_CODE_POINT(code, p)
Definition: regint.h:669
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: oniguruma.h:584
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1277
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:121
void * PointerType
Definition: regint.h:645
int rb_const_defined(VALUE, ID)
Definition: variable.c:2103
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: oniguruma.h:692
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:158
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:347
#define ALLOWED_ANCHOR_IN_LB
static int subexp_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:3050
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:191
#define REPEAT_RANGE_ALLOC
static int optimize_node_left(Node *node, NodeOptInfo *opt, OptEnv *env)
Definition: regcomp.c:4907
int onig_init(void)
Definition: regcomp.c:5977
static int compile_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:515
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:326
#define NST_MEM_BACKREFED
Definition: regparse.h:130
Definition: regint.h:599
static int add_opcode_rel_addr(regex_t *reg, int opcode, int addr)
Definition: regcomp.c:280
#define BACKREFS_P(br)
Definition: regparse.h:116
#define GET_OPTION_INC(option, p)
Definition: regint.h:664
#define NST_RECURSION
Definition: regparse.h:132
#define ONIGERR_TYPE_BUG
Definition: oniguruma.h:530
unsigned char * p
Definition: oniguruma.h:660
#define GET_LENGTH_INC(len, p)
Definition: regint.h:661
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:502
UnsetAddr * us
Definition: regparse.h:217
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: oniguruma.h:499
#define ONIG_STATE_MODIFY
Definition: oniguruma.h:653
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4305
void onig_chain_link_add(regex_t *to, regex_t *add)
Definition: regcomp.c:5637
#define RECURSION_INFINITE
Definition: regcomp.c:2852
regex_t * reg
Definition: regparse.h:300
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: oniguruma.h:561
#define NST_ADDR_FIXED
Definition: regparse.h:134
OptAncInfo anc
Definition: regcomp.c:4316
int left_anchor
Definition: regcomp.c:4310
unsigned int OnigOptionType
Definition: oniguruma.h:348
OnigDistance anchor_dmin
Definition: oniguruma.h:687
static int set_optimize_exact_info(regex_t *reg, OptExactInfo *e)
Definition: regcomp.c:5256
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:173
#define ONIGENC_CTYPE_WORD
Definition: oniguruma.h:208
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:508
int num_named
Definition: regparse.h:307
unsigned int bt_mem_start
Definition: oniguruma.h:671
MinMaxLen mmd
Definition: regcomp.c:4315
int char_len
Definition: regparse.h:203
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: oniguruma.h:242
unsigned int BitStatusType
Definition: regint.h:332
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:336
unsigned char * exact
Definition: oniguruma.h:690
OptAncInfo anc
Definition: regcomp.c:4335
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:678
static int add_opcode_option(regex_t *reg, int opcode, OnigOptionType option)
Definition: regcomp.c:305
#define SIZE_OP_FAIL
Definition: regint.h:692
static int compile_tree_empty_check(Node *node, regex_t *reg, int empty_info)
Definition: regcomp.c:369
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:652
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:151
#define NBREF(node)
Definition: regparse.h:77
#define NT_ALT
Definition: regparse.h:49
#define IN_ALT
Definition: regcomp.c:3834
#define head
Definition: st.c:107
static void unset_addr_list_end(UnsetAddrList *uslist)
Definition: regcomp.c:181
#define ALLOWED_ENCLOSE_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5566
#define COMPILE_INIT_SIZE
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:335
#define SIZE_OP_MEMORY_START
Definition: regint.h:693
UnsetAddrList * unset_addr_list
Definition: regparse.h:227
#define SIZE_OPTION
Definition: regint.h:654
static int compile_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:616
struct _Node * target
Definition: regparse.h:244
int group_num
Definition: regparse.h:223
static int setup_tree(Node *node, regex_t *reg, int state, ScanEnv *env)
Definition: regcomp.c:3849
static int compile_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:1040
#define NCAR(node)
Definition: regparse.h:84
#define COMP_EM_BASE
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define xalloca
Definition: regint.h:189
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5671
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:699
Definition: regint.h:420
struct _Node * head_exact
Definition: regparse.h:184
static int add_option(regex_t *reg, OnigOptionType option)
Definition: regcomp.c:273
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define IS_IGNORECASE(option)
Definition: regint.h:363
static int numbered_ref_check(Node *node)
Definition: regcomp.c:1980
#define SIZE_OP_CONDITION
Definition: regint.h:708
void onig_chain_reduce(regex_t *reg)
Definition: regcomp.c:5646
BitSet bs
Definition: regint.h:779
static int is_full_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4545
int state
Definition: regparse.h:234
OnigRepeatRange * repeat_range
Definition: oniguruma.h:675
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:662
static int set_optimize_info_from_tree(Node *node, regex_t *reg, ScanEnv *scan_env)
Definition: regcomp.c:5340
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:318
static int add_rel_addr(regex_t *reg, int addr)
Definition: regcomp.c:228
int RelAddrType
Definition: regint.h:639
#define STACK_POP_LEVEL_ALL
Definition: regint.h:319
const char * sourcefile
Definition: regparse.h:319
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:329
static int quantifiers_memory_node_info(Node *node)
Definition: regcomp.c:2073
static int compile_length_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1168
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:148
#define THREAD_ATOMIC_END
Definition: regint.h:119
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:701
static void add_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4442
#define putc(_c, _stream)
Definition: win32.h:125
#define IS_NCCLASS_SHARE(nd)
Definition: regint.h:768
#define ONIG_IS_OPTION_ON(options, option)
Definition: oniguruma.h:378
static int compile_length_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1567
static int check_type_tree(Node *node, int type_mask, int enclose_mask, int anchor_mask)
Definition: regcomp.c:2801
int capa
Definition: regparse.h:172
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2384
#define RECURSION_EXIST
Definition: regcomp.c:2851
OnigOptionType options
Definition: regcomp.c:4304
#define SIZE_OP_POP
Definition: regint.h:681
OnigCaseFoldType case_fold_flag
Definition: oniguruma.h:680
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4904
static void set_mml(MinMaxLen *mml, OnigDistance min, OnigDistance max)
Definition: regcomp.c:4422
OnigDistance min_len
Definition: regparse.h:201
int AbsAddrType
Definition: regint.h:640
#define ONIG_CHAR_TABLE_SIZE
Definition: oniguruma.h:647
int onigenc_init(void)
Definition: regenc.c:36
#define NULL_NODE
Definition: regparse.h:280
static void clear_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4551
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: oniguruma.h:361
#define level
#define BBUF_INIT(buf, size)
Definition: regint.h:426
#define ONIG_STATE_COMPILING
Definition: oniguruma.h:652
static int disable_noname_group_capture(Node **root, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:2011
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:141
const OnigSyntaxType * syntax
Definition: regparse.h:291
#define ONIG_STATE_NORMAL
Definition: oniguruma.h:650
static void copy_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4435
#define ARG_UNUSED
Definition: nkf.h:181
#define IS_MULTILINE(option)
Definition: regint.h:362
static void alt_merge_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4458
static int expand_case_fold_string(Node *node, regex_t *reg)
Definition: regcomp.c:3545
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:309
static int is_equal_mml(MinMaxLen *a, MinMaxLen *b)
Definition: regcomp.c:4415
#define ONIGERR_INVALID_BACKREF
Definition: oniguruma.h:573
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:384
static int divide_look_behind_alternatives(Node *node)
Definition: regcomp.c:3238
OptAncInfo anc
Definition: regcomp.c:4326
#define add(x, y)
Definition: date_strftime.c:23
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:704
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:122
#define BITSET_AT(bs, pos)
Definition: regint.h:414
int onig_free_shared_cclass_table(void)
Definition: regparse.c:5414
int * back_dynamic
Definition: regparse.h:237
int lower
Definition: regparse.h:180
#define NCCLASS(node)
Definition: regparse.h:75
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
#define xmemcpy
Definition: regint.h:182
static int distance_value(MinMaxLen *mm)
Definition: regcomp.c:4369
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:281
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1414
#define NCTYPE(node)
Definition: regparse.h:76
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5894
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:327
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:695
#define SIZE_OP_RETURN
Definition: regint.h:707
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:120
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5607
static void clear_node_opt_info(NodeOptInfo *opt)
Definition: regcomp.c:4816
static int compile_tree_n_times(Node *node, int n, regex_t *reg)
Definition: regcomp.c:416
static void alt_merge_opt_anc_info(OptAncInfo *to, OptAncInfo *add)
Definition: regcomp.c:4538
static int setup_subexp_call(Node *node, ScanEnv *env)
Definition: regcomp.c:3119
short int StateCheckNumType
Definition: regint.h:644
#define NT_LIST
Definition: regparse.h:48
struct re_pattern_buffer * chain
Definition: oniguruma.h:699
static void clear_optimize_info(regex_t *reg)
Definition: regcomp.c:5396
int err
Definition: win32.c:87
#define NT_ANCHOR
Definition: regparse.h:47
#define ANCHOR_WORD_END
Definition: regint.h:510
#define IN_REPEAT
Definition: regcomp.c:3836
#define FOUND_CALLED_NODE
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:677
#define ALLOWED_TYPE_IN_LB
static int unset_addr_list_init(UnsetAddrList *uslist, int size)
Definition: regcomp.c:168
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:303
#define SIZE_LENGTH
Definition: regint.h:650
int upper
Definition: regparse.h:181
#define NST_CLEN_FIXED
Definition: regparse.h:127
static int compile_length_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:467
static void copy_opt_anc_info(OptAncInfo *to, OptAncInfo *from)
Definition: regcomp.c:4478
static int update_string_node_case_fold(regex_t *reg, Node *node)
Definition: regcomp.c:3359
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: oniguruma.h:580
#define EXPAND_STRING_MAX_LENGTH
static int set_bm_skip(UChar *s, UChar *end, regex_t *reg, UChar skip[], int **int_skip, int ignore_case)
Definition: regcomp.c:4224
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:269
#define SIZE_OP_MEMORY_END
Definition: regint.h:697
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:328
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:694
#define NT_STR
Definition: regparse.h:40
int type
Definition: regparse.h:243
#define NQTFR(node)
Definition: regparse.h:78
static void alt_merge_node_opt_info(NodeOptInfo *to, NodeOptInfo *add, OptEnv *env)
Definition: regcomp.c:4892
#define SIZE_BITSET
Definition: regint.h:404
static int noname_disable_map(Node **plink, GroupNumRemap *map, int *counter)
Definition: regcomp.c:1835
#define THREAD_ATOMIC_START
Definition: regint.h:118
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: oniguruma.h:224
#define ONIGENC_CASE_FOLD_MIN
Definition: oniguruma.h:130
#define TRUE
Definition: nkf.h:175
struct _Node * target
Definition: regparse.h:211
static int compile_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1467
MinMaxLen mmd
Definition: regcomp.c:4302
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:155
Bits * BitSetRef
Definition: regint.h:402
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:683
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: oniguruma.h:192
OnigDistance max_len
Definition: regparse.h:202
unsigned int flag
Definition: regparse.h:171
#define NST_MARK2
Definition: regparse.h:129
#define NT_CANY
Definition: regparse.h:43
UChar * name_end
Definition: regparse.h:225
OnigEncoding enc
Definition: regcomp.c:4303
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:848
BBuf * mbuf
Definition: regint.h:780
int LengthType
Definition: regint.h:641
OptMapInfo map
Definition: regcomp.c:4340
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:90
int right_anchor
Definition: regcomp.c:4311
static int get_max_match_length(Node *node, OnigDistance *max, ScanEnv *env)
Definition: regcomp.c:2266
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:162
static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t *reg, Node **rnode)
Definition: regcomp.c:3427
static int add_compile_string_length(UChar *s ARG_UNUSED, int mb_len, OnigDistance str_len, regex_t *reg ARG_UNUSED, int ignore_case)
Definition: regcomp.c:428
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6068
unsigned int uintptr_t
Definition: win32.h:94
#define SIZE_OP_JUMP
Definition: regint.h:679
#define NTYPE(node)
Definition: regparse.h:71
static int is_anychar_star_quantifier(QtfrNode *qn)
Definition: regcomp.c:726
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:514
int state
Definition: regparse.h:178
int type
Definition: tcltklib.c:111
BitStatusType backrefed_mem
Definition: regparse.h:295
static int options(unsigned char *cp)
Definition: nkf.c:6355
#define P_(args)
Definition: oniguruma.h:71
int ignore_case
Definition: regcomp.c:4319
const char * name
Definition: oniguruma.h:162
static int renumber_node_backref(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1903
#define NCALL(node)
Definition: regparse.h:82
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:702
static int compile_length_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1434
OnigEncoding enc
Definition: oniguruma.h:633
static int expand_case_fold_make_rem_string(Node **rnode, UChar *s, UChar *end, regex_t *reg)
Definition: regcomp.c:3405
RUBY_EXTERN VALUE rb_cThread
Definition: ruby.h:1459
int num_mem
Definition: regparse.h:305
static void alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo *to, OptMapInfo *add)
Definition: regcomp.c:4781
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: oniguruma.h:270
OnigDistance max
Definition: regcomp.c:4298
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1253
int intptr_t
Definition: win32.h:86
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:113
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:145
static int add_pointer(regex_t *reg, void *addr)
Definition: regcomp.c:264
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:160
static int is_set_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4512
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: oniguruma.h:226
static Node * get_head_value_node(Node *node, int exact, regex_t *reg)
Definition: regcomp.c:2713
#define NT_CCLASS
Definition: regparse.h:41
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:700
static int setup_look_behind(Node *node, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:3268
#define ONIGERR_PARSER_BUG
Definition: oniguruma.h:531
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:504
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:322
unsigned int used
Definition: oniguruma.h:661
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1546
OnigPosition * beg
Definition: oniguruma.h:615
Definition: regint.h:524
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:147
#define ONIG_INFINITE_DISTANCE
Definition: oniguruma.h:119
UChar * error_end
Definition: regparse.h:299
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:705
Node ** mem_nodes_dynamic
Definition: regparse.h:311
static void set_sub_anchor(regex_t *reg, OptAncInfo *anc)
Definition: regcomp.c:5329
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4321
#define ONIG_OPTION_CAPTURE_GROUP
Definition: oniguruma.h:363
static int add_opcode(regex_t *reg, int opcode)
Definition: regcomp.c:210
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: oniguruma.h:268
static void copy_node_opt_info(NodeOptInfo *to, NodeOptInfo *from)
Definition: regcomp.c:4827
BitStatusType bt_mem_end
Definition: regparse.h:294
#define ONIGENC_MBC_MINLEN(enc)
Definition: oniguruma.h:266
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:94
void xfree(void *)
#define USE_SUBEXP_CALL
Definition: regint.h:60
static int is_left_anchor(int anc)
Definition: regcomp.c:4501
#define UChar
Definition: oniguruma.h:110
OnigEncoding enc
Definition: regparse.h:290
void onig_node_free(Node *node)
Definition: regparse.c:1027
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: oniguruma.h:574
#define NANCHOR(node)
Definition: regparse.h:80
static int subexp_inf_recursive_check(Node *node, ScanEnv *env, int head)
Definition: regcomp.c:2855
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:131
static int add_char_amb_opt_map_info(OptMapInfo *map, UChar *p, UChar *end, OnigEncoding enc, OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:4727
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:516
#define NT_BREF
Definition: regparse.h:44
#define SIZE_RELADDR
Definition: regint.h:648
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]
Definition: regcomp.c:50
OnigDistance min
Definition: regcomp.c:4297
#define NST_IN_REPEAT
Definition: regparse.h:137
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:319
static int compile_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:554
#define STACK_POP_LEVEL_FREE
Definition: regint.h:317
static void add_char_opt_map_info(OptMapInfo *map, UChar c, OnigEncoding enc)
Definition: regcomp.c:4718
#define CHECK_NULL_RETURN(p)
Definition: regint.h:280
#define SIZE_OP_FAIL_POS
Definition: regint.h:689
unsigned char * exact_end
Definition: oniguruma.h:691
static int add_length(regex_t *reg, OnigDistance len)
Definition: regcomp.c:246
OnigDistance dmax
Definition: oniguruma.h:696
static void copy_opt_map_info(OptMapInfo *to, OptMapInfo *from)
Definition: regcomp.c:4712
static void clear_opt_anc_info(OptAncInfo *anc)
Definition: regcomp.c:4471
int size
Definition: encoding.c:52
#define ONIGENC_IS_UNDEF(enc)
Definition: oniguruma.h:219
#define f
static int compile_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1660
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: oniguruma.h:236
unsigned int used
Definition: regint.h:422
#define SIZE_OP_SET_OPTION
Definition: regint.h:690
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: oniguruma.h:496
OnigRegexType regex_t
Definition: oniguruma.h:705
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:659
unsigned int alloc
Definition: oniguruma.h:662
#define NST_MARK1
Definition: regparse.h:128
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: oniguruma.h:598
#define xmalloc
Definition: defines.h:64
static int onig_inited
Definition: regcomp.c:5891
static int add_compile_string(UChar *s, int mb_len, OnigDistance str_len, regex_t *reg, int ignore_case)
Definition: regcomp.c:445
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:512
#define ENCLOSE_CONDITION
Definition: regparse.h:95
size_t OnigDistance
Definition: oniguruma.h:116
#define IN_NOT
Definition: regcomp.c:3835
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2267
#define ENCLOSE_OPTION
Definition: regparse.h:93
static int is_not_included(Node *x, Node *y, regex_t *reg)
Definition: regcomp.c:2519
#define NST_MIN_FIXED
Definition: regparse.h:125
unsigned int capture_history
Definition: oniguruma.h:670
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:152
#define ONIGERR_MEMORY
Definition: oniguruma.h:529
OptExactInfo exb
Definition: regcomp.c:4336
#define IS_FIND_CONDITION(option)
Definition: regint.h:367
#define THREAD_SYSTEM_INIT
Definition: regint.h:116
UChar * p
Definition: regint.h:421
static int renumber_by_map(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1931
static OnigDistance distance_multiply(OnigDistance d, int m)
Definition: regcomp.c:107
#define PRIuSIZE
Definition: ruby.h:189
#define NSTR(node)
Definition: regparse.h:74
v
Definition: win32ole.c:798
static void clear_opt_map_info(OptMapInfo *map)
Definition: regcomp.c:4684
static void select_opt_map_info(OptMapInfo *now, OptMapInfo *alt)
Definition: regcomp.c:4749
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:698
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
Definition: oniguruma.h:146
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:89
OptExactInfo expr
Definition: regcomp.c:4338
static void concat_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OnigEncoding enc)
Definition: regcomp.c:4568
static void set_bound_node_opt_info(NodeOptInfo *opt, MinMaxLen *mmd)
Definition: regcomp.c:4808
OnigDistance dmin
Definition: oniguruma.h:695
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: oniguruma.h:265
static int get_min_match_length(Node *node, OnigDistance *min, ScanEnv *env)
Definition: regcomp.c:2142
OnigOptionType option
Definition: regparse.h:288
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:142
static int compile_range_repeat_node(QtfrNode *qn, int target_len, int empty_info, regex_t *reg)
Definition: regcomp.c:690
static int compile_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1300
struct _Node * target
Definition: regparse.h:179
#define NTYPE2BIT(type)
Definition: regparse.h:53
#define SIZE_POINTER
Definition: regint.h:656
int greedy
Definition: regparse.h:182
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: oniguruma.h:362
#define ONIGERR_INVALID_ARGUMENT
Definition: oniguruma.h:539
int value
Definition: regcomp.c:4328
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6045
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6296
#define NT_CALL
Definition: regparse.h:50
#define xrealloc
Definition: defines.h:67
#define SIZE_OP_PUSH_POS
Definition: regint.h:686
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: oniguruma.h:536
#define NT_CTYPE
Definition: regparse.h:42
#define ONIG_OPTION_IGNORECASE
Definition: oniguruma.h:354
#define NST_MAX_FIXED
Definition: regparse.h:126
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:325
static int compile_length_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:586
#define ANCHOR_BEGIN_BUF
Definition: regint.h:500
#define IN_VAR_REPEAT
Definition: regcomp.c:3837
OnigEncoding enc
Definition: oniguruma.h:677
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:682
static void copy_opt_env(OptEnv *to, OptEnv *from)
Definition: regcomp.c:4465
static void copy_opt_exact_info(OptExactInfo *to, OptExactInfo *from)
Definition: regcomp.c:4562
#define IS_NOT_NULL(p)
Definition: regint.h:279
static int entry_repeat_range(regex_t *reg, int id, int lower, int upper)
Definition: regcomp.c:659
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:67
int onig_end(void)
Definition: regcomp.c:6000
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:513
short int MemNumType
Definition: regint.h:643
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:148
static int add_abs_addr(regex_t *reg, int addr)
Definition: regcomp.c:237
static int add_mem_num(regex_t *reg, int num)
Definition: regcomp.c:255
#define GET_POINTER_INC(ptr, p)
Definition: regint.h:665
#define NSTRING_LEN(node)
Definition: regparse.h:105
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:154
static void clear_mml(MinMaxLen *mml)
Definition: regcomp.c:4429
int char_len
Definition: regparse.h:245
UChar * name
Definition: regparse.h:224
#define rb_intern_const(str)
Definition: ruby.h:1332
struct _Node * target
Definition: regparse.h:198
static void remove_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4529
static int next_setup(Node *node, Node *next_node, int in_root, regex_t *reg)
Definition: regcomp.c:3289
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4329
BitStatusType bt_mem_start
Definition: regparse.h:293
static int subexp_inf_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:2939
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:109
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:687
ScanEnv * scan_env
Definition: regcomp.c:4306
#define ANCHOR_WORD_BOUND
Definition: regint.h:507
static int comp_opt_exact_or_map_info(OptExactInfo *e, OptMapInfo *m)
Definition: regcomp.c:4768
int ascii_range
Definition: regparse.h:246
OnigOptionType option
Definition: regparse.h:197
#define ANCHOR_KEEP
Definition: regint.h:519
AbsAddrType call_addr
Definition: regparse.h:199
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:324
OnigOptionType options
Definition: oniguruma.h:678
#define env
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:572
#define NULL
Definition: _sdbm.c:103
#define SIZE_OP_PUSH
Definition: regint.h:680
#define IN_ROOT
Definition: regcomp.c:3838
void onig_free(regex_t *reg)
Definition: regcomp.c:5583
static int compile_length_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1214
int back_num
Definition: regparse.h:235
#define NCDR(node)
Definition: regparse.h:85
static int unset_addr_list_add(UnsetAddrList *uslist, int offset, struct _Node *node)
Definition: regcomp.c:188
OnigOptionType options
Definition: oniguruma.h:385
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:660
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: oniguruma.h:495
static int compile_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1188
#define SET_CALL_RECURSION(node)
Definition: regparse.h:157
#define bp()
Definition: vm_debug.h:27
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:467
#define SET_NTYPE(node, ntype)
Definition: regparse.h:72
static int unset_addr_list_fix(UnsetAddrList *uslist, regex_t *reg)
Definition: regcomp.c:2053
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5615
int offset
Definition: regparse.h:210
static int bitset_is_empty(BitSetRef bs)
Definition: regcomp.c:118
#define NT_ENCLOSE
Definition: regparse.h:46
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
static void add_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4520
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: oniguruma.h:581
#define ONIG_STATE(reg)
Definition: oniguruma.h:655
OnigUChar * par
Definition: oniguruma.h:634
#define enclen(enc, p, e)
Definition: regenc.h:78
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:466
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:703
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:464
static int select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
Definition: regcomp.c:324
BitStatusType capture_history
Definition: regparse.h:292
#define ONIGENC_MBC_MAXLEN(enc)
Definition: oniguruma.h:264
#define BITSET_SIZE
Definition: regint.h:394
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:236
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1289