diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
index 17b696f84ca..f844b07658c 100644
--- a/ext/mbstring/oniguruma/HISTORY
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -1,5 +1,61 @@
History
+2006/08/21: Version 4.3.1
+
+2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/21: [impl] change stack type values
+ and re-define STK_MASK_TO_VOID_TARGET etc...
+2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite.
+2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE.
+2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n}
+2006/09/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
+ if backreference is not used.
+2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern
+ in combination explosion check.
+
+2006/08/17: Version 4.3.0
+
+2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK.
+ check /(.+)*/, /(\s*foo\s*)*/ etc...
+ [API] add num_comb_exp_check member in regex_t.
+ [dist] change LTVERSION value to "1:0:0" in configure.in.
+2006/08/15: [bug] OP_REPEAT_INC process in match_at().
+ should check repeat-count >= range-upper and
+ range-upper may be infinite.
+
+2006/08/11: Version 4.2.3
+
+2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/10: [impl] remove double call in set_qualifier().
+2006/08/10: [impl] remove by_number member in QualifierNode.
+2006/08/09: [impl] remove a comma at the end of enum ReduceType
+ for escape warning on Mac OS X.
+2006/08/07: [impl] remove warning in regcomp.c.
+2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
+
+2006/08/03: Version 4.2.2
+
+2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/03: [bug] (thanks Hiroyuki Yamamoto)
+ segmentation fault in regexec(). (POSIX API)
+2006/08/02: [bug] combination of \G in look-ahead/look-behind and other
+ anchors(\A, \z, \Z) cause invalid result.
+ ex. /(?!\G)a\z/.match("ba")
+ start arg. of MATCH_ARG_INIT() should be original
+ arg. of onig_search().
+
+2006/07/31: Version 4.2.1
+
+2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/07/31: [bug] (thanks Kimura Minoru)
+ re-implement bm_search_notrev().
+2006/07/31: [impl] bm_search_notrev() refactoring.
+2006/07/31: [bug] (thanks Kimura Minoru)
+ fix incomplete multibyte string in exact info.
+2006/07/31: [impl] (thanks Seiji Masugata)
+ remove cast in va_init_list() for Intel C Compiler.
+
2006/07/18: Version 4.2.0
2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html
index a2d6c97b975..f72d9f22942 100755
--- a/ext/mbstring/oniguruma/index.html
+++ b/ext/mbstring/oniguruma/index.html
@@ -8,7 +8,7 @@
Oniguruma
-2006/07/18 (C) K.Kosako
+2006/08/21 (C) K.Kosako
@@ -37,8 +37,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
What's new
-- Version 4.2.0 released. (2006/07/18)
-
- Version 2.5.6 released. (2006/05/29)
+
- Version 4.3.1 released. (2006/08/21)
+
- Version 2.5.7 released. (2006/07/28)
@@ -70,13 +70,13 @@ It follows the BSD license in the case of the one except for it.
Download:
@@ -87,7 +87,7 @@ It follows the BSD license in the case of the one except for it.
-Documents: (version 4.2.0)
+Documents: (version 4.3.1)
@@ -158,7 +159,6 @@ It follows the BSD license in the case of the one except for it.
Regular expressions memo (Japanese page)
Regular expressions technique (Japanese page)
"Text Processing" Lecture documents (Tanaka Akira) (Japanese page)
- "Regex library in Ruby 1.9/2.0" Japan Ruby Conference 2006 (K.Kosako) (Japanese)
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h
index ef8a49f7dad..92c67c858e3 100644
--- a/ext/mbstring/oniguruma/oniguruma.h
+++ b/ext/mbstring/oniguruma/oniguruma.h
@@ -37,8 +37,8 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 4
-#define ONIGURUMA_VERSION_MINOR 2
-#define ONIGURUMA_VERSION_TEENY 0
+#define ONIGURUMA_VERSION_MINOR 3
+#define ONIGURUMA_VERSION_TEENY 1
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -744,6 +744,7 @@ typedef struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
index db58be72f08..507faee8e2d 100644
--- a/ext/mbstring/oniguruma/regcomp.c
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -186,6 +186,15 @@ add_opcode(regex_t* reg, int opcode)
return 0;
}
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ return 0;
+}
+
static int
add_rel_addr(regex_t* reg, int addr)
{
@@ -644,7 +653,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = upper;
+ p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
return 0;
}
@@ -684,7 +693,258 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
return r;
}
+static int
+is_anychar_star_qualifier(QualifierNode* qn)
+{
+ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ NTYPE(qn->target) == N_ANYCHAR)
+ return 1;
+ else
+ return 0;
+}
+
#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+static int
+compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int len, mod_tlen, cklen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == N_ANYCHAR) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact))
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+ }
+ else {
+ if (qn->lower == 0)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += mod_tlen + SIZE_OP_PUSH + cklen;
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) /* /(?..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ else
+ len = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ }
+ else {
+ len = SIZE_OP_PUSH + tlen;
+ }
+ }
+ else {
+ len = tlen;
+ }
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ if (CKN_ON)
+ len += SIZE_OP_STATE_CHECK;
+ }
+
+ return len;
+}
+
+static int
+compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int r, mod_tlen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ if (is_anychar_star_qualifier(qn)) {
+ r = compile_tree_n_times(qn->target, qn->lower, reg);
+ if (r) return r;
+ if (IS_NOT_NULL(qn->next_head_exact)) {
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
+ : OP_ANYCHAR_ML_STAR_PEEK_NEXT));
+ else
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT
+ : OP_ANYCHAR_STAR_PEEK_NEXT));
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ }
+
+ return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+ }
+ else {
+ if (IS_MULTILINE(reg->options)) {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
+ }
+ else {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
+ }
+ if (r) return r;
+ if (CKN_ON)
+ r = add_state_check_num(reg, ckn);
+
+ return r;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1) {
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
+ }
+
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ }
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ }
+ else {
+ if (qn->lower == 0) {
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
+ }
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ }
+ else
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) { /* /(?..){0}/ */
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else
+ r = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ }
+ if (r) return r;
+ }
+
+ r = compile_tree(qn->target, reg);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ }
+
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else {
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ if (CKN_ON) {
+ if (r) return r;
+ r = add_opcode(reg, OP_STATE_CHECK);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ }
+ }
+ return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
@@ -751,16 +1011,6 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
return len;
}
-static int
-is_anychar_star_qualifier(QualifierNode* qn)
-{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == N_ANYCHAR)
- return 1;
- else
- return 0;
-}
-
static int
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{
@@ -887,6 +1137,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
return r;
}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_option_node(EffectNode* node, regex_t* reg)
@@ -1435,7 +1686,9 @@ compile_tree(Node* node, regex_t* reg)
}
if (r) return r;
+#ifdef USE_BACKREF_AT_LEVEL
add_bacref_mems:
+#endif
r = add_length(reg, br->back_num);
if (r) return r;
p = BACKREFS_P(br);
@@ -3040,6 +3293,146 @@ divide_ambig_string_node(Node* node, regex_t* reg)
return 0;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT 512
+#define CEC_INFINITE_NUM 0x7fffffff
+
+#define CEC_IN_INFINITE_REPEAT (1<<0)
+#define CEC_IN_FINITE_REPEAT (1<<1)
+#define CEC_CONT_BIG_REPEAT (1<<2)
+
+static int
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+{
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_comb_exp_check(NCONS(node).left, r, env);
+ prev = NCONS(node).left;
+ } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_ALT:
+ {
+ int ret;
+ do {
+ ret = setup_comb_exp_check(NCONS(node).left, state, env);
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_QUALIFIER:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QualifierNode* qn = &(NQUALIFIER(node));
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == N_EFFECT) {
+ EffectNode* en = &(NEFFECT(qn->target));
+ if (en->type == EFFECT_MEMORY) {
+ if (NTYPE(en->target) == N_QUALIFIER) {
+ QualifierNode* q = &(NQUALIFIER(en->target));
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
+
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_MEMORY:
+ {
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&(NCALL(node))))
+ env->has_recursion = 1;
+ else
+ r = setup_comb_exp_check(NCALL(node).target, state, env);
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif
+
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
@@ -3600,9 +3993,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
}
static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
{
- int i, n;
+ int i, j, len;
+ UChar *p, *end;
OptAncInfo tanc;
if (! to->ignore_case && add->ignore_case) {
@@ -3611,11 +4005,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
to->ignore_case = 1;
}
- for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
- to->s[i] = add->s[n];
+ p = add->s;
+ end = p + add->len;
+ for (i = to->len; p < end; ) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
+ to->s[i++] = *p++;
+ }
to->len = i;
- to->reach_end = (n == add->len ? add->reach_end : 0);
+ to->reach_end = (p == end ? add->reach_end : 0);
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
if (! to->reach_end) tanc.right_anchor = 0;
@@ -3630,15 +4030,10 @@ concat_opt_exact_info_str(OptExactInfo* to,
UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- if (raw) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
- }
- else {
- len = enc_len(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len; j++)
- to->s[i++] = *p++;
- }
}
to->len = i;
@@ -3903,11 +4298,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0) {
if (exb_reach) {
- concat_opt_exact_info(&to->exb, &add->exb);
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
else if (exm_reach) {
- concat_opt_exact_info(&to->exm, &add->exb);
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
}
@@ -4206,7 +4601,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (nopt.exb.reach_end) {
for (i = 2; i < qn->lower &&
! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb);
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
}
if (i < qn->lower) {
opt->exb.reach_end = 0;
@@ -4744,6 +5139,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ reg->num_comb_exp_check = 0;
+#endif
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
@@ -4797,6 +5195,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->bt_mem_end |= reg->capture_history;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+ || scan_env.num_call == 0
+#endif
+ ) {
+ setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+ if (scan_env.has_recursion != 0) {
+ scan_env.num_comb_exp_check = 0;
+ }
+ else
+#endif
+ if (scan_env.comb_exp_max_regnum > 0) {
+ int i;
+ for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+ if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ scan_env.num_comb_exp_check = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -5006,6 +5431,16 @@ onig_end()
#ifdef ONIG_DEBUG
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_STATE_CHECK 6
+
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON },
@@ -5036,63 +5471,73 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_SB, "word-sb", ARG_NON },
- { OP_WORD_MB, "word-mb", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF3, "backref3", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
- { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
- { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
- { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_SB, "word-sb", ARG_NON },
+ { OP_WORD_MB, "word-mb", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREF3, "backref3", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
+ "state-check-anychar*-peek-next", ARG_SPECIAL },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT,
+ "state-check-anychar-ml*-peek-next", ARG_SPECIAL },
{ -1, "", ARG_NON }
};
@@ -5151,6 +5596,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
RelAddrType addr;
LengthType len;
MemNumType mem;
+ StateCheckNumType scn;
OnigCodePoint code;
UChar *q;
@@ -5185,6 +5631,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d", option);
}
break;
+
+ case ARG_STATE_CHECK:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ break;
}
}
else {
@@ -5362,6 +5814,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:(%d)", len, addr);
break;
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:(%d)", scn, addr);
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ p_string(f, 1, bp);
+ bp += 1;
+ break;
+
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
index 7a1a35a0e0a..93662fea7f0 100644
--- a/ext/mbstring/oniguruma/regexec.c
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -306,6 +306,9 @@ typedef struct _StackType {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@@ -339,29 +342,28 @@ typedef struct _StackType {
/* stack type */
/* used by normal-POP */
#define STK_ALT 0x0001
-#define STK_LOOK_BEHIND_NOT 0x0003
-#define STK_POS_NOT 0x0005
-/* avoided by normal-POP, but value should be small */
-#define STK_NULL_CHECK_START 0x0100
+#define STK_LOOK_BEHIND_NOT 0x0002
+#define STK_POS_NOT 0x0003
/* handled by normal-POP */
-#define STK_MEM_START 0x0200
-#define STK_MEM_END 0x0300
-#define STK_REPEAT_INC 0x0400
+#define STK_MEM_START 0x0100
+#define STK_MEM_END 0x8200
+#define STK_REPEAT_INC 0x0300
+#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */
+#define STK_NULL_CHECK_START 0x3000
+#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_MEM_END_MARK 0x8400
#define STK_POS 0x0500 /* used when POP-POS */
#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
#define STK_REPEAT 0x0700
#define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900
-#define STK_MEM_END_MARK 0x0a00
-#define STK_VOID 0x0b00 /* for fill a blank */
-#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
+#define STK_VOID 0x0a00 /* for fill a blank */
/* stack type check mask */
-#define STK_MASK_POP_USED 0x00ff
-#define IS_TO_VOID_TARGET(stk) \
- (((stk)->type & STK_MASK_POP_USED) || \
- (stk)->type == STK_NULL_CHECK_START || (stk)->type == STK_NULL_CHECK_END)
+#define STK_MASK_POP_USED 0x00ff
+#define STK_MASK_TO_VOID_TARGET 0x10ff
+#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
typedef struct {
void* stack_p;
@@ -369,6 +371,10 @@ typedef struct {
OnigOptionType options;
OnigRegion* region;
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ void* state_check_buff;
+ int state_check_buff_size;
+#endif
} MatchArg;
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
@@ -378,7 +384,36 @@ typedef struct {
(msa).start = (arg_start);\
} while (0)
-#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
+
+#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \
+ (msa).state_check_buff = (void* )0;\
+ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
+ int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\
+ (msa).state_check_buff_size = size; \
+ if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\
+ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
+ (msa).state_check_buff = (void* )xmalloc(size);\
+ else \
+ (msa).state_check_buff = (void* )xalloca(size);\
+ xmemset((msa).state_check_buff, 0, (size_t )size);\
+ }\
+ }\
+} while (0)
+
+#define MATCH_ARG_FREE(msa) do {\
+ if ((msa).stack_p) xfree((msa).stack_p);\
+ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
+ if ((msa).state_check_buff) xfree((msa).state_check_buff);\
+ }\
+} while (0);
+#else
+#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
+#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+#endif
+
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
@@ -472,26 +507,88 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_AT(index) (stk_base + (index))
#define GET_STACK_INDEX(stk) ((stk) - stk_base)
+#define STACK_PUSH_TYPE(stack_type) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ STACK_INC;\
+} while(0)
+
+#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define STATE_CHECK_POS(s,snum) \
+ (((s) - str) * num_comb_exp_check + ((snum) - 1))
+#define STATE_CHECK_VAL(v,snum) do {\
+ if (state_check_buff != NULL) {\
+ int x = STATE_CHECK_POS(s,snum);\
+ (v) = state_check_buff[x/8] & (1<<(x%8));\
+ }\
+ else (v) = 0;\
+} while(0)
+
+
+#define ELSE_IF_STATE_CHECK_MARK(stk) \
+ else if ((stk)->type == STK_STATE_CHECK_MARK) { \
+ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
+ state_check_buff[x/8] |= (1<<(x%8)); \
+ }
+
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
+ stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
-#define STACK_PUSH_TYPE(stack_type) do {\
+#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
STACK_ENSURE(1);\
+ stk->type = STK_ALT;\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+ if (state_check_buff != NULL) {\
+ STACK_ENSURE(1);\
+ stk->type = STK_STATE_CHECK_MARK;\
+ stk->u.state.pstr = (s);\
+ stk->u.state.state_check = (snum);\
+ STACK_INC;\
+ }\
+} while(0)
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+#define ELSE_IF_STATE_CHECK_MARK(stk)
+
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
STACK_INC;\
} while(0)
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
@@ -551,7 +648,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (k > stk_base) {\
k--;\
- if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \
+ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
&& k->u.mem.num == (mnum)) {\
level++;\
}\
@@ -631,6 +728,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
@@ -642,6 +740,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
default:\
@@ -660,6 +759,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
}\
@@ -681,6 +781,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
@@ -700,6 +801,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
@@ -947,6 +1049,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
is_fail = 0; \
} while(0)
+
#define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end)
#define IS_EMPTY_STR (str == end)
@@ -1314,6 +1417,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
StackIndex si;
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int scv;
+ unsigned char* state_check_buff = msa->state_check_buff;
+ int num_comb_exp_check = reg->num_comb_exp_check;
+#endif
n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
@@ -1924,6 +2032,94 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
break;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
+ STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ if (*p == *s) {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
+ }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
+ STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ if (*p == *s) {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
+ }
+ n = enc_len(encode, s);
+ if (n >1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
case OP_WORD: STAT_OP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
@@ -2451,6 +2647,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
continue;
break;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ GET_RELADDR_INC(addr, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) {
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_STATE_CHECK(s, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
case OP_POP: STAT_OP_IN(OP_POP);
STACK_POP_ONE;
STAT_OP_OUT;
@@ -2525,7 +2758,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc:
stkp->u.repeat.count++;
- if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */
}
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
@@ -2555,8 +2788,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc_ng:
stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
- IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
UChar* pcode = stkp->u.repeat.pcode;
@@ -2685,6 +2917,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (stk->u.state.state_check != 0) {
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
+ }
+#endif
+
STAT_OP_OUT;
continue;
break;
@@ -2869,66 +3109,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar* text, const UChar* text_end,
const UChar* text_range)
{
- const UChar *s, *t, *p, *end;
+ const UChar *s, *se, *t, *p, *end;
const UChar *tail;
- int skip;
+ int skip, tlen1;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
(int )text, (int )text_end, (int )text_range);
#endif
- end = text_range + (target_end - target) - 1;
+ tlen1 = (target_end - target) - 1;
+ end = text_range + tlen1;
if (end > text_end)
end = text_end;
tail = target_end - 1;
s = text;
- while ((s - text) < target_end - target) {
- s += enc_len(reg->enc, s);
- }
- s--; /* set to text check tail position. */
if (IS_NULL(reg->int_map)) {
while (s < end) {
- p = s;
+ p = se = s + tlen1;
t = tail;
- while (t >= target && *p == *t) {
- p--; t--;
+ while (*p == *t && t >= target) {
+ p--; t--;
}
- if (t < target) return (UChar* )(p + 1);
+ if (t < target) return (UChar* )s;
- skip = reg->map[*s];
- p = s + 1;
- if (p >= text_end) return (UChar* )NULL;
- t = p;
+ skip = reg->map[*se];
+ t = s;
do {
- p += enc_len(reg->enc, p);
- } while ((p - t) < skip && p < text_end);
-
- s += (p - t);
+ s += enc_len(reg->enc, s);
+ } while ((s - t) < skip && s < end);
}
}
else {
while (s < end) {
- p = s;
+ p = se = s + tlen1;
t = tail;
- while (t >= target && *p == *t) {
- p--; t--;
+ while (*p == *t && t >= target) {
+ p--; t--;
}
- if (t < target) return (UChar* )(p + 1);
+ if (t < target) return (UChar* )s;
- skip = reg->int_map[*s];
- p = s + 1;
- if (p >= text_end) return (UChar* )NULL;
- t = p;
+ skip = reg->int_map[*se];
+ t = s;
do {
- p += enc_len(reg->enc, p);
- } while ((p - t) < skip && p < text_end);
-
- s += (p - t);
+ s += enc_len(reg->enc, s);
+ } while ((s - t) < skip && s < end);
}
}
+
return (UChar* )NULL;
}
@@ -3083,6 +3313,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at);
+ STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
if (region
#ifdef USE_POSIX_REGION_OPTION
@@ -3343,6 +3574,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
int r;
UChar *s, *prev;
MatchArg msa;
+ const UChar *orig_start = start;
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
@@ -3484,6 +3716,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ msa.state_check_buff = (void* )0;
+#endif
MATCH_AND_RETURN_CHECK;
goto mismatch;
}
@@ -3495,7 +3730,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
- MATCH_ARG_INIT(msa, option, region, start);
+ MATCH_ARG_INIT(msa, option, region, orig_start);
+ STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
s = (UChar* )start;
if (range > start) { /* forward search */
diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h
index 4c4341c6165..c9f494e44e0 100644
--- a/ext/mbstring/oniguruma/regint.h
+++ b/ext/mbstring/oniguruma/regint.h
@@ -59,7 +59,7 @@
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
-#define USE_BACKREF_AT_LEVEL
+#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@@ -82,6 +82,7 @@
/* interface to external system */
#ifdef NOT_RUBY /* given from Makefile */
#include "config.h"
+#define USE_BACKREF_AT_LEVEL
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
@@ -117,6 +118,9 @@
#endif /* else NOT_RUBY */
+#define STATE_CHECK_STRING_THRESHOLD_LEN 7
+#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000
+
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
@@ -639,34 +643,35 @@ enum OpCode {
OP_FAIL_LOOK_BEHIND_NOT, /* (? */
- OP_RETURN
+ OP_RETURN,
+
+ OP_STATE_CHECK_PUSH, /* combination explosion check and push */
+ OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
+ OP_STATE_CHECK, /* check only */
+ OP_STATE_CHECK_ANYCHAR_STAR,
+ OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
+ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
};
-/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
+typedef short int StateCheckNumType;
typedef void* PointerType;
-#define SIZE_OPCODE 1
-#define SIZE_RELADDR sizeof(RelAddrType)
-#define SIZE_ABSADDR sizeof(AbsAddrType)
-#define SIZE_LENGTH sizeof(LengthType)
-#define SIZE_MEMNUM sizeof(MemNumType)
-#define SIZE_REPEATNUM sizeof(RepeatNumType)
-#define SIZE_OPTION sizeof(OnigOptionType)
-#define SIZE_CODE_POINT sizeof(OnigCodePoint)
-#define SIZE_POINTER sizeof(PointerType)
+#define SIZE_OPCODE 1
+#define SIZE_RELADDR sizeof(RelAddrType)
+#define SIZE_ABSADDR sizeof(AbsAddrType)
+#define SIZE_LENGTH sizeof(LengthType)
+#define SIZE_MEMNUM sizeof(MemNumType)
+#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
+#define SIZE_REPEATNUM sizeof(RepeatNumType)
+#define SIZE_OPTION sizeof(OnigOptionType)
+#define SIZE_CODE_POINT sizeof(OnigCodePoint)
+#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -692,6 +697,7 @@ typedef void* PointerType;
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
+#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@@ -734,6 +740,13 @@ typedef void* PointerType;
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1 + SIZE_STATE_CHECK_NUM)
+#endif
#define MC_ESC(enc) (enc)->meta_char_table.esc
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c
index d70dbb6c3b1..407b73fc44b 100644
--- a/ext/mbstring/oniguruma/regparse.c
+++ b/ext/mbstring/oniguruma/regparse.c
@@ -940,6 +940,13 @@ scan_env_clear(ScanEnv* env)
for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
env->mem_nodes_static[i] = NULL_NODE;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ env->num_comb_exp_check = 0;
+ env->comb_exp_max_regnum = 0;
+ env->curr_max_regnum = 0;
+ env->has_recursion = 0;
+#endif
}
static int
@@ -1321,11 +1328,17 @@ node_new_qualifier(int lower, int upper, int by_number)
NQUALIFIER(node).lower = lower;
NQUALIFIER(node).upper = upper;
NQUALIFIER(node).greedy = 1;
- NQUALIFIER(node).by_number = by_number;
NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
NQUALIFIER(node).head_exact = NULL_NODE;
NQUALIFIER(node).next_head_exact = NULL_NODE;
NQUALIFIER(node).is_refered = 0;
+ if (by_number != 0)
+ NQUALIFIER(node).state |= NST_BY_NUMBER;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ NQUALIFIER(node).comb_exp_check_num = 0;
+#endif
+
return node;
}
@@ -2140,7 +2153,7 @@ enum ReduceType {
RQ_AQ, /* to '*?' */
RQ_QQ, /* to '??' */
RQ_P_QQ, /* to '+)??' */
- RQ_PQ_Q, /* to '+?)?' */
+ RQ_PQ_Q /* to '+?)?' */
};
static enum ReduceType ReduceTypeTable[6][6] = {
@@ -4633,16 +4646,14 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
QualifierNode* qnt = &(NQUALIFIER(target));
+ int nestq_num = popular_qualifier_num(qn);
+ int targetq_num = popular_qualifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (qn->by_number == 0 && qnt->by_number == 0 &&
+ if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
- int nestq_num, targetq_num;
UChar buf[WARN_BUFSIZE];
- nestq_num = popular_qualifier_num(qn);
- targetq_num = popular_qualifier_num(qnt);
-
switch(ReduceTypeTable[targetq_num][nestq_num]) {
case RQ_ASIS:
break;
@@ -4673,9 +4684,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
warn_exit:
#endif
- if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
- onig_reduce_nested_qualifier(qnode, target);
- goto q_exit;
+ if (targetq_num >= 0) {
+ if (nestq_num >= 0) {
+ onig_reduce_nested_qualifier(qnode, target);
+ goto q_exit;
+ }
+ else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ }
+ }
}
}
break;
diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h
index 0958c909bfe..ca62dddf7ea 100644
--- a/ext/mbstring/oniguruma/regparse.h
+++ b/ext/mbstring/oniguruma/regparse.h
@@ -124,11 +124,13 @@ typedef struct {
int lower;
int upper;
int greedy;
- int by_number; /* {n,m} */
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
+#endif
} QualifierNode;
/* status bits */
@@ -146,6 +148,7 @@ typedef struct {
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL (1<<13)
+#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@@ -168,6 +171,7 @@ typedef struct {
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
typedef struct {
int state;
@@ -277,6 +281,12 @@ typedef struct {
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int num_comb_exp_check;
+ int comb_exp_max_regnum;
+ int curr_max_regnum;
+ int has_recursion;
+#endif
} ScanEnv;
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
index fa7b5c4b24d..a3bacf722e8 100644
--- a/ext/mbstring/oniguruma/regposix.c
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -192,7 +192,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
- (OnigRegion* )pmatch, options);
+ (OnigRegion* )pm, options);
if (r >= 0) {
r = 0; /* Match */
@@ -212,6 +212,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
if (pm != pmatch && pm != NULL)
xfree(pm);
+#if 0
+ if (reg->re_nsub > nmatch - 1)
+ reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
+#endif
+
return r;
}