File perl-study.diff of Package perl.29721
--- ./embed.fnc.orig 2020-06-09 16:06:11.901233194 +0000
+++ ./embed.fnc 2020-06-09 16:06:20.989212199 +0000
@@ -2040,7 +2040,8 @@ Es |I32 |study_chunk |NN struct RExC_sta
|NULLOK struct scan_data_t *data \
|I32 stopparen|NULLOK U8* recursed \
|NULLOK struct regnode_charclass_class *and_withp \
- |U32 flags|U32 depth
+ |U32 flags|U32 depth|bool was_mutate_ok
+Es |void |rck_elide_nothing|NN regnode *node
EsRn |U32 |add_data |NN struct RExC_state_t *pRExC_state|U32 n \
|NN const char *s
rs |void |re_croak2 |NN const char* pat1|NN const char* pat2|...
--- ./embed.h.orig 2020-06-09 16:06:11.901233194 +0000
+++ ./embed.h 2020-06-09 16:06:20.993212189 +0000
@@ -932,6 +932,7 @@
#define make_trie_failtable(a,b,c,d) S_make_trie_failtable(aTHX_ a,b,c,d)
#define nextchar(a) S_nextchar(aTHX_ a)
#define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
+#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a)
#define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d)
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
@@ -949,7 +950,7 @@
#define reguni(a,b,c) S_reguni(aTHX_ a,b,c)
#define regwhite S_regwhite
#define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
#define _get_invlist_len_addr(a) S__get_invlist_len_addr(aTHX_ a)
--- ./proto.h.orig 2020-06-09 16:06:11.901233194 +0000
+++ ./proto.h 2020-06-09 16:06:54.237135377 +0000
@@ -6650,6 +6650,11 @@ STATIC void S_parse_lparen_question_flag
#define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS \
assert(pRExC_state)
+STATIC void S_rck_elide_nothing(pTHX_ regnode *node)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \
+ assert(node)
+
PERL_STATIC_NO_RET void S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...)
__attribute__noreturn__
__attribute__nonnull__(pTHX_1)
@@ -6757,7 +6762,7 @@ STATIC void S_scan_commit(pTHX_ const st
#define PERL_ARGS_ASSERT_SCAN_COMMIT \
assert(pRExC_state); assert(data); assert(minlenp)
-STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth)
+STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, I32 *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth, bool was_mutate_ok)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3)
--- ./regcomp.c.orig 2020-06-09 16:06:11.909233177 +0000
+++ ./regcomp.c 2020-06-09 16:06:20.993212189 +0000
@@ -2994,8 +2994,40 @@ typedef struct scan_frame {
regnode *next; /* next node to process when last is reached */
struct scan_frame *prev; /*previous frame*/
I32 stop; /* what stopparen do we use */
+ bool in_gosub;
} scan_frame;
+/* Walk the regnext() chain after 'node' and fold each NOTHING-kind or LONGJMP
+ successor with a nonzero offset into node's own ARG/NEXT_OFF; skip CURLYX.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+ if (OP(node) != CURLYX) {
+ const int max = (reg_off_by_arg[OP(node)]
+ ? I32_MAX
+ /* I32 may be smaller than U16 on CRAYs! */
+ : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+ int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+ int noff;
+ regnode *n = node;
+
+ /* Skip NOTHING and LONGJMP, stopping before the summed offset reaches max. */
+ while ((n = regnext(n))
+ && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+ || ((OP(n) == LONGJMP) && (noff = ARG(n))))
+ && off + noff < max)
+ off += noff;
+ if (reg_off_by_arg[OP(node)]) /* store back into the field 'off' was read from */
+ ARG(node) = off;
+ else
+ NEXT_OFF(node) = off;
+ }
+}
#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
@@ -3007,7 +3039,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
I32 stopparen,
U8* recursed,
struct regnode_charclass_class *and_withp,
- U32 flags, U32 depth)
+ U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
@@ -3049,6 +3081,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
node length to get a real minimum (because
the folded version may be shorter) */
bool has_exactf_sharp_s = FALSE;
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep:", data,depth);
DEBUG_PEEP("Peep",scan,depth);
@@ -3056,30 +3089,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
/* Its not clear to khw or hv why this is done here, and not in the
* clauses that deal with EXACT nodes. khw's guess is that it's
* because of a previous design */
- JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0);
+ if (mutate_ok)
+ JOIN_EXACT(scan,&min_subtract, &has_exactf_sharp_s, 0);
/* Follow the next-chain of the current node and optimize
away all the NOTHINGs from it. */
- if (OP(scan) != CURLYX) {
- const int max = (reg_off_by_arg[OP(scan)]
- ? I32_MAX
- /* I32 may be smaller than U16 on CRAYs! */
- : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
- int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
- int noff;
- regnode *n = scan;
-
- /* Skip NOTHING and LONGJMP. */
- while ((n = regnext(n))
- && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
- || ((OP(n) == LONGJMP) && (noff = ARG(n))))
- && off + noff < max)
- off += noff;
- if (reg_off_by_arg[OP(scan)])
- ARG(scan) = off;
- else
- NEXT_OFF(scan) = off;
- }
+ rck_elide_nothing(scan);
@@ -3133,7 +3148,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
/* we suppose the run is continuous, last=next...*/
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
next, &data_fake,
- stopparen, recursed, NULL, f,depth+1);
+ stopparen, recursed, NULL, f,depth+1, mutate_ok);
if (min1 > minnext)
min1 = minnext;
if (deltanext == I32_MAX) {
@@ -3201,7 +3216,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
}
}
- if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH ) {
+ if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH && mutate_ok) {
/* demq.
Assuming this was/is a branch we are dealing with: 'scan' now
@@ -3550,6 +3565,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
newframe->last = last;
newframe->stop = stopparen;
newframe->prev = frame;
+ newframe->in_gosub = ((frame && frame->in_gosub) || OP(scan) == GOSUB);
frame = newframe;
scan = start;
@@ -3858,7 +3874,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
last, data, stopparen, recursed, NULL,
(mincount == 0
- ? (f & ~SCF_DO_SUBSTR) : f),depth+1);
+ ? (f & ~SCF_DO_SUBSTR) : f),depth+1, mutate_ok);
if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
@@ -3902,6 +3918,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
(void)ReREFCNT_inc(RExC_rx_sv);
}
+ if ( ( minnext > 0 && mincount >= I32_MAX / minnext )
+ || min >= I32_MAX - minnext * mincount )
+ {
+ FAIL("Regexp out of space");
+ }
+
min += minnext * mincount;
is_inf_internal |= deltanext == I32_MAX
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
@@ -4008,7 +4030,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_
#endif
/* Optimize again: */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
- NULL, stopparen, recursed, NULL, 0,depth+1);
+ NULL, stopparen, recursed, NULL, 0,depth+1, mutate_ok);
}
else
oscan->flags = 0;
@@ -4132,11 +4154,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RH
if (data && (fl & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
optimize_curly_tail:
- if (OP(oscan) != CURLYX) {
- while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
- && NEXT_OFF(next))
- NEXT_OFF(oscan) += NEXT_OFF(next);
- }
+ rck_elide_nothing(oscan);
continue;
default: /* REF, and CLUMP only? */
if (flags & SCF_DO_SUBSTR) {
@@ -4367,7 +4385,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RH
next = regnext(scan);
nscan = NEXTOPER(NEXTOPER(scan));
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f, depth+1);
+ last, &data_fake, stopparen, recursed, NULL, f, depth+1, mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -4453,7 +4471,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RH
nscan = NEXTOPER(NEXTOPER(scan));
*minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f,depth+1);
+ last, &data_fake, stopparen, recursed, NULL, f,depth+1, mutate_ok);
if (scan->flags) {
if (deltanext) {
FAIL("Variable length lookbehind not implemented");
@@ -4614,7 +4632,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RH
*/
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed, NULL, f,depth+1);
+ stopparen, recursed, NULL, f,depth+1, mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
@@ -6203,7 +6221,7 @@ reStudy:
minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */
&data, -1, NULL, NULL,
- SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
+ SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0,TRUE);
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -6339,7 +6357,7 @@ reStudy:
minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
- &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
+ &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0,TRUE);
CHECK_RESTUDY_GOTO_butfirst(NOOP);