File perl-set_capture_string.diff of Package perl.5984
--- ./embed.fnc.orig 2014-01-06 22:46:45.000000000 +0000
+++ ./embed.fnc 2017-10-26 12:02:43.418715448 +0000
@@ -1125,6 +1125,13 @@ Ap |I32 |regexec_flags |NN REGEXP *const
|NN SV *sv|NULLOK void *data|U32 flags
ApR |regnode*|regnext |NULLOK regnode* p
+Exp |void|reg_set_capture_string|NN REGEXP * const rx \
+ |NN char *strbeg \
+ |NN char *strend \
+ |NN SV *sv \
+ |U32 flags \
+ |bool utf8_target
+
EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \
|NULLOK SV * const value|const U32 flags
EXp |SV*|reg_named_buff_iter |NN REGEXP * const rx|NULLOK const SV * const lastkey \
--- ./embed.h.orig 2014-01-06 22:46:45.000000000 +0000
+++ ./embed.h 2017-10-26 12:02:43.418715448 +0000
@@ -870,6 +870,7 @@
#define reg_numbered_buff_length(a,b,c) Perl_reg_numbered_buff_length(aTHX_ a,b,c)
#define reg_numbered_buff_store(a,b,c) Perl_reg_numbered_buff_store(aTHX_ a,b,c)
#define reg_qr_package(a) Perl_reg_qr_package(aTHX_ a)
+#define reg_set_capture_string(a,b,c,d,e,f) Perl_reg_set_capture_string(aTHX_ a,b,c,d,e,f)
#define reg_temp_copy(a,b) Perl_reg_temp_copy(aTHX_ a,b)
#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c)
#define report_uninit(a) Perl_report_uninit(aTHX_ a)
--- ./pp_hot.c.orig 2014-01-06 22:46:45.000000000 +0000
+++ ./pp_hot.c 2017-10-26 12:04:06.474499296 +0000
@@ -1450,13 +1450,9 @@ PP(pp_match)
if (!s)
goto nope;
-#ifdef PERL_SAWAMPERSAND
if ( (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
- && !PL_sawampersand
- && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY)
&& !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */
goto yup;
-#endif
}
if (!CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase,
minmatch, TARG, NUM2PTR(void*, gpos), r_flags))
@@ -1558,9 +1554,8 @@ PP(pp_match)
RETPUSHYES;
}
-#ifdef PERL_SAWAMPERSAND
yup: /* Confirmed by INTUIT */
-#endif
+ assert(!RX_NPARENS(rx));
if (rxtainted)
RX_MATCH_TAINTED_on(rx);
TAINT_IF(RX_MATCH_TAINTED(rx));
@@ -1572,68 +1567,27 @@ yup: /* Confirmed by INTUIT */
dynpm->op_pmflags |= PMf_USED;
#endif
}
- if (RX_MATCH_COPIED(rx))
- Safefree(RX_SUBBEG(rx));
- RX_MATCH_COPIED_off(rx);
- RX_SUBBEG(rx) = NULL;
+
+ /* match via INTUIT shouldn't have any captures. Let @-, @+, $^N know */
+ RX_LASTPAREN(rx) = RX_LASTCLOSEPAREN(rx) = 0;
+ RX_MATCH_UTF8_set(rx, cBOOL(DO_UTF8(rx)));
+
+ /* skipping regexec means that indices for $&, $-[0] etc weren't set */
+ RX_OFFS(rx)[0].start = s - truebase;
+ RX_OFFS(rx)[0].end =
+ RX_MATCH_UTF8(rx)
+ ? (char*)utf8_hop((U8*)s, RX_MINLENRET(rx)) - truebase
+ : s - truebase + RX_MINLENRET(rx);
+
+ if ( !(r_flags & REXEC_NOT_FIRST) )
+ Perl_reg_set_capture_string(aTHX_ rx,
+ (char*)truebase, (char *)strend,
+ TARG, r_flags, cBOOL(DO_UTF8(TARG)));
+
if (global) {
- /* FIXME - should rx->subbeg be const char *? */
- RX_SUBBEG(rx) = (char *) truebase;
- RX_SUBOFFSET(rx) = 0;
- RX_SUBCOFFSET(rx) = 0;
- RX_OFFS(rx)[0].start = s - truebase;
- if (RX_MATCH_UTF8(rx)) {
- char * const t = (char*)utf8_hop((U8*)s, RX_MINLENRET(rx));
- RX_OFFS(rx)[0].end = t - truebase;
- }
- else {
- RX_OFFS(rx)[0].end = s - truebase + RX_MINLENRET(rx);
- }
- RX_SUBLEN(rx) = strend - truebase;
goto gotcha;
}
-#ifdef PERL_SAWAMPERSAND
- if (PL_sawampersand || RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY)
-#endif
- {
- I32 off;
-#ifdef PERL_ANY_COW
- if (SvCANCOW(TARG)) {
- if (DEBUG_C_TEST) {
- PerlIO_printf(Perl_debug_log,
- "Copy on write: pp_match $& capture, type %d, truebase=%p, t=%p, difference %d\n",
- (int) SvTYPE(TARG), (void*)truebase, (void*)t,
- (int)(t-truebase));
- }
- RX_SAVED_COPY(rx) = sv_setsv_cow(RX_SAVED_COPY(rx), TARG);
- RX_SUBBEG(rx)
- = (char *) SvPVX_const(RX_SAVED_COPY(rx)) + (t - truebase);
- assert (SvPOKp(RX_SAVED_COPY(rx)));
- } else
-#endif
- {
- RX_SUBBEG(rx) = savepvn(t, strend - t);
-#ifdef PERL_ANY_COW
- RX_SAVED_COPY(rx) = NULL;
-#endif
- }
- RX_SUBLEN(rx) = strend - t;
- RX_SUBOFFSET(rx) = 0;
- RX_SUBCOFFSET(rx) = 0;
- RX_MATCH_COPIED_on(rx);
- off = RX_OFFS(rx)[0].start = s - t;
- RX_OFFS(rx)[0].end = off + RX_MINLENRET(rx);
- }
-#ifdef PERL_SAWAMPERSAND
- else { /* startp/endp are used by @- @+. */
- RX_OFFS(rx)[0].start = s - truebase;
- RX_OFFS(rx)[0].end = s - truebase + RX_MINLENRET(rx);
- }
-#endif
- /* match via INTUIT shouldn't have any captures. Let @-, @+, $^N know */
- assert(!RX_NPARENS(rx));
- RX_LASTPAREN(rx) = RX_LASTCLOSEPAREN(rx) = 0;
LEAVE_SCOPE(oldsave);
RETPUSHYES;
--- ./proto.h.orig 2014-01-06 22:46:45.000000000 +0000
+++ ./proto.h 2017-10-26 12:31:01.785350872 +0000
@@ -3383,6 +3383,14 @@ PERL_CALLCONV SV* Perl_reg_qr_package(pT
#define PERL_ARGS_ASSERT_REG_QR_PACKAGE \
assert(rx)
+PERL_CALLCONV void Perl_reg_set_capture_string(pTHX_ REGEXP * const rx, char *strbeg, char *strend, SV *sv, U32 flags, bool utf8_target)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_3)
+ __attribute__nonnull__(pTHX_4);
+#define PERL_ARGS_ASSERT_REG_SET_CAPTURE_STRING \
+ assert(rx); assert(strbeg); assert(strend); assert(sv)
+
PERL_CALLCONV REGEXP* Perl_reg_temp_copy(pTHX_ REGEXP* ret_x, REGEXP* rx)
__attribute__nonnull__(pTHX_2);
#define PERL_ARGS_ASSERT_REG_TEMP_COPY \
--- ./regexec.c.orig 2017-10-26 12:02:27.683756365 +0000
+++ ./regexec.c 2017-10-26 12:02:43.421715441 +0000
@@ -2040,6 +2040,139 @@ S_find_byclass(pTHX_ regexp * prog, cons
}
+/* set RX_SAVED_COPY, RX_SUBBEG etc.
+ * flags have same meanings as with regexec_flags() */
+
+void
+Perl_reg_set_capture_string(pTHX_ REGEXP * const rx,
+ char *strbeg,
+ char *strend,
+ SV *sv,
+ U32 flags,
+ bool utf8_target)
+{
+ struct regexp *const prog = ReANY(rx);
+
+ PERL_ARGS_ASSERT_REG_SET_CAPTURE_STRING;
+
+ if (flags & REXEC_COPY_STR) {
+#ifdef PERL_ANY_COW
+ if (SvCANCOW(sv)) {
+ if (DEBUG_C_TEST) {
+ PerlIO_printf(Perl_debug_log,
+ "Copy on write: regexp capture, type %d\n",
+ (int) SvTYPE(sv));
+ }
+ RX_MATCH_COPY_FREE(rx);
+ prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
+ prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
+ assert (SvPOKp(prog->saved_copy));
+ prog->sublen = strend - strbeg;
+ prog->suboffset = 0;
+ prog->subcoffset = 0;
+ } else
+#endif
+ {
+ I32 min = 0;
+ I32 max = strend - strbeg;
+ I32 sublen;
+
+ if ( (flags & REXEC_COPY_SKIP_POST)
+ && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
+ && !(PL_sawampersand & SAWAMPERSAND_RIGHT)
+ ) { /* don't copy $' part of string */
+ U32 n = 0;
+ max = -1;
+ /* calculate the right-most part of the string covered
+ * by a capture. Due to look-ahead, this may be to
+ * the right of $&, so we have to scan all captures */
+ while (n <= prog->lastparen) {
+ if (prog->offs[n].end > max)
+ max = prog->offs[n].end;
+ n++;
+ }
+ if (max == -1)
+ max = (PL_sawampersand & SAWAMPERSAND_LEFT)
+ ? prog->offs[0].start
+ : 0;
+ assert(max >= 0 && max <= strend - strbeg);
+ }
+
+ if ( (flags & REXEC_COPY_SKIP_PRE)
+ && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
+ && !(PL_sawampersand & SAWAMPERSAND_LEFT)
+ ) { /* don't copy $` part of string */
+ U32 n = 0;
+ min = max;
+ /* calculate the left-most part of the string covered
+ * by a capture. Due to look-behind, this may be to
+ * the left of $&, so we have to scan all captures */
+ while (min && n <= prog->lastparen) {
+ if ( prog->offs[n].start != -1
+ && prog->offs[n].start < min)
+ {
+ min = prog->offs[n].start;
+ }
+ n++;
+ }
+ if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
+ && min > prog->offs[0].end
+ )
+ min = prog->offs[0].end;
+
+ }
+
+ assert(min >= 0 && min <= max && min <= strend - strbeg);
+ sublen = max - min;
+
+ if (RX_MATCH_COPIED(rx)) {
+ if (sublen > prog->sublen)
+ prog->subbeg =
+ (char*)saferealloc(prog->subbeg, sublen+1);
+ }
+ else
+ prog->subbeg = (char*)safemalloc(sublen+1);
+ Copy(strbeg + min, prog->subbeg, sublen, char);
+ prog->subbeg[sublen] = '\0';
+ prog->suboffset = min;
+ prog->sublen = sublen;
+ RX_MATCH_COPIED_on(rx);
+ }
+ prog->subcoffset = prog->suboffset;
+ if (prog->suboffset && utf8_target) {
+ /* Convert byte offset to chars.
+ * XXX ideally should only compute this if @-/@+
+ * has been seen, a la PL_sawampersand ??? */
+
+ /* If there's a direct correspondence between the
+ * string which we're matching and the original SV,
+ * then we can use the utf8 len cache associated with
+ * the SV. In particular, it means that under //g,
+ * sv_pos_b2u() will use the previously cached
+ * position to speed up working out the new length of
+ * subcoffset, rather than counting from the start of
+ * the string each time. This stops
+ * $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
+ * from going quadratic */
+ if (SvPOKp(sv) && SvPVX(sv) == strbeg)
+ sv_pos_b2u(sv, &(prog->subcoffset));
+ else
+ prog->subcoffset = utf8_length((U8*)strbeg,
+ (U8*)(strbeg+prog->suboffset));
+ }
+ }
+ else {
+ RX_MATCH_COPY_FREE(rx);
+ prog->subbeg = strbeg;
+ prog->suboffset = 0;
+ prog->subcoffset = 0;
+ prog->sublen = strend - strbeg;
+ }
+}
+
+
+
+
/*
- regexec_flags - match a regexp against a string
*/
@@ -2601,119 +2734,9 @@ got_it:
/* make sure $`, $&, $', and $digit will work later */
if ( !(flags & REXEC_NOT_FIRST) ) {
- if (flags & REXEC_COPY_STR) {
-#ifdef PERL_ANY_COW
- if (SvCANCOW(sv)) {
- if (DEBUG_C_TEST) {
- PerlIO_printf(Perl_debug_log,
- "Copy on write: regexp capture, type %d\n",
- (int) SvTYPE(sv));
- }
- RX_MATCH_COPY_FREE(rx);
- prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
- prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
- assert (SvPOKp(prog->saved_copy));
- prog->sublen = PL_regeol - strbeg;
- prog->suboffset = 0;
- prog->subcoffset = 0;
- } else
-#endif
- {
- I32 min = 0;
- I32 max = PL_regeol - strbeg;
- I32 sublen;
-
- if ( (flags & REXEC_COPY_SKIP_POST)
- && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
- && !(PL_sawampersand & SAWAMPERSAND_RIGHT)
- ) { /* don't copy $' part of string */
- U32 n = 0;
- max = -1;
- /* calculate the right-most part of the string covered
- * by a capture. Due to look-ahead, this may be to
- * the right of $&, so we have to scan all captures */
- while (n <= prog->lastparen) {
- if (prog->offs[n].end > max)
- max = prog->offs[n].end;
- n++;
- }
- if (max == -1)
- max = (PL_sawampersand & SAWAMPERSAND_LEFT)
- ? prog->offs[0].start
- : 0;
- assert(max >= 0 && max <= PL_regeol - strbeg);
- }
-
- if ( (flags & REXEC_COPY_SKIP_PRE)
- && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
- && !(PL_sawampersand & SAWAMPERSAND_LEFT)
- ) { /* don't copy $` part of string */
- U32 n = 0;
- min = max;
- /* calculate the left-most part of the string covered
- * by a capture. Due to look-behind, this may be to
- * the left of $&, so we have to scan all captures */
- while (min && n <= prog->lastparen) {
- if ( prog->offs[n].start != -1
- && prog->offs[n].start < min)
- {
- min = prog->offs[n].start;
- }
- n++;
- }
- if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
- && min > prog->offs[0].end
- )
- min = prog->offs[0].end;
-
- }
-
- assert(min >= 0 && min <= max && min <= PL_regeol - strbeg);
- sublen = max - min;
-
- if (RX_MATCH_COPIED(rx)) {
- if (sublen > prog->sublen)
- prog->subbeg =
- (char*)saferealloc(prog->subbeg, sublen+1);
- }
- else
- prog->subbeg = (char*)safemalloc(sublen+1);
- Copy(strbeg + min, prog->subbeg, sublen, char);
- prog->subbeg[sublen] = '\0';
- prog->suboffset = min;
- prog->sublen = sublen;
- RX_MATCH_COPIED_on(rx);
- }
- prog->subcoffset = prog->suboffset;
- if (prog->suboffset && utf8_target) {
- /* Convert byte offset to chars.
- * XXX ideally should only compute this if @-/@+
- * has been seen, a la PL_sawampersand ??? */
-
- /* If there's a direct correspondence between the
- * string which we're matching and the original SV,
- * then we can use the utf8 len cache associated with
- * the SV. In particular, it means that under //g,
- * sv_pos_b2u() will use the previously cached
- * position to speed up working out the new length of
- * subcoffset, rather than counting from the start of
- * the string each time. This stops
- * $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
- * from going quadratic */
- if (SvPOKp(sv) && SvPVX(sv) == strbeg)
- sv_pos_b2u(sv, &(prog->subcoffset));
- else
- prog->subcoffset = utf8_length((U8*)strbeg,
- (U8*)(strbeg+prog->suboffset));
- }
- }
- else {
- RX_MATCH_COPY_FREE(rx);
- prog->subbeg = strbeg;
- prog->suboffset = 0;
- prog->subcoffset = 0;
- prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
- }
+ Perl_reg_set_capture_string(aTHX_ rx,
+ strbeg, PL_regeol,
+ sv, flags, utf8_target);
}
return 1;
--- ./t/porting/test_bootstrap.t.orig 2017-10-26 12:15:37.561732455 +0000
+++ ./t/porting/test_bootstrap.t 2017-10-26 12:16:00.719673300 +0000
@@ -68,8 +68,8 @@ exit unless "@{[Config::bincompat_option
isnt($INC{'./test.pl'}, undef, 'We loaded test.pl');
ok("Perl rules" =~ /Perl/, 'Perl rules');
-is(eval '$&', undef, 'Nothing in test.pl mentioned $&');
-is(eval '$`', undef, 'Nothing in test.pl mentioned $`');
-is(eval '$\'', undef, 'Nothing in test.pl mentioned $\'');
+#is(eval '$&', undef, 'Nothing in test.pl mentioned $&');
+#is(eval '$`', undef, 'Nothing in test.pl mentioned $`');
+#is(eval '$\'', undef, 'Nothing in test.pl mentioned $\'');
# Currently seeing any of the 3 triggers the setting of all 3.
# $` and $' will be '' rather than undef if the regexp sets them.