File perl-regcomp-uni-semantics.diff of Package perl.6851

--- ./regcomp.c.orig	2018-03-12 10:12:01.394695035 +0000
+++ ./regcomp.c	2018-03-12 15:10:48.943906636 +0000
@@ -259,6 +259,14 @@ typedef struct RExC_state_t {
                                      }                                     \
                         } STMT_END
 
+#define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
+    STMT_START {                                                            \
+            if (DEPENDS_SEMANTICS) {                                        \
+                set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
+                RExC_uni_semantics = 1;                                     \
+            }                                                               \
+    } STMT_END
+
 /* This converts the named class defined in regcomp.h to its equivalent class
  * number defined in handy.h. */
 #define namedclass_to_classnum(class)  ((int) ((class) / 2))
@@ -5727,7 +5735,7 @@ Perl_re_op_compile(pTHX_ SV ** const pat
     if (initial_charset == REGEX_LOCALE_CHARSET) {
 	RExC_contains_locale = 1;
     }
-    else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+    else if (initial_charset == REGEX_DEPENDS_CHARSET && (RExC_utf8 ||RExC_uni_semantics)) {
 
 	/* Set to use unicode semantics if the pattern is in utf8 and has the
 	 * 'depends' charset specified, as it means unicode when utf8  */
@@ -9473,6 +9481,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I
     if (paren) {
         /* restore original flags, but keep (?p) */
 	RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY);
+        if (DEPENDS_SEMANTICS && RExC_uni_semantics) {
+            set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
+        }
 	if (RExC_parse >= RExC_end || *nextchar(pRExC_state) != ')') {
 	    RExC_parse = oregcomp_parse;
 	    vFAIL("Unmatched (");
@@ -9943,7 +9954,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pREx
         return ret;
     }
 
-    RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
+    REQUIRE_UNI_RULES(flagp, FALSE); /* Unicode named chars imply Unicode semantics */
     RExC_parse += 2;	/* Skip past the 'U+' */
 
     endchar = RExC_parse + strcspn(RExC_parse, ".}");
@@ -11633,7 +11644,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *
     if (LOC) {
         vFAIL("(?[...]) not valid in locale");
     }
-    RExC_uni_semantics = 1;
+    REQUIRE_UNI_RULES(flagp, FALSE);
 
     /* This will return only an ANYOF regnode, or (unlikely) something smaller
      * (such as EXACT).  Thus we can skip most everything if just sizing.  We
@@ -12483,7 +12494,7 @@ parseit:
                                                 named */
 
 		/* \p means they want Unicode semantics */
-		RExC_uni_semantics = 1;
+		REQUIRE_UNI_RULES(flagp, FALSE);
 		}
 		break;
 	    case 'n':	value = '\n';			break;
@@ -13000,7 +13011,7 @@ parseit:
 	/* non-Latin1 code point implies unicode semantics.  Must be set in
 	 * pass1 so is there for the whole of pass 2 */
 	if (value > 255) {
-	    RExC_uni_semantics = 1;
+	    REQUIRE_UNI_RULES(flagp, FALSE);
 	}
 
         /* Ready to process either the single value, or the completed range.
openSUSE Build Service is sponsored by