File nim-2.2.2-pcre2.patch of Package nim

From 8c2ec2a7b010ef1a43b967205324ac83d11815d1 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Mon, 4 Nov 2024 22:32:12 +0800
Subject: [PATCH 1/7] fixes #23668; migrates from pcre to pcre2

---
 lib/impure/nre.nim        | 168 +++++-----
 lib/impure/re.nim         | 199 ++++++-----
 lib/wrappers/pcre2.nim    | 683 ++++++++++++++++++++++++++++++++++++++
 tests/stdlib/nre/init.nim |  18 +-
 4 files changed, 893 insertions(+), 175 deletions(-)
 create mode 100644 lib/wrappers/pcre2.nim

diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index c5adf0e995830..0a43b92bed986 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -61,7 +61,7 @@ runnableExamples:
   assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
   assert find("uxabc", re"ab", start = 3).isNone
 
-from std/pcre import nil
+from ../wrappers/pcre2 import nil
 import nre/private/util
 import std/tables
 from std/strutils import `%`
@@ -76,8 +76,7 @@ export options
 type
   RegexDesc* = object
     pattern*: string
-    pcreObj: ptr pcre.Pcre  ## not nil
-    pcreExtra: ptr pcre.ExtraData  ## nil
+    pcreObj: ptr pcre2.Pcre  ## not nil
 
     captureNameToId: Table[string, int]
 
@@ -191,7 +190,7 @@ type
     pattern*: Regex  ## The regex doing the matching.
                      ## Not nil.
     str*: string  ## The string that was matched against.
-    pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
+    pcreMatchBounds: seq[HSlice[csize_t, csize_t]] ## First item is the bounds of the match
                                             ## Other items are the captures
                                             ## `a` is inclusive start, `b` is exclusive end
 
@@ -222,37 +221,31 @@ when defined(gcDestructors):
   when defined(nimAllowNonVarDestructor) and defined(nimPreviewNonVarDestructor):
     proc `=destroy`(pattern: RegexDesc) =
       `=destroy`(pattern.pattern)
-      pcre.free_substring(cast[cstring](pattern.pcreObj))
-      if pattern.pcreExtra != nil:
-        pcre.free_study(pattern.pcreExtra)
+      pcre2.code_free(pattern.pcreObj)
       `=destroy`(pattern.captureNameToId)
   else:
     proc `=destroy`(pattern: var RegexDesc) =
       `=destroy`(pattern.pattern)
-      pcre.free_substring(cast[cstring](pattern.pcreObj))
-      if pattern.pcreExtra != nil:
-        pcre.free_study(pattern.pcreExtra)
+      pcre2.code_free(pattern.pcreObj)
       `=destroy`(pattern.captureNameToId)
 else:
   proc destroyRegex(pattern: Regex) =
     `=destroy`(pattern.pattern)
-    pcre.free_substring(cast[cstring](pattern.pcreObj))
-    if pattern.pcreExtra != nil:
-      pcre.free_study(pattern.pcreExtra)
+    pcre2.code_free(pattern.pcreObj)
     `=destroy`(pattern.captureNameToId)
 
-proc getinfo[T](pattern: Regex, opt: cint): T =
-  let retcode = pcre.fullinfo(pattern.pcreObj, pattern.pcreExtra, opt, addr result)
+proc getinfo[T](pattern: Regex, opt: uint32): T =
+  let retcode = pcre2.pattern_info(pattern.pcreObj, opt, addr result)
 
   if retcode < 0:
     # XXX Error message that doesn't expose implementation details
     raise newException(FieldDefect, "Invalid getinfo for $1, errno $2" % [$opt, $retcode])
 
 proc getNameToNumberTable(pattern: Regex): Table[string, int] =
-  let entryCount = getinfo[cint](pattern, pcre.INFO_NAMECOUNT)
-  let entrySize = getinfo[cint](pattern, pcre.INFO_NAMEENTRYSIZE)
+  let entryCount = getinfo[cint](pattern, pcre2.INFO_NAMECOUNT)
+  let entrySize = getinfo[cint](pattern, pcre2.INFO_NAMEENTRYSIZE)
   let table = cast[ptr UncheckedArray[uint8]](
-                getinfo[int](pattern, pcre.INFO_NAMETABLE))
+                getinfo[int](pattern, pcre2.INFO_NAMETABLE))
 
   result = initTable[string, int]()
 
@@ -268,53 +261,53 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =
 
     result[name] = num
 
-proc initRegex(pattern: string, flags: int, study = true): Regex =
+proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
   when defined(gcDestructors):
     result = Regex()
   else:
     new(result, destroyRegex)
   result.pattern = pattern
 
-  var errorMsg: cstring
-  var errOffset: cint
+  var
+    errorCode: cint = 0
+    errOffset: csize_t = 0
 
-  result.pcreObj = pcre.compile(cstring(pattern),
-                                # better hope int is at least 4 bytes..
-                                cint(flags), addr errorMsg,
-                                addr errOffset, nil)
+  result.pcreObj = pcre2.compile(cast[ptr uint8](cstring(pattern)),
+                                flags, options, addr(errorCode),
+                                addr(errOffset), nil)
   if result.pcreObj == nil:
     # failed to compile
-    raise SyntaxError(msg: $errorMsg, pos: errOffset, pattern: pattern)
-
-  if study:
-    var options: cint = 0
-    var hasJit: cint
-    if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-      if hasJit == 1'i32:
-        options = pcre.STUDY_JIT_COMPILE
-    result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
-    if errorMsg != nil:
-      raise StudyError(msg: $errorMsg)
+    raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern)
+
+  # if study:
+  #   var options: cint = 0
+  #   var hasJit: cint
+  #   if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0:
+  #     if hasJit == 1'i32:
+  #       options = pcre2.STUDY_JIT_COMPILE
+  #   result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
+  #   if errorMsg != nil:
+  #     raise StudyError(msg: $errorMsg)
 
   result.captureNameToId = result.getNameToNumberTable()
 
 proc captureCount*(pattern: Regex): int =
-  return getinfo[cint](pattern, pcre.INFO_CAPTURECOUNT)
+  return getinfo[cint](pattern, pcre2.INFO_CAPTURECOUNT)
 
 proc captureNameId*(pattern: Regex): Table[string, int] =
   return pattern.captureNameToId
 
 proc matchesCrLf(pattern: Regex): bool =
-  let flags = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS))
-  let newlineFlags = flags and (pcre.NEWLINE_CRLF or
-                                pcre.NEWLINE_ANY or
-                                pcre.NEWLINE_ANYCRLF)
+  let flags = uint32(getinfo[culong](pattern, pcre2.INFO_ALLOPTIONS))
+  let newlineFlags = flags and (pcre2.NEWLINE_CRLF or
+                                pcre2.NEWLINE_ANY or
+                                pcre2.NEWLINE_ANYCRLF)
   if newlineFlags > 0u32:
     return true
 
   # get flags from build config
   var confFlags: cint
-  if pcre.config(pcre.CONFIG_NEWLINE, addr confFlags) != 0:
+  if pcre2.config(pcre2.CONFIG_NEWLINE, addr confFlags) != 0:
     assert(false, "CONFIG_NEWLINE apparently got screwed up")
 
   case confFlags
@@ -332,7 +325,7 @@ func captures*(pattern: RegexMatch): Captures = return Captures(pattern)
 
 func contains*(pattern: CaptureBounds, i: int): bool =
   let pattern = RegexMatch(pattern)
-  pattern.pcreMatchBounds[i + 1].a != -1
+  pattern.pcreMatchBounds[i + 1].a != pcre2.UNSET
 
 func contains*(pattern: Captures, i: int): bool =
   i in CaptureBounds(pattern)
@@ -343,7 +336,7 @@ func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
     raise newException(IndexDefect, "Group '" & $i & "' was not captured")
 
   let bounds = pattern.pcreMatchBounds[i + 1]
-  int(bounds.a)..int(bounds.b-1)
+  int(bounds.a)..int(bounds.b)-1
 
 func `[]`*(pattern: Captures, i: int): string =
   let pattern = RegexMatch(pattern)
@@ -431,8 +424,7 @@ proc `$`*(pattern: RegexMatch): string =
 proc `==`*(a, b: Regex): bool =
   if not a.isNil and not b.isNil:
     return a.pattern == b.pattern and
-           a.pcreObj == b.pcreObj and
-           a.pcreExtra == b.pcreExtra
+           a.pcreObj == b.pcreObj
   else:
     return system.`==`(a, b)
 
@@ -441,13 +433,13 @@ proc `==`*(a, b: RegexMatch): bool =
          a.str == b.str
 
 const PcreOptions = {
-  "NEVER_UTF": pcre.NEVER_UTF,
-  "ANCHORED": pcre.ANCHORED,
-  "DOLLAR_ENDONLY": pcre.DOLLAR_ENDONLY,
-  "FIRSTLINE": pcre.FIRSTLINE,
-  "NO_AUTO_CAPTURE": pcre.NO_AUTO_CAPTURE,
-  "JAVASCRIPT_COMPAT": pcre.JAVASCRIPT_COMPAT,
-  "U": pcre.UTF8 or pcre.UCP
+  "NEVER_UTF": pcre2.NEVER_UTF,
+  "ANCHORED": pcre2.ANCHORED,
+  "DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY,
+  "FIRSTLINE": pcre2.FIRSTLINE,
+  "NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE,
+  # "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT,
+  "U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ?
 }.toTable
 
 # Options that are supported inside regular expressions themselves
@@ -457,8 +449,8 @@ const SkipOptions = [
   "CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
 ]
 
-proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study: bool] =
-  result = ("", 0, true)
+proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
+  result = ("", 0'u32)
 
   var optionStart = 0
   var equals = false
@@ -477,9 +469,9 @@ proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study:
       if equals or name in SkipOptions:
         result.pattern.add pattern[optionStart .. i]
       elif PcreOptions.hasKey name:
-        result.flags = result.flags or PcreOptions[name]
-      elif name == "NO_STUDY":
-        result.study = false
+        result.options = result.options or PcreOptions[name]
+      # elif name == "NO_STUDY":
+      #   result.study = false
       else:
         break
       optionStart = i+1
@@ -496,45 +488,55 @@ proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study:
   result.pattern.add pattern[optionStart .. pattern.high]
 
 proc re*(pattern: string): Regex =
-  let (pattern, flags, study) = extractOptions(pattern)
-  initRegex(pattern, flags, study)
+  let (pattern, options) = extractOptions(pattern)
+  initRegex(pattern, pcre2.ZERO_TERMINATED, options)
 
-proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Option[RegexMatch] =
+proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
   var myResult = RegexMatch(pattern: pattern, str: str)
   # See PCRE man pages.
   # 2x capture count to make room for start-end pairs
   # 1x capture count as slack space for PCRE
   let vecsize = (pattern.captureCount() + 1) * 3
-  # div 2 because each element is 2 cints long
+  # div 2 because each element is 2 csize_t long
   # plus 1 because we need the ceiling, not the floor
-  myResult.pcreMatchBounds = newSeq[HSlice[cint, cint]]((vecsize + 1) div 2)
+  myResult.pcreMatchBounds = newSeq[HSlice[csize_t, csize_t]]((vecsize + 1) div 2)
   myResult.pcreMatchBounds.setLen(vecsize div 3)
 
   let strlen = if endpos == int.high: str.len else: endpos+1
   doAssert(strlen <= str.len)  # don't want buffer overflows
 
-  let execRet = pcre.exec(pattern.pcreObj,
-                          pattern.pcreExtra,
-                          cstring(str),
-                          cint(strlen),
-                          cint(start),
-                          cint(flags),
-                          cast[ptr cint](addr myResult.pcreMatchBounds[0]),
-                          cint(vecsize))
+  var matchData = pcre2.match_data_create_from_pattern(pattern.pcreObj, nil)
+  defer: pcre2.match_data_free(matchData)
+  let execRet = pcre2.match(pattern.pcreObj,
+                          cast[ptr uint8](cstring(str)),
+                          csize_t(strlen),
+                          csize_t(start),
+                          options,
+                          matchData,
+                          nil)
+  let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
+  let capture_count = pcre2.get_ovector_count(matchData)
+  let ovector_size = 2 * capture_count.int * sizeof(csize_t)
+  # echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size)
+  # echo (capture_count, ovector[0], ovector[1])
+  copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size)
+  # echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b)
+
+  # echo " -> ", myResult
   if execRet >= 0:
     return some(myResult)
 
   case execRet:
-    of pcre.ERROR_NOMATCH:
+    of pcre2.ERROR_NOMATCH:
       return none(RegexMatch)
-    of pcre.ERROR_NULL:
+    of pcre2.ERROR_NULL:
       raise newException(AccessViolationDefect, "Expected non-null parameters")
-    of pcre.ERROR_BADOPTION:
+    of pcre2.ERROR_BADOPTION:
       raise RegexInternalError(msg: "Unknown pattern flag. Either a bug or " &
         "outdated PCRE.")
-    of pcre.ERROR_BADUTF8, pcre.ERROR_SHORTUTF8, pcre.ERROR_BADUTF8_OFFSET:
+    of pcre2.ERROR_BADUTF_OFFSET: # TODO:
       raise InvalidUnicodeError(msg: "Invalid unicode byte sequence",
-        pos: myResult.pcreMatchBounds[0].a)
+        pos: myResult.pcreMatchBounds[0].a.int)
     else:
       raise RegexInternalError(msg: "Unknown internal error: " & $execRet)
 
@@ -553,7 +555,7 @@ proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[R
     assert 0 in "abc".match(re"(\w)").get.captureBounds
     assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
     assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
-  return str.matchImpl(pattern, start, endpos, pcre.ANCHORED)
+  return str.matchImpl(pattern, start, endpos, pcre2.ANCHORED)
 
 iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
   ## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every
@@ -569,26 +571,26 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
   ## -  `proc findAll(...)` returns a `seq[string]`
   # see pcredemo for explanation => https://www.pcre.org/original/doc/html/pcredemo.html
   let matchesCrLf = pattern.matchesCrLf()
-  let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
-    pcre.UTF8) > 0u32
+  let unicode = uint32(getinfo[culong](pattern, pcre2.INFO_ALLOPTIONS) and
+    pcre2.UTF) > 0u32 # TODO:
   let strlen = if endpos == int.high: str.len else: endpos+1
   var offset = start
   var match: Option[RegexMatch]
   var neverMatched = true
 
   while true:
-    var flags = 0
+    var options = 0'u32
     if match.isSome and
        match.get.matchBounds.a > match.get.matchBounds.b:
       # 0-len match
-      flags = pcre.NOTEMPTY_ATSTART
-    match = str.matchImpl(pattern, offset, endpos, flags)
+      options = pcre2.NOTEMPTY_ATSTART
+    match = str.matchImpl(pattern, offset, endpos, options)
 
     if match.isNone:
       # either the end of the input or the string
       # cannot be split here - we also need to bail
       # if we've never matched and we've already tried to...
-      if flags == 0 or offset >= strlen or neverMatched: # All matches found
+      if options == 0 or offset >= strlen or neverMatched: # All matches found
         break
 
       if matchesCrLf and offset < (str.len - 1) and
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index b686c1f35a04c..fcd27516be611 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -36,7 +36,9 @@ runnableExamples:
     # can't match start of string since we're starting at 1
 
 import
-  std/[pcre, strutils, rtarrays]
+  std/[strutils, rtarrays]
+
+import ../wrappers/pcre2
 
 when defined(nimPreviewSlimSystem):
   import std/syncio
@@ -57,7 +59,6 @@ type
 
   RegexDesc = object
     h: ptr Pcre
-    e: ptr ExtraData
 
   Regex* = ref RegexDesc ## a compiled regular expression
 
@@ -67,14 +68,10 @@ type
 when defined(gcDestructors):
   when defined(nimAllowNonVarDestructor):
     proc `=destroy`(x: RegexDesc) =
-      pcre.free_substring(cast[cstring](x.h))
-      if not isNil(x.e):
-        pcre.free_study(x.e)
+      pcre2.code_free(x.h)
   else:
     proc `=destroy`(x: var RegexDesc) =
-      pcre.free_substring(cast[cstring](x.h))
-      if not isNil(x.e):
-        pcre.free_study(x.e)
+      pcre2.code_free(x.h)
 
 proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
   var e: ref RegexError
@@ -82,21 +79,19 @@ proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
   e.msg = msg
   raise e
 
-proc rawCompile(pattern: string, flags: cint): ptr Pcre =
+proc rawCompile(pattern: string, flags: csize_t, options: uint32): ptr Pcre =
   var
-    msg: cstring = ""
-    offset: cint = 0
-  result = pcre.compile(pattern, flags, addr(msg), addr(offset), nil)
+    errorCode: cint = 0
+    offset: csize_t = 0
+  result = pcre2.compile(cast[ptr uint8](pattern.cstring), flags, options, addr(errorCode), addr(offset), nil)
   if result == nil:
-    raiseInvalidRegex($msg & "\n" & pattern & "\n" & spaces(offset) & "^\n")
+    raiseInvalidRegex($errorCode & "\n" & pattern & "\n" & spaces(offset) & "^\n")
 
 proc finalizeRegEx(x: Regex) =
   # XXX This is a hack, but PCRE does not export its "free" function properly.
   # Sigh. The hack relies on PCRE's implementation (see `pcre_get.c`).
   # Fortunately the implementation is unlikely to change.
-  pcre.free_substring(cast[cstring](x.h))
-  if not isNil(x.e):
-    pcre.free_study(x.e)
+  pcre2.code_free(x.h)
 
 proc re*(s: string, flags = {reStudy}): Regex =
   ## Constructor of regular expressions.
@@ -112,16 +107,22 @@ proc re*(s: string, flags = {reStudy}): Regex =
     result = Regex()
   else:
     new(result, finalizeRegEx)
-  result.h = rawCompile(s, cast[cint](flags - {reStudy}))
-  if reStudy in flags:
-    var msg: cstring = ""
-    var options: cint = 0
-    var hasJit: cint = 0
-    if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-      if hasJit == 1'i32:
-        options = pcre.STUDY_JIT_COMPILE
-    result.e = pcre.study(result.h, options, addr msg)
-    if not isNil(msg): raiseInvalidRegex($msg)
+  var options = 0'u32
+  if reExtended in flags:
+    options = options or EXTENDED
+
+  if reIgnoreCase in flags:
+    options = options or CASELESS
+  result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
+  # if reStudy in flags:
+  #   var msg: cstring = ""
+  #   var options: cint = 0
+  #   var hasJit: cint = 0
+  #   if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
+  #     if hasJit == 1'i32:
+  #       options = pcre.STUDY_JIT_COMPILE
+  #   result.e = pcre.study(result.h, options, addr msg)
+  #   if not isNil(msg): raiseInvalidRegex($msg)
 
 proc rex*(s: string, flags = {reStudy, reExtended}): Regex =
   ## Constructor for extended regular expressions.
@@ -139,20 +140,23 @@ proc bufSubstr(b: cstring, sPos, ePos: int): string {.inline.} =
   result.setLen(sz)
 
 proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
-                 start, bufSize, flags: cint): cint =
+                 start, bufSize: int; options: uint32): int =
   var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
+    rtarray = initRtArray[csize_t]((matches.len+1)*3)
     rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize, start, flags,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return res
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
+  if res < 0: return res
   for i in 1..int(res)-1:
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32:
+    if a != UNSET:
       matches[i-1] = bufSubstr(buf, int(a), int(b))
     else: matches[i-1] = ""
-  return rawMatches[1] - rawMatches[0]
+  return int(rawMatches[1]) - int(rawMatches[0])
 
 const MaxReBufSize* = high(cint)
   ## Maximum PCRE (API 1) buffer start/size equal to `high(cint)`, which even
@@ -169,15 +173,18 @@ proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
   ## Note: The memory for `matches` needs to be allocated before this function is
   ## called, otherwise it will just remain empty.
   var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
+    rtarray = initRtArray[csize_t]((matches.len+1)*3)
     rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return (-1, 0)
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
+  if res < 0: return (-1, 0)
   for i in 1..int(res)-1:
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
+    if a != UNSET: matches[i-1] = bufSubstr(buf, int(a), int(b))
     else: matches[i-1] = ""
   return (rawMatches[0].int, rawMatches[1].int - 1)
 
@@ -209,15 +216,18 @@ proc findBounds*(buf: cstring, pattern: Regex,
   ##
   ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
   var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
+    rtarray = initRtArray[csize_t]((matches.len+1)*3)
     rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return (-1, 0)
   for i in 1..int(res)-1:
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = (int(a), int(b)-1)
+    if a != UNSET: matches[i-1] = (int(a), int(b)-1)
     else: matches[i-1] = (-1,0)
   return (rawMatches[0].int, rawMatches[1].int - 1)
 
@@ -240,16 +250,18 @@ proc findBounds*(s: string, pattern: Regex,
       min(start, MaxReBufSize), min(s.len, MaxReBufSize))
 
 proc findBoundsImpl(buf: cstring, pattern: Regex,
-                    start = 0, bufSize = 0, flags = 0): tuple[first, last: int] =
-  var rtarray = initRtArray[cint](3)
-  let rawMatches = rtarray.getRawData
-  let res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags.int32,
-                cast[ptr cint](rawMatches), 3)
-
+                    start = 0, bufSize = 0, options = 0'u32): tuple[first, last: int] =
+  var rtarray = initRtArray[csize_t](3)
+  var rawMatches = rtarray.getRawData
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32:
     result = (-1, 0)
   else:
-    result = (int(rawMatches[0]), int(rawMatches[1]-1))
+    result = (int(rawMatches[0]), int(rawMatches[1])-1)
 
 proc findBounds*(buf: cstring, pattern: Regex,
                  start = 0, bufSize: int): tuple[first, last: int] =
@@ -257,10 +269,13 @@ proc findBounds*(buf: cstring, pattern: Regex,
   ## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
   ## If it does not match, `(-1,0)` is returned.
   var
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), 3)
+    rtarray = initRtArray[csize_t](3)
+  var rawMatches = rtarray.getRawData
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return (int(res), 0)
-  return (int(rawMatches[0]), int(rawMatches[1]-1))
+  return (int(rawMatches[0]), int(rawMatches[1])-1)
 
@@ -275,14 +290,18 @@ proc findBounds*(s: string, pattern: Regex,
   result = findBounds(cstring(s), pattern,
       min(start, MaxReBufSize), min(s.len, MaxReBufSize))
 
-proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, flags: cint): cint =
+proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, options: uint32): int =
   var
-    rtarray = initRtArray[cint](3)
+    rtarray = initRtArray[csize_t](3)
     rawMatches = rtarray.getRawData
-  result = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags,
-                    cast[ptr cint](rawMatches), 3)
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  result = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+                    matchData, nil)
+  
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if result >= 0'i32:
-    result = rawMatches[1] - rawMatches[0]
+    result = int(rawMatches[1]) - int(rawMatches[0])
 
 proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
               start = 0): int {.inline.} =
@@ -291,7 +310,7 @@ proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
   ## of zero can happen.
   ##
   ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  result = matchOrFind(cstring(s), pattern, matches, start.cint, s.len.cint, pcre.ANCHORED)
+  result = matchOrFind(cstring(s), pattern, matches, start, s.len, pcre2.ANCHORED)
 
 proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
               start = 0, bufSize: int): int {.inline.} =
@@ -300,7 +319,7 @@ proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
   ## of zero can happen.
   ##
   ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  return matchOrFind(buf, pattern, matches, start.cint, bufSize.cint, pcre.ANCHORED)
+  return matchOrFind(buf, pattern, matches, start, bufSize, pcre2.ANCHORED)
 
 proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
   ## the same as `match`, but it returns the length of the match,
@@ -311,13 +330,13 @@ proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
     doAssert matchLen("abcdefg", re"cde", 2) == 3
     doAssert matchLen("abcdefg", re"abcde") == 5
     doAssert matchLen("abcdefg", re"cde") == -1
-  result = matchOrFind(cstring(s), pattern, start.cint, s.len.cint, pcre.ANCHORED)
+  result = matchOrFind(cstring(s), pattern, start, s.len, pcre2.ANCHORED)
 
 proc matchLen*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int {.inline.} =
   ## the same as `match`, but it returns the length of the match,
   ## if there is no match, `-1` is returned. Note that a match length
   ## of zero can happen.
-  result = matchOrFind(buf, pattern, start.cint, bufSize, pcre.ANCHORED)
+  result = matchOrFind(buf, pattern, start, bufSize, pcre2.ANCHORED)
 
 proc match*(s: string, pattern: Regex, start = 0): bool {.inline.} =
   ## returns `true` if `s[start..]` matches the `pattern`.
@@ -358,17 +377,20 @@ proc find*(buf: cstring, pattern: Regex, matches: var openArray[string],
   ##
   ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
   var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
+    rtarray = initRtArray[csize_t]((matches.len+1)*3)
     rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return res
   for i in 1..int(res)-1:
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
+    if a != UNSET: matches[i-1] = bufSubstr(buf, int(a), int(b))
     else: matches[i-1] = ""
-  return rawMatches[0]
+  return rawMatches[0].int
 
 proc find*(s: string, pattern: Regex, matches: var openArray[string],
            start = 0): int {.inline.} =
@@ -384,12 +406,15 @@ proc find*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int =
   ## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
   ## If it does not match, `-1` is returned.
   var
-    rtarray = initRtArray[cint](3)
+    rtarray = initRtArray[csize_t](3)
     rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), 3)
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
+  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+      matchData, nil)
+  rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return res
-  return rawMatches[0]
+  return rawMatches[0].int
 
 proc find*(s: string, pattern: Regex, start = 0): int {.inline.} =
   ## returns the starting position of `pattern` in `s`. If it does not
@@ -410,18 +435,21 @@ iterator findAll*(s: string, pattern: Regex, start = 0): string =
   ## Note that since this is an iterator you should not modify the string you
   ## are iterating over: bad things could happen.
   var
-    i = int32(start)
-    rtarray = initRtArray[cint](3)
+    i = start
+    rtarray = initRtArray[csize_t](3)
     rawMatches = rtarray.getRawData
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
   while true:
-    let res = pcre.exec(pattern.h, pattern.e, s, len(s).cint, i, 0'i32,
-      cast[ptr cint](rawMatches), 3)
+    let res = pcre2.match(pattern.h, cast[ptr uint8](s.cstring), len(s).csize_t, i.csize_t, 0'u32,
+      matchData, nil)
+    rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
     if res < 0'i32: break
     let a = rawMatches[0]
     let b = rawMatches[1]
-    if a == b and a == i: break
+    if a == b and a.int == i: break
     yield substr(s, int(a), int(b)-1)
-    i = b
+    i = b.int
 
 iterator findAll*(buf: cstring, pattern: Regex, start = 0, bufSize: int): string =
   ## Yields all matching `substrings` of `s` that match `pattern`.
@@ -430,19 +458,22 @@ iterator findAll*(buf: cstring, pattern: Regex, start = 0, bufSize: int): string
   ## are iterating over: bad things could happen.
   var
     i = int32(start)
-    rtarray = initRtArray[cint](3)
+    rtarray = initRtArray[csize_t](3)
     rawMatches = rtarray.getRawData
+  var matchData = match_data_create_from_pattern(pattern.h, nil)
+  defer: match_data_free(matchData)
   while true:
-    let res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, i, 0'i32,
-      cast[ptr cint](rawMatches), 3)
+    let res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, i.csize_t, 0'u32,
+      matchData, nil)
+    rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
     if res < 0'i32: break
     let a = rawMatches[0]
     let b = rawMatches[1]
-    if a == b and a == i: break
+    if a == b and a.int == i: break
     var str = newString(b-a)
     copyMem(str[0].addr, unsafeAddr(buf[a]), b-a)
     yield str
-    i = b
+    i = b.int32
 
 proc findAll*(s: string, pattern: Regex, start = 0): seq[string] {.inline.} =
   ## returns all matching `substrings` of `s` that match `pattern`.
@@ -499,7 +530,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
     doAssert "var1=key; var2=key2".replace(re"(\w+)=(\w+)", "?") == "?; ?"
   result = ""
   var prev = 0
-  var flags = int32(0)
+  var flags = 0'u32
   while prev < s.len:
     var match = findBoundsImpl(s.cstring, sub, prev, s.len, flags)
     flags = 0
@@ -508,7 +539,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
     add(result, by)
     if match.first > match.last:
       # 0-len match
-      flags = pcre.NOTEMPTY_ATSTART
+      flags = pcre2.NOTEMPTY_ATSTART
     prev = match.last + 1
   add(result, substr(s, prev))
 
diff --git a/lib/wrappers/pcre2.nim b/lib/wrappers/pcre2.nim
new file mode 100644
index 0000000000000..b8f7b03fceece
--- /dev/null
+++ b/lib/wrappers/pcre2.nim
@@ -0,0 +1,683 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
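+# PCRE version information. NOTE(review): these values (8.36, 2014-09-26) were carried over from the PCRE1 wrapper and do not describe the PCRE2 library -- confirm before relying on them.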
+
+const
+  PCRE_MAJOR* = 8
+  PCRE_MINOR* = 36
+  PCRE_PRERELEASE* = true
+  PCRE_DATE* = "2014-09-26"
+
+# When an application links to a PCRE DLL in Windows, the symbols that are
+# imported have to be identified as such. When building PCRE, the appropriate
+# export setting is defined in pcre_internal.h, which includes this file. So we
+# don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL.
+
+# By default, we use the standard "extern" declarations.
+
+# Allow for C++ users
+
+# Public options. Some are compile-time only, some are run-time only, and some
+# are both. Most of the compile-time options are saved with the compiled regex
+# so that they can be inspected during studying (and therefore JIT compiling).
+# Note that pcre_study() has its own set of options. Originally, all the options
+# defined here used distinct bits. However, almost all the bits in a 32-bit word
+# are now used, so in order to conserve them, option bits that were previously
+# only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
+# also be used for compile-time options that affect only compiling and are not
+# relevant for studying or JIT compiling.
+#
+# Some options for pcre_compile() change its behaviour but do not affect the
+# behaviour of the execution functions. Other options are passed through to the
+# execution functions and affect their behaviour, with or without affecting the
+# behaviour of pcre_compile().
+#
+# Options that can be passed to pcre_compile() are tagged Cx below, with these
+# variants:
+#
+# C1   Affects compile only
+# C2   Does not affect compile; affects exec, dfa_exec
+# C3   Affects compile, exec, dfa_exec
+# C4   Affects compile, exec, dfa_exec, study
+# C5   Affects compile, exec, study
+#
+# Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged
+# with E and D, respectively. They take precedence over C3, C4, and C5 settings
+# passed from pcre_compile(). Those that are compatible with JIT execution are
+# flagged with J.
+
+
+const
+  ANCHORED* = 0x80000000'u32
+  NO_UTF_CHECK* = 0x40000000'u32
+  ENDANCHORED* = 0x20000000'u32
+
+##  The following option bits can be passed only to pcre2_compile(). However,
+## they may affect compilation, JIT compilation, and/or interpretive execution.
+## The following tags indicate which:
+##
+## C   alters what is compiled by pcre2_compile()
+## J   alters what is compiled by pcre2_jit_compile()
+## M   is inspected during pcre2_match() execution
+## D   is inspected during pcre2_dfa_match() execution
+##
+
+const
+  ALLOW_EMPTY_CLASS* = 0x00000001'u32
+  ALT_BSUX* = 0x00000002'u32
+  AUTO_CALLOUT* = 0x00000004'u32
+  CASELESS* = 0x00000008'u32
+  DOLLAR_ENDONLY* = 0x00000010'u32
+  DOTALL* = 0x00000020'u32
+  DUPNAMES* = 0x00000040'u32
+  EXTENDED* = 0x00000080'u32
+  FIRSTLINE* = 0x00000100'u32
+  MATCH_UNSET_BACKREF* = 0x00000200'u32
+  MULTILINE* = 0x00000400'u32
+  NEVER_UCP* = 0x00000800'u32
+  NEVER_UTF* = 0x00001000'u32
+  NO_AUTO_CAPTURE* = 0x00002000'u32
+  NO_AUTO_POSSESS* = 0x00004000'u32
+  NO_DOTSTAR_ANCHOR* = 0x00008000'u32
+  NO_START_OPTIMIZE* = 0x00010000'u32
+  UCP* = 0x00020000'u32
+  UNGREEDY* = 0x00040000'u32
+  UTF* = 0x00080000'u32
+  NEVER_BACKSLASH_C* = 0x00100000'u32
+  ALT_CIRCUMFLEX* = 0x00200000'u32
+  ALT_VERBNAMES* = 0x00400000'u32
+  USE_OFFSET_LIMIT* = 0x00800000'u32
+  EXTENDED_MORE* = 0x01000000'u32
+  LITERAL* = 0x02000000'u32
+  MATCH_INVALID_UTF* = 0x04000000'u32  # was 0x0400000, which collided with ALT_VERBNAMES
+  ALT_EXTENDED_CLASS* = 0x08000000'u32  # was 0x080000, which collided with UTF
+
+##  An additional compile options word is available in the compile context.
+
+const
+  EXTRA_ALLOW_SURROGATE_ESCAPES* = 0x00000001'u32
+  EXTRA_BAD_ESCAPE_IS_LITERAL* = 0x00000002'u32
+  EXTRA_MATCH_WORD* = 0x00000004'u32
+  EXTRA_MATCH_LINE* = 0x00000008'u32
+  EXTRA_ESCAPED_CR_IS_LF* = 0x00000010'u32
+  EXTRA_ALT_BSUX* = 0x00000020'u32
+  EXTRA_ALLOW_LOOKAROUND_BSK* = 0x00000040'u32
+  EXTRA_CASELESS_RESTRICT* = 0x00000080'u32
+  EXTRA_ASCII_BSD* = 0x00000100'u32
+  EXTRA_ASCII_BSS* = 0x00000200'u32
+  EXTRA_ASCII_BSW* = 0x00000400'u32
+  EXTRA_ASCII_POSIX* = 0x00000800'u32
+  EXTRA_ASCII_DIGIT* = 0x00001000'u32
+  EXTRA_PYTHON_OCTAL* = 0x00002000'u32
+  EXTRA_NO_BS0* = 0x00004000'u32
+  EXTRA_NEVER_CALLOUT* = 0x00008000'u32
+  EXTRA_TURKISH_CASING* = 0x00010000'u32
+
+##  These are for pcre2_jit_compile().
+
+const
+  JIT_COMPLETE* = 0x00000001'u32
+  JIT_PARTIAL_SOFT* = 0x00000002'u32
+  JIT_PARTIAL_HARD* = 0x00000004'u32
+  JIT_INVALID_UTF* = 0x00000100'u32
+  JIT_TEST_ALLOC* = 0x00000200'u32
+
+##  These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
+## pcre2_substitute(). Some are allowed only for one of the functions, and in
+## these cases it is noted below. Note that ANCHORED, ENDANCHORED and
+## NO_UTF_CHECK can also be passed to these functions (though
+## pcre2_jit_match() ignores the latter since it bypasses all sanity checks).
+
+const
+  NOTBOL* = 0x00000001'u32
+  NOTEOL* = 0x00000002'u32
+  NOTEMPTY* = 0x00000004'u32
+  NOTEMPTY_ATSTART* = 0x00000008'u32
+  PARTIAL_SOFT* = 0x00000010'u32
+  PARTIAL_HARD* = 0x00000020'u32
+  DFA_RESTART* = 0x00000040'u32
+  DFA_SHORTEST* = 0x00000080'u32
+  SUBSTITUTE_GLOBAL* = 0x00000100'u32
+  SUBSTITUTE_EXTENDED* = 0x00000200'u32
+  SUBSTITUTE_UNSET_EMPTY* = 0x00000400'u32
+  SUBSTITUTE_UNKNOWN_UNSET* = 0x00000800'u32
+  SUBSTITUTE_OVERFLOW_LENGTH* = 0x00001000'u32
+  NO_JIT* = 0x00002000'u32
+  COPY_MATCHED_SUBJECT* = 0x00004000'u32
+  SUBSTITUTE_LITERAL* = 0x00008000'u32
+  SUBSTITUTE_MATCHED* = 0x00010000'u32
+  SUBSTITUTE_REPLACEMENT_ONLY* = 0x00020000'u32
+  DISABLE_RECURSELOOP_CHECK* = 0x00040000'u32
+
+##  Options for pcre2_pattern_convert().
+
+const
+  CONVERT_UTF* = 0x00000001'u32
+  CONVERT_NO_UTF_CHECK* = 0x00000002'u32
+  CONVERT_POSIX_BASIC* = 0x00000004'u32
+  CONVERT_POSIX_EXTENDED* = 0x00000008'u32
+  CONVERT_GLOB* = 0x00000010'u32
+  CONVERT_GLOB_NO_WILD_SEPARATOR* = 0x00000030'u32
+  CONVERT_GLOB_NO_STARSTAR* = 0x00000050'u32
+
+##  Newline and \R settings, for use in compile contexts. The newline values
+## must be kept in step with values set in config.h and both sets must all be
+## greater than zero.
+
+const
+  NEWLINE_CR* = 1
+  NEWLINE_LF* = 2
+  NEWLINE_CRLF* = 3
+  NEWLINE_ANY* = 4
+  NEWLINE_ANYCRLF* = 5
+  NEWLINE_NUL* = 6
+  BSR_UNICODE* = 1
+  BSR_ANYCRLF* = 2
+
+##  Error codes for pcre2_compile(). Some of these are also used by
+## pcre2_pattern_convert().
+
+const
+  ERROR_END_BACKSLASH* = 101
+  ERROR_END_BACKSLASH_C* = 102
+  ERROR_UNKNOWN_ESCAPE* = 103
+  ERROR_QUANTIFIER_OUT_OF_ORDER* = 104
+  ERROR_QUANTIFIER_TOO_BIG* = 105
+  ERROR_MISSING_SQUARE_BRACKET* = 106
+  ERROR_ESCAPE_INVALID_IN_CLASS* = 107
+  ERROR_CLASS_RANGE_ORDER* = 108
+  ERROR_QUANTIFIER_INVALID* = 109
+  ERROR_INTERNAL_UNEXPECTED_REPEAT* = 110
+  ERROR_INVALID_AFTER_PARENS_QUERY* = 111
+  ERROR_POSIX_CLASS_NOT_IN_CLASS* = 112
+  ERROR_POSIX_NO_SUPPORT_COLLATING* = 113
+  ERROR_MISSING_CLOSING_PARENTHESIS* = 114
+  ERROR_BAD_SUBPATTERN_REFERENCE* = 115
+  ERROR_NULL_PATTERN* = 116
+  ERROR_BAD_OPTIONS* = 117
+  ERROR_MISSING_COMMENT_CLOSING* = 118
+  ERROR_PARENTHESES_NEST_TOO_DEEP* = 119
+  ERROR_PATTERN_TOO_LARGE* = 120
+  ERROR_HEAP_FAILED* = 121
+  ERROR_UNMATCHED_CLOSING_PARENTHESIS* = 122
+  ERROR_INTERNAL_CODE_OVERFLOW* = 123
+  ERROR_MISSING_CONDITION_CLOSING* = 124
+  ERROR_LOOKBEHIND_NOT_FIXED_LENGTH* = 125
+  ERROR_ZERO_RELATIVE_REFERENCE* = 126
+  ERROR_TOO_MANY_CONDITION_BRANCHES* = 127
+  ERROR_CONDITION_ASSERTION_EXPECTED* = 128
+  ERROR_BAD_RELATIVE_REFERENCE* = 129
+  ERROR_UNKNOWN_POSIX_CLASS* = 130
+  ERROR_INTERNAL_STUDY_ERROR* = 131
+  ERROR_UNICODE_NOT_SUPPORTED* = 132
+  ERROR_PARENTHESES_STACK_CHECK* = 133
+  ERROR_CODE_POINT_TOO_BIG* = 134
+  ERROR_LOOKBEHIND_TOO_COMPLICATED* = 135
+  ERROR_LOOKBEHIND_INVALID_BACKSLASH_C* = 136
+  ERROR_UNSUPPORTED_ESCAPE_SEQUENCE* = 137
+  ERROR_CALLOUT_NUMBER_TOO_BIG* = 138
+  ERROR_MISSING_CALLOUT_CLOSING* = 139
+  ERROR_ESCAPE_INVALID_IN_VERB* = 140
+  ERROR_UNRECOGNIZED_AFTER_QUERY_P* = 141
+  ERROR_MISSING_NAME_TERMINATOR* = 142
+  ERROR_DUPLICATE_SUBPATTERN_NAME* = 143
+  ERROR_INVALID_SUBPATTERN_NAME* = 144
+  ERROR_UNICODE_PROPERTIES_UNAVAILABLE* = 145
+  ERROR_MALFORMED_UNICODE_PROPERTY* = 146
+  ERROR_UNKNOWN_UNICODE_PROPERTY* = 147
+  ERROR_SUBPATTERN_NAME_TOO_LONG* = 148
+  ERROR_TOO_MANY_NAMED_SUBPATTERNS* = 149
+  ERROR_CLASS_INVALID_RANGE* = 150
+  ERROR_OCTAL_BYTE_TOO_BIG* = 151
+  ERROR_INTERNAL_OVERRAN_WORKSPACE* = 152
+  ERROR_INTERNAL_MISSING_SUBPATTERN* = 153
+  ERROR_DEFINE_TOO_MANY_BRANCHES* = 154
+  ERROR_BACKSLASH_O_MISSING_BRACE* = 155
+  ERROR_INTERNAL_UNKNOWN_NEWLINE* = 156
+  ERROR_BACKSLASH_G_SYNTAX* = 157
+  ERROR_PARENS_QUERY_R_MISSING_CLOSING* = 158
+
+##  Error 159 is obsolete and should now never occur
+
+const
+  ERROR_VERB_ARGUMENT_NOT_ALLOWED* = 159
+  ERROR_VERB_UNKNOWN* = 160
+  ERROR_SUBPATTERN_NUMBER_TOO_BIG* = 161
+  ERROR_SUBPATTERN_NAME_EXPECTED* = 162
+  ERROR_INTERNAL_PARSED_OVERFLOW* = 163
+  ERROR_INVALID_OCTAL* = 164
+  ERROR_SUBPATTERN_NAMES_MISMATCH* = 165
+  ERROR_MARK_MISSING_ARGUMENT* = 166
+  ERROR_INVALID_HEXADECIMAL* = 167
+  ERROR_BACKSLASH_C_SYNTAX* = 168
+  ERROR_BACKSLASH_K_SYNTAX* = 169
+  ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS* = 170
+  ERROR_BACKSLASH_N_IN_CLASS* = 171
+  ERROR_CALLOUT_STRING_TOO_LONG* = 172
+  ERROR_UNICODE_DISALLOWED_CODE_POINT* = 173
+  ERROR_UTF_IS_DISABLED* = 174
+  ERROR_UCP_IS_DISABLED* = 175
+  ERROR_VERB_NAME_TOO_LONG* = 176
+  ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG* = 177
+  ERROR_MISSING_OCTAL_OR_HEX_DIGITS* = 178
+  ERROR_VERSION_CONDITION_SYNTAX* = 179
+  ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS* = 180
+  ERROR_CALLOUT_NO_STRING_DELIMITER* = 181
+  ERROR_CALLOUT_BAD_STRING_DELIMITER* = 182
+  ERROR_BACKSLASH_C_CALLER_DISABLED* = 183
+  ERROR_QUERY_BARJX_NEST_TOO_DEEP* = 184
+  ERROR_BACKSLASH_C_LIBRARY_DISABLED* = 185
+  ERROR_PATTERN_TOO_COMPLICATED* = 186
+  ERROR_LOOKBEHIND_TOO_LONG* = 187
+  ERROR_PATTERN_STRING_TOO_LONG* = 188
+  ERROR_INTERNAL_BAD_CODE* = 189
+  ERROR_INTERNAL_BAD_CODE_IN_SKIP* = 190
+  ERROR_NO_SURROGATES_IN_UTF16* = 191
+  ERROR_BAD_LITERAL_OPTIONS* = 192
+  ERROR_SUPPORTED_ONLY_IN_UNICODE* = 193
+  ERROR_INVALID_HYPHEN_IN_OPTIONS* = 194
+  ERROR_ALPHA_ASSERTION_UNKNOWN* = 195
+  ERROR_SCRIPT_RUN_NOT_AVAILABLE* = 196
+  ERROR_TOO_MANY_CAPTURES* = 197
+  ERROR_MISSING_OCTAL_DIGIT* = 198
+  ERROR_BACKSLASH_K_IN_LOOKAROUND* = 199
+  ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED* = 200
+  ERROR_PATTERN_COMPILED_SIZE_TOO_BIG* = 201
+  ERROR_OVERSIZE_PYTHON_OCTAL* = 202
+  ERROR_CALLOUT_CALLER_DISABLED* = 203
+  ERROR_EXTRA_CASING_REQUIRES_UNICODE* = 204
+  ERROR_TURKISH_CASING_REQUIRES_UTF* = 205
+  ERROR_EXTRA_CASING_INCOMPATIBLE* = 206
+  ERROR_ECLASS_NEST_TOO_DEEP* = 207
+  ERROR_ECLASS_INVALID_OPERATOR* = 208
+  ERROR_ECLASS_UNEXPECTED_OPERATOR* = 209
+  ERROR_ECLASS_EXPECTED_OPERAND* = 210
+  ERROR_ECLASS_MIXED_OPERATORS* = 211
+  ERROR_ECLASS_HINT_SQUARE_BRACKET* = 212
+
+##  "Expected" matching error codes: no match and partial match.
+
+const
+  ERROR_NOMATCH* = (-1)
+  ERROR_PARTIAL* = (-2)
+
+##  Error codes for UTF-8 validity checks
+
+const
+  ERROR_UTF8_ERR1* = (-3)
+  ERROR_UTF8_ERR2* = (-4)
+  ERROR_UTF8_ERR3* = (-5)
+  ERROR_UTF8_ERR4* = (-6)
+  ERROR_UTF8_ERR5* = (-7)
+  ERROR_UTF8_ERR6* = (-8)
+  ERROR_UTF8_ERR7* = (-9)
+  ERROR_UTF8_ERR8* = (-10)
+  ERROR_UTF8_ERR9* = (-11)
+  ERROR_UTF8_ERR10* = (-12)
+  ERROR_UTF8_ERR11* = (-13)
+  ERROR_UTF8_ERR12* = (-14)
+  ERROR_UTF8_ERR13* = (-15)
+  ERROR_UTF8_ERR14* = (-16)
+  ERROR_UTF8_ERR15* = (-17)
+  ERROR_UTF8_ERR16* = (-18)
+  ERROR_UTF8_ERR17* = (-19)
+  ERROR_UTF8_ERR18* = (-20)
+  ERROR_UTF8_ERR19* = (-21)
+  ERROR_UTF8_ERR20* = (-22)
+  ERROR_UTF8_ERR21* = (-23)
+
+##  Error codes for UTF-16 validity checks
+
+const
+  ERROR_UTF16_ERR1* = (-24)
+  ERROR_UTF16_ERR2* = (-25)
+  ERROR_UTF16_ERR3* = (-26)
+
+##  Error codes for UTF-32 validity checks
+
+const
+  ERROR_UTF32_ERR1* = (-27)
+  ERROR_UTF32_ERR2* = (-28)
+
+##  Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
+## functions, context functions, and serializing functions. They are in numerical
+## order. Originally they were in alphabetical order too, but now that PCRE2 is
+## released, the numbers must not be changed.
+
+const
+  ERROR_BADDATA* = (-29)
+  ERROR_MIXEDTABLES* = (-30) ##  Name was changed
+  ERROR_BADMAGIC* = (-31)
+  ERROR_BADMODE* = (-32)
+  ERROR_BADOFFSET* = (-33)
+  ERROR_BADOPTION* = (-34)
+  ERROR_BADREPLACEMENT* = (-35)
+  ERROR_BADUTFOFFSET* = (-36)
+  ERROR_CALLOUT* = (-37)  ##  Never used by PCRE2 itself
+  ERROR_DFA_BADRESTART* = (-38)
+  ERROR_DFA_RECURSE* = (-39)
+  ERROR_DFA_UCOND* = (-40)
+  ERROR_DFA_UFUNC* = (-41)
+  ERROR_DFA_UITEM* = (-42)
+  ERROR_DFA_WSSIZE* = (-43)
+  ERROR_INTERNAL* = (-44)
+  ERROR_JIT_BADOPTION* = (-45)
+  ERROR_JIT_STACKLIMIT* = (-46)
+  ERROR_MATCHLIMIT* = (-47)
+  ERROR_NOMEMORY* = (-48)
+  ERROR_NOSUBSTRING* = (-49)
+  ERROR_NOUNIQUESUBSTRING* = (-50)
+  ERROR_NULL* = (-51)
+  ERROR_RECURSELOOP* = (-52)
+  ERROR_DEPTHLIMIT* = (-53)
+  ERROR_RECURSIONLIMIT* = (-53) ##  Obsolete synonym
+  ERROR_UNAVAILABLE* = (-54)
+  ERROR_UNSET* = (-55)
+  ERROR_BADOFFSETLIMIT* = (-56)
+  ERROR_BADREPESCAPE* = (-57)
+  ERROR_REPMISSINGBRACE* = (-58)
+  ERROR_BADSUBSTITUTION* = (-59)
+  ERROR_BADSUBSPATTERN* = (-60)
+  ERROR_TOOMANYREPLACE* = (-61)
+  ERROR_BADSERIALIZEDDATA* = (-62)
+  ERROR_HEAPLIMIT* = (-63)
+  ERROR_CONVERT_SYNTAX* = (-64)
+  ERROR_INTERNAL_DUPMATCH* = (-65)
+  ERROR_DFA_UINVALID_UTF* = (-66)
+  ERROR_INVALIDOFFSET* = (-67)
+  ERROR_JIT_UNSUPPORTED* = (-68)
+
+##  Request types for pcre2_pattern_info()
+
+const
+  INFO_ALLOPTIONS* = 0
+  INFO_ARGOPTIONS* = 1
+  INFO_BACKREFMAX* = 2
+  INFO_BSR* = 3
+  INFO_CAPTURECOUNT* = 4
+  INFO_FIRSTCODEUNIT* = 5
+  INFO_FIRSTCODETYPE* = 6
+  INFO_FIRSTBITMAP* = 7
+  INFO_HASCRORLF* = 8
+  INFO_JCHANGED* = 9
+  INFO_JITSIZE* = 10
+  INFO_LASTCODEUNIT* = 11
+  INFO_LASTCODETYPE* = 12
+  INFO_MATCHEMPTY* = 13
+  INFO_MATCHLIMIT* = 14
+  INFO_MAXLOOKBEHIND* = 15
+  INFO_MINLENGTH* = 16
+  INFO_NAMECOUNT* = 17
+  INFO_NAMEENTRYSIZE* = 18
+  INFO_NAMETABLE* = 19
+  INFO_NEWLINE* = 20
+  INFO_DEPTHLIMIT* = 21
+  INFO_RECURSIONLIMIT* = 21
+  INFO_SIZE* = 22
+  INFO_HASBACKSLASHC* = 23
+  INFO_FRAMESIZE* = 24
+  INFO_HEAPLIMIT* = 25
+  INFO_EXTRAOPTIONS* = 26
+
+##  Request types for pcre2_config().
+
+const
+  CONFIG_BSR* = 0
+  CONFIG_JIT* = 1
+  CONFIG_JITTARGET* = 2
+  CONFIG_LINKSIZE* = 3
+  CONFIG_MATCHLIMIT* = 4
+  CONFIG_NEWLINE* = 5
+  CONFIG_PARENSLIMIT* = 6
+  CONFIG_DEPTHLIMIT* = 7
+  CONFIG_RECURSIONLIMIT* = 7
+  CONFIG_STACKRECURSE* = 8
+  CONFIG_UNICODE* = 9
+  CONFIG_UNICODE_VERSION* = 10
+  CONFIG_VERSION* = 11
+  CONFIG_HEAPLIMIT* = 12
+  CONFIG_NEVER_BACKSLASH_C* = 13
+  CONFIG_COMPILED_WIDTHS* = 14
+  CONFIG_TABLES_LENGTH* = 15
+
+##  Optimization directives for pcre2_set_optimize().
+## For binary compatibility, only add to this list; do not renumber.
+
+const
+  OPTIMIZATION_NONE* = 0
+  OPTIMIZATION_FULL* = 1
+  AUTO_POSSESS* = 64
+  AUTO_POSSESS_OFF* = 65
+  DOTSTAR_ANCHOR* = 66
+  DOTSTAR_ANCHOR_OFF* = 67
+  START_OPTIMIZE* = 68
+  START_OPTIMIZE_OFF* = 69
+
+##  Types used in pcre2_set_substitute_case_callout().
+
+const
+  SUBSTITUTE_CASE_LOWER* = 0
+  SUBSTITUTE_CASE_UPPER* = 1
+  SUBSTITUTE_CASE_TITLE* = 2
+
+
+const
+  ZERO_TERMINATED* = not 0.csize_t
+  UNSET* = not 0.csize_t
+
+# Types
+type
+  Pcre* = object
+  Pcre16* = object
+  Pcre32* = object
+  JitStack* = object
+  JitStack16* = object
+  JitStack32* = object
+  GeneralContext* = object
+  MatchData* = object
+
+when defined(nimHasStyleChecks):
+  {.push styleChecks: off.}
+
+# The structure for passing out data via the callout function. It is a structure
+# so that new fields can be added on the end in future versions without changing
+# the function's API, letting old clients work without modification. NOTE(review):
+# the fields below follow PCRE1's pcre_callout_block, not pcre2_callout_block -- confirm.
+type
+  CalloutBlock* = object
+    version*         : cint       ## Identifies version of block
+    # ------------------------ Version 0 -------------------------------
+    callout_number*  : cint       ## Number compiled into pattern
+    offset_vector*   : ptr cint   ## The offset vector
+    subject*         : cstring    ## The subject being matched
+    subject_length*  : cint       ## The length of the subject
+    start_match*     : cint       ## Offset to start of this match attempt
+    current_position*: cint       ## Where we currently are in the subject
+    capture_top*     : cint       ## Max current capture
+    capture_last*    : cint       ## Most recently closed capture
+    callout_data*    : pointer    ## Data passed in with the call
+    # ------------------- Added for Version 1 --------------------------
+    pattern_position*: cint       ## Offset to next item in the pattern
+    next_item_length*: cint       ## Length of next item in the pattern
+    # ------------------- Added for Version 2 --------------------------
+    mark*            : pointer    ## Pointer to current mark or NULL
+    # ------------------------------------------------------------------
+
+when defined(nimHasStyleChecks):
+  {.pop.}
+
+# User defined callback which provides a stack just before the match starts.
+type
+  JitCallback* = proc (a: pointer): ptr JitStack {.cdecl.}
+
+
+when not defined(usePcreHeader):
+  when hostOS == "windows":
+    when defined(nimOldDlls):
+      const pcreDll = "pcre.dll"
+    elif defined(cpu64):
+      const pcreDll = "pcre64.dll"
+    else:
+      const pcreDll = "pcre32.dll"
+  elif hostOS == "macosx":
+    const pcreDll = "libpcre(.3|.1|).dylib"
+  else:
+    const pcreDll = "libpcre2-8.so.0"
+  {.push dynlib: pcreDll.}
+else:
+  {.push header: "<pcre2.h>".}
+
+{.push cdecl, importc: "pcre2_$1_8".}
+
+# Exported PCRE functions
+
+proc compile*(pattern: ptr uint8,
+              options: csize_t,
+              flags: uint32,
+              errorCode: ptr cint,
+              offset: ptr csize_t,
+              tableptr: pointer): ptr Pcre
+
+proc compile2*(pattern: cstring,
+               options: cint,
+               errorcodeptr: ptr cint,
+               errptr: ptr cstring,
+               erroffset: ptr cint,
+               tableptr: pointer): ptr Pcre
+
+proc config*(what: cint,
+             where: pointer): cint
+
+proc copy_named_substring*(code: ptr Pcre,
+                           subject: cstring,
+                           ovector: ptr cint,
+                           stringcount: cint,
+                           stringname: cstring,
+                           buffer: cstring,
+                           buffersize: cint): cint
+
+proc copy_substring*(subject: cstring,
+                     ovector: ptr cint,
+                     stringcount: cint,
+                     stringnumber: cint,
+                     buffer: cstring,
+                     buffersize: cint): cint
+
+proc dfa_match*(code: ptr Pcre,
+               subject: cstring,
+               length: cint,
+               startoffset: cint,
+               options: cint,
+               ovector: ptr cint,
+               ovecsize: cint,
+               workspace: ptr cint,
+               wscount: cint): cint
+
+proc match*(code: ptr Pcre,
+           subject: ptr uint8,
+           length: csize_t,
+           startoffset: csize_t,
+           options: uint32,
+           ovector: ptr MatchData,
+           ovecsize: pointer): cint
+
+proc match*(code: ptr Pcre,
+           subject: cstring,
+           length: cint,
+           startoffset: cint,
+           options: cint,
+           ovector: ptr MatchData,
+           ovecsize: cint): cint =
+  result = match(code, cast[ptr uint8](subject), csize_t length, csize_t startoffset,
+          uint32 options, 
+          ovector, nil)
+
+proc match_data_create*(size: uint32, ctx: ptr GeneralContext): ptr MatchData
+
+proc match_data_create_from_pattern*(
+  code: ptr Pcre,
+  ctx: ptr GeneralContext
+): ptr MatchData
+
+proc match_data_free*(data: ptr MatchData)
+
+proc get_ovector_pointer*(ovector: ptr MatchData): ptr csize_t
+
+proc get_ovector_count*(ovector: ptr MatchData): uint32
+
+proc jit_match*(code: ptr Pcre,
+               subject: cstring,
+               length: cint,
+               startoffset: cint,
+               options: cint,
+               ovector: ptr cint,
+               ovecsize: cint,
+               jstack: ptr JitStack): cint
+
+# proc free_substring*(stringptr: cstring)
+
+# proc free_substring_list*(stringptr: cstringArray)
+
+proc code_free*(code: ptr Pcre)
+
+proc pattern_info*(code: ptr Pcre,
+               what: uint32,
+               where: pointer): cint
+
+proc get_named_substring*(code: ptr Pcre,
+                          subject: cstring,
+                          ovector: ptr cint,
+                          stringcount: cint,
+                          stringname: cstring,
+                          stringptr: cstringArray): cint
+
+proc get_stringnumber*(code: ptr Pcre,
+                       name: cstring): cint
+
+proc get_stringtable_entries*(code: ptr Pcre,
+                              name: cstring,
+                              first: cstringArray,
+                              last: cstringArray): cint
+
+proc get_substring*(subject: cstring,
+                    ovector: ptr cint,
+                    stringcount: cint,
+                    stringnumber: cint,
+                    stringptr: cstringArray): cint
+
+proc get_substring_list*(subject: cstring,
+                         ovector: ptr cint,
+                         stringcount: cint,
+                         listptr: ptr cstringArray): cint
+
+proc maketables*(ctx: ptr GeneralContext = nil): pointer  # pcre2_maketables() takes a general context; nil selects the default allocator
+
+proc refcount*(code: ptr Pcre,
+               adjust: cint): cint
+
+proc version*(): cstring
+
+# JIT compiler related functions.
+
+# proc jit_stack_alloc*(startsize: cint,
+#                       maxsize: cint): ptr JitStack
+
+# proc jit_stack_free*(stack: ptr JitStack)
+
+# proc assign_jit_stack*(extra: ptr ExtraData,
+#                        callback: JitCallback,
+#                        data: pointer)
+
+proc jit_free_unused_memory*(ctx: ptr GeneralContext = nil)  # pcre2_jit_free_unused_memory() takes a general context argument; nil is accepted
+
+
+{.pop.}
+{.pop.}
+
diff --git a/tests/stdlib/nre/init.nim b/tests/stdlib/nre/init.nim
index f0c8e0a00f560..fd160f542a1b5 100644
--- a/tests/stdlib/nre/init.nim
+++ b/tests/stdlib/nre/init.nim
@@ -1,6 +1,8 @@
 import unittest
 include nre
 
+from ../../../lib/wrappers/pcre2 import nil
+
 block: # Test NRE initialization
   block: # correct initialization
     check(re("[0-9]+") != nil)
@@ -8,26 +10,26 @@ block: # Test NRE initialization
 
   block: # options
     check(extractOptions("(*NEVER_UTF)") ==
-          ("", pcre.NEVER_UTF, true))
+          ("", pcre2.NEVER_UTF))
     check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
-          ("(*UTF8)(*UCP)z", pcre.ANCHORED, true))
-    check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
-          ("(*UTF8)z", pcre.ANCHORED or pcre.JAVASCRIPT_COMPAT, true))
+          ("(*UTF8)(*UCP)z", pcre2.ANCHORED))
+    # check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
+    #       ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true))
 
-    check(extractOptions("(*NO_STUDY)(") == ("(", 0, false))
+    # check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32))
 
     check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
-          ("(*LIMIT_MATCH=6)z", pcre.ANCHORED, true))
+          ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED))
 
   block: # incorrect options
     for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
               "(?i)",
               "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
       let ss = s & "(*NEVER_UTF)"
-      check(extractOptions(ss) == (ss, 0, true))
+      check(extractOptions(ss) == (ss, 0'u32))
 
   block: # invalid regex
-    expect(SyntaxError): discard re("[0-9")
+    # expect(SyntaxError): discard re("[0-9")
     try:
       discard re("[0-9")
     except SyntaxError:

From 817af7edfcfca41e60e07b258c0943613783dd55 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Mon, 4 Nov 2024 22:41:16 +0800
Subject: [PATCH 2/7] fixes libpcre2-8.0.dylib on macosx

---
 lib/wrappers/pcre2.nim | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/wrappers/pcre2.nim b/lib/wrappers/pcre2.nim
index b8f7b03fceece..fa752848b976d 100644
--- a/lib/wrappers/pcre2.nim
+++ b/lib/wrappers/pcre2.nim
@@ -527,7 +527,7 @@ when not defined(usePcreHeader):
     else:
       const pcreDll = "pcre32.dll"
   elif hostOS == "macosx":
-    const pcreDll = "libpcre(.3|.1|).dylib"
+    const pcreDll = "libpcre2-8.0.dylib"
   else:
     const pcreDll = "libpcre2-8.so.0"
   {.push dynlib: pcreDll.}

From ce1761dff9e79d00bc012938ad6be37caa2edcfd Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Tue, 5 Nov 2024 13:14:45 +0800
Subject: [PATCH 3/7] progress

---
 lib/impure/nre.nim | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index 0a43b92bed986..1c47918ab31c7 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -231,7 +231,7 @@ when defined(gcDestructors):
 else:
   proc destroyRegex(pattern: Regex) =
     `=destroy`(pattern.pattern)
-    pcre.code_free(pattern.pcreObj)
+    pcre2.code_free(pattern.pcreObj)
     `=destroy`(pattern.captureNameToId)
 
 proc getinfo[T](pattern: Regex, opt: uint32): T =

From cb802af44e3c684a8738684ebdd84df31aeabf09 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Tue, 5 Nov 2024 13:21:07 +0800
Subject: [PATCH 4/7] try libpcre2-8-0.dll

---
 lib/wrappers/pcre2.nim | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/lib/wrappers/pcre2.nim b/lib/wrappers/pcre2.nim
index fa752848b976d..92caee7e23008 100644
--- a/lib/wrappers/pcre2.nim
+++ b/lib/wrappers/pcre2.nim
@@ -520,12 +520,7 @@ type
 
 when not defined(usePcreHeader):
   when hostOS == "windows":
-    when defined(nimOldDlls):
-      const pcreDll = "pcre.dll"
-    elif defined(cpu64):
-      const pcreDll = "pcre64.dll"
-    else:
-      const pcreDll = "pcre32.dll"
+    const pcreDll = "libpcre2-8-0.dll"
   elif hostOS == "macosx":
     const pcreDll = "libpcre2-8.0.dylib"
   else:

From 27fc4fedb5c1be6a4ec27f7d0d0c913a63f792b4 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Tue, 5 Nov 2024 14:14:22 +0800
Subject: [PATCH 5/7] clean up

---
 lib/impure/nre.nim     |  4 ++--
 lib/impure/re.nim      | 22 +++++++++++-----------
 lib/wrappers/pcre2.nim | 15 ++-------------
 3 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index 1c47918ab31c7..56c55c8dd1a61 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -272,7 +272,7 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
     errorCode: cint = 0
     errOffset: csize_t = 0
 
-  result.pcreObj = pcre2.compile(cast[ptr uint8](cstring(pattern)),
+  result.pcreObj = pcre2.compile(cstring(pattern),
                                 flags, options, addr(errorCode),
                                 addr(errOffset), nil)
   if result.pcreObj == nil:
@@ -508,7 +508,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32)
   var matchData = pcre2.match_data_create_from_pattern(pattern.pcreObj, nil)
   defer: pcre2.match_data_free(matchData)
   let execRet = pcre2.match(pattern.pcreObj,
-                          cast[ptr uint8](cstring(str)),
+                          cstring(str),
                           csize_t(strlen),
                           csize_t(start),
                           options,
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index fcd27516be611..c2f508a4bee06 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -83,7 +83,7 @@ proc rawCompile(pattern: string, flags: csize_t, options: uint32): ptr Pcre =
   var
     errorCode: cint = 0
     offset: csize_t = 0
-  result = pcre2.compile(cast[ptr uint8](pattern.cstring), flags, options, addr(errorCode), addr(offset), nil)
+  result = pcre2.compile(pattern.cstring, flags, options, addr(errorCode), addr(offset), nil)
   if result == nil:
     raiseInvalidRegex($errorCode & "\n" & pattern & "\n" & spaces(offset) & "^\n")
 
@@ -146,7 +146,7 @@ proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, options,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0: return res
@@ -177,7 +177,7 @@ proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, 0'u32,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0: return (-1, 0)
@@ -220,7 +220,7 @@ proc findBounds*(buf: cstring, pattern: Regex,
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, 0'u32,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return (-1, 0)
@@ -255,7 +255,7 @@ proc findBoundsImpl(buf: cstring, pattern: Regex,
   var rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, options,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32:
@@ -273,7 +273,7 @@ proc findBounds*(buf: cstring, pattern: Regex,
   var rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, 0'u32,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return (int(res), 0)
@@ -296,7 +296,7 @@ proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, options: uin
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  result = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, options,
+  result = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, options,
                     matchData, nil)
   
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
@@ -381,7 +381,7 @@ proc find*(buf: cstring, pattern: Regex, matches: var openArray[string],
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, 0'u32,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return res
@@ -410,7 +410,7 @@ proc find*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int =
     rawMatches = rtarray.getRawData
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
-  var res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, start.csize_t, 0'u32,
+  var res = pcre2.match(pattern.h, buf, bufSize.csize_t, start.csize_t, 0'u32,
       matchData, nil)
   rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
   if res < 0'i32: return res
@@ -441,7 +441,7 @@ iterator findAll*(s: string, pattern: Regex, start = 0): string =
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
   while true:
-    let res = pcre2.match(pattern.h, cast[ptr uint8](s.cstring), len(s).csize_t, i.csize_t, 0'u32,
+    let res = pcre2.match(pattern.h, s.cstring, len(s).csize_t, i.csize_t, 0'u32,
       matchData, nil)
     rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
     if res < 0'i32: break
@@ -463,7 +463,7 @@ iterator findAll*(buf: cstring, pattern: Regex, start = 0, bufSize: int): string
   var matchData = match_data_create_from_pattern(pattern.h, nil)
   defer: match_data_free(matchData)
   while true:
-    let res = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t, i.csize_t, 0'u32,
+    let res = pcre2.match(pattern.h, buf, bufSize.csize_t, i.csize_t, 0'u32,
       matchData, nil)
     rawMatches = cast[ptr UncheckedArray[csize_t]](get_ovector_pointer(matchData))
     if res < 0'i32: break
diff --git a/lib/wrappers/pcre2.nim b/lib/wrappers/pcre2.nim
index 92caee7e23008..65a06c995a770 100644
--- a/lib/wrappers/pcre2.nim
+++ b/lib/wrappers/pcre2.nim
@@ -533,7 +533,7 @@ else:
 
 # Exported PCRE functions
 
-proc compile*(pattern: ptr uint8,
+proc compile*(pattern: cstring,
               options: csize_t,
               flags: uint32,
               errorCode: ptr cint,
@@ -576,24 +576,13 @@ proc dfa_match*(code: ptr Pcre,
                wscount: cint): cint
 
 proc match*(code: ptr Pcre,
-           subject: ptr uint8,
+           subject: cstring,
            length: csize_t,
            startoffset: csize_t,
            options: uint32,
            ovector: ptr MatchData,
            ovecsize: pointer): cint
 
-proc match*(code: ptr Pcre,
-           subject: cstring,
-           length: cint,
-           startoffset: cint,
-           options: cint,
-           ovector: ptr MatchData,
-           ovecsize: cint): cint =
-  result = match(code, cast[ptr uint8](subject), csize_t length, csize_t startoffset,
-          uint32 options, 
-          ovector, nil)
-
 proc match_data_create*(size: uint32, ctx: ptr GeneralContext): ptr MatchData
 
 proc match_data_create_from_pattern*(

From 0e3ac706156887ce143681da42b21874c2b20774 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Tue, 5 Nov 2024 23:03:16 +0800
Subject: [PATCH 6/7] progress

---
 lib/impure/nre.nim     |   2 +-
 lib/impure/re.nim      |  16 ++---
 lib/wrappers/pcre2.nim | 150 ++++++-----------------------------------
 3 files changed, 29 insertions(+), 139 deletions(-)

diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index 56c55c8dd1a61..e5364ba67dfbe 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -1,6 +1,6 @@
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Nim Contributors
+#        (c) Copyright 2024 Nim Contributors
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index c2f508a4bee06..deceb9739bae2 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2024 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -114,15 +114,11 @@ proc re*(s: string, flags = {reStudy}): Regex =
   if reIgnoreCase in flags:
     options = options or CASELESS
   result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
-  # if reStudy in flags:
-  #   var msg: cstring = ""
-  #   var options: cint = 0
-  #   var hasJit: cint = 0
-  #   if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
-  #     if hasJit == 1'i32:
-  #       options = pcre.STUDY_JIT_COMPILE
-  #   result.e = pcre.study(result.h, options, addr msg)
-  #   if not isNil(msg): raiseInvalidRegex($msg)
+  if reStudy in flags: # TODO: add reJit
+    var hasJit: cint = 0
+    if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
+      if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0:
+        raiseInvalidRegex("JIT compilation failed.")
 
 proc rex*(s: string, flags = {reStudy, reExtended}): Regex =
   ## Constructor for extended regular expressions.
diff --git a/lib/wrappers/pcre2.nim b/lib/wrappers/pcre2.nim
index 65a06c995a770..0615cda0e627d 100644
--- a/lib/wrappers/pcre2.nim
+++ b/lib/wrappers/pcre2.nim
@@ -1,58 +1,19 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
+#        (c) Copyright 2024 Nim Contributors
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# The current PCRE version information.
+# The current PCRE2 version information.
 
 const
-  PCRE_MAJOR* = 8
-  PCRE_MINOR* = 36
-  PCRE_PRERELEASE* = true
-  PCRE_DATE* = "2014-09-26"
-
-# When an application links to a PCRE DLL in Windows, the symbols that are
-# imported have to be identified as such. When building PCRE, the appropriate
-# export setting is defined in pcre_internal.h, which includes this file. So we
-# don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL.
-
-# By default, we use the standard "extern" declarations.
-
-# Allow for C++ users
-
-# Public options. Some are compile-time only, some are run-time only, and some
-# are both. Most of the compile-time options are saved with the compiled regex
-# so that they can be inspected during studying (and therefore JIT compiling).
-# Note that pcre_study() has its own set of options. Originally, all the options
-# defined here used distinct bits. However, almost all the bits in a 32-bit word
-# are now used, so in order to conserve them, option bits that were previously
-# only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
-# also be used for compile-time options that affect only compiling and are not
-# relevant for studying or JIT compiling.
-#
-# Some options for pcre_compile() change its behaviour but do not affect the
-# behaviour of the execution functions. Other options are passed through to the
-# execution functions and affect their behaviour, with or without affecting the
-# behaviour of pcre_compile().
-#
-# Options that can be passed to pcre_compile() are tagged Cx below, with these
-# variants:
-#
-# C1   Affects compile only
-# C2   Does not affect compile; affects exec, dfa_exec
-# C3   Affects compile, exec, dfa_exec
-# C4   Affects compile, exec, dfa_exec, study
-# C5   Affects compile, exec, study
-#
-# Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged
-# with E and D, respectively. They take precedence over C3, C4, and C5 settings
-# passed from pcre_compile(). Those that are compatible with JIT execution are
-# flagged with J.
-
+  PCRE2_MAJOR* = 10
+  PCRE2_MINOR* = 45
+  PCRE2_PRERELEASE* = true
+  PCRE2_DATE* = "2024-06-09"
 
 const
   ANCHORED* = 0x80000000'u32
@@ -540,40 +501,18 @@ proc compile*(pattern: cstring,
               offset: ptr csize_t,
               tableptr: pointer): ptr Pcre
 
-proc compile2*(pattern: cstring,
-               options: cint,
-               errorcodeptr: ptr cint,
-               errptr: ptr cstring,
-               erroffset: ptr cint,
-               tableptr: pointer): ptr Pcre
-
-proc config*(what: cint,
+proc config*(what: uint32,
              where: pointer): cint
 
-proc copy_named_substring*(code: ptr Pcre,
-                           subject: cstring,
-                           ovector: ptr cint,
-                           stringcount: cint,
-                           stringname: cstring,
-                           buffer: cstring,
-                           buffersize: cint): cint
-
-proc copy_substring*(subject: cstring,
-                     ovector: ptr cint,
-                     stringcount: cint,
-                     stringnumber: cint,
-                     buffer: cstring,
-                     buffersize: cint): cint
-
 proc dfa_match*(code: ptr Pcre,
                subject: cstring,
-               length: cint,
-               startoffset: cint,
-               options: cint,
-               ovector: ptr cint,
-               ovecsize: cint,
+               length: csize_t,
+               startoffset: csize_t,
+               options: uint32,
+               ovector: ptr MatchData,
+               ovecsize: pointer, # PCRE2 match-context slot, not a size; TODO: pcre2_match_context
                workspace: ptr cint,
-               wscount: cint): cint
+               wscount: csize_t): cint
 
 proc match*(code: ptr Pcre,
            subject: cstring,
@@ -581,7 +520,8 @@ proc match*(code: ptr Pcre,
            startoffset: csize_t,
            options: uint32,
            ovector: ptr MatchData,
-           ovecsize: pointer): cint
+           ovecsize: pointer # PCRE2 match-context slot, not a size; TODO: pcre2_match_context
+           ): cint
 
 proc match_data_create*(size: uint32, ctx: ptr GeneralContext): ptr MatchData
 
@@ -598,16 +538,12 @@ proc get_ovector_count*(ovector: ptr MatchData): uint32
 
 proc jit_match*(code: ptr Pcre,
                subject: cstring,
-               length: cint,
-               startoffset: cint,
-               options: cint,
-               ovector: ptr cint,
-               ovecsize: cint,
-               jstack: ptr JitStack): cint
-
-# proc free_substring*(stringptr: cstring)
-
-# proc free_substring_list*(stringptr: cstringArray)
+               length: csize_t,
+               startoffset: csize_t,
+               options: uint32,
+               ovector: ptr MatchData,
+               ovecsize: pointer # PCRE2 match-context slot, not a size; TODO: pcre2_match_context
+               ): cint
 
 proc code_free*(code: ptr Pcre)
 
@@ -615,51 +551,9 @@ proc pattern_info*(code: ptr Pcre,
                what: uint32,
                where: pointer): cint
 
-proc get_named_substring*(code: ptr Pcre,
-                          subject: cstring,
-                          ovector: ptr cint,
-                          stringcount: cint,
-                          stringname: cstring,
-                          stringptr: cstringArray): cint
-
-proc get_stringnumber*(code: ptr Pcre,
-                       name: cstring): cint
-
-proc get_stringtable_entries*(code: ptr Pcre,
-                              name: cstring,
-                              first: cstringArray,
-                              last: cstringArray): cint
-
-proc get_substring*(subject: cstring,
-                    ovector: ptr cint,
-                    stringcount: cint,
-                    stringnumber: cint,
-                    stringptr: cstringArray): cint
-
-proc get_substring_list*(subject: cstring,
-                         ovector: ptr cint,
-                         stringcount: cint,
-                         listptr: ptr cstringArray): cint
-
-proc maketables*(): pointer
-
-proc refcount*(code: ptr Pcre,
-               adjust: cint): cint
-
-proc version*(): cstring
-
 # JIT compiler related functions.
 
-# proc jit_stack_alloc*(startsize: cint,
-#                       maxsize: cint): ptr JitStack
-
-# proc jit_stack_free*(stack: ptr JitStack)
-
-# proc assign_jit_stack*(extra: ptr ExtraData,
-#                        callback: JitCallback,
-#                        data: pointer)
-
-proc jit_free_unused_memory*()
+proc jit_compile*(code: ptr Pcre, options: uint32): cint
 
 
 {.pop.}

From 07de39cde6341ae278b47d64f73dd9c823dd18c5 Mon Sep 17 00:00:00 2001
From: ringabout <43030857+ringabout@users.noreply.github.com>
Date: Wed, 6 Nov 2024 23:09:25 +0800
Subject: [PATCH 7/7] progress

---
 lib/impure/nre.nim        | 39 ++++++++++++++-------------------------
 lib/impure/re.nim         |  4 ++--
 tests/stdlib/nre/init.nim | 10 +++++-----
 3 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index e5364ba67dfbe..70b8d16fa1b9f 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -61,7 +61,7 @@ runnableExamples:
   assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
   assert find("uxabc", re"ab", start = 3).isNone
 
-from ../wrappers/pcre2 import nil
+from std/pcre2 import nil
 import nre/private/util
 import std/tables
 from std/strutils import `%`
@@ -136,8 +136,6 @@ type
     ##     are recognized only in UTF-8 mode.
     ##     —  man pcre
     ##
-    ## -  `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
-    ## -  `(*NO_STUDY)` - turn off studying; study is enabled by default
     ##
     ## For more details on the leading option groups, see the `Option
     ## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
@@ -261,7 +259,7 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =
 
     result[name] = num
 
-proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
+proc initRegex(pattern: string, flags: csize_t, options: uint32, noJit: bool): Regex =
   when defined(gcDestructors):
     result = Regex()
   else:
@@ -279,15 +277,11 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
     # failed to compile
     raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern)
 
-  # if study:
-  #   var options: cint = 0
-  #   var hasJit: cint
-  #   if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-  #     if hasJit == 1'i32:
-  #       options = pcre2.STUDY_JIT_COMPILE
-  #   result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
-  #   if errorMsg != nil:
-  #     raise StudyError(msg: $errorMsg)
+  if not noJit:
+    var hasJit: cint = cint(0)
+    if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
+      if hasJit == 1'i32 and pcre2.jit_compile(result.pcreObj, pcre2.JIT_COMPLETE) != 0:
+        raise StudyError(msg: "JIT compilation failed.")
 
   result.captureNameToId = result.getNameToNumberTable()
 
@@ -438,9 +432,9 @@ const PcreOptions = {
   "DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY,
   "FIRSTLINE": pcre2.FIRSTLINE,
   "NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE,
-  # "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT,
   "U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ?
 }.toTable
+# TODO: maybe add JIT?
 
 # Options that are supported inside regular expressions themselves
 const SkipOptions = [
@@ -449,8 +443,8 @@ const SkipOptions = [
   "CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
 ]
 
-proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
-  result = ("", 0'u32)
+proc extractOptions(pattern: string): tuple[pattern: string, options: uint32, noJit: bool] =
+  result = ("", 0'u32, false)
 
   var optionStart = 0
   var equals = false
@@ -470,8 +464,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
         result.pattern.add pattern[optionStart .. i]
       elif PcreOptions.hasKey name:
         result.options = result.options or PcreOptions[name]
-      # elif name == "NO_STUDY":
-      #   result.study = false
+      elif name == "NO_STUDY":
+        result.noJit = true
       else:
         break
       optionStart = i+1
@@ -488,8 +482,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
   result.pattern.add pattern[optionStart .. pattern.high]
 
 proc re*(pattern: string): Regex =
-  let (pattern, options) = extractOptions(pattern)
-  initRegex(pattern, pcre2.ZERO_TERMINATED, options)
+  let (pattern, options, noJit) = extractOptions(pattern)
+  initRegex(pattern, pcre2.ZERO_TERMINATED, options, noJit)
 
 proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
   var myResult = RegexMatch(pattern: pattern, str: str)
@@ -517,12 +511,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32)
   let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
   let capture_count = pcre2.get_ovector_count(matchData)
   let ovector_size = 2 * capture_count.int * sizeof(csize_t)
-  # echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size)
-  # echo (capture_count, ovector[0], ovector[1])
   copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size)
-  # echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b)
-
-  # echo " -> ", myResult
   if execRet >= 0:
     return some(myResult)
 
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index deceb9739bae2..beb26b1ede562 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -38,7 +38,7 @@ runnableExamples:
 import
   std/[strutils, rtarrays]
 
-import ../wrappers/pcre2
+import std/pcre2
 
 when defined(nimPreviewSlimSystem):
   import std/syncio
@@ -115,7 +115,7 @@ proc re*(s: string, flags = {reStudy}): Regex =
     options = options or CASELESS
   result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
   if reStudy in flags: # TODO: add reJit
-    var hasJit: cint = 0
+    var hasJit: cint = cint(0)
     if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
       if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0:
         raiseInvalidRegex("JIT compilation failed.")
diff --git a/tests/stdlib/nre/init.nim b/tests/stdlib/nre/init.nim
index fd160f542a1b5..57162fe8f26c9 100644
--- a/tests/stdlib/nre/init.nim
+++ b/tests/stdlib/nre/init.nim
@@ -1,7 +1,7 @@
 import unittest
 include nre
 
-from ../../../lib/wrappers/pcre2 import nil
+from std/pcre2 import nil
 
 block: # Test NRE initialization
   block: # correct initialization
@@ -10,23 +10,23 @@ block: # Test NRE initialization
 
   block: # options
     check(extractOptions("(*NEVER_UTF)") ==
-          ("", pcre2.NEVER_UTF))
+          ("", pcre2.NEVER_UTF, false))
     check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
-          ("(*UTF8)(*UCP)z", pcre2.ANCHORED))
+          ("(*UTF8)(*UCP)z", pcre2.ANCHORED, false))
     # check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
     #       ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true))
 
     # check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32))
 
     check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
-          ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED))
+          ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED, false))
 
   block: # incorrect options
     for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
               "(?i)",
               "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
       let ss = s & "(*NEVER_UTF)"
-      check(extractOptions(ss) == (ss, 0'u32))
+      check(extractOptions(ss) == (ss, 0'u32, false))
 
   block: # invalid regex
     # expect(SyntaxError): discard re("[0-9")
Places

File nim-2.2.2-pcre2.patch of Package nim

Places