File v8-highway-1.1.patch of Package nodejs-electron
From 1cb68a06f8c0ea3ad29cab6efe5ac673817596ce Mon Sep 17 00:00:00 2001
From: pthier <pthier@chromium.org>
Date: Tue, 6 May 2025 10:28:03 +0200
Subject: [PATCH] Reland "[regexp] Simdify global atom match with single
character pattern"
This is a reland of commit 36f07e9a04484dd4b97713f8e821d3b83ade8f53
Changes since revert: Accumulate number of matches after a cache hit
instead of overwriting them.
Original change's description:
> [regexp] Simdify global atom match with single character pattern
>
> Use highway to find matching characters for RegExp with a single
> character atom pattern.
>
> Bug: 413411337
> Change-Id: I9bf686aca2da37025613a9227eb0ec69176a676f
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6487695
> Reviewed-by: Jakob Linke <jgruber@chromium.org>
> Commit-Queue: Patrick Thier <pthier@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#100006}
Fixed: 414857029
Bug: 413411337
Change-Id: I3ebd72f3b91ce5e7b603e43540cd4e10090c1868
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6513551
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Commit-Queue: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#100088}
---
src/runtime/runtime-regexp.cc | 95 ++++++++++++++++++++++++++++++++---
test/mjsunit/regexp-global.js | 24 +++++++++
2 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
index 5689cdc8d1db..e5cee437d1c3 100644
--- a/v8/src/runtime/runtime-regexp.cc
+++ b/v8/src/runtime/runtime-regexp.cc
@@ -3,7 +3,9 @@
// found in the LICENSE file.
#include <functional>
+#include <type_traits>
+#include "hwy/highway.h"
#include "src/base/small-vector.h"
#include "src/base/strings.h"
#include "src/common/message-template.h"
@@ -2164,13 +2166,92 @@ inline void RegExpMatchGlobalAtom_OneCharPattern(
 Isolate* isolate, base::Vector<const SChar> subject, const PChar pattern,
 int start_index, int* number_of_matches, int* last_match_index,
 const DisallowGarbageCollection& no_gc) {
- for (int i = start_index; i < subject.length(); i++) {
- // Subtle: the valid variants are {SChar,PChar} in:
- // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case,
- // we cast the uint8_t pattern to uc16 for the comparison.
- if (subject[i] != static_cast<const SChar>(pattern)) continue;
- (*number_of_matches)++;
- (*last_match_index) = i;
+ static_assert(std::is_unsigned_v<SChar>);
+ static_assert(std::is_unsigned_v<PChar>);
+ // We can utilize SIMD to check multiple characters at once.
+ // Since the pattern is a single char, we create a mask setting each lane in
+ // the vector to the pattern char.
+ // Since reductions from a vector to a general purpose register (i.e.
+ // ReduceSum in this algorithm) are expensive, we keep a count for each lane
+ // in a vector until the count could potentially overflow and only reduce to
+ // a general purpose register then. I.e. if SChar is uint8_t, we have a
+ // 16xuint8_t vector to count matches, which we reduce to an int every 255
+ // blocks.
+ namespace hw = hwy::HWY_NAMESPACE;
+ hw::ScalableTag<SChar> tag;
+ // We need a wider tag to avoid overflows on lanes when summing up submatches.
+ using WidenedTag = hw::RepartitionToWide<decltype(tag)>;
+ WidenedTag sum_tag;
+ static constexpr size_t stride = hw::Lanes(tag);
+ // Subtle: the valid variants are {SChar,PChar} in:
+ // {uint8_t,uint8_t}, {uc16,uc16}, {uc16,uint8_t}. In the latter case,
+ // we cast the uint8_t pattern to uc16 for the comparison.
+ const auto mask = hw::Set(tag, static_cast<const SChar>(pattern));
+
+ int matches = 0;
+ auto submatches = hw::Zero(tag);
+ const SChar* last_match_block = nullptr;
+ hw::Mask<decltype(tag)> last_match_vec;
+
+ const SChar* block = subject.data() + start_index;
+ const SChar* end = subject.data() + subject.length();
+
+ // ReduceSum is expensive, so we gather matches into a vector. max_count is
+ // the maximum number of matches we can count in the vector before it
+ // overflows.
+ int max_count = std::numeric_limits<SChar>::max();
+ while (block + stride * max_count <= end) {
+ for (int i = 0; i < max_count; i++, block += stride) {
+ const auto input = hw::LoadU(tag, block);
+ const auto match = input == mask;
+ // Lanes with matches have all bits set, so we subtract to increase the
+ // count by 1.
+ submatches = hw::Sub(submatches, hw::VecFromMask(tag, match));
+ if (!hw::AllFalse(tag, match)) {
+ last_match_block = block;
+ last_match_vec = match;
+ }
+ }
+ // SumsOf2 promotes the sum of 2 consecutive lanes into a wider lane.
+ auto promoted_submatches = hw::SumsOf2(submatches);
+ // Wider lane sums can be reduced without overflows.
+ matches += hw::ReduceSum(sum_tag, promoted_submatches);
+ submatches = hw::Zero(tag);
+ }
+
+ // For blocks shorter than stride * max_count, lanes in submatches can't
+ // overflow.
+ DCHECK_LT(end - block, stride * max_count);
+ for (; block + stride <= end; block += stride) {
+ const auto input = hw::LoadU(tag, block);
+ const auto match = input == mask;
+ submatches = hw::Sub(submatches, hw::VecFromMask(tag, match));
+ if (!hw::AllFalse(tag, match)) {
+ last_match_block = block;
+ last_match_vec = match;
+ }
+ }
+ auto promoted_submatches = hw::SumsOf2(submatches);
+ matches += hw::ReduceSum(sum_tag, promoted_submatches);
+
+ // Handle remaining chars, checking bounds before every read.
+ // last_match_block already contains the last match position, so use a special
+ // vector with lane 0 set to extract the last_match_index later.
+ const auto scalar_last_match_vec = hw::FirstN(tag, 1);
+ for (; block < end; ++block) {
+ if (*block != static_cast<const SChar>(pattern)) continue;
+ matches++;
+ last_match_block = block;
+ last_match_vec = scalar_last_match_vec;
+ }
+
+ // Store results.
+ *number_of_matches += matches;
+ if (last_match_block != nullptr) {
+ DCHECK(!hw::AllFalse(tag, last_match_vec));
+ *last_match_index = static_cast<int>(
+ last_match_block + hw::FindKnownLastTrue(tag, last_match_vec) -
+ subject.data());
 }
 }