File 0001-XML_QDom-speedup-encodeText.patch of Package libqt5-qtbase.40462
From 319f422dbcf2571b7431b0e0d3c8f8bd5ff1b056 Mon Sep 17 00:00:00 2001
From: Christian Ehrlicher <ch.ehrlicher@gmx.de>
Date: Tue, 6 Aug 2024 22:39:44 +0200
Subject: [PATCH] XML/QDom: speedup encodeText()
The code copied the whole string, then replaced parts inline, at
the cost of relocating everything beyond, at each replacement.
Instead, copy character by character (in chunks where possible)
and append replacements as we skip what they replace.
Manual conflict resolution for 6.5:
- This is a manual cherry-pick. The original change was only
picked to 6.8, but the quadratic behavior is present in Qt 5, too.
- Changed Task-number to Fixes: because this is the real fix;
the QString change, 315210de916d060c044c01e53ff249d676122b1b,
was unrelated to the original QTBUG-127549.
Manual conflcit resolution for 5.15:
- Kept/re-added QTextCodec::canEncode() check
- Ported from Qt 6 to 5, to wit:
- qsizetype -> int
- QStringView::first/sliced(n) -> left/mid(n)
(these functions are clearly called in-range, so the widened
contract of the Qt 5 functions doesn't matter)
- Ported from C++17- and C++14-isms to C++11:
- replaced polymorphic lambda with a normal one (this requires
rewriting the !canEncode() branch to use QByteArray/QLatin1String
instead of QString)
- As a drive-by, corrected the indentation of the case labels to
horizontally align existing code (and follow Qt style)
Fixes: QTBUG-127549
Change-Id: I368482859ed0c4127f1eec2919183711b5488ada
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit 2ce08e3671b8d18b0284447e5908ce15e6e8f80f)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
(cherry picked from commit 225e235cf966a44af23dbe9aaaa2fd20ab6430ee)
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
(cherry picked from commit 905a5bd421efff6a1d90b6140500d134d32ca745)
---
src/xml/dom/qdom.cpp | 94 ++++++++++++++++++++++++--------------------
1 file changed, 51 insertions(+), 43 deletions(-)
diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp
index 45da7f961de..04b868a7e1d 100644
--- a/src/xml/dom/qdom.cpp
+++ b/src/xml/dom/qdom.cpp
@@ -3676,59 +3676,67 @@ static QString encodeText(const QString &str,
const QTextCodec *const codec = s.codec();
Q_ASSERT(codec);
#endif
- QString retval(str);
- int len = retval.length();
- int i = 0;
+ QString retval;
+ int start = 0;
+ auto appendToOutput = [&](int cur, QLatin1String replacement)
+ {
+ if (start < cur) {
+ retval.reserve(str.size() + replacement.size());
+ retval.append(QStringView(str).left(cur).mid(start));
+ }
+ // Skip over str[cur], replaced by replacement
+ start = cur + 1;
+ retval.append(replacement);
+ };
- while (i < len) {
- const QChar ati(retval.at(i));
-
- if (ati == QLatin1Char('<')) {
- retval.replace(i, 1, QLatin1String("<"));
- len += 3;
- i += 4;
- } else if (encodeQuotes && (ati == QLatin1Char('"'))) {
- retval.replace(i, 1, QLatin1String("""));
- len += 5;
- i += 6;
- } else if (ati == QLatin1Char('&')) {
- retval.replace(i, 1, QLatin1String("&"));
- len += 4;
- i += 5;
- } else if (ati == QLatin1Char('>') && i >= 2 && retval[i - 1] == QLatin1Char(']') && retval[i - 2] == QLatin1Char(']')) {
- retval.replace(i, 1, QLatin1String(">"));
- len += 3;
- i += 4;
- } else if (performAVN &&
- (ati == QChar(0xA) ||
- ati == QChar(0xD) ||
- ati == QChar(0x9))) {
- const QString replacement(QLatin1String("&#x") + QString::number(ati.unicode(), 16) + QLatin1Char(';'));
- retval.replace(i, 1, replacement);
- i += replacement.length();
- len += replacement.length() - 1;
- } else if (encodeEOLs && ati == QChar(0xD)) {
- retval.replace(i, 1, QLatin1String("
")); // Replace a single 0xD with a ref for 0xD
- len += 4;
- i += 5;
- } else {
+ const int len = str.size();
+ for (int cur = 0; cur < len; ++cur) {
+ switch (const char16_t ati = str[cur].unicode()) {
+ case u'<':
+ appendToOutput(cur, QLatin1String("<"));
+ break;
+ case u'"':
+ if (encodeQuotes)
+ appendToOutput(cur, QLatin1String("""));
+ break;
+ case u'&':
+ appendToOutput(cur, QLatin1String("&"));
+ break;
+ case u'>':
+ if (cur >= 2 && str[cur - 1] == u']' && str[cur - 2] == u']')
+ appendToOutput(cur, QLatin1String(">"));
+ break;
+ case u'\r':
+ if (performAVN || encodeEOLs)
+ appendToOutput(cur, QLatin1String("
")); // \r == 0x0d
+ break;
+ case u'\n':
+ if (performAVN)
+ appendToOutput(cur, QLatin1String("
")); // \n == 0x0a
+ break;
+ case u'\t':
+ if (performAVN)
+ appendToOutput(cur, QLatin1String("	")); // \t == 0x09
+ break;
+ default:
#if QT_CONFIG(textcodec)
if(codec->canEncode(ati))
- ++i;
+ ; // continue
else
#endif
{
// We have to use a character reference to get it through.
- const ushort codepoint(ati.unicode());
- const QString replacement(QLatin1String("&#x") + QString::number(codepoint, 16) + QLatin1Char(';'));
- retval.replace(i, 1, replacement);
- i += replacement.length();
- len += replacement.length() - 1;
+ const QByteArray replacement = "&#x" + QByteArray::number(uint{ati}, 16) + ';';
+ appendToOutput(cur, QLatin1String{replacement});
}
+ break;
}
}
-
- return retval;
+ if (start > 0) {
+ retval.append(QStringView(str).left(len).mid(start));
+ return retval;
+ }
+ return str;
}
void QDomAttrPrivate::save(QTextStream& s, int, int) const
--
GitLab