File 0001-Sanitise-notification-HTML.patch of Package plasma5-workspace.7768

From 5bc696b5abcdb460c1017592e80b2d7f6ed3107c Mon Sep 17 00:00:00 2001
From: David Edmundson <kde@davidedmundson.co.uk>
Date: Wed, 31 Jan 2018 14:28:17 +0000
Subject: [PATCH] Sanitise notification HTML

Summary:
Qt labels support a HTML subset, using a completely internal parser in
QTextDocument.

The Notification spec support an even smaller subset of notification
elements.

It's important to strip out irrelevant tags that could potentially load
remote information without user interaction, such as img
src or even <b style="background:url...

But we want to maintain the basic rich text formatting of bold and
italics and links.

This parser iterates reads the XML, copying only permissable tags and
attributes.

A future obvious improvement would be to merge the original regular
expressions into this stream parser, but I'm trying to minimise
breakages to get this into 5.12.

Test Plan:
Moved code into it's own class for easy unit testing
Tried a bunch of things, including what the old regexes were doing

Also ran notify send with a few options to make sure things worked

Reviewers: #plasma, fvogt

Reviewed By: fvogt

Subscribers: aacid, fvogt, plasma-devel

Tags: #plasma

Differential Revision: https://phabricator.kde.org/D10188
---
 dataengines/notifications/CMakeLists.txt           |   8 ++
 dataengines/notifications/notifications_test.cpp   |  68 +++++++++++++
 .../notifications/notificationsanitizer.cpp        | 106 +++++++++++++++++++++
 dataengines/notifications/notificationsanitizer.h  |  35 +++++++
 dataengines/notifications/notificationsengine.cpp  |  19 +---
 5 files changed, 219 insertions(+), 17 deletions(-)
 create mode 100644 dataengines/notifications/notifications_test.cpp
 create mode 100644 dataengines/notifications/notificationsanitizer.cpp
 create mode 100644 dataengines/notifications/notificationsanitizer.h

diff --git a/dataengines/notifications/CMakeLists.txt b/dataengines/notifications/CMakeLists.txt
index 4fd3ee76..ad6e2120 100644
--- a/dataengines/notifications/CMakeLists.txt
+++ b/dataengines/notifications/CMakeLists.txt
@@ -4,6 +4,7 @@ set(notifications_engine_SRCS
     notificationsengine.cpp
     notificationservice.cpp
     notificationaction.cpp
+    notificationsanitizer.cpp
 )
 
 qt5_add_dbus_adaptor( notifications_engine_SRCS org.freedesktop.Notifications.xml notificationsengine.h  NotificationsEngine )
@@ -26,3 +27,10 @@ kcoreaddons_desktop_to_json(plasma_engine_notifications plasma-dataengine-notifi
 install(TARGETS plasma_engine_notifications DESTINATION ${KDE_INSTALL_PLUGINDIR}/plasma/dataengine)
 install(FILES plasma-dataengine-notifications.desktop DESTINATION ${KDE_INSTALL_KSERVICES5DIR} )
 install(FILES notifications.operations DESTINATION ${PLASMA_DATA_INSTALL_DIR}/services)
+
+
+#unit test
+
+add_executable(notification_test  notificationsanitizer.cpp notifications_test.cpp)
+target_link_libraries(notification_test Qt5::Test Qt5::Core)
+ecm_mark_as_test(notification_test)
diff --git a/dataengines/notifications/notifications_test.cpp b/dataengines/notifications/notifications_test.cpp
new file mode 100644
index 00000000..58399746
--- /dev/null
+++ b/dataengines/notifications/notifications_test.cpp
@@ -0,0 +1,68 @@
+#include <QtTest>
+#include <QObject>
+#include <QDebug>
+#include "notificationsanitizer.h"
+
+class NotificationTest : public QObject
+{
+    Q_OBJECT
+public:
+    NotificationTest() {}
+private Q_SLOTS:
+    void parse_data();
+    void parse();
+};
+
+void NotificationTest::parse_data()
+{
+    QTest::addColumn<QString>("messageIn");
+    QTest::addColumn<QString>("expectedOut");
+
+    QTest::newRow("basic no HTML") << "I am a notification" << "I am a notification";
+    QTest::newRow("whitespace") << "      I am a   notification  " << "I am a notification";
+
+    QTest::newRow("basic html") << "I am <b>the</b> notification" << "I am <b>the</b> notification";
+    QTest::newRow("nested html") << "I am <i><b>the</b></i> notification" << "I am <i><b>the</b></i> notification";
+
+    QTest::newRow("no extra tags") << "I am <blink>the</blink> notification" << "I am the notification";
+    QTest::newRow("no extra attrs") << "I am <b style=\"font-weight:20\">the</b> notification" << "I am <b>the</b> notification";
+
+    QTest::newRow("newlines") << "I am\nthe\nnotification" << "I am<br/>the<br/>notification";
+    QTest::newRow("multinewlines") << "I am\n\nthe\n\n\nnotification" << "I am<br/>the<br/>notification";
+
+    QTest::newRow("amp") << "me&you" << "me&amp;you";
+    QTest::newRow("double escape") << "foo &amp; &lt;bar&gt;" << "foo &amp; &lt;bar&gt;";
+
+    QTest::newRow("quotes") << "&apos;foo&apos;" << "'foo'";//as label can't handle this normally valid entity
+
+    QTest::newRow("image normal") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text";
+
+    //this input is technically wrong, so the output is also wrong, but QTextHtmlParser does the "right" thing
+    QTest::newRow("image normal no close") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text</img>";
+
+    QTest::newRow("image remote URL") << "This is <img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is <img alt=\"cheese\"/> and more text";
+
+    //more bad formatted options. To some extent actual output doesn't matter. Garbage in, garbabe out.
+    //the important thing is that it doesn't contain anything that could be parsed as the remote URL
+    QTest::newRow("image remote URL no close") << "This is <img src=\"http://foo.com/boo.png>\" alt=\"cheese\">  and more text" << "This is <img alt=\"cheese\"> and more text</img>";
+    QTest::newRow("image remote URL double open") << "This is <<img src=\"http://foo.com/boo.png>\"  and more text" << "This is ";
+    QTest::newRow("image remote URL no entitiy close") << "This is <img src=\"http://foo.com/boo.png\"  and more text" << "This is ";
+    QTest::newRow("image remote URL space in element name") << "This is < img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is ";
+
+    QTest::newRow("link") << "This is a link <a href=\"http://foo.com/boo\"/> and more text" << "This is a link <a href=\"http://foo.com/boo\"/> and more text";
+}
+
+void NotificationTest::parse()
+{
+    QFETCH(QString, messageIn);
+    QFETCH(QString, expectedOut);
+
+    const QString out = NotificationSanitizer::parse(messageIn);
+    expectedOut = "<?xml version=\"1.0\"?><html>"  + expectedOut + "</html>\n";
+    QCOMPARE(out, expectedOut);
+}
+
+
+QTEST_GUILESS_MAIN(NotificationTest)
+
+#include "notifications_test.moc"
diff --git a/dataengines/notifications/notificationsanitizer.cpp b/dataengines/notifications/notificationsanitizer.cpp
new file mode 100644
index 00000000..5410132c
--- /dev/null
+++ b/dataengines/notifications/notificationsanitizer.cpp
@@ -0,0 +1,106 @@
+/*
+ *   Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
+ *
+ * This program is free software you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+*/
+
+#include "notificationsanitizer.h"
+
+#include <QXmlStreamReader>
+#include <QXmlStreamWriter>
+#include <QRegularExpression>
+#include <QDebug>
+#include <QUrl>
+
+QString NotificationSanitizer::parse(const QString &text)
+{
+    // replace all \ns with <br/>
+    QString t = text;
+
+    t.replace(QLatin1String("\n"), QStringLiteral("<br/>"));
+    // Now remove all inner whitespace (\ns are already <br/>s)
+    t = t.simplified();
+    // Finally, check if we don't have multiple <br/>s following,
+    // can happen for example when "\n       \n" is sent, this replaces
+    // all <br/>s in succsession with just one
+    t.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
+    // This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
+    // text where it finds a stray ampersand.
+    // Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
+    t.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
+
+    QXmlStreamReader r(QStringLiteral("<html>") + t + QStringLiteral("</html>"));
+    QString result;
+    QXmlStreamWriter out(&result);
+
+    const QVector<QString> allowedTags = {"b", "i", "u", "img", "a", "html", "br"};
+
+    out.writeStartDocument();
+    while (!r.atEnd()) {
+        r.readNext();
+
+        if (r.tokenType() == QXmlStreamReader::StartElement) {
+            const QString name = r.name().toString();
+            if (!allowedTags.contains(name)) {
+                continue;
+            }
+            out.writeStartElement(name);
+            if (name == QLatin1String("img")) {
+                auto src = r.attributes().value("src").toString();
+                auto alt = r.attributes().value("alt").toString();
+
+                const QUrl url(src);
+                if (url.isLocalFile()) {
+                    out.writeAttribute(QStringLiteral("src"), src);
+                } else {
+                    //image denied for security reasons! Do not copy the image src here!
+                }
+
+                out.writeAttribute(QStringLiteral("alt"), alt);
+            }
+            if (name == QLatin1String("a")) {
+                out.writeAttribute(QStringLiteral("href"), r.attributes().value("href").toString());
+            }
+        }
+
+        if (r.tokenType() == QXmlStreamReader::EndElement) {
+            const QString name = r.name().toString();
+            if (!allowedTags.contains(name)) {
+                continue;
+            }
+            out.writeEndElement();
+        }
+
+        if (r.tokenType() == QXmlStreamReader::Characters) {
+            const auto text = r.text().toString();
+            out.writeCharacters(text); //this auto escapes chars -> HTML entities
+        }
+    }
+    out.writeEndDocument();
+
+    if (r.hasError()) {
+        qWarning() << "Notification to send to backend contains invalid XML: "
+                      << r.errorString() << "line" << r.lineNumber()
+                      << "col" << r.columnNumber();
+    }
+
+    // The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
+    // so we need to replace it here otherwise it will not render at all.
+    result = result.replace(QLatin1String("&apos;"), QChar('\''));
+
+
+    return result;
+}
diff --git a/dataengines/notifications/notificationsanitizer.h b/dataengines/notifications/notificationsanitizer.h
new file mode 100644
index 00000000..561a84b7
--- /dev/null
+++ b/dataengines/notifications/notificationsanitizer.h
@@ -0,0 +1,35 @@
+/*
+ *   Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
+ *
+ * This program is free software you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+*/
+
+#include <QString>
+
+namespace NotificationSanitizer
+{
+    /*
+     * This turns generic random text of either plain text of any degree of faux-HTML into HTML allowed
+     * in the notification spec namely:
+     * a, img, b, i, u  and br
+     * All other tags and attributes are stripped
+     * Whitespace is stripped and converted to <br/>
+     * Double newlines are compressed
+     *
+     * Image src is only copied when referring to a local file
+     */
+    QString parse(const QString &in);
+}
diff --git a/dataengines/notifications/notificationsengine.cpp b/dataengines/notifications/notificationsengine.cpp
index 2bc4dc25..9fbd3617 100644
--- a/dataengines/notifications/notificationsengine.cpp
+++ b/dataengines/notifications/notificationsengine.cpp
@@ -20,6 +20,7 @@
 #include "notificationsengine.h"
 #include "notificationservice.h"
 #include "notificationsadaptor.h"
+#include "notificationsanitizer.h"
 
 #include <QDebug>
 #include <KConfigGroup>
@@ -261,23 +262,7 @@ uint NotificationsEngine::Notify(const QString &app_name, uint replaces_id,
     const QString source = QStringLiteral("notification %1").arg(id);
 
     QString bodyFinal = (partOf == 0 ? body : _body);
-    // First trim whitespace from beginning and end
-    bodyFinal = bodyFinal.trimmed();
-    // Now replace all \ns with <br/>
-    bodyFinal = bodyFinal.replace(QLatin1String("\n"), QLatin1String("<br/>"));
-    // Now remove all inner whitespace (\ns are already <br/>s
-    bodyFinal = bodyFinal.simplified();
-    // Finally, check if we don't have multiple <br/>s following,
-    // can happen for example when "\n       \n" is sent, this replaces
-    // all <br/>s in succsession with just one
-    bodyFinal.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
-    // This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
-    // text where it finds a stray ampersand.
-    // Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
-    bodyFinal.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
-    // The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
-    // so we need to replace it here otherwise it will not render at all.
-    bodyFinal.replace(QLatin1String("&apos;"), QChar('\''));
+    bodyFinal = NotificationSanitizer::parse(bodyFinal);
 
     Plasma::DataEngine::Data notificationData;
     notificationData.insert(QStringLiteral("id"), QString::number(id));
-- 
2.16.0

openSUSE Build Service is sponsored by