Cherrypick Improve Text Handling to 23.04

Improve the handling of text both when sending and receiving.

The main feature is to fix the linked bug (and a host of others that are unreported but similar), which is caused by the fact that we don't properly clean HTML. This MR does that as per the Matrix spec https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes. Any disallowed tags or attributes are removed, and the special handling required for certain attributes is applied.

Additionally, the functions are designed to cover any other text formatting required, particularly for received strings.

The receive side is covered by 2 functions `handleRecieveRichText` and `handleRecievePlainText`. The rich/plain in the function name refers to the output type not the input type (both can take plain and rich input), so `handleRecieveRichText` is called to get a string suitable to go in a rich text control and `handleRecievePlainText` for a plain control.

The functions also handle the following, some of which was previously handled by `eventToString` in `NeoChatRoom`:
- Strip any reply from the string
- Format any user mentions
- Linkify links in plain strings
- Handle mxc urls in rich text (uses the new `room->makeMediaUrl` functionality from libQuotient)
- `handleRecievePlainText` also deals with markup making `NeoChatRoom->subtitle` redundant

There is also an extensive test suite which defines the behaviour. The best way to review this is probably to look at the tests and decide whether you agree with the expected output given the inputs, and whether any behaviour is missing.

The final aim especially with the test suite is to give us a framework to make further updates in the future easier and hopefully prevent a new feature breaking old behaviour with the tests.

BUG: 463932 \
BUG: 466330 \
BUG: 466930


(cherry picked from commit f6ba4f2ecd)
This commit is contained in:
James Graham
2023-03-13 18:18:17 +00:00
parent 498cfedfea
commit da1c664f94
16 changed files with 1041 additions and 148 deletions

View File

@@ -8,3 +8,9 @@ ecm_add_test(
LINK_LIBRARIES neochat Qt::Test Quotient
TEST_NAME neochatroomtest
)
ecm_add_test(
texthandlertest.cpp
LINK_LIBRARIES neochat Qt::Test
TEST_NAME texthandlertest
)

View File

@@ -136,7 +136,7 @@ void NeoChatRoomTest::initTestCase()
void NeoChatRoomTest::subtitleTextTest()
{
QCOMPARE(room->timelineSize(), 1);
QCOMPARE(room->subtitleText(), QStringLiteral("@example:example.org: This is an example text message"));
QCOMPARE(room->lastEventToString(), QStringLiteral("@example:example.org: This is an example text message"));
}
void NeoChatRoomTest::eventTest()

View File

@@ -0,0 +1,482 @@
// SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com>
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
#include <QObject>
#include <QTest>
#include "texthandler.h"
#include <connection.h>
#include <quotient_common.h>
#include <syncdata.h>
using namespace Quotient;
/**
 * NeoChatRoom subclass used only by this test.
 *
 * It inherits the NeoChatRoom constructors and adds a public update() that
 * forwards sync data to Room::updateData(), so the test can feed the room a
 * hand-written sync payload.
 */
class TestRoom : public NeoChatRoom
{
public:
using NeoChatRoom::NeoChatRoom;
// Forward the sync data (moved) and the fromCache flag to Room::updateData().
void update(SyncRoomData &&data, bool fromCache = false)
{
Room::updateData(std::move(data), fromCache);
}
};
/**
 * Test suite for TextHandler.
 *
 * Each private slot is one test case: it feeds an input string to a
 * TextHandler and compares the result of handleSendText(),
 * handleRecieveRichText() and/or handleRecievePlainText() with the expected
 * output string.
 */
class TextHandlerTest : public QObject
{
Q_OBJECT
private:
// Mock connection and room populated by initTestCase(); the room and its
// last message event are passed to handleRecieveRichText() in receiveRichMxcUrl().
Connection *connection = nullptr;
TestRoom *room = nullptr;
private Q_SLOTS:
void initTestCase();
// Cleaning of tags and attributes.
void allowedAttributes();
void stripDisallowedTags();
void stripDisallowedAttributes();
void emptyCodeTags();
// Send side: tests that only call handleSendText().
void sendSimpleStringCase();
void sendSingleParaMarkup();
void sendMultipleSectionMarkup();
void sendBadLinks();
void sendEscapeCode();
void sendCodeClass();
// Receive side: tests that call handleRecieveRichText() and/or handleRecievePlainText().
void receiveStripReply();
void receivePlainTextIn();
void recieveRichInPlainOut();
void receivePlainStripHtml();
void receivePlainStripMarkup();
void receiveStripNewlines();
void receiveRichUserPill();
void receiveRichStrikethrough();
void receiveRichtextIn();
void receiveRichMxcUrl();
void receiveRichPlainUrl();
};
#ifdef QUOTIENT_07
void TextHandlerTest::initTestCase()
{
connection = Connection::makeMockConnection(QStringLiteral("@bob:kde.org"));
room = new TestRoom(connection, QStringLiteral("#myroom:kde.org"), JoinState::Join);
const auto json = QJsonDocument::fromJson(R"EVENT({
"account_data": {
"events": [
{
"content": {
"tags": {
"u.work": {
"order": 0.9
}
}
},
"type": "m.tag"
},
{
"content": {
"custom_config_key": "custom_config_value"
},
"type": "org.example.custom.room.config"
}
]
},
"ephemeral": {
"events": [
{
"content": {
"user_ids": [
"@alice:matrix.org",
"@bob:example.com"
]
},
"room_id": "!jEsUZKDJdhlrceRyVU:example.org",
"type": "m.typing"
}
]
},
"state": {
"events": [
{
"content": {
"avatar_url": "mxc://example.org/SEsfnsuifSDFSSEF",
"displayname": "Alice Margatroid",
"membership": "join",
"reason": "Looking for support"
},
"event_id": "$143273582443PhrSn:example.org",
"origin_server_ts": 1432735824653,
"room_id": "!jEsUZKDJdhlrceRyVU:example.org",
"sender": "@example:example.org",
"state_key": "@alice:example.org",
"type": "m.room.member",
"unsigned": {
"age": 1234
}
}
]
},
"summary": {
"m.heroes": [
"@alice:example.com",
"@bob:example.com"
],
"m.invited_member_count": 0,
"m.joined_member_count": 2
},
"timeline": {
"events": [
{
"content": {
"body": "This is an **example** text message",
"format": "org.matrix.custom.html",
"formatted_body": "<b>This is an example text message</b>",
"msgtype": "m.text"
},
"event_id": "$143273582443PhrSn:example.org",
"origin_server_ts": 1432735824654,
"room_id": "!jEsUZKDJdhlrceRyVU:example.org",
"sender": "@example:example.org",
"type": "m.room.message",
"unsigned": {
"age": 1235
}
}
],
"limited": true,
"prev_batch": "t34-23535_0_0"
}
})EVENT");
SyncRoomData roomData(QStringLiteral("@bob:kde.org"), JoinState::Join, json.object());
room->update(std::move(roomData));
}
#endif
void TextHandlerTest::allowedAttributes()
{
const QString testInputString = QStringLiteral("<p><span data-mx-spoiler><font color=#FFFFFF>Test</font><span></p>");
const QString testOutputString = QStringLiteral("<p><span data-mx-spoiler><font color=#FFFFFF>Test</font><span></p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
void TextHandlerTest::stripDisallowedTags()
{
const QString testInputString = QStringLiteral("<p>Allowed</p> <span>Allowed</span> <body>Disallowed</body>");
const QString testOutputString = QStringLiteral("<p>Allowed</p> <span>Allowed</span> Disallowed");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
void TextHandlerTest::stripDisallowedAttributes()
{
const QString testInputString = QStringLiteral("<p style=\"font-size:50px;\" color=#FFFFFF>Test</p>");
const QString testOutputString = QStringLiteral("<p>Test</p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
/**
* Make sure that empty code tags are handled.
* (this was a bug during development hence the test)
*/
void TextHandlerTest::emptyCodeTags()
{
const QString testInputString = QStringLiteral("<pre><code></code></pre>");
const QString testOutputString = QStringLiteral("<pre><code></code></pre>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
void TextHandlerTest::sendSimpleStringCase()
{
const QString testInputString = QStringLiteral("This data should just be put in a paragraph.");
const QString testOutputString = QStringLiteral("<p>This data should just be put in a paragraph.</p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
void TextHandlerTest::sendSingleParaMarkup()
{
const QString testInputString = QStringLiteral(
"Text para with **bold**, *italic*, [link](https://kde.org), ![image](mxc://kde.org/aebd3ffd40503e1ef0525bf8f0d60282fec6183e), `inline code`.");
const QString testOutputString = QStringLiteral(
"<p>Text para with <strong>bold</strong>, <em>italic</em>, <a href=\"https://kde.org\">link</a>, <img "
"src=\"mxc://kde.org/aebd3ffd40503e1ef0525bf8f0d60282fec6183e\" alt=\"image\">, <code>inline code</code>.</p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
void TextHandlerTest::sendMultipleSectionMarkup()
{
const QString testInputString =
QStringLiteral("Text para\n> blockquote\n* List 1\n* List 2\n1. one\n2. two\n# Heading 1\n## Heading 2\nhorizontal rule\n\n---\n```\ncodeblock\n```");
const QString testOutputString = QStringLiteral(
"<p>Text para</p>\n<blockquote>\n<p>blockquote</p>\n</blockquote>\n<ul>\n<li>List 1</li>\n<li>List "
"2</li>\n</ul>\n<ol>\n<li>one</li>\n<li>two</li>\n</ol>\n<h1>Heading 1</h1>\n<h2>Heading 2</h2>\n<p>horizontal "
"rule</p>\n<hr>\n<pre><code>codeblock\n</code></pre>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
void TextHandlerTest::sendBadLinks()
{
const QString testInputString = QStringLiteral("[link](kde.org), ![image](https://kde.org/aebd3ffd40503e1ef0525bf8f0d60282fec6183e)");
const QString testOutputString = QStringLiteral("<p><a>link</a>, <img alt=\"image\"></p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
/**
* All text between code tags is treated as plain so it should get escaped.
*/
void TextHandlerTest::sendEscapeCode()
{
const QString testInputString = QStringLiteral("```\n<p>Test <span style=\"font-size:50px;\">some</span> code</p>\n```");
const QString testOutputString =
QStringLiteral("<pre><code>&lt;p&gt;Test &lt;span style=&quot;font-size:50px;&quot;&gt;some&lt;/span&gt; code&lt;/p&gt;\n</code></pre>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
void TextHandlerTest::sendCodeClass()
{
const QString testInputString = QStringLiteral("```html\nsome code\n```\n<pre><code class=\"code-underline\">some more code</code></pre>");
const QString testOutputString = QStringLiteral("<pre><code class=\"language-html\">some code\n</code></pre>\n<pre><code>some more code</code></pre>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleSendText(), testOutputString);
}
void TextHandlerTest::receiveStripReply()
{
const QString testInputString = QStringLiteral(
"<mx-reply><blockquote><a href=\"https://matrix.to/#/!somewhere:example.org/$event:example.org\">In reply to</a><a "
"href=\"https://matrix.to/#/@alice:example.org\">@alice:example.org</a><br />Message replied to.</blockquote></mx-reply>Reply message.");
const QString testOutputString = QStringLiteral("Reply message.");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
QCOMPARE(testTextHandler.handleRecievePlainText(), testOutputString);
}
void TextHandlerTest::recieveRichInPlainOut()
{
const QString testInputString = QStringLiteral("a &amp; b");
const QString testOutputString = QStringLiteral("a & b");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecievePlainText(), testOutputString);
}
void TextHandlerTest::receivePlainTextIn()
{
const QString testInputString = QStringLiteral("<plain text in tag bracket>\nTest link https://kde.org.");
const QString testOutputStringRich = QStringLiteral("&lt;plain text in tag bracket&gt;<br>Test link <a href=\"https://kde.org\">https://kde.org</a>.");
QString testOutputStringPlain = QStringLiteral("<plain text in tag bracket>\nTest link https://kde.org.");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::PlainText), testOutputStringRich);
QCOMPARE(testTextHandler.handleRecievePlainText(), testOutputStringPlain);
}
void TextHandlerTest::receiveStripNewlines()
{
const QString testInputStringPlain = QStringLiteral("Test\nmany\nnew\nlines.");
const QString testInputStringRich = QStringLiteral("Test<br>many<br />new<br>lines.");
const QString testOutputString = QStringLiteral("Test many new lines.");
TextHandler testTextHandler;
testTextHandler.setData(testInputStringPlain);
QCOMPARE(testTextHandler.handleRecievePlainText(Qt::PlainText, true), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::PlainText, nullptr, nullptr, true), testOutputString);
testTextHandler.setData(testInputStringRich);
QCOMPARE(testTextHandler.handleRecievePlainText(Qt::RichText, true), testOutputString);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText, nullptr, nullptr, true), testOutputString);
}
/**
* For a plain text output of a received string all html is stripped except for
* code which is unescaped if it's html.
*/
void TextHandlerTest::receivePlainStripHtml()
{
const QString testInputString = QStringLiteral("<p>Test</p> <pre><code>Some code <strong>with tags</strong></code></pre>");
const QString testOutputString = QStringLiteral("Test Some code <strong>with tags</strong>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecievePlainText(Qt::RichText), testOutputString);
}
void TextHandlerTest::receivePlainStripMarkup()
{
const QString testInputString = QStringLiteral("**bold** `<p>inline code</p>` *italic*");
const QString testOutputString = QStringLiteral("bold <p>inline code</p> italic");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecievePlainText(), testOutputString);
}
void TextHandlerTest::receiveRichUserPill()
{
const QString testInputString = QStringLiteral("<p><a href=\"https://matrix.to/#/@alice:example.org\">@alice:example.org</a></p>");
const QString testOutputString = QStringLiteral("<p><b><a href=\"https://matrix.to/#/@alice:example.org\">@alice:example.org</a></b></p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
void TextHandlerTest::receiveRichStrikethrough()
{
const QString testInputString = QStringLiteral("<p><del>Test</del></p>");
const QString testOutputString = QStringLiteral("<p><s>Test</s></p>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
void TextHandlerTest::receiveRichtextIn()
{
const QString testInputString = QStringLiteral("<p>Test</p> <pre><code>Some code <strong>with tags</strong></code></pre>");
const QString testOutputString = QStringLiteral("<p>Test</p> <pre><code>Some code &lt;strong&gt;with tags&lt;/strong&gt;</code></pre>");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(), testOutputString);
}
#ifdef QUOTIENT_07
void TextHandlerTest::receiveRichMxcUrl()
{
const QString testInputString = QStringLiteral(
"<img src=\"mxc://kde.org/aebd3ffd40503e1ef0525bf8f0d60282fec6183e\" alt=\"image\"><img src=\"mxc://kde.org/34c3464b3a1bd7f55af2d559e07d2c773c430e73\" "
"alt=\"image\">");
const QString testOutputString = QStringLiteral(
"<img "
"src=\"mxc://kde.org/aebd3ffd40503e1ef0525bf8f0d60282fec6183e?user_id=@bob:kde.org&room_id=%23myroom:kde.org&event_id=$143273582443PhrSn:example.org\" "
"alt=\"image\"><img "
"src=\"mxc://kde.org/34c3464b3a1bd7f55af2d559e07d2c773c430e73?user_id=@bob:kde.org&room_id=%23myroom:kde.org&event_id=$143273582443PhrSn:example.org\" "
"alt=\"image\">");
TextHandler testTextHandler;
testTextHandler.setData(testInputString);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText, room, room->messageEvents().back().get()), testOutputString);
}
#endif
/**
* For when your rich input string has a plain text url left in.
*
* This test is to show that a url that is already rich will be left alone but a
* plain one will be linkified.
*/
void TextHandlerTest::receiveRichPlainUrl()
{
// This is an actual link that caused trouble which is why it's so long. Keeping
// so we can confirm consistent behaviour for complex urls.
const QString testInputStringLink1 = QStringLiteral(
"https://matrix.to/#/!RvzunyTWZGfNxJVQqv:matrix.org/$-9TJVTh5PvW6MvIhFDwteiyLBVGriinueO5eeIazQS8?via=libera.chat&amp;via=matrix.org&amp;via=fedora.im "
"<a "
"href=\"https://matrix.to/#/!RvzunyTWZGfNxJVQqv:matrix.org/"
"$-9TJVTh5PvW6MvIhFDwteiyLBVGriinueO5eeIazQS8?via=libera.chat&amp;via=matrix.org&amp;via=fedora.im\">Link already rich</a>");
const QString testOutputStringLink1 = QStringLiteral(
"<a "
"href=\"https://matrix.to/#/!RvzunyTWZGfNxJVQqv:matrix.org/"
"$-9TJVTh5PvW6MvIhFDwteiyLBVGriinueO5eeIazQS8?via=libera.chat&amp;via=matrix.org&amp;via=fedora.im\">https://matrix.to/#/"
"!RvzunyTWZGfNxJVQqv:matrix.org/$-9TJVTh5PvW6MvIhFDwteiyLBVGriinueO5eeIazQS8?via=libera.chat&amp;via=matrix.org&amp;via=fedora.im</a> <a "
"href=\"https://matrix.to/#/!RvzunyTWZGfNxJVQqv:matrix.org/"
"$-9TJVTh5PvW6MvIhFDwteiyLBVGriinueO5eeIazQS8?via=libera.chat&amp;via=matrix.org&amp;via=fedora.im\">Link already rich</a>");
// Another real case. The linkification wasn't handling it when a single link
// contains what looks like and email. It was been broken into 3 but needs to
// be just single link.
const QString testInputStringLink2 = QStringLiteral("https://lore.kernel.org/lkml/CAHk-=wio46vC4t6xXD-sFqjoPwFm_u515jm3suzmkGxQTeA1_A@mail.gmail.com/");
const QString testOutputStringLink2 = QStringLiteral(
"<a "
"href=\"https://lore.kernel.org/lkml/CAHk-=wio46vC4t6xXD-sFqjoPwFm_u515jm3suzmkGxQTeA1_A@mail.gmail.com/\">https://lore.kernel.org/lkml/"
"CAHk-=wio46vC4t6xXD-sFqjoPwFm_u515jm3suzmkGxQTeA1_A@mail.gmail.com/</a>");
QString testInputStringEmail = QStringLiteral(R"(email@example.com <a href="mailto:email@example.com">Link already rich</a>)");
QString testOutputStringEmail =
QStringLiteral(R"(<a href="mailto:email@example.com">email@example.com</a> <a href="mailto:email@example.com">Link already rich</a>)");
QString testInputStringMxId = QStringLiteral("@user:kde.org <a href=\"https://matrix.to/#/@user:kde.org\">Link already rich</a>");
QString testOutputStringMxId = QStringLiteral(
"<b><a href=\"https://matrix.to/#/@user:kde.org\">@user:kde.org</a></b> <b><a href=\"https://matrix.to/#/@user:kde.org\">Link already rich</a></b>");
TextHandler testTextHandler;
testTextHandler.setData(testInputStringLink1);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText), testOutputStringLink1);
testTextHandler.setData(testInputStringLink2);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText), testOutputStringLink2);
testTextHandler.setData(testInputStringEmail);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText), testOutputStringEmail);
testTextHandler.setData(testInputStringMxId);
QCOMPARE(testTextHandler.handleRecieveRichText(Qt::RichText), testOutputStringMxId);
}
QTEST_MAIN(TextHandlerTest)
#include "texthandlertest.moc"

View File

@@ -24,7 +24,6 @@ add_library(neochat STATIC
models/publicroomlistmodel.cpp
models/userdirectorylistmodel.cpp
models/keywordnotificationrulemodel.cpp
utils.cpp
notificationsmanager.cpp
models/sortfilterroomlistmodel.cpp
chatdocumenthandler.cpp
@@ -47,6 +46,7 @@ add_library(neochat STATIC
models/statemodel.cpp
filetransferpseudojob.cpp
models/searchmodel.cpp
texthandler.cpp
)
add_executable(neochat-app

View File

@@ -20,25 +20,10 @@
#include "neochatroom.h"
#include "neochatuser.h"
#include "roommanager.h"
#include "texthandler.h"
using namespace Quotient;
QString markdownToHTML(const QString &markdown)
{
const auto str = markdown.toUtf8();
char *tmp_buf = cmark_markdown_to_html(str.constData(), str.size(), CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE);
const std::string html(tmp_buf);
free(tmp_buf);
auto result = QString::fromStdString(html).trimmed();
result.replace("<!-- raw HTML omitted -->", "");
return result;
}
ActionsHandler::ActionsHandler(QObject *parent)
: QObject(parent)
{
@@ -169,7 +154,10 @@ void ActionsHandler::handleMessage(const QString &text, QString handledText, con
}
handledText = CustomEmojiModel::instance().preprocessText(handledText);
handledText = markdownToHTML(handledText);
TextHandler textHandler;
textHandler.setData(handledText);
handledText = textHandler.handleSendText();
if (handledText.count("<p>") == 1 && handledText.count("</p>") == 1) {
handledText.remove("<p>");
handledText.remove("</p>");

View File

@@ -50,5 +50,3 @@ private:
QString handleMentions(QString handledText, const bool &isEdit = false);
void handleMessage(const QString &text, QString handledText, const bool &isEdit = false);
};
QString markdownToHTML(const QString &markdown);

View File

@@ -27,7 +27,6 @@
#include <KLocalizedString>
#include "neochatuser.h"
#include "utils.h"
using namespace Quotient;

View File

@@ -417,7 +417,7 @@ QVariant RoomListModel::data(const QModelIndex &index, int role) const
return m_categoryVisibility.value(data(index, CategoryRole).toInt(), true);
}
if (role == SubtitleTextRole) {
return room->subtitleText();
return room->lastEventToString(Qt::PlainText, true);
}
if (role == AvatarImageRole) {
return room->avatar(128);

View File

@@ -47,7 +47,7 @@
#endif
#include "filetransferpseudojob.h"
#include "stickerevent.h"
#include "utils.h"
#include "texthandler.h"
#ifndef Q_OS_ANDROID
#include <KIO/Job>
@@ -257,10 +257,11 @@ bool NeoChatRoom::lastEventIsSpoiler() const
return false;
}
QString NeoChatRoom::lastEventToString() const
QString NeoChatRoom::lastEventToString(Qt::TextFormat format, bool stripNewlines) const
{
if (auto event = lastEvent()) {
return roomMembername(event->senderId()) + (event->isStateEvent() ? " " : ": ") + eventToString(*event);
return roomMembername(event->senderId()) + (event->isStateEvent() ? QLatin1String(" ") : QLatin1String(": "))
+ eventToString(*event, format, stripNewlines);
}
return QLatin1String("");
}
@@ -329,45 +330,6 @@ QDateTime NeoChatRoom::lastActiveTime()
return messageEvents().rbegin()->get()->originTimestamp();
}
QString NeoChatRoom::subtitleText()
{
static const QRegularExpression blockquote("(\r\n\t|\n|\r\t|)> ");
static const QRegularExpression heading("(\r\n\t|\n|\r\t|)\\#{1,6} ");
static const QRegularExpression newlines("(\r\n\t|\n|\r\t|\r\n)");
static const QRegularExpression bold1("(\\*\\*|__)(?=\\S)([^\\r]*\\S)\\1");
static const QRegularExpression bold2("(\\*|_)(?=\\S)([^\\r]*\\S)\\1");
static const QRegularExpression strike1("~~(.*)~~");
static const QRegularExpression strike2("~(.*)~");
static const QRegularExpression del("<del>(.*)</del>");
static const QRegularExpression multileLineCode("```([^```]+)```");
static const QRegularExpression singleLinecode("`([^`]+)`");
QString subtitle = lastEventToString().size() == 0 ? topic() : lastEventToString();
subtitle
// replace blockquote, i.e. '> text'
.replace(blockquote, " ")
// replace headings, i.e. "# text"
.replace(heading, " ")
// replace newlines
.replace(newlines, " ")
// replace '**text**' and '__text__'
.replace(bold1, "\\2")
// replace '*text*' and '_text_'
.replace(bold2, "\\2")
// replace '~~text~~'
.replace(strike1, "\\1")
// replace '~text~'
.replace(strike2, "\\1")
// replace '<del>text</del>'
.replace(del, "\\1")
// replace '```code```'
.replace(multileLineCode, "\\1")
// replace '`code`'
.replace(singleLinecode, "\\1");
return subtitle.size() > 0 ? subtitle : QStringLiteral(" ");
}
int NeoChatRoom::savedTopVisibleIndex() const
{
return firstDisplayedMarker() == historyEdge() ? 0 : int(firstDisplayedMarker() - messageEvents().rbegin());
@@ -451,7 +413,7 @@ QString NeoChatRoom::avatarMediaId() const
return {};
}
QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format, bool removeReply) const
QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format, bool stripNewlines) const
{
const bool prettyPrint = (format == Qt::RichText);
@@ -462,53 +424,43 @@ QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format,
return visit(
#endif
evt,
[this, prettyPrint, removeReply](const RoomMessageEvent &e) {
[this, format, stripNewlines](const RoomMessageEvent &e) {
using namespace MessageEventContent;
// 1. prettyPrint/HTML
if (prettyPrint && e.hasTextContent() && e.mimeType().name() != "text/plain") {
auto htmlBody = static_cast<const TextContent *>(e.content())->body;
if (removeReply) {
htmlBody.remove(utils::removeRichReplyRegex);
}
htmlBody.replace(utils::userPillRegExp, R"(<b class="user-pill">\1</b>)");
htmlBody.replace(utils::strikethroughRegExp, "<s>\\1</s>");
auto url = connection()->homeserver();
auto base = url.scheme() + QStringLiteral("://") + url.host() + (url.port() != -1 ? ':' + QString::number(url.port()) : QString());
htmlBody.replace(utils::mxcImageRegExp, QStringLiteral(R"(<img \1 src="%1/_matrix/media/r0/download/\2/\3" \4 > )").arg(base));
return htmlBody;
}
TextHandler textHandler;
if (e.hasFileContent()) {
auto fileCaption = e.content()->fileInfo()->originalName.toHtmlEscaped();
auto fileCaption = e.content()->fileInfo()->originalName;
if (fileCaption.isEmpty()) {
fileCaption = prettyPrint ? Quotient::prettyPrint(e.plainBody()) : e.plainBody();
fileCaption = e.plainBody();
} else if (e.content()->fileInfo()->originalName != e.plainBody()) {
fileCaption = e.plainBody() + " | " + fileCaption;
}
return !fileCaption.isEmpty() ? fileCaption : i18n("a file");
textHandler.setData(fileCaption);
return !fileCaption.isEmpty() ? textHandler.handleRecievePlainText() : i18n("a file");
}
// 2. prettyPrint/text 3. plainText/HTML 4. plainText/text
QString plainBody;
if (e.hasTextContent() && e.content() && e.mimeType().name() == "text/plain") { // 2/4
plainBody = static_cast<const TextContent *>(e.content())->body;
} else { // 3
plainBody = e.plainBody();
QString body;
if (e.hasTextContent() && e.content()) {
body = static_cast<const TextContent *>(e.content())->body;
} else {
body = e.plainBody();
}
if (prettyPrint) {
if (removeReply) {
plainBody.remove(utils::removeReplyRegex);
}
return Quotient::prettyPrint(plainBody);
textHandler.setData(body);
Qt::TextFormat inputFormat;
if (e.mimeType().name() == "text/plain") {
inputFormat = Qt::PlainText;
} else {
inputFormat = Qt::RichText;
}
if (removeReply) {
return plainBody.remove(utils::removeReplyRegex);
if (format == Qt::RichText) {
return textHandler.handleRecieveRichText(inputFormat, this, &e, stripNewlines);
} else {
return textHandler.handleRecievePlainText(inputFormat, stripNewlines);
}
return plainBody;
},
[](const StickerEvent &e) {
return e.body();

View File

@@ -124,7 +124,7 @@ public:
///
/// \see lastEvent
/// \see lastEventIsSpoiler
[[nodiscard]] QString lastEventToString() const;
[[nodiscard]] QString lastEventToString(Qt::TextFormat format = Qt::PlainText, bool stripNewlines = false) const;
/// Convenient way to check if the last event looks like it has spoilers.
///
@@ -137,12 +137,6 @@ public:
/// \see lastEvent
[[nodiscard]] QDateTime lastActiveTime();
/// Get subtitle text for room
///
/// Fetches last event and removes markdown formatting
/// \see lastEventToString
[[nodiscard]] QString subtitleText();
[[nodiscard]] bool isSpace();
bool isEventHighlighted(const Quotient::RoomEvent *e) const;
@@ -262,7 +256,7 @@ public:
[[nodiscard]] QString avatarMediaId() const;
[[nodiscard]] QString eventToString(const Quotient::RoomEvent &evt, Qt::TextFormat format = Qt::PlainText, bool removeReply = true) const;
[[nodiscard]] QString eventToString(const Quotient::RoomEvent &evt, Qt::TextFormat format = Qt::PlainText, bool stripNewlines = false) const;
[[nodiscard]] QString eventToGenericString(const Quotient::RoomEvent &evt) const;
Q_INVOKABLE [[nodiscard]] bool containsUser(const QString &userID) const;

View File

@@ -22,11 +22,11 @@
#include <jobs/basejob.h>
#include <user.h>
#include "actionshandler.h"
#include "controller.h"
#include "neochatconfig.h"
#include "neochatroom.h"
#include "roommanager.h"
#include "texthandler.h"
#include "windowcontroller.h"
using namespace Quotient;
@@ -85,7 +85,9 @@ void NotificationsManager::postNotification(NeoChatRoom *room,
std::unique_ptr<KNotificationReplyAction> replyAction(new KNotificationReplyAction(i18n("Reply")));
replyAction->setPlaceholderText(i18n("Reply..."));
connect(replyAction.get(), &KNotificationReplyAction::replied, this, [room, replyEventId](const QString &text) {
room->postMessage(text, markdownToHTML(text), RoomMessageEvent::MsgType::Text, replyEventId, QString());
TextHandler textHandler;
textHandler.setData(text);
room->postMessage(text, textHandler.handleSendText(), RoomMessageEvent::MsgType::Text, replyEventId, QString());
});
notification->setReplyAction(std::move(replyAction));
}

View File

@@ -16,25 +16,7 @@ TextEdit {
property bool isEmote: false
property bool isReplyLabel: false
readonly property var linkRegex: /(href=["'])?(\b(https?):\/\/[^\s\<\>\"\'\\\?\:\)\(]+(\(.*?\))*(\?(?=[a-z])[^\s\\\)]+|$)?)/g
property string textMessage: model.display.includes("http")
? model.display.replace(linkRegex, function() {
if (arguments[0].includes("/_matrix/media/r0/download/")) {
return arguments[0];
}
if (arguments[1]) {
return arguments[0];
}
const l = arguments[2];
if ([".", ","].includes(l[l.length-1])) {
const link = l.substring(0, l.length-1);
const leftover = l[l.length-1];
return `<a href="${link}">${link}</a>${leftover}`;
}
return `<a href="${l}">${l}</a>`;
})
: model.display
property string textMessage: model.display
property bool spoilerRevealed: !hasSpoiler.test(textMessage)
ListView.onReused: Qt.binding(() => !hasSpoiler.test(textMessage))
@@ -46,6 +28,7 @@ TextEdit {
Controller.forceRefreshTextDocument(contentLabel.textDocument, contentLabel)
}
onTextChanged: console.log(text)
text: "<style>
table {
width:100%;

378
src/texthandler.cpp Normal file
View File

@@ -0,0 +1,378 @@
// SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com>
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
#include "texthandler.h"
#include <QDebug>
#include <QUrl>
#include <util.h>
#include <cmark.h>
// HTML tag names that are allowed in a message body. The entries match the
// tag list in the Matrix client-server spec v1.5 (m.room.message msgtypes).
// NOTE(review): presumably consulted by isAllowedTag() - confirm.
static const QStringList allowedTags = {
QStringLiteral("font"), QStringLiteral("del"), QStringLiteral("h1"), QStringLiteral("h2"), QStringLiteral("h3"), QStringLiteral("h4"),
QStringLiteral("h5"), QStringLiteral("h6"), QStringLiteral("blockquote"), QStringLiteral("p"), QStringLiteral("a"), QStringLiteral("ul"),
QStringLiteral("ol"), QStringLiteral("sup"), QStringLiteral("sub"), QStringLiteral("li"), QStringLiteral("b"), QStringLiteral("i"),
QStringLiteral("u"), QStringLiteral("strong"), QStringLiteral("em"), QStringLiteral("strike"), QStringLiteral("code"), QStringLiteral("hr"),
QStringLiteral("br"), QStringLiteral("div"), QStringLiteral("table"), QStringLiteral("thead"), QStringLiteral("tbody"), QStringLiteral("tr"),
QStringLiteral("th"), QStringLiteral("td"), QStringLiteral("caption"), QStringLiteral("pre"), QStringLiteral("span"), QStringLiteral("img"),
QStringLiteral("details"), QStringLiteral("summary")};
// Per-tag list of attribute names that are allowed on that tag. Tags without
// an entry here have no allowed attributes listed.
// NOTE(review): presumably consulted by cleanAttributes() - confirm.
static const QHash<QString, QStringList> allowedAttributes = {
{QStringLiteral("font"), {QStringLiteral("data-mx-bg-color"), QStringLiteral("data-mx-color"), QStringLiteral("color")}},
{QStringLiteral("span"), {QStringLiteral("data-mx-bg-color"), QStringLiteral("data-mx-color"), QStringLiteral("data-mx-spoiler")}},
{QStringLiteral("a"), {QStringLiteral("name"), QStringLiteral("target"), QStringLiteral("href")}},
{QStringLiteral("img"), {QStringLiteral("width"), QStringLiteral("height"), QStringLiteral("alt"), QStringLiteral("title"), QStringLiteral("src")}},
{QStringLiteral("ol"), {QStringLiteral("start")}},
{QStringLiteral("code"), {QStringLiteral("class")}}};
// URL schemes that are allowed for links.
// NOTE(review): presumably applied to the href of <a> tags - confirm against the attribute-cleaning code.
static const QStringList allowedLinkSchemes = {QStringLiteral("https"),
QStringLiteral("http"),
QStringLiteral("ftp"),
QStringLiteral("mailto"),
QStringLiteral("magnet")};
// Returns the unmodified input string last passed to setData().
QString TextHandler::data() const
{
return m_data;
}
// Stores the string to be handled and rewinds the parse position to the start.
void TextHandler::setData(const QString &string)
{
m_data = string;
// Start tokenising from the beginning for the new input.
m_pos = 0;
}
/**
 * Prepare the stored data for sending.
 *
 * The data is converted from markdown to HTML and then walked token by token:
 * text (including text inside code tags) has its angle brackets escaped,
 * tags that are not in the allowed list are dropped and the remaining tags
 * have their attributes filtered by cleanAttributes().
 *
 * @return the sanitised HTML string.
 */
QString TextHandler::handleSendText()
{
    m_pos = 0;
    m_dataBuffer = markdownToHTML(m_data);

    QString sanitized;
    for (nextTokenType(); m_pos < m_dataBuffer.length(); nextTokenType()) {
        next();

        if (m_nextTokenType == Type::Tag) {
            // A disallowed tag is replaced by an empty string before its
            // attributes are cleaned, which leaves nothing in the output.
            const QString tagType = getTagType();
            const QString tagText = isAllowedTag(tagType) ? m_nextToken : QString();
            sanitized.append(cleanAttributes(tagType, tagText));
        } else {
            // Type::Text and Type::TextCode are both escaped.
            sanitized.append(escapeHtml(m_nextToken));
        }
    }
    return sanitized;
}
/**
 * Produce a string suitable for a rich text control from the stored data.
 *
 * Steps, in order: remove any <mx-reply> block, convert plain input to HTML
 * (escape angle brackets, newlines to <br>), linkify URLs, embolden user
 * mention links, rewrite mxc:// image sources to resolvable URLs (only when
 * both room and event are given), then drop disallowed tags/attributes and
 * finally turn <del> into <s>.
 *
 * @param inputFormat format of the stored data (Qt::PlainText or rich).
 * @param room room used to resolve mxc:// image URLs; may be nullptr.
 * @param event event used to resolve mxc:// image URLs; may be nullptr.
 * @param stripNewlines when true every <br> tag is replaced by a space.
 * @return the sanitised rich text string.
 */
QString TextHandler::handleRecieveRichText(Qt::TextFormat inputFormat, const NeoChatRoom *room, const Quotient::RoomEvent *event, bool stripNewlines)
{
m_pos = 0;
m_dataBuffer = m_data;
// Strip mx-reply if present.
m_dataBuffer.remove(TextRegex::removeRichReply);
// For plain text, escape html and convert line breaks to <br> tags.
if (inputFormat == Qt::PlainText) {
m_dataBuffer = escapeHtml(m_dataBuffer);
m_dataBuffer.replace(u'\n', QStringLiteral("<br>"));
}
// Linkify any plain text urls
m_dataBuffer = linkifyUrls(m_dataBuffer);
// Apply user style: wrap links to matrix.to user ids in <b> tags.
m_dataBuffer.replace(TextRegex::userPill, QStringLiteral(R"(<b>\1</b>)"));
// Make all media URLs resolvable.
if (room && event) {
QRegularExpressionMatchIterator i = TextRegex::mxcImage.globalMatch(m_dataBuffer);
while (i.hasNext()) {
const QRegularExpressionMatch match = i.next();
// Captures: 1 = text between "<img" and "src", 2 and 3 = the two parts of
// the mxc URL either side of the '/', 4 = text between the closing quote
// and '>'. The replace() call substitutes every occurrence of the matched
// text in the buffer.
#ifdef QUOTIENT_07
const QUrl mediaUrl = room->makeMediaUrl(event->id(), QUrl(QStringLiteral("mxc://") + match.captured(2) + u'/' + match.captured(3)));
m_dataBuffer.replace(match.captured(0),
QStringLiteral("<img ") + match.captured(1) + QStringLiteral("src=\"") + mediaUrl.toString() + u'"' + match.captured(4)
+ u'>');
#else
// Without makeMediaUrl the download URL is assembled by hand from the
// homeserver's scheme, host and (if set) port.
auto url = room->connection()->homeserver();
auto base = url.scheme() + QStringLiteral("://") + url.host() + (url.port() != -1 ? ':' + QString::number(url.port()) : QString());
m_dataBuffer.replace(match.captured(0),
QStringLiteral("<img ") + match.captured(1) + QStringLiteral("src=\"") + base + QStringLiteral("/_matrix/media/r0/download/")
+ match.captured(2) + u'/' + match.captured(3) + u'"' + match.captured(4) + u'>');
#endif
}
}
// Strip any disallowed tags/attributes.
QString outputString;
nextTokenType();
while (m_pos < m_dataBuffer.length()) {
next();
QString nextTokenBuffer = m_nextToken;
if (m_nextTokenType == Type::Text || m_nextTokenType == Type::TextCode) {
// Text (inside or outside code tags) only has its angle brackets escaped.
nextTokenBuffer = escapeHtml(nextTokenBuffer);
} else if (m_nextTokenType == Type::Tag) {
if (!isAllowedTag(getTagType())) {
// Disallowed tags are dropped entirely.
nextTokenBuffer = QString();
} else if ((getTagType() == QStringLiteral("br") && stripNewlines)) {
// Line breaks become a single space when newlines are being stripped.
nextTokenBuffer = u' ';
}
nextTokenBuffer = cleanAttributes(getTagType(), nextTokenBuffer);
}
outputString.append(nextTokenBuffer);
nextTokenType();
}
/**
 * Replace <del> with <s>
 * Note: <s> is still not a valid tag for the message from the server. We
 * convert as that is what is needed for Qt::RichText.
 */
outputString.replace(TextRegex::strikethrough, QStringLiteral("<s>\\1</s>"));
return outputString;
}
/**
 * Produce a string suitable for a plain text control from the stored data.
 *
 * Any <mx-reply> block is removed, line breaks are optionally flattened to
 * spaces, the text is run through the markdown converter so that markup can
 * be stripped uniformly, and then every tag is dropped. Text found between
 * code tags is unescaped.
 *
 * @param inputFormat format of the stored data (Qt::PlainText or rich).
 * @param stripNewlines when true "<br>", "<br />" and newline characters are
 *        replaced by a space before conversion.
 * @return the plain text string.
 */
QString TextHandler::handleRecievePlainText(Qt::TextFormat inputFormat, const bool &stripNewlines)
{
    const bool inputIsPlain = inputFormat == Qt::PlainText;

    m_pos = 0;
    m_dataBuffer = m_data;

    // Strip mx-reply if present.
    m_dataBuffer.remove(TextRegex::removeRichReply);

    if (stripNewlines) {
        m_dataBuffer.replace(QStringLiteral("<br>"), QStringLiteral(" "));
        m_dataBuffer.replace(QStringLiteral("<br />"), QStringLiteral(" "));
        m_dataBuffer.replace(u'\n', QStringLiteral(" "));
    }

    // Plain input is escaped here and unescaped again at the end so that
    // literal < and > survive; otherwise markdownToHTML would strip what it
    // takes to be a bad html tag.
    if (inputIsPlain) {
        m_dataBuffer = escapeHtml(m_dataBuffer);
    }

    // Converting to HTML looks counterproductive, but it means any markup
    // that arrives (e.g. in a caption body) is stripped by the same loop.
    m_dataBuffer = markdownToHTML(m_dataBuffer);

    // Drop every tag; text inside code tags is unescaped, other text is
    // copied through unchanged.
    QString plainText;
    for (nextTokenType(); m_pos < m_dataBuffer.length(); nextTokenType()) {
        next();
        if (m_nextTokenType == Type::Tag) {
            continue;
        }
        plainText.append(m_nextTokenType == Type::TextCode ? unescapeHtml(m_nextToken) : m_nextToken);
    }

    // Undo the escaping applied to plain input above.
    return inputIsPlain ? unescapeHtml(plainText) : plainText;
}
void TextHandler::next()
{
QString searchStr;
if (m_nextTokenType == Type::Tag) {
searchStr = u'>';
} else if (m_nextTokenType == Type::TextCode) {
// Anything between code tags is assumed to be plain text
searchStr = QStringLiteral("</code>");
} else {
searchStr = u'<';
}
int tokenEnd = m_dataBuffer.indexOf(searchStr, m_pos + 1);
if (tokenEnd == -1) {
tokenEnd = m_dataBuffer.length();
}
m_nextToken = m_dataBuffer.mid(m_pos, tokenEnd - m_pos + (m_nextTokenType == Type::Tag ? 1 : 0));
m_pos = tokenEnd + (m_nextTokenType == Type::Tag ? 1 : 0);
}
/**
 * Work out the type of the token that starts at m_pos and store it in
 * m_nextTokenType.
 *
 * - Type::TextCode when the previous token was an opening code tag that is
 *   not immediately followed by "</code>".
 * - Type::Tag when the buffer has a '<' at m_pos that is not followed by a
 *   space.
 * - Type::Text otherwise.
 *
 * The handle functions call this before their loop (possibly on an empty
 * buffer) and again after the last token has been consumed, so m_pos can be
 * at or past the end of the buffer. Every buffer access is therefore bounds
 * checked instead of indexing out of range.
 */
void TextHandler::nextTokenType()
{
    const int bufferLength = m_dataBuffer.length();

    // Nothing left to classify; the callers' loops stop on m_pos and never
    // read a token of this type.
    if (m_pos >= bufferLength) {
        m_nextTokenType = Type::Text;
        return;
    }

    const bool afterOpeningCodeTag = m_nextTokenType == Type::Tag && getTagType() == QStringLiteral("code") && !isCloseTag();

    if (afterOpeningCodeTag && m_dataBuffer.indexOf(QStringLiteral("</code>"), m_pos) != m_pos) {
        m_nextTokenType = Type::TextCode;
    } else if (m_dataBuffer.at(m_pos) == u'<' && (m_pos + 1 >= bufferLength || m_dataBuffer.at(m_pos + 1) != u' ')) {
        // A '<' that is the very last character is still classified as a tag:
        // there is no following space to mark it as text.
        m_nextTokenType = Type::Tag;
    } else {
        m_nextTokenType = Type::Text;
    }
}
/**
 * Extract the tag name from the current token (m_nextToken).
 *
 * The name starts after the leading '<' (or "</" for a closing tag) and ends
 * at the first match of TextRegex::endTagType, or at the end of the token
 * when there is no match.
 *
 * @return the tag name, or an empty string when the token is too short to
 *         hold one (an empty token, or a lone '<').
 */
QString TextHandler::getTagType() const
{
    // Indexing position 1 is only valid for tokens of at least two characters.
    if (m_nextToken.length() < 2) {
        return QString();
    }

    const int tagTypeStart = m_nextToken.at(1) == u'/' ? 2 : 1;
    const int tagTypeEnd = m_nextToken.indexOf(TextRegex::endTagType, tagTypeStart);

    // A negative length makes mid() return everything from tagTypeStart on.
    return m_nextToken.mid(tagTypeStart, tagTypeEnd == -1 ? -1 : tagTypeEnd - tagTypeStart);
}
/**
 * Whether the current token (m_nextToken) is a closing tag, i.e. its second
 * character is '/'. Tokens shorter than two characters are never closing
 * tags; the length check keeps the index in range.
 */
bool TextHandler::isCloseTag() const
{
    return m_nextToken.length() > 1 && m_nextToken.at(1) == u'/';
}
/**
 * Get the name part of an attribute string.
 *
 * @param string a single attribute, e.g. name="value" or a bare name.
 * @return everything before the first '=', or the whole string when it
 *         contains no '='.
 */
QString TextHandler::getAttributeType(const QString &string)
{
    const int equalsPos = string.indexOf(u'=');
    return equalsPos == -1 ? string : string.left(equalsPos);
}
/**
 * Get the value part of an attribute string.
 *
 * @param string a single attribute, e.g. name="value" or a bare name.
 * @return everything after the first '=' (quotes included), or an empty
 *         string when it contains no '='.
 */
QString TextHandler::getAttributeData(const QString &string)
{
    const int equalsPos = string.indexOf(u'=');
    if (equalsPos == -1) {
        return QStringLiteral();
    }
    return string.mid(equalsPos + 1);
}
// Returns true when the tag name is in the allowedTags list. The comparison
// is an exact string match on the name produced by getTagType().
bool TextHandler::isAllowedTag(const QString &type)
{
return allowedTags.contains(type);
}
/**
 * Whether an attribute name is permitted on the given tag.
 *
 * @param tag the tag name to look up in allowedAttributes.
 * @param attribute the attribute name to check.
 * @return true when the tag has an entry that lists the attribute; tags
 *         without an entry allow no attributes.
 */
bool TextHandler::isAllowedAttribute(const QString &tag, const QString &attribute)
{
    const auto attributesForTag = allowedAttributes.constFind(tag);
    return attributesForTag != allowedAttributes.constEnd() && attributesForTag->contains(attribute);
}
bool TextHandler::isAllowedLink(const QString &link, bool isImg)
{
const QUrl linkUrl = QUrl(link);
if (isImg) {
#ifdef QUOTIENT_07
return !linkUrl.isRelative() && linkUrl.scheme() == "mxc";
#else
return !linkUrl.isRelative() && (linkUrl.scheme() == "mxc" || linkUrl.scheme() == "https");
#endif
} else {
return !linkUrl.isRelative() && allowedLinkSchemes.contains(linkUrl.scheme());
}
}
/**
 * Remove every attribute from a tag that is not permitted for it.
 *
 * Attributes are split on spaces (and the closing '>'), so the tag text is
 * rebuilt as the tag name followed by each surviving attribute and a '>'.
 * Besides the allowedAttributes lookup, three attributes get a value check:
 * - img src must pass isAllowedLink() as an image link,
 * - a href must pass isAllowedLink(),
 * - code class must start with "language-".
 *
 * @param tag the tag name as returned by getTagType().
 * @param tagString the full text of the tag token.
 * @return the cleaned tag, or tagString unchanged when it contains no space
 *         after its first character (i.e. it has no attributes).
 */
QString TextHandler::cleanAttributes(const QString &tag, const QString &tagString)
{
    const int firstSpace = tagString.indexOf(u' ', 1);
    if (firstSpace == -1) {
        return tagString;
    }

    // Everything up to the first space is the "<name" part of the tag.
    QString cleaned = tagString.left(firstSpace);

    int attributeEnd = 0;
    for (int attributeStart = firstSpace + 1; attributeStart < tagString.length(); attributeStart = attributeEnd + 1) {
        attributeEnd = tagString.indexOf(TextRegex::endTagType, attributeStart);
        if (attributeEnd == -1) {
            attributeEnd = tagString.length();
        }

        const QString attribute = tagString.mid(attributeStart, attributeEnd - attributeStart);
        const QString attributeName = getAttributeType(attribute);
        if (!isAllowedAttribute(tag, attributeName)) {
            continue;
        }

        // The attribute value with its double quotes removed.
        const auto unquotedValue = [this, &attribute]() {
            return getAttributeData(attribute).remove(u'"');
        };

        bool keep = true;
        if (tag == QStringLiteral("img") && attributeName == QStringLiteral("src")) {
            keep = isAllowedLink(unquotedValue(), true);
        } else if (tag == u'a' && attributeName == QStringLiteral("href")) {
            keep = isAllowedLink(unquotedValue());
        } else if (tag == QStringLiteral("code") && attributeName == QStringLiteral("class")) {
            keep = unquotedValue().startsWith(QStringLiteral("language-"));
        }

        if (keep) {
            cleaned.append(u' ' + attribute);
        }
    }

    cleaned += u'>';
    return cleaned;
}
/**
 * Convert a markdown string to HTML using cmark.
 *
 * The conversion uses the CMARK_OPT_HARDBREAKS and CMARK_OPT_UNSAFE options.
 * The result is trimmed and any "<!-- raw HTML omitted -->" marker is
 * removed from it.
 *
 * @param markdown the text to convert.
 * @return the generated HTML.
 */
QString TextHandler::markdownToHTML(const QString &markdown)
{
    const auto utf8Markdown = markdown.toUtf8();

    // The buffer returned by cmark is owned by the caller and must be freed.
    char *renderedHtml = cmark_markdown_to_html(utf8Markdown.constData(), utf8Markdown.size(), CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE);
    QString html = QString::fromUtf8(renderedHtml).trimmed();
    free(renderedHtml);

    html.remove(QStringLiteral("<!-- raw HTML omitted -->"));
    return html;
}
/**
 * Escape the angle brackets in a string ('<' to "&lt;" and '>' to "&gt;").
 *
 * TODO: make this more intelligent. Other characters are currently not
 * escaped, especially '&', as this can conflict with the cmark markdown to
 * html conversion which already escapes characters in code blocks. The < and
 * > still need to be handled for when the user manually types in html.
 */
QString TextHandler::escapeHtml(QString stringIn)
{
    return stringIn.replace(u'<', QStringLiteral("&lt;")).replace(u'>', QStringLiteral("&gt;"));
}
/**
 * Reverse the HTML escaping of '<', '>' and '&' in a string.
 *
 * The replacements are applied in the listed order; the double escaped forms
 * come first so that brackets which were escaped twice inside a code block
 * are restored in a single step.
 */
QString TextHandler::unescapeHtml(QString stringIn)
{
    const QString replacements[][2] = {
        // For those situations where brackets in code block get double escaped
        {QStringLiteral("&amp;lt;"), QStringLiteral("<")},
        {QStringLiteral("&amp;gt;"), QStringLiteral(">")},
        {QStringLiteral("&lt;"), QStringLiteral("<")},
        {QStringLiteral("&gt;"), QStringLiteral(">")},
        {QStringLiteral("&amp;"), QStringLiteral("&")},
    };

    for (const auto &replacement : replacements) {
        stringIn.replace(replacement[0], replacement[1]);
    }
    return stringIn;
}
/**
 * Wrap the link-like parts of a string in anchor tags.
 *
 * Three passes are made, in this order: matrix identifiers become links to
 * https://matrix.to/#/<id>, full URLs become links to themselves and email
 * addresses become mailto: links.
 */
QString TextHandler::linkifyUrls(QString stringIn)
{
    // Matrix ids; the first capture (the character before the id) is kept.
    stringIn.replace(TextRegex::mxId, QStringLiteral(R"(\1<a href="https://matrix.to/#/\2">\2</a>)"));
    // Full URLs.
    stringIn.replace(TextRegex::fullUrl, QStringLiteral(R"(<a href="\1">\1</a>)"));
    // Email addresses, with or without a leading "mailto:".
    stringIn.replace(TextRegex::emailAddress, QStringLiteral(R"(<a href="mailto:\2">\1\2</a>)"));
    return stringIn;
}

131
src/texthandler.h Normal file
View File

@@ -0,0 +1,131 @@
// SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com>
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
#pragma once
#include <QHash>
#include <QRegularExpression>
#include <QString>
#include <QStringList>
#include "neochatroom.h"
// Regular expressions shared by the text handling code.
namespace TextRegex
{
// Matches the character that ends a tag name or an attribute: '>' or a space.
static const QRegularExpression endTagType{QStringLiteral("(>| )")};
// Matches a "> <...>..." block up to and including a blank line.
// NOTE(review): presumably the plain text reply fallback - confirm against callers.
static const QRegularExpression removeReply{QStringLiteral("> <.*?>.*?\\n\\n"), QRegularExpression::DotMatchesEverythingOption};
// Matches an <mx-reply> element together with its content.
static const QRegularExpression removeRichReply{QStringLiteral("<mx-reply>.*?</mx-reply>"), QRegularExpression::DotMatchesEverythingOption};
// Matches a <pre><code> block; capture 1 is the content of the code tag.
static const QRegularExpression codePill{QStringLiteral("<pre><code[^>]*>(.*?)</code></pre>"), QRegularExpression::DotMatchesEverythingOption};
// Matches an anchor whose href is a https://matrix.to/#/@... link; capture 1 is the whole anchor.
static const QRegularExpression userPill{QStringLiteral("(<a href=\"https://matrix.to/#/@.*?:.*?\">.*?</a>)"), QRegularExpression::DotMatchesEverythingOption};
// Matches a <del> element; capture 1 is its content.
static const QRegularExpression strikethrough{QStringLiteral("<del>(.*?)</del>"), QRegularExpression::DotMatchesEverythingOption};
// Matches an <img> tag with an mxc:// src. Captures: 1 = text between "<img"
// and "src", 2 and 3 = the parts of the mxc URL before and after the '/',
// 4 = text between the closing quote and '>'.
static const QRegularExpression mxcImage{QStringLiteral(R"AAA(<img(.*?)src="mxc:\/\/(.*?)\/(.*?)"(.*?)>)AAA")};
// Matches URLs starting with www., http(s):, ftp:, magnet: or matrix: while
// skipping anything already inside an <a>...</a> element; capture 1 is the URL.
static const QRegularExpression fullUrl(
QStringLiteral(
R"(<a.*?<\/a>(*SKIP)(*F)|\b((www\.(?!\.)(?!(\w|\.|-)+@)|(https?|ftp):(//)?\w|(magnet|matrix):)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))"),
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
// Matches email addresses outside of existing <a>...</a> elements; capture 1
// is an optional leading "mailto:" and capture 2 is the address.
static const QRegularExpression emailAddress(QStringLiteral(R"(<a.*?<\/a>(*SKIP)(*F)|\b(mailto:)?((\w|\.|-)+@(\w|\.|-)+\.\w+\b))"),
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
// Matches identifiers that start with '!', '#' or '@' and contain a
// ":server" part; capture 1 is the preceding character (or the start of the
// string) and capture 2 is the identifier.
static const QRegularExpression mxId(QStringLiteral(R"((^|[][[:space:](){}`'";])([!#@][-a-z0-9_=#/.]{1,252}:\w(?:\w|\.|-)*\.\w+(?::\d{1,5})?))"),
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
}
/**
 * @class TextHandler
 *
 * This class is designed to handle the text of both incoming and outgoing messages.
 *
 * This includes converting markdown to html and removing any html tags that shouldn't
 * be present as per the matrix spec
 * (https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes).
 */
class TextHandler
{
public:
    /**
     * @brief List of token types
     */
    enum Type {
        Text, /*!< Anything not a tag that doesn't have special handling */
        Tag, /*!< For any generic tag that doesn't have special handling */
        TextCode, /*!< Text between code tags */
    };

    /**
     * @brief Get the string being handled.
     *
     * @note The TextHandler doesn't modify the input data variable so the unhandled
     *       text can always be retrieved.
     */
    QString data() const;

    /**
     * @brief Set the string being handled.
     *
     * Setting new data resets the parse position of the TextHandler.
     */
    void setData(const QString &string);

    /**
     * @brief Handle the text for a message that is being sent.
     */
    QString handleSendText();

    /**
     * @brief Handle the text as a rich output for a message being received.
     *
     * The function does the following:
     *    - Removes invalid html tags and attributes
     *    - Strips any reply from the message
     *    - Formats user mentions
     *
     * @note In this case the rich text refers to the output format. The input
     *       can be in either and the parameter inputFormat just needs to be set
     *       appropriately.
     */
    QString handleRecieveRichText(Qt::TextFormat inputFormat = Qt::RichText,
                                  const NeoChatRoom *room = nullptr,
                                  const Quotient::RoomEvent *event = nullptr,
                                  bool stripNewlines = false);

    /**
     * @brief Handle the text as a plain output for a message being received.
     *
     * The function does the following:
     *    - Removes all html tags and attributes (except inside of code tags)
     *    - Strips any reply from the message
     *
     * @note In this case the plain text refers to the output format. The input
     *       can be in either and the parameter inputFormat just needs to be set
     *       appropriately.
     *
     * @warning The output of this function should NEVER be input into a rich text
     *          control. It will try to preserve < and > in the plain string which
     *          could be malicious tags if the control uses rich text format.
     */
    QString handleRecievePlainText(Qt::TextFormat inputFormat = Qt::PlainText, const bool &stripNewlines = false);

private:
    QString m_data; /*!< The unmodified input string. */
    QString m_dataBuffer; /*!< Working copy of the data used while handling. */

    // The tokeniser state is given defined starting values: nextTokenType()
    // reads m_nextTokenType before anything has assigned it, and the class
    // has no constructor that would otherwise initialise these members.
    int m_pos = 0; /*!< Current read position in m_dataBuffer. */
    Type m_nextTokenType = Type::Text; /*!< Type of the token at m_pos. */
    QString m_nextToken; /*!< The most recently read token. */

    void next();
    void nextTokenType();

    QString getTagType() const;
    bool isCloseTag() const;
    QString getAttributeType(const QString &string);
    QString getAttributeData(const QString &string);
    bool isAllowedTag(const QString &type);
    bool isAllowedAttribute(const QString &tag, const QString &attribute);
    bool isAllowedLink(const QString &link, bool isImg = false);
    QString cleanAttributes(const QString &tag, const QString &tagString);

    QString markdownToHTML(const QString &markdown);
    QString escapeHtml(QString stringIn);
    QString unescapeHtml(QString stringIn);
    QString linkifyUrls(QString stringIn);
};

View File

@@ -1,4 +0,0 @@
// SPDX-FileCopyrightText: 2018 Black Hat <bhat@encom.eu.org>
// SPDX-License-Identifier: GPL-3.0-only
#include "utils.h"

View File

@@ -1,16 +0,0 @@
// SPDX-FileCopyrightText: 2018 Black Hat <bhat@encom.eu.org>
// SPDX-License-Identifier: GPL-3.0-only
#pragma once
#include <QRegularExpression>
// Regular expressions for message text handling. The diff header above this
// file (@@ -1,16 +0,0 @@) shows the whole file being removed by this commit.
namespace utils
{
// Matches a "> <...>..." block up to and including a blank line.
static const QRegularExpression removeReplyRegex{"> <.*?>.*?\\n\\n", QRegularExpression::DotMatchesEverythingOption};
// Matches an <mx-reply> element together with its content.
static const QRegularExpression removeRichReplyRegex{"<mx-reply>.*?</mx-reply>", QRegularExpression::DotMatchesEverythingOption};
// Matches a <pre><code> block; capture 1 is the content of the code tag.
static const QRegularExpression codePillRegExp{"<pre><code[^>]*>(.*?)</code></pre>", QRegularExpression::DotMatchesEverythingOption};
// Matches an anchor whose href is a https://matrix.to/#/@... link; capture 1 is the whole anchor.
static const QRegularExpression userPillRegExp{"(<a href=\"https://matrix.to/#/@.*?:.*?\">.*?</a>)", QRegularExpression::DotMatchesEverythingOption};
// Matches a <del> element; capture 1 is its content.
static const QRegularExpression strikethroughRegExp{"<del>(.*?)</del>", QRegularExpression::DotMatchesEverythingOption};
// Matches an <img> tag with an mxc:// src; captures 2 and 3 are the parts of
// the mxc URL before and after the '/'.
static const QRegularExpression mxcImageRegExp{R"AAA(<img(.*?)src="mxc:\/\/(.*?)\/(.*?)"(.*?)>)AAA"};
}