Cherrypick Improve Text Handling to 23.04
Improve the handling of text both when sending and receiving.
The main feature is to fix the linked bug (and a host of others that are unreported but similar) which is caused by the fact that we don't properly clean html. This mr does that as per the matrix spec https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes. So any disallowed tags or attributes are removed and it does the special handling for certain attributes.
Additionally, the functions are also designed to cover any other text formatting required, particularly for received strings.
The receive side is covered by 2 functions `handleRecieveRichText` and `handleRecievePlainText`. The rich/plain in the function name refers to the output type not the input type (both can take plain and rich input), so `handleRecieveRichText` is called to get a string suitable to go in a rich text control and `handleRecievePlainText` for a plain control.
The functions also handle the following, some of which was previously handled by `eventToString` in `NeoChatRoom`:
- Strip any reply from the string
- Format any user mentions
- Linkify links in plain strings
- Handle mxc urls in rich text (uses the new `room->makeMediaUrl` functionality from libQuotient)
- `handleRecievePlainText` also deals with markup making `NeoChatRoom->subtitle` redundant
There is also an extensive test suite which defines the behaviour and the best way to review this is probably to look at the tests and decide whether you agree with the expected output given the inputs and/or if there is any missing behaviour.
The final aim especially with the test suite is to give us a framework to make further updates in the future easier and hopefully prevent a new feature breaking old behaviour with the tests.
BUG: 463932 \
BUG: 466330 \
BUG: 466930
(cherry picked from commit f6ba4f2ecd)
This commit is contained in:
@@ -24,7 +24,6 @@ add_library(neochat STATIC
|
||||
models/publicroomlistmodel.cpp
|
||||
models/userdirectorylistmodel.cpp
|
||||
models/keywordnotificationrulemodel.cpp
|
||||
utils.cpp
|
||||
notificationsmanager.cpp
|
||||
models/sortfilterroomlistmodel.cpp
|
||||
chatdocumenthandler.cpp
|
||||
@@ -47,6 +46,7 @@ add_library(neochat STATIC
|
||||
models/statemodel.cpp
|
||||
filetransferpseudojob.cpp
|
||||
models/searchmodel.cpp
|
||||
texthandler.cpp
|
||||
)
|
||||
|
||||
add_executable(neochat-app
|
||||
|
||||
@@ -20,25 +20,10 @@
|
||||
#include "neochatroom.h"
|
||||
#include "neochatuser.h"
|
||||
#include "roommanager.h"
|
||||
#include "texthandler.h"
|
||||
|
||||
using namespace Quotient;
|
||||
|
||||
/**
 * Convert a markdown string to html using cmark.
 *
 * The cmark output is trimmed and any "raw HTML omitted" placeholder comments
 * are removed before the result is returned.
 */
QString markdownToHTML(const QString &markdown)
{
    const auto str = markdown.toUtf8();
    char *tmp_buf = cmark_markdown_to_html(str.constData(), str.size(), CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE);

    // Decode straight from the cmark buffer: no intermediate std::string copy is
    // needed and QString::fromUtf8() yields a null string for a null pointer.
    auto result = QString::fromUtf8(tmp_buf).trimmed();

    // The buffer returned by cmark is owned by the caller.
    free(tmp_buf);

    result.replace("<!-- raw HTML omitted -->", "");

    return result;
}
|
||||
|
||||
ActionsHandler::ActionsHandler(QObject *parent)
|
||||
: QObject(parent)
|
||||
{
|
||||
@@ -169,7 +154,10 @@ void ActionsHandler::handleMessage(const QString &text, QString handledText, con
|
||||
}
|
||||
|
||||
handledText = CustomEmojiModel::instance().preprocessText(handledText);
|
||||
handledText = markdownToHTML(handledText);
|
||||
TextHandler textHandler;
|
||||
textHandler.setData(handledText);
|
||||
handledText = textHandler.handleSendText();
|
||||
|
||||
if (handledText.count("<p>") == 1 && handledText.count("</p>") == 1) {
|
||||
handledText.remove("<p>");
|
||||
handledText.remove("</p>");
|
||||
|
||||
@@ -50,5 +50,3 @@ private:
|
||||
QString handleMentions(QString handledText, const bool &isEdit = false);
|
||||
void handleMessage(const QString &text, QString handledText, const bool &isEdit = false);
|
||||
};
|
||||
|
||||
QString markdownToHTML(const QString &markdown);
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
#include <KLocalizedString>
|
||||
|
||||
#include "neochatuser.h"
|
||||
#include "utils.h"
|
||||
|
||||
using namespace Quotient;
|
||||
|
||||
|
||||
@@ -417,7 +417,7 @@ QVariant RoomListModel::data(const QModelIndex &index, int role) const
|
||||
return m_categoryVisibility.value(data(index, CategoryRole).toInt(), true);
|
||||
}
|
||||
if (role == SubtitleTextRole) {
|
||||
return room->subtitleText();
|
||||
return room->lastEventToString(Qt::PlainText, true);
|
||||
}
|
||||
if (role == AvatarImageRole) {
|
||||
return room->avatar(128);
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
#endif
|
||||
#include "filetransferpseudojob.h"
|
||||
#include "stickerevent.h"
|
||||
#include "utils.h"
|
||||
#include "texthandler.h"
|
||||
|
||||
#ifndef Q_OS_ANDROID
|
||||
#include <KIO/Job>
|
||||
@@ -257,10 +257,11 @@ bool NeoChatRoom::lastEventIsSpoiler() const
|
||||
return false;
|
||||
}
|
||||
|
||||
QString NeoChatRoom::lastEventToString() const
|
||||
QString NeoChatRoom::lastEventToString(Qt::TextFormat format, bool stripNewlines) const
|
||||
{
|
||||
if (auto event = lastEvent()) {
|
||||
return roomMembername(event->senderId()) + (event->isStateEvent() ? " " : ": ") + eventToString(*event);
|
||||
return roomMembername(event->senderId()) + (event->isStateEvent() ? QLatin1String(" ") : QLatin1String(": "))
|
||||
+ eventToString(*event, format, stripNewlines);
|
||||
}
|
||||
return QLatin1String("");
|
||||
}
|
||||
@@ -329,45 +330,6 @@ QDateTime NeoChatRoom::lastActiveTime()
|
||||
return messageEvents().rbegin()->get()->originTimestamp();
|
||||
}
|
||||
|
||||
/**
 * Build the subtitle for the room.
 *
 * Uses the last event string (or the room topic when that string is empty) and
 * applies a fixed sequence of markdown-stripping replacements to it. A single
 * space is returned when the result is empty.
 */
QString NeoChatRoom::subtitleText()
{
    struct MarkupRule {
        QRegularExpression pattern;
        QString replacement;
    };

    // The rules are applied strictly in this order.
    static const MarkupRule rules[] = {
        // blockquote, i.e. '> text'
        {QRegularExpression("(\r\n\t|\n|\r\t|)> "), QString(" ")},
        // headings, i.e. "# text"
        {QRegularExpression("(\r\n\t|\n|\r\t|)\\#{1,6} "), QString(" ")},
        // newlines
        {QRegularExpression("(\r\n\t|\n|\r\t|\r\n)"), QString(" ")},
        // '**text**' and '__text__'
        {QRegularExpression("(\\*\\*|__)(?=\\S)([^\\r]*\\S)\\1"), QString("\\2")},
        // '*text*' and '_text_'
        {QRegularExpression("(\\*|_)(?=\\S)([^\\r]*\\S)\\1"), QString("\\2")},
        // '~~text~~'
        {QRegularExpression("~~(.*)~~"), QString("\\1")},
        // '~text~'
        {QRegularExpression("~(.*)~"), QString("\\1")},
        // '<del>text</del>'
        {QRegularExpression("<del>(.*)</del>"), QString("\\1")},
        // '```code```'
        {QRegularExpression("```([^```]+)```"), QString("\\1")},
        // '`code`'
        {QRegularExpression("`([^`]+)`"), QString("\\1")},
    };

    const QString lastEvent = lastEventToString();
    QString subtitle = lastEvent.size() == 0 ? topic() : lastEvent;

    for (const MarkupRule &rule : rules) {
        subtitle.replace(rule.pattern, rule.replacement);
    }

    if (subtitle.size() > 0) {
        return subtitle;
    }
    return QStringLiteral(" ");
}
|
||||
|
||||
int NeoChatRoom::savedTopVisibleIndex() const
|
||||
{
|
||||
return firstDisplayedMarker() == historyEdge() ? 0 : int(firstDisplayedMarker() - messageEvents().rbegin());
|
||||
@@ -451,7 +413,7 @@ QString NeoChatRoom::avatarMediaId() const
|
||||
return {};
|
||||
}
|
||||
|
||||
QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format, bool removeReply) const
|
||||
QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format, bool stripNewlines) const
|
||||
{
|
||||
const bool prettyPrint = (format == Qt::RichText);
|
||||
|
||||
@@ -462,53 +424,43 @@ QString NeoChatRoom::eventToString(const RoomEvent &evt, Qt::TextFormat format,
|
||||
return visit(
|
||||
#endif
|
||||
evt,
|
||||
[this, prettyPrint, removeReply](const RoomMessageEvent &e) {
|
||||
[this, format, stripNewlines](const RoomMessageEvent &e) {
|
||||
using namespace MessageEventContent;
|
||||
|
||||
// 1. prettyPrint/HTML
|
||||
if (prettyPrint && e.hasTextContent() && e.mimeType().name() != "text/plain") {
|
||||
auto htmlBody = static_cast<const TextContent *>(e.content())->body;
|
||||
if (removeReply) {
|
||||
htmlBody.remove(utils::removeRichReplyRegex);
|
||||
}
|
||||
htmlBody.replace(utils::userPillRegExp, R"(<b class="user-pill">\1</b>)");
|
||||
htmlBody.replace(utils::strikethroughRegExp, "<s>\\1</s>");
|
||||
|
||||
auto url = connection()->homeserver();
|
||||
auto base = url.scheme() + QStringLiteral("://") + url.host() + (url.port() != -1 ? ':' + QString::number(url.port()) : QString());
|
||||
htmlBody.replace(utils::mxcImageRegExp, QStringLiteral(R"(<img \1 src="%1/_matrix/media/r0/download/\2/\3" \4 > )").arg(base));
|
||||
|
||||
return htmlBody;
|
||||
}
|
||||
TextHandler textHandler;
|
||||
|
||||
if (e.hasFileContent()) {
|
||||
auto fileCaption = e.content()->fileInfo()->originalName.toHtmlEscaped();
|
||||
auto fileCaption = e.content()->fileInfo()->originalName;
|
||||
if (fileCaption.isEmpty()) {
|
||||
fileCaption = prettyPrint ? Quotient::prettyPrint(e.plainBody()) : e.plainBody();
|
||||
fileCaption = e.plainBody();
|
||||
} else if (e.content()->fileInfo()->originalName != e.plainBody()) {
|
||||
fileCaption = e.plainBody() + " | " + fileCaption;
|
||||
}
|
||||
return !fileCaption.isEmpty() ? fileCaption : i18n("a file");
|
||||
textHandler.setData(fileCaption);
|
||||
return !fileCaption.isEmpty() ? textHandler.handleRecievePlainText() : i18n("a file");
|
||||
}
|
||||
|
||||
// 2. prettyPrint/text 3. plainText/HTML 4. plainText/text
|
||||
QString plainBody;
|
||||
if (e.hasTextContent() && e.content() && e.mimeType().name() == "text/plain") { // 2/4
|
||||
plainBody = static_cast<const TextContent *>(e.content())->body;
|
||||
} else { // 3
|
||||
plainBody = e.plainBody();
|
||||
QString body;
|
||||
if (e.hasTextContent() && e.content()) {
|
||||
body = static_cast<const TextContent *>(e.content())->body;
|
||||
} else {
|
||||
body = e.plainBody();
|
||||
}
|
||||
|
||||
if (prettyPrint) {
|
||||
if (removeReply) {
|
||||
plainBody.remove(utils::removeReplyRegex);
|
||||
}
|
||||
return Quotient::prettyPrint(plainBody);
|
||||
textHandler.setData(body);
|
||||
|
||||
Qt::TextFormat inputFormat;
|
||||
if (e.mimeType().name() == "text/plain") {
|
||||
inputFormat = Qt::PlainText;
|
||||
} else {
|
||||
inputFormat = Qt::RichText;
|
||||
}
|
||||
if (removeReply) {
|
||||
return plainBody.remove(utils::removeReplyRegex);
|
||||
|
||||
if (format == Qt::RichText) {
|
||||
return textHandler.handleRecieveRichText(inputFormat, this, &e, stripNewlines);
|
||||
} else {
|
||||
return textHandler.handleRecievePlainText(inputFormat, stripNewlines);
|
||||
}
|
||||
return plainBody;
|
||||
},
|
||||
[](const StickerEvent &e) {
|
||||
return e.body();
|
||||
|
||||
@@ -124,7 +124,7 @@ public:
|
||||
///
|
||||
/// \see lastEvent
|
||||
/// \see lastEventIsSpoiler
|
||||
[[nodiscard]] QString lastEventToString() const;
|
||||
[[nodiscard]] QString lastEventToString(Qt::TextFormat format = Qt::PlainText, bool stripNewlines = false) const;
|
||||
|
||||
/// Convenient way to check if the last event looks like it has spoilers.
|
||||
///
|
||||
@@ -137,12 +137,6 @@ public:
|
||||
/// \see lastEvent
|
||||
[[nodiscard]] QDateTime lastActiveTime();
|
||||
|
||||
/// Get subtitle text for room
|
||||
///
|
||||
/// Fetches last event and removes markdown formatting
|
||||
/// \see lastEventToString
|
||||
[[nodiscard]] QString subtitleText();
|
||||
|
||||
[[nodiscard]] bool isSpace();
|
||||
|
||||
bool isEventHighlighted(const Quotient::RoomEvent *e) const;
|
||||
@@ -262,7 +256,7 @@ public:
|
||||
|
||||
[[nodiscard]] QString avatarMediaId() const;
|
||||
|
||||
[[nodiscard]] QString eventToString(const Quotient::RoomEvent &evt, Qt::TextFormat format = Qt::PlainText, bool removeReply = true) const;
|
||||
[[nodiscard]] QString eventToString(const Quotient::RoomEvent &evt, Qt::TextFormat format = Qt::PlainText, bool stripNewlines = false) const;
|
||||
[[nodiscard]] QString eventToGenericString(const Quotient::RoomEvent &evt) const;
|
||||
|
||||
Q_INVOKABLE [[nodiscard]] bool containsUser(const QString &userID) const;
|
||||
|
||||
@@ -22,11 +22,11 @@
|
||||
#include <jobs/basejob.h>
|
||||
#include <user.h>
|
||||
|
||||
#include "actionshandler.h"
|
||||
#include "controller.h"
|
||||
#include "neochatconfig.h"
|
||||
#include "neochatroom.h"
|
||||
#include "roommanager.h"
|
||||
#include "texthandler.h"
|
||||
#include "windowcontroller.h"
|
||||
|
||||
using namespace Quotient;
|
||||
@@ -85,7 +85,9 @@ void NotificationsManager::postNotification(NeoChatRoom *room,
|
||||
std::unique_ptr<KNotificationReplyAction> replyAction(new KNotificationReplyAction(i18n("Reply")));
|
||||
replyAction->setPlaceholderText(i18n("Reply..."));
|
||||
connect(replyAction.get(), &KNotificationReplyAction::replied, this, [room, replyEventId](const QString &text) {
|
||||
room->postMessage(text, markdownToHTML(text), RoomMessageEvent::MsgType::Text, replyEventId, QString());
|
||||
TextHandler textHandler;
|
||||
textHandler.setData(text);
|
||||
room->postMessage(text, textHandler.handleSendText(), RoomMessageEvent::MsgType::Text, replyEventId, QString());
|
||||
});
|
||||
notification->setReplyAction(std::move(replyAction));
|
||||
}
|
||||
|
||||
@@ -16,25 +16,7 @@ TextEdit {
|
||||
|
||||
property bool isEmote: false
|
||||
property bool isReplyLabel: false
|
||||
|
||||
readonly property var linkRegex: /(href=["'])?(\b(https?):\/\/[^\s\<\>\"\'\\\?\:\)\(]+(\(.*?\))*(\?(?=[a-z])[^\s\\\)]+|$)?)/g
|
||||
property string textMessage: model.display.includes("http")
|
||||
? model.display.replace(linkRegex, function() {
|
||||
if (arguments[0].includes("/_matrix/media/r0/download/")) {
|
||||
return arguments[0];
|
||||
}
|
||||
if (arguments[1]) {
|
||||
return arguments[0];
|
||||
}
|
||||
const l = arguments[2];
|
||||
if ([".", ","].includes(l[l.length-1])) {
|
||||
const link = l.substring(0, l.length-1);
|
||||
const leftover = l[l.length-1];
|
||||
return `<a href="${link}">${link}</a>${leftover}`;
|
||||
}
|
||||
return `<a href="${l}">${l}</a>`;
|
||||
})
|
||||
: model.display
|
||||
property string textMessage: model.display
|
||||
property bool spoilerRevealed: !hasSpoiler.test(textMessage)
|
||||
|
||||
ListView.onReused: Qt.binding(() => !hasSpoiler.test(textMessage))
|
||||
@@ -46,6 +28,7 @@ TextEdit {
|
||||
Controller.forceRefreshTextDocument(contentLabel.textDocument, contentLabel)
|
||||
}
|
||||
|
||||
onTextChanged: console.log(text)
|
||||
text: "<style>
|
||||
table {
|
||||
width:100%;
|
||||
|
||||
378
src/texthandler.cpp
Normal file
378
src/texthandler.cpp
Normal file
@@ -0,0 +1,378 @@
|
||||
// SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com>
|
||||
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
|
||||
|
||||
#include "texthandler.h"
|
||||
|
||||
#include <QDebug>
|
||||
#include <QUrl>
|
||||
|
||||
#include <util.h>
|
||||
|
||||
#include <cmark.h>
|
||||
|
||||
// Tag names that isAllowedTag() accepts; tags not in this list are dropped by the handlers.
static const QStringList allowedTags = {
    QStringLiteral("font"),    QStringLiteral("del"),
    QStringLiteral("h1"),      QStringLiteral("h2"),
    QStringLiteral("h3"),      QStringLiteral("h4"),
    QStringLiteral("h5"),      QStringLiteral("h6"),
    QStringLiteral("blockquote"), QStringLiteral("p"),
    QStringLiteral("a"),       QStringLiteral("ul"),
    QStringLiteral("ol"),      QStringLiteral("sup"),
    QStringLiteral("sub"),     QStringLiteral("li"),
    QStringLiteral("b"),       QStringLiteral("i"),
    QStringLiteral("u"),       QStringLiteral("strong"),
    QStringLiteral("em"),      QStringLiteral("strike"),
    QStringLiteral("code"),    QStringLiteral("hr"),
    QStringLiteral("br"),      QStringLiteral("div"),
    QStringLiteral("table"),   QStringLiteral("thead"),
    QStringLiteral("tbody"),   QStringLiteral("tr"),
    QStringLiteral("th"),      QStringLiteral("td"),
    QStringLiteral("caption"), QStringLiteral("pre"),
    QStringLiteral("span"),    QStringLiteral("img"),
    QStringLiteral("details"), QStringLiteral("summary"),
};
|
||||
// Per-tag list of attribute names that isAllowedAttribute() accepts.
// Tags without an entry keep no attributes at all.
static const QHash<QString, QStringList> allowedAttributes = {
    {QStringLiteral("font"),
     {QStringLiteral("data-mx-bg-color"), QStringLiteral("data-mx-color"), QStringLiteral("color")}},
    {QStringLiteral("span"),
     {QStringLiteral("data-mx-bg-color"), QStringLiteral("data-mx-color"), QStringLiteral("data-mx-spoiler")}},
    {QStringLiteral("a"),
     {QStringLiteral("name"), QStringLiteral("target"), QStringLiteral("href")}},
    {QStringLiteral("img"),
     {QStringLiteral("width"), QStringLiteral("height"), QStringLiteral("alt"), QStringLiteral("title"), QStringLiteral("src")}},
    {QStringLiteral("ol"),
     {QStringLiteral("start")}},
    {QStringLiteral("code"),
     {QStringLiteral("class")}},
};
|
||||
// URL schemes that isAllowedLink() accepts for non-image links.
static const QStringList allowedLinkSchemes = {
    QStringLiteral("https"),
    QStringLiteral("http"),
    QStringLiteral("ftp"),
    QStringLiteral("mailto"),
    QStringLiteral("magnet"),
};
|
||||
|
||||
/**
 * Return the unhandled input string as last passed to setData().
 */
QString TextHandler::data() const
{
    return m_data;
}
|
||||
|
||||
void TextHandler::setData(const QString &string)
|
||||
{
|
||||
m_data = string;
|
||||
m_pos = 0;
|
||||
}
|
||||
|
||||
/**
 * Produce the html to send for the current data.
 *
 * The data is converted from markdown to html and then walked token by token:
 * text (including text inside code tags) is passed through escapeHtml(),
 * disallowed tags are dropped and the attributes of the remaining tags are
 * filtered by cleanAttributes().
 */
QString TextHandler::handleSendText()
{
    m_pos = 0;
    m_dataBuffer = markdownToHTML(m_data);

    QString cleanedText;
    for (nextTokenType(); m_pos < m_dataBuffer.length(); nextTokenType()) {
        next();

        switch (m_nextTokenType) {
        case Type::Text:
        case Type::TextCode:
            cleanedText.append(escapeHtml(m_nextToken));
            break;
        case Type::Tag: {
            const QString tagType = getTagType();
            // A disallowed tag is replaced by an empty string before attribute cleaning.
            cleanedText.append(cleanAttributes(tagType, isAllowedTag(tagType) ? m_nextToken : QString()));
            break;
        }
        default:
            cleanedText.append(m_nextToken);
            break;
        }
    }
    return cleanedText;
}
|
||||
|
||||
/**
 * Turn the current data into a string suitable for a rich text control.
 *
 * Steps, in order: strip any mx-reply block, escape and add <br> line breaks
 * when the input is plain text, linkify urls, wrap user pills in <b>, rewrite
 * mxc image sources (only when both room and event are given), drop disallowed
 * tags/attributes and finally convert <del> to <s>.
 *
 * @param inputFormat Format of the stored data (plain or rich).
 * @param room Room used to resolve media urls; may be null.
 * @param event Event the text belongs to; may be null.
 * @param stripNewlines When true, allowed <br> tags are replaced by a space.
 */
QString TextHandler::handleRecieveRichText(Qt::TextFormat inputFormat, const NeoChatRoom *room, const Quotient::RoomEvent *event, bool stripNewlines)
{
    m_pos = 0;
    m_dataBuffer = m_data;

    // Strip mx-reply if present.
    m_dataBuffer.remove(TextRegex::removeRichReply);

    // For plain text, escape html and convert line breaks.
    if (inputFormat == Qt::PlainText) {
        m_dataBuffer = escapeHtml(m_dataBuffer);
        m_dataBuffer.replace(u'\n', QStringLiteral("<br>"));
    }

    // Linkify any plain text urls, then apply the user style.
    m_dataBuffer = linkifyUrls(m_dataBuffer);
    m_dataBuffer.replace(TextRegex::userPill, QStringLiteral(R"(<b>\1</b>)"));

    // Make all media URLs resolvable.
    if (room && event) {
        for (QRegularExpressionMatchIterator matches = TextRegex::mxcImage.globalMatch(m_dataBuffer); matches.hasNext();) {
            const QRegularExpressionMatch match = matches.next();
#ifdef QUOTIENT_07
            const QUrl mxcUrl(QStringLiteral("mxc://") + match.captured(2) + u'/' + match.captured(3));
            const QUrl mediaUrl = room->makeMediaUrl(event->id(), mxcUrl);
            const QString source = mediaUrl.toString();
#else
            const auto homeserver = room->connection()->homeserver();
            QString source = homeserver.scheme() + QStringLiteral("://") + homeserver.host();
            if (homeserver.port() != -1) {
                source += ':' + QString::number(homeserver.port());
            }
            source += QStringLiteral("/_matrix/media/r0/download/") + match.captured(2) + u'/' + match.captured(3);
#endif
            m_dataBuffer.replace(match.captured(0),
                                 QStringLiteral("<img ") + match.captured(1) + QStringLiteral("src=\"") + source + u'"' + match.captured(4) + u'>');
        }
    }

    // Strip any disallowed tags/attributes.
    QString cleanedText;
    for (nextTokenType(); m_pos < m_dataBuffer.length(); nextTokenType()) {
        next();

        QString token = m_nextToken;
        switch (m_nextTokenType) {
        case Type::Text:
        case Type::TextCode:
            token = escapeHtml(token);
            break;
        case Type::Tag: {
            const QString tagType = getTagType();
            if (!isAllowedTag(tagType)) {
                token = QString();
            } else if (stripNewlines && tagType == QStringLiteral("br")) {
                token = u' ';
            }
            token = cleanAttributes(tagType, token);
            break;
        }
        default:
            break;
        }
        cleanedText.append(token);
    }

    /**
     * Replace <del> with <s>
     * Note: <s> is still not a valid tag for the message from the server. We
     * convert as that is what is needed for Qt::RichText.
     */
    cleanedText.replace(TextRegex::strikethrough, QStringLiteral("<s>\\1</s>"));
    return cleanedText;
}
|
||||
|
||||
/**
 * Turn the current data into a string suitable for a plain text control.
 *
 * Any mx-reply block is removed, markdown is converted to html and then every
 * tag is stripped. Text inside code tags is passed through unescapeHtml().
 *
 * @param inputFormat Format of the stored data (plain or rich).
 * @param stripNewlines When true, "<br>", "<br />" and newline characters are
 *                      replaced by a space before any other processing.
 */
QString TextHandler::handleRecievePlainText(Qt::TextFormat inputFormat, const bool &stripNewlines)
{
    const bool plainInput = (inputFormat == Qt::PlainText);

    m_pos = 0;
    m_dataBuffer = m_data;

    // Strip mx-reply if present.
    m_dataBuffer.remove(TextRegex::removeRichReply);

    if (stripNewlines) {
        m_dataBuffer.replace(QStringLiteral("<br>"), QStringLiteral(" "))
            .replace(QStringLiteral("<br />"), QStringLiteral(" "))
            .replace(u'\n', QStringLiteral(" "));
    }

    // Escaping here and unescaping at the end allows < and > to be maintained in
    // a plain text string, otherwise markdownToHTML will strip what it thinks is
    // a bad html tag entirely.
    if (plainInput) {
        m_dataBuffer = escapeHtml(m_dataBuffer);
    }

    /**
     * This seems counterproductive but by converting any markup which could
     * arrive (e.g. in a caption body) it can then be stripped by the same code.
     */
    m_dataBuffer = markdownToHTML(m_dataBuffer);

    // Strip all tags/attributes except code blocks which will be unescaped.
    QString strippedText;
    for (nextTokenType(); m_pos < m_dataBuffer.length(); nextTokenType()) {
        next();

        if (m_nextTokenType == Type::Tag) {
            continue; // tags contribute nothing to the plain output
        }
        strippedText.append(m_nextTokenType == Type::TextCode ? unescapeHtml(m_nextToken) : m_nextToken);
    }

    return plainInput ? unescapeHtml(strippedText) : strippedText;
}
|
||||
|
||||
void TextHandler::next()
|
||||
{
|
||||
QString searchStr;
|
||||
if (m_nextTokenType == Type::Tag) {
|
||||
searchStr = u'>';
|
||||
} else if (m_nextTokenType == Type::TextCode) {
|
||||
// Anything between code tags is assumed to be plain text
|
||||
searchStr = QStringLiteral("</code>");
|
||||
} else {
|
||||
searchStr = u'<';
|
||||
}
|
||||
|
||||
int tokenEnd = m_dataBuffer.indexOf(searchStr, m_pos + 1);
|
||||
if (tokenEnd == -1) {
|
||||
tokenEnd = m_dataBuffer.length();
|
||||
}
|
||||
|
||||
m_nextToken = m_dataBuffer.mid(m_pos, tokenEnd - m_pos + (m_nextTokenType == Type::Tag ? 1 : 0));
|
||||
m_pos = tokenEnd + (m_nextTokenType == Type::Tag ? 1 : 0);
|
||||
}
|
||||
|
||||
void TextHandler::nextTokenType()
|
||||
{
|
||||
if (m_nextTokenType == Type::Tag && getTagType() == QStringLiteral("code") && !isCloseTag()
|
||||
&& m_dataBuffer.indexOf(QStringLiteral("</code>"), m_pos) != m_pos) {
|
||||
m_nextTokenType = Type::TextCode;
|
||||
} else if (m_dataBuffer[m_pos] == u'<' && m_dataBuffer[m_pos + 1] != u' ') {
|
||||
m_nextTokenType = Type::Tag;
|
||||
} else {
|
||||
m_nextTokenType = Type::Text;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the tag name of the current token, e.g. "p" for "<p>" or "</p>".
 *
 * The name starts after the leading '<' (and '/' for a closing tag) and ends
 * at the first '>' or space. An empty string is returned when the token is too
 * short to contain a name, rather than indexing past its end.
 */
QString TextHandler::getTagType() const
{
    if (m_nextToken.length() < 2) {
        return QString();
    }

    const int tagTypeStart = m_nextToken.at(1) == u'/' ? 2 : 1;
    const int tagTypeEnd = m_nextToken.indexOf(TextRegex::endTagType, tagTypeStart);
    return m_nextToken.mid(tagTypeStart, tagTypeEnd - tagTypeStart);
}
|
||||
|
||||
bool TextHandler::isCloseTag() const
|
||||
{
|
||||
return m_nextToken[1] == u'/';
|
||||
}
|
||||
|
||||
/**
 * Return the part of an attribute string before the first '='.
 *
 * The whole string is returned when it contains no '='.
 */
QString TextHandler::getAttributeType(const QString &string)
{
    const int equalsPos = string.indexOf(u'=');
    return equalsPos == -1 ? string : string.left(equalsPos);
}
|
||||
|
||||
/**
 * Return the part of an attribute string after the first '='.
 *
 * An empty string is returned when it contains no '='.
 */
QString TextHandler::getAttributeData(const QString &string)
{
    const int equalsPos = string.indexOf(u'=');
    if (equalsPos == -1) {
        return QStringLiteral();
    }
    return string.mid(equalsPos + 1);
}
|
||||
|
||||
bool TextHandler::isAllowedTag(const QString &type)
|
||||
{
|
||||
return allowedTags.contains(type);
|
||||
}
|
||||
|
||||
bool TextHandler::isAllowedAttribute(const QString &tag, const QString &attribute)
|
||||
{
|
||||
return allowedAttributes[tag].contains(attribute);
|
||||
}
|
||||
|
||||
bool TextHandler::isAllowedLink(const QString &link, bool isImg)
|
||||
{
|
||||
const QUrl linkUrl = QUrl(link);
|
||||
|
||||
if (isImg) {
|
||||
#ifdef QUOTIENT_07
|
||||
return !linkUrl.isRelative() && linkUrl.scheme() == "mxc";
|
||||
#else
|
||||
return !linkUrl.isRelative() && (linkUrl.scheme() == "mxc" || linkUrl.scheme() == "https");
|
||||
#endif
|
||||
} else {
|
||||
return !linkUrl.isRelative() && allowedLinkSchemes.contains(linkUrl.scheme());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remove the attributes that are not allowed from a tag string.
 *
 * A tag string with no space after its first character has no attributes and
 * is returned unchanged. Otherwise the tag is rebuilt from its name plus every
 * attribute that passes isAllowedAttribute(), with extra checks for:
 * - img src: the link must pass isAllowedLink() as an image
 * - a href: the link must pass isAllowedLink()
 * - code class: the value must start with "language-"
 * The rebuilt tag is always closed with '>'.
 *
 * @param tag The tag name, as returned by getTagType().
 * @param tagString The full tag token.
 */
QString TextHandler::cleanAttributes(const QString &tag, const QString &tagString)
{
    const int firstSpace = tagString.indexOf(u' ', 1);
    if (firstSpace == -1) {
        return tagString;
    }

    QString cleanedTag = tagString.left(firstSpace);
    int cursor = firstSpace + 1;

    while (cursor < tagString.length()) {
        // An attribute runs up to the next space or '>' (or the end of the string).
        int attributeEnd = tagString.indexOf(TextRegex::endTagType, cursor);
        if (attributeEnd == -1) {
            attributeEnd = tagString.length();
        }
        const QString attribute = tagString.mid(cursor, attributeEnd - cursor);
        cursor = attributeEnd + 1;

        const QString attributeType = getAttributeType(attribute);
        if (!isAllowedAttribute(tag, attributeType)) {
            continue;
        }

        bool keep = true;
        if (tag == QStringLiteral("img") && attributeType == QStringLiteral("src")) {
            keep = isAllowedLink(getAttributeData(attribute).remove(u'"'), true);
        } else if (tag == u'a' && attributeType == QStringLiteral("href")) {
            keep = isAllowedLink(getAttributeData(attribute).remove(u'"'));
        } else if (tag == QStringLiteral("code") && attributeType == QStringLiteral("class")) {
            keep = getAttributeData(attribute).remove(u'"').startsWith(QStringLiteral("language-"));
        }

        if (keep) {
            cleanedTag.append(u' ' + attribute);
        }
    }

    cleanedTag += u'>';
    return cleanedTag;
}
|
||||
|
||||
/**
 * Convert a markdown string to html using cmark.
 *
 * The cmark output is trimmed and any "raw HTML omitted" placeholder comments
 * are removed before the result is returned.
 */
QString TextHandler::markdownToHTML(const QString &markdown)
{
    const auto str = markdown.toUtf8();
    char *tmp_buf = cmark_markdown_to_html(str.constData(), str.size(), CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE);

    // Decode straight from the cmark buffer: no intermediate std::string copy is
    // needed and QString::fromUtf8() yields a null string for a null pointer.
    auto result = QString::fromUtf8(tmp_buf).trimmed();

    // The buffer returned by cmark is owned by the caller.
    free(tmp_buf);

    result.replace(QStringLiteral("<!-- raw HTML omitted -->"), QString());

    return result;
}
|
||||
|
||||
/**
|
||||
* TODO: make this more intelligent currently other characters are not escaped
|
||||
* especially & as this can conflict with the cmark markdown to html conversion
|
||||
* which already escapes characters in code blocks. The < > still need to be handled
|
||||
* when the user manually types in the html.
|
||||
*/
|
||||
QString TextHandler::escapeHtml(QString stringIn)
|
||||
{
|
||||
stringIn.replace(u'<', QStringLiteral("<"));
|
||||
stringIn.replace(u'>', QStringLiteral(">"));
|
||||
return stringIn;
|
||||
}
|
||||
|
||||
/**
 * Reverse the html escaping of '<', '>' and '&'.
 *
 * The double-escaped forms are replaced first so that "&amp;lt;" becomes "<"
 * in one step; "&amp;" is replaced last so it cannot create new entities for
 * the earlier replacements to pick up.
 */
QString TextHandler::unescapeHtml(QString stringIn)
{
    // For those situations where brackets in code block get double escaped
    stringIn.replace(QStringLiteral("&amp;lt;"), QStringLiteral("<"));
    stringIn.replace(QStringLiteral("&amp;gt;"), QStringLiteral(">"));
    stringIn.replace(QStringLiteral("&lt;"), QStringLiteral("<"));
    stringIn.replace(QStringLiteral("&gt;"), QStringLiteral(">"));
    stringIn.replace(QStringLiteral("&amp;"), QStringLiteral("&"));
    return stringIn;
}
|
||||
|
||||
/**
 * Wrap matrix ids, urls and email addresses found in the string in <a> tags.
 *
 * The replacements run in a fixed order: matrix ids (linked through
 * matrix.to), then full urls, then email addresses (linked with mailto:).
 */
QString TextHandler::linkifyUrls(QString stringIn)
{
    stringIn.replace(TextRegex::mxId, QStringLiteral(R"(\1<a href="https://matrix.to/#/\2">\2</a>)"))
        .replace(TextRegex::fullUrl, QStringLiteral(R"(<a href="\1">\1</a>)"))
        .replace(TextRegex::emailAddress, QStringLiteral(R"(<a href="mailto:\2">\1\2</a>)"));
    return stringIn;
}
|
||||
131
src/texthandler.h
Normal file
131
src/texthandler.h
Normal file
@@ -0,0 +1,131 @@
|
||||
// SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com>
|
||||
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <QHash>
|
||||
#include <QRegularExpression>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
|
||||
#include "neochatroom.h"
|
||||
|
||||
// Regular expressions used when cleaning and formatting message text.
// NOTE(review): these are `static` objects defined in a header, so every translation
// unit that includes this file gets its own copy of each one.
namespace TextRegex
|
||||
{
|
||||
// Matches a single '>' or a single space.
// Presumably used to find where the type name of a tag ends - confirm against the callers.
static const QRegularExpression endTagType{QStringLiteral("(>| )")};
|
||||
// Matches text that starts with "> <", runs lazily to the next ">" and then on to the
// first pair of consecutive newlines ("." also matches newlines here). This is the shape
// of the quoted fallback that precedes a plain text reply.
static const QRegularExpression removeReply{QStringLiteral("> <.*?>.*?\\n\\n"), QRegularExpression::DotMatchesEverythingOption};
|
||||
// Matches a whole <mx-reply>...</mx-reply> element, including newlines inside it.
static const QRegularExpression removeRichReply{QStringLiteral("<mx-reply>.*?</mx-reply>"), QRegularExpression::DotMatchesEverythingOption};
|
||||
// Matches <pre><code ...>...</code></pre>; capture 1 is the text between the code tags.
static const QRegularExpression codePill{QStringLiteral("<pre><code[^>]*>(.*?)</code></pre>"), QRegularExpression::DotMatchesEverythingOption};
|
||||
// Matches, and captures as group 1, a whole anchor element whose href starts with
// https://matrix.to/#/@ and contains a later colon. href must be the first attribute.
static const QRegularExpression userPill{QStringLiteral("(<a href=\"https://matrix.to/#/@.*?:.*?\">.*?</a>)"), QRegularExpression::DotMatchesEverythingOption};
|
||||
// Matches <del>...</del>; capture 1 is the content between the tags.
static const QRegularExpression strikethrough{QStringLiteral("<del>(.*?)</del>"), QRegularExpression::DotMatchesEverythingOption};
|
||||
// Matches an <img> tag whose src is an mxc:// uri. Captures: 1 = text between "<img" and
// "src", 2 and 3 = the two path segments of the uri, 4 = text between the closing quote of
// src and the ">". No DotMatchesEverythingOption is set, so "." does not match newlines.
static const QRegularExpression mxcImage{QStringLiteral(R"AAA(<img(.*?)src="mxc:\/\/(.*?)\/(.*?)"(.*?)>)AAA")};
|
||||
// Matches urls in text. The first alternative consumes an existing <a...</a> span and then
// forces a failure with (*SKIP)(*F), so text already inside an anchor is never matched.
// Otherwise capture 1 is a url that begins with www., http:, https:, ftp:, magnet: or
// matrix: (a "www." that leads into an email style name@ is rejected). An ampersand is
// accepted unless it starts &lt; or &gt;, and the final character class keeps trailing
// punctuation such as . , ! : ) ] out of the match. Case insensitive.
static const QRegularExpression fullUrl(
|
||||
QStringLiteral(
|
||||
R"(<a.*?<\/a>(*SKIP)(*F)|\b((www\.(?!\.)(?!(\w|\.|-)+@)|(https?|ftp):(//)?\w|(magnet|matrix):)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))"),
|
||||
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
|
||||
// Matches email addresses in text, skipping existing anchors in the same way as fullUrl.
// Capture 1 is an optional "mailto:" prefix and capture 2 is the address itself.
static const QRegularExpression emailAddress(QStringLiteral(R"(<a.*?<\/a>(*SKIP)(*F)|\b(mailto:)?((\w|\.|-)+@(\w|\.|-)+\.\w+\b))"),
|
||||
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
|
||||
// Matches a matrix identifier. Capture 1 is what precedes it: the start of the string or
// one whitespace, bracket, brace, parenthesis, quote, backtick or semicolon character.
// Capture 2 is the identifier: a leading !, # or @, then 1 to 252 characters from
// [-a-z0-9_=#/.], a colon, a dotted domain and an optional port of 1 to 5 digits.
static const QRegularExpression mxId(QStringLiteral(R"((^|[][[:space:](){}`'";])([!#@][-a-z0-9_=#/.]{1,252}:\w(?:\w|\.|-)*\.\w+(?::\d{1,5})?))"),
|
||||
QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption);
|
||||
}
|
||||
|
||||
/**
|
||||
* @class TextHandler
|
||||
*
|
||||
* This class is designed to handle the text of both incoming and outgoing messages.
|
||||
*
|
||||
* This includes converting markdown to html and removing any html tags that shouldn't
|
||||
* be present as per the matrix spec
|
||||
* (https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes).
*
* The text to work on is supplied with setData(); the handle*() functions take no
* text argument and return the handled string.
*
* NOTE(review): "Recieve" in the handler names is a misspelling of "Receive", but it is
* part of the public function names and is therefore left as is.
|
||||
*/
|
||||
class TextHandler
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief List of token types
*
* Used as the type of m_nextTokenType.
|
||||
*/
|
||||
enum Type {
|
||||
Text, /*!< Anything not a tag that doesn't have special handling */
|
||||
Tag, /*!< For any generic tag that doesn't have special handling */
|
||||
TextCode, /*!< Text between code tags */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Get the string being handled.
|
||||
*
|
||||
* @note The TextHandler doesn't modify the input data variable so the unhandled
* text can always be retrieved.
|
||||
*/
|
||||
QString data() const;
|
||||
|
||||
/**
|
||||
* @brief Set the string being handled.
|
||||
*
|
||||
* Setting new data resets the TextHandler.
|
||||
*
* @param string the new text to handle.
|
||||
*/
|
||||
void setData(const QString &string);
|
||||
|
||||
/**
|
||||
* @brief Handle the text for a message that is being sent.
*
* Presumably this is where markdown is converted to html and disallowed tags are
* removed, as described in the class comment - confirm against the definition.
*
* @return the handled text.
|
||||
*/
|
||||
QString handleSendText();
|
||||
|
||||
/**
|
||||
* @brief Handle the text as a rich output for a message being received.
|
||||
*
|
||||
* The function does the following:
|
||||
* - Removes invalid html tags and attributes
|
||||
* - Strips any reply from the message
|
||||
* - Formats user mentions
|
||||
*
|
||||
* @note In this case the rich text refers to the output format. The input
|
||||
* can be in either and the parameter inputFormat just needs to be set
|
||||
* appropriately.
*
* @param inputFormat the format of the text that was given to setData().
* @param room optional, defaults to nullptr. Presumably used to resolve mxc urls
* and mentions - confirm against the definition.
* @param event optional, defaults to nullptr. Presumably the event the text
* belongs to - confirm against the definition.
* @param stripNewlines presumably removes line breaks from the output when true -
* confirm against the definition.
*
* @return a string suitable for a rich text control.
|
||||
*/
|
||||
QString handleRecieveRichText(Qt::TextFormat inputFormat = Qt::RichText,
|
||||
const NeoChatRoom *room = nullptr,
|
||||
const Quotient::RoomEvent *event = nullptr,
|
||||
bool stripNewlines = false);
|
||||
|
||||
/**
|
||||
* @brief Handle the text as a plain output for a message being received.
|
||||
*
|
||||
* The function does the following:
|
||||
* - Removes all html tags and attributes (except inside of code tags)
|
||||
* - Strips any reply from the message
|
||||
*
|
||||
* @note In this case the plain text refers to the output format. The input
|
||||
* can be in either and the parameter inputFormat just needs to be set
|
||||
* appropriately.
|
||||
*
|
||||
* @warning The output of this function should NEVER be input into a rich text
|
||||
* control. It will try to preserve < and > in the plain string which
|
||||
* could be malicious tags if the control uses rich text format.
*
* @param inputFormat the format of the text that was given to setData().
* @param stripNewlines presumably removes line breaks from the output when true -
* confirm against the definition.
* NOTE(review): taken by const reference here but by value in
* handleRecieveRichText().
*
* @return a string suitable for a plain text control.
|
||||
*/
|
||||
QString handleRecievePlainText(Qt::TextFormat inputFormat = Qt::PlainText, const bool &stripNewlines = false);
|
||||
|
||||
private:
|
||||
// Presumably the string given to setData(); per the note on data() it is kept unmodified.
QString m_data;
|
||||
|
||||
// Parser state. Presumably m_dataBuffer is the working copy that is consumed while
// handling, m_pos the current position in it, and m_nextToken / m_nextTokenType the
// token that comes next - confirm against the definitions.
// NOTE(review): m_pos and m_nextTokenType have no initialiser and this class declares no
// constructor, so they hold indeterminate values until something assigns them.
QString m_dataBuffer;
|
||||
int m_pos;
|
||||
Type m_nextTokenType;
|
||||
QString m_nextToken;
|
||||
|
||||
// Tokeniser steps. Judging by the names these advance to the next token and work out
// its Type; the definitions are not in this header.
void next();
|
||||
void nextTokenType();
|
||||
|
||||
// Tag and attribute helpers. Judging by the names these inspect the current tag and
// decide what the matrix spec allows; the definitions are not in this header.
QString getTagType() const;
|
||||
bool isCloseTag() const;
|
||||
QString getAttributeType(const QString &string);
|
||||
QString getAttributeData(const QString &string);
|
||||
bool isAllowedTag(const QString &type);
|
||||
bool isAllowedAttribute(const QString &tag, const QString &attribute);
|
||||
// isImg defaults to false; presumably selects the rules for an img src rather than a
// normal link - confirm against the definition.
bool isAllowedLink(const QString &link, bool isImg = false);
|
||||
QString cleanAttributes(const QString &tag, const QString &tagString);
|
||||
|
||||
// String conversion helpers. The last three take their argument by value and return a
// QString, presumably the converted copy.
QString markdownToHTML(const QString &markdown);
|
||||
QString escapeHtml(QString stringIn);
|
||||
QString unescapeHtml(QString stringIn);
|
||||
QString linkifyUrls(QString stringIn);
|
||||
};
|
||||
@@ -1,4 +0,0 @@
|
||||
// SPDX-FileCopyrightText: 2018 Black Hat <bhat@encom.eu.org>
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#include "utils.h"
|
||||
16
src/utils.h
16
src/utils.h
@@ -1,16 +0,0 @@
|
||||
// SPDX-FileCopyrightText: 2018 Black Hat <bhat@encom.eu.org>
|
||||
// SPDX-License-Identifier: GPL-3.0-only
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
// Regular expressions for cleaning message text. The hunk header above (-1,16 +0,0) shows
// this file as removed; the same six patterns are defined in namespace TextRegex in
// texthandler.h as removeReply, removeRichReply, codePill, userPill, strikethrough and
// mxcImage.
namespace utils
|
||||
{
|
||||
// Quoted fallback at the start of a plain text reply, up to the first blank line.
static const QRegularExpression removeReplyRegex{"> <.*?>.*?\\n\\n", QRegularExpression::DotMatchesEverythingOption};
|
||||
// A whole <mx-reply>...</mx-reply> element.
static const QRegularExpression removeRichReplyRegex{"<mx-reply>.*?</mx-reply>", QRegularExpression::DotMatchesEverythingOption};
|
||||
// <pre><code ...>...</code></pre>; capture 1 is the text between the code tags.
static const QRegularExpression codePillRegExp{"<pre><code[^>]*>(.*?)</code></pre>", QRegularExpression::DotMatchesEverythingOption};
|
||||
// A whole anchor whose href starts with https://matrix.to/#/@ (captured as group 1).
static const QRegularExpression userPillRegExp{"(<a href=\"https://matrix.to/#/@.*?:.*?\">.*?</a>)", QRegularExpression::DotMatchesEverythingOption};
|
||||
// <del>...</del>; capture 1 is the content between the tags.
static const QRegularExpression strikethroughRegExp{"<del>(.*?)</del>", QRegularExpression::DotMatchesEverythingOption};
|
||||
// An <img> tag whose src is an mxc:// uri; captures 2 and 3 are its two path segments.
static const QRegularExpression mxcImageRegExp{R"AAA(<img(.*?)src="mxc:\/\/(.*?)\/(.*?)"(.*?)>)AAA"};
|
||||
}
|
||||
Reference in New Issue
Block a user