2022-04-05 15:04:26 -04:00
|
|
|
/*
 * Copyright (C) 2021-2023 Savoir-faire Linux Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
|
|
|
|
#include "previewengine.h"
|
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
#include <QRegularExpression>
|
2023-01-06 14:07:33 -05:00
|
|
|
|
2023-05-18 10:06:29 -04:00
|
|
|
// Matches one line break, written either as CRLF or as a bare LF.
// getTagContent() strips these from a tag before running its attribute regex.
const QRegularExpression PreviewEngine::newlineRe("\\r?\\n");
|
2022-04-05 15:04:26 -04:00
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
/**
 * Build a preview engine on top of NetworkManager.
 *
 * @param cm     Connectivity monitor forwarded to the NetworkManager base.
 * @param parent QObject parent forwarded to the NetworkManager base.
 */
PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent)
    : NetworkManager(cm, parent)
    , htmlParser_(new HtmlParser(this))
{
    // parseLink is routed to onParseLink through a queued connection so that
    // emitting the signal does not block the caller's thread.
    connect(this,
            &PreviewEngine::parseLink,
            this,
            &PreviewEngine::onParseLink,
            Qt::QueuedConnection);
}
|
2022-04-05 15:04:26 -04:00
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
/**
 * Return the "content" attribute of the first meta tag declaring a property.
 *
 * A tag matches when it contains property="X" or name="X", where X is
 * @p value optionally prefixed by "og:" or "twitter:", followed later in the
 * same tag by a non-empty content="..." attribute.
 *
 * @param tags  Meta tags as text, scanned in list order.
 * @param value Property name to search for. It is spliced into the regular
 *              expression verbatim, so any regex metacharacters it contains
 *              are interpreted as such.
 * @return The captured content value of the first matching tag, or an empty
 *         QString when no tag matches.
 */
QString
PreviewEngine::getTagContent(const QList<QString>& tags, const QString& value)
{
    // The pattern only depends on `value`, so compile it once instead of
    // once per tag.
    const QRegularExpression re("(property|name)=\"(og:|twitter:|)" + value
                                + "\".*?content=\"([^\"]+)\"");

    for (const QString& tag : tags) {
        // '.' does not match line breaks by default, so a tag spread over
        // several lines must be flattened first. `tags` is const, hence the
        // working copy.
        QString flattened = tag;
        flattened.remove(newlineRe);

        const auto match = re.match(flattened);
        if (match.hasMatch()) {
            return match.captured(3);
        }
    }
    return QString {};
}
|
2021-07-06 10:20:46 -04:00
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
/**
 * Pick a title for the preview.
 *
 * Candidates are tried in order and the first non-empty one wins:
 * the "title" meta property, then the inner HTML reported by the parser for
 * the TITLE, H1 and H2 tags.
 *
 * @param metaTags Meta tags as text, as passed to getTagContent().
 * @return The first non-empty candidate; the H2 result (possibly empty) when
 *         every earlier candidate is empty.
 */
QString
PreviewEngine::getTitle(const QList<QString>& metaTags)
{
    // OpenGraph / Twitter (or plain) "title" meta property.
    const QString fromMeta = getTagContent(metaTags, "title");
    if (!fromMeta.isEmpty()) {
        return fromMeta;
    }

    // <title> tag.
    const QString fromTitleTag = htmlParser_->getTagInnerHtml(TidyTag_TITLE);
    if (!fromTitleTag.isEmpty()) {
        return fromTitleTag;
    }

    // <h1> tag.
    const QString fromH1 = htmlParser_->getTagInnerHtml(TidyTag_H1);
    if (!fromH1.isEmpty()) {
        return fromH1;
    }

    // Last resort: <h2> tag, returned as-is even when empty.
    return htmlParser_->getTagInnerHtml(TidyTag_H2);
}
|
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
/**
 * Pick a description for the preview.
 *
 * @param metaTags Meta tags as text, as passed to getTagContent().
 * @return The "description" meta property when it is non-empty; otherwise
 *         the inner HTML the parser reports for the P tag (possibly empty).
 */
QString
PreviewEngine::getDescription(const QList<QString>& metaTags)
{
    // OpenGraph / Twitter (or plain) "description" meta property.
    const QString fromMeta = getTagContent(metaTags, "description");
    if (!fromMeta.isEmpty()) {
        return fromMeta;
    }

    // Fall back to paragraph content.
    return htmlParser_->getTagInnerHtml(TidyTag_P);
}
|
|
|
|
|
2023-03-20 16:26:37 -04:00
|
|
|
/**
 * Pick an image URL for the preview.
 *
 * The "image" meta property is preferred. When it is missing, the href of the
 * first <link rel="image_src"> element with a non-empty href is used instead.
 * Other <link> elements (stylesheets, icons, ...) are ignored: their href is
 * not an image for the page.
 *
 * @param metaTags Meta tags as text, as passed to getTagContent().
 * @return The image URL, or an empty QString when neither source yields one.
 */
QString
PreviewEngine::getImage(const QList<QString>& metaTags)
{
    // OpenGraph / Twitter (or plain) "image" meta property.
    const QString image = getTagContent(metaTags, "image");
    if (!image.isEmpty()) {
        return image;
    }

    // Fallback: href of a link tag declared with rel="image_src".
    auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_LINK});
    const auto linkNodes = tagsNodes[TidyTag_LINK];
    for (auto node : linkNodes) {
        // NOTE(review): assumes getNodeAttr yields an empty string when the
        // attribute is absent, as the href check below already relies on.
        const QString rel = htmlParser_->getNodeAttr(node, TidyAttr_REL);
        if (rel.trimmed().compare(QLatin1String("image_src"), Qt::CaseInsensitive) != 0) {
            continue;
        }
        const QString href = htmlParser_->getNodeAttr(node, TidyAttr_HREF);
        if (!href.isEmpty()) {
            return href;
        }
    }

    // Nothing usable was found; `image` is empty here.
    return image;
}
|
|
|
|
|
|
|
|
void
|
2023-03-20 16:26:37 -04:00
|
|
|
PreviewEngine::onParseLink(const QString& messageId, const QString& link)
|
2022-05-06 15:06:05 -04:00
|
|
|
{
|
2023-03-20 16:26:37 -04:00
|
|
|
sendGetRequest(QUrl(link), [this, messageId, link](const QByteArray& html) {
|
|
|
|
htmlParser_->parseHtmlString(html);
|
2023-07-06 18:55:14 -04:00
|
|
|
auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_META});
|
|
|
|
auto metaTagNodes = tagsNodes[TidyTag_META];
|
|
|
|
QList<QString> metaTags;
|
|
|
|
Q_FOREACH (auto tag, metaTagNodes) {
|
|
|
|
metaTags.append(htmlParser_->getNodeText(tag));
|
|
|
|
}
|
2023-03-20 16:26:37 -04:00
|
|
|
QString domain = QUrl(link).host();
|
|
|
|
if (domain.isEmpty()) {
|
|
|
|
domain = link;
|
|
|
|
}
|
|
|
|
Q_EMIT infoReady(messageId,
|
|
|
|
{{"title", getTitle(metaTags)},
|
|
|
|
{"description", getDescription(metaTags)},
|
|
|
|
{"image", getImage(metaTags)},
|
|
|
|
{"url", link},
|
|
|
|
{"domain", domain}});
|
|
|
|
});
|
2022-05-06 15:06:05 -04:00
|
|
|
}
|