2010-05-11 20:17:10 -04:00
|
|
|
/****************************************************************
|
|
|
|
* This file is distributed under the following license:
|
|
|
|
*
|
|
|
|
* Copyright (c) 2010, Thomas Kister
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
|
|
* Boston, MA 02110-1301, USA.
|
|
|
|
****************************************************************/
|
|
|
|
|
2011-03-06 07:58:18 -05:00
|
|
|
#include <QTextBrowser>
|
2010-05-11 20:17:10 -04:00
|
|
|
#include "HandleRichText.h"
|
2011-05-01 18:26:41 -04:00
|
|
|
#include "gui/RetroShareLink.h"
|
2010-05-11 20:17:10 -04:00
|
|
|
|
2011-09-29 05:20:09 -04:00
|
|
|
#include <iostream>
|
|
|
|
|
2010-12-30 12:09:32 -05:00
|
|
|
namespace RsHtml {
|
2010-05-11 20:17:10 -04:00
|
|
|
|
2010-12-30 12:09:32 -05:00
|
|
|
// Shared smiley embedding description; formatText() passes it to embedHtml()
// when RSHTML_FORMATTEXT_EMBED_SMILEYS is requested.
EmbedInHtmlImg defEmbedImg;
|
2010-05-11 20:17:10 -04:00
|
|
|
|
|
|
|
void EmbedInHtmlImg::InitFromAwkwardHash(const QHash< QString, QString >& hash)
|
|
|
|
{
|
|
|
|
QString newRE;
|
|
|
|
for(QHash<QString,QString>::const_iterator it = hash.begin(); it != hash.end(); ++it)
|
|
|
|
foreach(QString smile, it.key().split("|")) {
|
2010-09-15 10:32:09 -04:00
|
|
|
if (smile.isEmpty()) {
|
|
|
|
continue;
|
|
|
|
}
|
2010-05-11 20:17:10 -04:00
|
|
|
smileys.insert(smile, it.value());
|
|
|
|
newRE += "(" + QRegExp::escape(smile) + ")|";
|
|
|
|
}
|
|
|
|
newRE.chop(1); // remove last |
|
|
|
|
myRE.setPattern(newRE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses a DOM tree and replaces text by HTML tags.
|
|
|
|
* The tree is traversed depth-first, but only through children of Element type
|
|
|
|
* nodes. Any other kind of node is terminal.
|
|
|
|
*
|
|
|
|
* If the node is of type Text, its data is checked against the user-provided
|
|
|
|
* regular expression. If there is a match, the text is cut in three parts: the
|
|
|
|
* preceding part that will be inserted before, the part to be replaced, and the
|
|
|
|
* following part which will be itself checked against the regular expression.
|
|
|
|
*
|
|
|
|
* The part to be replaced is sent to a user-provided functor that will create
|
|
|
|
* the necessary embedding and return a new Element node to be inserted.
|
|
|
|
*
|
|
|
|
* @param[in] doc The whole DOM tree, necessary to create new nodes
|
|
|
|
* @param[in,out] currentElement The current node (which is of type Element)
|
|
|
|
* @param[in] embedInfos The regular expression and the type of embedding to use
|
|
|
|
*/
|
2011-05-15 16:21:14 -04:00
|
|
|
static void embedHtml(QDomDocument& doc, QDomElement& currentElement, EmbedInHtml& embedInfos)
|
2010-05-11 20:17:10 -04:00
|
|
|
{
|
|
|
|
if(embedInfos.myRE.pattern().length() == 0) // we'll get stuck with an empty regexp
|
|
|
|
return;
|
|
|
|
|
|
|
|
QDomNodeList children = currentElement.childNodes();
|
|
|
|
for(uint index = 0; index < children.length(); index++) {
|
2011-05-01 18:26:41 -04:00
|
|
|
QDomNode node = children.item(index);
|
|
|
|
if(node.isElement()) {
|
2010-05-11 20:17:10 -04:00
|
|
|
// child is an element, we skip it if it's an <a> tag
|
2011-05-01 18:26:41 -04:00
|
|
|
QDomElement element = node.toElement();
|
|
|
|
if(element.tagName().toLower() == "head") {
|
|
|
|
// skip it
|
|
|
|
} else if (element.tagName().toLower() == "a") {
|
|
|
|
// skip it, but add title if not available
|
|
|
|
if (element.attribute("title").isEmpty()) {
|
|
|
|
RetroShareLink link(element.attribute("href"));
|
|
|
|
QString title = link.title();
|
|
|
|
if (!title.isEmpty()) {
|
|
|
|
element.setAttribute("title", title);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
embedHtml(doc, element, embedInfos);
|
|
|
|
}
|
2010-05-11 20:17:10 -04:00
|
|
|
}
|
2011-05-01 18:26:41 -04:00
|
|
|
else if(node.isText()) {
|
2010-05-11 20:17:10 -04:00
|
|
|
// child is a text, we parse it
|
2011-05-01 18:26:41 -04:00
|
|
|
QString tempText = node.toText().data();
|
2010-05-11 20:17:10 -04:00
|
|
|
if(embedInfos.myRE.indexIn(tempText) == -1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// there is at least one link inside, we start replacing
|
|
|
|
int currentPos = 0;
|
|
|
|
int nextPos = 0;
|
|
|
|
while((nextPos = embedInfos.myRE.indexIn(tempText, currentPos)) != -1) {
|
|
|
|
// if nextPos == 0 it means the text begins by a link
|
|
|
|
if(nextPos > 0) {
|
|
|
|
QDomText textPart = doc.createTextNode(tempText.mid(currentPos, nextPos - currentPos));
|
2011-05-01 18:26:41 -04:00
|
|
|
currentElement.insertBefore(textPart, node);
|
2010-05-11 20:17:10 -04:00
|
|
|
index++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// inserted tag
|
|
|
|
QDomElement insertedTag;
|
|
|
|
switch(embedInfos.myType) {
|
2011-05-01 18:26:41 -04:00
|
|
|
case Ahref:
|
|
|
|
{
|
|
|
|
insertedTag = doc.createElement("a");
|
|
|
|
insertedTag.setAttribute("href", embedInfos.myRE.cap(0));
|
|
|
|
|
|
|
|
RetroShareLink link(embedInfos.myRE.cap(0));
|
|
|
|
QString title = link.title();
|
|
|
|
if (!title.isEmpty()) {
|
|
|
|
insertedTag.setAttribute("title", title);
|
|
|
|
}
|
|
|
|
|
|
|
|
insertedTag.appendChild(doc.createTextNode(embedInfos.myRE.cap(0)));
|
|
|
|
}
|
2010-05-11 20:17:10 -04:00
|
|
|
break;
|
2011-05-01 18:26:41 -04:00
|
|
|
case Img:
|
2010-05-11 20:17:10 -04:00
|
|
|
{
|
2011-05-01 18:26:41 -04:00
|
|
|
insertedTag = doc.createElement("img");
|
2010-05-11 20:17:10 -04:00
|
|
|
const EmbedInHtmlImg& embedImg = static_cast<const EmbedInHtmlImg&>(embedInfos);
|
|
|
|
insertedTag.setAttribute("src", embedImg.smileys[embedInfos.myRE.cap(0)]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2011-05-01 18:26:41 -04:00
|
|
|
currentElement.insertBefore(insertedTag, node);
|
2010-05-11 20:17:10 -04:00
|
|
|
|
|
|
|
currentPos = nextPos + embedInfos.myRE.matchedLength();
|
|
|
|
index++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// text after the last link, only if there's one, don't touch the index
|
2011-05-01 18:26:41 -04:00
|
|
|
// otherwise decrement the index because we're going to remove node
|
2010-05-11 20:17:10 -04:00
|
|
|
if(currentPos < tempText.length()) {
|
|
|
|
QDomText textPart = doc.createTextNode(tempText.mid(currentPos));
|
2011-05-01 18:26:41 -04:00
|
|
|
currentElement.insertBefore(textPart, node);
|
2010-05-11 20:17:10 -04:00
|
|
|
}
|
|
|
|
else
|
|
|
|
index--;
|
|
|
|
|
2011-05-01 18:26:41 -04:00
|
|
|
currentElement.removeChild(node);
|
2010-05-11 20:17:10 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-15 16:21:14 -04:00
|
|
|
/**
 * Formats a text according to the given flags.
 *
 * The text is parsed as a DOM document; when that fails it is first converted
 * to HTML through a QTextBrowser. Depending on the flags, smileys
 * (RSHTML_FORMATTEXT_EMBED_SMILEYS) and links (RSHTML_FORMATTEXT_EMBED_LINKS)
 * are embedded, then the resulting HTML is run through optimizeHtml(), asking
 * it to remove font and/or color styles when RSHTML_FORMATTEXT_REMOVE_FONT /
 * RSHTML_FORMATTEXT_REMOVE_COLOR are set.
 *
 * @param[in] text The text to format
 * @param[in] flag Combination of RSHTML_FORMATTEXT_* flags, 0 means nothing to do
 * @return The formatted text, or the unchanged text when there is nothing to do
 *         or when the text cannot be parsed at all
 */
QString formatText(const QString &text, unsigned int flag)
{
	if (flag == 0 || text.isEmpty()) {
		// nothing to do
		return text;
	}

	QDomDocument doc;
	if (doc.setContent(text) == false) {
		// convert text with QTextBrowser
		QTextBrowser textBrowser;
		textBrowser.setText(text);
		if (doc.setContent(textBrowser.toHtml()) == false) {
			// still not parsable: give the text back untouched instead of
			// serializing an incomplete document and losing the content
			return text;
		}
	}

	QDomElement body = doc.documentElement();
	if (flag & RSHTML_FORMATTEXT_EMBED_SMILEYS) {
		embedHtml(doc, body, defEmbedImg);
	}
	if (flag & RSHTML_FORMATTEXT_EMBED_LINKS) {
		EmbedInHtmlAhref defEmbedAhref;
		embedHtml(doc, body, defEmbedAhref);
	}

	QString formattedText = doc.toString(-1);  // -1 removes any annoying carriage return misinterpreted by QTextEdit

	// translate the format flags into the corresponding optimize flags
	unsigned int optimizeFlag = 0;
	if (flag & RSHTML_FORMATTEXT_REMOVE_FONT) {
		optimizeFlag |= RSHTML_OPTIMIZEHTML_REMOVE_FONT;
	}
	if (flag & RSHTML_FORMATTEXT_REMOVE_COLOR) {
		optimizeFlag |= RSHTML_OPTIMIZEHTML_REMOVE_COLOR;
	}
	optimizeHtml(formattedText, optimizeFlag);

	return formattedText;
}
|
|
|
|
|
|
|
|
/**
 * Collects data from all elements with a given tag name below an element.
 *
 * The children of currentElement are visited in document order. A child whose
 * tag name equals nodeName (case insensitive) contributes either its text
 * (when nodeAttribute is empty) or the value of its attribute nodeAttribute
 * (only when that value is not empty); such a child is not searched any
 * further. All other element children are searched recursively.
 *
 * @param[in] doc The DOM document (not used by the search itself)
 * @param[in] currentElement The element whose children are searched
 * @param[in] nodeName Tag name to look for, nothing is done when empty
 * @param[in] nodeAttribute Attribute to collect, or empty to collect the element text
 * @param[in,out] elements List the found values are appended to
 */
static void findElements(QDomDocument& doc, QDomElement& currentElement, const QString& nodeName, const QString& nodeAttribute, QStringList &elements)
{
	if (nodeName.isEmpty()) {
		return;
	}

	for (QDomNode child = currentElement.firstChild(); !child.isNull(); child = child.nextSibling()) {
		if (!child.isElement()) {
			continue;
		}

		QDomElement childElement = child.toElement();
		if (QString::compare(childElement.tagName(), nodeName, Qt::CaseInsensitive) != 0) {
			// not the wanted tag, look deeper
			findElements(doc, childElement, nodeName, nodeAttribute, elements);
			continue;
		}

		if (nodeAttribute.isEmpty()) {
			// use text
			elements.append(childElement.text());
			continue;
		}

		const QString value = childElement.attribute(nodeAttribute);
		if (!value.isEmpty()) {
			elements.append(value);
		}
	}
}
|
|
|
|
|
|
|
|
bool findAnchors(const QString &text, QStringList& urls)
|
|
|
|
{
|
|
|
|
QDomDocument doc;
|
|
|
|
if (doc.setContent(text) == false) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
QDomElement body = doc.documentElement();
|
|
|
|
findElements(doc, body, "a", "href", urls);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-01-30 19:06:24 -05:00
|
|
|
/**
 * Removes an element from its parent but keeps its children.
 *
 * The children are detached from the element one by one, starting with the
 * last one, and each is inserted in the parent right after the element, so
 * they end up in their original order at the place of the element. The
 * (now empty) element is then removed from the parent.
 *
 * @param[in,out] parentElement The parent that receives the children
 * @param[in,out] element The element to remove
 */
static void removeElement(QDomElement& parentElement, QDomElement& element)
{
	for (QDomNode last = element.lastChild(); !last.isNull(); last = element.lastChild()) {
		QDomNode detached = element.removeChild(last);
		parentElement.insertAfter(detached, element);
	}

	parentElement.removeChild(element);
}
|
|
|
|
|
2012-03-31 11:20:19 -04:00
|
|
|
static void optimizeHtml(QDomDocument& doc, QDomElement& currentElement, unsigned int flag)
|
2011-09-29 05:20:09 -04:00
|
|
|
{
|
2012-01-30 19:06:24 -05:00
|
|
|
if (currentElement.tagName().toLower() == "html") {
|
|
|
|
// change <html> to <span>
|
|
|
|
currentElement.setTagName("span");
|
|
|
|
}
|
|
|
|
|
|
|
|
QDomNode styleNode;
|
|
|
|
bool addBR = false;
|
|
|
|
|
2011-09-29 05:20:09 -04:00
|
|
|
QDomNodeList children = currentElement.childNodes();
|
|
|
|
for (uint index = 0; index < children.length(); ) {
|
|
|
|
QDomNode node = children.item(index);
|
2012-01-30 19:06:24 -05:00
|
|
|
|
|
|
|
// compress style attribute
|
|
|
|
styleNode = node.attributes().namedItem("style");
|
|
|
|
if (styleNode.isAttr()) {
|
2012-03-31 11:20:19 -04:00
|
|
|
QDomAttr styleAttr = styleNode.toAttr();
|
|
|
|
QString style = styleAttr.value().simplified();
|
|
|
|
style.replace("margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px;", "margin:0px 0px 0px 0px;");
|
|
|
|
style.replace("; ", ";");
|
|
|
|
|
|
|
|
if (flag & (RSHTML_OPTIMIZEHTML_REMOVE_FONT | RSHTML_OPTIMIZEHTML_REMOVE_COLOR)) {
|
|
|
|
QStringList styles = style.split(';');
|
|
|
|
style.clear();
|
|
|
|
foreach (QString pair, styles) {
|
|
|
|
if (!pair.trimmed().isEmpty()) {
|
|
|
|
QStringList keyvalue = pair.split(':');
|
|
|
|
if (keyvalue.length() == 2) {
|
|
|
|
QString key = keyvalue.at(0).trimmed();
|
|
|
|
|
|
|
|
if (flag & RSHTML_OPTIMIZEHTML_REMOVE_FONT) {
|
|
|
|
if (key == "font-family" ||
|
|
|
|
key == "font-size" ||
|
|
|
|
key == "font-weight" ||
|
|
|
|
key == "font-style") {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (flag & RSHTML_OPTIMIZEHTML_REMOVE_COLOR) {
|
|
|
|
if (key == "color") {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
style += key + ":" + keyvalue.at(1).trimmed() + ";";
|
|
|
|
} else {
|
|
|
|
style += pair + ";";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (style.isEmpty()) {
|
|
|
|
node.attributes().removeNamedItem("style");
|
|
|
|
styleNode.clear();
|
|
|
|
} else {
|
|
|
|
styleAttr.setValue(style);
|
|
|
|
}
|
2012-01-30 19:06:24 -05:00
|
|
|
}
|
|
|
|
|
2011-09-29 05:20:09 -04:00
|
|
|
if (node.isElement()) {
|
|
|
|
QDomElement element = node.toElement();
|
2012-01-30 19:06:24 -05:00
|
|
|
|
|
|
|
// not <p>
|
|
|
|
if (addBR && element.tagName().toLower() != "p") {
|
|
|
|
// add <br> after a removed <p> but not before a <p>
|
|
|
|
QDomElement elementBr = doc.createElement("br");
|
|
|
|
currentElement.insertBefore(elementBr, element);
|
|
|
|
addBR = false;
|
|
|
|
++index;
|
|
|
|
}
|
|
|
|
|
|
|
|
// <body>
|
|
|
|
if (element.tagName().toLower() == "body") {
|
|
|
|
if (element.attributes().length() == 0) {
|
|
|
|
// remove <body> without attributes
|
|
|
|
removeElement(currentElement, element);
|
|
|
|
// no ++index;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// change <body> to <span>
|
|
|
|
element.setTagName("span");
|
|
|
|
}
|
|
|
|
|
|
|
|
// <head>
|
2011-09-29 05:20:09 -04:00
|
|
|
if (element.tagName().toLower() == "head") {
|
2012-01-30 19:06:24 -05:00
|
|
|
// remove <head>
|
2011-09-29 05:20:09 -04:00
|
|
|
currentElement.removeChild(node);
|
2012-01-30 19:06:24 -05:00
|
|
|
// no ++index;
|
2011-09-29 05:20:09 -04:00
|
|
|
continue;
|
|
|
|
}
|
2012-01-30 19:06:24 -05:00
|
|
|
|
|
|
|
// iterate children
|
2012-03-31 11:20:19 -04:00
|
|
|
optimizeHtml(doc, element, flag);
|
2012-01-30 19:06:24 -05:00
|
|
|
|
|
|
|
// <p>
|
|
|
|
if (element.tagName().toLower() == "p") {
|
|
|
|
// <p style="...">
|
|
|
|
//styleNode = element.attributes().namedItem("style");
|
|
|
|
if (element.attributes().size() == 1 && styleNode.isAttr()) {
|
2012-03-31 11:20:19 -04:00
|
|
|
QString value = styleNode.toAttr().value().simplified();
|
2012-01-30 19:06:24 -05:00
|
|
|
if (value == "margin:0px 0px 0px 0px;-qt-block-indent:0;text-indent:0px;" ||
|
|
|
|
value.startsWith("-qt-paragraph-type:empty;margin:0px 0px 0px 0px;-qt-block-indent:0;text-indent:0px;")) {
|
|
|
|
|
|
|
|
if (addBR) {
|
|
|
|
// add <br> after a removed <p> before a removed <p>
|
|
|
|
QDomElement elementBr = doc.createElement("br");
|
|
|
|
currentElement.insertBefore(elementBr, element);
|
|
|
|
++index;
|
|
|
|
}
|
|
|
|
// remove Qt standard <p> or empty <p>
|
|
|
|
index += element.childNodes().length();
|
|
|
|
removeElement(currentElement, element);
|
|
|
|
addBR = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
addBR = false;
|
|
|
|
}
|
2011-09-29 05:20:09 -04:00
|
|
|
}
|
|
|
|
++index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-03-31 11:20:19 -04:00
|
|
|
/**
 * Gets the content of a text edit in its shortest form.
 *
 * When the HTML of the text edit equals the HTML generated from its plain
 * text alone, the plain text is returned. Otherwise the HTML of the text edit
 * is run through optimizeHtml(QString&, unsigned int).
 *
 * @param[in] textEdit The text edit to read from
 * @param[out] text Receives the plain text or the optimized HTML
 * @param[in] flag Combination of RSHTML_OPTIMIZEHTML_* flags
 */
void optimizeHtml(QTextEdit *textEdit, QString &text, unsigned int flag)
{
	const QString html = textEdit->toHtml();
	const QString plainText = textEdit->toPlainText();

	const bool hasNoFormatting = (html == QTextDocument(plainText).toHtml());
	if (!hasNoFormatting) {
		text = html;
		optimizeHtml(text, flag);
		return;
	}

	// the html carries nothing more than the plain text does
	text = plainText;
	std::cerr << "Optimized text to " << text.length() << " bytes , instead of " << html.length() << std::endl;
}
|
|
|
|
|
2012-03-31 11:20:19 -04:00
|
|
|
/**
 * Optimizes a HTML text in place.
 *
 * The doctype declaration is removed from the text, then the text is parsed
 * as a DOM document, optimized and serialized again. When the text cannot be
 * parsed, it is left as it is after the removal of the doctype.
 *
 * @param[in,out] text The HTML text to optimize
 * @param[in] flag Combination of RSHTML_OPTIMIZEHTML_* flags
 */
void optimizeHtml(QString &text, unsigned int flag)
{
	const int lengthBefore = text.length();

	// remove doctype
	const QRegExp doctype("<!DOCTYPE[^>]*>");
	text.remove(doctype);

	QDomDocument parsed;
	if (!parsed.setContent(text)) {
		return;
	}

	QDomElement root = parsed.documentElement();
	optimizeHtml(parsed, root, flag);
	text = parsed.toString(-1);

	std::cerr << "Optimized text to " << text.length() << " bytes , instead of " << lengthBefore << std::endl;
}
|
|
|
|
|
2012-01-18 18:00:50 -05:00
|
|
|
/**
 * Converts an optimized text back to HTML.
 *
 * @param[in] text The text, every "\n" of it is replaced by "<br>"
 * @param[in] realHtml When true, the text is loaded into a QTextDocument and
 *                     the HTML generated by that document is returned
 * @return The text with the line breaks replaced, or the HTML of the QTextDocument
 */
QString toHtml(QString text, bool realHtml)
{
	// replace "\n" from the optimized html with "<br>"
	text.replace("\n", "<br>");

	if (realHtml) {
		QTextDocument rendered;
		rendered.setHtml(text);
		return rendered.toHtml();
	}

	return text;
}
|
|
|
|
|
2010-12-30 12:09:32 -05:00
|
|
|
} // namespace RsHtml
|