/**************************************************************** * This file is distributed under the following license: * * Copyright (c) 2010, Thomas Kister * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. ****************************************************************/ #include #include #include "HandleRichText.h" #include "gui/RetroShareLink.h" #include "util/ObjectPainter.h" #include /** * The type of embedding we'd like to do */ enum EmbeddedType { Ahref, ///< into Img ///< into }; /** * Base class for storing information about a given kind of embedding. * * Its only constructor is protected so it is impossible to instantiate it, and * at the same time derived classes have to provide a type. */ class EmbedInHtml { protected: EmbedInHtml(EmbeddedType newType) : myType(newType) {} public: const EmbeddedType myType; QRegExp myRE; }; /** * This class is used to store information for embedding links into tags. */ class EmbedInHtmlAhref : public EmbedInHtml { public: EmbedInHtmlAhref() : EmbedInHtml(Ahref) { myRE.setPattern("(\\bretroshare://[^\\s]*)|(\\bhttps?://[^\\s]*)|(\\bfile://[^\\s]*)|(\\bwww\\.[^\\s]*)"); } }; /** * This class is used to store information for embedding smileys into tags. * * By default the QRegExp the variables are empty, which means it must be * filled at runtime, typically when the smileys set is loaded. It can be * either done by hand or by using one of the helper methods available. * * Note: The QHash uses only *one* smiley per key (unlike soon-to-be-upgraded * code out there). */ class EmbedInHtmlImg : public EmbedInHtml { public: EmbedInHtmlImg() : EmbedInHtml(Img) {} QHash smileys; }; /* global instance for embedding emoticons */ static EmbedInHtmlImg defEmbedImg; RsHtml::RsHtml() { } void RsHtml::initEmoticons(const QHash< QString, QString >& hash) { QString newRE; for(QHash::const_iterator it = hash.begin(); it != hash.end(); ++it) foreach(QString smile, it.key().split("|")) { if (smile.isEmpty()) { continue; } defEmbedImg.smileys.insert(smile, it.value()); newRE += "(" + QRegExp::escape(smile) + ")|"; } newRE.chop(1); // remove last | defEmbedImg.myRE.setPattern(newRE); } bool RsHtml::canReplaceAnchor(QDomDocument &/*doc*/, QDomElement &/*element*/, const RetroShareLink &link) { switch (link.type()) { case RetroShareLink::TYPE_UNKNOWN: case RetroShareLink::TYPE_FILE: case RetroShareLink::TYPE_PERSON: case RetroShareLink::TYPE_FORUM: case RetroShareLink::TYPE_CHANNEL: case RetroShareLink::TYPE_SEARCH: case RetroShareLink::TYPE_MESSAGE: // not yet implemented break; case RetroShareLink::TYPE_CERTIFICATE: return true; } return false; } void RsHtml::anchorTextForImg(QDomDocument &/*doc*/, QDomElement &/*element*/, const RetroShareLink &link, QString &text) { text = link.niceName(); } void RsHtml::anchorStylesheetForImg(QDomDocument &/*doc*/, QDomElement &/*element*/, const RetroShareLink &link, QString &styleSheet) { switch (link.type()) { case RetroShareLink::TYPE_UNKNOWN: case RetroShareLink::TYPE_FILE: case RetroShareLink::TYPE_PERSON: case RetroShareLink::TYPE_FORUM: case RetroShareLink::TYPE_CHANNEL: case RetroShareLink::TYPE_SEARCH: case RetroShareLink::TYPE_MESSAGE: // not yet implemented break; case RetroShareLink::TYPE_CERTIFICATE: styleSheet = ""; break; } } void RsHtml::replaceAnchorWithImg(QDomDocument &doc, QDomElement &element, QTextDocument *textDocument, const RetroShareLink &link) { if (!textDocument) { return; } if (!link.valid()) { return; } if (element.childNodes().length() != 1) { return; } if (!canReplaceAnchor(doc, element, link)) { return; } QString imgText; anchorTextForImg(doc, element, link, imgText); QString styleSheet; anchorStylesheetForImg(doc, element, link, styleSheet); QDomNode childNode = element.firstChild(); /* build resource name */ QString resourceName = QString("%1_%2.png").arg(link.type()).arg(imgText); if (!textDocument->resource(QTextDocument::ImageResource, QUrl(resourceName)).isValid()) { /* draw a button on a pixmap */ QPixmap pixmap; ObjectPainter::drawButton(imgText, styleSheet, pixmap); /* add the image to the resource cache of the text document */ textDocument->addResource(QTextDocument::ImageResource, QUrl(resourceName), QVariant(pixmap)); } element.removeChild(childNode); /* replace text of the anchor with */ QDomElement img = doc.createElement("img"); img.setAttribute("src", resourceName); element.appendChild(img); } /** * Parses a DOM tree and replaces text by HTML tags. * The tree is traversed depth-first, but only through children of Element type * nodes. Any other kind of node is terminal. * * If the node is of type Text, its data is checked against the user-provided * regular expression. If there is a match, the text is cut in three parts: the * preceding part that will be inserted before, the part to be replaced, and the * following part which will be itself checked against the regular expression. * * The part to be replaced is sent to a user-provided functor that will create * the necessary embedding and return a new Element node to be inserted. * * @param[in] doc The whole DOM tree, necessary to create new nodes * @param[in,out] currentElement The current node (which is of type Element) * @param[in] embedInfos The regular expression and the type of embedding to use */ void RsHtml::embedHtml(QTextDocument *textDocument, QDomDocument& doc, QDomElement& currentElement, EmbedInHtml& embedInfos, ulong flag) { if(embedInfos.myRE.pattern().length() == 0) // we'll get stuck with an empty regexp return; QDomNodeList children = currentElement.childNodes(); for(uint index = 0; index < children.length(); index++) { QDomNode node = children.item(index); if(node.isElement()) { // child is an element, we skip it if it's an tag QDomElement element = node.toElement(); if(element.tagName().toLower() == "head") { // skip it } else if (element.tagName().toLower() == "a") { // skip it if (embedInfos.myType == Ahref) { // but add title if not available if (element.attribute("title").isEmpty()) { RetroShareLink link(element.attribute("href")); if (link.valid()) { QString title = link.title(); if (!title.isEmpty()) { element.setAttribute("title", title); } if (textDocument && (flag & RSHTML_FORMATTEXT_REPLACE_LINKS)) { replaceAnchorWithImg(doc, element, textDocument, link); } } } else { if (textDocument && (flag & RSHTML_FORMATTEXT_REPLACE_LINKS)) { RetroShareLink link(element.attribute("href")); if (link.valid()) { replaceAnchorWithImg(doc, element, textDocument, link); } } } } } else { embedHtml(textDocument, doc, element, embedInfos, flag); } } else if(node.isText()) { // child is a text, we parse it QString tempText = node.toText().data(); if(embedInfos.myRE.indexIn(tempText) == -1) continue; // there is at least one link inside, we start replacing int currentPos = 0; int nextPos = 0; while((nextPos = embedInfos.myRE.indexIn(tempText, currentPos)) != -1) { // if nextPos == 0 it means the text begins by a link if(nextPos > 0) { QDomText textPart = doc.createTextNode(tempText.mid(currentPos, nextPos - currentPos)); currentElement.insertBefore(textPart, node); index++; } // inserted tag QDomElement insertedTag; switch(embedInfos.myType) { case Ahref: { insertedTag = doc.createElement("a"); insertedTag.setAttribute("href", embedInfos.myRE.cap(0)); insertedTag.appendChild(doc.createTextNode(embedInfos.myRE.cap(0))); RetroShareLink link(embedInfos.myRE.cap(0)); if (link.valid()) { QString title = link.title(); if (!title.isEmpty()) { insertedTag.setAttribute("title", title); } if (textDocument && (flag & RSHTML_FORMATTEXT_REPLACE_LINKS)) { replaceAnchorWithImg(doc, insertedTag, textDocument, link); } } } break; case Img: { insertedTag = doc.createElement("img"); const EmbedInHtmlImg& embedImg = static_cast(embedInfos); insertedTag.setAttribute("src", embedImg.smileys[embedInfos.myRE.cap(0)]); } break; } currentElement.insertBefore(insertedTag, node); index++; currentPos = nextPos + embedInfos.myRE.matchedLength(); } // text after the last link, only if there's one, don't touch the index // otherwise decrement the index because we're going to remove node if(currentPos < tempText.length()) { QDomText textPart = doc.createTextNode(tempText.mid(currentPos)); currentElement.insertBefore(textPart, node); } else index--; currentElement.removeChild(node); } } } QString RsHtml::formatText(QTextDocument *textDocument, const QString &text, ulong flag) { if (flag == 0 || text.isEmpty()) { // nothing to do return text; } QDomDocument doc; if (doc.setContent(text) == false) { // convert text with QTextBrowser QTextBrowser textBrowser; textBrowser.setText(text); doc.setContent(textBrowser.toHtml()); } QDomElement body = doc.documentElement(); if (flag & RSHTML_FORMATTEXT_EMBED_SMILEYS) { embedHtml(textDocument, doc, body, defEmbedImg, flag); } if (flag & RSHTML_FORMATTEXT_EMBED_LINKS) { EmbedInHtmlAhref defEmbedAhref; embedHtml(textDocument, doc, body, defEmbedAhref, flag); } QString formattedText = doc.toString(-1); // -1 removes any annoying carriage return misinterpreted by QTextEdit unsigned int optimizeFlag = 0; if (flag & RSHTML_FORMATTEXT_REMOVE_FONT) { optimizeFlag |= RSHTML_OPTIMIZEHTML_REMOVE_FONT; } if (flag & RSHTML_FORMATTEXT_REMOVE_COLOR) { optimizeFlag |= RSHTML_OPTIMIZEHTML_REMOVE_COLOR; } if (optimizeFlag || (flag & RSHTML_FORMATTEXT_OPTIMIZE)) { optimizeHtml(formattedText, optimizeFlag); } return formattedText; } static void findElements(QDomDocument& doc, QDomElement& currentElement, const QString& nodeName, const QString& nodeAttribute, QStringList &elements) { if(nodeName.isEmpty()) { return; } QDomNodeList children = currentElement.childNodes(); for (uint index = 0; index < children.length(); index++) { QDomNode node = children.item(index); if (node.isElement()) { QDomElement element = node.toElement(); if (QString::compare(element.tagName(), nodeName, Qt::CaseInsensitive) == 0) { if (nodeAttribute.isEmpty()) { // use text elements.append(element.text()); } else { QString attribute = element.attribute(nodeAttribute); if (attribute.isEmpty() == false) { elements.append(attribute); } } continue; } findElements(doc, element, nodeName, nodeAttribute, elements); } } } bool RsHtml::findAnchors(const QString &text, QStringList& urls) { QDomDocument doc; if (doc.setContent(text) == false) { // convert text with QTextBrowser QTextBrowser textBrowser; textBrowser.setText(text); doc.setContent(textBrowser.toHtml()); } QDomElement body = doc.documentElement(); findElements(doc, body, "a", "href", urls); return true; } static void removeElement(QDomElement& parentElement, QDomElement& element) { QDomNodeList children = element.childNodes(); while (children.length() > 0) { QDomNode childElement = element.removeChild(children.item(children.length() - 1)); parentElement.insertAfter(childElement, element); } parentElement.removeChild(element); } static void optimizeHtml(QDomDocument& doc, QDomElement& currentElement, unsigned int flag) { if (currentElement.tagName().toLower() == "html") { // change to currentElement.setTagName("span"); } QDomNode styleNode; bool addBR = false; QDomNodeList children = currentElement.childNodes(); for (uint index = 0; index < children.length(); ) { QDomNode node = children.item(index); // compress style attribute styleNode = node.attributes().namedItem("style"); if (styleNode.isAttr()) { QDomAttr styleAttr = styleNode.toAttr(); QString style = styleAttr.value().simplified(); style.replace("margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px;", "margin:0px 0px 0px 0px;"); style.replace("; ", ";"); if (flag & (RSHTML_OPTIMIZEHTML_REMOVE_FONT | RSHTML_OPTIMIZEHTML_REMOVE_COLOR)) { QStringList styles = style.split(';'); style.clear(); foreach (QString pair, styles) { if (!pair.trimmed().isEmpty()) { QStringList keyvalue = pair.split(':'); if (keyvalue.length() == 2) { QString key = keyvalue.at(0).trimmed(); if (flag & RSHTML_OPTIMIZEHTML_REMOVE_FONT) { if (key == "font-family" || key == "font-size" || key == "font-weight" || key == "font-style") { continue; } } if (flag & RSHTML_OPTIMIZEHTML_REMOVE_COLOR) { if (key == "color") { continue; } } style += key + ":" + keyvalue.at(1).trimmed() + ";"; } else { style += pair + ";"; } } } } if (style.isEmpty()) { node.attributes().removeNamedItem("style"); styleNode.clear(); } else { styleAttr.setValue(style); } } if (node.isElement()) { QDomElement element = node.toElement(); // not

if (addBR && element.tagName().toLower() != "p") { // add
after a removed

but not before a

QDomElement elementBr = doc.createElement("br"); currentElement.insertBefore(elementBr, element); addBR = false; ++index; } // if (element.tagName().toLower() == "body") { if (element.attributes().length() == 0) { // remove without attributes removeElement(currentElement, element); // no ++index; continue; } // change to element.setTagName("span"); } // if (element.tagName().toLower() == "head") { // remove currentElement.removeChild(node); // no ++index; continue; } // iterate children optimizeHtml(doc, element, flag); //

if (element.tagName().toLower() == "p") { //

if (element.attributes().size() == 1 && styleNode.isAttr()) { QString style = styleNode.toAttr().value().simplified(); if (style == "margin:0px 0px 0px 0px;-qt-block-indent:0;text-indent:0px;" || style.startsWith("-qt-paragraph-type:empty;margin:0px 0px 0px 0px;-qt-block-indent:0;text-indent:0px;")) { if (addBR) { // add
after a removed

before a removed

QDomElement elementBr = doc.createElement("br"); currentElement.insertBefore(elementBr, element); ++index; } // remove Qt standard

or empty

index += element.childNodes().length(); removeElement(currentElement, element); addBR = true; continue; } // check for blockquote (not ready) // style="margin-top:12px;margin-bottom:12px;margin-left:40px;margin-right:40px;-qt-block-indent:0;text-indent:0px;" // int count = 0; // should be 6 // QStringList styles = style.split(';'); // foreach (QString pair, styles) { // if (!pair.trimmed().isEmpty()) { // QStringList keyvalue = pair.split(':'); // if (keyvalue.length() == 2) { // QString key = keyvalue.at(0).trimmed(); // QString value = keyvalue.at(1).trimmed(); // if ((key == "margin-top" || key == "margin-bottom") && value == "12px") { // ++count; // continue; // } // if (key == "margin-left" || key == "margin-right") { // ++count; // continue; // } // if (key == "-qt-block-indent" && value == "0") { // ++count; // continue; // } // if (key == "text-indent" && value == "0px") { // ++count; // continue; // } // count = 0; // break; // } else { // count = 0; // break; // } // } // } // if (count == 6) { // // change to "blockquote" // element.setTagName("blockquote"); // element.attributes().removeNamedItem("style"); // element.setAttribute("type", "cite"); // } } addBR = false; } } ++index; } } void RsHtml::optimizeHtml(QTextEdit *textEdit, QString &text, unsigned int flag) { if (textEdit->toHtml() == QTextDocument(textEdit->toPlainText()).toHtml()) { text = textEdit->toPlainText(); std::cerr << "Optimized text to " << text.length() << " bytes , instead of " << textEdit->toHtml().length() << std::endl; return; } text = textEdit->toHtml(); optimizeHtml(text, flag); } void RsHtml::optimizeHtml(QString &text, unsigned int flag) { int originalLength = text.length(); // remove doctype text.remove(QRegExp("]*>")); QDomDocument doc; if (doc.setContent(text) == false) { return; } QDomElement body = doc.documentElement(); ::optimizeHtml(doc, body, flag); text = doc.toString(-1); std::cerr << "Optimized text to " << text.length() << " bytes , instead of " << originalLength << std::endl; } QString RsHtml::toHtml(QString text, bool realHtml) { // replace "\n" from the optimized html with "
" text.replace("\n", "
"); if (!realHtml) { return text; } QTextDocument doc; doc.setHtml(text); return doc.toHtml(); }