Fix slow emoticons by using generic regex rules where possible.

This commit is contained in:
hunbernd 2017-02-05 19:13:40 +01:00
parent 3baaae7a88
commit d0ab10cc38
2 changed files with 56 additions and 26 deletions

View File

@ -168,6 +168,13 @@ RsHtml::RsHtml()
void RsHtml::initEmoticons(const QHash<QString, QPair<QVector<QString>, QHash<QString, QString> > >& hash) void RsHtml::initEmoticons(const QHash<QString, QPair<QVector<QString>, QHash<QString, QString> > >& hash)
{ {
//add rules for standard emoticons
QString genericpattern;
genericpattern += "(?:^|\\s)(:\\w{1,40}:)(?:$|\\s)|"; //generic rule for :emoji_name:
genericpattern += "(?:^|\\s)(\\(\\w{1,40}\\))(?:$|\\s)"; //generic rule for (emoji_name)
QRegExp genericrx(genericpattern);
genericrx.setMinimal(true);
QString newRE; QString newRE;
for(QHash<QString, QPair<QVector<QString>, QHash<QString, QString> > >::const_iterator groupit = hash.begin(); groupit != hash.end(); ++groupit) { for(QHash<QString, QPair<QVector<QString>, QHash<QString, QString> > >::const_iterator groupit = hash.begin(); groupit != hash.end(); ++groupit) {
QHash<QString,QString> group = groupit.value().second; QHash<QString,QString> group = groupit.value().second;
@ -177,38 +184,42 @@ void RsHtml::initEmoticons(const QHash<QString, QPair<QVector<QString>, QHash<QS
continue; continue;
} }
defEmbedImg.smileys.insert(smile, it.value()); defEmbedImg.smileys.insert(smile, it.value());
// add space around smileys //check if smiley is using standard format :new-format: or (old-format) and don't make a new regexp for it
newRE += "(?:^|\\s)(" + QRegExp::escape(smile) + ")(?:$|\\s)|"; if(!genericrx.exactMatch(smile)) {
// explanations: // add space around smileys
// (?:^|\s)(*smiley*)(?:$|\s) newRE += "(?:^|\\s)(" + QRegExp::escape(smile) + ")(?:$|\\s)|";
// // explanations:
// (?:^|\s) Non-capturing group // (?:^|\s)(*smiley*)(?:$|\s)
// 1st Alternative: ^ //
// ^ assert position at start of the string // (?:^|\s) Non-capturing group
// 2nd Alternative: \s // 1st Alternative: ^
// \s match any white space character [\r\n\t\f ] // ^ assert position at start of the string
// // 2nd Alternative: \s
// 1st Capturing group (*smiley*) // \s match any white space character [\r\n\t\f ]
// *smiley* matches the characters *smiley* literally (case sensitive) //
// // 1st Capturing group (*smiley*)
// (?:$|\s) Non-capturing group // *smiley* matches the characters *smiley* literally (case sensitive)
// 1st Alternative: $ //
// $ assert position at end of the string // (?:$|\s) Non-capturing group
// 2nd Alternative: \s // 1st Alternative: $
// \s match any white space character [\r\n\t\f ] // $ assert position at end of the string
// 2nd Alternative: \s
// \s match any white space character [\r\n\t\f ]
/* /*
* TODO * TODO
* a better version is: * a better version is:
* (?<=^|\s)(*smile*)(?=$|\s) using the lookbehind/lookahead operator instead of non-capturing groups. * (?<=^|\s)(*smile*)(?=$|\s) using the lookbehind/lookahead operator instead of non-capturing groups.
* This solves the problem that spaces are matched, too (see workaround in RsHtml::embedHtml) * This solves the problem that spaces are matched, too (see workaround in RsHtml::embedHtml)
* This is not supported by Qt4! * This is not supported by Qt4!
*/ */
}
} }
} }
newRE.chop(1); // remove last | QRegExp emojimatcher(newRE + genericpattern);
defEmbedImg.myREs.append(QRegExp(newRE)); emojimatcher.setMinimal(true);
defEmbedImg.myREs.append(emojimatcher);
} }
bool RsHtml::canReplaceAnchor(QDomDocument &/*doc*/, QDomElement &/*element*/, const RetroShareLink &link) bool RsHtml::canReplaceAnchor(QDomDocument &/*doc*/, QDomElement &/*element*/, const RetroShareLink &link)
@ -311,6 +322,22 @@ void RsHtml::replaceAnchorWithImg(QDomDocument &doc, QDomElement &element, QText
element.appendChild(img); element.appendChild(img);
} }
/**
 * Finds the next acceptable match of rx in text, starting the search at pos.
 *
 * For embed types other than Img the result of QRegExp::indexIn is returned
 * unchanged. For Img embeds a match is accepted only when its whole matched
 * text (capture 0) with surrounding whitespace trimmed is a key of the smileys
 * table; rejected matches are skipped and the search continues behind them.
 *
 * On return rx holds the state of the last search performed, so the caller can
 * query rx.matchedLength() / rx.cap() for the match that was accepted.
 *
 * @param rx          regular expression to search with (its match state is updated)
 * @param text        text to search in
 * @param embedInfos  embed description; must actually be an EmbedInHtmlImg when myType == Img
 * @param pos         offset in text at which the search starts
 * @return offset of the accepted match, or -1 if there is none
 */
int RsHtml::indexInWithValidation(QRegExp &rx, const QString &text, EmbedInHtml &embedInfos, int pos)
{
    int index = rx.indexIn(text, pos);
    // Nothing found, or nothing to validate for this kind of embed.
    if(index == -1 || embedInfos.myType != Img) return index;

    const EmbedInHtmlImg& embedImg = static_cast<const EmbedInHtmlImg&>(embedInfos);
    while(index != -1) {
        // cap(0) includes the whitespace matched around the smiley, hence trimmed()
        if(embedImg.smileys.contains(rx.cap(0).trimmed()))
            return index;
        // Rejected: restart directly behind the start of this match. Any start
        // offset <= index would only find the very same match again, so stepping
        // one character at a time from pos just repeats the search needlessly.
        index = rx.indexIn(text, index + 1);
    }
    return -1;
}
/** /**
* Parses a DOM tree and replaces text by HTML tags. * Parses a DOM tree and replaces text by HTML tags.
* The tree is traversed depth-first, but only through children of Element type * The tree is traversed depth-first, but only through children of Element type
@ -376,13 +403,13 @@ void RsHtml::embedHtml(QTextDocument *textDocument, QDomDocument& doc, QDomEleme
if(myRE.pattern().length() == 0) // we'll get stuck with an empty regexp if(myRE.pattern().length() == 0) // we'll get stuck with an empty regexp
return; return;
if(myRE.indexIn(tempText) == -1) int nextPos = 0;
if((nextPos = indexInWithValidation(myRE, tempText, embedInfos)) == -1)
continue; continue;
// there is at least one link inside, we start replacing // there is at least one link inside, we start replacing
int currentPos = 0; int currentPos = 0;
int nextPos = 0; do {
while((nextPos = myRE.indexIn(tempText, currentPos)) != -1) {
// if nextPos == 0 it means the text begins by a link // if nextPos == 0 it means the text begins by a link
if(nextPos > 0) { if(nextPos > 0) {
QDomText textPart = doc.createTextNode(tempText.mid(currentPos, nextPos - currentPos)); QDomText textPart = doc.createTextNode(tempText.mid(currentPos, nextPos - currentPos));
@ -439,7 +466,7 @@ void RsHtml::embedHtml(QTextDocument *textDocument, QDomDocument& doc, QDomEleme
index++; index++;
currentPos = nextPos + myRE.matchedLength(); currentPos = nextPos + myRE.matchedLength();
} } while((nextPos = indexInWithValidation(myRE, tempText, embedInfos, currentPos)) != -1);
// text after the last link, only if there's one, don't touch the index // text after the last link, only if there's one, don't touch the index
// otherwise decrement the index because we're going to remove node // otherwise decrement the index because we're going to remove node

View File

@ -86,6 +86,9 @@ protected:
virtual bool canReplaceAnchor(QDomDocument &doc, QDomElement &element, const RetroShareLink &link); virtual bool canReplaceAnchor(QDomDocument &doc, QDomElement &element, const RetroShareLink &link);
virtual void anchorTextForImg(QDomDocument &doc, QDomElement &element, const RetroShareLink &link, QString &text); virtual void anchorTextForImg(QDomDocument &doc, QDomElement &element, const RetroShareLink &link, QString &text);
virtual void anchorStylesheetForImg(QDomDocument &doc, QDomElement &element, const RetroShareLink &link, QString &styleSheet); virtual void anchorStylesheetForImg(QDomDocument &doc, QDomElement &element, const RetroShareLink &link, QString &styleSheet);
private:
int indexInWithValidation(QRegExp &rx, const QString &text, EmbedInHtml &embedInfos, int pos = 0);
}; };
#endif // HANDLE_RICH_TEXT_H_ #endif // HANDLE_RICH_TEXT_H_