Fix handling of URLs in EmbedInHtmlAhref.

A URL cannot contain spaces. The query and hash parts are now handled.
This commit is contained in:
Phenom 2016-10-22 14:48:23 +02:00
parent b8b78dd6cb
commit 58a4d39b0f

View File

@ -71,39 +71,73 @@ class EmbedInHtmlAhref : public EmbedInHtml
public: public:
EmbedInHtmlAhref() : EmbedInHtml(Ahref) EmbedInHtmlAhref() : EmbedInHtml(Ahref)
{ {
// myRE.setPattern("(\\bretroshare://[^\\s]*)|(\\bhttps?://[^\\s]*)|(\\bfile://[^\\s]*)|(\\bwww\\.[^\\s]*)"); // The following regular expressions for finding URLs in
// plain text are borrowed from https://regex101.com/r/eR9yG2/4
// Modified to: (added \s so that matching stops when the query contains a whitespace character; otherwise it would not stop at the end of the URL.)
// (([\s]+)?(([a-z0-9.+-]+):)((\/\/)(\/|(((([^\/:@?&#\s]+)(:([^\/@?&#\s]+))?)@)?([^:\/?&#\s]+)(:([1-9][0-9]*))?)(?=[\/#$?]))))(([^#?\s]+)?(\?([^#\s]+))?(#([^\s]+))?([\s])?)
// regAddress .|| .| || | .| | . . .| .| ..| .../regPath .| | . .| .| ./
// regBefo/eChar .|| .| || | .| | . . .| .| ..| ... regPathnam/| | . .regHash /regEnd/har
// regScheme /regGrp5| || | .| | . . .| .| ..| ../ regSearch . /
// |regS/ash || | .| | . . .| .| ..| .. regQuery/
// regFileAuthHost .| | . . .| .| ..| ./
// regAuthHost .| | . . .| .| ./regPosLk /
// regUserPass .| | . . /regHost /regPort /
// regUser /| | . .
// regGrp12 . /
// regPassCharse/
//
// to get all groups captured.
// Test pattern: " https://user:password@example.com:8080/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test "
// The following regular expressions for finding URLs in QString regBeforeChar = "([\\s]+)?";//2nd Group: " "
// plain text are borrowed from *gnome-terminal*: QStringList regSchemes;
// regSchemes.append("news:");
// regSchemes.append("telnet:");
// regSchemes.append("nntp:");
// regSchemes.append("file:/");
regSchemes.append("https?:");
// regSchemes.append("ftps?:");
// regSchemes.append("sftp:");
// regSchemes.append("webcal:");
regSchemes.append("retroshare:");
QString regPassCharset = "[-\\w,?;\\.:/!%$^*&~\\\"#']"; QString regScheme = "(?:" + regSchemes.join(")|(?:") + ")";//3rd Group: "https:" //4th group inside
QString regHost = "[-\\w]+(\\.[-\\w]+)*";
QString regPort = "(?:\\:\\d{1,5})?"; QString regSlash = "(\\/\\/)";//6th Group: "//"
QString regPathCharset = "[-\\w_$\\.+!*,;@&=?/~#%]";
QString regPathTermSet = "[^\\]'.}<>) \\t\\r\\n,\\\"]"; QString regUser = "([^\\/:@?&#\\s]+)";//11th Group: "user"
QStringList regSchemes; QString regPassCharset = "([^\\/@?&#\\s]+)";//13th Group "password"
// regSchemes.append("news:"); QString regGrp12 = "(:" + regPassCharset + ")?"; // 12th Group: ":password"
// regSchemes.append("telnet:"); QString regUserPass = "((" + regUser + regGrp12 + ")@)?"; //9th Group: "user:password@" with 10th inside
// regSchemes.append("nntp:");
// regSchemes.append("file:/"); QString regHost = "([^:\\/?&#\\s]+)"; //14th Group: "example.com"
regSchemes.append("https?:"); QString regPort = "(:([1-9][0-9]*))?"; //15th Group: ":8080" with 16th inside
// regSchemes.append("ftps?:");
// regSchemes.append("sftp:"); QString regAuthHost = regUserPass + regHost + regPort; //8th Group: "user:password@example.com:8080"
// regSchemes.append("webcal:"); QString regPosLk = ""; //Positive Lookahead
regSchemes.append("retroshare:");
QString regScheme = "((?:" + regSchemes.join(")|(?:") + "))"; QString regFileAuthHost = "(\\/|" + regAuthHost + regPosLk + ")"; //7th Group: "user:password@example.com:8080" Could be "/" with "file:///"
QString regUserPass = "[-\\w]+(?:%s+)?" % regPassCharset; QString regGrp5 = "(" + regSlash + regFileAuthHost + ")"; //5th Group: "//user:password@example.com:8080"
QString regUrlPath = "(?:(/" + regPathCharset + "+(?:[(]" + regPathCharset +"*[)])*" + regPathCharset + "*)*" + regPathTermSet + ")?"; QString regAddress = "(" + regBeforeChar + regScheme + regGrp5 + ")"; //1rst Group: "https://user:password@example.com:8080"
QStringList regHotLinkFinders;
regHotLinkFinders.append(regScheme + "//(?:" + regUserPass + "@)?"+ regHost + regPort + regUrlPath); QString regPathName = "([^#?\\s]+)?"; //18th Group: "/./api/api/../users/./get/22iohoife.extension"
// regHotLinkFinders.append("(?:(?:www)|(?:ftp))[-\\w]*\\." + regHost + regPort + regUrlPath);
// regHotLinkFinders.append("(?:(?:callto:)|(?:h323:)|(?:sip:))[-\\w][-\\w\\.]*(?:" + regPort + "/[a-z0-9]+)?@" + regHost); QString regQuery = "([^#\\s]+)"; //20th Group: "return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3"
// regHotLinkFinders.append("(?:mailto:)?[-\\w][-\\w\\.]*@[-\\w]+\\." + regHost); QString regSearch = "(\\?" + regQuery + ")?"; //19th Group: "?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3"
// regHotLinkFinders.append("news:[\\w^_{|}~!\\\"#$%&'()*+,\\./;:=?`]+");
while (!regHotLinkFinders.isEmpty()) { QString regHash = "(#([^\\s]+))?"; //21th Group: "#test" 22th inside
myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive)); QString regEndChar = "";//"([\\s])?"; //23th Group: " "
}; QString regPath = "(" + regPathName + regSearch + regHash + regEndChar +")"; //17th Group: "/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test"
}
QString regUrlPath = regAddress + regPath;
QStringList regHotLinkFinders;
regHotLinkFinders.append(regUrlPath);
while (!regHotLinkFinders.isEmpty()) {
myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive));
};
}
}; };