Fix handling of URLs in EmbedInHtmlAhref.

A URL cannot contain spaces. The query and hash parts are now handled.
This commit is contained in:
Phenom 2016-10-22 14:48:23 +02:00
parent b8b78dd6cb
commit 58a4d39b0f

View File

@ -71,35 +71,69 @@ class EmbedInHtmlAhref : public EmbedInHtml
public: public:
EmbedInHtmlAhref() : EmbedInHtml(Ahref) EmbedInHtmlAhref() : EmbedInHtml(Ahref)
{ {
// myRE.setPattern("(\\bretroshare://[^\\s]*)|(\\bhttps?://[^\\s]*)|(\\bfile://[^\\s]*)|(\\bwww\\.[^\\s]*)");
// The following regular expressions for finding URLs in // The following regular expressions for finding URLs in
// plain text are borrowed from *gnome-terminal*: // plain text are borrowed from https://regex101.com/r/eR9yG2/4
//Modified to: (Adding \s to stop when query have space char else don't stop at end.)
// (([\s]+)?(([a-z0-9.+-]+):)((\/\/)(\/|(((([^\/:@?&#\s]+)(:([^\/@?&#\s]+))?)@)?([^:\/?&#\s]+)(:([1-9][0-9]*))?)(?=[\/#$?]))))(([^#?\s]+)?(\?([^#\s]+))?(#([^\s]+))?([\s])?)
// regAddress .|| .| || | .| | . . .| .| ..| .../regPath .| | . .| .| ./
// regBefo/eChar .|| .| || | .| | . . .| .| ..| ... regPathnam/| | . .regHash /regEnd/har
// regScheme /regGrp5| || | .| | . . .| .| ..| ../ regSearch . /
// |regS/ash || | .| | . . .| .| ..| .. regQuery/
// regFileAuthHost .| | . . .| .| ..| ./
// regAuthHost .| | . . .| .| ./regPosLk /
// regUserPass .| | . . /regHost /regPort /
// regUser /| | . .
// regGrp12 . /
// regPassCharse/
//
// to get all group captured.
// Test patern: " https://user:password@example.com:8080/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test "
QString regPassCharset = "[-\\w,?;\\.:/!%$^*&~\\\"#']"; QString regBeforeChar = "([\\s]+)?";//2nd Group: " "
QString regHost = "[-\\w]+(\\.[-\\w]+)*";
QString regPort = "(?:\\:\\d{1,5})?";
QString regPathCharset = "[-\\w_$\\.+!*,;@&=?/~#%]";
QString regPathTermSet = "[^\\]'.}<>) \\t\\r\\n,\\\"]";
QStringList regSchemes; QStringList regSchemes;
// regSchemes.append("news:"); // regSchemes.append("news:");
// regSchemes.append("telnet:"); // regSchemes.append("telnet:");
// regSchemes.append("nntp:"); // regSchemes.append("nntp:");
// regSchemes.append("file:/"); // regSchemes.append("file:/");
regSchemes.append("https?:"); regSchemes.append("https?:");
// regSchemes.append("ftps?:"); // regSchemes.append("ftps?:");
// regSchemes.append("sftp:"); // regSchemes.append("sftp:");
// regSchemes.append("webcal:"); // regSchemes.append("webcal:");
regSchemes.append("retroshare:"); regSchemes.append("retroshare:");
QString regScheme = "((?:" + regSchemes.join(")|(?:") + "))";
QString regUserPass = "[-\\w]+(?:%s+)?" % regPassCharset; QString regScheme = "(?:" + regSchemes.join(")|(?:") + ")";//3rd Group: "https:" //4th group inside
QString regUrlPath = "(?:(/" + regPathCharset + "+(?:[(]" + regPathCharset +"*[)])*" + regPathCharset + "*)*" + regPathTermSet + ")?";
QString regSlash = "(\\/\\/)";//6th Group: "//"
QString regUser = "([^\\/:@?&#\\s]+)";//11th Group: "user"
QString regPassCharset = "([^\\/@?&#\\s]+)";//13th Group "password"
QString regGrp12 = "(:" + regPassCharset + ")?"; // 12th Group: ":password"
QString regUserPass = "((" + regUser + regGrp12 + ")@)?"; //9th Group: "user:password@" with 10th inside
QString regHost = "([^:\\/?&#\\s]+)"; //14th Group: "example.com"
QString regPort = "(:([1-9][0-9]*))?"; //15th Group: ":8080" with 16th inside
QString regAuthHost = regUserPass + regHost + regPort; //8th Group: "user:password@example.com:8080"
QString regPosLk = ""; //Positive Lookahead
QString regFileAuthHost = "(\\/|" + regAuthHost + regPosLk + ")"; //7th Group: "user:password@example.com:8080" Could be "/" with "file:///"
QString regGrp5 = "(" + regSlash + regFileAuthHost + ")"; //5th Group: "//user:password@example.com:8080"
QString regAddress = "(" + regBeforeChar + regScheme + regGrp5 + ")"; //1rst Group: "https://user:password@example.com:8080"
QString regPathName = "([^#?\\s]+)?"; //18th Group: "/./api/api/../users/./get/22iohoife.extension"
QString regQuery = "([^#\\s]+)"; //20th Group: "return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3"
QString regSearch = "(\\?" + regQuery + ")?"; //19th Group: "?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3"
QString regHash = "(#([^\\s]+))?"; //21th Group: "#test" 22th inside
QString regEndChar = "";//"([\\s])?"; //23th Group: " "
QString regPath = "(" + regPathName + regSearch + regHash + regEndChar +")"; //17th Group: "/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test"
QString regUrlPath = regAddress + regPath;
QStringList regHotLinkFinders; QStringList regHotLinkFinders;
regHotLinkFinders.append(regScheme + "//(?:" + regUserPass + "@)?"+ regHost + regPort + regUrlPath); regHotLinkFinders.append(regUrlPath);
// regHotLinkFinders.append("(?:(?:www)|(?:ftp))[-\\w]*\\." + regHost + regPort + regUrlPath);
// regHotLinkFinders.append("(?:(?:callto:)|(?:h323:)|(?:sip:))[-\\w][-\\w\\.]*(?:" + regPort + "/[a-z0-9]+)?@" + regHost);
// regHotLinkFinders.append("(?:mailto:)?[-\\w][-\\w\\.]*@[-\\w]+\\." + regHost);
// regHotLinkFinders.append("news:[\\w^_{|}~!\\\"#$%&'()*+,\\./;:=?`]+");
while (!regHotLinkFinders.isEmpty()) { while (!regHotLinkFinders.isEmpty()) {
myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive)); myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive));
}; };