From 58a4d39b0f93902b1b49fe5ed8cd89c4c73f1d1c Mon Sep 17 00:00:00 2001 From: Phenom Date: Sat, 22 Oct 2016 14:48:23 +0200 Subject: [PATCH] Fix handling of URLs in EmbedInHtmlAhref. A URL cannot contain spaces. Query and hash are now handled. --- retroshare-gui/src/util/HandleRichText.cpp | 96 +++++++++++++++------- 1 file changed, 65 insertions(+), 31 deletions(-) diff --git a/retroshare-gui/src/util/HandleRichText.cpp b/retroshare-gui/src/util/HandleRichText.cpp index 5f494224c..191016589 100644 --- a/retroshare-gui/src/util/HandleRichText.cpp +++ b/retroshare-gui/src/util/HandleRichText.cpp @@ -71,39 +71,73 @@ class EmbedInHtmlAhref : public EmbedInHtml public: EmbedInHtmlAhref() : EmbedInHtml(Ahref) { - // myRE.setPattern("(\\bretroshare://[^\\s]*)|(\\bhttps?://[^\\s]*)|(\\bfile://[^\\s]*)|(\\bwww\\.[^\\s]*)"); + // The following regular expressions for finding URLs in + // plain text are borrowed from https://regex101.com/r/eR9yG2/4 + //Modified to: (\s added to the character classes so a match stops at whitespace; without it the match would not stop at the end of the URL.) + // (([\s]+)?(([a-z0-9.+-]+):)((\/\/)(\/|(((([^\/:@?&#\s]+)(:([^\/@?&#\s]+))?)@)?([^:\/?&#\s]+)(:([1-9][0-9]*))?)(?=[\/#$?]))))(([^#?\s]+)?(\?([^#\s]+))?(#([^\s]+))?([\s])?) + // regAddress .|| .| || | .| | . . .| .| ..| .../regPath .| | . .| .| ./ + // regBefo/eChar .|| .| || | .| | . . .| .| ..| ... regPathnam/| | . .regHash /regEnd/har + // regScheme /regGrp5| || | .| | . . .| .| ..| ../ regSearch . / + // |regS/ash || | .| | . . .| .| ..| .. regQuery/ + // regFileAuthHost .| | . . .| .| ..| ./ + // regAuthHost .| | . . .| .| ./regPosLk / + // regUserPass .| | . . /regHost /regPort / + // regUser /| | . . + // regGrp12 . / + // regPassCharse/ + // + // to get all groups captured.
+ // Test pattern: " https://user:password@example.com:8080/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test " - // The following regular expressions for finding URLs in - // plain text are borrowed from *gnome-terminal*: + QString regBeforeChar = "([\\s]+)?";//2nd Group: " " + QStringList regSchemes; + // regSchemes.append("news:"); + // regSchemes.append("telnet:"); + // regSchemes.append("nntp:"); + // regSchemes.append("file:/"); + regSchemes.append("https?:"); + // regSchemes.append("ftps?:"); + // regSchemes.append("sftp:"); + // regSchemes.append("webcal:"); + regSchemes.append("retroshare:"); - QString regPassCharset = "[-\\w,?;\\.:/!%$^*&~\\\"#']"; - QString regHost = "[-\\w]+(\\.[-\\w]+)*"; - QString regPort = "(?:\\:\\d{1,5})?"; - QString regPathCharset = "[-\\w_$\\.+!*,;@&=?/~#%]"; - QString regPathTermSet = "[^\\]'.}<>) \\t\\r\\n,\\\"]"; - QStringList regSchemes; -// regSchemes.append("news:"); -// regSchemes.append("telnet:"); -// regSchemes.append("nntp:"); -// regSchemes.append("file:/"); - regSchemes.append("https?:"); -// regSchemes.append("ftps?:"); -// regSchemes.append("sftp:"); -// regSchemes.append("webcal:"); - regSchemes.append("retroshare:"); - QString regScheme = "((?:" + regSchemes.join(")|(?:") + "))"; - QString regUserPass = "[-\\w]+(?:%s+)?" % regPassCharset; - QString regUrlPath = "(?:(/" + regPathCharset + "+(?:[(]" + regPathCharset +"*[)])*" + regPathCharset + "*)*" + regPathTermSet + ")?"; - QStringList regHotLinkFinders; - regHotLinkFinders.append(regScheme + "//(?:" + regUserPass + "@)?"+ regHost + regPort + regUrlPath); -// regHotLinkFinders.append("(?:(?:www)|(?:ftp))[-\\w]*\\." + regHost + regPort + regUrlPath); -// regHotLinkFinders.append("(?:(?:callto:)|(?:h323:)|(?:sip:))[-\\w][-\\w\\.]*(?:" + regPort + "/[a-z0-9]+)?@" + regHost); -// regHotLinkFinders.append("(?:mailto:)?[-\\w][-\\w\\.]*@[-\\w]+\\."
+ regHost); -// regHotLinkFinders.append("news:[\\w^_{|}~!\\\"#$%&'()*+,\\./;:=?`]+"); - while (!regHotLinkFinders.isEmpty()) { - myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive)); - }; - } + QString regScheme = "(?:(?:" + regSchemes.join(")|(?:") + "))";//Scheme, e.g. "https:" (3rd/4th Group of the reference pattern, non-capturing here). Wrapped in an outer (?:...) so the '|' between schemes cannot split regAddress into two alternatives + + QString regSlash = "(\\/\\/)";//6th Group: "//" + + QString regUser = "([^\\/:@?&#\\s]+)";//11th Group: "user" + QString regPassCharset = "([^\\/@?&#\\s]+)";//13th Group "password" + QString regGrp12 = "(:" + regPassCharset + ")?"; // 12th Group: ":password" + QString regUserPass = "((" + regUser + regGrp12 + ")@)?"; //9th Group: "user:password@" with 10th inside + + QString regHost = "([^:\\/?&#\\s]+)"; //14th Group: "example.com" + QString regPort = "(:([1-9][0-9]*))?"; //15th Group: ":8080" with 16th inside + + QString regAuthHost = regUserPass + regHost + regPort; //8th Group: "user:password@example.com:8080" + QString regPosLk = ""; //Positive Lookahead + + QString regFileAuthHost = "(\\/|" + regAuthHost + regPosLk + ")"; //7th Group: "user:password@example.com:8080" Could be "/" with "file:///" + QString regGrp5 = "(" + regSlash + regFileAuthHost + ")"; //5th Group: "//user:password@example.com:8080" + QString regAddress = "(" + regBeforeChar + regScheme + regGrp5 + ")"; //1st Group: "https://user:password@example.com:8080" + + QString regPathName = "([^#?\\s]+)?"; //18th Group: "/./api/api/../users/./get/22iohoife.extension" + + QString regQuery = "([^#\\s]+)"; //20th Group: "return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3" + QString regSearch = "(\\?"
+ regQuery + ")?"; //19th Group: "?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3" + + QString regHash = "(#([^\\s]+))?"; //21st Group: "#test" 22nd inside + QString regEndChar = "";//"([\\s])?"; //23rd Group: " " + QString regPath = "(" + regPathName + regSearch + regHash + regEndChar +")"; //17th Group: "/./api/api/../users/./get/22iohoife.extension?return=name&return=email&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3&a[]=3#test" + + QString regUrlPath = regAddress + regPath; + + QStringList regHotLinkFinders; + regHotLinkFinders.append(regUrlPath); + + while (!regHotLinkFinders.isEmpty()) { + myREs.append(QRegExp(regHotLinkFinders.takeFirst(), Qt::CaseInsensitive)); + }; + } };