Implement deep indexing and search for forums

RsGxsNetTunnelService::receiveSearchRequest: handle the no-results case
  properly
RsNxsObserver::handleDistantSearchRequest: improve documentation of the
  method's behaviour
RsTurtleClientService: improve documentation
This commit is contained in:
Gioacchino Mazzurco 2021-02-19 23:23:02 +01:00
parent 1b551d809f
commit 9c38eed648
No known key found for this signature in database
GPG key ID: A1FBCA3872E87051
13 changed files with 902 additions and 89 deletions

View file

@ -168,13 +168,33 @@ std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc)
std::string retVal(rsHtmlDoc.substr(bodyTagEnd+1));
// strip also CSS inside <style></style>
oSize = retVal.size();
auto styleTagBegin(retVal.find("<style"));
if(styleTagBegin < oSize)
{
auto styleEnd(retVal.find("</style>", styleTagBegin));
if(styleEnd < oSize)
retVal.erase(styleTagBegin, 8+styleEnd-styleTagBegin);
}
std::string::size_type oPos;
std::string::size_type cPos;
int itCount = 0;
while((oPos = retVal.find("<")) < retVal.size())
{
if((cPos = retVal.find(">")) <= retVal.size())
retVal.erase(oPos, 1+cPos-oPos);
else break;
// Avoid infinite loop with crafty input
if(itCount > 1000)
{
RS_WARN( "Breaking stripping loop due to max allowed iterations ",
"rsHtmlDoc: ", rsHtmlDoc, " retVal: ", retVal );
break;
}
++itCount;
}
return retVal;