diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h
index 3d916a62a..7152e34ce 100644
--- a/libretroshare/src/deep_search/deep_search.h
+++ b/libretroshare/src/deep_search/deep_search.h
@@ -17,17 +17,18 @@
* along with this program. If not, see .
*/
+#include
#include
#include
#include "retroshare/rsgxschannels.h"
#include "retroshare/rsinit.h"
+#include "util/rsurl.h"
struct DeepSearch
{
struct SearchResult
{
- // TODO: Use RsUrl from extra_locators branch instead of plain string
std::string mUrl;
std::string mSnippet;
};
@@ -90,6 +91,11 @@ struct DeepSearch
// Index each field with a suitable prefix.
termgenerator.index_text(chan.mMeta.mGroupName, 1, "G");
+
+ char date[] = "YYYYMMDD\0";
+ std::strftime(date, 9, "%Y%m%d", std::gmtime(&chan.mMeta.mPublishTs));
+ termgenerator.index_text(date, 1, "D");
+
termgenerator.index_text(chan.mDescription, 1, "XD");
// Index fields without prefixes for general search.
@@ -97,8 +103,14 @@ struct DeepSearch
termgenerator.increase_termpos();
termgenerator.index_text(chan.mDescription);
- std::string rsLink("retroshare://channel?id=");
- rsLink += chan.mMeta.mGroupId.toStdString();
+ RsUrl chanUrl; chanUrl
+ .setScheme("retroshare").setPath("/channel")
+ .setQueryKV("id", chan.mMeta.mGroupId.toStdString());
+ const std::string idTerm("Q" + chanUrl.toString());
+
+ chanUrl.setQueryKV("publishDate", date);
+ chanUrl.setQueryKV("name", chan.mMeta.mGroupName);
+ std::string rsLink(chanUrl.toString());
// store the RS link so we are able to retrive it on matching search
doc.add_value(URL_VALUENO, rsLink);
@@ -109,7 +121,6 @@ struct DeepSearch
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer. "Q" prefix is a Xapian convention for unique id term.
- const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
}
@@ -117,8 +128,10 @@ struct DeepSearch
static void removeChannelFromIndex(RsGxsGroupId grpId)
{
// "Q" prefix is a Xapian convention for unique id term.
- std::string idTerm("Qretroshare://channel?id=");
- idTerm += grpId.toStdString();
+ RsUrl chanUrl; chanUrl
+ .setScheme("retroshare").setPath("/channel")
+ .setQueryKV("id", grpId.toStdString());
+ std::string idTerm("Q" + chanUrl.toString());
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
db.delete_document(idTerm);
@@ -138,24 +151,72 @@ struct DeepSearch
// Index each field with a suitable prefix.
termgenerator.index_text(post.mMeta.mMsgName, 1, "S");
- termgenerator.index_text(post.mMsg, 1, "XD");
+
+ char date[] = "YYYYMMDD\0";
+ std::strftime(date, 9, "%Y%m%d", std::gmtime(&post.mMeta.mPublishTs));
+ termgenerator.index_text(date, 1, "D");
+
+ // Avoid indexing HTML
+ bool isPlainMsg = post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>';
+
+ if(isPlainMsg)
+ termgenerator.index_text(post.mMsg, 1, "XD");
// Index fields without prefixes for general search.
termgenerator.index_text(post.mMeta.mMsgName);
- termgenerator.increase_termpos();
- termgenerator.index_text(post.mMsg);
+ if(isPlainMsg)
+ {
+ termgenerator.increase_termpos();
+ termgenerator.index_text(post.mMsg);
+ }
+
+ for(const RsGxsFile& attachment : post.mFiles)
+ {
+ termgenerator.index_text(attachment.mName, 1, "F");
+
+ termgenerator.increase_termpos();
+ termgenerator.index_text(attachment.mName);
+ }
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
- std::string idTerm("Qretroshare://channel?id=");
- idTerm += post.mMeta.mGroupId.toStdString();
- idTerm += "&msgid=";
- idTerm += post.mMeta.mMsgId.toStdString();
+ RsUrl postUrl; postUrl
+ .setScheme("retroshare").setPath("/channel")
+ .setQueryKV("id", post.mMeta.mGroupId.toStdString())
+ .setQueryKV("msgid", post.mMeta.mMsgId.toStdString());
+ std::string idTerm("Q" + postUrl.toString());
+
+ postUrl.setQueryKV("publishDate", date);
+ postUrl.setQueryKV("name", post.mMeta.mMsgName);
+ std::string rsLink(postUrl.toString());
+
+ // store the RS link so we are able to retrieve it on matching search
+ doc.add_value(URL_VALUENO, rsLink);
+
+ // Store some fields for display purposes.
+ if(isPlainMsg)
+ doc.set_data(post.mMeta.mMsgName + "\n" + post.mMsg);
+ else doc.set_data(post.mMeta.mMsgName);
+
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
}
+ static void removeChannelPostFromIndex(
+ RsGxsGroupId grpId, RsGxsMessageId msgId )
+ { // Remove the channel post identified by (grpId, msgId) from the deep search index.
+ RsUrl postUrl; postUrl
+ .setScheme("retroshare").setPath("/channel")
+ .setQueryKV("id", grpId.toStdString())
+ .setQueryKV("msgid", msgId.toStdString());
+ // "Q" prefix is a Xapian convention for unique id term. The URL holds only id and msgid, built the same way as the idTerm in indexChannelPost.
+ std::string idTerm("Q" + postUrl.toString());
+ // Open the index at dbPath() writable (DB_CREATE_OR_OPEN) and delete by the unique id term.
+ Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
+ db.delete_document(idTerm);
+ }
+
private:
enum : Xapian::valueno
diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc
index 60106b411..75b43da83 100644
--- a/libretroshare/src/gxs/rsgxsutil.cc
+++ b/libretroshare/src/gxs/rsgxsutil.cc
@@ -166,6 +166,7 @@ bool RsGxsIntegrityCheck::check()
{
#ifdef RS_DEEP_SEARCH
bool isGxsChannels = dynamic_cast(mGenExchangeClient);
+ std::set indexedGroups;
#endif
// first take out all the groups
@@ -232,6 +233,7 @@ bool RsGxsIntegrityCheck::check()
cgIt->toChannelGroup(cg, false);
cg.mMeta = meta;
+ indexedGroups.insert(grp->grpId);
DeepSearch::indexChannelGroup(cg);
}
else
@@ -309,53 +311,99 @@ bool RsGxsIntegrityCheck::check()
}
if (nxsMsgIt == nxsMsgV.end())
- {
- msgsToDel[grpId].insert(msgId);
+ {
+ msgsToDel[grpId].insert(msgId);
+#ifdef RS_DEEP_SEARCH
+ if(isGxsChannels)
+ DeepSearch::removeChannelPostFromIndex(grpId, msgId);
+#endif
}
}
}
- GxsMsgResult::iterator mit = msgs.begin();
+ GxsMsgResult::iterator mit = msgs.begin();
+ for(; mit != msgs.end(); ++mit)
+ {
+ std::vector& msgV = mit->second;
+ std::vector::iterator vit = msgV.begin();
- for(; mit != msgs.end(); ++mit)
- {
- std::vector& msgV = mit->second;
- std::vector::iterator vit = msgV.begin();
+ for(; vit != msgV.end(); ++vit)
+ {
+ RsNxsMsg* msg = *vit;
+ RsFileHash currHash;
+ pqihash pHash;
+ pHash.addData(msg->msg.bin_data, msg->msg.bin_len);
+ pHash.Complete(currHash);
- for(; vit != msgV.end(); ++vit)
- {
- RsNxsMsg* msg = *vit;
- RsFileHash currHash;
- pqihash pHash;
- pHash.addData(msg->msg.bin_data, msg->msg.bin_len);
- pHash.Complete(currHash);
-
- if(msg->metaData == NULL || currHash != msg->metaData->mHash)
- {
- std::cerr << "(EE) deleting message data with wrong hash or null meta data. meta=" << (void*)msg->metaData << std::endl;
- msgsToDel[msg->grpId].insert(msg->msgId);
- }
- else if(!msg->metaData->mAuthorId.isNull() && subscribed_groups.find(msg->metaData->mGroupId)!=subscribed_groups.end())
- {
-#ifdef DEBUG_GXSUTIL
- GXSUTIL_DEBUG() << "TimeStamping message authors' key ID " << msg->metaData->mAuthorId << " in message " << msg->msgId << ", group ID " << msg->grpId<< std::endl;
+ if(msg->metaData == NULL || currHash != msg->metaData->mHash)
+ {
+ std::cerr << __PRETTY_FUNCTION__ <<" (EE) deleting message data"
+ << " with wrong hash or null meta data. meta="
+ << (void*)msg->metaData << std::endl;
+ msgsToDel[msg->grpId].insert(msg->msgId);
+#ifdef RS_DEEP_SEARCH
+ if(isGxsChannels)
+ DeepSearch::removeChannelPostFromIndex(msg->grpId, msg->msgId);
#endif
- if(rsReputations!=NULL && rsReputations->overallReputationLevel(msg->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE)
- used_gxs_ids.insert(std::make_pair(msg->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::MESSAGE_AUTHOR_KEEP_ALIVE,msg->metaData->mGroupId,msg->metaData->mMsgId))) ;
- }
+ }
+ else if (subscribed_groups.count(msg->metaData->mGroupId))
+ {
+#ifdef RS_DEEP_SEARCH
+ if( isGxsChannels
+ && indexedGroups.count(msg->metaData->mGroupId) )
+ {
+ RsGxsMsgMetaData meta;
+ meta.deserialise(msg->meta.bin_data, &msg->meta.bin_len);
+
+ uint32_t blz = msg->msg.bin_len;
+ RsItem* rIt = mSerializer.deserialise(msg->msg.bin_data,
+ &blz);
+
+ if( RsGxsChannelPostItem* cgIt =
+ dynamic_cast(rIt) )
+ {
+ RsGxsChannelPost cg;
+ cgIt->toChannelPost(cg, false);
+ cg.mMeta = meta;
+
+ DeepSearch::indexChannelPost(cg);
+ }
+ else if(dynamic_cast(rIt)) {}
+ else if(dynamic_cast(rIt)) {}
+ else
+ {
+ std::cerr << __PRETTY_FUNCTION__ << " Message: "
+ << meta.mMsgId.toStdString()
+ << " in group: "
+ << meta.mGroupId.toStdString() << " "
+ << " doesn't seems a channel post, please "
+ << "report to developers"
+ << std::endl;
+ print_stacktrace();
+ }
+
+ delete rIt;
+ }
+#endif
+
+ if(!msg->metaData->mAuthorId.isNull())
+ {
+#ifdef DEBUG_GXSUTIL
+ GXSUTIL_DEBUG() << "TimeStamping message authors' key ID " << msg->metaData->mAuthorId << " in message " << msg->msgId << ", group ID " << msg->grpId<< std::endl;
+#endif
+ if(rsReputations!=NULL && rsReputations->overallReputationLevel(msg->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE)
+ used_gxs_ids.insert(std::make_pair(msg->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::MESSAGE_AUTHOR_KEEP_ALIVE,msg->metaData->mGroupId,msg->metaData->mMsgId))) ;
+ }
+ }
delete msg;
}
}
-#ifdef RS_DEEP_SEARCH
- // TODO:remove msgsToDel from deep search index too
-#endif
-
mDs->removeMsgs(msgsToDel);
{
- RsStackMutex stack(mIntegrityMutex);
+ RS_STACK_MUTEX(mIntegrityMutex);
std::vector::iterator grpIt;
for(grpIt = grpsToDel.begin(); grpIt != grpsToDel.end(); ++grpIt)