From ce61174d79227588375678bfe446980420f50168 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sat, 9 Jun 2018 18:02:52 +0200 Subject: [PATCH 01/15] DROP before merge. Reduce INTEGRITY_CHECK_PERIOD So it run each two 2 minutes and it's easy to debug deep search --- libretroshare/src/gxs/rsgenexchange.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libretroshare/src/gxs/rsgenexchange.cc b/libretroshare/src/gxs/rsgenexchange.cc index ad7e02dcb..69344383d 100644 --- a/libretroshare/src/gxs/rsgenexchange.cc +++ b/libretroshare/src/gxs/rsgenexchange.cc @@ -67,7 +67,7 @@ static const uint32_t INDEX_AUTHEN_ADMIN = 0x00000040; // admin key //#define GEN_EXCH_DEBUG 1 static const uint32_t MSG_CLEANUP_PERIOD = 60*59; // 59 minutes -static const uint32_t INTEGRITY_CHECK_PERIOD = 60*31; // 31 minutes +static const uint32_t INTEGRITY_CHECK_PERIOD = 60*2; // 31 minutes // TODO: Restore this line before merging deep_search RsGenExchange::RsGenExchange(RsGeneralDataService *gds, RsNetworkExchangeService *ns, RsSerialType *serviceSerialiser, uint16_t servType, RsGixs* gixs, @@ -1634,7 +1634,6 @@ void RsGenExchange::notifyNewMessages(std::vector& messages) } } } - void RsGenExchange::notifyReceivePublishKey(const RsGxsGroupId &grpId) { RS_STACK_MUTEX(mGenMtx); From c0e92ddc6b6b8291995e4cdf4c76ea5314f19ead Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sat, 9 Jun 2018 18:06:14 +0200 Subject: [PATCH 02/15] WIP Index GXS channels with xapian Use temporary DB ATM --- libretroshare/src/deep_search/deep_search.h | 106 ++++++++++++ libretroshare/src/gxs/rsgxsutil.cc | 183 +++++++++++++------- libretroshare/src/libretroshare.pro | 4 +- libretroshare/src/rsitems/rsnxsitems.h | 3 +- libretroshare/src/use_libretroshare.pri | 4 + retroshare.pri | 9 + 6 files changed, 247 insertions(+), 62 deletions(-) create mode 100644 libretroshare/src/deep_search/deep_search.h diff --git a/libretroshare/src/deep_search/deep_search.h 
b/libretroshare/src/deep_search/deep_search.h new file mode 100644 index 000000000..6af963c6d --- /dev/null +++ b/libretroshare/src/deep_search/deep_search.h @@ -0,0 +1,106 @@ +#pragma once +/* + * RetroShare Content Search and Indexing. + * Copyright (C) 2018 Gioacchino Mazzurco + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include + +#include "retroshare/rsgxschannels.h" + +struct DeepSearch +{ + //DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {} + + static void search(/*query*/) { /*return all matching results*/ } + + + static void indexChannelGroup(const RsGxsChannelGroup& chan) + { + Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + + // Set up a TermGenerator that we'll use in indexing. + Xapian::TermGenerator termgenerator; + //termgenerator.set_stemmer(Xapian::Stem("en")); + + // We make a document and tell the term generator to use this. + Xapian::Document doc; + termgenerator.set_document(doc); + + // Index each field with a suitable prefix. + termgenerator.index_text(chan.mMeta.mGroupName, 1, "G"); + termgenerator.index_text(chan.mDescription, 1, "XD"); + + // Index fields without prefixes for general search. 
+ termgenerator.index_text(chan.mMeta.mGroupName); + termgenerator.increase_termpos(); + termgenerator.index_text(chan.mDescription); + + // We use the identifier to ensure each object ends up in the + // database only once no matter how many times we run the + // indexer. + std::string idTerm("Qretroshare://channel?id="); + idTerm += chan.mMeta.mGroupId.toStdString(); + + doc.add_boolean_term(idTerm); + db.replace_document(idTerm, doc); + } + + static void removeChannelFromIndex(RsGxsGroupId grpId) + { + std::string idTerm("Qretroshare://channel?id="); + idTerm += grpId.toStdString(); + + Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + db.delete_document(idTerm); + } + + static void indexChannelPost(const RsGxsChannelPost& post) + { + Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + + // Set up a TermGenerator that we'll use in indexing. + Xapian::TermGenerator termgenerator; + //termgenerator.set_stemmer(Xapian::Stem("en")); + + // We make a document and tell the term generator to use this. + Xapian::Document doc; + termgenerator.set_document(doc); + + // Index each field with a suitable prefix. + termgenerator.index_text(post.mMeta.mMsgName, 1, "S"); + termgenerator.index_text(post.mMsg, 1, "XD"); + + // Index fields without prefixes for general search. + termgenerator.index_text(post.mMeta.mMsgName); + termgenerator.increase_termpos(); + termgenerator.index_text(post.mMsg); + + // We use the identifier to ensure each object ends up in the + // database only once no matter how many times we run the + // indexer. 
+ std::string idTerm("Qretroshare://channel?id="); + idTerm += post.mMeta.mGroupId.toStdString(); + idTerm += "&msgid="; + idTerm += post.mMeta.mMsgId.toStdString(); + doc.add_boolean_term(idTerm); + db.replace_document(idTerm, doc); + } + + static std::string mDbPath; +}; + +std::string DeepSearch::mDbPath = "/tmp/deep_search_xapian_db"; diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index bb0b1fb05..2c23d9e89 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -31,6 +31,12 @@ #include "pqi/pqihash.h" #include "gxs/rsgixs.h" +#ifdef RS_DEEP_SEARCH +# include "deep_search/deep_search.h" +# include "services/p3gxschannels.h" +# include "rsitems/rsgxschannelitems.h" +#endif + static const uint32_t MAX_GXS_IDS_REQUESTS_NET = 10 ; // max number of requests from cache/net (avoids killing the system!) //#define DEBUG_GXSUTIL 1 @@ -141,20 +147,28 @@ bool RsGxsMessageCleanUp::clean() return mGrpMeta.empty(); } -RsGxsIntegrityCheck::RsGxsIntegrityCheck(RsGeneralDataService* const dataService, RsGenExchange *genex, RsGixs *gixs) : - mDs(dataService),mGenExchangeClient(genex), mDone(false), mIntegrityMutex("integrity"),mGixs(gixs) -{ } +RsGxsIntegrityCheck::RsGxsIntegrityCheck( + RsGeneralDataService* const dataService, RsGenExchange* genex, + RsGixs* gixs ) : + mDs(dataService), mGenExchangeClient(genex), mDone(false), + mIntegrityMutex("integrity"), mGixs(gixs) {} void RsGxsIntegrityCheck::run() { check(); - RsStackMutex stack(mIntegrityMutex); - mDone = true; + RS_STACK_MUTEX(mIntegrityMutex); + mDone = true; } bool RsGxsIntegrityCheck::check() { +#ifdef RS_DEEP_SEARCH + bool isGxsChannels = dynamic_cast(mGenExchangeClient); + std::cout << __PRETTY_FUNCTION__ << " isGxsChannels: " << isGxsChannels + << std::endl; +#endif + // first take out all the groups std::map grp; mDs->retrieveNxsGrps(grp, true, true); @@ -166,67 +180,113 @@ bool RsGxsIntegrityCheck::check() std::set subscribed_groups ; // 
compute hash and compare to stored value, if it fails then simply add it - // to list - std::map::iterator git = grp.begin(); - for(; git != grp.end(); ++git) - { - RsNxsGrp* grp = git->second; - RsFileHash currHash; - pqihash pHash; - pHash.addData(grp->grp.bin_data, grp->grp.bin_len); - pHash.Complete(currHash); + // to list + for( std::map::iterator git = grp.begin(); + git != grp.end(); ++git ) + { + RsNxsGrp* grp = git->second; + RsFileHash currHash; + pqihash pHash; + pHash.addData(grp->grp.bin_data, grp->grp.bin_len); + pHash.Complete(currHash); - if(currHash == grp->metaData->mHash) - { - // get all message ids of group - if (mDs->retrieveMsgIds(grp->grpId, msgIds[grp->grpId]) == 1) - { - // store the group for retrieveNxsMsgs - grps[grp->grpId]; + if(currHash == grp->metaData->mHash) + { + // get all message ids of group + if (mDs->retrieveMsgIds(grp->grpId, msgIds[grp->grpId]) == 1) + { + // store the group for retrieveNxsMsgs + grps[grp->grpId]; - if(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) - { - subscribed_groups.insert(git->first) ; + if(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) + { + subscribed_groups.insert(git->first); - if(!grp->metaData->mAuthorId.isNull()) - { -#ifdef DEBUG_GXSUTIL - GXSUTIL_DEBUG() << "TimeStamping group authors' key ID " << grp->metaData->mAuthorId << " in group ID " << grp->grpId << std::endl; +#ifdef RS_DEEP_SEARCH + if(isGxsChannels) + { + RsGxsChannelGroup cg; + RsGxsGrpMetaData meta; + + meta.deserialise(grp->meta.bin_data, grp->meta.bin_len); + + /* TODO: Apparently a copy of the pointer to + * grp.bin_data is stored into grp.bin_data thus + * breaking the deserialization, skipping the pointer + * (8 bytes on x86_64 debug build) fix the + * deserilization, talk to Cyril how to properly fix + * this.*/ + RsGenericSerializer::SerializeContext ctx( + static_cast(grp->grp.bin_data)+8, + grp->grp.bin_len-8 ); + + RsGxsChannelGroupItem cgIt; + cgIt.serial_process( 
RsGenericSerializer::DESERIALIZE, + ctx ); + + if(ctx.mOk) + { + cgIt.toChannelGroup(cg, false); + cg.mMeta = meta; + + DeepSearch::indexChannelGroup(cg); + + std::cout << __PRETTY_FUNCTION__ << " ||Channel: " + << meta.mGroupName << " ||Description: " + << cg.mDescription << std::endl; + } + else + std::cout << __PRETTY_FUNCTION__ << " ||Group: " + << meta.mGroupName + << " ||doesn't seems a channel" + << " ||ctx.mOk: " << ctx.mOk + << " ||ctx.mData: " << (void*)ctx.mData + << " ||ctx.mSize: " << ctx.mSize + << " ||grp->grp.bin_data: " << grp->grp.bin_data + << " ||grp->grp.bin_len: " << grp->grp.bin_len + << std::endl; + } #endif - if(rsReputations!=NULL && rsReputations->overallReputationLevel(grp->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE) - used_gxs_ids.insert(std::make_pair(grp->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::GROUP_AUTHOR_KEEP_ALIVE,grp->grpId))) ; - } - } - } - else - { - msgIds.erase(msgIds.find(grp->grpId)); - // grpsToDel.push_back(grp->grpId); - } - - } - else - { - grpsToDel.push_back(grp->grpId); - } - - if(!(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) && !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_ADMIN) && !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_PUBLISH)) - { - RsGroupNetworkStats stats ; - mGenExchangeClient->getGroupNetworkStats(grp->grpId,stats); - - if(stats.mSuppliers == 0 && stats.mMaxVisibleCount == 0 && stats.mGrpAutoSync) - { + if(!grp->metaData->mAuthorId.isNull()) + { #ifdef DEBUG_GXSUTIL - GXSUTIL_DEBUG() << "Scheduling group \"" << grp->metaData->mGroupName << "\" ID=" << grp->grpId << " in service " << std::hex << mGenExchangeClient->serviceType() << std::dec << " for deletion because it has no suppliers not any visible data at friends." 
<< std::endl; + GXSUTIL_DEBUG() << "TimeStamping group authors' key ID " << grp->metaData->mAuthorId << " in group ID " << grp->grpId << std::endl; +#endif + if( rsReputations && rsReputations->overallReputationLevel(grp->metaData->mAuthorId ) > RsReputations::REPUTATION_LOCALLY_NEGATIVE ) + used_gxs_ids.insert(std::make_pair(grp->metaData->mAuthorId, RsIdentityUsage(mGenExchangeClient->serviceType(), RsIdentityUsage::GROUP_AUTHOR_KEEP_ALIVE,grp->grpId))); + } + } + } + else msgIds.erase(msgIds.find(grp->grpId)); + } + else + { + grpsToDel.push_back(grp->grpId); +#ifdef RS_DEEP_SEARCH + if(isGxsChannels) DeepSearch::removeChannelFromIndex(grp->grpId); +#endif + } + + if( !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) && + !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_ADMIN) && + !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_PUBLISH) ) + { + RsGroupNetworkStats stats; + mGenExchangeClient->getGroupNetworkStats(grp->grpId,stats); + + if( stats.mSuppliers == 0 && stats.mMaxVisibleCount == 0 + && stats.mGrpAutoSync ) + { +#ifdef DEBUG_GXSUTIL + GXSUTIL_DEBUG() << "Scheduling group \"" << grp->metaData->mGroupName << "\" ID=" << grp->grpId << " in service " << std::hex << mGenExchangeClient->serviceType() << std::dec << " for deletion because it has no suppliers not any visible data at friends." 
<< std::endl; #endif grpsToDel.push_back(grp->grpId); - } - } + } + } - delete grp; - } + delete grp; + } mDs->removeGroups(grpsToDel); @@ -299,6 +359,10 @@ bool RsGxsIntegrityCheck::check() } } +#ifdef RS_DEEP_SEARCH + // TODO:remove msgsToDel from deep search index too +#endif + mDs->removeMsgs(msgsToDel); { @@ -373,14 +437,13 @@ bool RsGxsIntegrityCheck::check() bool RsGxsIntegrityCheck::isDone() { - RsStackMutex stack(mIntegrityMutex); + RS_STACK_MUTEX(mIntegrityMutex); return mDone; } void RsGxsIntegrityCheck::getDeletedIds(std::list& grpIds, std::map >& msgIds) { - RsStackMutex stack(mIntegrityMutex); - + RS_STACK_MUTEX(mIntegrityMutex); grpIds = mDeletedGrps; msgIds = mDeletedMsgs; } diff --git a/libretroshare/src/libretroshare.pro b/libretroshare/src/libretroshare.pro index 13d8fc8b2..70ded089e 100644 --- a/libretroshare/src/libretroshare.pro +++ b/libretroshare/src/libretroshare.pro @@ -846,7 +846,9 @@ rs_gxs_trans { SOURCES += gxstrans/p3gxstransitems.cc gxstrans/p3gxstrans.cc } - +rs_deep_search { + HEADERS += deep_search/deep_search.h +} ########################################################################################################### diff --git a/libretroshare/src/rsitems/rsnxsitems.h b/libretroshare/src/rsitems/rsnxsitems.h index f717a3d09..6c7a72002 100644 --- a/libretroshare/src/rsitems/rsnxsitems.h +++ b/libretroshare/src/rsitems/rsnxsitems.h @@ -293,7 +293,8 @@ public: virtual void clear(); - virtual void serial_process(RsGenericSerializer::SerializeJob j,RsGenericSerializer::SerializeContext& ctx); + virtual void serial_process( RsGenericSerializer::SerializeJob j, + RsGenericSerializer::SerializeContext& ctx ); RsGxsGroupId grpId; /// group Id, needed to complete version Id (ncvi) static int refcount; diff --git a/libretroshare/src/use_libretroshare.pri b/libretroshare/src/use_libretroshare.pri index 3a3d1acb7..8dcf2d381 100644 --- a/libretroshare/src/use_libretroshare.pri +++ b/libretroshare/src/use_libretroshare.pri @@ -26,6 +26,10 @@ 
linux-* { mLibs += dl } +rs_deep_search { + mLibs += xapian +} + static { sLibs *= $$mLibs } else { diff --git a/retroshare.pri b/retroshare.pri index 111530a39..1348464ab 100644 --- a/retroshare.pri +++ b/retroshare.pri @@ -115,6 +115,11 @@ rs_macos10.9:CONFIG -= rs_macos10.11 rs_macos10.10:CONFIG -= rs_macos10.11 rs_macos10.12:CONFIG -= rs_macos10.11 +# To disable deep search append the following assignation to qmake command line +# "CONFIG+=no_rs_deep_search" +CONFIG *= rs_deep_search +no_rs_deep_search:CONFIG -= rs_deep_search + ########################################################################################################################################################### # # V07_NON_BACKWARD_COMPATIBLE_CHANGE_001: @@ -313,6 +318,10 @@ rs_chatserver { DEFINES *= RS_CHATSERVER } +rs_deep_search { + DEFINES *= RS_DEEP_SEARCH +} + debug { QMAKE_CXXFLAGS -= -O2 -fomit-frame-pointer QMAKE_CFLAGS -= -O2 -fomit-frame-pointer From c15ae864b5d7f82c39267b4762a6b37060b77e87 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 10 Jun 2018 11:00:38 +0200 Subject: [PATCH 03/15] deep_search: use service serializer not serial_process Avoid tricky pointers arithmetic, thanks Cyril for suggestion --- libretroshare/src/gxs/rsgenexchange.cc | 3 +- libretroshare/src/gxs/rsgxsutil.cc | 44 +++++++++++--------------- libretroshare/src/gxs/rsgxsutil.h | 12 ++++--- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/libretroshare/src/gxs/rsgenexchange.cc b/libretroshare/src/gxs/rsgenexchange.cc index 69344383d..7538c19c0 100644 --- a/libretroshare/src/gxs/rsgenexchange.cc +++ b/libretroshare/src/gxs/rsgenexchange.cc @@ -206,7 +206,8 @@ void RsGenExchange::tick() if(!mIntegrityCheck) { - mIntegrityCheck = new RsGxsIntegrityCheck(mDataStore,this,mGixs); + mIntegrityCheck = new RsGxsIntegrityCheck( mDataStore, this, + *mSerialiser, mGixs); mIntegrityCheck->start("gxs integrity"); mChecking = true; } diff --git a/libretroshare/src/gxs/rsgxsutil.cc 
b/libretroshare/src/gxs/rsgxsutil.cc index 2c23d9e89..5e3ed9b83 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -149,9 +149,9 @@ bool RsGxsMessageCleanUp::clean() RsGxsIntegrityCheck::RsGxsIntegrityCheck( RsGeneralDataService* const dataService, RsGenExchange* genex, - RsGixs* gixs ) : - mDs(dataService), mGenExchangeClient(genex), mDone(false), - mIntegrityMutex("integrity"), mGixs(gixs) {} + RsSerialType& serializer, RsGixs* gixs ) : + mDs(dataService), mGenExchangeClient(genex), mSerializer(serializer), + mDone(false), mIntegrityMutex("integrity"), mGixs(gixs) {} void RsGxsIntegrityCheck::run() { @@ -205,28 +205,18 @@ bool RsGxsIntegrityCheck::check() #ifdef RS_DEEP_SEARCH if(isGxsChannels) { - RsGxsChannelGroup cg; RsGxsGrpMetaData meta; - meta.deserialise(grp->meta.bin_data, grp->meta.bin_len); - /* TODO: Apparently a copy of the pointer to - * grp.bin_data is stored into grp.bin_data thus - * breaking the deserialization, skipping the pointer - * (8 bytes on x86_64 debug build) fix the - * deserilization, talk to Cyril how to properly fix - * this.*/ - RsGenericSerializer::SerializeContext ctx( - static_cast(grp->grp.bin_data)+8, - grp->grp.bin_len-8 ); + uint32_t blz = grp->grp.bin_len; + RsItem* rIt = mSerializer.deserialise(grp->grp.bin_data, + &blz); - RsGxsChannelGroupItem cgIt; - cgIt.serial_process( RsGenericSerializer::DESERIALIZE, - ctx ); - - if(ctx.mOk) + if( RsGxsChannelGroupItem* cgIt = + dynamic_cast(rIt) ) { - cgIt.toChannelGroup(cg, false); + RsGxsChannelGroup cg; + cgIt->toChannelGroup(cg, false); cg.mMeta = meta; DeepSearch::indexChannelGroup(cg); @@ -239,12 +229,14 @@ bool RsGxsIntegrityCheck::check() std::cout << __PRETTY_FUNCTION__ << " ||Group: " << meta.mGroupName << " ||doesn't seems a channel" - << " ||ctx.mOk: " << ctx.mOk - << " ||ctx.mData: " << (void*)ctx.mData - << " ||ctx.mSize: " << ctx.mSize - << " ||grp->grp.bin_data: " << grp->grp.bin_data - << " ||grp->grp.bin_len: " << 
grp->grp.bin_len - << std::endl; + << " ||grp->grp.bin_data: " + << grp->grp.bin_data + << " ||grp->grp.bin_len: " + << grp->grp.bin_len + << " ||rIt: " << rIt << " ||blz: " << blz + << " ||cgIt: " << cgIt << std::endl; + + delete rIt; } #endif diff --git a/libretroshare/src/gxs/rsgxsutil.h b/libretroshare/src/gxs/rsgxsutil.h index 70e832c3e..faea08040 100644 --- a/libretroshare/src/gxs/rsgxsutil.h +++ b/libretroshare/src/gxs/rsgxsutil.h @@ -201,7 +201,9 @@ public: * @param chunkSize * @param sleepPeriod */ - RsGxsIntegrityCheck(RsGeneralDataService* const dataService, RsGenExchange *genex, RsGixs *gixs); + RsGxsIntegrityCheck( RsGeneralDataService* const dataService, + RsGenExchange *genex, RsSerialType& gxsSerialiser, + RsGixs *gixs); bool check(); bool isDone(); @@ -213,13 +215,15 @@ public: private: RsGeneralDataService* const mDs; - RsGenExchange *mGenExchangeClient; + RsGenExchange *mGenExchangeClient; + RsSerialType& mSerializer; + bool mDone; RsMutex mIntegrityMutex; std::list mDeletedGrps; std::map > mDeletedMsgs; - - RsGixs *mGixs ; + + RsGixs* mGixs; }; class GroupUpdate From 0f63283f96f644f475a87315a48583bd8c44267a Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 10 Jun 2018 19:04:11 +0200 Subject: [PATCH 04/15] Add search capability to DeepSearch --- libretroshare/src/deep_search/deep_search.h | 78 +++++++++++++++++++-- libretroshare/src/gxs/rsgxsutil.cc | 23 +++--- libretroshare/src/gxs/rsgxsutil.h | 1 + 3 files changed, 83 insertions(+), 19 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 6af963c6d..ae5a97f2e 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -17,6 +17,7 @@ * along with this program. If not, see . 
*/ +#include #include #include "retroshare/rsgxschannels.h" @@ -25,7 +26,55 @@ struct DeepSearch { //DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {} - static void search(/*query*/) { /*return all matching results*/ } + struct SearchResult + { + // TODO: Use RsUrl from extra_locators branch instead of plain string + std::string mUrl; + std::string mSnippet; + }; + + /** + * @return search results count + */ + static uint32_t search( const std::string& queryStr, + std::vector& results, + uint32_t maxResults = 100 ) + { + results.clear(); + + // Open the database we're going to search. + Xapian::Database db(mDbPath); + + // Set up a QueryParser with a stemmer and suitable prefixes. + Xapian::QueryParser queryparser; + //queryparser.set_stemmer(Xapian::Stem("en")); + queryparser.set_stemming_strategy(queryparser.STEM_SOME); + // Start of prefix configuration. + //queryparser.add_prefix("title", "S"); + //queryparser.add_prefix("description", "XD"); + // End of prefix configuration. + + // And parse the query. + Xapian::Query query = queryparser.parse_query(queryStr); + + // Use an Enquire object on the database to run the query. + Xapian::Enquire enquire(db); + enquire.set_query(query); + + Xapian::MSet mset = enquire.get_mset(0, maxResults); + + for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m ) + { + const Xapian::Document& doc = m.get_document(); + + SearchResult s; + s.mUrl = doc.get_value(URL_VALUENO); + s.mSnippet = mset.snippet(doc.get_data()); + results.push_back(s); + } + + return results.size(); + } static void indexChannelGroup(const RsGxsChannelGroup& chan) @@ -49,18 +98,26 @@ struct DeepSearch termgenerator.increase_termpos(); termgenerator.index_text(chan.mDescription); + std::string rsLink("retroshare://channel?id="); + rsLink += chan.mMeta.mGroupId.toStdString(); + + // store the RS link so we are able to retrive it on matching search + doc.add_value(URL_VALUENO, rsLink); + + // Store some fields for display purposes. 
+ doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription); + // We use the identifier to ensure each object ends up in the // database only once no matter how many times we run the - // indexer. - std::string idTerm("Qretroshare://channel?id="); - idTerm += chan.mMeta.mGroupId.toStdString(); - + // indexer. "Q" prefix is a Xapian convention for unique id term. + const std::string idTerm("Q" + rsLink); doc.add_boolean_term(idTerm); db.replace_document(idTerm, doc); } static void removeChannelFromIndex(RsGxsGroupId grpId) { + // "Q" prefix is a Xapian convention for unique id term. std::string idTerm("Qretroshare://channel?id="); idTerm += grpId.toStdString(); @@ -100,6 +157,17 @@ struct DeepSearch db.replace_document(idTerm, doc); } +private: + + enum : Xapian::valueno + { + /// Used to store retroshare url of indexed documents + URL_VALUENO, + + /// @see Xapian::BAD_VALUENO + BAD_VALUENO = Xapian::BAD_VALUENO + }; + static std::string mDbPath; }; diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index 5e3ed9b83..2011ff8d9 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -4,6 +4,7 @@ * RetroShare C++ Interface. 
Generic routines that are useful in GXS * * Copyright 2013-2013 by Christopher Evi-Parker + * Copyright (C) 2018 Gioacchino Mazzurco * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -165,8 +166,6 @@ bool RsGxsIntegrityCheck::check() { #ifdef RS_DEEP_SEARCH bool isGxsChannels = dynamic_cast(mGenExchangeClient); - std::cout << __PRETTY_FUNCTION__ << " isGxsChannels: " << isGxsChannels - << std::endl; #endif // first take out all the groups @@ -220,21 +219,17 @@ bool RsGxsIntegrityCheck::check() cg.mMeta = meta; DeepSearch::indexChannelGroup(cg); - - std::cout << __PRETTY_FUNCTION__ << " ||Channel: " - << meta.mGroupName << " ||Description: " - << cg.mDescription << std::endl; } else - std::cout << __PRETTY_FUNCTION__ << " ||Group: " + { + std::cerr << __PRETTY_FUNCTION__ << " Group: " + << meta.mGroupId.toStdString() << " " << meta.mGroupName - << " ||doesn't seems a channel" - << " ||grp->grp.bin_data: " - << grp->grp.bin_data - << " ||grp->grp.bin_len: " - << grp->grp.bin_len - << " ||rIt: " << rIt << " ||blz: " << blz - << " ||cgIt: " << cgIt << std::endl; + << " doesn't seems a channel, please " + << "report to developers" + << std::endl; + print_stacktrace(); + } delete rIt; } diff --git a/libretroshare/src/gxs/rsgxsutil.h b/libretroshare/src/gxs/rsgxsutil.h index faea08040..694f22116 100644 --- a/libretroshare/src/gxs/rsgxsutil.h +++ b/libretroshare/src/gxs/rsgxsutil.h @@ -4,6 +4,7 @@ * RetroShare C++ Interface. 
Generic routines that are useful in GXS * * Copyright 2013-2013 by Christopher Evi-Parker + * Copyright (C) 2018 Gioacchino Mazzurco * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public From 5a41b3cb3762450d27dd44d9ba8a754e613ee880 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Mon, 11 Jun 2018 13:03:01 +0200 Subject: [PATCH 05/15] Index only public channels --- libretroshare/src/gxs/rsgxsutil.cc | 70 +++++++++++---------- libretroshare/src/retroshare/rsgxscircles.h | 7 +-- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index 2011ff8d9..60106b411 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -201,40 +201,6 @@ bool RsGxsIntegrityCheck::check() { subscribed_groups.insert(git->first); -#ifdef RS_DEEP_SEARCH - if(isGxsChannels) - { - RsGxsGrpMetaData meta; - meta.deserialise(grp->meta.bin_data, grp->meta.bin_len); - - uint32_t blz = grp->grp.bin_len; - RsItem* rIt = mSerializer.deserialise(grp->grp.bin_data, - &blz); - - if( RsGxsChannelGroupItem* cgIt = - dynamic_cast(rIt) ) - { - RsGxsChannelGroup cg; - cgIt->toChannelGroup(cg, false); - cg.mMeta = meta; - - DeepSearch::indexChannelGroup(cg); - } - else - { - std::cerr << __PRETTY_FUNCTION__ << " Group: " - << meta.mGroupId.toStdString() << " " - << meta.mGroupName - << " doesn't seems a channel, please " - << "report to developers" - << std::endl; - print_stacktrace(); - } - - delete rIt; - } -#endif - if(!grp->metaData->mAuthorId.isNull()) { #ifdef DEBUG_GXSUTIL @@ -246,6 +212,42 @@ bool RsGxsIntegrityCheck::check() } } else msgIds.erase(msgIds.find(grp->grpId)); + +#ifdef RS_DEEP_SEARCH + if( isGxsChannels + && grp->metaData->mCircleType == GXS_CIRCLE_TYPE_PUBLIC + && grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED ) + { + RsGxsGrpMetaData meta; + 
meta.deserialise(grp->meta.bin_data, grp->meta.bin_len); + + uint32_t blz = grp->grp.bin_len; + RsItem* rIt = mSerializer.deserialise(grp->grp.bin_data, + &blz); + + if( RsGxsChannelGroupItem* cgIt = + dynamic_cast(rIt) ) + { + RsGxsChannelGroup cg; + cgIt->toChannelGroup(cg, false); + cg.mMeta = meta; + + DeepSearch::indexChannelGroup(cg); + } + else + { + std::cerr << __PRETTY_FUNCTION__ << " Group: " + << meta.mGroupId.toStdString() << " " + << meta.mGroupName + << " doesn't seems a channel, please " + << "report to developers" + << std::endl; + print_stacktrace(); + } + + delete rIt; + } +#endif } else { diff --git a/libretroshare/src/retroshare/rsgxscircles.h b/libretroshare/src/retroshare/rsgxscircles.h index 8ce446200..6d93507e8 100644 --- a/libretroshare/src/retroshare/rsgxscircles.h +++ b/libretroshare/src/retroshare/rsgxscircles.h @@ -49,10 +49,9 @@ extern RsGxsCircles *rsGxsCircles; typedef RsPgpId RsPgpId; -// The meaning of the different circle types is: -// -// -static const uint32_t GXS_CIRCLE_TYPE_UNKNOWN = 0x0000 ; // not known. Is treated as public. +/// The meaning of the different circle types is: +/// TODO: convert to enum +static const uint32_t GXS_CIRCLE_TYPE_UNKNOWN = 0x0000 ; /// Used to detect uninizialized values. 
static const uint32_t GXS_CIRCLE_TYPE_PUBLIC = 0x0001 ; // not restricted to a circle static const uint32_t GXS_CIRCLE_TYPE_EXTERNAL = 0x0002 ; // restricted to an external circle, made of RsGxsId static const uint32_t GXS_CIRCLE_TYPE_YOUR_FRIENDS_ONLY = 0x0003 ; // restricted to a subset of friend nodes of a given RS node given by a RsPgpId list From 32014eaac1cb0496195956b3305cdc12a4833245 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Tue, 12 Jun 2018 14:12:08 +0200 Subject: [PATCH 06/15] Use proper path for DeepSearch xapian DB --- libretroshare/src/deep_search/deep_search.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index ae5a97f2e..3d916a62a 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -21,11 +21,10 @@ #include #include "retroshare/rsgxschannels.h" +#include "retroshare/rsinit.h" struct DeepSearch { - //DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {} - struct SearchResult { // TODO: Use RsUrl from extra_locators branch instead of plain string @@ -43,7 +42,7 @@ struct DeepSearch results.clear(); // Open the database we're going to search. - Xapian::Database db(mDbPath); + Xapian::Database db(dbPath()); // Set up a QueryParser with a stemmer and suitable prefixes. Xapian::QueryParser queryparser; @@ -79,7 +78,7 @@ struct DeepSearch static void indexChannelGroup(const RsGxsChannelGroup& chan) { - Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN); // Set up a TermGenerator that we'll use in indexing. 
Xapian::TermGenerator termgenerator; @@ -121,13 +120,13 @@ struct DeepSearch std::string idTerm("Qretroshare://channel?id="); idTerm += grpId.toStdString(); - Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN); db.delete_document(idTerm); } static void indexChannelPost(const RsGxsChannelPost& post) { - Xapian::WritableDatabase db(mDbPath, Xapian::DB_CREATE_OR_OPEN); + Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN); // Set up a TermGenerator that we'll use in indexing. Xapian::TermGenerator termgenerator; @@ -168,7 +167,11 @@ private: BAD_VALUENO = Xapian::BAD_VALUENO }; - static std::string mDbPath; + static const std::string& dbPath() + { + static const std::string dbDir = + RsAccounts::AccountDirectory() + "/deep_search_xapian_db"; + return dbDir; + } }; -std::string DeepSearch::mDbPath = "/tmp/deep_search_xapian_db"; From d3e5b760a2e8bf663bd002d85e5e52fed2fd46ec Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Wed, 4 Jul 2018 12:08:50 +0200 Subject: [PATCH 07/15] DeepSearch index channels posts too Improve indexing using RsUrl, store some relevant fields in stored url --- libretroshare/src/deep_search/deep_search.h | 87 ++++++++++++--- libretroshare/src/gxs/rsgxsutil.cc | 112 ++++++++++++++------ 2 files changed, 154 insertions(+), 45 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 3d916a62a..7152e34ce 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -17,17 +17,18 @@ * along with this program. If not, see . 
*/ +#include #include #include #include "retroshare/rsgxschannels.h" #include "retroshare/rsinit.h" +#include "util/rsurl.h" struct DeepSearch { struct SearchResult { - // TODO: Use RsUrl from extra_locators branch instead of plain string std::string mUrl; std::string mSnippet; }; @@ -90,6 +91,11 @@ struct DeepSearch // Index each field with a suitable prefix. termgenerator.index_text(chan.mMeta.mGroupName, 1, "G"); + + char date[] = "YYYYMMDD\0"; + std::strftime(date, 9, "%Y%m%d", std::gmtime(&chan.mMeta.mPublishTs)); + termgenerator.index_text(date, 1, "D"); + termgenerator.index_text(chan.mDescription, 1, "XD"); // Index fields without prefixes for general search. @@ -97,8 +103,14 @@ struct DeepSearch termgenerator.increase_termpos(); termgenerator.index_text(chan.mDescription); - std::string rsLink("retroshare://channel?id="); - rsLink += chan.mMeta.mGroupId.toStdString(); + RsUrl chanUrl; chanUrl + .setScheme("retroshare").setPath("/channel") + .setQueryKV("id", chan.mMeta.mGroupId.toStdString()); + const std::string idTerm("Q" + chanUrl.toString()); + + chanUrl.setQueryKV("publishDate", date); + chanUrl.setQueryKV("name", chan.mMeta.mGroupName); + std::string rsLink(chanUrl.toString()); // store the RS link so we are able to retrive it on matching search doc.add_value(URL_VALUENO, rsLink); @@ -109,7 +121,6 @@ struct DeepSearch // We use the identifier to ensure each object ends up in the // database only once no matter how many times we run the // indexer. "Q" prefix is a Xapian convention for unique id term. - const std::string idTerm("Q" + rsLink); doc.add_boolean_term(idTerm); db.replace_document(idTerm, doc); } @@ -117,8 +128,10 @@ struct DeepSearch static void removeChannelFromIndex(RsGxsGroupId grpId) { // "Q" prefix is a Xapian convention for unique id term. 
- std::string idTerm("Qretroshare://channel?id="); - idTerm += grpId.toStdString(); + RsUrl chanUrl; chanUrl + .setScheme("retroshare").setPath("/channel") + .setQueryKV("id", grpId.toStdString()); + std::string idTerm("Q" + chanUrl.toString()); Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN); db.delete_document(idTerm); @@ -138,24 +151,72 @@ struct DeepSearch // Index each field with a suitable prefix. termgenerator.index_text(post.mMeta.mMsgName, 1, "S"); - termgenerator.index_text(post.mMsg, 1, "XD"); + + char date[] = "YYYYMMDD\0"; + std::strftime(date, 9, "%Y%m%d", std::gmtime(&post.mMeta.mPublishTs)); + termgenerator.index_text(date, 1, "D"); + + // Avoid indexing HTML + bool isPlainMsg = post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>'; + + if(isPlainMsg) + termgenerator.index_text(post.mMsg, 1, "XD"); // Index fields without prefixes for general search. termgenerator.index_text(post.mMeta.mMsgName); - termgenerator.increase_termpos(); - termgenerator.index_text(post.mMsg); + if(isPlainMsg) + { + termgenerator.increase_termpos(); + termgenerator.index_text(post.mMsg); + } + + for(const RsGxsFile& attachment : post.mFiles) + { + termgenerator.index_text(attachment.mName, 1, "F"); + + termgenerator.increase_termpos(); + termgenerator.index_text(attachment.mName); + } // We use the identifier to ensure each object ends up in the // database only once no matter how many times we run the // indexer. 
- std::string idTerm("Qretroshare://channel?id="); - idTerm += post.mMeta.mGroupId.toStdString(); - idTerm += "&msgid="; - idTerm += post.mMeta.mMsgId.toStdString(); + RsUrl postUrl; postUrl + .setScheme("retroshare").setPath("/channel") + .setQueryKV("id", post.mMeta.mGroupId.toStdString()) + .setQueryKV("msgid", post.mMeta.mMsgId.toStdString()); + std::string idTerm("Q" + postUrl.toString()); + + postUrl.setQueryKV("publishDate", date); + postUrl.setQueryKV("name", post.mMeta.mMsgName); + std::string rsLink(postUrl.toString()); + + // store the RS link so we are able to retrive it on matching search + doc.add_value(URL_VALUENO, rsLink); + + // Store some fields for display purposes. + if(isPlainMsg) + doc.set_data(post.mMeta.mMsgName + "\n" + post.mMsg); + else doc.set_data(post.mMeta.mMsgName); + doc.add_boolean_term(idTerm); db.replace_document(idTerm, doc); } + static void removeChannelPostFromIndex( + RsGxsGroupId grpId, RsGxsMessageId msgId ) + { + RsUrl postUrl; postUrl + .setScheme("retroshare").setPath("/channel") + .setQueryKV("id", grpId.toStdString()) + .setQueryKV("msgid", msgId.toStdString()); + // "Q" prefix is a Xapian convention for unique id term. 
+ std::string idTerm("Q" + postUrl.toString()); + + Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN); + db.delete_document(idTerm); + } + private: enum : Xapian::valueno diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index 60106b411..75b43da83 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -166,6 +166,7 @@ bool RsGxsIntegrityCheck::check() { #ifdef RS_DEEP_SEARCH bool isGxsChannels = dynamic_cast(mGenExchangeClient); + std::set indexedGroups; #endif // first take out all the groups @@ -232,6 +233,7 @@ bool RsGxsIntegrityCheck::check() cgIt->toChannelGroup(cg, false); cg.mMeta = meta; + indexedGroups.insert(grp->grpId); DeepSearch::indexChannelGroup(cg); } else @@ -309,53 +311,99 @@ bool RsGxsIntegrityCheck::check() } if (nxsMsgIt == nxsMsgV.end()) - { - msgsToDel[grpId].insert(msgId); + { + msgsToDel[grpId].insert(msgId); +#ifdef RS_DEEP_SEARCH + if(isGxsChannels) + DeepSearch::removeChannelPostFromIndex(grpId, msgId); +#endif } } } - GxsMsgResult::iterator mit = msgs.begin(); + GxsMsgResult::iterator mit = msgs.begin(); + for(; mit != msgs.end(); ++mit) + { + std::vector& msgV = mit->second; + std::vector::iterator vit = msgV.begin(); - for(; mit != msgs.end(); ++mit) - { - std::vector& msgV = mit->second; - std::vector::iterator vit = msgV.begin(); + for(; vit != msgV.end(); ++vit) + { + RsNxsMsg* msg = *vit; + RsFileHash currHash; + pqihash pHash; + pHash.addData(msg->msg.bin_data, msg->msg.bin_len); + pHash.Complete(currHash); - for(; vit != msgV.end(); ++vit) - { - RsNxsMsg* msg = *vit; - RsFileHash currHash; - pqihash pHash; - pHash.addData(msg->msg.bin_data, msg->msg.bin_len); - pHash.Complete(currHash); - - if(msg->metaData == NULL || currHash != msg->metaData->mHash) - { - std::cerr << "(EE) deleting message data with wrong hash or null meta data. 
meta=" << (void*)msg->metaData << std::endl; - msgsToDel[msg->grpId].insert(msg->msgId); - } - else if(!msg->metaData->mAuthorId.isNull() && subscribed_groups.find(msg->metaData->mGroupId)!=subscribed_groups.end()) - { -#ifdef DEBUG_GXSUTIL - GXSUTIL_DEBUG() << "TimeStamping message authors' key ID " << msg->metaData->mAuthorId << " in message " << msg->msgId << ", group ID " << msg->grpId<< std::endl; + if(msg->metaData == NULL || currHash != msg->metaData->mHash) + { + std::cerr << __PRETTY_FUNCTION__ <<" (EE) deleting message data" + << " with wrong hash or null meta data. meta=" + << (void*)msg->metaData << std::endl; + msgsToDel[msg->grpId].insert(msg->msgId); +#ifdef RS_DEEP_SEARCH + if(isGxsChannels) + DeepSearch::removeChannelPostFromIndex(msg->grpId, msg->msgId); #endif - if(rsReputations!=NULL && rsReputations->overallReputationLevel(msg->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE) - used_gxs_ids.insert(std::make_pair(msg->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::MESSAGE_AUTHOR_KEEP_ALIVE,msg->metaData->mGroupId,msg->metaData->mMsgId))) ; - } + } + else if (subscribed_groups.count(msg->metaData->mGroupId)) + { +#ifdef RS_DEEP_SEARCH + if( isGxsChannels + && indexedGroups.count(msg->metaData->mGroupId) ) + { + RsGxsMsgMetaData meta; + meta.deserialise(msg->meta.bin_data, &msg->meta.bin_len); + + uint32_t blz = msg->msg.bin_len; + RsItem* rIt = mSerializer.deserialise(msg->msg.bin_data, + &blz); + + if( RsGxsChannelPostItem* cgIt = + dynamic_cast(rIt) ) + { + RsGxsChannelPost cg; + cgIt->toChannelPost(cg, false); + cg.mMeta = meta; + + DeepSearch::indexChannelPost(cg); + } + else if(dynamic_cast(rIt)) {} + else if(dynamic_cast(rIt)) {} + else + { + std::cerr << __PRETTY_FUNCTION__ << " Message: " + << meta.mMsgId.toStdString() + << " in group: " + << meta.mGroupId.toStdString() << " " + << " doesn't seems a channel post, please " + << "report to developers" + << std::endl; + 
print_stacktrace(); + } + + delete rIt; + } +#endif + + if(!msg->metaData->mAuthorId.isNull()) + { +#ifdef DEBUG_GXSUTIL + GXSUTIL_DEBUG() << "TimeStamping message authors' key ID " << msg->metaData->mAuthorId << " in message " << msg->msgId << ", group ID " << msg->grpId<< std::endl; +#endif + if(rsReputations!=NULL && rsReputations->overallReputationLevel(msg->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE) + used_gxs_ids.insert(std::make_pair(msg->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::MESSAGE_AUTHOR_KEEP_ALIVE,msg->metaData->mGroupId,msg->metaData->mMsgId))) ; + } + } delete msg; } } -#ifdef RS_DEEP_SEARCH - // TODO:remove msgsToDel from deep search index too -#endif - mDs->removeMsgs(msgsToDel); { - RsStackMutex stack(mIntegrityMutex); + RS_STACK_MUTEX(mIntegrityMutex); std::vector::iterator grpIt; for(grpIt = grpsToDel.begin(); grpIt != grpsToDel.end(); ++grpIt) From 3fc9ff3fef95cbca2fab51967280b704d5c8aec8 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Fri, 20 Jul 2018 15:29:37 +0200 Subject: [PATCH 08/15] WIP Plug deep search into GXS search --- libretroshare/src/gxs/rsgxsnetservice.cc | 53 ++++++++++++++++++++--- libretroshare/src/retroshare/rsgxsiface.h | 5 ++- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index d8a882945..6818caae6 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -260,6 +260,10 @@ #include "util/rsmemory.h" #include "util/stacktrace.h" +#ifdef RS_DEEP_SEARCH +# include "deep_search/deep_search.h" +#endif + /*** * Use the following defines to debug: NXS_NET_DEBUG_0 shows group update high level information @@ -5271,16 +5275,52 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req,const unsig mObserver->receiveNewGroups(new_grps); } -bool RsGxsNetService::search(const std::string& 
substring,std::list& group_infos) +bool RsGxsNetService::search( const std::string& substring, + std::list& group_infos ) { + group_infos.clear(); + RsGxsGrpMetaTemporaryMap grpMetaMap; - { + { RS_STACK_MUTEX(mNxsMutex) ; mDataStore->retrieveGxsGrpMetaData(grpMetaMap); - } + } +#ifdef RS_DEEP_SEARCH + std::vector results; + DeepSearch::search(substring, results, 0); + + for(auto dsr : results) + { + RsUrl rUrl(dsr.mUrl); + auto rit = rUrl.query().find("id"); + if(rit != rUrl.query().end()) + { + RsGroupNetworkStats stats; + RsGxsGroupId grpId(rit->second); + RsGxsGrpMetaTemporaryMap::iterator mIt; + if( !grpId.isNull() && + (mIt = grpMetaMap.find(grpId)) != grpMetaMap.end() && + getGroupNetworkStats(grpId, stats) ) + { + RsGxsGrpMetaData& gMeta(*mIt->second); + RsGxsGroupSummary s; + s.group_id = grpId; + s.group_name = gMeta.mGroupName; + s.search_context = dsr.mSnippet; + s.sign_flags = gMeta.mSignFlags; + s.publish_ts = gMeta.mSignFlags; + s.author_id = gMeta.mAuthorId; + s.number_of_messages = stats.mMaxVisibleCount; + s.last_message_ts = stats.mLastGroupModificationTS; + s.popularity = gMeta.mPop; + + group_infos.push_back(s); + } + } + } +#else // RS_DEEP_SEARCH RsGroupNetworkStats stats ; - for(auto it(grpMetaMap.begin());it!=grpMetaMap.end();++it) if(termSearch(it->second->mGroupName,substring)) { @@ -5289,7 +5329,7 @@ bool RsGxsNetService::search(const std::string& substring,std::listfirst ; s.group_name = it->second->mGroupName ; - s.group_description = it->second->mGroupName ; // to be filled with something better when we use the real search + // to be filled with something better when we use the real search s.search_context = it->second->mGroupName ; s.sign_flags = it->second->mSignFlags; s.publish_ts = it->second->mPublishTs; @@ -5298,8 +5338,9 @@ bool RsGxsNetService::search(const std::string& substring,std::listsecond->mPop; - group_infos.push_back(s) ; + group_infos.push_back(s); } +#endif // RS_DEEP_SEARCH #ifdef NXS_NET_DEBUG_8 GXSNETDEBUG___ << " 
performing local substring search in response to distant request. Found " << group_infos.size() << " responses." << std::endl; diff --git a/libretroshare/src/retroshare/rsgxsiface.h b/libretroshare/src/retroshare/rsgxsiface.h index bdf65c115..7ccedff36 100644 --- a/libretroshare/src/retroshare/rsgxsiface.h +++ b/libretroshare/src/retroshare/rsgxsiface.h @@ -31,6 +31,7 @@ #include "retroshare/rsgxsservice.h" #include "gxs/rsgxsdata.h" #include "retroshare/rsgxsifacetypes.h" +#include "util/rsdeprecate.h" /*! * \brief The RsGxsGroupSymmary struct @@ -44,8 +45,8 @@ struct RsGxsGroupSummary RsGxsGroupId group_id ; - std::string group_name ; - std::string group_description ; + std::string group_name ; + RS_DEPRECATED std::string group_description; std::string search_context ; RsGxsId author_id ; time_t publish_ts ; From 8149ef9e45fe421df333faf976f4587d48f4245e Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sat, 21 Jul 2018 01:17:28 +0200 Subject: [PATCH 09/15] Install xapian dependency in Continuous Integration --- .travis.yml | 4 ++-- appveyor.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4ecc0dab7..e11da2148 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,10 +13,10 @@ matrix: before_install: - if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get update; fi - - if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get install -y build-essential libssl-dev libsqlcipher-dev libbz2-dev libmicrohttpd-dev libsqlite3-dev libupnp-dev pkg-config qt5-default libxss-dev qtmultimedia5-dev libqt5x11extras5-dev libqt5designer5 qttools5-dev; fi + - if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get install -y build-essential libssl-dev libsqlcipher-dev libbz2-dev libmicrohttpd-dev libsqlite3-dev libupnp-dev pkg-config qt5-default libxss-dev qtmultimedia5-dev libqt5x11extras5-dev libqt5designer5 libxapian-dev qttools5-dev; fi - if [ $TRAVIS_OS_NAME == osx ]; then brew update ; fi - - if [ $TRAVIS_OS_NAME == osx ]; then brew
install qt55 openssl miniupnpc libmicrohttpd sqlcipher; fi + - if [ $TRAVIS_OS_NAME == osx ]; then brew install qt55 openssl miniupnpc libmicrohttpd sqlcipher xapian; fi - if [ $TRAVIS_OS_NAME == osx ]; then brew link --force qt55 ; fi - wget https://github.com/Tencent/rapidjson/archive/v1.1.0.tar.gz diff --git a/appveyor.yml b/appveyor.yml index 6f26a1c67..8764ef0f5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -88,7 +88,7 @@ install: # Configuring MSys2 - set PATH=C:\msys64\usr\bin;%PATH% - set PATH=C:\msys64\mingw32\bin;%PATH% - - pacman --noconfirm -S mingw-w64-i686-qt5 mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd + - pacman --noconfirm -S mingw-w64-i686-qt5 mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd mingw-w64-xapian-core #- pacman --noconfirm -S mingw-w64-i686-qt5-static mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd #- set PATH=C:\msys64\mingw32\qt5-static\bin\;%PATH% From 6982ae6cd5be55ab79135f6bfa1adc88f9f621e2 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sat, 21 Jul 2018 13:20:50 +0200 Subject: [PATCH 10/15] Improve retrocompatibility with older xapian --- libretroshare/src/deep_search/deep_search.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 7152e34ce..368aa7e84 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -25,6 +25,13 @@ #include "retroshare/rsinit.h" #include "util/rsurl.h" +#ifndef XAPIAN_AT_LEAST +#define XAPIAN_AT_LEAST(A,B,C) (XAPIAN_MAJOR_VERSION > (A) || \ + (XAPIAN_MAJOR_VERSION == (A) && \ + (XAPIAN_MINOR_VERSION > (B) || \ + (XAPIAN_MINOR_VERSION == (B) && XAPIAN_REVISION >= (C))))) +#endif // ndef XAPIAN_AT_LEAST + struct DeepSearch { struct SearchResult @@ -66,10 +73,11 @@ struct DeepSearch for ( Xapian::MSetIterator m = mset.begin(); m != 
mset.end(); ++m ) { const Xapian::Document& doc = m.get_document(); - SearchResult s; s.mUrl = doc.get_value(URL_VALUENO); +#if XAPIAN_AT_LEAST(1,3,5) s.mSnippet = mset.snippet(doc.get_data()); +#endif // XAPIAN_AT_LEAST(1,3,5) results.push_back(s); } From f97dc8a1258ff66efcc01ae4105f8b653544e35d Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 22 Jul 2018 21:33:40 +0200 Subject: [PATCH 11/15] Properly plug deep search in GXS search Some modifications break retrocompatibility of GXS search: remove horrible templated RsTypeSerializer::serial_process for RsGxsGroupSummary with hardcoded member names RsGxsGroupSummary doesn't use old TLV serialization format anymore RsGxsGroupSummary remove unused description member RsGxsGroupSummary derive from RsSerializable and use serialization helper macro Add author id and signature flags to the index so there is no need to retrieve them from GXS, thus improving performance RsGroupNetworkStats initialize members properly RsGxsGroupSummary rename members to follow usual mMemberName convention --- libretroshare/src/deep_search/deep_search.h | 5 ++ libretroshare/src/gxs/rsgds.h | 26 ++++--- libretroshare/src/gxs/rsgxsnetservice.cc | 70 +++++++++++-------- libretroshare/src/gxs/rsgxsnettunnel.cc | 18 +---- libretroshare/src/retroshare/rsgxsiface.h | 48 ++++++++----- libretroshare/src/services/p3gxschannels.cc | 21 +++--- .../src/gui/gxs/GxsGroupFrameDialog.cpp | 23 +++--- 7 files changed, 111 insertions(+), 100 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 368aa7e84..914fce6a2 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -118,6 +118,11 @@ struct DeepSearch chanUrl.setQueryKV("publishDate", date); chanUrl.setQueryKV("name", chan.mMeta.mGroupName); + if(!chan.mMeta.mAuthorId.isNull()) + chanUrl.setQueryKV("authorId", chan.mMeta.mAuthorId.toStdString()); + if(chan.mMeta.mSignFlags) +
chanUrl.setQueryKV( "signFlags", + std::to_string(chan.mMeta.mSignFlags) ); std::string rsLink(chanUrl.toString()); // store the RS link so we are able to retrive it on matching search diff --git a/libretroshare/src/gxs/rsgds.h b/libretroshare/src/gxs/rsgds.h index 34e5d2076..84276d7b6 100644 --- a/libretroshare/src/gxs/rsgds.h +++ b/libretroshare/src/gxs/rsgds.h @@ -74,23 +74,21 @@ public: }; /*! - * This is used to query network statistics for a given group. This is useful to e.g. show group - * popularity, or number of visible messages for unsubscribed group. + * This is used to query network statistics for a given group. This is useful + * to e.g. show group popularity, or number of visible messages for unsubscribed + * group. */ - -class RsGroupNetworkStats +struct RsGroupNetworkStats { -public: - RsGroupNetworkStats() - { - mMaxVisibleCount = 0 ; - } + RsGroupNetworkStats() : + mSuppliers(0), mMaxVisibleCount(0), mGrpAutoSync(false), + mAllowMsgSync(false), mLastGroupModificationTS(0) {} - uint32_t mSuppliers ; - uint32_t mMaxVisibleCount ; - bool mGrpAutoSync ; - bool mAllowMsgSync; - time_t mLastGroupModificationTS ; + uint32_t mSuppliers; + uint32_t mMaxVisibleCount; + bool mGrpAutoSync; + bool mAllowMsgSync; + time_t mLastGroupModificationTS; }; typedef std::map > NxsMsgDataResult; diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index b5751b2cd..b6fe8420d 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -243,6 +243,7 @@ #include #include #include +#include #include "rsgxsnetservice.h" #include "gxssecurity.h" @@ -5187,8 +5188,8 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req, const std: std::map& search_results_map(mDistantSearchResults[req]) ; for(auto it(group_infos.begin());it!=group_infos.end();++it) - if(search_results_map.find((*it).group_id) == search_results_map.end()) - grpMeta[(*it).group_id] = NULL; + 
if(search_results_map.find((*it).mGroupId) == search_results_map.end()) + grpMeta[(*it).mGroupId] = NULL; mDataStore->retrieveGxsGrpMetaData(grpMeta); @@ -5197,26 +5198,26 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req, const std: // only keep groups that are not locally known, and groups that are not already in the mDistantSearchResults structure for(auto it(group_infos.begin());it!=group_infos.end();++it) - if(grpMeta[(*it).group_id] == NULL) + if(grpMeta[(*it).mGroupId] == NULL) { filtered_results.push_back(*it) ; - auto it2 = search_results_map.find((*it).group_id) ; + auto it2 = search_results_map.find((*it).mGroupId) ; if(it2 != search_results_map.end()) { // update existing data - it2->second.popularity++ ; - it2->second.number_of_messages = std::max(it2->second.number_of_messages,(*it).number_of_messages) ; + it2->second.mPopularity++ ; + it2->second.mNumberOfMessages = std::max(it2->second.mNumberOfMessages,(*it).mNumberOfMessages) ; } else { - search_results_map[(*it).group_id] = *it; - search_results_map[(*it).group_id].popularity = 1; // number of results so far + search_results_map[(*it).mGroupId] = *it; + search_results_map[(*it).mGroupId].mPopularity = 1; // number of results so far } - mObserver->receiveDistantSearchResults(req,(*it).group_id) ; + mObserver->receiveDistantSearchResults(req,(*it).mGroupId) ; } } @@ -5277,12 +5278,6 @@ bool RsGxsNetService::search( const std::string& substring, { group_infos.clear(); - RsGxsGrpMetaTemporaryMap grpMetaMap; - { - RS_STACK_MUTEX(mNxsMutex) ; - mDataStore->retrieveGxsGrpMetaData(grpMetaMap); - } - #ifdef RS_DEEP_SEARCH std::vector results; DeepSearch::search(substring, results, 0); @@ -5290,33 +5285,48 @@ bool RsGxsNetService::search( const std::string& substring, for(auto dsr : results) { RsUrl rUrl(dsr.mUrl); - auto rit = rUrl.query().find("id"); + const auto& uQ(rUrl.query()); + auto rit = uQ.find("id"); if(rit != rUrl.query().end()) { RsGroupNetworkStats stats; RsGxsGroupId 
grpId(rit->second); - RsGxsGrpMetaTemporaryMap::iterator mIt; - if( !grpId.isNull() && - (mIt = grpMetaMap.find(grpId)) != grpMetaMap.end() && - getGroupNetworkStats(grpId, stats) ) + if( !grpId.isNull() && getGroupNetworkStats(grpId, stats) ) { - RsGxsGrpMetaData& gMeta(*mIt->second); RsGxsGroupSummary s; - s.group_id = grpId; - s.group_name = gMeta.mGroupName; - s.search_context = dsr.mSnippet; - s.sign_flags = gMeta.mSignFlags; - s.publish_ts = gMeta.mSignFlags; - s.author_id = gMeta.mAuthorId; - s.number_of_messages = stats.mMaxVisibleCount; - s.last_message_ts = stats.mLastGroupModificationTS; - s.popularity = gMeta.mPop; + + s.mGroupId = grpId; + + if((rit = uQ.find("name")) != uQ.end()) + s.mGroupName = rit->second; + if((rit = uQ.find("signFlags")) != uQ.end()) + s.mSignFlags = std::stoul(rit->second); + if((rit = uQ.find("publishDate")) != uQ.end()) + { + std::istringstream ss(rit->second); + std::tm tm; + ss >> std::get_time(&tm, "%Y%m%d"); + s.mPublishTs = mktime(&tm); + } + if((rit = uQ.find("authorId")) != uQ.end()) + s.mAuthorId = RsGxsId(rit->second); + + s.mSearchContext = dsr.mSnippet; + + s.mNumberOfMessages = stats.mMaxVisibleCount; + s.mLastMessageTs = stats.mLastGroupModificationTS; + s.mPopularity = stats.mSuppliers; group_infos.push_back(s); } } } #else // RS_DEEP_SEARCH + RsGxsGrpMetaTemporaryMap grpMetaMap; + { + RS_STACK_MUTEX(mNxsMutex) ; + mDataStore->retrieveGxsGrpMetaData(grpMetaMap); + } RsGroupNetworkStats stats ; for(auto it(grpMetaMap.begin());it!=grpMetaMap.end();++it) if(termSearch(it->second->mGroupName,substring)) diff --git a/libretroshare/src/gxs/rsgxsnettunnel.cc b/libretroshare/src/gxs/rsgxsnettunnel.cc index ff3134493..dfa0d8655 100644 --- a/libretroshare/src/gxs/rsgxsnettunnel.cc +++ b/libretroshare/src/gxs/rsgxsnettunnel.cc @@ -216,22 +216,6 @@ public: } }; -template<> -void RsTypeSerializer::serial_process( RsGenericSerializer::SerializeJob j, RsGenericSerializer::SerializeContext& ctx, RsGxsGroupSummary& gs, const 
std::string& member_name ) -{ - RsTypeSerializer::serial_process (j,ctx,gs.group_id ,member_name+"-group_id") ; // RsGxsGroupId group_id ; - RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_NAME ,gs.group_name,member_name+"-group_name") ; // std::string group_name ; - RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_COMMENT ,gs.group_description,member_name+"-group_description") ; // std::string group_description ; - RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_VALUE ,gs.search_context,member_name+"-group_name") ; // std::string search_context ; - RsTypeSerializer::serial_process (j,ctx,gs.author_id ,member_name+"-author_id") ; // RsGxsId author_id ; - RsTypeSerializer::serial_process (j,ctx,gs.publish_ts ,member_name+"-publish_ts") ; // time_t publish_ts ; - RsTypeSerializer::serial_process (j,ctx,gs.number_of_messages,member_name+"-number_of_messages") ; // uint32_t number_of_messages ; - RsTypeSerializer::serial_process (j,ctx,gs.last_message_ts ,member_name+"-last_message_ts") ; // time_t last_message_ts ; - RsTypeSerializer::serial_process(j,ctx,gs.sign_flags ,member_name+"-sign_flags") ; // uint32_t sign_flags ; - RsTypeSerializer::serial_process(j,ctx,gs.popularity ,member_name+"-popularity") ; // uint32_t popularity ; -} - - //===========================================================================================================================================// // Interface with rest of the software // //===========================================================================================================================================// @@ -1102,7 +1086,7 @@ void RsGxsNetTunnelService::receiveSearchResult(TurtleSearchRequestId request_id GXS_NET_TUNNEL_DEBUG() << " : result is of type group summary result for service " << result_gs->service << std::dec << ": " << std::endl; for(auto it(result_gs->group_infos.begin());it!=result_gs->group_infos.end();++it) - std::cerr << " group " << (*it).group_id << ": " << (*it).group_name << 
", " << (*it).number_of_messages << " messages, last is " << time(NULL)-(*it).last_message_ts << " secs ago." << std::endl; + std::cerr << " group " << (*it).mGroupId << ": " << (*it).mGroupName << ", " << (*it).mNumberOfMessages << " messages, last is " << time(NULL)-(*it).mLastMessageTs << " secs ago." << std::endl; auto it = mSearchableServices.find(result_gs->service) ; diff --git a/libretroshare/src/retroshare/rsgxsiface.h b/libretroshare/src/retroshare/rsgxsiface.h index 7ccedff36..b85f3fbce 100644 --- a/libretroshare/src/retroshare/rsgxsiface.h +++ b/libretroshare/src/retroshare/rsgxsiface.h @@ -34,26 +34,42 @@ #include "util/rsdeprecate.h" /*! - * \brief The RsGxsGroupSymmary struct - * This structure is used to transport group summary information when a GXS service is searched. It contains the group information - * as well as a context string to tell where the information was found. It is more compact than a GroupMeta object, so as to make - * search responses as light as possible. + * This structure is used to transport group summary information when a GXS + * service is searched. It contains the group information as well as a context + * string to tell where the information was found. It is more compact than a + * GroupMeta object, so as to make search responses as light as possible. 
*/ -struct RsGxsGroupSummary +struct RsGxsGroupSummary : RsSerializable { - RsGxsGroupSummary() : publish_ts(0), number_of_messages(0),last_message_ts(0),sign_flags(0),popularity(0) {} + RsGxsGroupSummary() : + mPublishTs(0), mNumberOfMessages(0),mLastMessageTs(0), + mSignFlags(0),mPopularity(0) {} - RsGxsGroupId group_id ; + RsGxsGroupId mGroupId; + std::string mGroupName; + RsGxsId mAuthorId; + time_t mPublishTs; + uint32_t mNumberOfMessages; + time_t mLastMessageTs; + uint32_t mSignFlags; + uint32_t mPopularity; - std::string group_name ; - RS_DEPRECATED std::string group_description; - std::string search_context ; - RsGxsId author_id ; - time_t publish_ts ; - uint32_t number_of_messages ; - time_t last_message_ts ; - uint32_t sign_flags ; - uint32_t popularity ; + std::string mSearchContext; + + /// @see RsSerializable::serial_process + void serial_process( RsGenericSerializer::SerializeJob j, + RsGenericSerializer::SerializeContext& ctx ) + { + RS_SERIAL_PROCESS(mGroupId); + RS_SERIAL_PROCESS(mGroupName); + RS_SERIAL_PROCESS(mAuthorId); + RS_SERIAL_PROCESS(mPublishTs); + RS_SERIAL_PROCESS(mNumberOfMessages); + RS_SERIAL_PROCESS(mLastMessageTs); + RS_SERIAL_PROCESS(mSignFlags); + RS_SERIAL_PROCESS(mPopularity); + RS_SERIAL_PROCESS(mSearchContext); + } }; diff --git a/libretroshare/src/services/p3gxschannels.cc b/libretroshare/src/services/p3gxschannels.cc index 8474d3111..1c7cf1058 100644 --- a/libretroshare/src/services/p3gxschannels.cc +++ b/libretroshare/src/services/p3gxschannels.cc @@ -1703,21 +1703,18 @@ bool p3GxsChannels::retrieveDistantSearchResults(TurtleRequestId req,std::mapretrieveDistantGroupSummary(group_id,gs)) { // This is a placeholder information by the time we receive the full group meta data. 
- - distant_group.mDescription = gs.group_description; - - distant_group.mMeta.mGroupId = gs.group_id ; - distant_group.mMeta.mGroupName = gs.group_name; + distant_group.mMeta.mGroupId = gs.mGroupId ; + distant_group.mMeta.mGroupName = gs.mGroupName; distant_group.mMeta.mGroupFlags = GXS_SERV::FLAG_PRIVACY_PUBLIC ; - distant_group.mMeta.mSignFlags = gs.sign_flags; + distant_group.mMeta.mSignFlags = gs.mSignFlags; - distant_group.mMeta.mPublishTs = gs.publish_ts; - distant_group.mMeta.mAuthorId = gs.author_id; + distant_group.mMeta.mPublishTs = gs.mPublishTs; + distant_group.mMeta.mAuthorId = gs.mAuthorId; distant_group.mMeta.mCircleType = GXS_CIRCLE_TYPE_PUBLIC ;// guessed, otherwise the group would not be search-able. @@ -1726,9 +1723,9 @@ bool p3GxsChannels::retrieveDistantGroup(const RsGxsGroupId& group_id,RsGxsChann distant_group.mMeta.mSubscribeFlags = GXS_SERV::GROUP_SUBSCRIBE_NOT_SUBSCRIBED ; - distant_group.mMeta.mPop = gs.popularity; // Popularity = number of friend subscribers - distant_group.mMeta.mVisibleMsgCount = gs.number_of_messages; // Max messages reported by friends - distant_group.mMeta.mLastPost = gs.last_message_ts; // Timestamp for last message. Not used yet. + distant_group.mMeta.mPop = gs.mPopularity; // Popularity = number of friend subscribers + distant_group.mMeta.mVisibleMsgCount = gs.mNumberOfMessages; // Max messages reported by friends + distant_group.mMeta.mLastPost = gs.mLastMessageTs; // Timestamp for last message. Not used yet. 
return true ; } diff --git a/retroshare-gui/src/gui/gxs/GxsGroupFrameDialog.cpp b/retroshare-gui/src/gui/gxs/GxsGroupFrameDialog.cpp index 58e7bb93a..473b26e7d 100644 --- a/retroshare-gui/src/gui/gxs/GxsGroupFrameDialog.cpp +++ b/retroshare-gui/src/gui/gxs/GxsGroupFrameDialog.cpp @@ -279,21 +279,22 @@ void GxsGroupFrameDialog::updateSearchResults() QList group_items ; - for(auto it3(group_infos.begin());it3!=group_infos.end();++it3) - if(mCachedGroupMetas.find(it3->first) == mCachedGroupMetas.end()) + for(auto it3(group_infos.begin());it3!=group_infos.end();++it3) + if(mCachedGroupMetas.find(it3->first) == mCachedGroupMetas.end()) { - std::cerr << " adding new group " << it3->first << " " << it3->second.group_id << " \"" << it3->second.group_name << "\"" << std::endl; + std::cerr << " adding new group " << it3->first << " " + << it3->second.mGroupId << " \"" + << it3->second.mGroupName << "\"" << std::endl; - GroupItemInfo i ; - i.id = QString(it3->second.group_id.toStdString().c_str()) ; - i.name = QString::fromUtf8(it3->second.group_name.c_str()) ; - i.description = QString::fromUtf8(it3->second.group_description.c_str()) ; + GroupItemInfo i; + i.id = QString(it3->second.mGroupId.toStdString().c_str()); + i.name = QString::fromUtf8(it3->second.mGroupName.c_str()); i.popularity = 0; // could be set to the number of hits - i.lastpost = QDateTime::fromTime_t(it3->second.last_message_ts); + i.lastpost = QDateTime::fromTime_t(it3->second.mLastMessageTs); i.subscribeFlags = 0; // irrelevant here - i.publishKey = false ; // IS_GROUP_PUBLISHER(groupInfo.mSubscribeFlags) ; - i.adminKey = false ; // IS_GROUP_ADMIN(groupInfo.mSubscribeFlags) ; - i.max_visible_posts = it3->second.number_of_messages ; + i.publishKey = false ; // IS_GROUP_PUBLISHER(groupInfo.mSubscribeFlags); + i.adminKey = false ; // IS_GROUP_ADMIN(groupInfo.mSubscribeFlags); + i.max_visible_posts = it3->second.mNumberOfMessages; group_items.push_back(i); } From d9aa37219cc715fc8e8e255712d28e2558ec3e5b Mon 
Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 22 Jul 2018 21:45:43 +0200 Subject: [PATCH 12/15] Revert "DROP before merge. Reduce INTEGRITY_CHECK_PERIOD" This reverts commit ce61174d79227588375678bfe446980420f50168. --- libretroshare/src/gxs/rsgenexchange.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libretroshare/src/gxs/rsgenexchange.cc b/libretroshare/src/gxs/rsgenexchange.cc index 2630c7379..3927e6f61 100644 --- a/libretroshare/src/gxs/rsgenexchange.cc +++ b/libretroshare/src/gxs/rsgenexchange.cc @@ -59,10 +59,10 @@ static const uint32_t INDEX_AUTHEN_ADMIN = 0x00000040; // admin key #define GXS_MASK "GXS_MASK_HACK" -#define GEN_EXCH_DEBUG 1 +//#define GEN_EXCH_DEBUG 1 static const uint32_t MSG_CLEANUP_PERIOD = 60*59; // 59 minutes -static const uint32_t INTEGRITY_CHECK_PERIOD = 60*2; // 31 minutes // TODO: Restore this line before merging deep_search +static const uint32_t INTEGRITY_CHECK_PERIOD = 60*31; // 31 minutes RsGenExchange::RsGenExchange(RsGeneralDataService *gds, RsNetworkExchangeService *ns, RsSerialType *serviceSerialiser, uint16_t servType, RsGixs* gixs, From 51c25219bb965f2c2e5702664a229e48cae16c8c Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 22 Jul 2018 22:47:30 +0200 Subject: [PATCH 13/15] Fix compiling with old GCC --- libretroshare/src/gxs/rsgxsnetservice.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index b6fe8420d..bd68b53f7 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -243,7 +243,6 @@ #include #include #include -#include #include "rsgxsnetservice.h" #include "gxssecurity.h" @@ -262,6 +261,13 @@ # include "deep_search/deep_search.h" #endif +#include "util/cxx11retrocompat.h" +#if defined(GCC_VERSION) && GCC_VERSION > 50100 +# include +#else +# include +#endif + /*** * Use the following defines to debug: 
NXS_NET_DEBUG_0 shows group update high level information @@ -5303,9 +5309,15 @@ bool RsGxsNetService::search( const std::string& substring, s.mSignFlags = std::stoul(rit->second); if((rit = uQ.find("publishDate")) != uQ.end()) { + std::tm tm; memset(&tm, 0, sizeof(tm)); +#if defined(GCC_VERSION) && GCC_VERSION > 50100 std::istringstream ss(rit->second); - std::tm tm; ss >> std::get_time(&tm, "%Y%m%d"); +#else // defined(GCC_VERSION) && GCC_VERSION > 50100 + sscanf( rit->second.c_str(), + "%4d%2d%2d", &tm.tm_year, &tm.tm_mon, &tm.tm_mday ); +#endif // defined(GCC_VERSION) && GCC_VERSION > 50100 + s.mPublishTs = mktime(&tm); } if((rit = uQ.find("authorId")) != uQ.end()) From 6f8c2f6f41af9b7b63795d662950669690f30a64 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 22 Jul 2018 23:39:06 +0200 Subject: [PATCH 14/15] Fix compilation if deep_search is disabled --- libretroshare/src/gxs/rsgxsnetservice.cc | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index bd68b53f7..12b218138 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -5339,23 +5339,23 @@ bool RsGxsNetService::search( const std::string& substring, RS_STACK_MUTEX(mNxsMutex) ; mDataStore->retrieveGxsGrpMetaData(grpMetaMap); } - RsGroupNetworkStats stats ; - for(auto it(grpMetaMap.begin());it!=grpMetaMap.end();++it) + + RsGroupNetworkStats stats; + for(auto it(grpMetaMap.begin());it!=grpMetaMap.end();++it) if(termSearch(it->second->mGroupName,substring)) { - getGroupNetworkStats(it->first,stats) ; + getGroupNetworkStats(it->first,stats); - RsGxsGroupSummary s ; - s.group_id = it->first ; - s.group_name = it->second->mGroupName ; - // to be filled with something better when we use the real search - s.search_context = it->second->mGroupName ; - s.sign_flags = it->second->mSignFlags; - s.publish_ts = it->second->mPublishTs; - 
s.author_id = it->second->mAuthorId; - s.number_of_messages = stats.mMaxVisibleCount ; - s.last_message_ts = stats.mLastGroupModificationTS ; - s.popularity = it->second->mPop; + RsGxsGroupSummary s; + s.mGroupId = it->first; + s.mGroupName = it->second->mGroupName; + s.mSearchContext = it->second->mGroupName; + s.mSignFlags = it->second->mSignFlags; + s.mPublishTs = it->second->mPublishTs; + s.mAuthorId = it->second->mAuthorId; + s.mNumberOfMessages = stats.mMaxVisibleCount; + s.mLastMessageTs = stats.mLastGroupModificationTS; + s.mPopularity = it->second->mPop; group_infos.push_back(s); } From e8c9ba52b247bb32348d5b1c2f6b1fdf7d63d364 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Mon, 23 Jul 2018 11:18:32 +0200 Subject: [PATCH 15/15] Improve code quality after Cyril review --- libretroshare/src/deep_search/deep_search.h | 23 +++++++++++---------- libretroshare/src/gxs/rsgxsnetservice.cc | 22 ++------------------ libretroshare/src/gxs/rsgxsnettunnel.cc | 3 ++- libretroshare/src/gxs/rsgxsutil.cc | 2 +- 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 914fce6a2..3fed67c01 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -99,11 +99,7 @@ struct DeepSearch // Index each field with a suitable prefix. termgenerator.index_text(chan.mMeta.mGroupName, 1, "G"); - - char date[] = "YYYYMMDD\0"; - std::strftime(date, 9, "%Y%m%d", std::gmtime(&chan.mMeta.mPublishTs)); - termgenerator.index_text(date, 1, "D"); - + termgenerator.index_text(timetToXapianDate(chan.mMeta.mPublishTs), 1, "D"); termgenerator.index_text(chan.mDescription, 1, "XD"); // Index fields without prefixes for general search. 
@@ -116,7 +112,7 @@ struct DeepSearch .setQueryKV("id", chan.mMeta.mGroupId.toStdString()); const std::string idTerm("Q" + chanUrl.toString()); - chanUrl.setQueryKV("publishDate", date); + chanUrl.setQueryKV("publishTs", std::to_string(chan.mMeta.mPublishTs)); chanUrl.setQueryKV("name", chan.mMeta.mGroupName); if(!chan.mMeta.mAuthorId.isNull()) chanUrl.setQueryKV("authorId", chan.mMeta.mAuthorId.toStdString()); @@ -164,10 +160,7 @@ struct DeepSearch // Index each field with a suitable prefix. termgenerator.index_text(post.mMeta.mMsgName, 1, "S"); - - char date[] = "YYYYMMDD\0"; - std::strftime(date, 9, "%Y%m%d", std::gmtime(&post.mMeta.mPublishTs)); - termgenerator.index_text(date, 1, "D"); + termgenerator.index_text(timetToXapianDate(post.mMeta.mPublishTs), 1, "D"); // Avoid indexing HTML bool isPlainMsg = post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>'; @@ -200,8 +193,9 @@ struct DeepSearch .setQueryKV("msgid", post.mMeta.mMsgId.toStdString()); std::string idTerm("Q" + postUrl.toString()); - postUrl.setQueryKV("publishDate", date); + postUrl.setQueryKV("publishTs", std::to_string(post.mMeta.mPublishTs)); postUrl.setQueryKV("name", post.mMeta.mMsgName); + postUrl.setQueryKV("authorId", post.mMeta.mAuthorId.toStdString()); std::string rsLink(postUrl.toString()); // store the RS link so we are able to retrive it on matching search @@ -247,5 +241,12 @@ private: RsAccounts::AccountDirectory() + "/deep_search_xapian_db"; return dbDir; } + + static std::string timetToXapianDate(const time_t& time) + { + char date[] = "YYYYMMDD\0"; + std::strftime(date, 9, "%Y%m%d", std::gmtime(&time)); + return date; + } }; diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index 12b218138..8f4ee033c 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -261,13 +261,6 @@ # include "deep_search/deep_search.h" #endif -#include "util/cxx11retrocompat.h" -#if defined(GCC_VERSION) && 
GCC_VERSION > 50100 -# include -#else -# include -#endif - /*** * Use the following defines to debug: NXS_NET_DEBUG_0 shows group update high level information @@ -5307,19 +5300,8 @@ bool RsGxsNetService::search( const std::string& substring, s.mGroupName = rit->second; if((rit = uQ.find("signFlags")) != uQ.end()) s.mSignFlags = std::stoul(rit->second); - if((rit = uQ.find("publishDate")) != uQ.end()) - { - std::tm tm; memset(&tm, 0, sizeof(tm)); -#if defined(GCC_VERSION) && GCC_VERSION > 50100 - std::istringstream ss(rit->second); - ss >> std::get_time(&tm, "%Y%m%d"); -#else // defined(GCC_VERSION) && GCC_VERSION > 50100 - sscanf( rit->second.c_str(), - "%4d%2d%2d", &tm.tm_year, &tm.tm_mon, &tm.tm_mday ); -#endif // defined(GCC_VERSION) && GCC_VERSION > 50100 - - s.mPublishTs = mktime(&tm); - } + if((rit = uQ.find("publishTs")) != uQ.end()) + s.mPublishTs = static_cast(std::stoll(rit->second)); if((rit = uQ.find("authorId")) != uQ.end()) s.mAuthorId = RsGxsId(rit->second); diff --git a/libretroshare/src/gxs/rsgxsnettunnel.cc b/libretroshare/src/gxs/rsgxsnettunnel.cc index dfa0d8655..bbe10694b 100644 --- a/libretroshare/src/gxs/rsgxsnettunnel.cc +++ b/libretroshare/src/gxs/rsgxsnettunnel.cc @@ -61,8 +61,9 @@ const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_KEEP_ALIVE = 0x02 const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_RANDOM_BIAS = 0x03 ; const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_SUBSTRING = 0x04 ; const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_REQUEST = 0x05 ; -const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x06 ; +// const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x06; // DEPRECATED const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_DATA = 0x07 ; +const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x08; class RsGxsNetTunnelItem: public RsItem { diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index 
2212ea311..2e7a69079 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -162,7 +162,7 @@ void RsGxsIntegrityCheck::run() bool RsGxsIntegrityCheck::check() { #ifdef RS_DEEP_SEARCH - bool isGxsChannels = dynamic_cast(mGenExchangeClient); + bool isGxsChannels = mGenExchangeClient->serviceType() == RS_SERVICE_GXS_TYPE_CHANNELS; std::set indexedGroups; #endif