Merge pull request #1292 from G10h4ck/deep_search

Content search for channels trough xapian
This commit is contained in:
G10h4ck 2018-07-23 11:48:55 +02:00 committed by GitHub
commit fbc4e4205d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 616 additions and 198 deletions

View File

@ -13,10 +13,10 @@ matrix:
before_install:
- if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get update; fi
- if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get install -y build-essential libssl-dev libsqlcipher-dev libbz2-dev libmicrohttpd-dev libsqlite3-dev libupnp-dev pkg-config qt5-default libxss-dev qtmultimedia5-dev libqt5x11extras5-dev libqt5designer5 qttools5-dev; fi
- if [ $TRAVIS_OS_NAME == linux ]; then sudo apt-get install -y build-essential libssl-dev libsqlcipher-dev libbz2-dev libmicrohttpd-dev libsqlite3-dev libupnp-dev pkg-config qt5-default libxss-dev qtmultimedia5-dev libqt5x11extras5-dev libqt5designer5 libxapian-dev qttools5-dev; fi
- if [ $TRAVIS_OS_NAME == osx ]; then brew update ; fi
- if [ $TRAVIS_OS_NAME == osx ]; then brew install qt55 openssl miniupnpc libmicrohttpd sqlcipher; fi
- if [ $TRAVIS_OS_NAME == osx ]; then brew install qt55 openssl miniupnpc libmicrohttpd sqlcipher xapian; fi
- if [ $TRAVIS_OS_NAME == osx ]; then brew link --force qt55 ; fi
- wget https://github.com/Tencent/rapidjson/archive/v1.1.0.tar.gz

View File

@ -89,7 +89,7 @@ install:
# Configuring MSys2
- set PATH=C:\msys64\usr\bin;%PATH%
- set PATH=C:\msys64\mingw32\bin;%PATH%
- pacman --noconfirm -S mingw-w64-i686-qt5 mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd
- pacman --noconfirm -S mingw-w64-i686-qt5 mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd mingw-w64-xapian-core
#- pacman --noconfirm -S mingw-w64-i686-qt5-static mingw-w64-i686-miniupnpc mingw-w64-i686-sqlcipher mingw-w64-i686-libmicrohttpd
#- set PATH=C:\msys64\mingw32\qt5-static\bin\;%PATH%

View File

@ -0,0 +1,252 @@
#pragma once
/*
* RetroShare Content Search and Indexing.
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <ctime>
#include <vector>
#include <xapian.h>
#include "retroshare/rsgxschannels.h"
#include "retroshare/rsinit.h"
#include "util/rsurl.h"
#ifndef XAPIAN_AT_LEAST
#define XAPIAN_AT_LEAST(A,B,C) (XAPIAN_MAJOR_VERSION > (A) || \
(XAPIAN_MAJOR_VERSION == (A) && \
(XAPIAN_MINOR_VERSION > (B) || \
(XAPIAN_MINOR_VERSION == (B) && XAPIAN_REVISION >= (C)))))
#endif // ndef XAPIAN_AT_LEAST
struct DeepSearch
{
struct SearchResult
{
std::string mUrl;
std::string mSnippet;
};
/**
* @return search results count
*/
static uint32_t search( const std::string& queryStr,
std::vector<SearchResult>& results,
uint32_t maxResults = 100 )
{
results.clear();
// Open the database we're going to search.
Xapian::Database db(dbPath());
// Set up a QueryParser with a stemmer and suitable prefixes.
Xapian::QueryParser queryparser;
//queryparser.set_stemmer(Xapian::Stem("en"));
queryparser.set_stemming_strategy(queryparser.STEM_SOME);
// Start of prefix configuration.
//queryparser.add_prefix("title", "S");
//queryparser.add_prefix("description", "XD");
// End of prefix configuration.
// And parse the query.
Xapian::Query query = queryparser.parse_query(queryStr);
// Use an Enquire object on the database to run the query.
Xapian::Enquire enquire(db);
enquire.set_query(query);
Xapian::MSet mset = enquire.get_mset(0, maxResults);
for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m )
{
const Xapian::Document& doc = m.get_document();
SearchResult s;
s.mUrl = doc.get_value(URL_VALUENO);
#if XAPIAN_AT_LEAST(1,3,5)
s.mSnippet = mset.snippet(doc.get_data());
#endif // XAPIAN_AT_LEAST(1,3,5)
results.push_back(s);
}
return results.size();
}
static void indexChannelGroup(const RsGxsChannelGroup& chan)
{
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
//termgenerator.set_stemmer(Xapian::Stem("en"));
// We make a document and tell the term generator to use this.
Xapian::Document doc;
termgenerator.set_document(doc);
// Index each field with a suitable prefix.
termgenerator.index_text(chan.mMeta.mGroupName, 1, "G");
termgenerator.index_text(timetToXapianDate(chan.mMeta.mPublishTs), 1, "D");
termgenerator.index_text(chan.mDescription, 1, "XD");
// Index fields without prefixes for general search.
termgenerator.index_text(chan.mMeta.mGroupName);
termgenerator.increase_termpos();
termgenerator.index_text(chan.mDescription);
RsUrl chanUrl; chanUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", chan.mMeta.mGroupId.toStdString());
const std::string idTerm("Q" + chanUrl.toString());
chanUrl.setQueryKV("publishTs", std::to_string(chan.mMeta.mPublishTs));
chanUrl.setQueryKV("name", chan.mMeta.mGroupName);
if(!chan.mMeta.mAuthorId.isNull())
chanUrl.setQueryKV("authorId", chan.mMeta.mAuthorId.toStdString());
if(chan.mMeta.mSignFlags)
chanUrl.setQueryKV( "signFlags",
std::to_string(chan.mMeta.mSignFlags) );
std::string rsLink(chanUrl.toString());
// store the RS link so we are able to retrive it on matching search
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription);
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer. "Q" prefix is a Xapian convention for unique id term.
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
}
static void removeChannelFromIndex(RsGxsGroupId grpId)
{
// "Q" prefix is a Xapian convention for unique id term.
RsUrl chanUrl; chanUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", grpId.toStdString());
std::string idTerm("Q" + chanUrl.toString());
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
db.delete_document(idTerm);
}
static void indexChannelPost(const RsGxsChannelPost& post)
{
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
//termgenerator.set_stemmer(Xapian::Stem("en"));
// We make a document and tell the term generator to use this.
Xapian::Document doc;
termgenerator.set_document(doc);
// Index each field with a suitable prefix.
termgenerator.index_text(post.mMeta.mMsgName, 1, "S");
termgenerator.index_text(timetToXapianDate(post.mMeta.mPublishTs), 1, "D");
// Avoid indexing HTML
bool isPlainMsg = post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>';
if(isPlainMsg)
termgenerator.index_text(post.mMsg, 1, "XD");
// Index fields without prefixes for general search.
termgenerator.index_text(post.mMeta.mMsgName);
if(isPlainMsg)
{
termgenerator.increase_termpos();
termgenerator.index_text(post.mMsg);
}
for(const RsGxsFile& attachment : post.mFiles)
{
termgenerator.index_text(attachment.mName, 1, "F");
termgenerator.increase_termpos();
termgenerator.index_text(attachment.mName);
}
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
RsUrl postUrl; postUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", post.mMeta.mGroupId.toStdString())
.setQueryKV("msgid", post.mMeta.mMsgId.toStdString());
std::string idTerm("Q" + postUrl.toString());
postUrl.setQueryKV("publishTs", std::to_string(post.mMeta.mPublishTs));
postUrl.setQueryKV("name", post.mMeta.mMsgName);
postUrl.setQueryKV("authorId", post.mMeta.mAuthorId.toStdString());
std::string rsLink(postUrl.toString());
// store the RS link so we are able to retrive it on matching search
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
if(isPlainMsg)
doc.set_data(post.mMeta.mMsgName + "\n" + post.mMsg);
else doc.set_data(post.mMeta.mMsgName);
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
}
static void removeChannelPostFromIndex(
RsGxsGroupId grpId, RsGxsMessageId msgId )
{
RsUrl postUrl; postUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", grpId.toStdString())
.setQueryKV("msgid", msgId.toStdString());
// "Q" prefix is a Xapian convention for unique id term.
std::string idTerm("Q" + postUrl.toString());
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
db.delete_document(idTerm);
}
private:
enum : Xapian::valueno
{
/// Used to store retroshare url of indexed documents
URL_VALUENO,
/// @see Xapian::BAD_VALUENO
BAD_VALUENO = Xapian::BAD_VALUENO
};
static const std::string& dbPath()
{
static const std::string dbDir =
RsAccounts::AccountDirectory() + "/deep_search_xapian_db";
return dbDir;
}
static std::string timetToXapianDate(const time_t& time)
{
char date[] = "YYYYMMDD\0";
std::strftime(date, 9, "%Y%m%d", std::gmtime(&time));
return date;
}
};

View File

@ -74,17 +74,15 @@ public:
};
/*!
* This is used to query network statistics for a given group. This is useful to e.g. show group
* popularity, or number of visible messages for unsubscribed group.
* This is used to query network statistics for a given group. This is useful
* to e.g. show group popularity, or number of visible messages for unsubscribed
* group.
*/
class RsGroupNetworkStats
struct RsGroupNetworkStats
{
public:
RsGroupNetworkStats()
{
mMaxVisibleCount = 0 ;
}
RsGroupNetworkStats() :
mSuppliers(0), mMaxVisibleCount(0), mGrpAutoSync(false),
mAllowMsgSync(false), mLastGroupModificationTS(0) {}
uint32_t mSuppliers;
uint32_t mMaxVisibleCount;

View File

@ -59,7 +59,7 @@ static const uint32_t INDEX_AUTHEN_ADMIN = 0x00000040; // admin key
#define GXS_MASK "GXS_MASK_HACK"
#define GEN_EXCH_DEBUG 1
//#define GEN_EXCH_DEBUG 1
static const uint32_t MSG_CLEANUP_PERIOD = 60*59; // 59 minutes
static const uint32_t INTEGRITY_CHECK_PERIOD = 60*31; // 31 minutes
@ -201,7 +201,8 @@ void RsGenExchange::tick()
if(!mIntegrityCheck)
{
mIntegrityCheck = new RsGxsIntegrityCheck(mDataStore,this,mGixs);
mIntegrityCheck = new RsGxsIntegrityCheck( mDataStore, this,
*mSerialiser, mGixs);
mIntegrityCheck->start("gxs integrity");
mChecking = true;
}
@ -1645,6 +1646,7 @@ void RsGenExchange::receiveDistantSearchResults(TurtleRequestId id,const RsGxsGr
std::cerr << "RsGenExchange::receiveDistantSearchResults(): received result for request " << std::hex << id << std::dec << std::endl;
}
void RsGenExchange::notifyReceivePublishKey(const RsGxsGroupId &grpId)
{
RS_STACK_MUTEX(mGenMtx);

View File

@ -257,6 +257,10 @@
#include "util/rsmemory.h"
#include "util/stacktrace.h"
#ifdef RS_DEEP_SEARCH
# include "deep_search/deep_search.h"
#endif
/***
* Use the following defines to debug:
NXS_NET_DEBUG_0 shows group update high level information
@ -5183,8 +5187,8 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req, const std:
std::map<RsGxsGroupId,RsGxsGroupSummary>& search_results_map(mDistantSearchResults[req]) ;
for(auto it(group_infos.begin());it!=group_infos.end();++it)
if(search_results_map.find((*it).group_id) == search_results_map.end())
grpMeta[(*it).group_id] = NULL;
if(search_results_map.find((*it).mGroupId) == search_results_map.end())
grpMeta[(*it).mGroupId] = NULL;
mDataStore->retrieveGxsGrpMetaData(grpMeta);
@ -5193,26 +5197,26 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req, const std:
// only keep groups that are not locally known, and groups that are not already in the mDistantSearchResults structure
for(auto it(group_infos.begin());it!=group_infos.end();++it)
if(grpMeta[(*it).group_id] == NULL)
if(grpMeta[(*it).mGroupId] == NULL)
{
filtered_results.push_back(*it) ;
auto it2 = search_results_map.find((*it).group_id) ;
auto it2 = search_results_map.find((*it).mGroupId) ;
if(it2 != search_results_map.end())
{
// update existing data
it2->second.popularity++ ;
it2->second.number_of_messages = std::max(it2->second.number_of_messages,(*it).number_of_messages) ;
it2->second.mPopularity++ ;
it2->second.mNumberOfMessages = std::max(it2->second.mNumberOfMessages,(*it).mNumberOfMessages) ;
}
else
{
search_results_map[(*it).group_id] = *it;
search_results_map[(*it).group_id].popularity = 1; // number of results so far
search_results_map[(*it).mGroupId] = *it;
search_results_map[(*it).mGroupId].mPopularity = 1; // number of results so far
}
mObserver->receiveDistantSearchResults(req,(*it).group_id) ;
mObserver->receiveDistantSearchResults(req,(*it).mGroupId) ;
}
}
@ -5268,8 +5272,50 @@ void RsGxsNetService::receiveTurtleSearchResults(TurtleRequestId req,const unsig
mObserver->receiveNewGroups(new_grps);
}
bool RsGxsNetService::search(const std::string& substring,std::list<RsGxsGroupSummary>& group_infos)
bool RsGxsNetService::search( const std::string& substring,
std::list<RsGxsGroupSummary>& group_infos )
{
group_infos.clear();
#ifdef RS_DEEP_SEARCH
std::vector<DeepSearch::SearchResult> results;
DeepSearch::search(substring, results, 0);
for(auto dsr : results)
{
RsUrl rUrl(dsr.mUrl);
const auto& uQ(rUrl.query());
auto rit = uQ.find("id");
if(rit != rUrl.query().end())
{
RsGroupNetworkStats stats;
RsGxsGroupId grpId(rit->second);
if( !grpId.isNull() && getGroupNetworkStats(grpId, stats) )
{
RsGxsGroupSummary s;
s.mGroupId = grpId;
if((rit = uQ.find("name")) != uQ.end())
s.mGroupName = rit->second;
if((rit = uQ.find("signFlags")) != uQ.end())
s.mSignFlags = std::stoul(rit->second);
if((rit = uQ.find("publishTs")) != uQ.end())
s.mPublishTs = static_cast<time_t>(std::stoll(rit->second));
if((rit = uQ.find("authorId")) != uQ.end())
s.mAuthorId = RsGxsId(rit->second);
s.mSearchContext = dsr.mSnippet;
s.mNumberOfMessages = stats.mMaxVisibleCount;
s.mLastMessageTs = stats.mLastGroupModificationTS;
s.mPopularity = stats.mSuppliers;
group_infos.push_back(s);
}
}
}
#else // RS_DEEP_SEARCH
RsGxsGrpMetaTemporaryMap grpMetaMap;
{
RS_STACK_MUTEX(mNxsMutex) ;
@ -5277,26 +5323,25 @@ bool RsGxsNetService::search(const std::string& substring,std::list<RsGxsGroupSu
}
RsGroupNetworkStats stats;
for(auto it(grpMetaMap.begin());it!=grpMetaMap.end();++it)
if(termSearch(it->second->mGroupName,substring))
{
getGroupNetworkStats(it->first,stats);
RsGxsGroupSummary s;
s.group_id = it->first ;
s.group_name = it->second->mGroupName ;
s.group_description = it->second->mGroupName ; // to be filled with something better when we use the real search
s.search_context = it->second->mGroupName ;
s.sign_flags = it->second->mSignFlags;
s.publish_ts = it->second->mPublishTs;
s.author_id = it->second->mAuthorId;
s.number_of_messages = stats.mMaxVisibleCount ;
s.last_message_ts = stats.mLastGroupModificationTS ;
s.popularity = it->second->mPop;
s.mGroupId = it->first;
s.mGroupName = it->second->mGroupName;
s.mSearchContext = it->second->mGroupName;
s.mSignFlags = it->second->mSignFlags;
s.mPublishTs = it->second->mPublishTs;
s.mAuthorId = it->second->mAuthorId;
s.mNumberOfMessages = stats.mMaxVisibleCount;
s.mLastMessageTs = stats.mLastGroupModificationTS;
s.mPopularity = it->second->mPop;
group_infos.push_back(s);
}
#endif // RS_DEEP_SEARCH
#ifdef NXS_NET_DEBUG_8
GXSNETDEBUG___ << " performing local substring search in response to distant request. Found " << group_infos.size() << " responses." << std::endl;

View File

@ -61,8 +61,9 @@ const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_KEEP_ALIVE = 0x02
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_RANDOM_BIAS = 0x03 ;
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_SUBSTRING = 0x04 ;
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_REQUEST = 0x05 ;
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x06 ;
// const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x06; // DEPRECATED
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_DATA = 0x07 ;
const uint8_t RS_PKT_SUBTYPE_GXS_NET_TUNNEL_TURTLE_SEARCH_GROUP_SUMMARY = 0x08;
class RsGxsNetTunnelItem: public RsItem
{
@ -216,22 +217,6 @@ public:
}
};
template<>
void RsTypeSerializer::serial_process( RsGenericSerializer::SerializeJob j, RsGenericSerializer::SerializeContext& ctx, RsGxsGroupSummary& gs, const std::string& member_name )
{
RsTypeSerializer::serial_process (j,ctx,gs.group_id ,member_name+"-group_id") ; // RsGxsGroupId group_id ;
RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_NAME ,gs.group_name,member_name+"-group_name") ; // std::string group_name ;
RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_COMMENT ,gs.group_description,member_name+"-group_description") ; // std::string group_description ;
RsTypeSerializer::serial_process (j,ctx,TLV_TYPE_STR_VALUE ,gs.search_context,member_name+"-group_name") ; // std::string search_context ;
RsTypeSerializer::serial_process (j,ctx,gs.author_id ,member_name+"-author_id") ; // RsGxsId author_id ;
RsTypeSerializer::serial_process (j,ctx,gs.publish_ts ,member_name+"-publish_ts") ; // time_t publish_ts ;
RsTypeSerializer::serial_process (j,ctx,gs.number_of_messages,member_name+"-number_of_messages") ; // uint32_t number_of_messages ;
RsTypeSerializer::serial_process<time_t> (j,ctx,gs.last_message_ts ,member_name+"-last_message_ts") ; // time_t last_message_ts ;
RsTypeSerializer::serial_process<uint32_t>(j,ctx,gs.sign_flags ,member_name+"-sign_flags") ; // uint32_t sign_flags ;
RsTypeSerializer::serial_process<uint32_t>(j,ctx,gs.popularity ,member_name+"-popularity") ; // uint32_t popularity ;
}
//===========================================================================================================================================//
// Interface with rest of the software //
//===========================================================================================================================================//
@ -1102,7 +1087,7 @@ void RsGxsNetTunnelService::receiveSearchResult(TurtleSearchRequestId request_id
GXS_NET_TUNNEL_DEBUG() << " : result is of type group summary result for service " << result_gs->service << std::dec << ": " << std::endl;
for(auto it(result_gs->group_infos.begin());it!=result_gs->group_infos.end();++it)
std::cerr << " group " << (*it).group_id << ": " << (*it).group_name << ", " << (*it).number_of_messages << " messages, last is " << time(NULL)-(*it).last_message_ts << " secs ago." << std::endl;
std::cerr << " group " << (*it).mGroupId << ": " << (*it).mGroupName << ", " << (*it).mNumberOfMessages << " messages, last is " << time(NULL)-(*it).mLastMessageTs << " secs ago." << std::endl;
auto it = mSearchableServices.find(result_gs->service) ;

View File

@ -4,6 +4,7 @@
* libretroshare: retroshare core library *
* *
* Copyright 2013-2013 by Christopher Evi-Parker *
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -19,6 +20,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#include <time.h>
#include "rsgxsutil.h"
@ -27,6 +29,12 @@
#include "pqi/pqihash.h"
#include "gxs/rsgixs.h"
#ifdef RS_DEEP_SEARCH
# include "deep_search/deep_search.h"
# include "services/p3gxschannels.h"
# include "rsitems/rsgxschannelitems.h"
#endif
static const uint32_t MAX_GXS_IDS_REQUESTS_NET = 10 ; // max number of requests from cache/net (avoids killing the system!)
//#define DEBUG_GXSUTIL 1
@ -137,20 +145,27 @@ bool RsGxsMessageCleanUp::clean()
return mGrpMeta.empty();
}
RsGxsIntegrityCheck::RsGxsIntegrityCheck(RsGeneralDataService* const dataService, RsGenExchange *genex, RsGixs *gixs) :
mDs(dataService),mGenExchangeClient(genex), mDone(false), mIntegrityMutex("integrity"),mGixs(gixs)
{ }
RsGxsIntegrityCheck::RsGxsIntegrityCheck(
RsGeneralDataService* const dataService, RsGenExchange* genex,
RsSerialType& serializer, RsGixs* gixs ) :
mDs(dataService), mGenExchangeClient(genex), mSerializer(serializer),
mDone(false), mIntegrityMutex("integrity"), mGixs(gixs) {}
void RsGxsIntegrityCheck::run()
{
check();
RsStackMutex stack(mIntegrityMutex);
RS_STACK_MUTEX(mIntegrityMutex);
mDone = true;
}
bool RsGxsIntegrityCheck::check()
{
#ifdef RS_DEEP_SEARCH
bool isGxsChannels = mGenExchangeClient->serviceType() == RS_SERVICE_GXS_TYPE_CHANNELS;
std::set<RsGxsGroupId> indexedGroups;
#endif
// first take out all the groups
std::map<RsGxsGroupId, RsNxsGrp*> grp;
mDs->retrieveNxsGrps(grp, true, true);
@ -163,8 +178,8 @@ bool RsGxsIntegrityCheck::check()
// compute hash and compare to stored value, if it fails then simply add it
// to list
std::map<RsGxsGroupId, RsNxsGrp*>::iterator git = grp.begin();
for(; git != grp.end(); ++git)
for( std::map<RsGxsGroupId, RsNxsGrp*>::iterator git = grp.begin();
git != grp.end(); ++git )
{
RsNxsGrp* grp = git->second;
RsFileHash currHash;
@ -189,30 +204,67 @@ bool RsGxsIntegrityCheck::check()
#ifdef DEBUG_GXSUTIL
GXSUTIL_DEBUG() << "TimeStamping group authors' key ID " << grp->metaData->mAuthorId << " in group ID " << grp->grpId << std::endl;
#endif
if(rsReputations!=NULL && rsReputations->overallReputationLevel(grp->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE)
if( rsReputations && rsReputations->overallReputationLevel(grp->metaData->mAuthorId ) > RsReputations::REPUTATION_LOCALLY_NEGATIVE )
used_gxs_ids.insert(std::make_pair(grp->metaData->mAuthorId, RsIdentityUsage(mGenExchangeClient->serviceType(), RsIdentityUsage::GROUP_AUTHOR_KEEP_ALIVE,grp->grpId)));
}
}
}
else msgIds.erase(msgIds.find(grp->grpId));
#ifdef RS_DEEP_SEARCH
if( isGxsChannels
&& grp->metaData->mCircleType == GXS_CIRCLE_TYPE_PUBLIC
&& grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED )
{
RsGxsGrpMetaData meta;
meta.deserialise(grp->meta.bin_data, grp->meta.bin_len);
uint32_t blz = grp->grp.bin_len;
RsItem* rIt = mSerializer.deserialise(grp->grp.bin_data,
&blz);
if( RsGxsChannelGroupItem* cgIt =
dynamic_cast<RsGxsChannelGroupItem*>(rIt) )
{
RsGxsChannelGroup cg;
cgIt->toChannelGroup(cg, false);
cg.mMeta = meta;
indexedGroups.insert(grp->grpId);
DeepSearch::indexChannelGroup(cg);
}
else
{
msgIds.erase(msgIds.find(grp->grpId));
// grpsToDel.push_back(grp->grpId);
std::cerr << __PRETTY_FUNCTION__ << " Group: "
<< meta.mGroupId.toStdString() << " "
<< meta.mGroupName
<< " doesn't seems a channel, please "
<< "report to developers"
<< std::endl;
print_stacktrace();
}
delete rIt;
}
#endif
}
else
{
grpsToDel.push_back(grp->grpId);
#ifdef RS_DEEP_SEARCH
if(isGxsChannels) DeepSearch::removeChannelFromIndex(grp->grpId);
#endif
}
if(!(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) && !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_ADMIN) && !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_PUBLISH))
if( !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) &&
!(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_ADMIN) &&
!(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_PUBLISH) )
{
RsGroupNetworkStats stats;
mGenExchangeClient->getGroupNetworkStats(grp->grpId,stats);
if(stats.mSuppliers == 0 && stats.mMaxVisibleCount == 0 && stats.mGrpAutoSync)
if( stats.mSuppliers == 0 && stats.mMaxVisibleCount == 0
&& stats.mGrpAutoSync )
{
#ifdef DEBUG_GXSUTIL
GXSUTIL_DEBUG() << "Scheduling group \"" << grp->metaData->mGroupName << "\" ID=" << grp->grpId << " in service " << std::hex << mGenExchangeClient->serviceType() << std::dec << " for deletion because it has no suppliers not any visible data at friends." << std::endl;
@ -258,12 +310,15 @@ bool RsGxsIntegrityCheck::check()
if (nxsMsgIt == nxsMsgV.end())
{
msgsToDel[grpId].insert(msgId);
#ifdef RS_DEEP_SEARCH
if(isGxsChannels)
DeepSearch::removeChannelPostFromIndex(grpId, msgId);
#endif
}
}
}
GxsMsgResult::iterator mit = msgs.begin();
for(; mit != msgs.end(); ++mit)
{
std::vector<RsNxsMsg*>& msgV = mit->second;
@ -279,10 +334,56 @@ bool RsGxsIntegrityCheck::check()
if(msg->metaData == NULL || currHash != msg->metaData->mHash)
{
std::cerr << "(EE) deleting message data with wrong hash or null meta data. meta=" << (void*)msg->metaData << std::endl;
std::cerr << __PRETTY_FUNCTION__ <<" (EE) deleting message data"
<< " with wrong hash or null meta data. meta="
<< (void*)msg->metaData << std::endl;
msgsToDel[msg->grpId].insert(msg->msgId);
#ifdef RS_DEEP_SEARCH
if(isGxsChannels)
DeepSearch::removeChannelPostFromIndex(msg->grpId, msg->msgId);
#endif
}
else if(!msg->metaData->mAuthorId.isNull() && subscribed_groups.find(msg->metaData->mGroupId)!=subscribed_groups.end())
else if (subscribed_groups.count(msg->metaData->mGroupId))
{
#ifdef RS_DEEP_SEARCH
if( isGxsChannels
&& indexedGroups.count(msg->metaData->mGroupId) )
{
RsGxsMsgMetaData meta;
meta.deserialise(msg->meta.bin_data, &msg->meta.bin_len);
uint32_t blz = msg->msg.bin_len;
RsItem* rIt = mSerializer.deserialise(msg->msg.bin_data,
&blz);
if( RsGxsChannelPostItem* cgIt =
dynamic_cast<RsGxsChannelPostItem*>(rIt) )
{
RsGxsChannelPost cg;
cgIt->toChannelPost(cg, false);
cg.mMeta = meta;
DeepSearch::indexChannelPost(cg);
}
else if(dynamic_cast<RsGxsCommentItem*>(rIt)) {}
else if(dynamic_cast<RsGxsVoteItem*>(rIt)) {}
else
{
std::cerr << __PRETTY_FUNCTION__ << " Message: "
<< meta.mMsgId.toStdString()
<< " in group: "
<< meta.mGroupId.toStdString() << " "
<< " doesn't seems a channel post, please "
<< "report to developers"
<< std::endl;
print_stacktrace();
}
delete rIt;
}
#endif
if(!msg->metaData->mAuthorId.isNull())
{
#ifdef DEBUG_GXSUTIL
GXSUTIL_DEBUG() << "TimeStamping message authors' key ID " << msg->metaData->mAuthorId << " in message " << msg->msgId << ", group ID " << msg->grpId<< std::endl;
@ -290,6 +391,7 @@ bool RsGxsIntegrityCheck::check()
if(rsReputations!=NULL && rsReputations->overallReputationLevel(msg->metaData->mAuthorId) > RsReputations::REPUTATION_LOCALLY_NEGATIVE)
used_gxs_ids.insert(std::make_pair(msg->metaData->mAuthorId,RsIdentityUsage(mGenExchangeClient->serviceType(),RsIdentityUsage::MESSAGE_AUTHOR_KEEP_ALIVE,msg->metaData->mGroupId,msg->metaData->mMsgId))) ;
}
}
delete msg;
}
@ -298,7 +400,7 @@ bool RsGxsIntegrityCheck::check()
mDs->removeMsgs(msgsToDel);
{
RsStackMutex stack(mIntegrityMutex);
RS_STACK_MUTEX(mIntegrityMutex);
std::vector<RsGxsGroupId>::iterator grpIt;
for(grpIt = grpsToDel.begin(); grpIt != grpsToDel.end(); ++grpIt)
@ -369,14 +471,13 @@ bool RsGxsIntegrityCheck::check()
bool RsGxsIntegrityCheck::isDone()
{
RsStackMutex stack(mIntegrityMutex);
RS_STACK_MUTEX(mIntegrityMutex);
return mDone;
}
void RsGxsIntegrityCheck::getDeletedIds(std::list<RsGxsGroupId>& grpIds, std::map<RsGxsGroupId, std::set<RsGxsMessageId> >& msgIds)
{
RsStackMutex stack(mIntegrityMutex);
RS_STACK_MUTEX(mIntegrityMutex);
grpIds = mDeletedGrps;
msgIds = mDeletedMsgs;
}

View File

@ -4,6 +4,7 @@
* libretroshare: retroshare core library *
* *
* Copyright 2013-2013 by Christopher Evi-Parker *
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -198,7 +199,9 @@ public:
* @param chunkSize
* @param sleepPeriod
*/
RsGxsIntegrityCheck(RsGeneralDataService* const dataService, RsGenExchange *genex, RsGixs *gixs);
RsGxsIntegrityCheck( RsGeneralDataService* const dataService,
RsGenExchange *genex, RsSerialType& gxsSerialiser,
RsGixs *gixs);
bool check();
bool isDone();
@ -211,6 +214,8 @@ private:
RsGeneralDataService* const mDs;
RsGenExchange *mGenExchangeClient;
RsSerialType& mSerializer;
bool mDone;
RsMutex mIntegrityMutex;
std::list<RsGxsGroupId> mDeletedGrps;

View File

@ -867,7 +867,9 @@ rs_gxs_trans {
SOURCES += gxstrans/p3gxstransitems.cc gxstrans/p3gxstrans.cc
}
rs_deep_search {
HEADERS += deep_search/deep_search.h
}
###########################################################################################################

View File

@ -45,10 +45,9 @@ extern RsGxsCircles *rsGxsCircles;
typedef RsPgpId RsPgpId;
// The meaning of the different circle types is:
//
//
static const uint32_t GXS_CIRCLE_TYPE_UNKNOWN = 0x0000 ; // not known. Is treated as public.
/// The meaning of the different circle types is:
/// TODO: convert to enum
static const uint32_t GXS_CIRCLE_TYPE_UNKNOWN = 0x0000 ; /// Used to detect uninizialized values.
static const uint32_t GXS_CIRCLE_TYPE_PUBLIC = 0x0001 ; // not restricted to a circle
static const uint32_t GXS_CIRCLE_TYPE_EXTERNAL = 0x0002 ; // restricted to an external circle, made of RsGxsId
static const uint32_t GXS_CIRCLE_TYPE_YOUR_FRIENDS_ONLY = 0x0003 ; // restricted to a subset of friend nodes of a given RS node given by a RsPgpId list

View File

@ -31,28 +31,45 @@
#include "retroshare/rsgxsservice.h"
#include "gxs/rsgxsdata.h"
#include "retroshare/rsgxsifacetypes.h"
#include "util/rsdeprecate.h"
/*!
* \brief The RsGxsGroupSymmary struct
* This structure is used to transport group summary information when a GXS service is searched. It contains the group information
* as well as a context string to tell where the information was found. It is more compact than a GroupMeta object, so as to make
* search responses as light as possible.
* This structure is used to transport group summary information when a GXS
* service is searched. It contains the group information as well as a context
* string to tell where the information was found. It is more compact than a
* GroupMeta object, so as to make search responses as light as possible.
*/
struct RsGxsGroupSummary
struct RsGxsGroupSummary : RsSerializable
{
RsGxsGroupSummary() : publish_ts(0), number_of_messages(0),last_message_ts(0),sign_flags(0),popularity(0) {}
RsGxsGroupSummary() :
mPublishTs(0), mNumberOfMessages(0),mLastMessageTs(0),
mSignFlags(0),mPopularity(0) {}
RsGxsGroupId group_id ;
RsGxsGroupId mGroupId;
std::string mGroupName;
RsGxsId mAuthorId;
time_t mPublishTs;
uint32_t mNumberOfMessages;
time_t mLastMessageTs;
uint32_t mSignFlags;
uint32_t mPopularity;
std::string group_name ;
std::string group_description ;
std::string search_context ;
RsGxsId author_id ;
time_t publish_ts ;
uint32_t number_of_messages ;
time_t last_message_ts ;
uint32_t sign_flags ;
uint32_t popularity ;
std::string mSearchContext;
/// @see RsSerializable::serial_process
void serial_process( RsGenericSerializer::SerializeJob j,
RsGenericSerializer::SerializeContext& ctx )
{
RS_SERIAL_PROCESS(mGroupId);
RS_SERIAL_PROCESS(mGroupName);
RS_SERIAL_PROCESS(mAuthorId);
RS_SERIAL_PROCESS(mPublishTs);
RS_SERIAL_PROCESS(mNumberOfMessages);
RS_SERIAL_PROCESS(mLastMessageTs);
RS_SERIAL_PROCESS(mSignFlags);
RS_SERIAL_PROCESS(mPopularity);
RS_SERIAL_PROCESS(mSearchContext);
}
};

View File

@ -288,7 +288,8 @@ public:
virtual void clear();
virtual void serial_process(RsGenericSerializer::SerializeJob j,RsGenericSerializer::SerializeContext& ctx);
virtual void serial_process( RsGenericSerializer::SerializeJob j,
RsGenericSerializer::SerializeContext& ctx );
RsGxsGroupId grpId; /// group Id, needed to complete version Id (ncvi)
static int refcount;

View File

@ -1708,16 +1708,13 @@ bool p3GxsChannels::retrieveDistantGroup(const RsGxsGroupId& group_id,RsGxsChann
if(netService()->retrieveDistantGroupSummary(group_id,gs))
{
// This is a placeholder information by the time we receive the full group meta data.
distant_group.mDescription = gs.group_description;
distant_group.mMeta.mGroupId = gs.group_id ;
distant_group.mMeta.mGroupName = gs.group_name;
distant_group.mMeta.mGroupId = gs.mGroupId ;
distant_group.mMeta.mGroupName = gs.mGroupName;
distant_group.mMeta.mGroupFlags = GXS_SERV::FLAG_PRIVACY_PUBLIC ;
distant_group.mMeta.mSignFlags = gs.sign_flags;
distant_group.mMeta.mSignFlags = gs.mSignFlags;
distant_group.mMeta.mPublishTs = gs.publish_ts;
distant_group.mMeta.mAuthorId = gs.author_id;
distant_group.mMeta.mPublishTs = gs.mPublishTs;
distant_group.mMeta.mAuthorId = gs.mAuthorId;
distant_group.mMeta.mCircleType = GXS_CIRCLE_TYPE_PUBLIC ;// guessed, otherwise the group would not be search-able.
@ -1726,9 +1723,9 @@ bool p3GxsChannels::retrieveDistantGroup(const RsGxsGroupId& group_id,RsGxsChann
distant_group.mMeta.mSubscribeFlags = GXS_SERV::GROUP_SUBSCRIBE_NOT_SUBSCRIBED ;
distant_group.mMeta.mPop = gs.popularity; // Popularity = number of friend subscribers
distant_group.mMeta.mVisibleMsgCount = gs.number_of_messages; // Max messages reported by friends
distant_group.mMeta.mLastPost = gs.last_message_ts; // Timestamp for last message. Not used yet.
distant_group.mMeta.mPop = gs.mPopularity; // Popularity = number of friend subscribers
distant_group.mMeta.mVisibleMsgCount = gs.mNumberOfMessages; // Max messages reported by friends
distant_group.mMeta.mLastPost = gs.mLastMessageTs; // Timestamp for last message. Not used yet.
return true ;
}

View File

@ -51,6 +51,10 @@ linux-* {
mLibs += dl
}
rs_deep_search {
mLibs += xapian
}
static {
sLibs *= $$mLibs
} else {

View File

@ -282,18 +282,19 @@ void GxsGroupFrameDialog::updateSearchResults()
for(auto it3(group_infos.begin());it3!=group_infos.end();++it3)
if(mCachedGroupMetas.find(it3->first) == mCachedGroupMetas.end())
{
std::cerr << " adding new group " << it3->first << " " << it3->second.group_id << " \"" << it3->second.group_name << "\"" << std::endl;
std::cerr << " adding new group " << it3->first << " "
<< it3->second.mGroupId << " \""
<< it3->second.mGroupName << "\"" << std::endl;
GroupItemInfo i;
i.id = QString(it3->second.group_id.toStdString().c_str()) ;
i.name = QString::fromUtf8(it3->second.group_name.c_str()) ;
i.description = QString::fromUtf8(it3->second.group_description.c_str()) ;
i.id = QString(it3->second.mGroupId.toStdString().c_str());
i.name = QString::fromUtf8(it3->second.mGroupName.c_str());
i.popularity = 0; // could be set to the number of hits
i.lastpost = QDateTime::fromTime_t(it3->second.last_message_ts);
i.lastpost = QDateTime::fromTime_t(it3->second.mLastMessageTs);
i.subscribeFlags = 0; // irrelevant here
i.publishKey = false ; // IS_GROUP_PUBLISHER(groupInfo.mSubscribeFlags);
i.adminKey = false ; // IS_GROUP_ADMIN(groupInfo.mSubscribeFlags);
i.max_visible_posts = it3->second.number_of_messages ;
i.max_visible_posts = it3->second.mNumberOfMessages;
group_items.push_back(i);
}

View File

@ -135,6 +135,11 @@ rs_macos10.9:CONFIG -= rs_macos10.11
rs_macos10.10:CONFIG -= rs_macos10.11
rs_macos10.12:CONFIG -= rs_macos10.11
# To disable deep search append the following assignation to qmake command line
# "CONFIG+=no_rs_deep_search"
CONFIG *= rs_deep_search
no_rs_deep_search:CONFIG -= rs_deep_search
###########################################################################################################################################################
#
# V07_NON_BACKWARD_COMPATIBLE_CHANGE_001:
@ -334,6 +339,10 @@ rs_chatserver {
DEFINES *= RS_CHATSERVER
}
rs_deep_search {
DEFINES *= RS_DEEP_SEARCH
}
debug {
QMAKE_CXXFLAGS -= -O2 -fomit-frame-pointer
QMAKE_CFLAGS -= -O2 -fomit-frame-pointer