mirror of
https://github.com/RetroShare/RetroShare.git
synced 2025-06-29 00:27:27 -04:00
Implement deep indexing for files through Xapian
At the moment it supports extracting metadata only from OGG files. The system has been designed to be easily extensible to more file formats by registering more indexer functions, which just need to extract metadata from a certain type of file and feed it to Xapian. The system has been integrated into the existing file search system through generic search requests and results; it keeps a good level of retro-compatibility thanks to some tricks. The indexing system is released under AGPLv3, so when libretroshare is compiled with deep search enabled AGPLv3 must be honored instead of LGPLv3-or-later. Cleaned up the Debian copyright file using non-deprecated license code-names.
This commit is contained in:
parent
d46e3eb2b7
commit
3a26ccf6a5
25 changed files with 1364 additions and 438 deletions
|
@ -257,8 +257,8 @@
|
|||
#include "util/rsmemory.h"
|
||||
#include "util/stacktrace.h"
|
||||
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
# include "deep_search/deep_search.h"
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
# include "deep_search/channelsindex.hpp"
|
||||
#endif
|
||||
|
||||
/***
|
||||
|
@ -5148,13 +5148,13 @@ TurtleRequestId RsGxsNetService::turtleSearchRequest(const std::string& match_st
|
|||
return mGxsNetTunnel->turtleSearchRequest(match_string,this) ;
|
||||
}
|
||||
|
||||
#ifndef RS_DEEP_SEARCH
|
||||
#ifndef RS_DEEP_CHANNEL_INDEX
|
||||
/**
 * Report whether @p substring occurs anywhere inside @p src.
 *
 * Characters are compared with RsRegularExpression::CompareCharIC, so the
 * match ignores case.
 *
 * Note: std::search returns its first iterator for an empty pattern, so an
 * empty @p substring matches every non-empty @p src but not an empty one.
 *
 * @param src text to look in
 * @param substring text to look for
 * @return true when std::search finds a match before src.end()
 */
static bool termSearch(const std::string& src, const std::string& substring)
|
||||
{
|
||||
/* always ignore case */
|
||||
return src.end() != std::search( src.begin(), src.end(), substring.begin(), substring.end(), RsRegularExpression::CompareCharIC() );
|
||||
}
|
||||
#endif // ndef RS_DEEP_SEARCH
|
||||
#endif // ndef RS_DEEP_CHANNEL_INDEX
|
||||
|
||||
bool RsGxsNetService::retrieveDistantSearchResults(TurtleRequestId req,std::map<RsGxsGroupId,RsGxsGroupSummary>& group_infos)
|
||||
{
|
||||
|
@ -5209,11 +5209,11 @@ void RsGxsNetService::receiveTurtleSearchResults(
|
|||
|
||||
for (const RsGxsGroupSummary& gps : group_infos)
|
||||
{
|
||||
#ifndef RS_DEEP_SEARCH
|
||||
#ifndef RS_DEEP_CHANNEL_INDEX
|
||||
/* Only keep groups that are not locally known, and groups that are
|
||||
* not already in the mDistantSearchResults structure. */
|
||||
if(grpMeta[gps.mGroupId]) continue;
|
||||
#else // ndef RS_DEEP_SEARCH
|
||||
#else // ndef RS_DEEP_CHANNEL_INDEX
|
||||
/* When deep search is enabled search results may bring more info
|
||||
* than we already have, also about posts that are indexed by Xapian,
|
||||
* so we don't apply this filter in this case. */
|
||||
|
@ -5302,9 +5302,9 @@ bool RsGxsNetService::search( const std::string& substring,
|
|||
{
|
||||
group_infos.clear();
|
||||
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
std::vector<DeepSearch::SearchResult> results;
|
||||
DeepSearch::search(substring, results);
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
std::vector<DeepChannelsSearchResult> results;
|
||||
DeepChannelsIndex::search(substring, results);
|
||||
|
||||
for(auto dsr : results)
|
||||
{
|
||||
|
@ -5324,7 +5324,7 @@ bool RsGxsNetService::search( const std::string& substring,
|
|||
if((rit = uQ.find("name")) != uQ.end())
|
||||
s.mGroupName = rit->second;
|
||||
if((rit = uQ.find("signFlags")) != uQ.end())
|
||||
s.mSignFlags = std::stoul(rit->second);
|
||||
s.mSignFlags = static_cast<uint32_t>(std::stoul(rit->second));
|
||||
if((rit = uQ.find("publishTs")) != uQ.end())
|
||||
s.mPublishTs = static_cast<rstime_t>(std::stoll(rit->second));
|
||||
if((rit = uQ.find("authorId")) != uQ.end())
|
||||
|
@ -5340,7 +5340,7 @@ bool RsGxsNetService::search( const std::string& substring,
|
|||
}
|
||||
}
|
||||
}
|
||||
#else // RS_DEEP_SEARCH
|
||||
#else // RS_DEEP_CHANNEL_INDEX
|
||||
RsGxsGrpMetaTemporaryMap grpMetaMap;
|
||||
{
|
||||
RS_STACK_MUTEX(mNxsMutex) ;
|
||||
|
@ -5366,7 +5366,7 @@ bool RsGxsNetService::search( const std::string& substring,
|
|||
|
||||
group_infos.push_back(s);
|
||||
}
|
||||
#endif // RS_DEEP_SEARCH
|
||||
#endif // RS_DEEP_CHANNEL_INDEX
|
||||
|
||||
#ifdef NXS_NET_DEBUG_8
|
||||
GXSNETDEBUG___ << " performing local substring search in response to distant request. Found " << group_infos.size() << " responses." << std::endl;
|
||||
|
|
|
@ -29,8 +29,8 @@
|
|||
#include "pqi/pqihash.h"
|
||||
#include "gxs/rsgixs.h"
|
||||
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
# include "deep_search/deep_search.h"
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
# include "deep_search/channelsindex.hpp"
|
||||
# include "services/p3gxschannels.h"
|
||||
# include "rsitems/rsgxschannelitems.h"
|
||||
#endif
|
||||
|
@ -148,12 +148,12 @@ bool RsGxsMessageCleanUp::clean()
|
|||
RsGxsIntegrityCheck::RsGxsIntegrityCheck(
|
||||
RsGeneralDataService* const dataService, RsGenExchange* genex,
|
||||
RsSerialType&
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
serializer
|
||||
#endif
|
||||
, RsGixs* gixs )
|
||||
: mDs(dataService), mGenExchangeClient(genex),
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
mSerializer(serializer),
|
||||
#endif
|
||||
mDone(false), mIntegrityMutex("integrity"), mGixs(gixs) {}
|
||||
|
@ -168,7 +168,7 @@ void RsGxsIntegrityCheck::run()
|
|||
|
||||
bool RsGxsIntegrityCheck::check()
|
||||
{
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
bool isGxsChannels = mGenExchangeClient->serviceType() == RS_SERVICE_GXS_TYPE_CHANNELS;
|
||||
std::set<RsGxsGroupId> indexedGroups;
|
||||
#endif
|
||||
|
@ -221,7 +221,7 @@ bool RsGxsIntegrityCheck::check()
|
|||
}
|
||||
else msgIds.erase(msgIds.find(grp->grpId));
|
||||
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
if( isGxsChannels
|
||||
&& grp->metaData->mCircleType == GXS_CIRCLE_TYPE_PUBLIC
|
||||
&& grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED )
|
||||
|
@ -241,7 +241,7 @@ bool RsGxsIntegrityCheck::check()
|
|||
cg.mMeta = meta;
|
||||
|
||||
indexedGroups.insert(grp->grpId);
|
||||
DeepSearch::indexChannelGroup(cg);
|
||||
DeepChannelsIndex::indexChannelGroup(cg);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -256,14 +256,15 @@ bool RsGxsIntegrityCheck::check()
|
|||
|
||||
delete rIt;
|
||||
}
|
||||
#endif
|
||||
#endif // def RS_DEEP_CHANNEL_INDEX
|
||||
}
|
||||
else
|
||||
{
|
||||
grpsToDel.push_back(grp->grpId);
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
if(isGxsChannels) DeepSearch::removeChannelFromIndex(grp->grpId);
|
||||
#endif
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
if(isGxsChannels)
|
||||
DeepChannelsIndex::removeChannelFromIndex(grp->grpId);
|
||||
#endif // def RS_DEEP_CHANNEL_INDEX
|
||||
}
|
||||
|
||||
if( !(grp->metaData->mSubscribeFlags & GXS_SERV::GROUP_SUBSCRIBE_SUBSCRIBED) &&
|
||||
|
@ -320,10 +321,10 @@ bool RsGxsIntegrityCheck::check()
|
|||
if (nxsMsgIt == nxsMsgV.end())
|
||||
{
|
||||
msgsToDel[grpId].insert(msgId);
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
if(isGxsChannels)
|
||||
DeepSearch::removeChannelPostFromIndex(grpId, msgId);
|
||||
#endif
|
||||
DeepChannelsIndex::removeChannelPostFromIndex(grpId, msgId);
|
||||
#endif // def RS_DEEP_CHANNEL_INDEX
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -348,14 +349,15 @@ bool RsGxsIntegrityCheck::check()
|
|||
<< " with wrong hash or null meta data. meta="
|
||||
<< (void*)msg->metaData << std::endl;
|
||||
msgsToDel[msg->grpId].insert(msg->msgId);
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
if(isGxsChannels)
|
||||
DeepSearch::removeChannelPostFromIndex(msg->grpId, msg->msgId);
|
||||
#endif
|
||||
DeepChannelsIndex::removeChannelPostFromIndex(
|
||||
msg->grpId, msg->msgId );
|
||||
#endif // def RS_DEEP_CHANNEL_INDEX
|
||||
}
|
||||
else if (subscribed_groups.count(msg->metaData->mGroupId))
|
||||
{
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
if( isGxsChannels
|
||||
&& indexedGroups.count(msg->metaData->mGroupId) )
|
||||
{
|
||||
|
@ -373,7 +375,7 @@ bool RsGxsIntegrityCheck::check()
|
|||
cgIt->toChannelPost(cg, false);
|
||||
cg.mMeta = meta;
|
||||
|
||||
DeepSearch::indexChannelPost(cg);
|
||||
DeepChannelsIndex::indexChannelPost(cg);
|
||||
}
|
||||
else if(dynamic_cast<RsGxsCommentItem*>(rIt)) {}
|
||||
else if(dynamic_cast<RsGxsVoteItem*>(rIt)) {}
|
||||
|
@ -391,7 +393,7 @@ bool RsGxsIntegrityCheck::check()
|
|||
|
||||
delete rIt;
|
||||
}
|
||||
#endif
|
||||
#endif // def RS_DEEP_CHANNEL_INDEX
|
||||
|
||||
if(!msg->metaData->mAuthorId.isNull())
|
||||
{
|
||||
|
|
|
@ -213,7 +213,7 @@ private:
|
|||
|
||||
RsGeneralDataService* const mDs;
|
||||
RsGenExchange *mGenExchangeClient;
|
||||
#ifdef RS_DEEP_SEARCH
|
||||
#ifdef RS_DEEP_CHANNEL_INDEX
|
||||
RsSerialType& mSerializer;
|
||||
#endif
|
||||
bool mDone;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue