First working prototype of GXS service search

Channels are now able to take advantage of the new deep search
  goodies

Rebase on top of master 2021/10/19
This commit is contained in:
Gioacchino Mazzurco 2021-02-08 17:04:04 +01:00
parent ab349a8157
commit 1b551d809f
No known key found for this signature in database
GPG key ID: A1FBCA3872E87051
30 changed files with 1455 additions and 874 deletions

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -20,16 +20,23 @@
#include "deep_search/channelsindex.hpp"
#include "deep_search/commonutils.hpp"
#include "retroshare/rsinit.h"
#include "util/rsdebuglevel3.h"
uint32_t DeepChannelsIndex::search(
/*static*/ std::string DeepChannelsIndex::dbDefaultPath()
{ return RsAccounts::AccountDirectory() + "/deep_channels_xapian_db"; }
std::error_condition DeepChannelsIndex::search(
const std::string& queryStr,
std::vector<DeepChannelsSearchResult>& results, uint32_t maxResults )
{
RS_DBG3(queryStr);
results.clear();
std::unique_ptr<Xapian::Database> dbPtr(
DeepSearch::openReadOnlyDatabase(dbPath()) );
if(!dbPtr) return 0;
DeepSearch::openReadOnlyDatabase(mDbPath) );
if(!dbPtr) return std::errc::bad_file_descriptor;
Xapian::Database& db(*dbPtr);
@ -63,17 +70,13 @@ uint32_t DeepChannelsIndex::search(
results.push_back(s);
}
return static_cast<uint32_t>(results.size());
return std::error_condition();
}
void DeepChannelsIndex::indexChannelGroup(const RsGxsChannelGroup& chan)
std::error_condition DeepChannelsIndex::indexChannelGroup(
const RsGxsChannelGroup& chan )
{
std::unique_ptr<Xapian::WritableDatabase> dbPtr(
DeepSearch::openWritableDatabase(
dbPath(), Xapian::DB_CREATE_OR_OPEN ) );
if(!dbPtr) return;
Xapian::WritableDatabase& db(*dbPtr);
RS_DBG4(chan);
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
@ -94,21 +97,8 @@ void DeepChannelsIndex::indexChannelGroup(const RsGxsChannelGroup& chan)
termgenerator.increase_termpos();
termgenerator.index_text(chan.mDescription);
RsUrl chanUrl; chanUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", chan.mMeta.mGroupId.toStdString());
const std::string idTerm("Q" + chanUrl.toString());
chanUrl.setQueryKV("publishTs", std::to_string(chan.mMeta.mPublishTs));
chanUrl.setQueryKV("name", chan.mMeta.mGroupName);
if(!chan.mMeta.mAuthorId.isNull())
chanUrl.setQueryKV("authorId", chan.mMeta.mAuthorId.toStdString());
if(chan.mMeta.mSignFlags)
chanUrl.setQueryKV( "signFlags",
std::to_string(chan.mMeta.mSignFlags) );
std::string rsLink(chanUrl.toString());
// store the RS link so we are able to retrive it on matching search
const std::string rsLink(channelIndexId(chan.mMeta.mGroupId));
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
@ -117,35 +107,32 @@ void DeepChannelsIndex::indexChannelGroup(const RsGxsChannelGroup& chan)
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer. "Q" prefix is a Xapian convention for unique id term.
const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
mWriteQueue.push( [idTerm, doc](Xapian::WritableDatabase& db)
{ db.replace_document(idTerm, doc); } );
return std::error_condition();
}
void DeepChannelsIndex::removeChannelFromIndex(RsGxsGroupId grpId)
std::error_condition DeepChannelsIndex::removeChannelFromIndex(
const RsGxsGroupId& grpId )
{
RS_DBG3(grpId);
// "Q" prefix is a Xapian convention for unique id term.
RsUrl chanUrl; chanUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", grpId.toStdString());
std::string idTerm("Q" + chanUrl.toString());
const std::string idTerm("Q" + channelIndexId(grpId));
mWriteQueue.push( [idTerm](Xapian::WritableDatabase& db)
{ db.delete_document(idTerm); } );
std::unique_ptr<Xapian::WritableDatabase> dbPtr(
DeepSearch::openWritableDatabase(
dbPath(), Xapian::DB_CREATE_OR_OPEN ) );
if(!dbPtr) return;
Xapian::WritableDatabase& db(*dbPtr);
db.delete_document(idTerm);
return std::error_condition();
}
void DeepChannelsIndex::indexChannelPost(const RsGxsChannelPost& post)
std::error_condition DeepChannelsIndex::indexChannelPost(
const RsGxsChannelPost& post )
{
std::unique_ptr<Xapian::WritableDatabase> dbPtr(
DeepSearch::openWritableDatabase(
dbPath(), Xapian::DB_CREATE_OR_OPEN ) );
if(!dbPtr) return;
Xapian::WritableDatabase& db(*dbPtr);
RS_DBG4(post);
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
@ -160,21 +147,16 @@ void DeepChannelsIndex::indexChannelPost(const RsGxsChannelPost& post)
termgenerator.index_text(
DeepSearch::timetToXapianDate(post.mMeta.mPublishTs), 1, "D" );
// TODO: we should strip out HTML tags instead of skipping indexing
// Avoid indexing HTML
bool isPlainMsg =
post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>';
if(isPlainMsg)
termgenerator.index_text(post.mMsg, 1, "XD");
// Avoid indexing RetroShare-gui HTML tags
const std::string cleanMsg = DeepSearch::simpleTextHtmlExtract(post.mMsg);
termgenerator.index_text(
DeepSearch::simpleTextHtmlExtract(post.mMsg), 1, "XD" );
// Index fields without prefixes for general search.
termgenerator.index_text(post.mMeta.mMsgName);
if(isPlainMsg)
{
termgenerator.increase_termpos();
termgenerator.index_text(post.mMsg);
}
termgenerator.increase_termpos();
termgenerator.index_text(cleanMsg);
for(const RsGxsFile& attachment : post.mFiles)
{
@ -184,47 +166,50 @@ void DeepChannelsIndex::indexChannelPost(const RsGxsChannelPost& post)
termgenerator.index_text(attachment.mName);
}
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
RsUrl postUrl; postUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", post.mMeta.mGroupId.toStdString())
.setQueryKV("msgid", post.mMeta.mMsgId.toStdString());
std::string idTerm("Q" + postUrl.toString());
postUrl.setQueryKV("publishTs", std::to_string(post.mMeta.mPublishTs));
postUrl.setQueryKV("name", post.mMeta.mMsgName);
postUrl.setQueryKV("authorId", post.mMeta.mAuthorId.toStdString());
std::string rsLink(postUrl.toString());
// store the RS link so we are able to retrive it on matching search
const std::string rsLink(postIndexId(post.mMeta.mGroupId, post.mMeta.mMsgId));
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
if(isPlainMsg)
doc.set_data(post.mMeta.mMsgName + "\n" + post.mMsg);
else doc.set_data(post.mMeta.mMsgName);
doc.set_data(post.mMeta.mMsgName + "\n" + cleanMsg);
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
mWriteQueue.push( [idTerm, doc](Xapian::WritableDatabase& db)
{ db.replace_document(idTerm, doc); } );
return std::error_condition();
}
void DeepChannelsIndex::removeChannelPostFromIndex(
std::error_condition DeepChannelsIndex::removeChannelPostFromIndex(
const RsGxsGroupId& grpId, const RsGxsMessageId& msgId )
{
RS_DBG3(grpId, msgId);
std::string idTerm("Q" + postIndexId(grpId, msgId));
mWriteQueue.push( [idTerm](Xapian::WritableDatabase& db)
{ db.delete_document(idTerm); } );
return std::error_condition();
}
/*static*/ std::string DeepChannelsIndex::channelIndexId(RsGxsGroupId grpId)
{
RsUrl channelIndexId(RsGxsChannels::DEFAULT_CHANNEL_BASE_URL);
channelIndexId.setQueryKV(
RsGxsChannels::CHANNEL_URL_ID_FIELD, grpId.toStdString() );
return channelIndexId.toString();
}
/*static*/ std::string DeepChannelsIndex::postIndexId(
RsGxsGroupId grpId, RsGxsMessageId msgId )
{
RsUrl postUrl; postUrl
.setScheme("retroshare").setPath("/channel")
.setQueryKV("id", grpId.toStdString())
.setQueryKV("msgid", msgId.toStdString());
// "Q" prefix is a Xapian convention for unique id term.
std::string idTerm("Q" + postUrl.toString());
std::unique_ptr<Xapian::WritableDatabase> dbPtr(
DeepSearch::openWritableDatabase(
dbPath(), Xapian::DB_CREATE_OR_OPEN ) );
if(!dbPtr) return;
Xapian::WritableDatabase& db(*dbPtr);
db.delete_document(idTerm);
RsUrl postIndexId(RsGxsChannels::DEFAULT_CHANNEL_BASE_URL);
postIndexId.setQueryKV(RsGxsChannels::CHANNEL_URL_ID_FIELD, grpId.toStdString());
postIndexId.setQueryKV(RsGxsChannels::CHANNEL_URL_MSG_ID_FIELD, msgId.toStdString());
return postIndexId.toString();
}

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -24,8 +24,8 @@
#include "util/rstime.h"
#include "retroshare/rsgxschannels.h"
#include "retroshare/rsinit.h"
#include "util/rsurl.h"
#include "deep_search/commonutils.hpp"
struct DeepChannelsSearchResult
{
@ -36,28 +36,34 @@ struct DeepChannelsSearchResult
struct DeepChannelsIndex
{
explicit DeepChannelsIndex(const std::string& dbPath) :
mDbPath(dbPath), mWriteQueue(dbPath) {}
/**
* @brief Search indexed GXS groups and messages
* @param[in] maxResults maximum number of acceptable search results, 0 for
* no limits
* @return search results count
*/
static uint32_t search( const std::string& queryStr,
std::vector<DeepChannelsSearchResult>& results,
uint32_t maxResults = 100 );
std::error_condition search(
const std::string& queryStr,
std::vector<DeepChannelsSearchResult>& results,
uint32_t maxResults = 100 );
static void indexChannelGroup(const RsGxsChannelGroup& chan);
std::error_condition indexChannelGroup(const RsGxsChannelGroup& chan);
static void removeChannelFromIndex(RsGxsGroupId grpId);
std::error_condition removeChannelFromIndex(const RsGxsGroupId& grpId);
static void indexChannelPost(const RsGxsChannelPost& post);
std::error_condition indexChannelPost(const RsGxsChannelPost& post);
static void removeChannelPostFromIndex(
RsGxsGroupId grpId, RsGxsMessageId msgId );
std::error_condition removeChannelPostFromIndex(
const RsGxsGroupId& grpId, const RsGxsMessageId& msgId );
static uint32_t indexFile(const std::string& path);
static std::string dbDefaultPath();
private:
static std::string channelIndexId(RsGxsGroupId grpId);
static std::string postIndexId(RsGxsGroupId grpId, RsGxsMessageId msgId);
enum : Xapian::valueno
{
@ -68,10 +74,7 @@ private:
BAD_VALUENO = Xapian::BAD_VALUENO
};
static const std::string& dbPath()
{
static const std::string dbDir =
RsAccounts::AccountDirectory() + "/deep_channels_xapian_db";
return dbDir;
}
const std::string mDbPath;
DeepSearch::StubbornWriteOpQueue mWriteQueue;
};

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -18,39 +18,17 @@
* *
*******************************************************************************/
#include <algorithm>
#include <thread>
#include "deep_search/commonutils.hpp"
#include "util/stacktrace.h"
#include "util/rsdebug.h"
#include "util/rsthreads.h"
#include "util/rsdebuglevel0.h"
namespace DeepSearch
{
std::unique_ptr<Xapian::WritableDatabase> openWritableDatabase(
const std::string& path, int flags, int blockSize )
{
try
{
std::unique_ptr<Xapian::WritableDatabase> dbPtr(
new Xapian::WritableDatabase(path, flags, blockSize) );
return dbPtr;
}
catch(Xapian::DatabaseLockError)
{
RsErr() << __PRETTY_FUNCTION__ << " Failed aquiring Xapian DB lock "
<< path << std::endl;
print_stacktrace();
}
catch(...)
{
RsErr() << __PRETTY_FUNCTION__ << " Xapian DB is apparently corrupted "
<< "deleting it might help without causing any harm: "
<< path << std::endl;
print_stacktrace();
}
return nullptr;
}
std::unique_ptr<Xapian::Database> openReadOnlyDatabase(
const std::string& path, int flags )
{
@ -60,12 +38,12 @@ std::unique_ptr<Xapian::Database> openReadOnlyDatabase(
new Xapian::Database(path, flags) );
return dbPtr;
}
catch(Xapian::DatabaseOpeningError e)
catch(Xapian::DatabaseOpeningError& e)
{
RsWarn() << __PRETTY_FUNCTION__ << " " << e.get_msg()
<< ", probably nothing has been indexed yet." << std::endl;
}
catch(Xapian::DatabaseLockError)
catch(Xapian::DatabaseLockError&)
{
RsErr() << __PRETTY_FUNCTION__ << " Failed aquiring Xapian DB lock "
<< path << std::endl;
@ -90,4 +68,116 @@ std::string timetToXapianDate(const rstime_t& time)
return date;
}
StubbornWriteOpQueue::~StubbornWriteOpQueue()
{
auto fErr = flush(0);
if(fErr)
{
RS_FATAL( "Flush failed on destruction ", mOpStore.size(),
" operations irreparably lost ", fErr );
print_stacktrace();
}
}
void StubbornWriteOpQueue::push(write_op op)
{
RS_DBG4("");
{
std::unique_lock<std::mutex> lock(mQueueMutex);
mOpStore.push(op);
}
flush();
}
std::error_condition StubbornWriteOpQueue::flush(
rstime_t acceptDelay, rstime_t callTS )
{
RS_DBG4("");
{
// Return without attempt to open the database if the queue is empty
std::unique_lock<std::mutex> lock(mQueueMutex);
if(mOpStore.empty()) return std::error_condition();
}
std::unique_ptr<Xapian::WritableDatabase> dbPtr;
try
{
dbPtr = std::make_unique<Xapian::WritableDatabase>(
mDbPath, Xapian::DB_CREATE_OR_OPEN );
}
catch(Xapian::DatabaseLockError)
{
if(acceptDelay)
{
rstime_t tNow = time(nullptr);
rstime_t maxRemaining = tNow - (callTS + acceptDelay);
if(maxRemaining > 0)
{
std::chrono::milliseconds interval(
std::max(50l, maxRemaining*1000/5) );
RS_DBG3( "Cannot acquire database write lock, retrying in:",
interval.count(), "ms" );
RsThread::async([this, acceptDelay, callTS, interval]()
{
std::this_thread::sleep_for(interval);
flush(acceptDelay, callTS);
});
return std::error_condition();
}
else
{
RS_ERR(std::errc::timed_out, acceptDelay, callTS, tNow);
return std::errc::timed_out;
}
}
else return std::errc::resource_unavailable_try_again;
}
catch(...)
{
RS_ERR("Xapian DB ", mDbPath, " is apparently corrupted");
print_stacktrace();
return std::errc::io_error;
}
std::unique_lock<std::mutex> lock(mQueueMutex);
while(!mOpStore.empty())
{
auto op = mOpStore.front(); mOpStore.pop();
op(*dbPtr);
}
return std::error_condition();
}
std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc)
{
if(rsHtmlDoc.empty()) return rsHtmlDoc;
const bool isPlainMsg =
rsHtmlDoc[0] != '<' || rsHtmlDoc[rsHtmlDoc.size() - 1] != '>';
if(isPlainMsg) return rsHtmlDoc;
auto oSize = rsHtmlDoc.size();
auto bodyTagBegin(rsHtmlDoc.find("<body"));
if(bodyTagBegin >= oSize) return rsHtmlDoc;
auto bodyTagEnd(rsHtmlDoc.find(">", bodyTagBegin));
if(bodyTagEnd >= oSize) return rsHtmlDoc;
std::string retVal(rsHtmlDoc.substr(bodyTagEnd+1));
std::string::size_type oPos;
std::string::size_type cPos;
while((oPos = retVal.find("<")) < retVal.size())
{
if((cPos = retVal.find(">")) <= retVal.size())
retVal.erase(oPos, 1+cPos-oPos);
else break;
}
return retVal;
}
}

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -21,6 +21,9 @@
#include <xapian.h>
#include <memory>
#include <functional>
#include <queue>
#include <mutex>
#include "util/rstime.h"
@ -33,13 +36,34 @@
namespace DeepSearch
{
std::unique_ptr<Xapian::WritableDatabase> openWritableDatabase(
const std::string& path, int flags = 0, int blockSize = 0 );
typedef std::function<void(Xapian::WritableDatabase&)> write_op;
std::unique_ptr<Xapian::Database> openReadOnlyDatabase(
const std::string& path, int flags = 0 );
std::string timetToXapianDate(const rstime_t& time);
std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc);
struct StubbornWriteOpQueue
{
explicit StubbornWriteOpQueue(const std::string& dbPath):
mDbPath(dbPath) {}
~StubbornWriteOpQueue();
void push(write_op op);
std::error_condition flush(
rstime_t acceptDelay = 20, rstime_t callTS = time(nullptr) );
private:
std::queue<write_op> mOpStore;
rstime_t mLastFlush;
std::mutex mQueueMutex;
const std::string mDbPath;
};
}

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -18,47 +18,47 @@
* *
*******************************************************************************/
#include "deep_search/filesindex.hpp"
#include "deep_search/commonutils.hpp"
#include "util/rsdebug.h"
#include "retroshare/rsinit.h"
#include "retroshare/rsversion.h"
#include <utility>
#include "deep_search/filesindex.hpp"
#include "deep_search/commonutils.hpp"
#include "util/rsdebuglevel1.h"
#include "retroshare/rsinit.h"
#include "retroshare/rsversion.h"
/*static*/ std::multimap<int, DeepFilesIndex::IndexerFunType>
DeepFilesIndex::indexersRegister = {};
bool DeepFilesIndex::indexFile(
std::error_condition DeepFilesIndex::indexFile(
const std::string& path, const std::string& name,
const RsFileHash& hash )
{
auto dbPtr = DeepSearch::openWritableDatabase(
mDbPath, Xapian::DB_CREATE_OR_OPEN );
if(!dbPtr) return false;
Xapian::WritableDatabase& db(*dbPtr);
const std::string hashString = hash.toStdString();
const std::string idTerm("Q" + hashString);
Xapian::Document oldDoc;
Xapian::PostingIterator pIt = db.postlist_begin(idTerm);
if( pIt != db.postlist_end(idTerm) )
auto db = DeepSearch::openReadOnlyDatabase(mDbPath);
if(db)
{
oldDoc = db.get_document(*pIt);
if( oldDoc.get_value(INDEXER_VERSION_VALUENO) ==
RS_HUMAN_READABLE_VERSION &&
std::stoull(oldDoc.get_value(INDEXERS_COUNT_VALUENO)) ==
indexersRegister.size() )
Xapian::Document oldDoc;
Xapian::PostingIterator pIt = db->postlist_begin(idTerm);
if( pIt != db->postlist_end(idTerm) )
{
/* Looks like this file has already been indexed by this RetroShare
* exact version, so we can skip it. If the version was different it
* made sense to reindex it as better indexers might be available
* since last time it was indexed */
Dbg3() << __PRETTY_FUNCTION__ << " skipping laready indexed file: "
<< hash << " " << name << std::endl;
return true;
oldDoc = db->get_document(*pIt);
if( oldDoc.get_value(INDEXER_VERSION_VALUENO) ==
RS_HUMAN_READABLE_VERSION &&
std::stoull(oldDoc.get_value(INDEXERS_COUNT_VALUENO)) ==
indexersRegister.size() )
{
/* Looks like this file has already been indexed by this
* RetroShare exact version, so we can skip it. If the version
* was different it made sense to reindex it as better indexers
* might be available since last time it was indexed */
RS_DBG3("skipping laready indexed file: ", hash, " ", name);
return std::error_condition();
}
}
db.reset(); // Release DB read lock ASAP
}
Xapian::Document doc;
@ -80,22 +80,21 @@ bool DeepFilesIndex::indexFile(
doc.add_value(
INDEXERS_COUNT_VALUENO,
std::to_string(indexersRegister.size()) );
db.replace_document(idTerm, doc);
return true;
mWriteQueue.push([idTerm, doc](Xapian::WritableDatabase& db)
{ db.replace_document(idTerm, doc); });
return std::error_condition();
}
bool DeepFilesIndex::removeFileFromIndex(const RsFileHash& hash)
std::error_condition DeepFilesIndex::removeFileFromIndex(const RsFileHash& hash)
{
Dbg3() << __PRETTY_FUNCTION__ << " removing file from index: "
<< hash << std::endl;
RS_DBG3(hash);
std::unique_ptr<Xapian::WritableDatabase> db =
DeepSearch::openWritableDatabase(mDbPath, Xapian::DB_CREATE_OR_OPEN);
if(!db) return false;
mWriteQueue.push([hash](Xapian::WritableDatabase& db)
{ db.delete_document("Q" + hash.toStdString()); });
db->delete_document("Q" + hash.toStdString());
return true;
return std::error_condition();
}
/*static*/ std::string DeepFilesIndex::dbDefaultPath()
@ -104,20 +103,20 @@ bool DeepFilesIndex::removeFileFromIndex(const RsFileHash& hash)
/*static*/ bool DeepFilesIndex::registerIndexer(
int order, const DeepFilesIndex::IndexerFunType& indexerFun )
{
Dbg1() << __PRETTY_FUNCTION__ << " " << order << std::endl;
RS_DBG1(order);
indexersRegister.insert(std::make_pair(order, indexerFun));
return true;
}
uint32_t DeepFilesIndex::search(
std::error_condition DeepFilesIndex::search(
const std::string& queryStr,
std::vector<DeepFilesSearchResult>& results, uint32_t maxResults )
{
results.clear();
auto dbPtr = DeepSearch::openReadOnlyDatabase(mDbPath);
if(!dbPtr) return 0;
if(!dbPtr) return std::errc::bad_file_descriptor;
Xapian::Database& db(*dbPtr);
// Set up a QueryParser with a stemmer and suitable prefixes.
@ -151,7 +150,7 @@ uint32_t DeepFilesIndex::search(
results.push_back(s);
}
return static_cast<uint32_t>(results.size());
return std::error_condition();
}

View file

@ -1,8 +1,8 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
@ -19,9 +19,6 @@
*******************************************************************************/
#pragma once
#include "retroshare/rstypes.h"
#include "util/rsdebug.h"
#include <string>
#include <cstdint>
#include <vector>
@ -29,6 +26,9 @@
#include <map>
#include <functional>
#include "retroshare/rstypes.h"
#include "deep_search/commonutils.hpp"
struct DeepFilesSearchResult
{
DeepFilesSearchResult() : mWeight(0) {}
@ -41,7 +41,8 @@ struct DeepFilesSearchResult
class DeepFilesIndex
{
public:
DeepFilesIndex(const std::string& dbPath) : mDbPath(dbPath) {}
explicit DeepFilesIndex(const std::string& dbPath):
mDbPath(dbPath), mWriteQueue(dbPath) {}
/**
* @brief Search indexed files
@ -49,7 +50,7 @@ public:
* no limits
* @return search results count
*/
uint32_t search( const std::string& queryStr,
std::error_condition search( const std::string& queryStr,
std::vector<DeepFilesSearchResult>& results,
uint32_t maxResults = 100 );
@ -57,7 +58,7 @@ public:
* @return false if file could not be indexed because of error or
* unsupported type, true otherwise.
*/
bool indexFile(
std::error_condition indexFile(
const std::string& path, const std::string& name,
const RsFileHash& hash );
@ -65,7 +66,7 @@ public:
* @brief Remove file entry from database
* @return false on error, true otherwise.
*/
bool removeFileFromIndex(const RsFileHash& hash);
std::error_condition removeFileFromIndex(const RsFileHash& hash);
static std::string dbDefaultPath();
@ -96,8 +97,8 @@ private:
const std::string mDbPath;
DeepSearch::StubbornWriteOpQueue mWriteQueue;
/** Storage for indexers function by order */
static std::multimap<int, IndexerFunType> indexersRegister;
RS_SET_CONTEXT_DEBUG_LEVEL(1)
};