Add search capability to DeepSearch

This commit is contained in:
Gioacchino Mazzurco 2018-06-10 19:04:11 +02:00
parent c15ae864b5
commit 0f63283f96
No known key found for this signature in database
GPG Key ID: A1FBCA3872E87051
3 changed files with 83 additions and 19 deletions

View File

@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <vector>
#include <xapian.h> #include <xapian.h>
#include "retroshare/rsgxschannels.h" #include "retroshare/rsgxschannels.h"
@ -25,7 +26,55 @@ struct DeepSearch
{ {
//DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {} //DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {}
static void search(/*query*/) { /*return all matching results*/ } struct SearchResult
{
// TODO: Use RsUrl from extra_locators branch instead of plain string
std::string mUrl;
std::string mSnippet;
};
/**
* @return search results count
*/
static uint32_t search( const std::string& queryStr,
std::vector<SearchResult>& results,
uint32_t maxResults = 100 )
{
results.clear();
// Open the database we're going to search.
Xapian::Database db(mDbPath);
// Set up a QueryParser with a stemmer and suitable prefixes.
Xapian::QueryParser queryparser;
//queryparser.set_stemmer(Xapian::Stem("en"));
queryparser.set_stemming_strategy(queryparser.STEM_SOME);
// Start of prefix configuration.
//queryparser.add_prefix("title", "S");
//queryparser.add_prefix("description", "XD");
// End of prefix configuration.
// And parse the query.
Xapian::Query query = queryparser.parse_query(queryStr);
// Use an Enquire object on the database to run the query.
Xapian::Enquire enquire(db);
enquire.set_query(query);
Xapian::MSet mset = enquire.get_mset(0, maxResults);
for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m )
{
const Xapian::Document& doc = m.get_document();
SearchResult s;
s.mUrl = doc.get_value(URL_VALUENO);
s.mSnippet = mset.snippet(doc.get_data());
results.push_back(s);
}
return results.size();
}
static void indexChannelGroup(const RsGxsChannelGroup& chan) static void indexChannelGroup(const RsGxsChannelGroup& chan)
@ -49,18 +98,26 @@ struct DeepSearch
termgenerator.increase_termpos(); termgenerator.increase_termpos();
termgenerator.index_text(chan.mDescription); termgenerator.index_text(chan.mDescription);
std::string rsLink("retroshare://channel?id=");
rsLink += chan.mMeta.mGroupId.toStdString();
// store the RS link so we are able to retrieve it on matching search
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription);
// We use the identifier to ensure each object ends up in the // We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the // database only once no matter how many times we run the
// indexer. // indexer. "Q" prefix is a Xapian convention for unique id term.
std::string idTerm("Qretroshare://channel?id="); const std::string idTerm("Q" + rsLink);
idTerm += chan.mMeta.mGroupId.toStdString();
doc.add_boolean_term(idTerm); doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc); db.replace_document(idTerm, doc);
} }
static void removeChannelFromIndex(RsGxsGroupId grpId) static void removeChannelFromIndex(RsGxsGroupId grpId)
{ {
// "Q" prefix is a Xapian convention for unique id term.
std::string idTerm("Qretroshare://channel?id="); std::string idTerm("Qretroshare://channel?id=");
idTerm += grpId.toStdString(); idTerm += grpId.toStdString();
@ -100,6 +157,17 @@ struct DeepSearch
db.replace_document(idTerm, doc); db.replace_document(idTerm, doc);
} }
private:
enum : Xapian::valueno
{
	/// Value slot in which the retroshare url of an indexed document is kept
	URL_VALUENO = 0,

	/// Same value as Xapian::BAD_VALUENO, @see Xapian::BAD_VALUENO
	BAD_VALUENO = Xapian::BAD_VALUENO
};
static std::string mDbPath; static std::string mDbPath;
}; };

View File

@ -4,6 +4,7 @@
* RetroShare C++ Interface. Generic routines that are useful in GXS * RetroShare C++ Interface. Generic routines that are useful in GXS
* *
* Copyright 2013-2013 by Christopher Evi-Parker * Copyright 2013-2013 by Christopher Evi-Parker
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org>
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public * modify it under the terms of the GNU Library General Public
@ -165,8 +166,6 @@ bool RsGxsIntegrityCheck::check()
{ {
#ifdef RS_DEEP_SEARCH #ifdef RS_DEEP_SEARCH
bool isGxsChannels = dynamic_cast<p3GxsChannels*>(mGenExchangeClient); bool isGxsChannels = dynamic_cast<p3GxsChannels*>(mGenExchangeClient);
std::cout << __PRETTY_FUNCTION__ << " isGxsChannels: " << isGxsChannels
<< std::endl;
#endif #endif
// first take out all the groups // first take out all the groups
@ -220,21 +219,17 @@ bool RsGxsIntegrityCheck::check()
cg.mMeta = meta; cg.mMeta = meta;
DeepSearch::indexChannelGroup(cg); DeepSearch::indexChannelGroup(cg);
std::cout << __PRETTY_FUNCTION__ << " ||Channel: "
<< meta.mGroupName << " ||Description: "
<< cg.mDescription << std::endl;
} }
else else
std::cout << __PRETTY_FUNCTION__ << " ||Group: " {
std::cerr << __PRETTY_FUNCTION__ << " Group: "
<< meta.mGroupId.toStdString() << " "
<< meta.mGroupName << meta.mGroupName
<< " ||doesn't seems a channel" << " doesn't seems a channel, please "
<< " ||grp->grp.bin_data: " << "report to developers"
<< grp->grp.bin_data << std::endl;
<< " ||grp->grp.bin_len: " print_stacktrace();
<< grp->grp.bin_len }
<< " ||rIt: " << rIt << " ||blz: " << blz
<< " ||cgIt: " << cgIt << std::endl;
delete rIt; delete rIt;
} }

View File

@ -4,6 +4,7 @@
* RetroShare C++ Interface. Generic routines that are useful in GXS * RetroShare C++ Interface. Generic routines that are useful in GXS
* *
* Copyright 2013-2013 by Christopher Evi-Parker * Copyright 2013-2013 by Christopher Evi-Parker
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org>
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public * modify it under the terms of the GNU Library General Public