Add search capability to DeepSearch

This commit is contained in:
Gioacchino Mazzurco 2018-06-10 19:04:11 +02:00
parent c15ae864b5
commit 0f63283f96
No known key found for this signature in database
GPG Key ID: A1FBCA3872E87051
3 changed files with 83 additions and 19 deletions

View File

@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <vector>
#include <xapian.h>
#include "retroshare/rsgxschannels.h"
@ -25,7 +26,55 @@ struct DeepSearch
{
//DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {}
static void search(/*query*/) { /*return all matching results*/ }
struct SearchResult
{
// TODO: Use RsUrl from extra_locators branch instead of plain string
std::string mUrl;
std::string mSnippet;
};
/**
* @return search results count
*/
static uint32_t search( const std::string& queryStr,
std::vector<SearchResult>& results,
uint32_t maxResults = 100 )
{
results.clear();
// Open the database we're going to search.
Xapian::Database db(mDbPath);
// Set up a QueryParser with a stemmer and suitable prefixes.
Xapian::QueryParser queryparser;
//queryparser.set_stemmer(Xapian::Stem("en"));
queryparser.set_stemming_strategy(queryparser.STEM_SOME);
// Start of prefix configuration.
//queryparser.add_prefix("title", "S");
//queryparser.add_prefix("description", "XD");
// End of prefix configuration.
// And parse the query.
Xapian::Query query = queryparser.parse_query(queryStr);
// Use an Enquire object on the database to run the query.
Xapian::Enquire enquire(db);
enquire.set_query(query);
Xapian::MSet mset = enquire.get_mset(0, maxResults);
for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m )
{
const Xapian::Document& doc = m.get_document();
SearchResult s;
s.mUrl = doc.get_value(URL_VALUENO);
s.mSnippet = mset.snippet(doc.get_data());
results.push_back(s);
}
return results.size();
}
static void indexChannelGroup(const RsGxsChannelGroup& chan)
@ -49,18 +98,26 @@ struct DeepSearch
termgenerator.increase_termpos();
termgenerator.index_text(chan.mDescription);
std::string rsLink("retroshare://channel?id=");
rsLink += chan.mMeta.mGroupId.toStdString();
// store the RS link so we are able to retrive it on matching search
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription);
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
std::string idTerm("Qretroshare://channel?id=");
idTerm += chan.mMeta.mGroupId.toStdString();
// indexer. "Q" prefix is a Xapian convention for unique id term.
const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
db.replace_document(idTerm, doc);
}
static void removeChannelFromIndex(RsGxsGroupId grpId)
{
// "Q" prefix is a Xapian convention for unique id term.
std::string idTerm("Qretroshare://channel?id=");
idTerm += grpId.toStdString();
@ -100,6 +157,17 @@ struct DeepSearch
db.replace_document(idTerm, doc);
}
private:
enum : Xapian::valueno
{
/// Used to store retroshare url of indexed documents
URL_VALUENO,
/// @see Xapian::BAD_VALUENO
BAD_VALUENO = Xapian::BAD_VALUENO
};
static std::string mDbPath;
};

View File

@ -4,6 +4,7 @@
* RetroShare C++ Interface. Generic routines that are useful in GXS
*
* Copyright 2013-2013 by Christopher Evi-Parker
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@ -165,8 +166,6 @@ bool RsGxsIntegrityCheck::check()
{
#ifdef RS_DEEP_SEARCH
bool isGxsChannels = dynamic_cast<p3GxsChannels*>(mGenExchangeClient);
std::cout << __PRETTY_FUNCTION__ << " isGxsChannels: " << isGxsChannels
<< std::endl;
#endif
// first take out all the groups
@ -220,21 +219,17 @@ bool RsGxsIntegrityCheck::check()
cg.mMeta = meta;
DeepSearch::indexChannelGroup(cg);
std::cout << __PRETTY_FUNCTION__ << " ||Channel: "
<< meta.mGroupName << " ||Description: "
<< cg.mDescription << std::endl;
}
else
std::cout << __PRETTY_FUNCTION__ << " ||Group: "
{
std::cerr << __PRETTY_FUNCTION__ << " Group: "
<< meta.mGroupId.toStdString() << " "
<< meta.mGroupName
<< " ||doesn't seems a channel"
<< " ||grp->grp.bin_data: "
<< grp->grp.bin_data
<< " ||grp->grp.bin_len: "
<< grp->grp.bin_len
<< " ||rIt: " << rIt << " ||blz: " << blz
<< " ||cgIt: " << cgIt << std::endl;
<< " doesn't seems a channel, please "
<< "report to developers"
<< std::endl;
print_stacktrace();
}
delete rIt;
}

View File

@ -4,6 +4,7 @@
* RetroShare C++ Interface. Generic routines that are useful in GXS
*
* Copyright 2013-2013 by Christopher Evi-Parker
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public