From 0f63283f96f644f475a87315a48583bd8c44267a Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Sun, 10 Jun 2018 19:04:11 +0200 Subject: [PATCH] Add search capability to DeepSearch --- libretroshare/src/deep_search/deep_search.h | 78 +++++++++++++++++++-- libretroshare/src/gxs/rsgxsutil.cc | 23 +++--- libretroshare/src/gxs/rsgxsutil.h | 1 + 3 files changed, 83 insertions(+), 19 deletions(-) diff --git a/libretroshare/src/deep_search/deep_search.h b/libretroshare/src/deep_search/deep_search.h index 6af963c6d..ae5a97f2e 100644 --- a/libretroshare/src/deep_search/deep_search.h +++ b/libretroshare/src/deep_search/deep_search.h @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include "retroshare/rsgxschannels.h" @@ -25,7 +26,55 @@ struct DeepSearch { //DeepSearch(const std::string& dbPath) : mDbPath(dbPath) {} - static void search(/*query*/) { /*return all matching results*/ } + struct SearchResult + { + // TODO: Use RsUrl from extra_locators branch instead of plain string + std::string mUrl; + std::string mSnippet; + }; + + /** + * @return search results count + */ + static uint32_t search( const std::string& queryStr, + std::vector& results, + uint32_t maxResults = 100 ) + { + results.clear(); + + // Open the database we're going to search. + Xapian::Database db(mDbPath); + + // Set up a QueryParser with a stemmer and suitable prefixes. + Xapian::QueryParser queryparser; + //queryparser.set_stemmer(Xapian::Stem("en")); + queryparser.set_stemming_strategy(queryparser.STEM_SOME); + // Start of prefix configuration. + //queryparser.add_prefix("title", "S"); + //queryparser.add_prefix("description", "XD"); + // End of prefix configuration. + + // And parse the query. + Xapian::Query query = queryparser.parse_query(queryStr); + + // Use an Enquire object on the database to run the query. 
+ Xapian::Enquire enquire(db); + enquire.set_query(query); + + Xapian::MSet mset = enquire.get_mset(0, maxResults); + + for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m ) + { + const Xapian::Document& doc = m.get_document(); + + SearchResult s; + s.mUrl = doc.get_value(URL_VALUENO); + s.mSnippet = mset.snippet(doc.get_data()); + results.push_back(s); + } + + return results.size(); + } static void indexChannelGroup(const RsGxsChannelGroup& chan) @@ -49,18 +98,26 @@ struct DeepSearch termgenerator.increase_termpos(); termgenerator.index_text(chan.mDescription); + std::string rsLink("retroshare://channel?id="); + rsLink += chan.mMeta.mGroupId.toStdString(); + + // store the RS link so we are able to retrieve it on matching search + doc.add_value(URL_VALUENO, rsLink); + + // Store some fields for display purposes. + doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription); + // We use the identifier to ensure each object ends up in the // database only once no matter how many times we run the - // indexer. - std::string idTerm("Qretroshare://channel?id="); - idTerm += chan.mMeta.mGroupId.toStdString(); - + // indexer. "Q" prefix is a Xapian convention for unique id term. + const std::string idTerm("Q" + rsLink); doc.add_boolean_term(idTerm); db.replace_document(idTerm, doc); } static void removeChannelFromIndex(RsGxsGroupId grpId) { + // "Q" prefix is a Xapian convention for unique id term. 
std::string idTerm("Qretroshare://channel?id="); idTerm += grpId.toStdString(); @@ -100,6 +157,17 @@ struct DeepSearch db.replace_document(idTerm, doc); } +private: + + enum : Xapian::valueno + { + /// Used to store retroshare url of indexed documents + URL_VALUENO, + + /// @see Xapian::BAD_VALUENO + BAD_VALUENO = Xapian::BAD_VALUENO + }; + static std::string mDbPath; }; diff --git a/libretroshare/src/gxs/rsgxsutil.cc b/libretroshare/src/gxs/rsgxsutil.cc index 5e3ed9b83..2011ff8d9 100644 --- a/libretroshare/src/gxs/rsgxsutil.cc +++ b/libretroshare/src/gxs/rsgxsutil.cc @@ -4,6 +4,7 @@ * RetroShare C++ Interface. Generic routines that are useful in GXS * * Copyright 2013-2013 by Christopher Evi-Parker + * Copyright (C) 2018 Gioacchino Mazzurco * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -165,8 +166,6 @@ bool RsGxsIntegrityCheck::check() { #ifdef RS_DEEP_SEARCH bool isGxsChannels = dynamic_cast(mGenExchangeClient); - std::cout << __PRETTY_FUNCTION__ << " isGxsChannels: " << isGxsChannels - << std::endl; #endif // first take out all the groups @@ -220,21 +219,17 @@ bool RsGxsIntegrityCheck::check() cg.mMeta = meta; DeepSearch::indexChannelGroup(cg); - - std::cout << __PRETTY_FUNCTION__ << " ||Channel: " - << meta.mGroupName << " ||Description: " - << cg.mDescription << std::endl; } else - std::cout << __PRETTY_FUNCTION__ << " ||Group: " + { + std::cerr << __PRETTY_FUNCTION__ << " Group: " + << meta.mGroupId.toStdString() << " " << meta.mGroupName - << " ||doesn't seems a channel" - << " ||grp->grp.bin_data: " - << grp->grp.bin_data - << " ||grp->grp.bin_len: " - << grp->grp.bin_len - << " ||rIt: " << rIt << " ||blz: " << blz - << " ||cgIt: " << cgIt << std::endl; + << " doesn't seems a channel, please " + << "report to developers" + << std::endl; + print_stacktrace(); + } delete rIt; } diff --git a/libretroshare/src/gxs/rsgxsutil.h b/libretroshare/src/gxs/rsgxsutil.h index 
faea08040..694f22116 100644 --- a/libretroshare/src/gxs/rsgxsutil.h +++ b/libretroshare/src/gxs/rsgxsutil.h @@ -4,6 +4,7 @@ * RetroShare C++ Interface. Generic routines that are useful in GXS * * Copyright 2013-2013 by Christopher Evi-Parker + * Copyright (C) 2018 Gioacchino Mazzurco * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public