From 9c38eed6482c70a109143c0a7d960ed12ff9f4c1 Mon Sep 17 00:00:00 2001 From: Gioacchino Mazzurco Date: Fri, 19 Feb 2021 23:23:02 +0100 Subject: [PATCH] Implement deep indexing and search for forums RsGxsNetTunnelService::receiveSearchRequest handle no results case properly RsNxsObserver::handleDistantSearchRequest improve method behaviour documentation RsTurtleClientService Improve documentation --- libretroshare/src/deep_search/commonutils.cpp | 20 + libretroshare/src/deep_search/forumsindex.cpp | 208 ++++++++++ libretroshare/src/deep_search/forumsindex.hpp | 81 ++++ libretroshare/src/gxs/rsgxsnetservice.cc | 6 + libretroshare/src/gxs/rsnxsobserver.h | 8 +- libretroshare/src/retroshare/rsgxschannels.h | 2 +- libretroshare/src/retroshare/rsgxsforums.h | 76 +++- libretroshare/src/rsitems/rsgxsforumitems.h | 62 ++- libretroshare/src/rsserver/rsinit.cc | 34 +- libretroshare/src/services/p3gxsforums.cc | 363 ++++++++++++++++-- libretroshare/src/services/p3gxsforums.h | 52 ++- .../src/turtle/turtleclientservice.h | 73 ++-- retroshare.pri | 6 + 13 files changed, 902 insertions(+), 89 deletions(-) create mode 100644 libretroshare/src/deep_search/forumsindex.cpp create mode 100644 libretroshare/src/deep_search/forumsindex.hpp diff --git a/libretroshare/src/deep_search/commonutils.cpp b/libretroshare/src/deep_search/commonutils.cpp index cbe4ee27b..c3b9c5342 100644 --- a/libretroshare/src/deep_search/commonutils.cpp +++ b/libretroshare/src/deep_search/commonutils.cpp @@ -168,13 +168,33 @@ std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc) std::string retVal(rsHtmlDoc.substr(bodyTagEnd+1)); + // strip also CSS inside + oSize = retVal.size(); + auto styleTagBegin(retVal.find("", styleTagBegin)); + if(styleEnd < oSize) + retVal.erase(styleTagBegin, 8+styleEnd-styleTagBegin); + } + std::string::size_type oPos; std::string::size_type cPos; + int itCount = 0; while((oPos = retVal.find("<")) < retVal.size()) { if((cPos = retVal.find(">")) <= retVal.size()) 
retVal.erase(oPos, 1+cPos-oPos); else break; + + // Avoid infinite loop with crafty input + if(itCount > 1000) + { + RS_WARN( "Breaking stripping loop due to max allowed iterations ", + "rsHtmlDoc: ", rsHtmlDoc, " retVal: ", retVal ); + break; + } + ++itCount; } return retVal; diff --git a/libretroshare/src/deep_search/forumsindex.cpp b/libretroshare/src/deep_search/forumsindex.cpp new file mode 100644 index 000000000..acc7aed9a --- /dev/null +++ b/libretroshare/src/deep_search/forumsindex.cpp @@ -0,0 +1,208 @@ +/******************************************************************************* + * RetroShare full text indexing and search implementation based on Xapian * + * * + * Copyright (C) 2021 Gioacchino Mazzurco * + * Copyright (C) 2021 Asociación Civil Altermundi * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU Affero General Public License version 3 as * + * published by the Free Software Foundation. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Affero General Public License for more details. * + * * + * You should have received a copy of the GNU Affero General Public License * + * along with this program. If not, see . 
* + * * + *******************************************************************************/ + +#include "deep_search/forumsindex.hpp" +#include "deep_search/commonutils.hpp" +#include "retroshare/rsinit.h" +#include "retroshare/rsgxsforums.h" +#include "util/rsdebuglevel4.h" + +std::error_condition DeepForumsIndex::search( + const std::string& queryStr, + std::vector& results, uint32_t maxResults ) +{ + results.clear(); + + std::unique_ptr dbPtr( + DeepSearch::openReadOnlyDatabase(mDbPath) ); + if(!dbPtr) return std::errc::bad_file_descriptor; + + Xapian::Database& db(*dbPtr); + + // Set up a QueryParser with a stemmer and suitable prefixes. + Xapian::QueryParser queryparser; + //queryparser.set_stemmer(Xapian::Stem("en")); + queryparser.set_stemming_strategy(queryparser.STEM_SOME); + // Start of prefix configuration. + //queryparser.add_prefix("title", "S"); + //queryparser.add_prefix("description", "XD"); + // End of prefix configuration. + + // And parse the query. + Xapian::Query query = queryparser.parse_query(queryStr); + + // Use an Enquire object on the database to run the query. + Xapian::Enquire enquire(db); + enquire.set_query(query); + + Xapian::MSet mset = enquire.get_mset( + 0, maxResults ? 
maxResults : db.get_doccount() ); + + for( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m ) + { + const Xapian::Document& doc = m.get_document(); + DeepForumsSearchResult s; + s.mUrl = doc.get_value(URL_VALUENO); +#if XAPIAN_AT_LEAST(1,3,5) + s.mSnippet = mset.snippet(doc.get_data()); +#endif // XAPIAN_AT_LEAST(1,3,5) + results.push_back(s); + } + + return std::error_condition(); +} + +/*static*/ std::string DeepForumsIndex::forumIndexId(const RsGxsGroupId& grpId) +{ + RsUrl forumIndexId(RsGxsForums::DEFAULT_FORUM_BASE_URL); + forumIndexId.setQueryKV( + RsGxsForums::FORUM_URL_ID_FIELD, grpId.toStdString() ); + return forumIndexId.toString(); +} + +/*static*/ std::string DeepForumsIndex::postIndexId( + const RsGxsGroupId& grpId, const RsGxsMessageId& msgId ) +{ + RsUrl postIndexId(RsGxsForums::DEFAULT_FORUM_BASE_URL); + postIndexId.setQueryKV(RsGxsForums::FORUM_URL_ID_FIELD, grpId.toStdString()); + postIndexId.setQueryKV(RsGxsForums::FORUM_URL_MSG_ID_FIELD, msgId.toStdString()); + return postIndexId.toString(); +} + +std::error_condition DeepForumsIndex::indexForumGroup( + const RsGxsForumGroup& forum ) +{ + // Set up a TermGenerator that we'll use in indexing. + Xapian::TermGenerator termgenerator; + //termgenerator.set_stemmer(Xapian::Stem("en")); + + // We make a document and tell the term generator to use this. + Xapian::Document doc; + termgenerator.set_document(doc); + + // Index each field with a suitable prefix. + termgenerator.index_text(forum.mMeta.mGroupName, 1, "G"); + termgenerator.index_text( + DeepSearch::timetToXapianDate(forum.mMeta.mPublishTs), 1, "D" ); + termgenerator.index_text(forum.mDescription, 1, "XD"); + + // Index fields without prefixes for general search. 
+ termgenerator.index_text(forum.mMeta.mGroupName); + termgenerator.increase_termpos(); + termgenerator.index_text(forum.mDescription); + + // store the RS link so we are able to retrieve it on matching search + const std::string rsLink(forumIndexId(forum.mMeta.mGroupId)); + doc.add_value(URL_VALUENO, rsLink); + + /* Store some fields for display purposes. Retrieved later to provide the + * matching snippet on search */ + doc.set_data(forum.mMeta.mGroupName + "\n" + forum.mDescription); + + /* We use the identifier to ensure each object ends up in the database only + * once no matter how many times we run the indexer. + * "Q" prefix is a Xapian convention for unique id term. */ + const std::string idTerm("Q" + rsLink); + doc.add_boolean_term(idTerm); + + mWriteQueue.push([idTerm, doc](Xapian::WritableDatabase& db) + { db.replace_document(idTerm, doc); } ); + + return std::error_condition(); +} + +std::error_condition DeepForumsIndex::removeForumFromIndex( + const RsGxsGroupId& grpId ) +{ + mWriteQueue.push([grpId](Xapian::WritableDatabase& db) + { db.delete_document("Q" + forumIndexId(grpId)); }); + + return std::error_condition(); +} + +std::error_condition DeepForumsIndex::indexForumPost(const RsGxsForumMsg& post) +{ + RS_DBG4(post); + + const auto& groupId = post.mMeta.mGroupId; + const auto& msgId = post.mMeta.mMsgId; + + if(groupId.isNull() || msgId.isNull()) + { + RS_ERR("Got post with invalid id ", post); + print_stacktrace(); + return std::errc::invalid_argument; + } + + // Set up a TermGenerator that we'll use in indexing. + Xapian::TermGenerator termgenerator; + //termgenerator.set_stemmer(Xapian::Stem("en")); + + // We make a document and tell the term generator to use this. + Xapian::Document doc; + termgenerator.set_document(doc); + + // Index each field with a suitable prefix. 
+ termgenerator.index_text(post.mMeta.mMsgName, 1, "S"); + termgenerator.index_text( + DeepSearch::timetToXapianDate(post.mMeta.mPublishTs), 1, "D" ); + + // Avoid indexing RetroShare-gui HTML tags + const std::string cleanMsg = DeepSearch::simpleTextHtmlExtract(post.mMsg); + termgenerator.index_text(cleanMsg, 1, "XD" ); + + // Index fields without prefixes for general search. + termgenerator.index_text(post.mMeta.mMsgName); + + termgenerator.increase_termpos(); + termgenerator.index_text(cleanMsg); + // store the RS link so we are able to retrieve it on matching search + const std::string rsLink(postIndexId(groupId, msgId)); + doc.add_value(URL_VALUENO, rsLink); + + // Store some fields for display purposes. + doc.set_data(post.mMeta.mMsgName + "\n" + cleanMsg); + + // We use the identifier to ensure each object ends up in the + // database only once no matter how many times we run the + // indexer. + const std::string idTerm("Q" + rsLink); + doc.add_boolean_term(idTerm); + + mWriteQueue.push( [idTerm, doc](Xapian::WritableDatabase& db) + { db.replace_document(idTerm, doc); } ); + + + return std::error_condition(); +} + +std::error_condition DeepForumsIndex::removeForumPostFromIndex( + RsGxsGroupId grpId, RsGxsMessageId msgId ) +{ + // "Q" prefix is a Xapian convention for unique id term. 
+ std::string idTerm("Q" + postIndexId(grpId, msgId)); + mWriteQueue.push( [idTerm](Xapian::WritableDatabase& db) + { db.delete_document(idTerm); } ); + + return std::error_condition(); +} + +/*static*/ std::string DeepForumsIndex::dbDefaultPath() +{ return RsAccounts::AccountDirectory() + "/deep_forum_index_xapian_db"; } diff --git a/libretroshare/src/deep_search/forumsindex.hpp b/libretroshare/src/deep_search/forumsindex.hpp new file mode 100644 index 000000000..2955ce323 --- /dev/null +++ b/libretroshare/src/deep_search/forumsindex.hpp @@ -0,0 +1,81 @@ +/******************************************************************************* + * RetroShare full text indexing and search implementation based on Xapian * + * * + * Copyright (C) 2021 Gioacchino Mazzurco * + * Copyright (C) 2021 Asociación Civil Altermundi * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU Affero General Public License version 3 as * + * published by the Free Software Foundation. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Affero General Public License for more details. * + * * + * You should have received a copy of the GNU Affero General Public License * + * along with this program. If not, see . 
* + * * + *******************************************************************************/ +#pragma once + +#include +#include +#include + +#include "util/rstime.h" +#include "retroshare/rsgxsforums.h" +#include "retroshare/rsevents.h" +#include "deep_search/commonutils.hpp" + +struct DeepForumsSearchResult +{ + std::string mUrl; + double mWeight; + std::string mSnippet; +}; + +struct DeepForumsIndex +{ + explicit DeepForumsIndex(const std::string& dbPath) : + mDbPath(dbPath), mWriteQueue(dbPath) {} + + /** + * @brief Search indexed GXS groups and messages + * @param[in] maxResults maximum number of acceptable search results, 0 for + * no limits + * @return success or error details + */ + std::error_condition search( const std::string& queryStr, + std::vector& results, + uint32_t maxResults = 100 ); + + std::error_condition indexForumGroup(const RsGxsForumGroup& chan); + + std::error_condition removeForumFromIndex(const RsGxsGroupId& grpId); + + std::error_condition indexForumPost(const RsGxsForumMsg& post); + + std::error_condition removeForumPostFromIndex( + RsGxsGroupId grpId, RsGxsMessageId msgId ); + + static std::string dbDefaultPath(); + +private: + static std::string forumIndexId(const RsGxsGroupId& grpId); + static std::string postIndexId( + const RsGxsGroupId& grpId, const RsGxsMessageId& msgId ); + + enum : Xapian::valueno + { + /// Used to store retroshare url of indexed documents + URL_VALUENO, + + /// @see Xapian::BAD_VALUENO + BAD_VALUENO = Xapian::BAD_VALUENO + }; + + const std::string mDbPath; + + DeepSearch::StubbornWriteOpQueue mWriteQueue; +}; diff --git a/libretroshare/src/gxs/rsgxsnetservice.cc b/libretroshare/src/gxs/rsgxsnetservice.cc index 8038b63f4..e664b580c 100644 --- a/libretroshare/src/gxs/rsgxsnetservice.cc +++ b/libretroshare/src/gxs/rsgxsnetservice.cc @@ -5420,6 +5420,12 @@ std::error_condition RsGxsNetService::distantSearchRequest( rs_owner_ptr searchData, uint32_t dataSize, RsServiceType serviceType, TurtleRequestId& requestId ) { + 
if(!mGxsNetTunnel) + { + free(searchData); + return std::errc::function_not_supported; + } + return mGxsNetTunnel->turtleSearchRequest( searchData, dataSize, serviceType, requestId ); } diff --git a/libretroshare/src/gxs/rsnxsobserver.h b/libretroshare/src/gxs/rsnxsobserver.h index 5725df0f0..9d81e7656 100644 --- a/libretroshare/src/gxs/rsnxsobserver.h +++ b/libretroshare/src/gxs/rsnxsobserver.h @@ -104,8 +104,10 @@ public: * requests there. * @param[in] requestData search query * @param[in] requestSize search query size - * @param[out] resultData results data - * @param[out] resultSize results data size + * @param[out] resultData results data storage for a pointer to search + * result reply data or nullptr if no matching results were found + * @param[out] resultSize storage for results data size or 0 if no matching + * results were found * @return Error details or success, NOT_OVERRIDDEN_BY_OBSERVER is * returned to inform the caller that this method was not overridden by the * observer so do not use it for other meanings. 
*/ @@ -113,6 +115,8 @@ public: rs_view_ptr requestData, uint32_t requestSize, rs_owner_ptr& resultData, uint32_t& resultSize ) { + /* Avoid unused parameters warning this way so doxygen can still parse + * parameters documentation */ (void) requestData; (void) requestSize; (void) resultData; (void) resultSize; return RsNxsObserverErrorNum::NOT_OVERRIDDEN_BY_OBSERVER; diff --git a/libretroshare/src/retroshare/rsgxschannels.h b/libretroshare/src/retroshare/rsgxschannels.h index bd09950f4..a91e341a7 100644 --- a/libretroshare/src/retroshare/rsgxschannels.h +++ b/libretroshare/src/retroshare/rsgxschannels.h @@ -427,7 +427,7 @@ public: * @param[in] contentIds ids of requested contents, if empty summaries of * all messages are reqeusted * @param[out] summaries storage for summaries - * @return false if something failed, true otherwhise + * @return success or error details if something failed */ virtual std::error_condition getContentSummaries( const RsGxsGroupId& channelId, diff --git a/libretroshare/src/retroshare/rsgxsforums.h b/libretroshare/src/retroshare/rsgxsforums.h index 1b35f2db9..561fe646d 100644 --- a/libretroshare/src/retroshare/rsgxsforums.h +++ b/libretroshare/src/retroshare/rsgxsforums.h @@ -4,8 +4,8 @@ * libretroshare: retroshare core library * * * * Copyright (C) 2012-2014 Robert Fernie * - * Copyright (C) 2018-2020 Gioacchino Mazzurco * - * Copyright (C) 2019-2020 Asociación Civil Altermundi * + * Copyright (C) 2018-2021 Gioacchino Mazzurco * + * Copyright (C) 2019-2021 Asociación Civil Altermundi * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * @@ -118,7 +118,10 @@ enum class RsForumEventCode: uint8_t SYNC_PARAMETERS_UPDATED = 0x0a, /// sync and storage times have changed PINNED_POSTS_CHANGED = 0x0b, /// some posts where pinned or un-pinned DELETED_FORUM = 0x0c, /// forum was deleted by cleaning - DELETED_POSTS = 0x0d /// Posts deleted by cleaning + 
DELETED_POST = 0x0d, /// Post deleted (usually by cleaning) + + /// Distant search result received + DISTANT_SEARCH_RESULT = 0x0e }; struct RsGxsForumEvent: RsEvent @@ -149,6 +152,29 @@ struct RsGxsForumEvent: RsEvent ~RsGxsForumEvent() override; }; +/** This event is fired once distant search results are received */ +struct RsGxsForumsDistantSearchEvent: RsEvent +{ + RsGxsForumsDistantSearchEvent(): + RsEvent(RsEventType::GXS_CHANNELS), + mForumEventCode(RsForumEventCode::DISTANT_SEARCH_RESULT) {} + + RsForumEventCode mForumEventCode; + TurtleRequestId mSearchId; + std::vector mSearchResults; + + ///* @see RsEvent @see RsSerializable + void serial_process( RsGenericSerializer::SerializeJob j, + RsGenericSerializer::SerializeContext& ctx ) override + { + RsEvent::serial_process(j, ctx); + + RS_SERIAL_PROCESS(mForumEventCode); + RS_SERIAL_PROCESS(mSearchId); + RS_SERIAL_PROCESS(mSearchResults); + } +}; + class RsGxsForums: public RsGxsIfaceHelper { public: @@ -385,6 +411,50 @@ public: const RsGxsGroupId& forumId, const RsGxsMessageId& postId, bool keepForever ) = 0; + /** + * @brief Get forum content summaries + * @jsonapi{development} + * @param[in] forumId id of the forum of which the content is requested + * @param[in] contentIds ids of requested contents, if empty summaries of + * all messages are requested + * @param[out] summaries storage for summaries + * @return success or error details if something failed + */ + virtual std::error_condition getContentSummaries( + const RsGxsGroupId& forumId, + const std::set& contentIds, + std::vector& summaries ) = 0; + + /** + * @brief Search the whole reachable network for matching forums and + * posts + * @jsonapi{development} + * An @see RsGxsForumsDistantSearchEvent is emitted when matching results + * arrive from the network + * @param[in] matchString string to search into the forum and posts + * @param[out] searchId storage for search id, useful to track search events + * and retrieve search results + * @return 
success or error details + */ + virtual std::error_condition distantSearchRequest( + const std::string& matchString, TurtleRequestId& searchId ) = 0; + + /** + * @brief Search the local index for matching forums and posts + * @jsonapi{development} + * @param[in] matchString string to search into the index + * @param[out] searchResults storage for search results + * @return success or error details + */ + virtual std::error_condition localSearch( + const std::string& matchString, + std::vector& searchResults ) = 0; + + + //////////////////////////////////////////////////////////////////////////// + /* Following functions are deprecated and should not be considered a stable + * to use API */ + /** * @brief Create forum. Blocking API. * @jsonapi{development} diff --git a/libretroshare/src/rsitems/rsgxsforumitems.h b/libretroshare/src/rsitems/rsgxsforumitems.h index 30c6c4e08..b1f5fb629 100644 --- a/libretroshare/src/rsitems/rsgxsforumitems.h +++ b/libretroshare/src/rsitems/rsgxsforumitems.h @@ -3,7 +3,9 @@ * * * libretroshare: retroshare core library * * * - * Copyright 2012-2012 by Robert Fernie * + * Copyright (C) 2012 Robert Fernie * + * Copyright (C) 2018-2021 Gioacchino Mazzurco * + * Copyright (C) 2019-2021 Asociación Civil Altermundi * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * @@ -19,8 +21,7 @@ * along with this program. If not, see . 
* * * *******************************************************************************/ -#ifndef RS_GXS_FORUM_ITEMS_H -#define RS_GXS_FORUM_ITEMS_H +#pragma once #include @@ -31,7 +32,18 @@ #include "retroshare/rsgxsforums.h" +enum class RsGxsForumsItems : uint8_t +{ + GROUP_ITEM = 0x02, + MESSAGE_ITEM = 0x03, + SEARCH_REQUEST = 0x04, + SEARCH_REPLY = 0x05, +}; + +RS_DEPRECATED_FOR(RsGxsForumsItems) const uint8_t RS_PKT_SUBTYPE_GXSFORUM_GROUP_ITEM = 0x02; + +RS_DEPRECATED_FOR(RsGxsForumsItems) const uint8_t RS_PKT_SUBTYPE_GXSFORUM_MESSAGE_ITEM = 0x03; class RsGxsForumGroupItem : public RsGxsGrpItem @@ -61,6 +73,48 @@ public: RsGxsForumMsg mMsg; }; +struct RsGxsForumsSearchRequest : RsSerializable +{ + RsGxsForumsSearchRequest() : mType(RsGxsForumsItems::SEARCH_REQUEST) {} + + /// Just for easier back and forward compatibility + RsGxsForumsItems mType; + + /// Store search match string + std::string mQuery; + + /// @see RsSerializable + void serial_process( RsGenericSerializer::SerializeJob j, + RsGenericSerializer::SerializeContext& ctx ) override + { + RS_SERIAL_PROCESS(mType); + RS_SERIAL_PROCESS(mQuery); + } + + ~RsGxsForumsSearchRequest() override = default; +}; + +struct RsGxsForumsSearchReply : RsSerializable +{ + RsGxsForumsSearchReply() : mType(RsGxsForumsItems::SEARCH_REPLY) {} + + /// Just for easier back and forward compatibility + RsGxsForumsItems mType; + + /// Results storage + std::vector mResults; + + /// @see RsSerializable + void serial_process( RsGenericSerializer::SerializeJob j, + RsGenericSerializer::SerializeContext& ctx ) override + { + RS_SERIAL_PROCESS(mType); + RS_SERIAL_PROCESS(mResults); + } + + ~RsGxsForumsSearchReply() override = default; +}; + class RsGxsForumSerialiser : public RsServiceSerializer { public: @@ -69,5 +123,3 @@ public: virtual RsItem *create_item(uint16_t service_id,uint8_t item_subtype) const ; }; - -#endif /* RS_GXS_FORUM_ITEMS_H */ diff --git a/libretroshare/src/rsserver/rsinit.cc 
b/libretroshare/src/rsserver/rsinit.cc index ad39f46a1..7433ab151 100644 --- a/libretroshare/src/rsserver/rsinit.cc +++ b/libretroshare/src/rsserver/rsinit.cc @@ -2,7 +2,8 @@ * libretroshare/src/retroshare: rsinit.cc * * * * Copyright (C) 2004-2014 Robert Fernie * - * Copyright (C) 2016-2019 Gioacchino Mazzurco * + * Copyright (C) 2016-2021 Gioacchino Mazzurco * + * Copyright (C) 2021 Asociación Civil Altermundi * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * @@ -1336,22 +1337,26 @@ int RsServer::StartupRetroShare() mWiki->setNetworkExchangeService(wiki_ns) ; #endif - /**** Forum GXS service ****/ + /************************* Forum GXS service ******************************/ - RsGeneralDataService* gxsforums_ds = new RsDataService(currGxsDir + "/", "gxsforums_db", - RS_SERVICE_GXS_TYPE_FORUMS, NULL, rsInitConfig->gxs_passwd); + RsGeneralDataService* gxsforums_ds = new RsDataService( + currGxsDir + "/", "gxsforums_db", RS_SERVICE_GXS_TYPE_FORUMS, + nullptr, rsInitConfig->gxs_passwd ); + p3GxsForums* mGxsForums = new p3GxsForums( + gxsforums_ds, nullptr, mGxsIdService ); - p3GxsForums *mGxsForums = new p3GxsForums(gxsforums_ds, NULL, mGxsIdService); + RsGxsNetTunnelService* gxsForumsTunnelService = nullptr; +#ifdef RS_DEEP_FORUMS_INDEX + gxsForumsTunnelService = mGxsNetTunnel; +#endif - // create GXS photo service - RsGxsNetService* gxsforums_ns = new RsGxsNetService( - RS_SERVICE_GXS_TYPE_FORUMS, gxsforums_ds, nxsMgr, - mGxsForums, mGxsForums->getServiceInfo(), - mReputations, mGxsCircles,mGxsIdService, - pgpAuxUtils);//,mGxsNetTunnel,true,true,true); + RsGxsNetService* gxsforums_ns = new RsGxsNetService( + RS_SERVICE_GXS_TYPE_FORUMS, gxsforums_ds, nxsMgr, mGxsForums, + mGxsForums->getServiceInfo(), mReputations, mGxsCircles, + mGxsIdService, pgpAuxUtils, gxsForumsTunnelService ); + mGxsForums->setNetworkExchangeService(gxsforums_ns); - 
mGxsForums->setNetworkExchangeService(gxsforums_ns) ; /**** Channel GXS service ****/ @@ -1598,7 +1603,10 @@ int RsServer::StartupRetroShare() /**************************************************************************/ // Turtle search for GXS services - mGxsNetTunnel->registerSearchableService(gxschannels_ns) ; + mGxsNetTunnel->registerSearchableService(gxschannels_ns); +#ifdef RS_DEEP_FORUMS_INDEX + mGxsNetTunnel->registerSearchableService(gxsforums_ns); +#endif /**************************************************************************/ diff --git a/libretroshare/src/services/p3gxsforums.cc b/libretroshare/src/services/p3gxsforums.cc index cd7cbb5fe..2f85afe59 100644 --- a/libretroshare/src/services/p3gxsforums.cc +++ b/libretroshare/src/services/p3gxsforums.cc @@ -4,8 +4,8 @@ * libretroshare: retroshare core library * * * * Copyright (C) 2012-2014 Robert Fernie * - * Copyright (C) 2018-2020 Gioacchino Mazzurco * - * Copyright (C) 2019-2020 Asociación Civil Altermundi * + * Copyright (C) 2018-2021 Gioacchino Mazzurco * + * Copyright (C) 2019-2021 Asociación Civil Altermundi * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * @@ -59,7 +59,11 @@ p3GxsForums::p3GxsForums( RsGeneralDataService *gds, RsGenExchange( gds, nes, new RsGxsForumSerialiser(), RS_SERVICE_GXS_TYPE_FORUMS, gixs, forumsAuthenPolicy()), RsGxsForums(static_cast(*this)), mGenToken(0), - mGenActive(false), mGenCount(0), mKnownForumsMutex("GXS forums known forums timestamp cache") + mGenActive(false), mGenCount(0), + mKnownForumsMutex("GXS forums known forums timestamp cache") +#ifdef RS_DEEP_FORUMS_INDEX + , mDeepIndex(DeepForumsIndex::dbDefaultPath()) +#endif { // Test Data disabled in Repo. 
//RsTickEvent::schedule_in(FORUM_TESTEVENT_DUMMYDATA, DUMMYDATA_PERIOD); @@ -190,31 +194,61 @@ RsSerialiser* p3GxsForums::setupSerialiser() return rss; } -void p3GxsForums::notifyChanges(std::vector &changes) +void p3GxsForums::notifyChanges(std::vector& changes) { RS_DBG2(changes.size(), " changes to notify"); - std::vector::iterator it; - for(it = changes.begin(); it != changes.end(); ++it) + for(RsGxsNotify* gxsChange: changes) { - RsGxsNotify* gxsChange = *it; + // Let the compiler delete the change for us + std::unique_ptr gxsChangeDeleter(gxsChange); + switch(gxsChange->getType()) { case RsGxsNotify::TYPE_RECEIVED_NEW: // [[fallthrough]] case RsGxsNotify::TYPE_PUBLISHED: { - RsGxsMsgChange* msgChange = dynamic_cast(*it); - RsGxsGroupChange* groupChange = dynamic_cast(*it); + auto msgChange = dynamic_cast(gxsChange); - if(msgChange) /* Message received*/ + if(msgChange) /* Message received */ { - auto ev = std::make_shared(); - ev->mForumMsgId = msgChange->mMsgId; - ev->mForumGroupId = msgChange->mGroupId; - ev->mForumEventCode = RsForumEventCode::NEW_MESSAGE; - rsEvents->postEvent(ev); + uint8_t msgSubtype = msgChange->mNewMsgItem->PacketSubType(); + switch(static_cast(msgSubtype)) + { + case RsGxsForumsItems::MESSAGE_ITEM: + { + auto newForumMessageItem = + dynamic_cast( + msgChange->mNewMsgItem ); + + if(!newForumMessageItem) + { + RS_ERR("Received message change with mNewMsgItem type " + "mismatching or null"); + print_stacktrace(); + return; + } + +#ifdef RS_DEEP_FORUMS_INDEX + RsGxsForumMsg tmpPost = newForumMessageItem->mMsg; + tmpPost.mMeta = newForumMessageItem->meta; + mDeepIndex.indexForumPost(tmpPost); +#endif + auto ev = std::make_shared(); + ev->mForumMsgId = msgChange->mMsgId; + ev->mForumGroupId = msgChange->mGroupId; + ev->mForumEventCode = RsForumEventCode::NEW_MESSAGE; + rsEvents->postEvent(ev); + break; + } + default: + RS_WARN("Got unknown gxs message subtype: ", msgSubtype); + break; + } } - else if(groupChange) /* Group received */ + + 
auto groupChange = dynamic_cast(gxsChange); + if(groupChange) /* Group received */ { bool unknown; { @@ -232,9 +266,25 @@ void p3GxsForums::notifyChanges(std::vector &changes) ev->mForumEventCode = RsForumEventCode::NEW_FORUM; rsEvents->postEvent(ev); } - else - RS_DBG1( " Not notifying already known forum ", - gxsChange->mGroupId ); + +#ifdef RS_DEEP_FORUMS_INDEX + uint8_t itemType = groupChange->mNewGroupItem->PacketSubType(); + switch(static_cast(itemType)) + { + case RsGxsForumsItems::GROUP_ITEM: + { + auto newForumGroupItem = + static_cast( + groupChange->mNewGroupItem ); + mDeepIndex.indexForumGroup(newForumGroupItem->mGroup); + break; + } + default: + RS_WARN("Got unknown gxs group subtype: ", itemType); + break; + } +#endif // def RS_DEEP_FORUMS_INDEX + } break; } @@ -256,25 +306,31 @@ void p3GxsForums::notifyChanges(std::vector &changes) } case RsGxsNotify::TYPE_MESSAGE_DELETED: { - RsGxsMsgDeletedChange* delChange = - dynamic_cast(gxsChange); - + auto delChange = dynamic_cast(gxsChange); if(!delChange) { RS_ERR( "Got mismatching notification type: ", gxsChange->getType() ); print_stacktrace(); - goto cleanup; + break; } +#ifdef RS_DEEP_FORUMS_INDEX + mDeepIndex.removeForumPostFromIndex( + delChange->mGroupId, delChange->messageId); +#endif + auto ev = std::make_shared(); - ev->mForumEventCode = RsForumEventCode::DELETED_POSTS; + ev->mForumEventCode = RsForumEventCode::DELETED_POST; ev->mForumGroupId = delChange->mGroupId; ev->mForumMsgId = delChange->messageId; break; } case RsGxsNotify::TYPE_GROUP_DELETED: { +#ifdef RS_DEEP_FORUMS_INDEX + mDeepIndex.removeForumFromIndex(gxsChange->mGroupId); +#endif auto ev = std::make_shared(); ev->mForumGroupId = gxsChange->mGroupId; ev->mForumEventCode = RsForumEventCode::DELETED_FORUM; @@ -299,7 +355,7 @@ void p3GxsForums::notifyChanges(std::vector &changes) * analyse the old and new group in order to detect possible * notifications for clients */ - RsGxsGroupChange* grpChange = dynamic_cast(*it); + auto grpChange = 
dynamic_cast(gxsChange); RsGxsForumGroupItem* old_forum_grp_item = dynamic_cast(grpChange->mOldGroupItem); @@ -312,9 +368,13 @@ void p3GxsForums::notifyChanges(std::vector &changes) "mNewGroup not of type RsGxsForumGroupItem or NULL. " "This is inconsistent!"); print_stacktrace(); - goto cleanup; + break; } +#ifdef RS_DEEP_FORUMS_INDEX + mDeepIndex.indexForumGroup(new_forum_grp_item->mGroup); +#endif + /* First of all, we check if there is a difference between the old * and new list of moderators */ @@ -382,9 +442,6 @@ void p3GxsForums::notifyChanges(std::vector &changes) " Currently not handled." ); break; } - -cleanup: - delete *it; } } @@ -1348,6 +1405,254 @@ bool RsGxsForumGroup::canEditPosts(const RsGxsId& id) const id == mMeta.mAuthorId; } +std::error_condition p3GxsForums::getContentSummaries( + const RsGxsGroupId& forumId, + const std::set& contentIds, + std::vector& summaries ) +{ + uint32_t token; + RsTokReqOptions opts; + opts.mReqType = GXS_REQUEST_TYPE_MSG_META; + + GxsMsgReq msgReq; + msgReq[forumId] = contentIds; + + + if(!requestMsgInfo(token, opts, msgReq)) + { + RS_ERR("requestMsgInfo failed"); + return std::errc::invalid_argument; + } + + switch(waitToken(token, std::chrono::seconds(5))) + { + case RsTokenService::COMPLETE: + { + GxsMsgMetaMap metaMap; + if(!RsGenExchange::getMsgMeta(token, metaMap)) + return std::errc::result_out_of_range; + summaries = metaMap[forumId]; + return std::error_condition(); + } + case RsTokenService::PARTIAL: // [[fallthrough]]; + case RsTokenService::PENDING: + return std::errc::timed_out; + default: + return std::errc::not_supported; + } +} + +#ifdef RS_DEEP_FORUMS_INDEX +std::error_condition p3GxsForums::handleDistantSearchRequest( + rs_view_ptr requestData, uint32_t requestSize, + rs_owner_ptr& resultData, uint32_t& resultSize ) +{ + RS_DBG1(""); + + RsGxsForumsSearchRequest request; + { + RsGenericSerializer::SerializeContext ctx(requestData, requestSize); + RsGenericSerializer::SerializeJob j = + 
RsGenericSerializer::SerializeJob::DESERIALIZE; + RS_SERIAL_PROCESS(request); + } + + if(request.mType != RsGxsForumsItems::SEARCH_REQUEST) + { + // If more types are implemented we would put a switch on mType instead + RS_WARN( "Got search request with unkown type: ", + static_cast(request.mType) ); + return std::errc::bad_message; + } + + RsGxsForumsSearchReply reply; + auto mErr = prepareSearchResults(request.mQuery, true, reply.mResults); + if(mErr || reply.mResults.empty()) return mErr; + + { + RsGenericSerializer::SerializeContext ctx; + RsGenericSerializer::SerializeJob j = + RsGenericSerializer::SerializeJob::SIZE_ESTIMATE; + RS_SERIAL_PROCESS(reply); + resultSize = ctx.mOffset; + } + + resultData = rs_malloc(resultSize); + RsGenericSerializer::SerializeContext ctx(resultData, resultSize); + RsGenericSerializer::SerializeJob j = + RsGenericSerializer::SerializeJob::SERIALIZE; + RS_SERIAL_PROCESS(reply); + + return std::error_condition(); +} + +std::error_condition p3GxsForums::distantSearchRequest( + const std::string& matchString, TurtleRequestId& searchId ) +{ + RsGxsForumsSearchRequest request; + request.mQuery = matchString; + + uint32_t requestSize; + { + RsGenericSerializer::SerializeContext ctx; + RsGenericSerializer::SerializeJob j = + RsGenericSerializer::SerializeJob::SIZE_ESTIMATE; + RS_SERIAL_PROCESS(request); + requestSize = ctx.mOffset; + } + + std::error_condition ec; + auto requestData = rs_malloc(requestSize, &ec); + if(!requestData) return ec; + { + RsGenericSerializer::SerializeContext ctx(requestData, requestSize); + RsGenericSerializer::SerializeJob j = + RsGenericSerializer::SerializeJob::SERIALIZE; + RS_SERIAL_PROCESS(request); + } + + return netService()->distantSearchRequest( + requestData, requestSize, + static_cast(serviceType()), searchId ); +} + +std::error_condition p3GxsForums::localSearch( + const std::string& matchString, + std::vector& searchResults ) +{ return prepareSearchResults(matchString, false, searchResults); } + 
+std::error_condition p3GxsForums::prepareSearchResults( + const std::string& matchString, bool publicOnly, + std::vector& searchResults ) +{ + std::vector results; + auto mErr = mDeepIndex.search(matchString, results); + if(mErr) return mErr; + + searchResults.clear(); + for(auto uRes: results) + { + RsUrl resUrl(uRes.mUrl); + const auto forumIdStr = resUrl.getQueryV(RsGxsForums::FORUM_URL_ID_FIELD); + if(!forumIdStr) + { + RS_ERR( "Forum URL retrieved from deep index miss ID. ", + "Should never happen! ", uRes.mUrl ); + print_stacktrace(); + return std::errc::address_not_available; + } + + std::vector forumsInfo; + RsGxsGroupId forumId(*forumIdStr); + if(forumId.isNull()) + { + RS_ERR( "Forum ID retrieved from deep index is invalid. ", + "Should never happen! ", uRes.mUrl ); + print_stacktrace(); + return std::errc::bad_address; + } + + if( !getForumsInfo(std::list{forumId}, forumsInfo) || + forumsInfo.empty() ) + { + RS_ERR( "Forum just parsed from deep index link not found. " + "Should never happen! ", forumId, " ", uRes.mUrl ); + print_stacktrace(); + return std::errc::identifier_removed; + } + + RsGroupMetaData& fMeta(forumsInfo[0].mMeta); + + // Avoid leaking sensitive information to unkown peers + if( publicOnly && + !(fMeta.mGroupFlags & GXS_SERV::FLAG_PRIVACY_PUBLIC) ) continue; + + RsGxsSearchResult res; + res.mGroupId = forumId; + res.mGroupName = fMeta.mGroupName; + res.mAuthorId = fMeta.mAuthorId; + res.mPublishTs = fMeta.mPublishTs; + res.mSearchContext = uRes.mSnippet; + + auto postIdStr = + resUrl.getQueryV(RsGxsForums::FORUM_URL_MSG_ID_FIELD); + if(postIdStr) + { + RsGxsMessageId msgId(*postIdStr); + if(msgId.isNull()) + { + RS_ERR( "Post just parsed from deep index link is invalid. " + "Should never happen! 
", postIdStr, " ", uRes.mUrl ); + print_stacktrace(); + return std::errc::bad_address; + } + + std::vector msgSummaries; + auto errc = getContentSummaries( + forumId, std::set{msgId}, msgSummaries); + if(errc) return errc; + + if(msgSummaries.size() != 1) + { + RS_ERR( "getContentSummaries returned: ", msgSummaries.size(), + "should never happen!" ); + return std::errc::result_out_of_range; + } + + RsMsgMetaData& msgMeta(msgSummaries[0]); + res.mMsgId = msgMeta.mMsgId; + res.mMsgName = msgMeta.mMsgName; + res.mAuthorId = msgMeta.mAuthorId; + } + + RS_DBG4(res); + searchResults.push_back(res); + } + + return std::error_condition(); +} + +std::error_condition p3GxsForums::receiveDistantSearchResult( + const TurtleRequestId requestId, + rs_owner_ptr& resultData, uint32_t& resultSize ) +{ + RsGxsForumsSearchReply reply; + { + RsGenericSerializer::SerializeContext ctx(resultData, resultSize); + RsGenericSerializer::SerializeJob j = + RsGenericSerializer::SerializeJob::DESERIALIZE; + RS_SERIAL_PROCESS(reply); + } + free(resultData); + + if(reply.mType != RsGxsForumsItems::SEARCH_REPLY) + { + // If more types are implemented we would put a switch on mType instead + RS_WARN( "Got search request with unkown type: ", + static_cast(reply.mType) ); + return std::errc::bad_message; + } + + auto event = std::make_shared(); + event->mSearchId = requestId; + event->mSearchResults = reply.mResults; + rsEvents->postEvent(event); + return std::error_condition(); +} + +#else // def RS_DEEP_FORUMS_INDEX + +std::error_condition p3GxsForums::distantSearchRequest( + const std::string&, TurtleRequestId& ) +{ return std::errc::function_not_supported; } + +std::error_condition p3GxsForums::localSearch( + const std::string&, + std::vector& ) +{ return std::errc::function_not_supported; } + +#endif // def RS_DEEP_FORUMS_INDEX + /*static*/ const std::string RsGxsForums::DEFAULT_FORUM_BASE_URL = "retroshare:///forums"; /*static*/ const std::string RsGxsForums::FORUM_URL_NAME_FIELD = diff --git 
a/libretroshare/src/services/p3gxsforums.h b/libretroshare/src/services/p3gxsforums.h index 97f04cc7b..b499e7b0d 100644 --- a/libretroshare/src/services/p3gxsforums.h +++ b/libretroshare/src/services/p3gxsforums.h @@ -4,8 +4,8 @@ * libretroshare: retroshare core library * * * * Copyright (C) 2012-2014 Robert Fernie * - * Copyright (C) 2018-2020 Gioacchino Mazzurco * - * Copyright (C) 2019-2020 Asociación Civil Altermundi * + * Copyright (C) 2018-2021 Gioacchino Mazzurco * + * Copyright (C) 2019-2021 Asociación Civil Altermundi * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * @@ -32,6 +32,10 @@ #include "util/rstickevent.h" #include "util/rsdebug.h" +#ifdef RS_DEEP_FORUMS_INDEX +#include "deep_search/forumsindex.hpp" +#endif + class p3GxsForums: public RsGenExchange, public RsGxsForums, public p3Config, public RsTickEvent /* only needed for testing - remove after */ @@ -142,7 +146,34 @@ public: /// @see RsGxsForums std::error_condition setPostKeepForever( const RsGxsGroupId& forumId, const RsGxsMessageId& postId, - bool keepForever ) override; + bool keepForever ) override; + + /// @see RsGxsForums + std::error_condition getContentSummaries( + const RsGxsGroupId& forumId, + const std::set& contentIds, + std::vector& summaries ) override; + + /// @see RsGxsForums + std::error_condition distantSearchRequest( + const std::string& matchString, TurtleRequestId& searchId ) override; + + /// @see RsGxsForums + std::error_condition localSearch( + const std::string& matchString, + std::vector& searchResults ) override; + +#ifdef RS_DEEP_FORUMS_INDEX + /// @see RsNxsObserver + std::error_condition handleDistantSearchRequest( + rs_view_ptr requestData, uint32_t requestSize, + rs_owner_ptr& resultData, uint32_t& resultSize ) override; + + /// @see RsNxsObserver + std::error_condition receiveDistantSearchResult( + const TurtleRequestId requestId, + rs_owner_ptr& resultData, uint32_t& 
resultSize ) override; +#endif /// implementation of rsGxsGorums /// @@ -155,6 +186,17 @@ public: bool getMsgMetaData(const uint32_t &token, GxsMsgMetaMap& msg_metas) ; +protected: +#ifdef RS_DEEP_FORUMS_INDEX + /** Internal usage + * @param[in] publicOnly if true is passed only results pertaining to + * publicly shared forums are returned + */ + std::error_condition prepareSearchResults( + const std::string& matchString, bool publicOnly, + std::vector& searchResults ); +#endif //def RS_DEEP_FORUMS_INDEX + private: static uint32_t forumsAuthenPolicy(); @@ -189,4 +231,8 @@ bool generateGroup(uint32_t &token, std::string groupName); std::map mKnownForums ; RsMutex mKnownForumsMutex; + +#ifdef RS_DEEP_FORUMS_INDEX + DeepForumsIndex mDeepIndex; +#endif }; diff --git a/libretroshare/src/turtle/turtleclientservice.h b/libretroshare/src/turtle/turtleclientservice.h index 9cbe5763f..55525ae3c 100644 --- a/libretroshare/src/turtle/turtleclientservice.h +++ b/libretroshare/src/turtle/turtleclientservice.h @@ -19,23 +19,25 @@ * along with this program. If not, see . * * * *******************************************************************************/ - -// This class is the parent class for any service that will use the turtle router to distribute its packets. -// Typical representative clients include: -// -// p3ChatService: opens tunnels to distant peers for chatting -// ftServer: searches and open tunnels to distant sources for file transfer -// #pragma once #include #include -#include -#include + +#include "serialiser/rsserial.h" +#include "turtle/rsturtleitem.h" +#include "util/rsdebug.h" struct RsItem; class p3turtle ; +/** This class is the parent class for any service that will use the turtle + * router to distribute its packets. 
+ * Typical representative clients include: + * p3ChatService: opens tunnels to distant peers for chatting + * ftServer: searches and opens tunnels to distant sources for file + * transfer + */ class RsTurtleClientService { public: @@ -87,30 +89,35 @@ class RsTurtleClientService std::cerr << "!!!!!! Received Data from turtle router, but the client service is not handling it !!!!!!!!!!" << std::endl ; } - /*! - * \brief receiveSearchRequest - * This method is called by the turtle router to notify the client of a search request in the form generic data. The returned - * result contains the serialised generic result returned by the client. - * - * The turtle router keeps the memory ownership over search_request_data - * - * \param search_request_data generic serialized search data - * \param search_request_data_len length of the serialized search data - * \param search_result_data generic serialized search result data - * \param search_result_data_len length of the serialized search result data - * \param max_allowed_hits max number of hits allowed to be sent back and forwarded - * - * \return true if the search is successful. - */ - virtual bool receiveSearchRequest(unsigned char */*search_request_data*/, - uint32_t /*search_request_data_len*/, - unsigned char *& /*search_result_data*/, - uint32_t& /*search_result_data_len*/, - uint32_t& /* max_allows_hits */) - { - std::cerr << "!!!!!! Received search result from turtle router, but the client service who requested it is not handling it !!!!!!!!!!" << std::endl ; - return false; - } + /*! + * This method is called by the turtle router to notify the client of a + * search request in the form of generic data. + * The returned result contains the serialised generic result returned by the + * client service. 
+ * The turtle router keeps the memory ownership over search_request_data + * \param search_request_data generic serialized search data + * \param search_request_data_len length of the serialized search data + * \param search_result_data generic serialized search result data + * \param search_result_data_len length of the serialized search result data + * \param max_allows_hits max number of hits allowed to be sent back and + * forwarded + * \return true if matching results are available, false otherwise. + */ + virtual bool receiveSearchRequest( + unsigned char *search_request_data, uint32_t search_request_data_len, + unsigned char *& search_result_data, uint32_t& search_result_data_len, + uint32_t& max_allows_hits ) + { + /* Suppress unused warning this way and not commenting the param names + * so doxygen can match the documentation against the params */ + (void) search_request_data; (void) search_request_data_len; + (void) search_result_data; (void) search_result_data_len; + (void) max_allows_hits; + + RS_WARN( "Received search request from turtle router, but the client " + "is not handling it!" ); + return false; + } /*! * \brief receiveSearchResult diff --git a/retroshare.pri b/retroshare.pri index 8c55efd8f..d0439e5e2 100644 --- a/retroshare.pri +++ b/retroshare.pri @@ -141,6 +141,11 @@ rs_macos10.15:CONFIG -= rs_macos10.11 CONFIG *= no_rs_jsonapi rs_jsonapi:CONFIG -= no_rs_jsonapi +# To enable forums indexing append the following assignation to qmake command +# line "CONFIG+=rs_deep_forums_index" +CONFIG *= no_rs_deep_forums_index +rs_deep_forums_index:CONFIG -= no_rs_deep_forums_index + # To enable channel indexing append the following assignation to qmake command # line "CONFIG+=rs_deep_channels_index" CONFIG *= no_rs_deep_channels_index @@ -561,6 +566,7 @@ rs_webui { DEFINES *= RS_WEBUI } +rs_deep_forums_index:DEFINES *= RS_DEEP_FORUMS_INDEX rs_deep_channels_index:DEFINES *= RS_DEEP_CHANNEL_INDEX rs_deep_files_index:DEFINES *= RS_DEEP_FILES_INDEX