Implement deep indexing and search for forums

RsGxsNetTunnelService::receiveSearchRequest handle no results case
  properly
RsNxsObserver::handleDistantSearchRequest improve method behaviour
  documentation
RsTurtleClientService Improve documentation
This commit is contained in:
Gioacchino Mazzurco 2021-02-19 23:23:02 +01:00
parent 1b551d809f
commit 9c38eed648
No known key found for this signature in database
GPG Key ID: A1FBCA3872E87051
13 changed files with 902 additions and 89 deletions

View File

@ -168,13 +168,33 @@ std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc)
std::string retVal(rsHtmlDoc.substr(bodyTagEnd+1));
// strip also CSS inside <style></style>
oSize = retVal.size();
auto styleTagBegin(retVal.find("<style"));
if(styleTagBegin < oSize)
{
auto styleEnd(retVal.find("</style>", styleTagBegin));
if(styleEnd < oSize)
retVal.erase(styleTagBegin, 8+styleEnd-styleTagBegin);
}
/* Strip every remaining <...> tag. The closing '>' is searched starting from
 * the opening '<': a stray '>' placed before the tag would otherwise make the
 * erase length wrap around and drop all the text following the tag. */
std::string::size_type oPos;
std::string::size_type cPos;
int itCount = 0;
while((oPos = retVal.find('<')) != std::string::npos)
{
	// Unterminated tag, nothing more can be stripped
	if((cPos = retVal.find('>', oPos)) == std::string::npos) break;
	retVal.erase(oPos, 1+cPos-oPos);

	// Avoid infinite loop with crafty input
	if(itCount > 1000)
	{
		RS_WARN( "Breaking stripping loop due to max allowed iterations ",
		         "rsHtmlDoc: ", rsHtmlDoc, " retVal: ", retVal );
		break;
	}
	++itCount;
}
return retVal;

View File

@ -0,0 +1,208 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
* published by the Free Software Foundation. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Affero General Public License for more details. *
* *
* You should have received a copy of the GNU Affero General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#include "deep_search/forumsindex.hpp"
#include "deep_search/commonutils.hpp"
#include "retroshare/rsinit.h"
#include "retroshare/rsgxsforums.h"
#include "util/rsdebuglevel4.h"
std::error_condition DeepForumsIndex::search(
const std::string& queryStr,
std::vector<DeepForumsSearchResult>& results, uint32_t maxResults )
{
results.clear();
std::unique_ptr<Xapian::Database> dbPtr(
DeepSearch::openReadOnlyDatabase(mDbPath) );
if(!dbPtr) return std::errc::bad_file_descriptor;
Xapian::Database& db(*dbPtr);
// Set up a QueryParser with a stemmer and suitable prefixes.
Xapian::QueryParser queryparser;
//queryparser.set_stemmer(Xapian::Stem("en"));
queryparser.set_stemming_strategy(queryparser.STEM_SOME);
// Start of prefix configuration.
//queryparser.add_prefix("title", "S");
//queryparser.add_prefix("description", "XD");
// End of prefix configuration.
// And parse the query.
Xapian::Query query = queryparser.parse_query(queryStr);
// Use an Enquire object on the database to run the query.
Xapian::Enquire enquire(db);
enquire.set_query(query);
Xapian::MSet mset = enquire.get_mset(
0, maxResults ? maxResults : db.get_doccount() );
for( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m )
{
const Xapian::Document& doc = m.get_document();
DeepForumsSearchResult s;
s.mUrl = doc.get_value(URL_VALUENO);
#if XAPIAN_AT_LEAST(1,3,5)
s.mSnippet = mset.snippet(doc.get_data());
#endif // XAPIAN_AT_LEAST(1,3,5)
results.push_back(s);
}
return std::error_condition();
}
/*static*/ std::string DeepForumsIndex::forumIndexId(const RsGxsGroupId& grpId)
{
RsUrl forumIndexId(RsGxsForums::DEFAULT_FORUM_BASE_URL);
forumIndexId.setQueryKV(
RsGxsForums::FORUM_URL_ID_FIELD, grpId.toStdString() );
return forumIndexId.toString();
}
/*static*/ std::string DeepForumsIndex::postIndexId(
const RsGxsGroupId& grpId, const RsGxsMessageId& msgId )
{
RsUrl postIndexId(RsGxsForums::DEFAULT_FORUM_BASE_URL);
postIndexId.setQueryKV(RsGxsForums::FORUM_URL_ID_FIELD, grpId.toStdString());
postIndexId.setQueryKV(RsGxsForums::FORUM_URL_MSG_ID_FIELD, msgId.toStdString());
return postIndexId.toString();
}
std::error_condition DeepForumsIndex::indexForumGroup(
const RsGxsForumGroup& forum )
{
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
//termgenerator.set_stemmer(Xapian::Stem("en"));
// We make a document and tell the term generator to use this.
Xapian::Document doc;
termgenerator.set_document(doc);
// Index each field with a suitable prefix.
termgenerator.index_text(forum.mMeta.mGroupName, 1, "G");
termgenerator.index_text(
DeepSearch::timetToXapianDate(forum.mMeta.mPublishTs), 1, "D" );
termgenerator.index_text(forum.mDescription, 1, "XD");
// Index fields without prefixes for general search.
termgenerator.index_text(forum.mMeta.mGroupName);
termgenerator.increase_termpos();
termgenerator.index_text(forum.mDescription);
// store the RS link so we are able to retrive it on matching search
const std::string rsLink(forumIndexId(forum.mMeta.mGroupId));
doc.add_value(URL_VALUENO, rsLink);
/* Store some fields for display purposes. Retrieved later to provide the
* matching snippet on search */
doc.set_data(forum.mMeta.mGroupName + "\n" + forum.mDescription);
/* We use the identifier to ensure each object ends up in the database only
* once no matter how many times we run the indexer.
* "Q" prefix is a Xapian convention for unique id term. */
const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
mWriteQueue.push([idTerm, doc](Xapian::WritableDatabase& db)
{ db.replace_document(idTerm, doc); } );
return std::error_condition();
}
std::error_condition DeepForumsIndex::removeForumFromIndex(
const RsGxsGroupId& grpId )
{
mWriteQueue.push([grpId](Xapian::WritableDatabase& db)
{ db.delete_document("Q" + forumIndexId(grpId)); });
return std::error_condition();
}
std::error_condition DeepForumsIndex::indexForumPost(const RsGxsForumMsg& post)
{
RS_DBG4(post);
const auto& groupId = post.mMeta.mGroupId;
const auto& msgId = post.mMeta.mMsgId;
if(groupId.isNull() || msgId.isNull())
{
RS_ERR("Got post with invalid id ", post);
print_stacktrace();
return std::errc::invalid_argument;
}
// Set up a TermGenerator that we'll use in indexing.
Xapian::TermGenerator termgenerator;
//termgenerator.set_stemmer(Xapian::Stem("en"));
// We make a document and tell the term generator to use this.
Xapian::Document doc;
termgenerator.set_document(doc);
// Index each field with a suitable prefix.
termgenerator.index_text(post.mMeta.mMsgName, 1, "S");
termgenerator.index_text(
DeepSearch::timetToXapianDate(post.mMeta.mPublishTs), 1, "D" );
// Avoid indexing RetroShare-gui HTML tags
const std::string cleanMsg = DeepSearch::simpleTextHtmlExtract(post.mMsg);
termgenerator.index_text(cleanMsg, 1, "XD" );
// Index fields without prefixes for general search.
termgenerator.index_text(post.mMeta.mMsgName);
termgenerator.increase_termpos();
termgenerator.index_text(cleanMsg);
// store the RS link so we are able to retrive it on matching search
const std::string rsLink(postIndexId(groupId, msgId));
doc.add_value(URL_VALUENO, rsLink);
// Store some fields for display purposes.
doc.set_data(post.mMeta.mMsgName + "\n" + cleanMsg);
// We use the identifier to ensure each object ends up in the
// database only once no matter how many times we run the
// indexer.
const std::string idTerm("Q" + rsLink);
doc.add_boolean_term(idTerm);
mWriteQueue.push( [idTerm, doc](Xapian::WritableDatabase& db)
{ db.replace_document(idTerm, doc); } );
return std::error_condition();
}
/**
 * Queue the removal of the document indexed for the given forum post.
 * @return always success, the database write is performed by the write queue
 */
std::error_condition DeepForumsIndex::removeForumPostFromIndex(
        RsGxsGroupId grpId, RsGxsMessageId msgId )
{
	// Unique id term of the post, "Q" is the Xapian convention for it
	const std::string uniqueTerm = "Q" + postIndexId(grpId, msgId);

	mWriteQueue.push(
	            [uniqueTerm](Xapian::WritableDatabase& wDb)
	{ wDb.delete_document(uniqueTerm); } );

	return {};
}
/** Default database location: a fixed sub-path appended to the directory
 * returned by RsAccounts::AccountDirectory() */
/*static*/ std::string DeepForumsIndex::dbDefaultPath()
{
	std::string dbPath = RsAccounts::AccountDirectory();
	dbPath += "/deep_forum_index_xapian_db";
	return dbPath;
}

View File

@ -0,0 +1,81 @@
/*******************************************************************************
* RetroShare full text indexing and search implementation based on Xapian *
* *
* Copyright (C) 2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License version 3 as *
* published by the Free Software Foundation. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Affero General Public License for more details. *
* *
* You should have received a copy of the GNU Affero General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#pragma once
#include <system_error>
#include <vector>
#include <xapian.h>
#include "util/rstime.h"
#include "retroshare/rsgxsforums.h"
#include "retroshare/rsevents.h"
#include "deep_search/commonutils.hpp"
/** Single match produced by a search on the forums index */
struct DeepForumsSearchResult
{
	/// RetroShare link stored in the index for the matching forum or post
	std::string mUrl;

	/** Weight of the match. Explicitly initialized so that results on which it
	 * is never assigned do not carry an indeterminate value around */
	double mWeight = 0.0;

	/// Excerpt of the indexed text, left empty when snippets are unavailable
	std::string mSnippet;
};
/**
 * Full text index of GXS forums and forum posts based on Xapian.
 * Searches open the database read only, modifications are pushed as
 * operations to a write queue.
 */
struct DeepForumsIndex
{
	/** @param[in] dbPath path of the Xapian database backing the index */
	explicit DeepForumsIndex(const std::string& dbPath) :
	    mDbPath(dbPath), mWriteQueue(dbPath) {}

	/**
	 * @brief Search indexed GXS groups and messages
	 * @param[in] queryStr text of the query
	 * @param[out] results storage for the matching results
	 * @param[in] maxResults maximum number of acceptable search results, 0 for
	 * no limits
	 * @return success or error details
	 */
	std::error_condition search( const std::string& queryStr,
	                             std::vector<DeepForumsSearchResult>& results,
	                             uint32_t maxResults = 100 );

	/**
	 * @brief Add a forum to the index, or update it if already indexed
	 * @param[in] forum forum to index
	 * @return success or error details
	 */
	std::error_condition indexForumGroup(const RsGxsForumGroup& forum);

	/**
	 * @brief Remove a forum from the index
	 * @param[in] grpId id of the forum to remove
	 * @return success or error details
	 */
	std::error_condition removeForumFromIndex(const RsGxsGroupId& grpId);

	/**
	 * @brief Add a forum post to the index, or update it if already indexed
	 * @param[in] post post to index, its group id and message id must be valid
	 * @return success or error details
	 */
	std::error_condition indexForumPost(const RsGxsForumMsg& post);

	/**
	 * @brief Remove a forum post from the index
	 * @param[in] grpId id of the forum the post belongs to
	 * @param[in] msgId id of the post to remove
	 * @return success or error details
	 */
	std::error_condition removeForumPostFromIndex(
	        RsGxsGroupId grpId, RsGxsMessageId msgId );

	/// Default path of the forums index database
	static std::string dbDefaultPath();

private:
	/// Link identifying a forum inside the index
	static std::string forumIndexId(const RsGxsGroupId& grpId);

	/// Link identifying a forum post inside the index
	static std::string postIndexId(
	        const RsGxsGroupId& grpId, const RsGxsMessageId& msgId );

	enum : Xapian::valueno
	{
		/// Used to store retroshare url of indexed documents
		URL_VALUENO,

		/// @see Xapian::BAD_VALUENO
		BAD_VALUENO = Xapian::BAD_VALUENO
	};

	/// Path of the database, used to open it read only on each search
	const std::string mDbPath;

	/// Queue through which every modification of the database goes
	DeepSearch::StubbornWriteOpQueue mWriteQueue;
};

View File

@ -5420,6 +5420,12 @@ std::error_condition RsGxsNetService::distantSearchRequest(
rs_owner_ptr<uint8_t> searchData, uint32_t dataSize,
RsServiceType serviceType, TurtleRequestId& requestId )
{
if(!mGxsNetTunnel)
{
free(searchData);
return std::errc::function_not_supported;
}
return mGxsNetTunnel->turtleSearchRequest(
searchData, dataSize, serviceType, requestId );
}

View File

@ -104,8 +104,10 @@ public:
* requests there.
* @param[in] requestData search query
* @param[in] requestSize search query size
* @param[out] resultData results data
* @param[out] resultSize results data size
* @param[out] resultData results data storage for a pointer to search
* result reply data or nullptr if no matching results were found
* @param[out] resultSize storage for results data size or 0 if no matching
* results were found
* @return Error details or success, NOT_OVERRIDDEN_BY_OBSERVER is
* returned to inform the caller that this method was not overridden by the
* observer so do not use it for other meanings. */
@ -113,6 +115,8 @@ public:
rs_view_ptr<uint8_t> requestData, uint32_t requestSize,
rs_owner_ptr<uint8_t>& resultData, uint32_t& resultSize )
{
/* Avoid unused parameters warning this way so doxygen can still parse
* parameters documentation */
(void) requestData; (void) requestSize;
(void) resultData; (void) resultSize;
return RsNxsObserverErrorNum::NOT_OVERRIDDEN_BY_OBSERVER;

View File

@ -427,7 +427,7 @@ public:
* @param[in] contentIds ids of requested contents, if empty summaries of
* all messages are reqeusted
* @param[out] summaries storage for summaries
* @return false if something failed, true otherwhise
* @return success or error details if something failed
*/
virtual std::error_condition getContentSummaries(
const RsGxsGroupId& channelId,

View File

@ -4,8 +4,8 @@
* libretroshare: retroshare core library *
* *
* Copyright (C) 2012-2014 Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2018-2020 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2020 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -118,7 +118,10 @@ enum class RsForumEventCode: uint8_t
SYNC_PARAMETERS_UPDATED = 0x0a, /// sync and storage times have changed
PINNED_POSTS_CHANGED = 0x0b, /// some posts where pinned or un-pinned
DELETED_FORUM = 0x0c, /// forum was deleted by cleaning
DELETED_POSTS = 0x0d /// Posts deleted by cleaning
DELETED_POST = 0x0d, /// Post deleted (usually by cleaning)
/// Distant search result received
DISTANT_SEARCH_RESULT = 0x0e
};
struct RsGxsForumEvent: RsEvent
@ -149,6 +152,29 @@ struct RsGxsForumEvent: RsEvent
~RsGxsForumEvent() override;
};
/** This event is fired once distant search results are received */
struct RsGxsForumsDistantSearchEvent: RsEvent
{
RsGxsForumsDistantSearchEvent():
RsEvent(RsEventType::GXS_CHANNELS),
mForumEventCode(RsForumEventCode::DISTANT_SEARCH_RESULT) {}
RsForumEventCode mForumEventCode;
TurtleRequestId mSearchId;
std::vector<RsGxsSearchResult> mSearchResults;
///* @see RsEvent @see RsSerializable
void serial_process( RsGenericSerializer::SerializeJob j,
RsGenericSerializer::SerializeContext& ctx ) override
{
RsEvent::serial_process(j, ctx);
RS_SERIAL_PROCESS(mForumEventCode);
RS_SERIAL_PROCESS(mSearchId);
RS_SERIAL_PROCESS(mSearchResults);
}
};
class RsGxsForums: public RsGxsIfaceHelper
{
public:
@ -385,6 +411,50 @@ public:
const RsGxsGroupId& forumId, const RsGxsMessageId& postId,
bool keepForever ) = 0;
/**
* @brief Get forum content summaries
* @jsonapi{development}
* @param[in] forumId id of the forum of which the content is requested
* @param[in] contentIds ids of requested contents, if empty summaries of
* all messages are requested
* @param[out] summaries storage for summaries
* @return success or error details if something failed
*/
virtual std::error_condition getContentSummaries(
const RsGxsGroupId& forumId,
const std::set<RsGxsMessageId>& contentIds,
std::vector<RsMsgMetaData>& summaries ) = 0;
/**
* @brief Search the whole reachable network for matching forums and
* posts
* @jsonapi{development}
* An @see RsGxsForumsDistantSearchEvent is emitted when matching results
* arrive from the network
* @param[in] matchString string to search for in forums and posts
* @param[out] searchId storage for search id, useful to track search events
* and retrieve search results
* @return success or error details
*/
virtual std::error_condition distantSearchRequest(
const std::string& matchString, TurtleRequestId& searchId ) = 0;
/**
* @brief Search the local index for matching forums and posts
* @jsonapi{development}
* @param[in] matchString string to search for in the index
* @param[out] searchResults storage for search results
* @return success or error details
*/
virtual std::error_condition localSearch(
const std::string& matchString,
std::vector<RsGxsSearchResult>& searchResults ) = 0;
////////////////////////////////////////////////////////////////////////////
/* Following functions are deprecated and should not be considered a stable
* to use API */
/**
* @brief Create forum. Blocking API.
* @jsonapi{development}

View File

@ -3,7 +3,9 @@
* *
* libretroshare: retroshare core library *
* *
* Copyright 2012-2012 by Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2012 Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -19,8 +21,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#ifndef RS_GXS_FORUM_ITEMS_H
#define RS_GXS_FORUM_ITEMS_H
#pragma once
#include <map>
@ -31,7 +32,18 @@
#include "retroshare/rsgxsforums.h"
/** Subtypes of the items exchanged by the GXS forums service, the group and
* message values match the deprecated RS_PKT_SUBTYPE_GXSFORUM_* constants */
enum class RsGxsForumsItems : uint8_t
{
/// Forum (group) item
GROUP_ITEM = 0x02,
/// Forum post (message) item
MESSAGE_ITEM = 0x03,
/// Type marker of RsGxsForumsSearchRequest
SEARCH_REQUEST = 0x04,
/// Type marker of RsGxsForumsSearchReply
SEARCH_REPLY = 0x05,
};
RS_DEPRECATED_FOR(RsGxsForumsItems)
const uint8_t RS_PKT_SUBTYPE_GXSFORUM_GROUP_ITEM = 0x02;
RS_DEPRECATED_FOR(RsGxsForumsItems)
const uint8_t RS_PKT_SUBTYPE_GXSFORUM_MESSAGE_ITEM = 0x03;
class RsGxsForumGroupItem : public RsGxsGrpItem
@ -61,6 +73,48 @@ public:
RsGxsForumMsg mMsg;
};
/** Serializable payload of a forums search request: a type marker followed by
 * the query string */
struct RsGxsForumsSearchRequest : RsSerializable
{
	RsGxsForumsSearchRequest() = default;
	~RsGxsForumsSearchRequest() override = default;

	/// Just for easier back and forward compatibility
	RsGxsForumsItems mType = RsGxsForumsItems::SEARCH_REQUEST;

	/// Store search match string
	std::string mQuery;

	/// @see RsSerializable
	void serial_process(
	        RsGenericSerializer::SerializeJob j,
	        RsGenericSerializer::SerializeContext& ctx ) override
	{
		// The type marker goes first, then the query
		RS_SERIAL_PROCESS(mType);
		RS_SERIAL_PROCESS(mQuery);
	}
};
/** Serializable payload of a forums search reply: a type marker followed by
 * the list of results */
struct RsGxsForumsSearchReply : RsSerializable
{
	RsGxsForumsSearchReply() = default;
	~RsGxsForumsSearchReply() override = default;

	/// Just for easier back and forward compatibility
	RsGxsForumsItems mType = RsGxsForumsItems::SEARCH_REPLY;

	/// Results storage
	std::vector<RsGxsSearchResult> mResults;

	/// @see RsSerializable
	void serial_process(
	        RsGenericSerializer::SerializeJob j,
	        RsGenericSerializer::SerializeContext& ctx ) override
	{
		// The type marker goes first, then the results
		RS_SERIAL_PROCESS(mType);
		RS_SERIAL_PROCESS(mResults);
	}
};
class RsGxsForumSerialiser : public RsServiceSerializer
{
public:
@ -69,5 +123,3 @@ public:
virtual RsItem *create_item(uint16_t service_id,uint8_t item_subtype) const ;
};
#endif /* RS_GXS_FORUM_ITEMS_H */

View File

@ -2,7 +2,8 @@
* libretroshare/src/retroshare: rsinit.cc *
* *
* Copyright (C) 2004-2014 Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2016-2019 Gioacchino Mazzurco <gio@altermundi.net> *
* Copyright (C) 2016-2021 Gioacchino Mazzurco <gio@altermundi.net> *
* Copyright (C) 2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -1336,22 +1337,26 @@ int RsServer::StartupRetroShare()
mWiki->setNetworkExchangeService(wiki_ns) ;
#endif
/**** Forum GXS service ****/
/************************* Forum GXS service ******************************/
RsGeneralDataService* gxsforums_ds = new RsDataService(currGxsDir + "/", "gxsforums_db",
RS_SERVICE_GXS_TYPE_FORUMS, NULL, rsInitConfig->gxs_passwd);
RsGeneralDataService* gxsforums_ds = new RsDataService(
currGxsDir + "/", "gxsforums_db", RS_SERVICE_GXS_TYPE_FORUMS,
nullptr, rsInitConfig->gxs_passwd );
p3GxsForums* mGxsForums = new p3GxsForums(
gxsforums_ds, nullptr, mGxsIdService );
p3GxsForums *mGxsForums = new p3GxsForums(gxsforums_ds, NULL, mGxsIdService);
RsGxsNetTunnelService* gxsForumsTunnelService = nullptr;
#ifdef RS_DEEP_FORUMS_INDEX
gxsForumsTunnelService = mGxsNetTunnel;
#endif
// create GXS photo service
RsGxsNetService* gxsforums_ns = new RsGxsNetService(
RS_SERVICE_GXS_TYPE_FORUMS, gxsforums_ds, nxsMgr,
mGxsForums, mGxsForums->getServiceInfo(),
mReputations, mGxsCircles,mGxsIdService,
pgpAuxUtils);//,mGxsNetTunnel,true,true,true);
RsGxsNetService* gxsforums_ns = new RsGxsNetService(
RS_SERVICE_GXS_TYPE_FORUMS, gxsforums_ds, nxsMgr, mGxsForums,
mGxsForums->getServiceInfo(), mReputations, mGxsCircles,
mGxsIdService, pgpAuxUtils, gxsForumsTunnelService );
mGxsForums->setNetworkExchangeService(gxsforums_ns);
mGxsForums->setNetworkExchangeService(gxsforums_ns) ;
/**** Channel GXS service ****/
@ -1598,7 +1603,10 @@ int RsServer::StartupRetroShare()
/**************************************************************************/
// Turtle search for GXS services
mGxsNetTunnel->registerSearchableService(gxschannels_ns) ;
mGxsNetTunnel->registerSearchableService(gxschannels_ns);
#ifdef RS_DEEP_FORUMS_INDEX
mGxsNetTunnel->registerSearchableService(gxsforums_ns);
#endif
/**************************************************************************/

View File

@ -4,8 +4,8 @@
* libretroshare: retroshare core library *
* *
* Copyright (C) 2012-2014 Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2018-2020 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2020 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -59,7 +59,11 @@ p3GxsForums::p3GxsForums( RsGeneralDataService *gds,
RsGenExchange( gds, nes, new RsGxsForumSerialiser(),
RS_SERVICE_GXS_TYPE_FORUMS, gixs, forumsAuthenPolicy()),
RsGxsForums(static_cast<RsGxsIface&>(*this)), mGenToken(0),
mGenActive(false), mGenCount(0), mKnownForumsMutex("GXS forums known forums timestamp cache")
mGenActive(false), mGenCount(0),
mKnownForumsMutex("GXS forums known forums timestamp cache")
#ifdef RS_DEEP_FORUMS_INDEX
, mDeepIndex(DeepForumsIndex::dbDefaultPath())
#endif
{
// Test Data disabled in Repo.
//RsTickEvent::schedule_in(FORUM_TESTEVENT_DUMMYDATA, DUMMYDATA_PERIOD);
@ -190,31 +194,61 @@ RsSerialiser* p3GxsForums::setupSerialiser()
return rss;
}
void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
void p3GxsForums::notifyChanges(std::vector<RsGxsNotify*>& changes)
{
RS_DBG2(changes.size(), " changes to notify");
std::vector<RsGxsNotify *>::iterator it;
for(it = changes.begin(); it != changes.end(); ++it)
for(RsGxsNotify* gxsChange: changes)
{
RsGxsNotify* gxsChange = *it;
// Let the compiler delete the change for us
std::unique_ptr<RsGxsNotify> gxsChangeDeleter(gxsChange);
switch(gxsChange->getType())
{
case RsGxsNotify::TYPE_RECEIVED_NEW: // [[fallthrough]]
case RsGxsNotify::TYPE_PUBLISHED:
{
RsGxsMsgChange* msgChange = dynamic_cast<RsGxsMsgChange*>(*it);
RsGxsGroupChange* groupChange = dynamic_cast<RsGxsGroupChange*>(*it);
auto msgChange = dynamic_cast<RsGxsMsgChange*>(gxsChange);
if(msgChange) /* Message received*/
if(msgChange) /* Message received */
{
auto ev = std::make_shared<RsGxsForumEvent>();
ev->mForumMsgId = msgChange->mMsgId;
ev->mForumGroupId = msgChange->mGroupId;
ev->mForumEventCode = RsForumEventCode::NEW_MESSAGE;
rsEvents->postEvent(ev);
uint8_t msgSubtype = msgChange->mNewMsgItem->PacketSubType();
switch(static_cast<RsGxsForumsItems>(msgSubtype))
{
case RsGxsForumsItems::MESSAGE_ITEM:
{
auto newForumMessageItem =
dynamic_cast<RsGxsForumMsgItem*>(
msgChange->mNewMsgItem );
if(!newForumMessageItem)
{
RS_ERR("Received message change with mNewMsgItem type "
"mismatching or null");
print_stacktrace();
return;
}
#ifdef RS_DEEP_FORUMS_INDEX
RsGxsForumMsg tmpPost = newForumMessageItem->mMsg;
tmpPost.mMeta = newForumMessageItem->meta;
mDeepIndex.indexForumPost(tmpPost);
#endif
auto ev = std::make_shared<RsGxsForumEvent>();
ev->mForumMsgId = msgChange->mMsgId;
ev->mForumGroupId = msgChange->mGroupId;
ev->mForumEventCode = RsForumEventCode::NEW_MESSAGE;
rsEvents->postEvent(ev);
break;
}
default:
RS_WARN("Got unknown gxs message subtype: ", msgSubtype);
break;
}
}
else if(groupChange) /* Group received */
auto groupChange = dynamic_cast<RsGxsGroupChange*>(gxsChange);
if(groupChange) /* Group received */
{
bool unknown;
{
@ -232,9 +266,25 @@ void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
ev->mForumEventCode = RsForumEventCode::NEW_FORUM;
rsEvents->postEvent(ev);
}
else
RS_DBG1( " Not notifying already known forum ",
gxsChange->mGroupId );
#ifdef RS_DEEP_FORUMS_INDEX
uint8_t itemType = groupChange->mNewGroupItem->PacketSubType();
switch(static_cast<RsGxsForumsItems>(itemType))
{
case RsGxsForumsItems::GROUP_ITEM:
{
auto newForumGroupItem =
static_cast<RsGxsForumGroupItem*>(
groupChange->mNewGroupItem );
mDeepIndex.indexForumGroup(newForumGroupItem->mGroup);
break;
}
default:
RS_WARN("Got unknown gxs group subtype: ", itemType);
break;
}
#endif // def RS_DEEP_FORUMS_INDEX
}
break;
}
@ -256,25 +306,31 @@ void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
}
case RsGxsNotify::TYPE_MESSAGE_DELETED:
{
RsGxsMsgDeletedChange* delChange =
dynamic_cast<RsGxsMsgDeletedChange*>(gxsChange);
auto delChange = dynamic_cast<RsGxsMsgDeletedChange*>(gxsChange);
if(!delChange)
{
RS_ERR( "Got mismatching notification type: ",
gxsChange->getType() );
print_stacktrace();
goto cleanup;
break;
}
#ifdef RS_DEEP_FORUMS_INDEX
mDeepIndex.removeForumPostFromIndex(
delChange->mGroupId, delChange->messageId);
#endif
auto ev = std::make_shared<RsGxsForumEvent>();
ev->mForumEventCode = RsForumEventCode::DELETED_POSTS;
ev->mForumEventCode = RsForumEventCode::DELETED_POST;
ev->mForumGroupId = delChange->mGroupId;
ev->mForumMsgId = delChange->messageId;
break;
}
case RsGxsNotify::TYPE_GROUP_DELETED:
{
#ifdef RS_DEEP_FORUMS_INDEX
mDeepIndex.removeForumFromIndex(gxsChange->mGroupId);
#endif
auto ev = std::make_shared<RsGxsForumEvent>();
ev->mForumGroupId = gxsChange->mGroupId;
ev->mForumEventCode = RsForumEventCode::DELETED_FORUM;
@ -299,7 +355,7 @@ void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
* analyse the old and new group in order to detect possible
* notifications for clients */
RsGxsGroupChange* grpChange = dynamic_cast<RsGxsGroupChange*>(*it);
auto grpChange = dynamic_cast<RsGxsGroupChange*>(gxsChange);
RsGxsForumGroupItem* old_forum_grp_item =
dynamic_cast<RsGxsForumGroupItem*>(grpChange->mOldGroupItem);
@ -312,9 +368,13 @@ void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
"mNewGroup not of type RsGxsForumGroupItem or NULL. "
"This is inconsistent!");
print_stacktrace();
goto cleanup;
break;
}
#ifdef RS_DEEP_FORUMS_INDEX
mDeepIndex.indexForumGroup(new_forum_grp_item->mGroup);
#endif
/* First of all, we check if there is a difference between the old
* and new list of moderators */
@ -382,9 +442,6 @@ void p3GxsForums::notifyChanges(std::vector<RsGxsNotify *> &changes)
" Currently not handled." );
break;
}
cleanup:
delete *it;
}
}
@ -1348,6 +1405,254 @@ bool RsGxsForumGroup::canEditPosts(const RsGxsId& id) const
id == mMeta.mAuthorId;
}
std::error_condition p3GxsForums::getContentSummaries(
const RsGxsGroupId& forumId,
const std::set<RsGxsMessageId>& contentIds,
std::vector<RsMsgMetaData>& summaries )
{
uint32_t token;
RsTokReqOptions opts;
opts.mReqType = GXS_REQUEST_TYPE_MSG_META;
GxsMsgReq msgReq;
msgReq[forumId] = contentIds;
if(!requestMsgInfo(token, opts, msgReq))
{
RS_ERR("requestMsgInfo failed");
return std::errc::invalid_argument;
}
switch(waitToken(token, std::chrono::seconds(5)))
{
case RsTokenService::COMPLETE:
{
GxsMsgMetaMap metaMap;
if(!RsGenExchange::getMsgMeta(token, metaMap))
return std::errc::result_out_of_range;
summaries = metaMap[forumId];
return std::error_condition();
}
case RsTokenService::PARTIAL: // [[fallthrough]];
case RsTokenService::PENDING:
return std::errc::timed_out;
default:
return std::errc::not_supported;
}
}
#ifdef RS_DEEP_FORUMS_INDEX
/**
 * Answer a search request received from the network by searching the local
 * index, restricted to public forums.
 * @param[in] requestData serialized RsGxsForumsSearchRequest
 * @param[in] requestSize size of the serialized request
 * @param[out] resultData storage for a pointer to the serialized
 * RsGxsForumsSearchReply, nullptr if nothing matched or on error
 * @param[out] resultSize storage for the size of the reply, 0 if nothing
 * matched or on error
 * @return success (also when nothing matched) or error details
 */
std::error_condition p3GxsForums::handleDistantSearchRequest(
        rs_view_ptr<uint8_t> requestData, uint32_t requestSize,
        rs_owner_ptr<uint8_t>& resultData, uint32_t& resultSize )
{
	RS_DBG1("");

	/* Every path that doesn't produce a reply must leave the output in the
	 * "no results" state instead of relying on what the caller passed in */
	resultData = nullptr;
	resultSize = 0;

	RsGxsForumsSearchRequest request;
	{
		RsGenericSerializer::SerializeContext ctx(requestData, requestSize);
		RsGenericSerializer::SerializeJob j =
		        RsGenericSerializer::SerializeJob::DESERIALIZE;
		RS_SERIAL_PROCESS(request);

		/* mType defaults to SEARCH_REQUEST, so a failed deserialization would
		 * pass the type check below unnoticed */
		if(!ctx.mOk)
		{
			RS_WARN("Got search request which failed deserialization");
			return std::errc::bad_message;
		}
	}

	if(request.mType != RsGxsForumsItems::SEARCH_REQUEST)
	{
		// If more types are implemented we would put a switch on mType instead
		RS_WARN( "Got search request with unkown type: ",
		         static_cast<uint32_t>(request.mType) );
		return std::errc::bad_message;
	}

	RsGxsForumsSearchReply reply;
	auto mErr = prepareSearchResults(request.mQuery, true, reply.mResults);
	if(mErr || reply.mResults.empty()) return mErr;

	uint32_t replySize = 0;
	{
		RsGenericSerializer::SerializeContext ctx;
		RsGenericSerializer::SerializeJob j =
		        RsGenericSerializer::SerializeJob::SIZE_ESTIMATE;
		RS_SERIAL_PROCESS(reply);
		replySize = ctx.mOffset;
	}

	// Allocation failure is reported instead of serializing into nullptr
	std::error_condition ec;
	auto replyData = rs_malloc<uint8_t>(replySize, &ec);
	if(!replyData) return ec;

	{
		RsGenericSerializer::SerializeContext ctx(replyData, replySize);
		RsGenericSerializer::SerializeJob j =
		        RsGenericSerializer::SerializeJob::SERIALIZE;
		RS_SERIAL_PROCESS(reply);
	}

	resultData = replyData;
	resultSize = replySize;
	return std::error_condition();
}
/* Serialize a RsGxsForumsSearchRequest carrying matchString and hand it to the
 * network service to be sent as distant search.
 * Returns the allocation error if the request buffer cannot be allocated,
 * otherwise whatever the network service returns. */
std::error_condition p3GxsForums::distantSearchRequest(
const std::string& matchString, TurtleRequestId& searchId )
{
RsGxsForumsSearchRequest request;
request.mQuery = matchString;
// First pass only computes the size needed by the serialized request
uint32_t requestSize;
{
RsGenericSerializer::SerializeContext ctx;
RsGenericSerializer::SerializeJob j =
RsGenericSerializer::SerializeJob::SIZE_ESTIMATE;
RS_SERIAL_PROCESS(request);
requestSize = ctx.mOffset;
}
std::error_condition ec;
auto requestData = rs_malloc<uint8_t>(requestSize, &ec);
if(!requestData) return ec;
// Second pass writes the request into the freshly allocated buffer
{
RsGenericSerializer::SerializeContext ctx(requestData, requestSize);
RsGenericSerializer::SerializeJob j =
RsGenericSerializer::SerializeJob::SERIALIZE;
RS_SERIAL_PROCESS(request);
}
/* NOTE(review): requestData is not freed here, presumably the network
 * service takes ownership of it (RsGxsNetService::distantSearchRequest
 * receives it as rs_owner_ptr) - confirm */
return netService()->distantSearchRequest(
requestData, requestSize,
static_cast<RsServiceType>(serviceType()), searchId );
}
/** Search the local index without restricting the results to public forums */
std::error_condition p3GxsForums::localSearch(
        const std::string& matchString,
        std::vector<RsGxsSearchResult>& searchResults )
{
	const bool publicOnly = false;
	return prepareSearchResults(matchString, publicOnly, searchResults);
}
std::error_condition p3GxsForums::prepareSearchResults(
const std::string& matchString, bool publicOnly,
std::vector<RsGxsSearchResult>& searchResults )
{
std::vector<DeepForumsSearchResult> results;
auto mErr = mDeepIndex.search(matchString, results);
if(mErr) return mErr;
searchResults.clear();
for(auto uRes: results)
{
RsUrl resUrl(uRes.mUrl);
const auto forumIdStr = resUrl.getQueryV(RsGxsForums::FORUM_URL_ID_FIELD);
if(!forumIdStr)
{
RS_ERR( "Forum URL retrieved from deep index miss ID. ",
"Should never happen! ", uRes.mUrl );
print_stacktrace();
return std::errc::address_not_available;
}
std::vector<RsGxsForumGroup> forumsInfo;
RsGxsGroupId forumId(*forumIdStr);
if(forumId.isNull())
{
RS_ERR( "Forum ID retrieved from deep index is invalid. ",
"Should never happen! ", uRes.mUrl );
print_stacktrace();
return std::errc::bad_address;
}
if( !getForumsInfo(std::list<RsGxsGroupId>{forumId}, forumsInfo) ||
forumsInfo.empty() )
{
RS_ERR( "Forum just parsed from deep index link not found. "
"Should never happen! ", forumId, " ", uRes.mUrl );
print_stacktrace();
return std::errc::identifier_removed;
}
RsGroupMetaData& fMeta(forumsInfo[0].mMeta);
// Avoid leaking sensitive information to unkown peers
if( publicOnly &&
!(fMeta.mGroupFlags & GXS_SERV::FLAG_PRIVACY_PUBLIC) ) continue;
RsGxsSearchResult res;
res.mGroupId = forumId;
res.mGroupName = fMeta.mGroupName;
res.mAuthorId = fMeta.mAuthorId;
res.mPublishTs = fMeta.mPublishTs;
res.mSearchContext = uRes.mSnippet;
auto postIdStr =
resUrl.getQueryV(RsGxsForums::FORUM_URL_MSG_ID_FIELD);
if(postIdStr)
{
RsGxsMessageId msgId(*postIdStr);
if(msgId.isNull())
{
RS_ERR( "Post just parsed from deep index link is invalid. "
"Should never happen! ", postIdStr, " ", uRes.mUrl );
print_stacktrace();
return std::errc::bad_address;
}
std::vector<RsMsgMetaData> msgSummaries;
auto errc = getContentSummaries(
forumId, std::set<RsGxsMessageId>{msgId}, msgSummaries);
if(errc) return errc;
if(msgSummaries.size() != 1)
{
RS_ERR( "getContentSummaries returned: ", msgSummaries.size(),
"should never happen!" );
return std::errc::result_out_of_range;
}
RsMsgMetaData& msgMeta(msgSummaries[0]);
res.mMsgId = msgMeta.mMsgId;
res.mMsgName = msgMeta.mMsgName;
res.mAuthorId = msgMeta.mAuthorId;
}
RS_DBG4(res);
searchResults.push_back(res);
}
return std::error_condition();
}
std::error_condition p3GxsForums::receiveDistantSearchResult(
const TurtleRequestId requestId,
rs_owner_ptr<uint8_t>& resultData, uint32_t& resultSize )
{
RsGxsForumsSearchReply reply;
{
RsGenericSerializer::SerializeContext ctx(resultData, resultSize);
RsGenericSerializer::SerializeJob j =
RsGenericSerializer::SerializeJob::DESERIALIZE;
RS_SERIAL_PROCESS(reply);
}
free(resultData);
if(reply.mType != RsGxsForumsItems::SEARCH_REPLY)
{
// If more types are implemented we would put a switch on mType instead
RS_WARN( "Got search request with unkown type: ",
static_cast<uint32_t>(reply.mType) );
return std::errc::bad_message;
}
auto event = std::make_shared<RsGxsForumsDistantSearchEvent>();
event->mSearchId = requestId;
event->mSearchResults = reply.mResults;
rsEvents->postEvent(event);
return std::error_condition();
}
#else // def RS_DEEP_FORUMS_INDEX
/// Stub compiled when RS_DEEP_FORUMS_INDEX is not defined: distant search is
/// unavailable, so the call is always reported as unsupported.
std::error_condition p3GxsForums::distantSearchRequest(
        const std::string& /*matchString*/, TurtleRequestId& /*searchId*/ )
{
	return std::make_error_condition(std::errc::function_not_supported);
}
/// Stub compiled when RS_DEEP_FORUMS_INDEX is not defined: there is no index
/// to query, so the call is always reported as unsupported.
std::error_condition p3GxsForums::localSearch(
        const std::string& /*matchString*/,
        std::vector<RsGxsSearchResult>& /*searchResults*/ )
{
	return std::make_error_condition(std::errc::function_not_supported);
}
#endif // def RS_DEEP_FORUMS_INDEX
/// Out-of-class definition of the RsGxsForums::DEFAULT_FORUM_BASE_URL static
/// constant; presumably the base used when building forum links (verify
/// against the link generation code).
/*static*/ const std::string RsGxsForums::DEFAULT_FORUM_BASE_URL =
"retroshare:///forums";
/*static*/ const std::string RsGxsForums::FORUM_URL_NAME_FIELD =

View File

@ -4,8 +4,8 @@
* libretroshare: retroshare core library *
* *
* Copyright (C) 2012-2014 Robert Fernie <retroshare@lunamutt.com> *
* Copyright (C) 2018-2020 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2020 Asociación Civil Altermundi <info@altermundi.net> *
* Copyright (C) 2018-2021 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2019-2021 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
@ -32,6 +32,10 @@
#include "util/rstickevent.h"
#include "util/rsdebug.h"
#ifdef RS_DEEP_FORUMS_INDEX
#include "deep_search/forumsindex.hpp"
#endif
class p3GxsForums: public RsGenExchange, public RsGxsForums, public p3Config,
public RsTickEvent /* only needed for testing - remove after */
@ -142,7 +146,34 @@ public:
/// @see RsGxsForums
std::error_condition setPostKeepForever(
const RsGxsGroupId& forumId, const RsGxsMessageId& postId,
bool keepForever ) override;
bool keepForever ) override;
/// @see RsGxsForums
std::error_condition getContentSummaries(
const RsGxsGroupId& forumId,
const std::set<RsGxsMessageId>& contentIds,
std::vector<RsMsgMetaData>& summaries ) override;
/// @see RsGxsForums
std::error_condition distantSearchRequest(
const std::string& matchString, TurtleRequestId& searchId ) override;
/// @see RsGxsForums
std::error_condition localSearch(
const std::string& matchString,
std::vector<RsGxsSearchResult>& searchResults ) override;
#ifdef RS_DEEP_FORUMS_INDEX
/// @see RsNxsObserver
std::error_condition handleDistantSearchRequest(
rs_view_ptr<uint8_t> requestData, uint32_t requestSize,
rs_owner_ptr<uint8_t>& resultData, uint32_t& resultSize ) override;
/// @see RsNxsObserver
std::error_condition receiveDistantSearchResult(
const TurtleRequestId requestId,
rs_owner_ptr<uint8_t>& resultData, uint32_t& resultSize ) override;
#endif
/// implementation of RsGxsForums
///
@ -155,6 +186,17 @@ public:
bool getMsgMetaData(const uint32_t &token, GxsMsgMetaMap& msg_metas) ;
protected:
#ifdef RS_DEEP_FORUMS_INDEX
/**
* Internal helper shared by the search entry points: searches the deep
* forums index for matchString and fills searchResults with the hits.
* @param[in] matchString string to look for in the deep forums index
* @param[in] publicOnly if true is passed only results pertaining to
* publicly shared forums are returned
* @param[out] searchResults storage for the search results
* @return an error condition, empty when the search completed successfully
*/
std::error_condition prepareSearchResults(
const std::string& matchString, bool publicOnly,
std::vector<RsGxsSearchResult>& searchResults );
#endif //def RS_DEEP_FORUMS_INDEX
private:
static uint32_t forumsAuthenPolicy();
@ -189,4 +231,8 @@ bool generateGroup(uint32_t &token, std::string groupName);
std::map<RsGxsGroupId,rstime_t> mKnownForums ;
RsMutex mKnownForumsMutex;
#ifdef RS_DEEP_FORUMS_INDEX
DeepForumsIndex mDeepIndex;
#endif
};

View File

@ -19,23 +19,25 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
// This class is the parent class for any service that will use the turtle router to distribute its packets.
// Typical representative clients include:
//
// p3ChatService: opens tunnels to distant peers for chatting
// ftServer: searches and open tunnels to distant sources for file transfer
//
#pragma once
#include <string>
#include <stdlib.h>
#include <serialiser/rsserial.h>
#include <turtle/rsturtleitem.h>
#include "serialiser/rsserial.h"
#include "turtle/rsturtleitem.h"
#include "util/rsdebug.h"
struct RsItem;
class p3turtle ;
/** This class is the parent class for any service that will use the turtle
* router to distribute its packets.
* Typical representative clients include:
* p3ChatService: opens tunnels to distant peers for chatting
* ftServer: searches and opens tunnels to distant sources for file
* transfer
*/
class RsTurtleClientService
{
public:
@ -87,30 +89,35 @@ class RsTurtleClientService
std::cerr << "!!!!!! Received Data from turtle router, but the client service is not handling it !!!!!!!!!!" << std::endl ;
}
/*!
* \brief receiveSearchRequest
* This method is called by the turtle router to notify the client of a search request in the form of generic data. The returned
* result contains the serialised generic result returned by the client.
*
* The turtle router keeps the memory ownership over search_request_data
*
* This default implementation does not handle the request: it only prints a
* message on std::cerr and returns false.
*
* \param search_request_data generic serialized search data
* \param search_request_data_len length of the serialized search data
* \param search_result_data generic serialized search result data
* \param search_result_data_len length of the serialized search result data
* \param max_allowed_hits max number of hits allowed to be sent back and forwarded
*
* \return true if the search is successful.
*/
virtual bool receiveSearchRequest(unsigned char */*search_request_data*/,
uint32_t /*search_request_data_len*/,
unsigned char *& /*search_result_data*/,
uint32_t& /*search_result_data_len*/,
uint32_t& /* max_allowed_hits */)
{
// NOTE(review): the message below mentions a search result although this
// method is the handler for search requests
std::cerr << "!!!!!! Received search result from turtle router, but the client service who requested it is not handling it !!!!!!!!!!" << std::endl ;
return false;
}
/*!
* This method is called by the turtle router to notify the client of a
* search request in the form of generic data.
* The returned result contains the serialised generic result returned by the
* client service.
* The turtle router keeps the memory ownership over search_request_data
* This default implementation does not handle the request: it only logs a
* warning and returns false.
* \param search_request_data generic serialized search data
* \param search_request_data_len length of the serialized search data
* \param search_result_data generic serialized search result data
* \param search_result_data_len length of the serialized search result data
* \param max_allowed_hits max number of hits allowed to be sent back and
* forwarded
* \return true if matching results are available, false otherwise.
*/
virtual bool receiveSearchRequest(
	unsigned char *search_request_data, uint32_t search_request_data_len,
	unsigned char *& search_result_data, uint32_t& search_result_data_len,
	uint32_t& max_allowed_hits )
{
	/* Suppress unused warning this way and not commenting the param names
	 * so doxygen match documentation against params */
	(void) search_request_data; (void) search_request_data_len;
	(void) search_result_data; (void) search_result_data_len;
	(void) max_allowed_hits;
	RS_WARN( "Received search request from turtle router, but the client "
	         "is not handling it!" );
	return false;
}
/*!
* \brief receiveSearchResult

View File

@ -141,6 +141,11 @@ rs_macos10.15:CONFIG -= rs_macos10.11
CONFIG *= no_rs_jsonapi
rs_jsonapi:CONFIG -= no_rs_jsonapi
# To enable forums indexing append the following assignation to qmake command
# line "CONFIG+=rs_deep_forums_index"
CONFIG *= no_rs_deep_forums_index
rs_deep_forums_index:CONFIG -= no_rs_deep_forums_index
# To enable channel indexing append the following assignation to qmake command
# line "CONFIG+=rs_deep_channels_index"
CONFIG *= no_rs_deep_channels_index
@ -561,6 +566,7 @@ rs_webui {
DEFINES *= RS_WEBUI
}
rs_deep_forums_index:DEFINES *= RS_DEEP_FORUMS_INDEX
rs_deep_channels_index:DEFINES *= RS_DEEP_CHANNEL_INDEX
rs_deep_files_index:DEFINES *= RS_DEEP_FILES_INDEX