2018-11-10 08:45:05 -05:00
|
|
|
/*******************************************************************************
|
|
|
|
* libretroshare/src/crypto: crypto.h *
|
|
|
|
* *
|
|
|
|
* libretroshare: retroshare core library *
|
|
|
|
* *
|
|
|
|
* Copyright (C) 2018 Gioacchino Mazzurco <gio@eigenlab.org> *
|
|
|
|
* *
|
|
|
|
* This program is free software: you can redistribute it and/or modify *
|
|
|
|
* it under the terms of the GNU Lesser General Public License as *
|
|
|
|
* published by the Free Software Foundation, either version 3 of the *
|
|
|
|
* License, or (at your option) any later version. *
|
|
|
|
* *
|
|
|
|
* This program is distributed in the hope that it will be useful, *
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
|
|
* GNU Lesser General Public License for more details. *
|
|
|
|
* *
|
|
|
|
* You should have received a copy of the GNU Lesser General Public License *
|
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
|
|
|
|
* *
|
|
|
|
*******************************************************************************/
|
2018-06-09 12:06:14 -04:00
|
|
|
#pragma once
|
|
|
|
|
2018-10-06 19:34:05 -04:00
|
|
|
#include <cstdint>
#include <ctime>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <xapian.h>

#include "retroshare/rsgxschannels.h"
#include "retroshare/rsinit.h"
#include "util/rstime.h"
#include "util/rsurl.h"
|
2018-06-09 12:06:14 -04:00
|
|
|
|
2018-07-21 07:20:50 -04:00
|
|
|
/**
 * Version check helper, defined only when nothing included so far already
 * provides a macro with this name.
 * Evaluates to true when the (XAPIAN_MAJOR_VERSION, XAPIAN_MINOR_VERSION,
 * XAPIAN_REVISION) triple is greater than or equal to (A, B, C), compared
 * lexicographically. It only uses integer comparisons so it is usable inside
 * #if directives.
 */
#ifndef XAPIAN_AT_LEAST
#define XAPIAN_AT_LEAST(A,B,C) (XAPIAN_MAJOR_VERSION > (A) || \
	(XAPIAN_MAJOR_VERSION == (A) && \
	(XAPIAN_MINOR_VERSION > (B) || \
	(XAPIAN_MINOR_VERSION == (B) && XAPIAN_REVISION >= (C)))))
#endif // ndef XAPIAN_AT_LEAST
|
|
|
|
|
2018-06-09 12:06:14 -04:00
|
|
|
struct DeepSearch
|
|
|
|
{
|
2018-06-10 13:04:11 -04:00
|
|
|
struct SearchResult
|
|
|
|
{
|
|
|
|
std::string mUrl;
|
|
|
|
std::string mSnippet;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
2018-07-24 18:27:39 -04:00
|
|
|
* @param[in] maxResults maximum number of acceptable search results, 0 for
|
|
|
|
* no limits
|
2018-06-10 13:04:11 -04:00
|
|
|
* @return search results count
|
|
|
|
*/
|
|
|
|
static uint32_t search( const std::string& queryStr,
|
|
|
|
std::vector<SearchResult>& results,
|
|
|
|
uint32_t maxResults = 100 )
|
|
|
|
{
|
|
|
|
results.clear();
|
|
|
|
|
2018-07-24 18:27:39 -04:00
|
|
|
Xapian::Database db;
|
|
|
|
|
2018-06-10 13:04:11 -04:00
|
|
|
// Open the database we're going to search.
|
2018-07-24 18:27:39 -04:00
|
|
|
try { db = Xapian::Database(dbPath()); }
|
|
|
|
catch(Xapian::DatabaseOpeningError e)
|
|
|
|
{
|
|
|
|
std::cerr << __PRETTY_FUNCTION__ << " " << e.get_msg()
|
|
|
|
<< ", probably nothing has been indexed yet."<< std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
catch(Xapian::DatabaseError e)
|
|
|
|
{
|
|
|
|
std::cerr << __PRETTY_FUNCTION__ << " " << e.get_msg()
|
|
|
|
<< " this is fishy, maybe " << dbPath()
|
|
|
|
<< " has been corrupted (deleting it may help in that "
|
|
|
|
<< "case without loosing data)" << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
2018-06-10 13:04:11 -04:00
|
|
|
|
|
|
|
// Set up a QueryParser with a stemmer and suitable prefixes.
|
|
|
|
Xapian::QueryParser queryparser;
|
|
|
|
//queryparser.set_stemmer(Xapian::Stem("en"));
|
|
|
|
queryparser.set_stemming_strategy(queryparser.STEM_SOME);
|
|
|
|
// Start of prefix configuration.
|
|
|
|
//queryparser.add_prefix("title", "S");
|
|
|
|
//queryparser.add_prefix("description", "XD");
|
|
|
|
// End of prefix configuration.
|
|
|
|
|
|
|
|
// And parse the query.
|
|
|
|
Xapian::Query query = queryparser.parse_query(queryStr);
|
|
|
|
|
|
|
|
// Use an Enquire object on the database to run the query.
|
|
|
|
Xapian::Enquire enquire(db);
|
|
|
|
enquire.set_query(query);
|
|
|
|
|
2018-07-24 18:27:39 -04:00
|
|
|
Xapian::MSet mset = enquire.get_mset(
|
|
|
|
0, maxResults ? maxResults : db.get_doccount() );
|
2018-06-10 13:04:11 -04:00
|
|
|
|
|
|
|
for ( Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m )
|
|
|
|
{
|
|
|
|
const Xapian::Document& doc = m.get_document();
|
|
|
|
SearchResult s;
|
|
|
|
s.mUrl = doc.get_value(URL_VALUENO);
|
2018-07-21 07:20:50 -04:00
|
|
|
#if XAPIAN_AT_LEAST(1,3,5)
|
2018-06-10 13:04:11 -04:00
|
|
|
s.mSnippet = mset.snippet(doc.get_data());
|
2018-07-21 07:20:50 -04:00
|
|
|
#endif // XAPIAN_AT_LEAST(1,3,5)
|
2018-06-10 13:04:11 -04:00
|
|
|
results.push_back(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
return results.size();
|
|
|
|
}
|
2018-06-09 12:06:14 -04:00
|
|
|
|
|
|
|
|
|
|
|
static void indexChannelGroup(const RsGxsChannelGroup& chan)
|
|
|
|
{
|
2018-06-12 08:12:08 -04:00
|
|
|
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
|
2018-06-09 12:06:14 -04:00
|
|
|
|
|
|
|
// Set up a TermGenerator that we'll use in indexing.
|
|
|
|
Xapian::TermGenerator termgenerator;
|
|
|
|
//termgenerator.set_stemmer(Xapian::Stem("en"));
|
|
|
|
|
|
|
|
// We make a document and tell the term generator to use this.
|
|
|
|
Xapian::Document doc;
|
|
|
|
termgenerator.set_document(doc);
|
|
|
|
|
|
|
|
// Index each field with a suitable prefix.
|
|
|
|
termgenerator.index_text(chan.mMeta.mGroupName, 1, "G");
|
2018-07-23 05:18:32 -04:00
|
|
|
termgenerator.index_text(timetToXapianDate(chan.mMeta.mPublishTs), 1, "D");
|
2018-06-09 12:06:14 -04:00
|
|
|
termgenerator.index_text(chan.mDescription, 1, "XD");
|
|
|
|
|
|
|
|
// Index fields without prefixes for general search.
|
|
|
|
termgenerator.index_text(chan.mMeta.mGroupName);
|
|
|
|
termgenerator.increase_termpos();
|
|
|
|
termgenerator.index_text(chan.mDescription);
|
|
|
|
|
2018-07-04 06:08:50 -04:00
|
|
|
RsUrl chanUrl; chanUrl
|
|
|
|
.setScheme("retroshare").setPath("/channel")
|
|
|
|
.setQueryKV("id", chan.mMeta.mGroupId.toStdString());
|
|
|
|
const std::string idTerm("Q" + chanUrl.toString());
|
|
|
|
|
2018-07-23 05:18:32 -04:00
|
|
|
chanUrl.setQueryKV("publishTs", std::to_string(chan.mMeta.mPublishTs));
|
2018-07-04 06:08:50 -04:00
|
|
|
chanUrl.setQueryKV("name", chan.mMeta.mGroupName);
|
2018-07-22 15:33:40 -04:00
|
|
|
if(!chan.mMeta.mAuthorId.isNull())
|
|
|
|
chanUrl.setQueryKV("authorId", chan.mMeta.mAuthorId.toStdString());
|
|
|
|
if(chan.mMeta.mSignFlags)
|
|
|
|
chanUrl.setQueryKV( "signFlags",
|
|
|
|
std::to_string(chan.mMeta.mSignFlags) );
|
2018-07-04 06:08:50 -04:00
|
|
|
std::string rsLink(chanUrl.toString());
|
2018-06-10 13:04:11 -04:00
|
|
|
|
|
|
|
// store the RS link so we are able to retrive it on matching search
|
|
|
|
doc.add_value(URL_VALUENO, rsLink);
|
|
|
|
|
|
|
|
// Store some fields for display purposes.
|
|
|
|
doc.set_data(chan.mMeta.mGroupName + "\n" + chan.mDescription);
|
|
|
|
|
2018-06-09 12:06:14 -04:00
|
|
|
// We use the identifier to ensure each object ends up in the
|
|
|
|
// database only once no matter how many times we run the
|
2018-06-10 13:04:11 -04:00
|
|
|
// indexer. "Q" prefix is a Xapian convention for unique id term.
|
2018-06-09 12:06:14 -04:00
|
|
|
doc.add_boolean_term(idTerm);
|
|
|
|
db.replace_document(idTerm, doc);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void removeChannelFromIndex(RsGxsGroupId grpId)
|
|
|
|
{
|
2018-06-10 13:04:11 -04:00
|
|
|
// "Q" prefix is a Xapian convention for unique id term.
|
2018-07-04 06:08:50 -04:00
|
|
|
RsUrl chanUrl; chanUrl
|
|
|
|
.setScheme("retroshare").setPath("/channel")
|
|
|
|
.setQueryKV("id", grpId.toStdString());
|
|
|
|
std::string idTerm("Q" + chanUrl.toString());
|
2018-06-09 12:06:14 -04:00
|
|
|
|
2018-06-12 08:12:08 -04:00
|
|
|
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
|
2018-06-09 12:06:14 -04:00
|
|
|
db.delete_document(idTerm);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void indexChannelPost(const RsGxsChannelPost& post)
|
|
|
|
{
|
2018-06-12 08:12:08 -04:00
|
|
|
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
|
2018-06-09 12:06:14 -04:00
|
|
|
|
|
|
|
// Set up a TermGenerator that we'll use in indexing.
|
|
|
|
Xapian::TermGenerator termgenerator;
|
|
|
|
//termgenerator.set_stemmer(Xapian::Stem("en"));
|
|
|
|
|
|
|
|
// We make a document and tell the term generator to use this.
|
|
|
|
Xapian::Document doc;
|
|
|
|
termgenerator.set_document(doc);
|
|
|
|
|
|
|
|
// Index each field with a suitable prefix.
|
|
|
|
termgenerator.index_text(post.mMeta.mMsgName, 1, "S");
|
2018-07-23 05:18:32 -04:00
|
|
|
termgenerator.index_text(timetToXapianDate(post.mMeta.mPublishTs), 1, "D");
|
2018-07-04 06:08:50 -04:00
|
|
|
|
|
|
|
// Avoid indexing HTML
|
|
|
|
bool isPlainMsg = post.mMsg[0] != '<' || post.mMsg[post.mMsg.size() - 1] != '>';
|
|
|
|
|
|
|
|
if(isPlainMsg)
|
|
|
|
termgenerator.index_text(post.mMsg, 1, "XD");
|
2018-06-09 12:06:14 -04:00
|
|
|
|
|
|
|
// Index fields without prefixes for general search.
|
|
|
|
termgenerator.index_text(post.mMeta.mMsgName);
|
2018-07-04 06:08:50 -04:00
|
|
|
if(isPlainMsg)
|
|
|
|
{
|
|
|
|
termgenerator.increase_termpos();
|
|
|
|
termgenerator.index_text(post.mMsg);
|
|
|
|
}
|
|
|
|
|
|
|
|
for(const RsGxsFile& attachment : post.mFiles)
|
|
|
|
{
|
|
|
|
termgenerator.index_text(attachment.mName, 1, "F");
|
|
|
|
|
|
|
|
termgenerator.increase_termpos();
|
|
|
|
termgenerator.index_text(attachment.mName);
|
|
|
|
}
|
2018-06-09 12:06:14 -04:00
|
|
|
|
|
|
|
// We use the identifier to ensure each object ends up in the
|
|
|
|
// database only once no matter how many times we run the
|
|
|
|
// indexer.
|
2018-07-04 06:08:50 -04:00
|
|
|
RsUrl postUrl; postUrl
|
|
|
|
.setScheme("retroshare").setPath("/channel")
|
|
|
|
.setQueryKV("id", post.mMeta.mGroupId.toStdString())
|
|
|
|
.setQueryKV("msgid", post.mMeta.mMsgId.toStdString());
|
|
|
|
std::string idTerm("Q" + postUrl.toString());
|
|
|
|
|
2018-07-23 05:18:32 -04:00
|
|
|
postUrl.setQueryKV("publishTs", std::to_string(post.mMeta.mPublishTs));
|
2018-07-04 06:08:50 -04:00
|
|
|
postUrl.setQueryKV("name", post.mMeta.mMsgName);
|
2018-07-23 05:18:32 -04:00
|
|
|
postUrl.setQueryKV("authorId", post.mMeta.mAuthorId.toStdString());
|
2018-07-04 06:08:50 -04:00
|
|
|
std::string rsLink(postUrl.toString());
|
|
|
|
|
|
|
|
// store the RS link so we are able to retrive it on matching search
|
|
|
|
doc.add_value(URL_VALUENO, rsLink);
|
|
|
|
|
|
|
|
// Store some fields for display purposes.
|
|
|
|
if(isPlainMsg)
|
|
|
|
doc.set_data(post.mMeta.mMsgName + "\n" + post.mMsg);
|
|
|
|
else doc.set_data(post.mMeta.mMsgName);
|
|
|
|
|
2018-06-09 12:06:14 -04:00
|
|
|
doc.add_boolean_term(idTerm);
|
|
|
|
db.replace_document(idTerm, doc);
|
|
|
|
}
|
|
|
|
|
2018-07-04 06:08:50 -04:00
|
|
|
static void removeChannelPostFromIndex(
|
|
|
|
RsGxsGroupId grpId, RsGxsMessageId msgId )
|
|
|
|
{
|
|
|
|
RsUrl postUrl; postUrl
|
|
|
|
.setScheme("retroshare").setPath("/channel")
|
|
|
|
.setQueryKV("id", grpId.toStdString())
|
|
|
|
.setQueryKV("msgid", msgId.toStdString());
|
|
|
|
// "Q" prefix is a Xapian convention for unique id term.
|
|
|
|
std::string idTerm("Q" + postUrl.toString());
|
|
|
|
|
|
|
|
Xapian::WritableDatabase db(dbPath(), Xapian::DB_CREATE_OR_OPEN);
|
|
|
|
db.delete_document(idTerm);
|
|
|
|
}
|
|
|
|
|
2018-06-10 13:04:11 -04:00
|
|
|
private:
|
|
|
|
|
|
|
|
enum : Xapian::valueno
|
|
|
|
{
|
|
|
|
/// Used to store retroshare url of indexed documents
|
|
|
|
URL_VALUENO,
|
|
|
|
|
|
|
|
/// @see Xapian::BAD_VALUENO
|
|
|
|
BAD_VALUENO = Xapian::BAD_VALUENO
|
|
|
|
};
|
|
|
|
|
2018-06-12 08:12:08 -04:00
|
|
|
static const std::string& dbPath()
|
|
|
|
{
|
|
|
|
static const std::string dbDir =
|
|
|
|
RsAccounts::AccountDirectory() + "/deep_search_xapian_db";
|
|
|
|
return dbDir;
|
|
|
|
}
|
2018-07-23 05:18:32 -04:00
|
|
|
|
2018-10-06 19:34:05 -04:00
|
|
|
static std::string timetToXapianDate(const rstime_t& time)
|
2018-07-23 05:18:32 -04:00
|
|
|
{
|
|
|
|
char date[] = "YYYYMMDD\0";
|
2018-10-17 22:21:23 -04:00
|
|
|
time_t tTime = static_cast<time_t>(time);
|
|
|
|
std::strftime(date, 9, "%Y%m%d", std::gmtime(&tTime));
|
2018-07-23 05:18:32 -04:00
|
|
|
return date;
|
|
|
|
}
|
2018-06-09 12:06:14 -04:00
|
|
|
};
|
|
|
|
|