diff --git a/libretroshare/src/deep_search/channelsindex.hpp b/libretroshare/src/deep_search/channelsindex.hpp index e9e015b7b..0a49629d9 100644 --- a/libretroshare/src/deep_search/channelsindex.hpp +++ b/libretroshare/src/deep_search/channelsindex.hpp @@ -71,7 +71,7 @@ private: static const std::string& dbPath() { static const std::string dbDir = - RsAccounts::AccountDirectory() + "/deep_search_xapian_db"; + RsAccounts::AccountDirectory() + "/deep_channels_xapian_db"; return dbDir; } }; diff --git a/libretroshare/src/deep_search/filesflacindexer.hpp b/libretroshare/src/deep_search/filesflacindexer.hpp new file mode 100644 index 000000000..dbdc62b99 --- /dev/null +++ b/libretroshare/src/deep_search/filesflacindexer.hpp @@ -0,0 +1,156 @@ +/******************************************************************************* + * RetroShare full text indexing and search implementation based on Xapian * + * * + * Copyright (C) 2018-2019 Gioacchino Mazzurco * + * Copyright (C) 2019 Asociación Civil Altermundi * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU Affero General Public License version 3 as * + * published by the Free Software Foundation. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Affero General Public License for more details. * + * * + * You should have received a copy of the GNU Affero General Public License * + * along with this program. If not, see <https://www.gnu.org/licenses/>.
* + * * + *******************************************************************************/ + +#include "deep_search/filesindex.hpp" +#include "util/rsdebug.h" + +#include <xapian.h> +#include <string> +#include <FLAC++/metadata.h> +#include <cctype> +#include <memory> +/** Deep indexer extracting Vorbis comments from FLAC and OggFLAC files */ +struct RsDeepFlacFileIndexer +{ + RsDeepFlacFileIndexer() + { + DeepFilesIndex::registerIndexer(31, indexFlacFile); + } + /** @return 99 if comments got indexed, 30 if none was found, 0 or 1 on errors */ + static uint32_t indexFlacFile( + const std::string& path, const std::string& /*name*/, + Xapian::TermGenerator& xTG, Xapian::Document& xDoc ) + { + Dbg3() << __PRETTY_FUNCTION__ << " " << path << std::endl; + + using FlacChain = FLAC::Metadata::Chain; + std::unique_ptr<FlacChain> flacChain(new FlacChain); + + if(!flacChain->is_valid()) + { + RsErr() << __PRETTY_FUNCTION__ << " Failed creating FLAC Chain 1" + << std::endl; + return 1; + } + + if(!flacChain->read(path.c_str(), false)) + { + Dbg3() << __PRETTY_FUNCTION__ << " Failed to open the file as FLAC" + << std::endl; + + flacChain.reset(new FlacChain); + if(!flacChain->is_valid()) + { + RsErr() << __PRETTY_FUNCTION__ + << " Failed creating FLAC Chain 2" << std::endl; + return 1; + } + if(!flacChain->read(path.c_str(), true)) + { + Dbg3() << __PRETTY_FUNCTION__ + << " Failed to open the file as OggFLAC" + << std::endl; + return 0; + } + } + + unsigned validCommentsCnt = 0; + std::string docData = xDoc.get_data(); + + FLAC::Metadata::Iterator mdit; + mdit.init(*flacChain); + if(!mdit.is_valid()) return 1; + + do + { + ::FLAC__MetadataType mdt = mdit.get_block_type(); + if (mdt != FLAC__METADATA_TYPE_VORBIS_COMMENT) continue; + + Dbg2() << __PRETTY_FUNCTION__ << " Found Vorbis Comment Block" + << std::endl; + + std::unique_ptr<FLAC::Metadata::Prototype> proto(mdit.get_block()); + if(!proto) continue; + + const FLAC::Metadata::VorbisComment* vc = + dynamic_cast<const FLAC::Metadata::VorbisComment*>(proto.get()); + if(!vc || !vc->is_valid()) continue; + + unsigned numComments = vc->get_num_comments(); + for(unsigned i = 0; i < numComments; ++i) + { + FLAC::Metadata::VorbisComment::Entry entry = + vc->get_comment(i); + if(!entry.is_valid()) continue; + +
std::string tagName( entry.get_field_name(), + entry.get_field_name_length() ); + + /* Vorbis tag names should be uppercase but not all software enforces + * it; toupper needs an unsigned char value to avoid undefined behaviour */ + for (auto& c: tagName) c = static_cast<char>(toupper(static_cast<unsigned char>(c))); + + std::string tagValue( entry.get_field_value(), + entry.get_field_value_length() ); + + if(tagValue.empty()) continue; + + if(tagName == "ARTIST") + xTG.index_text(tagValue, 1, "A"); + else if (tagName == "DESCRIPTION") + xTG.index_text(tagValue, 1, "XD"); + else if (tagName == "TITLE") + xTG.index_text(tagValue, 1, "S"); + else if(tagName.find("COVERART") != tagName.npos) + continue; // Avoid polluting the index with binary data + else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos) + continue; // Avoid polluting the index with binary data + + // Index fields without prefixes for general search. + xTG.increase_termpos(); + std::string fullComment(tagName + "=" + tagValue); + xTG.index_text(fullComment); + docData += fullComment + "\n"; + + Dbg2() << __PRETTY_FUNCTION__ << " Indexed " << fullComment + << std::endl; + + ++validCommentsCnt; + } + } + while(mdit.next()); + + if(validCommentsCnt > 0) + { + Dbg1() << __PRETTY_FUNCTION__ << " Successfully indexed: " << path + << std::endl; + + xDoc.set_data(docData); + return 99; + } + + /* Although the file appears to be a valid FLAC, no Vorbis comment has + * been found, so return less than 50: maybe it is tagged only with ID3 + * tags?
*/ + return 30; + } + + RS_SET_CONTEXT_DEBUG_LEVEL(3) +}; diff --git a/libretroshare/src/deep_search/filesindex.cpp b/libretroshare/src/deep_search/filesindex.cpp index f59069a39..3edcf9a97 100644 --- a/libretroshare/src/deep_search/filesindex.cpp +++ b/libretroshare/src/deep_search/filesindex.cpp @@ -22,6 +22,7 @@ #include "deep_search/commonutils.hpp" #include "util/rsdebug.h" #include "retroshare/rsinit.h" +#include "retroshare/rsversion.h" #include @@ -37,31 +38,48 @@ bool DeepFilesIndex::indexFile( if(!dbPtr) return false; Xapian::WritableDatabase& db(*dbPtr); - if(db.term_exists("Q" + hash.toStdString())) + const std::string hashString = hash.toStdString(); + const std::string idTerm("Q" + hashString); + + Xapian::Document oldDoc; + Xapian::PostingIterator pIt = db.postlist_begin(idTerm); + if( pIt != db.postlist_end(idTerm) ) { - Dbg3() << __PRETTY_FUNCTION__ << " skipping laready indexed file: " - << hash << " " << name << std::endl; - return true; + oldDoc = db.get_document(*pIt); + if( oldDoc.get_value(INDEXER_VERSION_VALUENO) == + RS_HUMAN_READABLE_VERSION && + std::stoull(oldDoc.get_value(INDEXERS_COUNT_VALUENO)) == + indexersRegister.size() ) + { + /* Looks like this file has already been indexed by this RetroShare + * exact version, so we can skip it. If the version was different it + * made sense to reindex it as better indexers might be available + * since last time it was indexed */ + Dbg3() << __PRETTY_FUNCTION__ << " skipping laready indexed file: " + << hash << " " << name << std::endl; + return true; + } } + Xapian::Document doc; + // Set up a TermGenerator that we'll use in indexing. Xapian::TermGenerator termgenerator; //termgenerator.set_stemmer(Xapian::Stem("en")); - - // We make a document and tell the term generator to use this.
- Xapian::Document doc; termgenerator.set_document(doc); for(auto& indexerPair : indexersRegister) if(indexerPair.second(path, name, termgenerator, doc) > 50) break; - const std::string hashString = hash.toStdString(); - const std::string idTerm("Q" + hashString); doc.add_boolean_term(idTerm); termgenerator.index_text(name, 1, "N"); termgenerator.index_text(name); doc.add_value(FILE_HASH_VALUENO, hashString); + doc.add_value(INDEXER_VERSION_VALUENO, RS_HUMAN_READABLE_VERSION); + doc.add_value( + INDEXERS_COUNT_VALUENO, + std::to_string(indexersRegister.size()) ); db.replace_document(idTerm, doc); return true; @@ -141,3 +159,13 @@ uint32_t DeepFilesIndex::search( # include "deep_search/filesoggindexer.hpp" static RsDeepOggFileIndexer oggFileIndexer; #endif // def RS_DEEP_FILES_INDEX_OGG + +#ifdef RS_DEEP_FILES_INDEX_FLAC +# include "deep_search/filesflacindexer.hpp" +static RsDeepFlacFileIndexer flacFileIndexer; +#endif // def RS_DEEP_FILES_INDEX_FLAC + +#ifdef RS_DEEP_FILES_INDEX_TAGLIB +# include "deep_search/filestaglibindexer.hpp" +static RsDeepTaglibFileIndexer taglibFileIndexer; +#endif // def RS_DEEP_FILES_INDEX_TAGLIB diff --git a/libretroshare/src/deep_search/filesindex.hpp b/libretroshare/src/deep_search/filesindex.hpp index feb172ddc..f811e5e9c 100644 --- a/libretroshare/src/deep_search/filesindex.hpp +++ b/libretroshare/src/deep_search/filesindex.hpp @@ -82,6 +82,14 @@ private: /// Used to store RsFileHash of indexed documents FILE_HASH_VALUENO, + /** Used to check if some file needs reindexing because it was indexed by + * a different RetroShare version */ + INDEXER_VERSION_VALUENO, + + /** Used to check if some file needs reindexing because the number of + * registered indexers changed since it was indexed */ + INDEXERS_COUNT_VALUENO, + /// @see Xapian::BAD_VALUENO BAD_VALUENO = Xapian::BAD_VALUENO }; @@ -91,5 +99,5 @@ private: /** Storage for indexers function by order */ static std::multimap indexersRegister; - RS_SET_CONTEXT_DEBUG_LEVEL(4) + RS_SET_CONTEXT_DEBUG_LEVEL(1)
}; diff --git a/libretroshare/src/deep_search/filesoggindexer.hpp b/libretroshare/src/deep_search/filesoggindexer.hpp index babaa2b71..00c22ae66 100644 --- a/libretroshare/src/deep_search/filesoggindexer.hpp +++ b/libretroshare/src/deep_search/filesoggindexer.hpp @@ -74,7 +74,7 @@ struct RsDeepOggFileIndexer xTG.index_text(tagValue, 1, "XD"); else if (tagName == "TITLE") xTG.index_text(tagValue, 1, "S"); - if(tagName.find("COVERART") != tagName.npos) + else if(tagName.find("COVERART") != tagName.npos) continue; // Avoid polluting the index with binary data else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos) continue; // Avoid polluting the index with binary data @@ -93,5 +93,5 @@ struct RsDeepOggFileIndexer return 0; } - RS_SET_CONTEXT_DEBUG_LEVEL(2) + RS_SET_CONTEXT_DEBUG_LEVEL(1) }; diff --git a/libretroshare/src/deep_search/filestaglibindexer.hpp b/libretroshare/src/deep_search/filestaglibindexer.hpp new file mode 100644 index 000000000..341e9af38 --- /dev/null +++ b/libretroshare/src/deep_search/filestaglibindexer.hpp @@ -0,0 +1,103 @@ +/******************************************************************************* + * RetroShare full text indexing and search implementation based on Xapian * + * * + * Copyright (C) 2018-2019 Gioacchino Mazzurco * + * Copyright (C) 2019 Asociación Civil Altermundi * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU Affero General Public License version 3 as * + * published by the Free Software Foundation. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Affero General Public License for more details. * + * * + * You should have received a copy of the GNU Affero General Public License * + * along with this program. If not, see <https://www.gnu.org/licenses/>.
* + * * + *******************************************************************************/ + +#include "deep_search/filesindex.hpp" +#include "util/rsdebug.h" + +#include +#include +#include +#include +#include +#include +/** Deep indexer extracting tag properties from files supported by TagLib */ +struct RsDeepTaglibFileIndexer +{ + RsDeepTaglibFileIndexer() + { + DeepFilesIndex::registerIndexer(40, indexFile); + } + /** @return 99 if properties got indexed, 30 if none was found, 0 on errors */ + static uint32_t indexFile( + const std::string& path, const std::string& /*name*/, + Xapian::TermGenerator& xTG, Xapian::Document& xDoc ) + { + Dbg4() << __PRETTY_FUNCTION__ << " " << path << std::endl; + + TagLib::FileRef tFile(path.c_str()); + if(tFile.isNull()) return 0; + + const TagLib::Tag* tag = tFile.tag(); + if(!tag) return 0; + + TagLib::PropertyMap tMap = tag->properties(); + + unsigned validCommentsCnt = 0; + std::string docData = xDoc.get_data(); + for( TagLib::PropertyMap::ConstIterator mIt = tMap.begin(); + mIt != tMap.end(); ++mIt ) + { + if(mIt->first.isEmpty()) continue; /* isEmpty() also covers the deprecated isNull() case */ + std::string tagName(mIt->first.upper().to8Bit()); + + if(mIt->second.isEmpty()) continue; + std::string tagValue(mIt->second.toString(", ").to8Bit(true)); + if(tagValue.empty()) continue; + + if(tagName == "ARTIST") + xTG.index_text(tagValue, 1, "A"); + else if (tagName == "DESCRIPTION") + xTG.index_text(tagValue, 1, "XD"); + else if (tagName == "TITLE") + xTG.index_text(tagValue, 1, "S"); + else if(tagName.find("COVERART") != tagName.npos) + continue; // Avoid polluting the index with binary data + else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos) + continue; // Avoid polluting the index with binary data + + // Index fields without prefixes for general search.
+ xTG.increase_termpos(); + std::string fullComment(tagName + "=" + tagValue); + xTG.index_text(fullComment); + docData += fullComment + "\n"; + + Dbg2() << __PRETTY_FUNCTION__ << " Indexed " << tagName << "=\"" + << tagValue << '"' << std::endl; + + ++validCommentsCnt; + } + + if(validCommentsCnt > 0) + { + Dbg1() << __PRETTY_FUNCTION__ << " Successfully indexed: " << path + << std::endl; + + xDoc.set_data(docData); + return 99; + } + + /* Although the file appears to be supported by taglib, no comment has + * been found, so return less than 50: maybe another indexer is capable of + * extracting information */ + return 30; + } + + RS_SET_CONTEXT_DEBUG_LEVEL(3) +}; diff --git a/libretroshare/src/libretroshare.pro b/libretroshare/src/libretroshare.pro index 017be8593..01cbc8852 100644 --- a/libretroshare/src/libretroshare.pro +++ b/libretroshare/src/libretroshare.pro @@ -919,6 +919,14 @@ rs_deep_files_index_ogg { HEADERS += deep_search/filesoggindexer.hpp } +rs_deep_files_index_flac { + HEADERS += deep_search/filesflacindexer.hpp +} + +rs_deep_files_index_taglib { + HEADERS += deep_search/filestaglibindexer.hpp +} + rs_broadcast_discovery { HEADERS += retroshare/rsbroadcastdiscovery.h \ services/broadcastdiscoveryservice.h diff --git a/libretroshare/src/use_libretroshare.pri b/libretroshare/src/use_libretroshare.pri index 7f70c0185..f129f7f6c 100644 --- a/libretroshare/src/use_libretroshare.pri +++ b/libretroshare/src/use_libretroshare.pri @@ -77,6 +77,14 @@ rs_deep_files_index_ogg { mLibs += vorbisfile } +rs_deep_files_index_flac { + mLibs += FLAC++ +} + +rs_deep_files_index_taglib { + mLibs += tag +} + rs_broadcast_discovery { no_rs_cross_compiling { UDP_DISCOVERY_SRC_PATH=$$clean_path($${RS_SRC_PATH}/supportlibs/udp-discovery-cpp/) diff --git a/retroshare.pri b/retroshare.pri index 9ba5f0dcc..e266755bb 100644 --- a/retroshare.pri +++ b/retroshare.pri @@ -170,15 +170,25 @@ rs_jsonapi:CONFIG -= no_rs_jsonapi CONFIG *= no_rs_deep_channel_index
rs_deep_channel_index:CONFIG -= no_rs_deep_channel_index -# To enable file indexing append the following assignation to qmake command -# line "CONFIG+=rs_files_index" +# To enable deep files indexing append the following assignation to qmake +# command line "CONFIG+=rs_deep_files_index" CONFIG *= no_rs_deep_files_index rs_deep_files_index:CONFIG -= no_rs_deep_files_index -# To enable Ogg file indexing append the following assignation to qmake command -# line "CONFIG+=rs_deep_files_index_ogg" +# To enable Ogg files deep indexing append the following assignation to qmake +# command line "CONFIG+=rs_deep_files_index_ogg" CONFIG *= no_rs_deep_files_index_ogg -rs_deep_files_index_ogg::CONFIG -= no_rs_deep_files_index_ogg +rs_deep_files_index_ogg:CONFIG -= no_rs_deep_files_index_ogg + +# To enable FLAC files deep indexing append the following assignation to qmake +# command line "CONFIG+=rs_deep_files_index_flac" +CONFIG *= no_rs_deep_files_index_flac +rs_deep_files_index_flac:CONFIG -= no_rs_deep_files_index_flac + +# To enable taglib files deep indexing append the following assignation to qmake +# command line "CONFIG+=rs_deep_files_index_taglib" +CONFIG *= no_rs_deep_files_index_taglib +rs_deep_files_index_taglib:CONFIG -= no_rs_deep_files_index_taglib # To enable native dialogs append the following assignation to qmake command # line "CONFIG+=rs_use_native_dialogs" @@ -578,6 +588,8 @@ rs_deep_channels_index:DEFINES *= RS_DEEP_CHANNEL_INDEX rs_deep_files_index:DEFINES *= RS_DEEP_FILES_INDEX rs_deep_files_index_ogg:DEFINES *= RS_DEEP_FILES_INDEX_OGG +rs_deep_files_index_flac:DEFINES *= RS_DEEP_FILES_INDEX_FLAC +rs_deep_files_index_taglib:DEFINES *= RS_DEEP_FILES_INDEX_TAGLIB rs_use_native_dialogs:DEFINES *= RS_NATIVEDIALOGS