mirror of
https://github.com/RetroShare/RetroShare.git
synced 2024-10-01 02:35:48 -04:00
Add support for indexing files supported by taglib
Add FLAC standalone indexer. Add indexing of common music tags for all formats supported by taglib. The file indexer now detects files that have been indexed by older versions of the indexer and reindexes them. Sparse improvements to deep indexing.
This commit is contained in:
parent
3a26ccf6a5
commit
63b71e383a
@ -71,7 +71,7 @@ private:
|
||||
static const std::string& dbPath()
|
||||
{
|
||||
static const std::string dbDir =
|
||||
RsAccounts::AccountDirectory() + "/deep_search_xapian_db";
|
||||
RsAccounts::AccountDirectory() + "/deep_channels_xapian_db";
|
||||
return dbDir;
|
||||
}
|
||||
};
|
||||
|
156
libretroshare/src/deep_search/filesflacindexer.hpp
Normal file
156
libretroshare/src/deep_search/filesflacindexer.hpp
Normal file
@ -0,0 +1,156 @@
|
||||
/*******************************************************************************
|
||||
* RetroShare full text indexing and search implementation based on Xapian *
|
||||
* *
|
||||
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
|
||||
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
|
||||
* *
|
||||
* This program is free software: you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU Affero General Public License version 3 as *
|
||||
* published by the Free Software Foundation. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU Affero General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Affero General Public License *
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "deep_search/filesindex.hpp"
|
||||
#include "util/rsdebug.h"
|
||||
|
||||
#include <xapian.h>
|
||||
#include <string>
|
||||
#include <FLAC++/metadata.h>
|
||||
#include <cctype>
|
||||
#include <memory>
|
||||
|
||||
struct RsDeepFlacFileIndexer
|
||||
{
|
||||
RsDeepFlacFileIndexer()
|
||||
{
|
||||
DeepFilesIndex::registerIndexer(31, indexFlacFile);
|
||||
}
|
||||
|
||||
static uint32_t indexFlacFile(
|
||||
const std::string& path, const std::string& /*name*/,
|
||||
Xapian::TermGenerator& xTG, Xapian::Document& xDoc )
|
||||
{
|
||||
Dbg3() << __PRETTY_FUNCTION__ << " " << path << std::endl;
|
||||
|
||||
using FlacChain = FLAC::Metadata::Chain;
|
||||
std::unique_ptr<FlacChain> flacChain(new FlacChain);
|
||||
|
||||
if(!flacChain->is_valid())
|
||||
{
|
||||
RsErr() << __PRETTY_FUNCTION__ << " Failed creating FLAC Chain 1"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(!flacChain->read(path.c_str(), false))
|
||||
{
|
||||
Dbg3() << __PRETTY_FUNCTION__ << " Failed to open the file as FLAC"
|
||||
<< std::endl;
|
||||
|
||||
flacChain.reset(new FlacChain);
|
||||
if(!flacChain->is_valid())
|
||||
{
|
||||
RsErr() << __PRETTY_FUNCTION__
|
||||
<< " Failed creating FLAC Chain 2" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if(!flacChain->read(path.c_str(), true))
|
||||
{
|
||||
Dbg3() << __PRETTY_FUNCTION__
|
||||
<< " Failed to open the file as OggFLAC"
|
||||
<< std::endl;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned validCommentsCnt = 0;
|
||||
std::string docData = xDoc.get_data();
|
||||
|
||||
FLAC::Metadata::Iterator mdit;
|
||||
mdit.init(*flacChain);
|
||||
if(!mdit.is_valid()) return 1;
|
||||
|
||||
do
|
||||
{
|
||||
::FLAC__MetadataType mdt = mdit.get_block_type();
|
||||
if (mdt != FLAC__METADATA_TYPE_VORBIS_COMMENT) continue;
|
||||
|
||||
Dbg2() << __PRETTY_FUNCTION__ << " Found Vorbis Comment Block"
|
||||
<< std::endl;
|
||||
|
||||
std::unique_ptr<FLAC::Metadata::Prototype> proto(mdit.get_block());
|
||||
if(!proto) continue;
|
||||
|
||||
const FLAC::Metadata::VorbisComment* vc =
|
||||
dynamic_cast<FLAC::Metadata::VorbisComment*>(proto.get());
|
||||
if(!vc || !vc->is_valid()) continue;
|
||||
|
||||
unsigned numComments = vc->get_num_comments();
|
||||
for(unsigned i = 0; i < numComments; ++i)
|
||||
{
|
||||
FLAC::Metadata::VorbisComment::Entry entry =
|
||||
vc->get_comment(i);
|
||||
if(!entry.is_valid()) continue;
|
||||
|
||||
std::string tagName( entry.get_field_name(),
|
||||
entry.get_field_name_length() );
|
||||
|
||||
/* Vorbis tags should be uppercases but not all the softwares
|
||||
* enforce it */
|
||||
for (auto& c: tagName) c = static_cast<char>(toupper(c));
|
||||
|
||||
std::string tagValue( entry.get_field_value(),
|
||||
entry.get_field_value_length() );
|
||||
|
||||
if(tagValue.empty()) continue;
|
||||
|
||||
if(tagName == "ARTIST")
|
||||
xTG.index_text(tagValue, 1, "A");
|
||||
else if (tagName == "DESCRIPTION")
|
||||
xTG.index_text(tagValue, 1, "XD");
|
||||
else if (tagName == "TITLE")
|
||||
xTG.index_text(tagValue, 1, "S");
|
||||
else if(tagName.find("COVERART") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
|
||||
// Index fields without prefixes for general search.
|
||||
xTG.increase_termpos();
|
||||
std::string fullComment(tagName + "=" + tagValue);
|
||||
xTG.index_text(fullComment);
|
||||
docData += fullComment + "\n";
|
||||
|
||||
Dbg2() << __PRETTY_FUNCTION__ << " Indexed " << fullComment
|
||||
<< std::endl;
|
||||
|
||||
++validCommentsCnt;
|
||||
}
|
||||
}
|
||||
while(mdit.next());
|
||||
|
||||
if(validCommentsCnt > 0)
|
||||
{
|
||||
Dbg1() << __PRETTY_FUNCTION__ << " Successfully indexed: " << path
|
||||
<< std::endl;
|
||||
|
||||
xDoc.set_data(docData);
|
||||
return 99;
|
||||
}
|
||||
|
||||
/* Altought the file appears to be a valid FLAC, no vorbis comment has
|
||||
* been found so return less then 50 maybe it has tagged only with ID3
|
||||
* tags ? */
|
||||
return 30;
|
||||
}
|
||||
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(3)
|
||||
};
|
@ -22,6 +22,7 @@
|
||||
#include "deep_search/commonutils.hpp"
|
||||
#include "util/rsdebug.h"
|
||||
#include "retroshare/rsinit.h"
|
||||
#include "retroshare/rsversion.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
@ -37,31 +38,48 @@ bool DeepFilesIndex::indexFile(
|
||||
if(!dbPtr) return false;
|
||||
Xapian::WritableDatabase& db(*dbPtr);
|
||||
|
||||
if(db.term_exists("Q" + hash.toStdString()))
|
||||
const std::string hashString = hash.toStdString();
|
||||
const std::string idTerm("Q" + hashString);
|
||||
|
||||
Xapian::Document oldDoc;
|
||||
Xapian::PostingIterator pIt = db.postlist_begin(idTerm);
|
||||
if( pIt != db.postlist_end(idTerm) )
|
||||
{
|
||||
Dbg3() << __PRETTY_FUNCTION__ << " skipping laready indexed file: "
|
||||
<< hash << " " << name << std::endl;
|
||||
return true;
|
||||
oldDoc = db.get_document(*pIt);
|
||||
if( oldDoc.get_value(INDEXER_VERSION_VALUENO) ==
|
||||
RS_HUMAN_READABLE_VERSION &&
|
||||
std::stoull(oldDoc.get_value(INDEXERS_COUNT_VALUENO)) ==
|
||||
indexersRegister.size() )
|
||||
{
|
||||
/* Looks like this file has already been indexed by this RetroShare
|
||||
* exact version, so we can skip it. If the version was different it
|
||||
* made sense to reindex it as better indexers might be available
|
||||
* since last time it was indexed */
|
||||
Dbg3() << __PRETTY_FUNCTION__ << " skipping laready indexed file: "
|
||||
<< hash << " " << name << std::endl;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
Xapian::Document doc;
|
||||
|
||||
// Set up a TermGenerator that we'll use in indexing.
|
||||
Xapian::TermGenerator termgenerator;
|
||||
//termgenerator.set_stemmer(Xapian::Stem("en"));
|
||||
|
||||
// We make a document and tell the term generator to use this.
|
||||
Xapian::Document doc;
|
||||
termgenerator.set_document(doc);
|
||||
|
||||
for(auto& indexerPair : indexersRegister)
|
||||
if(indexerPair.second(path, name, termgenerator, doc) > 50)
|
||||
break;
|
||||
|
||||
const std::string hashString = hash.toStdString();
|
||||
const std::string idTerm("Q" + hashString);
|
||||
doc.add_boolean_term(idTerm);
|
||||
termgenerator.index_text(name, 1, "N");
|
||||
termgenerator.index_text(name);
|
||||
doc.add_value(FILE_HASH_VALUENO, hashString);
|
||||
doc.add_value(INDEXER_VERSION_VALUENO, RS_HUMAN_READABLE_VERSION);
|
||||
doc.add_value(
|
||||
INDEXERS_COUNT_VALUENO,
|
||||
std::to_string(indexersRegister.size()) );
|
||||
db.replace_document(idTerm, doc);
|
||||
|
||||
return true;
|
||||
@ -141,3 +159,13 @@ uint32_t DeepFilesIndex::search(
|
||||
# include "deep_search/filesoggindexer.hpp"
|
||||
static RsDeepOggFileIndexer oggFileIndexer;
|
||||
#endif // def RS_DEEP_FILES_INDEX_OGG
|
||||
|
||||
#ifdef RS_DEEP_FILES_INDEX_FLAC
|
||||
# include "deep_search/filesflacindexer.hpp"
|
||||
static RsDeepFlacFileIndexer flacFileIndexer;
|
||||
#endif // def RS_DEEP_FILES_INDEX_FLAC
|
||||
|
||||
#ifdef RS_DEEP_FILES_INDEX_TAGLIB
|
||||
# include "deep_search/filestaglibindexer.hpp"
|
||||
static RsDeepTaglibFileIndexer taglibFileIndexer;
|
||||
#endif // def RS_DEEP_FILES_INDEX_TAGLIB
|
||||
|
@ -82,6 +82,14 @@ private:
|
||||
/// Used to store RsFileHash of indexed documents
|
||||
FILE_HASH_VALUENO,
|
||||
|
||||
/** Used to check if some file need reindex because was indexed with an
|
||||
* older version of the indexer */
|
||||
INDEXER_VERSION_VALUENO,
|
||||
|
||||
/** Used to check if some file needs reindexing because the number of
|
||||
* registered indexers has changed since it was indexed */
|
||||
INDEXERS_COUNT_VALUENO,
|
||||
|
||||
/// @see Xapian::BAD_VALUENO
|
||||
BAD_VALUENO = Xapian::BAD_VALUENO
|
||||
};
|
||||
@ -91,5 +99,5 @@ private:
|
||||
/** Storage for indexers function by order */
|
||||
static std::multimap<int, IndexerFunType> indexersRegister;
|
||||
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(4)
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(1)
|
||||
};
|
||||
|
@ -74,7 +74,7 @@ struct RsDeepOggFileIndexer
|
||||
xTG.index_text(tagValue, 1, "XD");
|
||||
else if (tagName == "TITLE")
|
||||
xTG.index_text(tagValue, 1, "S");
|
||||
if(tagName.find("COVERART") != tagName.npos)
|
||||
else if(tagName.find("COVERART") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
@ -93,5 +93,5 @@ struct RsDeepOggFileIndexer
|
||||
return 0;
|
||||
}
|
||||
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(2)
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(1)
|
||||
};
|
||||
|
103
libretroshare/src/deep_search/filestaglibindexer.hpp
Normal file
103
libretroshare/src/deep_search/filestaglibindexer.hpp
Normal file
@ -0,0 +1,103 @@
|
||||
/*******************************************************************************
|
||||
* RetroShare full text indexing and search implementation based on Xapian *
|
||||
* *
|
||||
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
|
||||
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
|
||||
* *
|
||||
* This program is free software: you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU Affero General Public License version 3 as *
|
||||
* published by the Free Software Foundation. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU Affero General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU Affero General Public License *
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "deep_search/filesindex.hpp"
|
||||
#include "util/rsdebug.h"
|
||||
|
||||
#include <xapian.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <taglib/tag.h>
|
||||
#include <taglib/fileref.h>
|
||||
#include <taglib/tpropertymap.h>
|
||||
|
||||
struct RsDeepTaglibFileIndexer
|
||||
{
|
||||
RsDeepTaglibFileIndexer()
|
||||
{
|
||||
DeepFilesIndex::registerIndexer(40, indexFile);
|
||||
}
|
||||
|
||||
static uint32_t indexFile(
|
||||
const std::string& path, const std::string& /*name*/,
|
||||
Xapian::TermGenerator& xTG, Xapian::Document& xDoc )
|
||||
{
|
||||
Dbg4() << __PRETTY_FUNCTION__ << " " << path << std::endl;
|
||||
|
||||
TagLib::FileRef tFile(path.c_str());
|
||||
if(tFile.isNull()) return 0;
|
||||
|
||||
const TagLib::Tag* tag = tFile.tag();
|
||||
if(!tag) return 0;
|
||||
|
||||
TagLib::PropertyMap tMap = tag->properties();
|
||||
|
||||
unsigned validCommentsCnt = 0;
|
||||
std::string docData = xDoc.get_data();
|
||||
for( TagLib::PropertyMap::ConstIterator mIt = tMap.begin();
|
||||
mIt != tMap.end(); ++mIt )
|
||||
{
|
||||
if(mIt->first.isNull() || mIt->first.isEmpty()) continue;
|
||||
std::string tagName(mIt->first.upper().to8Bit());
|
||||
|
||||
if(mIt->second.isEmpty()) continue;
|
||||
std::string tagValue(mIt->second.toString(", ").to8Bit(true));
|
||||
if(tagValue.empty()) continue;
|
||||
|
||||
if(tagName == "ARTIST")
|
||||
xTG.index_text(tagValue, 1, "A");
|
||||
else if (tagName == "DESCRIPTION")
|
||||
xTG.index_text(tagValue, 1, "XD");
|
||||
else if (tagName == "TITLE")
|
||||
xTG.index_text(tagValue, 1, "S");
|
||||
else if(tagName.find("COVERART") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos)
|
||||
continue; // Avoid polluting the index with binary data
|
||||
|
||||
// Index fields without prefixes for general search.
|
||||
xTG.increase_termpos();
|
||||
std::string fullComment(tagName + "=" + tagValue);
|
||||
xTG.index_text(fullComment);
|
||||
docData += fullComment + "\n";
|
||||
|
||||
Dbg2() << __PRETTY_FUNCTION__ << " Indexed " << tagName << "=\""
|
||||
<< tagValue << '"' << std::endl;
|
||||
|
||||
++validCommentsCnt;
|
||||
}
|
||||
|
||||
if(validCommentsCnt > 0)
|
||||
{
|
||||
Dbg1() << __PRETTY_FUNCTION__ << " Successfully indexed: " << path
|
||||
<< std::endl;
|
||||
|
||||
xDoc.set_data(docData);
|
||||
return 99;
|
||||
}
|
||||
|
||||
/* Altought the file appears to be supported by taglib, no comments has
|
||||
* been found so return less then 50 maybe another indexer is capable of
|
||||
* extracting information */
|
||||
return 30;
|
||||
}
|
||||
|
||||
RS_SET_CONTEXT_DEBUG_LEVEL(3)
|
||||
};
|
@ -919,6 +919,14 @@ rs_deep_files_index_ogg {
|
||||
HEADERS += deep_search/filesoggindexer.hpp
|
||||
}
|
||||
|
||||
rs_deep_files_index_flac {
|
||||
HEADERS += deep_search/filesflacindexer.hpp
|
||||
}
|
||||
|
||||
rs_deep_files_index_taglib {
|
||||
HEADERS += deep_search/filestaglibindexer.hpp
|
||||
}
|
||||
|
||||
rs_broadcast_discovery {
|
||||
HEADERS += retroshare/rsbroadcastdiscovery.h \
|
||||
services/broadcastdiscoveryservice.h
|
||||
|
@ -77,6 +77,14 @@ rs_deep_files_index_ogg {
|
||||
mLibs += vorbisfile
|
||||
}
|
||||
|
||||
rs_deep_files_index_flac {
|
||||
mLibs += FLAC++
|
||||
}
|
||||
|
||||
rs_deep_files_index_taglib {
|
||||
mLibs += tag
|
||||
}
|
||||
|
||||
rs_broadcast_discovery {
|
||||
no_rs_cross_compiling {
|
||||
UDP_DISCOVERY_SRC_PATH=$$clean_path($${RS_SRC_PATH}/supportlibs/udp-discovery-cpp/)
|
||||
|
@ -170,15 +170,25 @@ rs_jsonapi:CONFIG -= no_rs_jsonapi
|
||||
CONFIG *= no_rs_deep_channel_index
|
||||
rs_deep_channel_index:CONFIG -= no_rs_deep_channel_index
|
||||
|
||||
# To enable file indexing append the following assignation to qmake command
|
||||
# line "CONFIG+=rs_files_index"
|
||||
# To enable deep files indexing append the following assignation to qmake
|
||||
# command line "CONFIG+=rs_deep_files_index"
|
||||
CONFIG *= no_rs_deep_files_index
|
||||
rs_deep_files_index:CONFIG -= no_rs_deep_files_index
|
||||
|
||||
# To enable Ogg file indexing append the following assignation to qmake command
|
||||
# line "CONFIG+=rs_deep_files_index_ogg"
|
||||
# To enable Ogg files deep indexing append the following assignation to qmake
|
||||
# command line "CONFIG+=rs_deep_files_index_ogg"
|
||||
CONFIG *= no_rs_deep_files_index_ogg
|
||||
rs_deep_files_index_ogg::CONFIG -= no_rs_deep_files_index_ogg
|
||||
rs_deep_files_index_ogg:CONFIG -= no_rs_deep_files_index_ogg
|
||||
|
||||
# To enable FLAC files deep indexing append the following assignation to qmake
|
||||
# command line "CONFIG+=rs_deep_files_index_flac"
|
||||
CONFIG *= no_rs_deep_files_index_flac
|
||||
rs_deep_files_index_flac:CONFIG -= no_rs_deep_files_index_flac
|
||||
|
||||
# To enable taglib files deep indexing append the following assignation to qmake
|
||||
# command line "CONFIG+=rs_deep_files_index_taglib"
|
||||
CONFIG *= no_rs_deep_files_index_taglib
|
||||
rs_deep_files_index_taglib:CONFIG -= no_rs_deep_files_index_taglib
|
||||
|
||||
# To enable native dialogs append the following assignation to qmake command
|
||||
# line "CONFIG+=rs_use_native_dialogs"
|
||||
@ -578,6 +588,8 @@ rs_deep_channels_index:DEFINES *= RS_DEEP_CHANNEL_INDEX
|
||||
|
||||
rs_deep_files_index:DEFINES *= RS_DEEP_FILES_INDEX
|
||||
rs_deep_files_index_ogg:DEFINES *= RS_DEEP_FILES_INDEX_OGG
|
||||
rs_deep_files_index_flac:DEFINES *= RS_DEEP_FILES_INDEX_FLAC
|
||||
rs_deep_files_index_taglib:DEFINES *= RS_DEEP_FILES_INDEX_TAGLIB
|
||||
|
||||
rs_use_native_dialogs:DEFINES *= RS_NATIVEDIALOGS
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user