mirror of
https://github.com/RetroShare/RetroShare.git
synced 2025-01-27 15:57:08 -05:00
63b71e383a
Add FLAC standalone indexer. Add indexing of common music tags for all formats supported by taglib. The file indexer now detects files that have been indexed by older versions of the indexer and reindexes them. Sparse improvements to deep indexing.
157 lines
5.1 KiB
C++
157 lines
5.1 KiB
C++
/*******************************************************************************
|
|
* RetroShare full text indexing and search implementation based on Xapian *
|
|
* *
|
|
* Copyright (C) 2018-2019 Gioacchino Mazzurco <gio@eigenlab.org> *
|
|
* Copyright (C) 2019 Asociación Civil Altermundi <info@altermundi.net> *
|
|
* *
|
|
* This program is free software: you can redistribute it and/or modify *
|
|
* it under the terms of the GNU Affero General Public License version 3 as *
|
|
* published by the Free Software Foundation. *
|
|
* *
|
|
* This program is distributed in the hope that it will be useful, *
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
* GNU Affero General Public License for more details. *
|
|
* *
|
|
* You should have received a copy of the GNU Affero General Public License *
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
|
|
* *
|
|
*******************************************************************************/
|
|
|
|
#include "deep_search/filesindex.hpp"
|
|
#include "util/rsdebug.h"
|
|
|
|
#include <xapian.h>
|
|
#include <string>
|
|
#include <FLAC++/metadata.h>
|
|
#include <cctype>
|
|
#include <memory>
|
|
|
|
struct RsDeepFlacFileIndexer
|
|
{
|
|
RsDeepFlacFileIndexer()
|
|
{
|
|
DeepFilesIndex::registerIndexer(31, indexFlacFile);
|
|
}
|
|
|
|
static uint32_t indexFlacFile(
|
|
const std::string& path, const std::string& /*name*/,
|
|
Xapian::TermGenerator& xTG, Xapian::Document& xDoc )
|
|
{
|
|
Dbg3() << __PRETTY_FUNCTION__ << " " << path << std::endl;
|
|
|
|
using FlacChain = FLAC::Metadata::Chain;
|
|
std::unique_ptr<FlacChain> flacChain(new FlacChain);
|
|
|
|
if(!flacChain->is_valid())
|
|
{
|
|
RsErr() << __PRETTY_FUNCTION__ << " Failed creating FLAC Chain 1"
|
|
<< std::endl;
|
|
return 1;
|
|
}
|
|
|
|
if(!flacChain->read(path.c_str(), false))
|
|
{
|
|
Dbg3() << __PRETTY_FUNCTION__ << " Failed to open the file as FLAC"
|
|
<< std::endl;
|
|
|
|
flacChain.reset(new FlacChain);
|
|
if(!flacChain->is_valid())
|
|
{
|
|
RsErr() << __PRETTY_FUNCTION__
|
|
<< " Failed creating FLAC Chain 2" << std::endl;
|
|
return 1;
|
|
}
|
|
if(!flacChain->read(path.c_str(), true))
|
|
{
|
|
Dbg3() << __PRETTY_FUNCTION__
|
|
<< " Failed to open the file as OggFLAC"
|
|
<< std::endl;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
unsigned validCommentsCnt = 0;
|
|
std::string docData = xDoc.get_data();
|
|
|
|
FLAC::Metadata::Iterator mdit;
|
|
mdit.init(*flacChain);
|
|
if(!mdit.is_valid()) return 1;
|
|
|
|
do
|
|
{
|
|
::FLAC__MetadataType mdt = mdit.get_block_type();
|
|
if (mdt != FLAC__METADATA_TYPE_VORBIS_COMMENT) continue;
|
|
|
|
Dbg2() << __PRETTY_FUNCTION__ << " Found Vorbis Comment Block"
|
|
<< std::endl;
|
|
|
|
std::unique_ptr<FLAC::Metadata::Prototype> proto(mdit.get_block());
|
|
if(!proto) continue;
|
|
|
|
const FLAC::Metadata::VorbisComment* vc =
|
|
dynamic_cast<FLAC::Metadata::VorbisComment*>(proto.get());
|
|
if(!vc || !vc->is_valid()) continue;
|
|
|
|
unsigned numComments = vc->get_num_comments();
|
|
for(unsigned i = 0; i < numComments; ++i)
|
|
{
|
|
FLAC::Metadata::VorbisComment::Entry entry =
|
|
vc->get_comment(i);
|
|
if(!entry.is_valid()) continue;
|
|
|
|
std::string tagName( entry.get_field_name(),
|
|
entry.get_field_name_length() );
|
|
|
|
/* Vorbis tags should be uppercases but not all the softwares
|
|
* enforce it */
|
|
for (auto& c: tagName) c = static_cast<char>(toupper(c));
|
|
|
|
std::string tagValue( entry.get_field_value(),
|
|
entry.get_field_value_length() );
|
|
|
|
if(tagValue.empty()) continue;
|
|
|
|
if(tagName == "ARTIST")
|
|
xTG.index_text(tagValue, 1, "A");
|
|
else if (tagName == "DESCRIPTION")
|
|
xTG.index_text(tagValue, 1, "XD");
|
|
else if (tagName == "TITLE")
|
|
xTG.index_text(tagValue, 1, "S");
|
|
else if(tagName.find("COVERART") != tagName.npos)
|
|
continue; // Avoid polluting the index with binary data
|
|
else if (tagName.find("METADATA_BLOCK_PICTURE") != tagName.npos)
|
|
continue; // Avoid polluting the index with binary data
|
|
|
|
// Index fields without prefixes for general search.
|
|
xTG.increase_termpos();
|
|
std::string fullComment(tagName + "=" + tagValue);
|
|
xTG.index_text(fullComment);
|
|
docData += fullComment + "\n";
|
|
|
|
Dbg2() << __PRETTY_FUNCTION__ << " Indexed " << fullComment
|
|
<< std::endl;
|
|
|
|
++validCommentsCnt;
|
|
}
|
|
}
|
|
while(mdit.next());
|
|
|
|
if(validCommentsCnt > 0)
|
|
{
|
|
Dbg1() << __PRETTY_FUNCTION__ << " Successfully indexed: " << path
|
|
<< std::endl;
|
|
|
|
xDoc.set_data(docData);
|
|
return 99;
|
|
}
|
|
|
|
/* Altought the file appears to be a valid FLAC, no vorbis comment has
|
|
* been found so return less then 50 maybe it has tagged only with ID3
|
|
* tags ? */
|
|
return 30;
|
|
}
|
|
|
|
RS_SET_CONTEXT_DEBUG_LEVEL(3)
|
|
};
|