2022-03-01 06:16:17 -05:00
|
|
|
// Copyright (c) 2014-2022, The Monero Project
|
2014-10-05 06:42:40 -04:00
|
|
|
//
|
|
|
|
// All rights reserved.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification, are
|
|
|
|
// permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// 1. Redistributions of source code must retain the above copyright notice, this list of
|
|
|
|
// conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
|
|
|
// of conditions and the following disclaimer in the documentation and/or other
|
|
|
|
// materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// 3. Neither the name of the copyright holder nor the names of its contributors may be
|
|
|
|
// used to endorse or promote products derived from this software without specific
|
|
|
|
// prior written permission.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
|
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
|
|
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
|
|
|
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
|
|
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
|
|
|
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
2014-10-02 12:05:27 -04:00
|
|
|
/*!
|
|
|
|
* \file language_base.h
|
|
|
|
*
|
|
|
|
* \brief Language Base class for Polymorphism.
|
|
|
|
*/
|
|
|
|
|
2014-10-02 11:44:29 -04:00
|
|
|
#ifndef LANGUAGE_BASE_H
|
|
|
|
#define LANGUAGE_BASE_H
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <string>
|
2019-01-10 20:36:59 -05:00
|
|
|
#include <boost/algorithm/string.hpp>
|
2015-10-21 18:28:39 -04:00
|
|
|
#include "misc_log_ex.h"
|
2019-01-10 20:36:59 -05:00
|
|
|
#include "fnv1.h"
|
2020-04-28 09:28:55 -04:00
|
|
|
#include "common/utf8.h"
|
2014-10-02 11:44:29 -04:00
|
|
|
|
2014-10-02 12:05:27 -04:00
|
|
|
/*!
|
|
|
|
* \namespace Language
|
|
|
|
* \brief Mnemonic language related namespace.
|
|
|
|
*/
|
2014-10-02 11:44:29 -04:00
|
|
|
namespace Language
|
|
|
|
{
|
2015-06-21 06:28:16 -04:00
|
|
|
/*!
 * \brief Extracts the leading characters of a UTF-8 encoded string.
 *
 * One "character" is taken to be a single byte followed by every
 * continuation byte (bit pattern 10xxxxxx) that directly follows it.
 * The input is assumed to be well formed; it is not validated.
 *
 * \param  s      The string to take the prefix from.
 * \param  count  Maximum number of characters to copy.
 * \return        A string holding the first count characters of s, or all
 *                of s when it has fewer than count characters.
 */
template<typename T>
inline T utf8prefix(const T &s, size_t count)
{
  T prefix = "";
  const char *cursor = s.data();
  const char *const end = cursor + s.size();

  for (size_t taken = 0; taken < count && cursor != end; ++taken)
  {
    // First byte of the character is copied unconditionally.
    prefix += *cursor++;

    // Then every continuation byte that belongs to it; never read past the end.
    while (cursor != end && ((*cursor) & 0xc0) == 0x80)
      prefix += *cursor++;
  }
  return prefix;
}
|
|
|
|
|
2019-01-10 20:36:59 -05:00
|
|
|
struct WordHash
|
|
|
|
{
|
|
|
|
std::size_t operator()(const epee::wipeable_string &s) const
|
|
|
|
{
|
2020-04-28 09:28:55 -04:00
|
|
|
const epee::wipeable_string sc = tools::utf8canonical(s, [](wint_t c) -> wint_t { return std::towlower(c); });
|
2019-01-10 20:36:59 -05:00
|
|
|
return epee::fnv::FNV1a(sc.data(), sc.size());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct WordEqual
|
|
|
|
{
|
|
|
|
bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
|
|
|
|
{
|
2020-04-28 09:28:55 -04:00
|
|
|
const epee::wipeable_string s0c = tools::utf8canonical(s0, [](wint_t c) -> wint_t { return std::towlower(c); });
|
|
|
|
const epee::wipeable_string s1c = tools::utf8canonical(s1, [](wint_t c) -> wint_t { return std::towlower(c); });
|
2019-01-10 20:36:59 -05:00
|
|
|
return s0c == s1c;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2014-10-03 06:55:44 -04:00
|
|
|
/*!
|
|
|
|
* \class Base
|
|
|
|
* \brief A base language class which all languages have to inherit from for
|
|
|
|
* Polymorphism.
|
|
|
|
*/
|
|
|
|
class Base
|
|
|
|
{
|
|
|
|
protected:
|
2015-10-21 18:28:39 -04:00
|
|
|
enum {
|
|
|
|
ALLOW_SHORT_WORDS = 1<<0,
|
|
|
|
ALLOW_DUPLICATE_PREFIXES = 1<<1,
|
|
|
|
};
|
2018-08-29 05:42:23 -04:00
|
|
|
enum {
|
|
|
|
NWORDS = 1626
|
|
|
|
};
|
|
|
|
std::vector<std::string> word_list; /*!< A pointer to the array of words */
|
2019-01-10 20:36:59 -05:00
|
|
|
std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> word_map; /*!< hash table to find word's index */
|
|
|
|
std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual> trimmed_word_map; /*!< hash table to find word's trimmed index */
|
2014-10-03 06:55:44 -04:00
|
|
|
std::string language_name; /*!< Name of language */
|
2018-03-17 18:46:41 -04:00
|
|
|
std::string english_language_name; /*!< Name of language */
|
2014-10-07 03:19:36 -04:00
|
|
|
uint32_t unique_prefix_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
|
2014-10-03 06:55:44 -04:00
|
|
|
/*!
|
|
|
|
* \brief Populates the word maps after the list is ready.
|
|
|
|
*/
|
2015-10-21 18:28:39 -04:00
|
|
|
void populate_maps(uint32_t flags = 0)
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
|
|
|
int ii;
|
2015-10-21 15:59:55 -04:00
|
|
|
std::vector<std::string>::const_iterator it;
|
2018-08-29 05:42:23 -04:00
|
|
|
if (word_list.size () != NWORDS)
|
2015-10-21 18:28:39 -04:00
|
|
|
throw std::runtime_error("Wrong word list length for " + language_name);
|
2015-10-21 15:59:55 -04:00
|
|
|
for (it = word_list.begin(), ii = 0; it != word_list.end(); it++, ii++)
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
2015-10-21 15:59:55 -04:00
|
|
|
word_map[*it] = ii;
|
2015-10-21 18:28:39 -04:00
|
|
|
if ((*it).size() < unique_prefix_length)
|
|
|
|
{
|
|
|
|
if (flags & ALLOW_SHORT_WORDS)
|
2021-01-03 05:04:53 -05:00
|
|
|
MINFO(language_name << " word '" << *it << "' is shorter than its prefix length, " << unique_prefix_length);
|
2015-10-21 18:28:39 -04:00
|
|
|
else
|
|
|
|
throw std::runtime_error("Too short word in " + language_name + " word list: " + *it);
|
|
|
|
}
|
2018-07-06 19:03:15 -04:00
|
|
|
epee::wipeable_string trimmed;
|
2014-10-03 06:55:44 -04:00
|
|
|
if (it->length() > unique_prefix_length)
|
|
|
|
{
|
2015-10-21 18:28:39 -04:00
|
|
|
trimmed = utf8prefix(*it, unique_prefix_length);
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2015-10-21 18:28:39 -04:00
|
|
|
trimmed = *it;
|
|
|
|
}
|
|
|
|
if (trimmed_word_map.find(trimmed) != trimmed_word_map.end())
|
|
|
|
{
|
|
|
|
if (flags & ALLOW_DUPLICATE_PREFIXES)
|
2018-07-06 19:03:15 -04:00
|
|
|
MWARNING("Duplicate prefix in " << language_name << " word list: " << std::string(trimmed.data(), trimmed.size()));
|
2015-10-21 18:28:39 -04:00
|
|
|
else
|
2018-07-06 19:03:15 -04:00
|
|
|
throw std::runtime_error("Duplicate prefix in " + language_name + " word list: " + std::string(trimmed.data(), trimmed.size()));
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
2015-10-21 18:28:39 -04:00
|
|
|
trimmed_word_map[trimmed] = ii;
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
public:
|
2018-03-17 18:46:41 -04:00
|
|
|
Base(const char *language_name, const char *english_language_name, const std::vector<std::string> &words, uint32_t prefix_length):
|
2015-10-21 15:59:55 -04:00
|
|
|
word_list(words),
|
|
|
|
unique_prefix_length(prefix_length),
|
2018-03-17 18:46:41 -04:00
|
|
|
language_name(language_name),
|
|
|
|
english_language_name(english_language_name)
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
|
|
|
}
|
2016-12-10 07:39:25 -05:00
|
|
|
virtual ~Base()
|
|
|
|
{
|
|
|
|
}
|
2018-08-29 05:42:23 -04:00
|
|
|
void set_words(const char * const words[])
|
|
|
|
{
|
|
|
|
word_list.resize(NWORDS);
|
|
|
|
for (size_t i = 0; i < NWORDS; ++i)
|
|
|
|
word_list[i] = words[i];
|
|
|
|
}
|
2014-10-03 06:55:44 -04:00
|
|
|
/*!
|
|
|
|
* \brief Returns a pointer to the word list.
|
|
|
|
* \return A pointer to the word list.
|
|
|
|
*/
|
|
|
|
const std::vector<std::string>& get_word_list() const
|
|
|
|
{
|
2015-10-21 15:59:55 -04:00
|
|
|
return word_list;
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
|
|
|
/*!
|
|
|
|
* \brief Returns a pointer to the word map.
|
|
|
|
* \return A pointer to the word map.
|
|
|
|
*/
|
2019-01-10 20:36:59 -05:00
|
|
|
const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_word_map() const
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
2015-10-21 15:59:55 -04:00
|
|
|
return word_map;
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
|
|
|
/*!
|
|
|
|
* \brief Returns a pointer to the trimmed word map.
|
|
|
|
* \return A pointer to the trimmed word map.
|
|
|
|
*/
|
2019-01-10 20:36:59 -05:00
|
|
|
const std::unordered_map<epee::wipeable_string, uint32_t, WordHash, WordEqual>& get_trimmed_word_map() const
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
2015-10-21 15:59:55 -04:00
|
|
|
return trimmed_word_map;
|
2014-10-03 06:55:44 -04:00
|
|
|
}
|
|
|
|
/*!
|
|
|
|
* \brief Returns the name of the language.
|
|
|
|
* \return Name of the language.
|
|
|
|
*/
|
2015-10-21 15:59:55 -04:00
|
|
|
const std::string &get_language_name() const
|
2014-10-03 06:55:44 -04:00
|
|
|
{
|
|
|
|
return language_name;
|
|
|
|
}
|
2018-03-17 18:46:41 -04:00
|
|
|
/*!
|
|
|
|
* \brief Returns the name of the language in English.
|
|
|
|
* \return Name of the language.
|
|
|
|
*/
|
|
|
|
const std::string &get_english_language_name() const
|
|
|
|
{
|
|
|
|
return english_language_name;
|
|
|
|
}
|
2014-10-05 06:42:40 -04:00
|
|
|
/*!
|
|
|
|
* \brief Returns the number of unique starting characters to be used for matching.
|
|
|
|
* \return Number of unique starting characters.
|
|
|
|
*/
|
2014-10-07 03:19:36 -04:00
|
|
|
uint32_t get_unique_prefix_length() const
|
2014-10-05 06:42:40 -04:00
|
|
|
{
|
2014-10-07 03:19:36 -04:00
|
|
|
return unique_prefix_length;
|
2014-10-05 06:42:40 -04:00
|
|
|
}
|
2014-10-03 06:55:44 -04:00
|
|
|
};
|
2014-10-02 11:44:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|