ufo_data/udb.cpp
2023-02-24 14:23:55 -05:00

1806 lines
58 KiB
C++

// Copyright (C) 2023 Richard Geldreich, Jr.
#include "udb.h"
#include "udb_tables.h"
// Size in bytes of one record. The byte-packed fields of udb_rec (declared below) add up to this value.
const uint32_t UDB_RECORD_SIZE = 112;
// Length in bytes of the udb_rec::m_text field.
const uint32_t UDB_REC_TEXT_SIZE = 78;
// Indices of the 64 per-record flag bits, for use with udb_rec::get_flag().
// Each row of eight enumerators corresponds to one flag byte (index >> 3); the position
// inside the row is the bit number within that byte (index & 7).
enum
{
    // loc/obs flags
    cFlagMAP = 0, cFlagGND, cFlagCST, cFlagSEA, cFlagAIR, cFlagObsMIL, cFlagObsCIV, cFlagHQO,

    // misc flags
    cFlagSCI = 8, cFlagTLP, cFlagNWS, cFlagMID, cFlagHOX, cFlagCNT, cFlagODD, cFlagWAV,

    // type of ufo craft flags
    cFlagSCR = 16, cFlagCIG, cFlagDLT, cFlagNLT, cFlagPRB, cFlagFBL, cFlagSUB, cFlagNFO,

    // aliens monsters flags
    cFlagOID = 24, cFlagRBT, cFlagPSH, cFlagMIB, cFlagMON, cFlagGNT, cFlagFIG, cFlagNOC,

    // apparent ufo occupant activities flags
    cFlagOBS = 32, cFlagRAY, cFlagSMP, cFlagMST, cFlagABD, cFlagOPR, cFlagSIG, cFlagCVS,

    // places visited and things affected flags
    cFlagNUC = 40, cFlagDRT, cFlagVEG, cFlagANI, cFlagHUM, cFlagVEH, cFlagBLD, cFlagLND,

    // evidence and special effects flags
    cFlagPHT = 48, cFlagRDR, cFlagRDA, cFlagEME, cFlagTRC, cFlagTCH, cFlagHST, cFlagINJ,

    // misc details flags
    cFlagMIL = 56, cFlagBBK, cFlagGSA, cFlagOGA, cFlagSND, cFlagODR, cFlagCOV, cFlagCMF,

    // Number of flag indices defined above.
    cTotalFlags = 64
};
#pragma pack(push, 1)
// One fixed-layout database record plus the accessors that decode it.
// The struct is declared under #pragma pack(push, 1), so the members below are laid out
// back-to-back with no padding; do not reorder, resize or insert fields.
struct udb_rec
{
private:
    int16_t m_year;
    uint8_t m_unknown_and_locale; // nibbles
    uint8_t m_unknown_and_month; // nibbles
    uint8_t m_ref_index_high_day; // 3 bits ref index high, low 5 bits day
    uint8_t m_time;
    uint8_t m_ymdt; // 2-bit fields: TDMY accuracy, T lowest, 0=invalid, 1=?, 2=~, 3=accurate
    uint8_t m_duration;
    uint8_t m_unknown1;
    int16_t m_enc_longtitude;
    int16_t m_enc_latitude;
    int16_t m_elevation;
    int16_t m_rel_altitude;
    uint8_t m_unknown2;
    uint8_t m_continent_country; // nibbles
    uint8_t m_state_or_prov[3];
    uint8_t m_unknown3;
#if 0
    uint8_t m_loc_flags;
    uint8_t m_misc_flags;
    uint8_t m_type_of_ufo_craft_flags;
    uint8_t m_aliens_monsters_flags;
    uint8_t m_apparent_ufo_occupant_activities_flags;
    uint8_t m_places_visited_and_things_affected_flags;
    uint8_t m_evidence_and_special_effects_flags;
    uint8_t m_miscellaneous_details_flags;
#else
    uint8_t m_flags[8];
#endif
    uint8_t m_text[UDB_REC_TEXT_SIZE];
    uint8_t m_reference;
    uint8_t m_ref_index;
    uint8_t m_strangeness_credibility; // nibbles

public:
    // Raw text bytes of the record (UDB_REC_TEXT_SIZE bytes).
    const uint8_t* get_text() const { return m_text; }

    int get_year() const { return m_year; }
    uint32_t get_month() const { return m_unknown_and_month & 0xF; }
    uint32_t get_day() const { return m_ref_index_high_day & 31; }

    // meters
    int get_elevation() const { return m_elevation; }
    int get_rel_altitude() const { return m_rel_altitude; }

    uint32_t get_strangeness() const { return m_strangeness_credibility >> 4; }
    uint32_t get_credibility() const { return m_strangeness_credibility & 0xF; }

    uint32_t get_reference() const { return m_reference; }
    // 11-bit index: low 8 bits from m_ref_index, high 3 bits from the top of m_ref_index_high_day.
    uint32_t get_reference_index() const { return m_ref_index | ((m_ref_index_high_day >> 5) << 8); }

    uint32_t get_continent_code() const { return m_continent_country >> 4; }
    uint32_t get_country_code() const { return m_continent_country & 0xF; }
    uint32_t get_locale() const { return m_unknown_and_locale & 0xF; }

    // Returns the 3-character state/province code converted with dos_to_utf8().
    // Bytes below ' ' are replaced with spaces first.
    std::string get_state_or_prov() const
    {
        const uint32_t c0 = m_state_or_prov[0];
        const uint32_t c1 = m_state_or_prov[1];
        const uint32_t c2 = m_state_or_prov[2];
        return dos_to_utf8(string_format("%c%c%c", (c0 >= ' ') ? c0 : ' ', (c1 >= ' ') ? c1 : ' ', (c2 >= ' ') ? c2 : ' '));
    }

    // Decoded as (raw / 200) * 1.11111111111. The longitude sign is flipped relative to the stored value.
    // NOTE(review): the scale factors are float literals, so they are rounded to float precision before
    // being promoted to double; left as-is so decoded coordinates do not change.
    double get_latitude() const { return ((double)m_enc_latitude / 200.0f) * 1.11111111111f; }
    double get_longitude() const { return -((double)m_enc_longtitude / 200.0f) * 1.11111111111f; }

    // get_deg_to_dms() output with a hemisphere suffix; a value of exactly 0 is labelled " S" / " W".
    std::string get_latitude_dms() const { double lat = get_latitude(); return get_deg_to_dms(lat) + ((lat <= 0) ? " S" : " N"); }
    std::string get_longitude_dms() const { double lon = get_longitude(); return get_deg_to_dms(lon) + ((lon <= 0) ? " W" : " E"); }

    // minutes
    uint32_t get_duration() const { return m_duration; }

    // Values of each 2-bit accuracy field packed into m_ymdt.
    enum
    {
        cAccuracyInvalid = 0,
        cAccuracyQuestionable = 1,
        cAccuracyApproximate = 2,
        cAccuracyGood = 3
    };

    // Formats the time of day as "HH:MM" into `time`.
    // m_time counts 10-minute units since midnight (hour = m_time / 6).
    // A questionable time gets a trailing "?", an approximate time a leading "~".
    // Returns false (leaving `time` untouched) if the time accuracy is invalid or the hour exceeds 23.
    bool get_time(std::string& time) const
    {
        const uint32_t time_accuracy = m_ymdt & 3;
        if (time_accuracy == cAccuracyInvalid)
            return false;

        const uint32_t hour = m_time / 6;
        const uint32_t minute = (m_time % 6) * 10;
        if (hour > 23)
        {
            assert(0);
            return false;
        }

        time = string_format("%02u:%02u", hour, minute);

        if (time_accuracy == cAccuracyQuestionable)
            time += "?";
        else if (time_accuracy == cAccuracyApproximate)
            time = "~" + time;

        return true;
    }

    // Fills in date.m_year and, when valid, date.m_month / date.m_day.
    // A day is only stored when the month is valid too.
    // date.m_approx or date.m_fuzzy is set when the least accurate of the stored components is
    // approximate or questionable, respectively. Fields of `date` that are not set are left untouched.
    // Returns false if the year is unavailable (invalid accuracy or a stored year of 0).
    bool get_date(event_date& date) const
    {
        const uint32_t year_accuracy = (m_ymdt >> 6) & 3;
        uint32_t month_accuracy = (m_ymdt >> 4) & 3;
        uint32_t day_accuracy = (m_ymdt >> 2) & 3;

        const int year = year_accuracy ? get_year() : 0;
        uint32_t month = month_accuracy ? get_month() : 0;
        uint32_t day = day_accuracy ? get_day() : 0;

        if ((day < 1) || (day > 31))
        {
            day = 0;
            day_accuracy = cAccuracyInvalid;
        }

        if ((month < 1) || (month > 12))
        {
            month = 0;
            month_accuracy = cAccuracyInvalid;
        }

        if (!year)
            return false;

        // Start from the year's accuracy (not the year value itself), so that year-only dates
        // still get flagged as approximate/fuzzy.
        uint32_t min_accuracy = year_accuracy;

        date.m_year = year;

        if (month)
        {
            date.m_month = month;

            if (!day)
            {
                min_accuracy = std::min(year_accuracy, month_accuracy);
            }
            else
            {
                min_accuracy = std::min(std::min(year_accuracy, month_accuracy), day_accuracy);
                date.m_day = day;
            }
        }

        if (min_accuracy == cAccuracyApproximate)
            date.m_approx = true;
        else if (min_accuracy == cAccuracyQuestionable)
            date.m_fuzzy = true;

        return true;
    }

    enum { cMaxFlags = 64 };

    // LOC, MISC, TYPE, ALIENS/MONSTERS, ACTIVITIES, VISITED/THINGS, EVIDENCE/SPECIAL, MISC_DETAILS
    // Tests flag bit `index` (0..cMaxFlags-1): byte index >> 3 of m_flags, bit index & 7.
    bool get_flag(uint32_t index) const
    {
        assert(index < cMaxFlags);
        return (m_flags[index >> 3] & (1 << (index & 7))) != 0;
    }

#if 0
    uint8_t get_loc_flags() const { return m_loc_flags; }
    uint8_t get_misc_flags() const { return m_misc_flags; }
    uint8_t get_type_of_ufo_craft_flags() const { return m_type_of_ufo_craft_flags; }
    uint8_t get_aliens_monsters_flags() const { return m_aliens_monsters_flags; }
    uint8_t get_apparent_ufo_occupant_activities_flags() const { return m_apparent_ufo_occupant_activities_flags; }
    uint8_t get_places_visited_and_things_affected_flags() const { return m_places_visited_and_things_affected_flags; }
    uint8_t get_evidence_and_special_effects_flags() const { return m_evidence_and_special_effects_flags; }
    uint8_t get_miscellaneous_details_flags() const { return m_miscellaneous_details_flags; }
#endif

    // Resolves the country and state/province names via get_hatch_geo(), passing the state/province
    // code with trailing whitespace and up to two trailing periods removed.
    // A code of "UNK" forces state_or_prov_name to "Unknown".
    void get_geo(std::string& country_name, std::string& state_or_prov_name) const
    {
        std::string state_or_prov_str(get_state_or_prov());
        string_trim_end(state_or_prov_str);

        // The code may be blank, so check for emptiness before looking at the last character
        // (calling back() on an empty string is undefined behaviour).
        for (uint32_t i = 0; i < 2; i++)
        {
            if (!state_or_prov_str.empty() && (state_or_prov_str.back() == '.'))
                state_or_prov_str.pop_back();
        }

        get_hatch_geo(get_continent_code(), get_country_code(), state_or_prov_str, country_name, state_or_prov_name);

        if (state_or_prov_str == "UNK")
            state_or_prov_name = "Unknown";
    }

    // Returns the reference description from g_hatch_refs_tab for this record's reference code.
    // For codes 93, 96, 97 and 98 the matching sub-entry description (if any) from the corresponding
    // g_hatch_refs_NN table is appended; for every other code " (Index N)" is appended.
    // Returns an empty string when the table has no entry (null) for the code.
    // NOTE(review): the reference code is not range-checked against g_hatch_refs_tab here - presumably
    // the table covers every value of the 8-bit code; confirm against udb_tables.h.
    std::string get_full_refs() const
    {
        const uint32_t ref_code = get_reference();

        // Test for a missing entry before building a std::string from it.
        const auto& base_desc = g_hatch_refs_tab[ref_code];
        if (!base_desc)
            return std::string();

        std::string ref(base_desc);
        const uint32_t ref_index = get_reference_index();

        // Appends the description of the first entry in `table` whose m_ref equals ref_index.
        const auto append_desc = [&ref, ref_index](const auto& table)
        {
            for (const auto& x : table)
                if (x.m_ref == ref_index)
                {
                    ref += x.m_pDesc;
                    break;
                }
        };

        switch (ref_code)
        {
        case 93:
            append_desc(g_hatch_refs_93);
            break;
        case 96:
            append_desc(g_hatch_refs_96);
            break;
        case 97:
            append_desc(g_hatch_refs_97);
            break;
        case 98:
            append_desc(g_hatch_refs_98);
            break;
        default:
            ref += string_format(" (Index %u)", ref_index);
            break;
        }

        return ref;
    }
};
#pragma pack(pop)
// Maps a lowercased word to its replacement spelling. fix_capitilization() looks up all-uppercase
// words here. It is not populated in this part of the file.
static std::unordered_map<std::string, std::string> g_dictionary;
// A single output token produced while expanding a record's text.
struct token
{
    // The token's text.
    std::string m_token;

    // When false, fix_capitilization() returns the token as-is instead of normalizing its case.
    bool m_cap_check = false;

    // Set once the capitalization exception table has rewritten this token; such tokens are final.
    bool m_replaced_flag = false;

    token() = default;

    token(const std::string& text, bool cap_check, bool replaced_flag) :
        m_token(text),
        m_cap_check(cap_check),
        m_replaced_flag(replaced_flag)
    {
    }
};
// Collects the lowercased form of every all-uppercase word that fix_capitilization() did not find in g_dictionary.
std::unordered_set<std::string> g_unique_tokens;
// The entries of g_cap_exceptions split into tokens, one vector per entry.
// Filled by init_hatch_cap_exception_tokens() and read by fix_capitilization().
std::vector<string_vec> g_hatch_exception_tokens;
static void init_hatch_cap_exception_tokens()
{
g_hatch_exception_tokens.resize(std::size(g_cap_exceptions));
std::string cur_etoken;
for (uint32_t e = 0; e < std::size(g_cap_exceptions); e++)
{
const std::string exception_str(g_cap_exceptions[e]);
string_vec& etokens = g_hatch_exception_tokens[e];
for (uint32_t i = 0; i < exception_str.size(); i++)
{
uint8_t c = exception_str[i];
if (c == ' ')
{
if (cur_etoken.size())
{
etokens.push_back(cur_etoken);
cur_etoken.clear();
}
}
else if (c == '-')
{
if (cur_etoken.size())
{
etokens.push_back(cur_etoken);
cur_etoken.clear();
}
std::string s;
s.push_back(c);
etokens.push_back(s);
}
else
{
cur_etoken.push_back(c);
}
}
if (cur_etoken.size())
{
etokens.push_back(cur_etoken);
cur_etoken.resize(0);
}
}
}
static std::string fix_capitilization(std::vector<token>& toks, uint32_t& tok_index)
{
if (toks[tok_index].m_replaced_flag)
return toks[tok_index].m_token;
const uint32_t toks_remaining = (uint32_t)toks.size() - tok_index;
// Peak ahead on the tokens to see if we need to correct any capitilization using the exception table.
for (uint32_t e = 0; e < std::size(g_cap_exceptions); e++)
{
const string_vec& etokens = g_hatch_exception_tokens[e];
if (toks_remaining >= etokens.size())
{
uint32_t i;
for (i = 0; i < etokens.size(); i++)
if ((string_icompare(etokens[i], toks[tok_index + i].m_token.c_str()) != 0) || toks[tok_index + i].m_replaced_flag)
break;
if (i == etokens.size())
{
for (i = 0; i < etokens.size(); i++)
{
toks[tok_index + i].m_token = etokens[i];
toks[tok_index + i].m_replaced_flag = true;
}
std::string res(toks[tok_index].m_token);
return res;
}
}
}
std::string str(toks[tok_index].m_token);
if (!toks[tok_index].m_cap_check)
return str;
string_vec wtokens;
std::string cur_wtoken;
for (uint32_t i = 0; i < str.size(); i++)
{
uint8_t c = str[i];
if (isalpha(c) || isdigit(c) || ((c == '\'') && (i != 0) && (i != str.size() - 1)))
{
cur_wtoken.push_back(c);
}
else
{
if (cur_wtoken.size())
{
wtokens.push_back(cur_wtoken);
cur_wtoken.clear();
}
std::string s;
s.push_back(c);
wtokens.push_back(s);
}
}
if (cur_wtoken.size())
{
wtokens.push_back(cur_wtoken);
cur_wtoken.clear();
}
for (uint32_t wtoken_index = 0; wtoken_index < wtokens.size(); wtoken_index++)
{
std::string& substr = wtokens[wtoken_index];
if (substr == "A")
substr = "a";
else if (substr.size() >= 2)
{
bool is_all_uppercase = true;
for (uint8_t c : substr)
{
if (!isupper(c) && (c != '\''))
{
is_all_uppercase = false;
break;
}
}
if (is_all_uppercase)
{
auto res = g_dictionary.find(string_lower(substr));
if (res != g_dictionary.end())
{
substr = res->second;
}
else
{
substr = string_lower(substr);
g_unique_tokens.insert(substr);
}
}
}
}
std::string res;
for (uint32_t wtoken_index = 0; wtoken_index < wtokens.size(); wtoken_index++)
res += wtokens[wtoken_index];
return res;
}
// Lookup table from a lowercased abbreviation (hatch_abbrev::pAbbrev) to its hatch_abbrev entry.
// Filled by init_hatch_abbreviations_map().
static std::unordered_map<std::string, hatch_abbrev> g_hatch_abbreviations_map;
// Populates g_hatch_abbreviations_map from g_hatch_abbreviations, keyed by the lowercased abbreviation.
// Calls panic() if two entries lowercase to the same key.
static void init_hatch_abbreviations_map()
{
    for (const auto& abbrev : g_hatch_abbreviations)
    {
        const auto inserted = g_hatch_abbreviations_map.insert(std::make_pair(string_lower(abbrev.pAbbrev), abbrev));
        if (inserted.second)
            continue;

        panic("Mutiple Hatch abbreviation: %s", inserted.first->first.c_str());
    }
}
// Expand abbreviations
static void expand_abbreviations_internal(bool first_line, std::string orig_token, const string_vec& tokens, uint32_t cur_tokens_index, std::vector<token>& toks)
{
const uint32_t MAX_ABBREVS = 5;
uint32_t k;
for (k = 0; k < MAX_ABBREVS; k++)
{
std::string new_token(orig_token);
auto find_res = g_hatch_abbreviations_map.find(string_lower(orig_token));
if (find_res != g_hatch_abbreviations_map.end())
{
if (!first_line || !find_res->second.m_forbid_firstline)
{
new_token = find_res->second.pExpansion;
if (new_token.size())
toks.push_back(token(new_token, !first_line && (new_token == orig_token), false));
break;
}
}
if ((orig_token.size() >= 4) && (uisupper(orig_token[0])))
{
std::string month_suffix(orig_token);
month_suffix.erase(0, 3);
if ((month_suffix.size() <= 4) && string_is_digits(month_suffix))
{
std::string month_prefix(orig_token);
month_prefix.erase(3, month_prefix.size() - 3);
std::string search_prefix(string_upper(month_prefix));
static const char* g_hmonths[12] =
{
"JAN", "FEB", "MAR", "APR", "MAY", "JUN",
"JLY", "AUG", "SEP", "OCT", "NOV", "DEC"
};
uint32_t m;
for (m = 0; m < 12; m++)
if (search_prefix == g_hmonths[m])
break;
if (m < 12)
{
toks.push_back(token(g_months[m], !first_line, false));
// TODO: This can be improved by checking the # before the token
long long val = atoll(month_suffix.c_str());
if (val > 31)
month_suffix = '\'' + month_suffix;
toks.push_back(token(month_suffix, !first_line, false));
break;
}
}
}
size_t p;
if ((p = orig_token.find_first_of('.')) == std::string::npos)
{
// No period(s) - we're done.
if (new_token.size())
toks.push_back(token(new_token, !first_line, false));
break;
}
// Specifically detect abbrev. first names like "A." etc. and expand them.
if (!first_line && (orig_token.size() > 4) && (p == 1) && uisupper(orig_token[0]) && uisupper(orig_token[2]))
{
std::string first_name(orig_token);
first_name.erase(2, first_name.size() - 2);
toks.push_back(token(first_name, false, false));
orig_token.erase(0, p + 1);
}
else
{
// Detect words starting with an abbreviation ending in "."
std::string prefix(orig_token);
prefix.erase(p + 1, prefix.size() - (p + 1));
find_res = g_hatch_abbreviations_map.find(string_lower(prefix));
if ((find_res != g_hatch_abbreviations_map.end()) && (!first_line || !find_res->second.m_forbid_firstline))
{
new_token = find_res->second.pExpansion;
toks.push_back(token(new_token, false, false));
orig_token.erase(0, p + 1);
}
else
{
if (new_token.size())
toks.push_back(token(new_token, !first_line, false));
break;
}
}
} // k
if (k == MAX_ABBREVS)
{
if (orig_token.size())
toks.push_back(token(orig_token, !first_line, false));
}
}
// True for the characters that terminate a sentence: '!', '.' and '?'.
static bool is_sentence_ender(uint8_t c)
{
    switch (c)
    {
    case '!':
    case '.':
    case '?':
        return true;
    default:
        return false;
    }
}
// Expands one input token into one or more output tokens appended to `toks`.
//
// A single leading and/or trailing quote character (' or ") is stripped from tokens of at least
// 3 chars before the abbreviation lookup (done by expand_abbreviations_internal()), then re-attached
// to the first/last token that the lookup produced.
// `tokens` and `cur_tokens_index` are only forwarded to expand_abbreviations_internal().
static void expand_abbreviations(bool first_line, std::string orig_token, const string_vec& tokens, uint32_t cur_tokens_index, std::vector<token>& toks)
{
    // Temporarily remove " and ' prefix/suffix chars from the token, before the abbrev checks.
    std::string prefix_char, suffix_char;
    if (orig_token.size() >= 3)
    {
        if ((orig_token[0] == '\'') || (orig_token[0] == '\"'))
        {
            prefix_char.push_back(orig_token[0]);
            orig_token.erase(0, 1);
        }

        if ((orig_token.back() == '\'') || (orig_token.back() == '\"'))
        {
            suffix_char.push_back(orig_token.back());
            orig_token.pop_back();
        }
    }

    const size_t first_tok = toks.size();

    expand_abbreviations_internal(first_line, orig_token, tokens, cur_tokens_index, toks);

    const size_t num_toks = toks.size() - first_tok;
    assert(num_toks);

    // With asserts compiled out, an expansion that produced no tokens must not be indexed:
    // toks[first_tok] would be out of range and last_tok would wrap around.
    if (!num_toks)
        return;

    const size_t last_tok = first_tok + num_toks - 1;

    if (prefix_char.size())
        toks[first_tok].m_token = prefix_char + toks[first_tok].m_token;

    if (suffix_char.size())
        toks[last_tok].m_token = toks[last_tok].m_token + suffix_char;
}
static std::string decode_hatch(const std::string& str, bool first_line)
{
std::string res;
string_vec tokens;
std::string cur_token;
bool inside_space = false;
int prev_c = -1;
// Phase 1: Tokenize the input string based off examination of (mostly) individual chars, previous chars and upcoming individual chars.
for (uint32_t i = 0; i < str.size(); i++)
{
uint8_t c = str[i];
const bool is_two_dots = (c == '.') && ((i + 1) < str.size()) && (str[i + 1] == '.');
const bool is_one_equals = (c == '1') && ((i + 1) < str.size()) && (str[i + 1] == '=');
const bool prev_is_digit = i && uisdigit(str[i - 1]);
const bool next_is_plus = ((i + 1) < str.size()) && (str[i + 1] == '+');
//const bool has_prev = (i != 0);
//const bool has_next = (i + 1) < str.size();
if (c == ' ')
{
if (cur_token.size())
{
tokens.push_back(cur_token);
cur_token.clear();
}
inside_space = true;
}
else if (is_one_equals)
{
if (cur_token.size())
{
tokens.push_back(cur_token);
cur_token.clear();
}
tokens.push_back("1=");
i++;
inside_space = false;
}
else if (
(c == ';') || ((c >= 0x18) && (c <= 0x1b)) || (c == '<') || (c == '>') ||
(c == '=') ||
(c == '/') ||
(c == ',') ||
(c == '?') || (c == '!') ||
((!prev_is_digit || next_is_plus) && (c == '+')) ||
(c == '@') || (c == '-') ||
is_two_dots
)
{
if (cur_token.size())
{
tokens.push_back(cur_token);
cur_token.clear();
}
std::string s;
s.push_back(c);
if (is_two_dots)
{
s += ".";
i++;
}
tokens.push_back(s);
inside_space = false;
}
else
{
cur_token.push_back(c);
inside_space = false;
if ((c == 0xf8) || // code page 437 degree sym
(prev_is_digit && (c == '+') && !next_is_plus))
{
tokens.push_back(cur_token);
cur_token.clear();
}
}
prev_c = c;
}
if (cur_token.size())
tokens.push_back(cur_token);
// Phase 2: Exceptional fixups that change or split tokens up into multiple tokens.
string_vec new_tokens;
for (uint32_t i = 0; i < tokens.size(); i++)
{
std::string tok(tokens[i]);
// Convert "BBK#"
if (string_begins_with(tok, "BBK#") && (tok.size() > 4))
{
new_tokens.push_back("Project Bluebook Case #");
tok.erase(0, 4);
new_tokens.push_back(tok);
continue;
}
// Split "k'alt"
if (string_ends_in(tok, "k'alt"))
{
tok.erase(tok.size() - 3, 3);
new_tokens.push_back(tok);
new_tokens.push_back("Alt");
continue;
}
// Convert "HI+LO"
if ((i + 2 < tokens.size()) && (tokens[i] == "HI") && (tokens[i + 1] == "+") && (tokens[i + 2] == "LO"))
{
tokens.push_back("high and low");
i += 2;
continue;
}
// Don't split "4rth" to "4 rth" etc.
if ((string_icompare(tok, "4RTH") == 0) || (string_icompare(tok, "3rds") == 0) || (string_icompare(tok, "16th") == 0))
{
new_tokens.push_back(tok);
continue;
}
if (string_ends_in(tok, "Kmph"))
{
new_tokens.push_back(tok);
continue;
}
if (tok == "12Ocm")
{
new_tokens.push_back("120cm");
continue;
}
if (string_icompare(tok, "3OOM") == 0)
{
new_tokens.push_back("300m");
continue;
}
// If the first char isn't a digit then just continue now, because the rest of this code is concerned with splitting numbers away from words.
if (!isdigit(tok[0]))
{
new_tokens.push_back(tok);
continue;
}
if (tok.size() >= 3)
{
// Check for 1-7 digits then ' followed by 1- letters and split
uint32_t j;
for (j = 1; j < tok.size(); j++)
if (tok[j] == '\'')
break;
if ((j < tok.size()) && (j != tok.size() - 1) && (j <= 7))
{
uint32_t k;
for (k = 1; k < j; k++)
if (!uisdigit(tok[k]) && (utolower(tok[k]) != 'x') && (utolower(tok[k]) != 'k') && (tok[k] != '.'))
break;
if ((k == j) && (uisalpha(tok[j + 1])))
{
int sp = j + 1;
std::string new_tok(tok);
new_tok.erase(0, sp);
std::string n(tok);
n.erase(sp, n.size() - sp);
new_tokens.push_back(n);
new_tokens.push_back(new_tok);
continue;
}
}
}
// Won't split digits away for tokens < 4 chars
if ((tok.size() < 4) || (tok == "6F6s"))
{
new_tokens.push_back(tok);
continue;
}
// Check for 1-2 digits and alpha and split
// TODO: support 3-4 digits
int split_point = -1;
if (uisalpha(tok[1]))
split_point = 1;
else if (uisdigit(tok[1]) && uisalpha(tok[2]) && uisalpha(tok[3]))
split_point = 2;
if (split_point > 0)
{
std::string new_tok(tok);
new_tok.erase(0, split_point);
// Don't split the number digits from some special cases, like hr, cm, mph, etc.
if ((string_icompare(new_tok, "hr") != 0) &&
(string_icompare(new_tok, "nd") != 0) &&
(string_icompare(new_tok, "kw") != 0) &&
(string_icompare(new_tok, "cm") != 0) &&
(string_icompare(new_tok, "km") != 0) &&
(string_icompare(new_tok, "mph") != 0) &&
(string_icompare(new_tok, "kph") != 0) &&
(!string_begins_with(new_tok, "K'")))
{
std::string n(tok);
n.erase(split_point, n.size() - split_point);
new_tokens.push_back(n);
if (new_tok == "min")
new_tok = "minute(s)";
new_tokens.push_back(new_tok);
}
else
{
new_tokens.push_back(tok);
}
}
else
{
new_tokens.push_back(tok);
}
}
tokens.swap(new_tokens);
std::vector<token> toks;
// Phase 3: Compose new string, expanding abbreviations and tokens to one or more words, or combining together special sequences of tokens into specific phrases.
// Also try to carefully insert spaces into the output, as needed.
for (uint32_t i = 0; i < tokens.size(); i++)
{
const uint32_t num_tokens_left = ((uint32_t)tokens.size() - 1) - i;
const bool has_prev_token = i > 0, has_next_token = (i + 1) < tokens.size();
const bool next_token_is_slash = (has_next_token) && (tokens[i + 1][0] == '/');
bool is_next_dir = false;
if (has_next_token)
{
uint32_t ofs = 1;
if (tokens[i + 1] == ">")
{
ofs = 2;
}
if ((i + ofs) < tokens.size())
{
std::string next_tok = string_upper(tokens[i + ofs]);
if ((next_tok.back() == '.') && (next_tok.size() >= 2))
next_tok.pop_back();
if ((next_tok == "N") || (next_tok == "S") || (next_tok == "E") || (next_tok == "W") ||
(next_tok == "SW") || (next_tok == "SE") || (next_tok == "NW") || (next_tok == "NE") ||
(next_tok == "NNE") || (next_tok == "NNW") || (next_tok == "SSE") || (next_tok == "SSW") ||
(next_tok == "ESE"))
{
is_next_dir = true;
}
}
}
std::string orig_token(tokens[i]);
std::string new_token(orig_token);
if (!orig_token.size())
continue;
// Handle various exceptions before expending abbreviations
// TODO: Refactor to table(s)
// Special handling for RUSS/RUSS.
if ((tokens[i] == "RUSS") || (tokens[i] == "RUSS.") || (tokens[i] == "RUS") || (tokens[i] == "RUS."))
{
if (first_line)
new_token = "Russia";
else
new_token = "Russian";
}
// AA FLITE #519 - exception
// AA LINER
else if ((tokens[i] == "AA") && (num_tokens_left >= 1) && ((tokens[i + 1] == "FLITE#519") || (tokens[i + 1] == "LINER")))
{
new_token = "AA";
}
// bright Lt.
else if ((tokens[i] == "VBRITE") && (num_tokens_left >= 1) && (tokens[i + 1] == "LT"))
{
new_token = "vibrant bright light";
i++;
}
// ENERGY SRC
else if ((tokens[i] == "ENERGY") && (num_tokens_left >= 1) && (tokens[i + 1] == "SRC"))
{
new_token = "energy source";
i++;
}
// mid air - exception
else if ((tokens[i] == "MID") && (num_tokens_left >= 1) && (tokens[i + 1] == "AIR"))
{
new_token = "mid";
}
// /FORMN or /formation - exception
else if ((string_icompare(tokens[i], "/") == 0) && (num_tokens_left >= 1) && ((string_icompare(tokens[i + 1], "FORMN") == 0) || (string_icompare(tokens[i + 1], "formation") == 0)))
{
new_token = "in formation";
i++;
}
// /FORMNs - exception
else if ((string_icompare(tokens[i], "/") == 0) && (num_tokens_left >= 1) && ((string_icompare(tokens[i + 1], "FORMNs") == 0) || (string_icompare(tokens[i + 1], "formations") == 0)))
{
new_token = "in formations";
i++;
}
// LOST/CLOUDS - exception
else if ((string_icompare(tokens[i], "LOST") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "/") && (string_icompare(tokens[i + 2], "CLOUDS") == 0))
{
new_token = "lost in clouds";
i += 2;
}
// LOST/DISTANCE - exception
else if ((string_icompare(tokens[i], "LOST") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "/") && (string_icompare(tokens[i + 2], "DISTANCE") == 0))
{
new_token = "lost in the distance";
i += 2;
}
// W-carbide - exception
else if ((string_icompare(tokens[i], "W") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "carbide") == 0))
{
new_token = "W";
}
// S-SHAPE - exception
else if ((tokens[i] == "S") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "SHAPE"))
{
new_token = "S";
}
// mid-sky - exception
else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "SKY"))
{
new_token = "mid";
}
// mid-flite - exception
else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "FLITE"))
{
new_token = "mid";
}
// mid-city - exception
else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "CITY"))
{
new_token = "mid";
}
// W vee - exception
else if ((tokens[i] == "W") && (num_tokens_left >= 1) && (tokens[i + 1] == "VEE"))
{
new_token = "with vee";
i++;
}
// Lake Mi - exception
else if ((tokens[i] == "LAKE") && (num_tokens_left >= 1) && (tokens[i + 1] == "Mi"))
{
new_token = "Lake Michigan";
i++;
}
// SCI-FI
else if ((tokens[i] == "SCI") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "FI"))
{
new_token = "Sci-Fi";
i += 2;
}
// V-tall
else if ((tokens[i] == "V") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "TALL"))
{
new_token = "very tall";
i += 2;
}
// 1 OBS/1 OBS. at beginning
else if ((i == 1) && (tokens[0] == "1") && (tokens[1] == "OBS" || tokens[1] == "OBS."))
{
new_token = "observer";
}
// CLR WEATHER exception
else if ((num_tokens_left >= 1) && (tokens[i] == "CLR") && (tokens[i + 1] == "WEATHER"))
{
new_token = "clear";
}
// WATER DOMES exception (typo fix)
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "WATER") == 0) && (string_icompare(tokens[i + 1], "DOMES") == 0))
{
new_token = "water comes";
i++;
}
// W dome exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "W") == 0) && (string_icompare(tokens[i + 1], "DOME") == 0))
{
new_token = "with";
}
// CLR SKY exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "SKY") == 0))
{
new_token = "clear";
}
// CLR DOME exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "DOME") == 0))
{
new_token = "clear";
}
// CLR DOMED exception
else if ((num_tokens_left >= 2) && (string_icompare(tokens[i], "CLR") == 0) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "DOMED") == 0))
{
new_token = "clear";
}
// CLR DOME exception
else if ((num_tokens_left >= 2) && (string_icompare(tokens[i], "CLR") == 0) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "DOME") == 0))
{
new_token = "clear";
}
// CLR RDR exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "RDR") == 0))
{
new_token = "clear";
}
// CLR CLOCKPIT exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "COCKPIT") == 0))
{
new_token = "clear";
}
// CLR TORUS exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "TORUS") == 0))
{
new_token = "clear";
}
// CLR DAY exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "DAY") == 0))
{
new_token = "clear";
}
// CLR PLASTIC exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "PLASTIC") == 0))
{
new_token = "clear";
}
// CLR FOTOS exception (a guess, need to verify)
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "FOTOS") == 0))
{
new_token = "clear";
}
// CLR FOTO exception (a guess, need to verify)
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "FOTO") == 0))
{
new_token = "clear";
}
// CLR SHOT exception (a guess, need to verify)
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "SHOT") == 0))
{
new_token = "clear";
}
// CLR BLUE exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BLUE") == 0))
{
new_token = "clear";
}
// CLR BUBBLE exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BUBBLE") == 0))
{
new_token = "clear";
}
// CLR BUBBLES exception
else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BUBBLES") == 0))
{
new_token = "clear";
}
// S+Cu exception
else if ((num_tokens_left >= 2) && (tokens[i] == "S") && (tokens[i + 1] == "+") && (tokens[i + 2] == "Cu"))
{
new_token = "S";
}
// IND OBS exception
else if ((num_tokens_left >= 1) && (tokens[i] == "IND") && (tokens[i + 1] == "OBS"))
{
new_token = "independent";
}
// L<>R
else if ((num_tokens_left >= 3) && (tokens[i] == "L") && (tokens[i + 1] == "<") && (tokens[i + 2] == ">") && (tokens[i + 3] == "R"))
{
new_token = "left and right";
i += 3;
}
// <+>
else if ((num_tokens_left >= 2) && (tokens[i] == "<") && (tokens[i + 1] == "+") && (tokens[i + 2] == ">"))
{
new_token = "left and right";
i += 2;
}
else if (orig_token == "NFD")
{
if ((!has_next_token) || next_token_is_slash)
new_token = "No further details";
else
new_token = "No further details [in]";
}
// Up and down arrows
else if ((orig_token[0] == 0x18) &&
((i + 1) < tokens.size()) && (tokens[i + 1][0] == '+') &&
((i + 2) < tokens.size()) && (tokens[i + 2][0] == 0x19))
{
const uint32_t at_end = ((i + 3) == tokens.size()) || (tokens[i + 3][0] == '/');
new_token = !at_end ? "going up and down [to]" : "going up and down";
i += 2;
}
// "V BRITE"
else if ((orig_token == "V") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "BRITE"))
{
new_token = "very bright";
i++;
}
// ++
else if ((orig_token == "+") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "+"))
{
new_token = "and more/others";
i++;
}
// >>
else if ((orig_token == ">") && ((i + 1) < tokens.size()) && (tokens[i + 1] == ">"))
{
const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/');
new_token = (!at_end && !is_next_dir) ? "going quickly [to]" : "going quickly";
i++;
}
// ><
else if ((orig_token == ">") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "<"))
{
new_token = "to/from";
i++;
}
// <>
else if ((orig_token == "<") && ((i + 1) < tokens.size()) && (tokens[i + 1] == ">"))
{
// Larry said "between" but that sounds awkward and would require reordering tokens.
new_token = "to/from/between";
i++;
}
// >
else if (orig_token == ">")
{
new_token = (has_next_token && !next_token_is_slash && !is_next_dir) ? "going [to]" : "going";
}
// Tree up arrows
else if ((orig_token[0] == 0x18) && (num_tokens_left >= 2) && (tokens[i + 1][0] == 0x18) && (tokens[i + 2][0] == 0x18))
{
const uint32_t at_end = ((i + 3) == tokens.size()) || (tokens[i + 3][0] == '/');
new_token = !at_end ? "extremely quickly going up [to]" : "extremely quickly going up";
i += 2;
}
// Two up arrows
else if ((orig_token[0] == 0x18) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x18))
{
const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/');
new_token = !at_end ? "quickly going up [to]" : "quickly going up";
i++;
}
// Up arrow
else if (orig_token[0] == 0x18)
{
new_token = (has_next_token && !next_token_is_slash) ? "going up [to]" : "going up";
}
// Two down arrows
else if ((orig_token[0] == 0x19) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x19))
{
const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/');
new_token = !at_end ? "quickly going down [to]" : "quickly going down";
i++;
}
// Down arrow
else if (orig_token[0] == 0x19)
{
new_token = (has_next_token && !next_token_is_slash) ? "going down [to]" : "going down";
}
// Two right arrows
else if ((orig_token[0] == 0x1A) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x1A))
{
const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/');
new_token = !at_end ? "quickly going right [to]" : "quickly going right";
i++;
}
// Right arrow
else if (orig_token[0] == 0x1A)
{
new_token = (has_next_token && !next_token_is_slash) ? "going right [to]" : "going right";
}
// Two left arrows
else if ((orig_token[0] == 0x1B) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x1B))
{
const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/');
new_token = !at_end ? "quickly going left [to]" : "quickly going left";
i++;
}
// Left arrow
else if (orig_token[0] == 0x1B)
{
new_token = (has_next_token && !next_token_is_slash) ? "going left [to]" : "going left";
}
// /
else if (orig_token[0] == '/')
{
new_token = "/";
}
// +
else if (orig_token[0] == '+')
{
if (!i)
new_token = "also";
else if ((i != (tokens.size() - 1)) && (tokens[i + 1][0] != '/'))
new_token = "and";
else
new_token = "and more";
}
// @
else if (orig_token[0] == '@')
{
new_token = "at";
}
// dbl-word
else if ((string_icompare(orig_token, "dbl") == 0) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-"))
{
new_token = "double";
}
// GLOW-word
else if ((string_icompare(orig_token, "GLOW") == 0) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-"))
{
new_token = "glowing";
}
// A-test
else if ((orig_token == "A") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") &&
((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "TEST") == 0))
{
new_token = "atomic test";
i += 2;
}
// A-plant
else if ((orig_token == "A") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") &&
((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "PLANT") == 0))
{
new_token = "atomic plant";
i += 2;
}
// V-form
else if ((orig_token == "V") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") &&
((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "FORM") == 0))
{
new_token = "V-formation";
i += 2;
}
// 1/2 (to fix spacing issues)
else if ((orig_token == "1") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "/") &&
((i + 2) < tokens.size()) && (tokens[i + 2] == "2"))
{
new_token = "1/2";
i += 2;
}
// "W/O"
else if ((i) &&
(string_icompare(orig_token, "W") == 0) &&
((i + 1) < tokens.size()) && (tokens[i + 1] == "/") &&
((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "O") == 0))
{
new_token = "without";
i += 2;
}
// "S/L"
else if ((orig_token == "S") &&
((i + 1) < tokens.size()) && (tokens[i + 1] == "/") &&
((i + 2) < tokens.size()) && (tokens[i + 2] == "L"))
{
// No idea what this means yet.
new_token = "straight and level";
i += 2;
}
// "FOO-FIGHTERS"
else if ((orig_token == "FOO") &&
((i + 1) < tokens.size()) && (tokens[i + 1] == "-") &&
((i + 2) < tokens.size()) && (tokens[i + 2] == "FIGHTERS"))
{
// Just don't let the abbreviator kick in. Thanks Larry.
}
// "W/word"
else if ((i) &&
((orig_token == "W") || (orig_token == "w")) &&
((i + 1) < tokens.size()) && (tokens[i + 1] == "/") &&
(tokens[i - 1] != ">") &&
(tokens[i - 1] != "<"))
{
new_token = "with";
i++;
}
// "1="
else if (orig_token == "1=")
{
new_token = "one is [a]";
}
// Exception for "ORG RPT".
else if ((orig_token == "ORG") && has_next_token && (tokens[i + 1] == "RPT"))
{
new_token = "original";
}
// TODO: check for line 1 and don't expand these states
// Exception for ,MI (the state) - don't let the abbreviation expander change it.
else if (first_line && orig_token == "MI" && has_prev_token && tokens[i - 1] == ",")
{
}
// Exception for ,MT (the state) - don't change to "Mt."
else if (first_line && orig_token == "MT" && has_prev_token && tokens[i - 1] == ",")
{
}
// Exception for ,NE (the state) - don't change to "northeast"
else if (first_line && orig_token == "NE" && has_prev_token && tokens[i - 1] == ",")
{
}
// Exception for ,MS (the state) - don't let the abbreviation expander change it.
else if (first_line && orig_token == "MS" && has_prev_token && tokens[i - 1] == ",")
{
}
// Exception for ,AL (the state) - don't let the abbreviation expander change it.
else if (first_line && orig_token == "AL" && has_prev_token && tokens[i - 1] == ",")
{
}
else
{
expand_abbreviations(first_line, orig_token, tokens, i, toks);
continue;
}
if (new_token.size())
toks.push_back(token(new_token, !first_line && (new_token == tokens[i]), false));
}
// Phase 4: Compose the final string, converting tokens to lower/uppercase and inserting spaces as needed.
std::string new_str;
bool in_quote = false;
for (uint32_t i = 0; i < toks.size(); i++)
{
std::string new_token(toks[i].m_token);
if (!new_token.size())
continue;
if (!first_line)
new_token = fix_capitilization(toks, i);
// Add a space if the previous string is not empty - excluding special cases where a space isn't necessary.
if (new_str.size() &&
(new_token != "..") &&
(new_token != ",") &&
(new_token != "!") && (new_token != "?") &&
(new_token != "+") &&
(!((new_token == ")") && (new_str.back() == '?'))) &&
(new_token != ";") && (new_str.back() != ';') &&
(new_token != "-") && (new_str.back() != '-') &&
(new_str.back() != '#') &&
(new_str.back() != '+') &&
(!(in_quote && (new_token == "\"") && new_str.size() && is_sentence_ender(new_str.back())))
)
{
new_str.push_back(' ');
//new_str.push_back('*');
}
// Append the token string to the output string
new_str += new_token;
for (uint8_t c : new_token)
if (c == '\"')
in_quote = !in_quote;
}
return new_str;
}
static void decode_hatch_desc(const udb_rec* pRec, std::string& db_str, std::string& loc_str, std::string& desc_str)
{
for (uint32_t i = 0; i < UDB_REC_TEXT_SIZE; i++)
{
if (pRec->get_text()[i] == 0)
break;
db_str.push_back(pRec->get_text()[i]);
}
std::string orig_desc(db_str);
string_vec desc;
for (; ; )
{
size_t pos = orig_desc.find_first_of(':');
if (pos == std::string::npos)
{
desc.push_back(string_trim(orig_desc));
break;
}
else
{
std::string s(orig_desc);
s.erase(pos, s.size() - pos);
desc.push_back(string_trim(s));
orig_desc.erase(0, pos + 1);
}
}
for (uint32_t i = 0; i < desc.size(); i++)
{
std::string str(decode_hatch(desc[i], !i));
if (!str.size())
continue;
if (desc_str.size())
{
if (desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?')
desc_str += ".";
desc_str += " ";
}
if (!i)
{
loc_str = string_upper(str);
}
else
{
if (uislower(str[0]))
str[0] = utoupper(str[0]);
else if ((str[0] == '\"') && (str.size() >= 2) && (uislower(str[1])))
str[1] = utoupper(str[1]);
else if ((str[0] == '\'') && (str.size() >= 2) && (uislower(str[1])))
str[1] = utoupper(str[1]);
else if ((str[0] == '(') && (str.size() >= 2) && (uislower(str[1])))
str[1] = utoupper(str[1]);
desc_str += str;
}
}
if (desc_str.size() && desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?')
{
if ((desc_str.back() == ')') && (!string_ends_in(desc_str, "(s)")))
{
desc_str.pop_back();
if (desc_str.back() == ' ')
desc_str.pop_back();
if (desc_str.size() && desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?')
desc_str += ".";
desc_str += ")";
}
else
{
desc_str += ".";
}
}
db_str = dos_to_utf8(db_str);
loc_str = dos_to_utf8(loc_str);
desc_str = dos_to_utf8(desc_str);
}
// Panics if two entries of the given hatch reference table share the same m_ref id.
// T must be iterable and its elements must expose an m_ref member usable as an int key.
template<typename T>
static void check_for_hatch_tab_dups(const T& tab)
{
    std::unordered_set<int> seen_refs;

    for (const auto& entry : tab)
    {
        // insert() reports through .second whether the id was new.
        const auto insert_result = seen_refs.insert(entry.m_ref);
        if (insert_result.second)
            continue;

        panic("Duplicate hatch ref table id");
    }
}
// Loads "uppercase_dict.txt" and fills g_dictionary with every entry whose first character is uppercase,
// keyed by the lowercased entry and mapping to the entry as written in the file.
// Panics if the file cannot be read.
static void init_dict()
{
    string_vec dict;

    uprintf("Reading dictionary\n");

    bool utf8_flag = false;
    if (!read_text_file("uppercase_dict.txt", dict, true, &utf8_flag))
        panic("Failed reading uppercase_dict.txt");

    // Iterate by value on purpose: string_trim() is applied to the local copy
    // (presumably it trims in place, since its return value is ignored here - TODO confirm).
    for (auto str : dict)
    {
        string_trim(str);

        if (str.size() && uisupper(str[0]))
        {
            g_dictionary.insert(std::make_pair(string_lower(str), str));
        }
    }

    // size() returns size_t; cast so the argument matches the %u conversion on 64-bit targets.
    uprintf("Done reading dictionary, %u uppercase words\n", static_cast<uint32_t>(g_dictionary.size()));
}
// One-time initialization of the UDB decoder's global tables.
// Verifies the reference tables contain no duplicate ids, builds g_hatch_refs_tab from g_hatch_refs,
// then initializes the abbreviation map, the capitalization exception tokens and the dictionary.
void udb_init()
{
    // The on-disk record layout is reinterpreted directly as udb_rec, so its size must match exactly.
    // Checked at compile time so the check is not lost in builds where assert() is compiled out.
    static_assert(sizeof(udb_rec) == UDB_RECORD_SIZE, "udb_rec must be exactly UDB_RECORD_SIZE bytes");

    check_for_hatch_tab_dups(g_hatch_refs);
    check_for_hatch_tab_dups(g_hatch_refs_93);
    check_for_hatch_tab_dups(g_hatch_refs_96);
    check_for_hatch_tab_dups(g_hatch_refs_97);
    check_for_hatch_tab_dups(g_hatch_refs_98);

    // Only g_hatch_refs is used to populate the ref id -> description lookup table here.
    for (const auto& ref : g_hatch_refs)
        g_hatch_refs_tab[ref.m_ref] = ref.m_pDesc;

    init_hatch_abbreviations_map();
    init_hatch_cap_exception_tokens();
    init_dict();
}
bool udb_dump()
{
uint8_vec udb;
if (!read_binary_file("u.rnd", udb))
return false;
const uint32_t TOTAL_RECS = 18123;
if ((udb.size() / UDB_RECORD_SIZE) < TOTAL_RECS)
panic("Invalid file size");
string_vec output;
const udb_rec* pRecs = reinterpret_cast<const udb_rec*>(&udb.front());
for (uint32_t rec_index = 1; rec_index < TOTAL_RECS; rec_index++)
//for (uint32_t rec_index = 18038; rec_index <= 18038; rec_index++)
{
const udb_rec* pRec = pRecs + rec_index;
std::string db_str, loc_str, desc_str;
decode_hatch_desc(pRec, db_str, loc_str, desc_str);
event_date ed;
pRec->get_date(ed);
std::string date_str(ed.get_string());
{
uprintf("\n----------%u: Date: %s, Strangeness: %u, Credibility: %u\n", rec_index, date_str.c_str(), pRec->get_strangeness(), pRec->get_credibility());
std::string time;
if (pRec->get_time(time))
uprintf("Time: %s\n", time.c_str());
if (pRec->get_duration())
uprintf("Duration: %u mins\n", pRec->get_duration());
if (pRec->get_elevation() != -99)
uprintf("Elevation: %im\n", pRec->get_elevation());
if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999))
uprintf("Altitude: %im\n", pRec->get_rel_altitude());
uprintf("Location: %s\n", loc_str.c_str());
std::string country_name, state_or_prov_name;
pRec->get_geo(country_name, state_or_prov_name);
const uint32_t continent_code = pRec->get_continent_code();
uprintf("Country: %s, State/Province: %s (%s), Continent: %s\n", country_name.c_str(), state_or_prov_name.c_str(), pRec->get_state_or_prov().c_str(),
(continent_code < std::size(g_hatch_continents)) ? g_hatch_continents[continent_code] : "?");
uprintf("Latitude/Longitude: %f %f, %s %s\n", pRec->get_latitude(), pRec->get_longitude(), pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str());
const uint32_t locale = pRec->get_locale();
if (locale < std::size(g_hatch_locales))
uprintf("Locale: %s\n", g_hatch_locales[locale]);
uprintf("UDB Desc: %s\n", db_str.c_str());
uprintf("Decoded Desc: %s\n", desc_str.c_str());
uint32_t total_flags = 0;
for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++)
{
if (!f) // map
continue;
if (pRec->get_flag(f))
total_flags++;
}
if (total_flags)
{
uprintf("Flags: ");
uint32_t num_flags_printed = 0;
for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++)
{
if (!f) // map
continue;
if (pRec->get_flag(f))
{
uprintf("%s", g_pHatch_flag_descs[f]);
num_flags_printed++;
if (num_flags_printed < total_flags)
{
uprintf(", ");
if ((num_flags_printed % 2) == 0)
uprintf("\n");
}
}
}
uprintf("\n");
}
uprintf("Ref: %s\n", pRec->get_full_refs().c_str());
}
output.push_back(string_format("Date: %s\nLocation: \"%s\"\nDescription: \"%s\"\n", date_str.c_str(), loc_str.c_str(), desc_str.c_str()));
}
string_vec toks;
for (const auto& str : g_unique_tokens)
toks.push_back(str);
write_text_file("unique_tokens.txt", toks, false);
write_text_file("output.txt", output, true);
return true;
}
static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event& event)
{
std::string db_str, loc_str, desc_str;
decode_hatch_desc(pRec, db_str, loc_str, desc_str);
pRec->get_date(event.m_begin_date);
if (event.m_begin_date.m_year <= 0)
return false;
std::string time;
if (pRec->get_time(time))
{
if (time != "00:00?")
event.m_time_str = time;
}
event.m_date_str = event.m_begin_date.get_string();
event.m_locations.push_back(loc_str);
event.m_desc = desc_str;
// TODO
event.m_type.push_back("sighting");
event.m_source_id = string_format("Hatch_UDB_%u", rec_index);
event.m_source = "Hatch";
for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++)
if ((f != cFlagMAP) && (pRec->get_flag(f)))
event.m_attributes.push_back(g_pHatch_flag_descs[f]);
event.m_refs.push_back(pRec->get_full_refs());
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%f,%f)", pRec->get_latitude(), pRec->get_longitude())));
event.m_key_value_data.push_back(std::make_pair("LatLong", string_format("%f %f", pRec->get_latitude(), pRec->get_longitude())));
event.m_key_value_data.push_back(std::make_pair("LatLongDMS", string_format("%s %s", pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str())));
event.m_key_value_data.push_back(std::make_pair("HatchDesc", db_str));
event.m_key_value_data.push_back(std::make_pair("Duration", string_format("%u", pRec->get_duration())));
std::string country_name, state_or_prov_name;
pRec->get_geo(country_name, state_or_prov_name);
event.m_key_value_data.push_back(std::make_pair("Country", country_name));
event.m_key_value_data.push_back(std::make_pair("State/Prov", state_or_prov_name));
event.m_key_value_data.push_back(std::make_pair("Strangeness", string_format("%u", pRec->get_strangeness())));
event.m_key_value_data.push_back(std::make_pair("Credibility", string_format("%u", pRec->get_credibility())));
const uint32_t locale = pRec->get_locale();
if (locale < std::size(g_hatch_locales))
event.m_key_value_data.push_back(std::make_pair("Locale", g_hatch_locales[locale]));
if (pRec->get_elevation() != -99)
event.m_key_value_data.push_back(std::make_pair("Elev", string_format("%i", pRec->get_elevation())));
if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999))
event.m_key_value_data.push_back(std::make_pair("RelAlt", string_format("%i", pRec->get_rel_altitude())));
return true;
}
bool udb_convert()
{
uint8_vec udb;
if (!read_binary_file("u.rnd", udb))
return false;
const uint32_t TOTAL_RECS = 18123;
if ((udb.size() / UDB_RECORD_SIZE) < TOTAL_RECS)
panic("Invalid file size");
const udb_rec* pRecs = reinterpret_cast<const udb_rec*>(&udb.front());
ufo_timeline timeline;
for (uint32_t rec_index = 1; rec_index < TOTAL_RECS; rec_index++)
{
const udb_rec* pRec = pRecs + rec_index;
timeline_event event;
if (!convert_rec(rec_index, pRec, event))
continue;
timeline.get_events().push_back(event);
}
if (!timeline.get_events().size())
panic("Empty timeline)");
timeline.set_name("Hatch_UDB_Timeline");
return timeline.write_file("hatch_udb.json", true);
}