// Copyright (C) 2023 Richard Geldreich, Jr. #include "udb.h" #include "udb_tables.h" const uint32_t UDB_RECORD_SIZE = 112; const uint32_t UDB_REC_TEXT_SIZE = 78; enum { cFlagMAP, cFlagGND, cFlagCST, cFlagSEA, cFlagAIR, cFlagObsMIL, cFlagObsCIV, cFlagHQO, // loc/obs flags cFlagSCI, cFlagTLP, cFlagNWS, cFlagMID, cFlagHOX, cFlagCNT, cFlagODD, cFlagWAV, // misc flags cFlagSCR, cFlagCIG, cFlagDLT, cFlagNLT, cFlagPRB, cFlagFBL, cFlagSUB, cFlagNFO, // type of ufo craft flags cFlagOID, cFlagRBT, cFlagPSH, cFlagMIB, cFlagMON, cFlagGNT, cFlagFIG, cFlagNOC, // aliens monsters flags cFlagOBS, cFlagRAY, cFlagSMP, cFlagMST, cFlagABD, cFlagOPR, cFlagSIG, cFlagCVS, // apparent ufo occupant activities flags cFlagNUC, cFlagDRT, cFlagVEG, cFlagANI, cFlagHUM, cFlagVEH, cFlagBLD, cFlagLND, // places visited and things affected flags cFlagPHT, cFlagRDR, cFlagRDA, cFlagEME, cFlagTRC, cFlagTCH, cFlagHST, cFlagINJ, // evidence and special effects flags cFlagMIL, cFlagBBK, cFlagGSA, cFlagOGA, cFlagSND, cFlagODR, cFlagCOV, cFlagCMF, // misc details flags cTotalFlags = 64 }; static std::string get_deg_to_dms(double deg) { deg = std::round(fabs(deg) * 3600.0f); int min_secs = (int)fmod(deg, 3600.0f); deg = std::floor((deg - (double)min_secs) / 3600.0f); int minutes = min_secs / 60; int secs = min_secs % 60; return string_format("%02i%:%02i:%02i", (int)deg, minutes, secs); } #pragma pack(push, 1) struct udb_rec { private: int16_t m_year; uint8_t m_unknown_and_locale; // nibbles uint8_t m_unknown_and_month; // nibbles uint8_t m_ref_index_high_day; // 3 bits ref index high, low 5 bits day uint8_t m_time; uint8_t m_ymdt; // 2-bit fields: TDMY accuracy, T lowest, 0=invalid, 1=?, 2=~, 3=accurate uint8_t m_duration; uint8_t m_unknown1; int16_t m_enc_longtitude; int16_t m_enc_latitude; int16_t m_elevation; int16_t m_rel_altitude; uint8_t m_unknown2; uint8_t m_continent_country; // nibbles uint8_t m_state_or_prov[3]; uint8_t m_unknown3; #if 0 uint8_t m_loc_flags; uint8_t m_misc_flags; uint8_t m_type_of_ufo_craft_flags; uint8_t m_aliens_monsters_flags; uint8_t m_apparent_ufo_occupant_activities_flags; uint8_t m_places_visited_and_things_affected_flags; uint8_t m_evidence_and_special_effects_flags; uint8_t m_miscellaneous_details_flags; #else uint8_t m_flags[8]; #endif uint8_t m_text[UDB_REC_TEXT_SIZE]; uint8_t m_reference; uint8_t m_ref_index; uint8_t m_strangeness_credibility; // nibbles public: const uint8_t* get_text() const { return m_text; } int get_year() const { return m_year; } uint32_t get_month() const { return m_unknown_and_month & 0xF; } uint32_t get_day() const { return m_ref_index_high_day & 31; } // meters int get_elevation() const { return m_elevation; } int get_rel_altitude() const { return m_rel_altitude; } uint32_t get_strangeness() const { return m_strangeness_credibility >> 4; } uint32_t get_credibility() const { return m_strangeness_credibility & 0xF; } uint32_t get_reference() const { return m_reference; } uint32_t get_reference_index() const { return m_ref_index | ((m_ref_index_high_day >> 5) << 8); } uint32_t get_continent_code() const { return m_continent_country >> 4; } uint32_t get_country_code() const { return m_continent_country & 0xF; } uint32_t get_locale() const { return m_unknown_and_locale & 0xF; } std::string get_state_or_prov() const { const uint32_t c0 = m_state_or_prov[0]; const uint32_t c1 = m_state_or_prov[1]; const uint32_t c2 = m_state_or_prov[2]; return dos_to_utf8(string_format("%c%c%c", (c0 >= ' ') ? c0 : ' ', (c1 >= ' ') ? c1 : ' ', (c2 >= ' ') ? c2 : ' ')); } double get_latitude() const { return ((double)m_enc_latitude / 200.0f) * 1.11111111111f; } double get_longitude() const { return -((double)m_enc_longtitude / 200.0f) * 1.11111111111f; } std::string get_latitude_dms() const { double lat = get_latitude(); return get_deg_to_dms(lat) + ((lat <= 0) ? " S" : " N"); } std::string get_longitude_dms() const { double lon = get_longitude(); return get_deg_to_dms(lon) + ((lon <= 0) ? " W" : " E"); } // minutes uint32_t get_duration() const { return m_duration; } enum { cAccuracyInvalid = 0, cAccuracyQuestionable = 1, cAccuracyApproximate = 2, cAccuracyGood = 3 }; bool get_time(std::string& time) const { uint32_t time_accuracy = m_ymdt & 3; if (time_accuracy == cAccuracyInvalid) return false; uint32_t hour = m_time / 6; uint32_t minute = (m_time % 6) * 10; if (hour > 23) { assert(0); return false; } time = string_format("%02u:%02u", hour, minute); if (time_accuracy == cAccuracyQuestionable) time += "?"; else if (time_accuracy == cAccuracyApproximate) time = "~" + time; return true; } bool get_date(event_date& date) const { uint32_t year_accuracy = (m_ymdt >> 6) & 3; uint32_t month_accuracy = (m_ymdt >> 4) & 3; uint32_t day_accuracy = (m_ymdt >> 2) & 3; int year = year_accuracy ? get_year() : 0; uint32_t month = month_accuracy ? get_month() : 0; uint32_t day = day_accuracy ? get_day() : 0; if ((day < 1) || (day > 31)) { day = 0; day_accuracy = cAccuracyInvalid; } if ((month < 1) || (month > 12)) { month = 0; month_accuracy = cAccuracyInvalid; } if (!year) return false; uint32_t min_accuracy = year; date.m_year = year; if (month) { date.m_month = month; if (!day) { min_accuracy = std::min(year_accuracy, month_accuracy); } else { min_accuracy = std::min(std::min(year_accuracy, month_accuracy), day_accuracy); date.m_day = day; } } if (min_accuracy == cAccuracyApproximate) date.m_approx = true; else if (min_accuracy == cAccuracyQuestionable) date.m_fuzzy = true; return true; } enum { cMaxFlags = 64 }; // LOC, MISC, TYPE, ALIENS/MONSTERS, ACTIVITIES, VISITED/THINGS, EVIDENCE/SPECIAL, MISC_DETAILS bool get_flag(uint32_t index) const { assert(index < cMaxFlags); return (m_flags[index >> 3] & (1 << (index & 7))) != 0; } #if 0 uint8_t get_loc_flags() const { return m_loc_flags; } uint8_t get_misc_flags() const { return m_misc_flags; } uint8_t get_type_of_ufo_craft_flags() const { return m_type_of_ufo_craft_flags; } uint8_t get_aliens_monsters_flags() const { return m_aliens_monsters_flags; } uint8_t get_apparent_ufo_occupant_activities_flags() const { return m_apparent_ufo_occupant_activities_flags; } uint8_t get_places_visited_and_things_affected_flags() const { return m_places_visited_and_things_affected_flags; } uint8_t get_evidence_and_special_effects_flags() const { return m_evidence_and_special_effects_flags; } uint8_t get_miscellaneous_details_flags() const { return m_miscellaneous_details_flags; } #endif void get_geo(std::string& country_name, std::string& state_or_prov_name) const { std::string state_or_prov_str(get_state_or_prov()); string_trim_end(state_or_prov_str); if (state_or_prov_str.back() == '.') state_or_prov_str.pop_back(); if (state_or_prov_str.back() == '.') state_or_prov_str.pop_back(); get_hatch_geo(get_continent_code(), get_country_code(), state_or_prov_str, country_name, state_or_prov_name); if (state_or_prov_str == "UNK") state_or_prov_name = "Unknown"; } std::string get_full_refs() const { std::string ref(g_hatch_refs_tab[get_reference()]); if (g_hatch_refs_tab[get_reference()]) { uint32_t ref_index = get_reference_index(); if (get_reference() == 93) { for (const auto& x : g_hatch_refs_93) if (x.m_ref == ref_index) { ref += x.m_pDesc; break; } } else if (get_reference() == 96) { for (const auto& x : g_hatch_refs_96) if (x.m_ref == ref_index) { ref += x.m_pDesc; break; } } else if (get_reference() == 97) { for (const auto& x : g_hatch_refs_97) if (x.m_ref == ref_index) { ref += x.m_pDesc; break; } } else if (get_reference() == 98) { for (const auto& x : g_hatch_refs_98) if (x.m_ref == ref_index) { ref += x.m_pDesc; break; } } else { ref += string_format(" (Index %u)", ref_index); } } return ref; } }; #pragma pack(pop) static std::unordered_map g_dictionary; struct token { std::string m_token; bool m_cap_check; bool m_replaced_flag; token() : m_cap_check(false), m_replaced_flag(false) { } token(const std::string& token, bool cap_check, bool replaced_flag) : m_token(token), m_cap_check(cap_check), m_replaced_flag(replaced_flag) { } }; std::unordered_set g_unique_tokens; std::vector g_hatch_exception_tokens; static void init_hatch_cap_exception_tokens() { g_hatch_exception_tokens.resize(std::size(g_cap_exceptions)); std::string cur_etoken; for (uint32_t e = 0; e < std::size(g_cap_exceptions); e++) { const std::string exception_str(g_cap_exceptions[e]); string_vec& etokens = g_hatch_exception_tokens[e]; for (uint32_t i = 0; i < exception_str.size(); i++) { uint8_t c = exception_str[i]; if (c == ' ') { if (cur_etoken.size()) { etokens.push_back(cur_etoken); cur_etoken.clear(); } } else if (c == '-') { if (cur_etoken.size()) { etokens.push_back(cur_etoken); cur_etoken.clear(); } std::string s; s.push_back(c); etokens.push_back(s); } else { cur_etoken.push_back(c); } } if (cur_etoken.size()) { etokens.push_back(cur_etoken); cur_etoken.resize(0); } } } static std::string fix_capitilization(std::vector& toks, uint32_t& tok_index) { if (toks[tok_index].m_replaced_flag) return toks[tok_index].m_token; const uint32_t toks_remaining = (uint32_t)toks.size() - tok_index; // Peak ahead on the tokens to see if we need to correct any capitilization using the exception table. for (uint32_t e = 0; e < std::size(g_cap_exceptions); e++) { const string_vec& etokens = g_hatch_exception_tokens[e]; if (toks_remaining >= etokens.size()) { uint32_t i; for (i = 0; i < etokens.size(); i++) if ((string_icompare(etokens[i], toks[tok_index + i].m_token.c_str()) != 0) || toks[tok_index + i].m_replaced_flag) break; if (i == etokens.size()) { for (i = 0; i < etokens.size(); i++) { toks[tok_index + i].m_token = etokens[i]; toks[tok_index + i].m_replaced_flag = true; } std::string res(toks[tok_index].m_token); return res; } } } std::string str(toks[tok_index].m_token); if (!toks[tok_index].m_cap_check) return str; string_vec wtokens; std::string cur_wtoken; for (uint32_t i = 0; i < str.size(); i++) { uint8_t c = str[i]; if (isalpha(c) || isdigit(c) || ((c == '\'') && (i != 0) && (i != str.size() - 1))) { cur_wtoken.push_back(c); } else { if (cur_wtoken.size()) { wtokens.push_back(cur_wtoken); cur_wtoken.clear(); } std::string s; s.push_back(c); wtokens.push_back(s); } } if (cur_wtoken.size()) { wtokens.push_back(cur_wtoken); cur_wtoken.clear(); } for (uint32_t wtoken_index = 0; wtoken_index < wtokens.size(); wtoken_index++) { std::string& substr = wtokens[wtoken_index]; if (substr == "A") substr = "a"; else if (substr.size() >= 2) { bool is_all_uppercase = true; for (uint8_t c : substr) { if (!isupper(c) && (c != '\'')) { is_all_uppercase = false; break; } } if (is_all_uppercase) { auto res = g_dictionary.find(string_lower(substr)); if (res != g_dictionary.end()) { substr = res->second; } else { substr = string_lower(substr); g_unique_tokens.insert(substr); } } } } std::string res; for (uint32_t wtoken_index = 0; wtoken_index < wtokens.size(); wtoken_index++) res += wtokens[wtoken_index]; return res; } static std::unordered_map g_hatch_abbreviations_map; static void init_hatch_abbreviations_map() { for (uint32_t abbrev_index = 0; abbrev_index < std::size(g_hatch_abbreviations); abbrev_index++) { auto res = g_hatch_abbreviations_map.insert(std::make_pair(string_lower(g_hatch_abbreviations[abbrev_index].pAbbrev), g_hatch_abbreviations[abbrev_index])); if (!res.second) panic("Mutiple Hatch abbreviation: %s", res.first->first.c_str()); } } // Expand abbreviations static void expand_abbreviations_internal(bool first_line, std::string orig_token, const string_vec& tokens, uint32_t cur_tokens_index, std::vector& toks) { const uint32_t MAX_ABBREVS = 5; uint32_t k; for (k = 0; k < MAX_ABBREVS; k++) { std::string new_token(orig_token); auto find_res = g_hatch_abbreviations_map.find(string_lower(orig_token)); if (find_res != g_hatch_abbreviations_map.end()) { if (!first_line || !find_res->second.m_forbid_firstline) { new_token = find_res->second.pExpansion; if (new_token.size()) toks.push_back(token(new_token, !first_line && (new_token == orig_token), false)); break; } } if ((orig_token.size() >= 4) && (uisupper(orig_token[0]))) { std::string month_suffix(orig_token); month_suffix.erase(0, 3); if ((month_suffix.size() <= 4) && string_is_digits(month_suffix)) { std::string month_prefix(orig_token); month_prefix.erase(3, month_prefix.size() - 3); std::string search_prefix(string_upper(month_prefix)); static const char* g_hmonths[12] = { "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JLY", "AUG", "SEP", "OCT", "NOV", "DEC" }; uint32_t m; for (m = 0; m < 12; m++) if (search_prefix == g_hmonths[m]) break; if (m < 12) { toks.push_back(token(g_months[m], !first_line, false)); // TODO: This can be improved by checking the # before the token long long val = atoll(month_suffix.c_str()); if (val > 31) month_suffix = '\'' + month_suffix; toks.push_back(token(month_suffix, !first_line, false)); break; } } } size_t p; if ((p = orig_token.find_first_of('.')) == std::string::npos) { // No period(s) - we're done. if (new_token.size()) toks.push_back(token(new_token, !first_line, false)); break; } // Specifically detect abbrev. first names like "A." etc. and expand them. if (!first_line && (orig_token.size() > 4) && (p == 1) && uisupper(orig_token[0]) && uisupper(orig_token[2])) { std::string first_name(orig_token); first_name.erase(2, first_name.size() - 2); toks.push_back(token(first_name, false, false)); orig_token.erase(0, p + 1); } else { // Detect words starting with an abbreviation ending in "." std::string prefix(orig_token); prefix.erase(p + 1, prefix.size() - (p + 1)); find_res = g_hatch_abbreviations_map.find(string_lower(prefix)); if ((find_res != g_hatch_abbreviations_map.end()) && (!first_line || !find_res->second.m_forbid_firstline)) { new_token = find_res->second.pExpansion; toks.push_back(token(new_token, false, false)); orig_token.erase(0, p + 1); } else { if (new_token.size()) toks.push_back(token(new_token, !first_line, false)); break; } } } // k if (k == MAX_ABBREVS) { if (orig_token.size()) toks.push_back(token(orig_token, !first_line, false)); } } static bool is_sentence_ender(uint8_t c) { return (c == '!') || (c == '.') || (c == '?'); } static void expand_abbreviations(bool first_line, std::string orig_token, const string_vec& tokens, uint32_t cur_tokens_index, std::vector& toks) { std::string new_token(orig_token); // Temporarily remove " and ' prefix/suffix chars from the token, before the abbrev checks. std::string prefix_char, suffix_char; if (orig_token.size() >= 3) { if ((orig_token[0] == '\'') || (orig_token[0] == '\"')) { prefix_char.push_back(orig_token[0]); orig_token.erase(0, 1); new_token = orig_token; } if ((orig_token.back() == '\'') || (orig_token.back() == '\"')) { suffix_char.push_back(orig_token.back()); orig_token.pop_back(); new_token = orig_token; } } const size_t first_tok = toks.size(); expand_abbreviations_internal(first_line, orig_token, tokens, cur_tokens_index, toks); const size_t num_toks = toks.size() - first_tok; assert(num_toks); const size_t last_tok = first_tok + num_toks - 1; if (prefix_char.size()) toks[first_tok].m_token = prefix_char + toks[first_tok].m_token; if (suffix_char.size()) toks[last_tok].m_token = toks[last_tok].m_token + suffix_char; } static std::string decode_hatch(const std::string& str, bool first_line) { std::string res; string_vec tokens; std::string cur_token; bool inside_space = false; int prev_c = -1; // Phase 1: Tokenize the input string based off examination of (mostly) individual chars, previous chars and upcoming individual chars. for (uint32_t i = 0; i < str.size(); i++) { uint8_t c = str[i]; const bool is_two_dots = (c == '.') && ((i + 1) < str.size()) && (str[i + 1] == '.'); const bool is_one_equals = (c == '1') && ((i + 1) < str.size()) && (str[i + 1] == '='); const bool prev_is_digit = i && uisdigit(str[i - 1]); const bool next_is_plus = ((i + 1) < str.size()) && (str[i + 1] == '+'); //const bool has_prev = (i != 0); //const bool has_next = (i + 1) < str.size(); if (c == ' ') { if (cur_token.size()) { tokens.push_back(cur_token); cur_token.clear(); } inside_space = true; } else if (is_one_equals) { if (cur_token.size()) { tokens.push_back(cur_token); cur_token.clear(); } tokens.push_back("1="); i++; inside_space = false; } else if ( (c == ';') || ((c >= 0x18) && (c <= 0x1b)) || (c == '<') || (c == '>') || (c == '=') || (c == '/') || (c == ',') || (c == '?') || (c == '!') || ((!prev_is_digit || next_is_plus) && (c == '+')) || (c == '@') || (c == '-') || is_two_dots ) { if (cur_token.size()) { tokens.push_back(cur_token); cur_token.clear(); } std::string s; s.push_back(c); if (is_two_dots) { s += "."; i++; } tokens.push_back(s); inside_space = false; } else { cur_token.push_back(c); inside_space = false; if ((c == 0xf8) || // code page 437 degree sym (prev_is_digit && (c == '+') && !next_is_plus)) { tokens.push_back(cur_token); cur_token.clear(); } } prev_c = c; } if (cur_token.size()) tokens.push_back(cur_token); // Phase 2: Exceptional fixups that change or split tokens up into multiple tokens. string_vec new_tokens; for (uint32_t i = 0; i < tokens.size(); i++) { std::string tok(tokens[i]); // Convert "BBK#" if (string_begins_with(tok, "BBK#") && (tok.size() > 4)) { new_tokens.push_back("Project Bluebook Case #"); tok.erase(0, 4); new_tokens.push_back(tok); continue; } // Split "k'alt" if (string_ends_in(tok, "k'alt")) { tok.erase(tok.size() - 3, 3); new_tokens.push_back(tok); new_tokens.push_back("Alt"); continue; } // Convert "HI+LO" if ((i + 2 < tokens.size()) && (tokens[i] == "HI") && (tokens[i + 1] == "+") && (tokens[i + 2] == "LO")) { tokens.push_back("high and low"); i += 2; continue; } // Don't split "4rth" to "4 rth" etc. if ((string_icompare(tok, "4RTH") == 0) || (string_icompare(tok, "3rds") == 0) || (string_icompare(tok, "16th") == 0)) { new_tokens.push_back(tok); continue; } if (string_ends_in(tok, "Kmph")) { new_tokens.push_back(tok); continue; } if (tok == "12Ocm") { new_tokens.push_back("120cm"); continue; } if (string_icompare(tok, "3OOM") == 0) { new_tokens.push_back("300m"); continue; } // If the first char isn't a digit then just continue now, because the rest of this code is concerned with splitting numbers away from words. if (!isdigit(tok[0])) { new_tokens.push_back(tok); continue; } if (tok.size() >= 3) { // Check for 1-7 digits then ' followed by 1- letters and split uint32_t j; for (j = 1; j < tok.size(); j++) if (tok[j] == '\'') break; if ((j < tok.size()) && (j != tok.size() - 1) && (j <= 7)) { uint32_t k; for (k = 1; k < j; k++) if (!uisdigit(tok[k]) && (utolower(tok[k]) != 'x') && (utolower(tok[k]) != 'k') && (tok[k] != '.')) break; if ((k == j) && (uisalpha(tok[j + 1]))) { int sp = j + 1; std::string new_tok(tok); new_tok.erase(0, sp); std::string n(tok); n.erase(sp, n.size() - sp); new_tokens.push_back(n); new_tokens.push_back(new_tok); continue; } } } // Won't split digits away for tokens < 4 chars if ((tok.size() < 4) || (tok == "6F6s")) { new_tokens.push_back(tok); continue; } // Check for 1-2 digits and alpha and split // TODO: support 3-4 digits int split_point = -1; if (uisalpha(tok[1])) split_point = 1; else if (uisdigit(tok[1]) && uisalpha(tok[2]) && uisalpha(tok[3])) split_point = 2; if (split_point > 0) { std::string new_tok(tok); new_tok.erase(0, split_point); // Don't split the number digits from some special cases, like hr, cm, mph, etc. if ((string_icompare(new_tok, "hr") != 0) && (string_icompare(new_tok, "nd") != 0) && (string_icompare(new_tok, "kw") != 0) && (string_icompare(new_tok, "cm") != 0) && (string_icompare(new_tok, "km") != 0) && (string_icompare(new_tok, "mph") != 0) && (string_icompare(new_tok, "kph") != 0) && (!string_begins_with(new_tok, "K'"))) { std::string n(tok); n.erase(split_point, n.size() - split_point); new_tokens.push_back(n); if (new_tok == "min") new_tok = "minute(s)"; new_tokens.push_back(new_tok); } else { new_tokens.push_back(tok); } } else { new_tokens.push_back(tok); } } tokens.swap(new_tokens); std::vector toks; // Phase 3: Compose new string, expanding abbreviations and tokens to one or more words, or combining together special sequences of tokens into specific phrases. // Also try to carefully insert spaces into the output, as needed. for (uint32_t i = 0; i < tokens.size(); i++) { const uint32_t num_tokens_left = ((uint32_t)tokens.size() - 1) - i; const bool has_prev_token = i > 0, has_next_token = (i + 1) < tokens.size(); const bool next_token_is_slash = (has_next_token) && (tokens[i + 1][0] == '/'); bool is_next_dir = false; if (has_next_token) { uint32_t ofs = 1; if (tokens[i + 1] == ">") { ofs = 2; } if ((i + ofs) < tokens.size()) { std::string next_tok = string_upper(tokens[i + ofs]); if ((next_tok.back() == '.') && (next_tok.size() >= 2)) next_tok.pop_back(); if ((next_tok == "N") || (next_tok == "S") || (next_tok == "E") || (next_tok == "W") || (next_tok == "SW") || (next_tok == "SE") || (next_tok == "NW") || (next_tok == "NE") || (next_tok == "NNE") || (next_tok == "NNW") || (next_tok == "SSE") || (next_tok == "SSW") || (next_tok == "ESE")) { is_next_dir = true; } } } std::string orig_token(tokens[i]); std::string new_token(orig_token); if (!orig_token.size()) continue; // Handle various exceptions before expending abbreviations // TODO: Refactor to table(s) // Special handling for RUSS/RUSS. if ((tokens[i] == "RUSS") || (tokens[i] == "RUSS.") || (tokens[i] == "RUS") || (tokens[i] == "RUS.")) { if (first_line) new_token = "Russia"; else new_token = "Russian"; } // AA FLITE #519 - exception // AA LINER else if ((tokens[i] == "AA") && (num_tokens_left >= 1) && ((tokens[i + 1] == "FLITE#519") || (tokens[i + 1] == "LINER"))) { new_token = "AA"; } // bright Lt. else if ((tokens[i] == "VBRITE") && (num_tokens_left >= 1) && (tokens[i + 1] == "LT")) { new_token = "vibrant bright light"; i++; } // ENERGY SRC else if ((tokens[i] == "ENERGY") && (num_tokens_left >= 1) && (tokens[i + 1] == "SRC")) { new_token = "energy source"; i++; } // mid air - exception else if ((tokens[i] == "MID") && (num_tokens_left >= 1) && (tokens[i + 1] == "AIR")) { new_token = "mid"; } // /FORMN or /formation - exception else if ((string_icompare(tokens[i], "/") == 0) && (num_tokens_left >= 1) && ((string_icompare(tokens[i + 1], "FORMN") == 0) || (string_icompare(tokens[i + 1], "formation") == 0))) { new_token = "in formation"; i++; } // /FORMNs - exception else if ((string_icompare(tokens[i], "/") == 0) && (num_tokens_left >= 1) && ((string_icompare(tokens[i + 1], "FORMNs") == 0) || (string_icompare(tokens[i + 1], "formations") == 0))) { new_token = "in formations"; i++; } // LOST/CLOUDS - exception else if ((string_icompare(tokens[i], "LOST") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "/") && (string_icompare(tokens[i + 2], "CLOUDS") == 0)) { new_token = "lost in clouds"; i += 2; } // LOST/DISTANCE - exception else if ((string_icompare(tokens[i], "LOST") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "/") && (string_icompare(tokens[i + 2], "DISTANCE") == 0)) { new_token = "lost in the distance"; i += 2; } // W-carbide - exception else if ((string_icompare(tokens[i], "W") == 0) && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "carbide") == 0)) { new_token = "W"; } // mid-sky - exception else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "SKY")) { new_token = "mid"; } // mid-flite - exception else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "FLITE")) { new_token = "mid"; } // mid-city - exception else if ((tokens[i] == "MID") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "CITY")) { new_token = "mid"; } // W vee - exception else if ((tokens[i] == "W") && (num_tokens_left >= 1) && (tokens[i + 1] == "VEE")) { new_token = "with vee"; i++; } // Lake Mi - exception else if ((tokens[i] == "LAKE") && (num_tokens_left >= 1) && (tokens[i + 1] == "Mi")) { new_token = "Lake Michigan"; i++; } // SCI-FI else if ((tokens[i] == "SCI") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "FI")) { new_token = "Sci-Fi"; i += 2; } // V-tall else if ((tokens[i] == "V") && (num_tokens_left >= 2) && (tokens[i + 1] == "-") && (tokens[i + 2] == "TALL")) { new_token = "very tall"; i += 2; } // 1 OBS/1 OBS. at beginning else if ((i == 1) && (tokens[0] == "1") && (tokens[1] == "OBS" || tokens[1] == "OBS.")) { new_token = "observer"; } // CLR WEATHER exception else if ((num_tokens_left >= 1) && (tokens[i] == "CLR") && (tokens[i + 1] == "WEATHER")) { new_token = "clear"; } // WATER DOMES exception (typo fix) else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "WATER") == 0) && (string_icompare(tokens[i + 1], "DOMES") == 0)) { new_token = "water comes"; i++; } // W dome exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "W") == 0) && (string_icompare(tokens[i + 1], "DOME") == 0)) { new_token = "with"; } // CLR SKY exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "SKY") == 0)) { new_token = "clear"; } // CLR DOME exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "DOME") == 0)) { new_token = "clear"; } // CLR DOMED exception else if ((num_tokens_left >= 2) && (string_icompare(tokens[i], "CLR") == 0) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "DOMED") == 0)) { new_token = "clear"; } // CLR DOME exception else if ((num_tokens_left >= 2) && (string_icompare(tokens[i], "CLR") == 0) && (tokens[i + 1] == "-") && (string_icompare(tokens[i + 2], "DOME") == 0)) { new_token = "clear"; } // CLR RDR exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "RDR") == 0)) { new_token = "clear"; } // CLR CLOCKPIT exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "COCKPIT") == 0)) { new_token = "clear"; } // CLR TORUS exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "TORUS") == 0)) { new_token = "clear"; } // CLR DAY exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "DAY") == 0)) { new_token = "clear"; } // CLR PLASTIC exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "PLASTIC") == 0)) { new_token = "clear"; } // CLR FOTOS exception (a guess, need to verify) else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "FOTOS") == 0)) { new_token = "clear"; } // CLR FOTO exception (a guess, need to verify) else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "FOTO") == 0)) { new_token = "clear"; } // CLR SHOT exception (a guess, need to verify) else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "SHOT") == 0)) { new_token = "clear"; } // CLR BLUE exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BLUE") == 0)) { new_token = "clear"; } // CLR BUBBLE exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BUBBLE") == 0)) { new_token = "clear"; } // CLR BUBBLES exception else if ((num_tokens_left >= 1) && (string_icompare(tokens[i], "CLR") == 0) && (string_icompare(tokens[i + 1], "BUBBLES") == 0)) { new_token = "clear"; } // S+Cu exception else if ((num_tokens_left >= 2) && (tokens[i] == "S") && (tokens[i + 1] == "+") && (tokens[i + 2] == "Cu")) { new_token = "S"; } // IND OBS exception else if ((num_tokens_left >= 1) && (tokens[i] == "IND") && (tokens[i + 1] == "OBS")) { new_token = "independent"; } // L<>R else if ((num_tokens_left >= 3) && (tokens[i] == "L") && (tokens[i + 1] == "<") && (tokens[i + 2] == ">") && (tokens[i + 3] == "R")) { new_token = "left and right"; i += 3; } // <+> else if ((num_tokens_left >= 2) && (tokens[i] == "<") && (tokens[i + 1] == "+") && (tokens[i + 2] == ">")) { new_token = "left and right"; i += 2; } else if (orig_token == "NFD") { if ((!has_next_token) || next_token_is_slash) new_token = "No further details"; else new_token = "No further details [in]"; } // Up and down arrows else if ((orig_token[0] == 0x18) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == '+') && ((i + 2) < tokens.size()) && (tokens[i + 2][0] == 0x19)) { const uint32_t at_end = ((i + 3) == tokens.size()) || (tokens[i + 3][0] == '/'); new_token = !at_end ? "going up and down [to]" : "going up and down"; i += 2; } // "V BRITE" else if ((orig_token == "V") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "BRITE")) { new_token = "very bright"; i++; } // ++ else if ((orig_token == "+") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "+")) { new_token = "and more/others"; i++; } // >> else if ((orig_token == ">") && ((i + 1) < tokens.size()) && (tokens[i + 1] == ">")) { const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/'); new_token = (!at_end && !is_next_dir) ? "going quickly [to]" : "going quickly"; i++; } // >< else if ((orig_token == ">") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "<")) { new_token = "to/from"; i++; } // <> else if ((orig_token == "<") && ((i + 1) < tokens.size()) && (tokens[i + 1] == ">")) { // Larry said "between" but that sounds awkward and would require reordering tokens. new_token = "to/from/between"; i++; } // > else if (orig_token == ">") { new_token = (has_next_token && !next_token_is_slash && !is_next_dir) ? "going [to]" : "going"; } // Tree up arrows else if ((orig_token[0] == 0x18) && (num_tokens_left >= 2) && (tokens[i + 1][0] == 0x18) && (tokens[i + 2][0] == 0x18)) { const uint32_t at_end = ((i + 3) == tokens.size()) || (tokens[i + 3][0] == '/'); new_token = !at_end ? "extremely quickly going up [to]" : "extremely quickly going up"; i += 2; } // Two up arrows else if ((orig_token[0] == 0x18) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x18)) { const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/'); new_token = !at_end ? "quickly going up [to]" : "quickly going up"; i++; } // Up arrow else if (orig_token[0] == 0x18) { new_token = (has_next_token && !next_token_is_slash) ? "going up [to]" : "going up"; } // Two down arrows else if ((orig_token[0] == 0x19) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x19)) { const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/'); new_token = !at_end ? "quickly going down [to]" : "quickly going down"; i++; } // Down arrow else if (orig_token[0] == 0x19) { new_token = (has_next_token && !next_token_is_slash) ? "going down [to]" : "going down"; } // Two right arrows else if ((orig_token[0] == 0x1A) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x1A)) { const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/'); new_token = !at_end ? "quickly going right [to]" : "quickly going right"; i++; } // Right arrow else if (orig_token[0] == 0x1A) { new_token = (has_next_token && !next_token_is_slash) ? "going right [to]" : "going right"; } // Two left arrows else if ((orig_token[0] == 0x1B) && ((i + 1) < tokens.size()) && (tokens[i + 1][0] == 0x1B)) { const uint32_t at_end = ((i + 2) == tokens.size()) || (tokens[i + 2][0] == '/'); new_token = !at_end ? "quickly going left [to]" : "quickly going left"; i++; } // Left arrow else if (orig_token[0] == 0x1B) { new_token = (has_next_token && !next_token_is_slash) ? "going left [to]" : "going left"; } // / else if (orig_token[0] == '/') { new_token = "/"; } // + else if (orig_token[0] == '+') { if (!i) new_token = "also"; else if ((i != (tokens.size() - 1)) && (tokens[i + 1][0] != '/')) new_token = "and"; else new_token = "and more"; } // @ else if (orig_token[0] == '@') { new_token = "at"; } // dbl-word else if ((string_icompare(orig_token, "dbl") == 0) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-")) { new_token = "double"; } // GLOW-word else if ((string_icompare(orig_token, "GLOW") == 0) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-")) { new_token = "glowing"; } // A-test else if ((orig_token == "A") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") && ((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "TEST") == 0)) { new_token = "atomic test"; i += 2; } // A-plant else if ((orig_token == "A") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") && ((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "PLANT") == 0)) { new_token = "atomic plant"; i += 2; } // V-form else if ((orig_token == "V") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") && ((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "FORM") == 0)) { new_token = "V-formation"; i += 2; } // 1/2 (to fix spacing issues) else if ((orig_token == "1") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "/") && ((i + 2) < tokens.size()) && (tokens[i + 2] == "2")) { new_token = "1/2"; i += 2; } // "W/O" else if ((i) && (string_icompare(orig_token, "W") == 0) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "/") && ((i + 2) < tokens.size()) && (string_icompare(tokens[i + 2], "O") == 0)) { new_token = "without"; i += 2; } // "S/L" else if ((orig_token == "S") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "/") && ((i + 2) < tokens.size()) && (tokens[i + 2] == "L")) { // No idea what this means yet. new_token = "straight and level"; i += 2; } // "FOO-FIGHTERS" else if ((orig_token == "FOO") && ((i + 1) < tokens.size()) && (tokens[i + 1] == "-") && ((i + 2) < tokens.size()) && (tokens[i + 2] == "FIGHTERS")) { // Just don't let the abbreviator kick in. Thanks Larry. } // "W/word" else if ((i) && ((orig_token == "W") || (orig_token == "w")) && ((i + 1) < tokens.size()) && (tokens[i + 1] == "/") && (tokens[i - 1] != ">") && (tokens[i - 1] != "<")) { new_token = "with"; i++; } // "1=" else if (orig_token == "1=") { new_token = "one is [a]"; } // Exception for "ORG RPT". else if ((orig_token == "ORG") && has_next_token && (tokens[i + 1] == "RPT")) { new_token = "original"; } // TODO: check for line 1 and don't expand these states // Exception for ,MT (the state) - don't change to "Mt." else if (first_line && orig_token == "MI" && has_prev_token && tokens[i - 1] == ",") { } // Exception for ,MT (the state) - don't change to "Mt." else if (first_line && orig_token == "MT" && has_prev_token && tokens[i - 1] == ",") { } // Exception for ,NE (the state) - don't change to "northeast" else if (first_line && orig_token == "NE" && has_prev_token && tokens[i - 1] == ",") { } // Exception for ,MS (the state) - don't change to "northeast" else if (first_line && orig_token == "MS" && has_prev_token && tokens[i - 1] == ",") { } // Exception for ,AL (the state) - don't change to "northeast" else if (first_line && orig_token == "AL" && has_prev_token && tokens[i - 1] == ",") { } else { expand_abbreviations(first_line, orig_token, tokens, i, toks); continue; } if (new_token.size()) toks.push_back(token(new_token, !first_line && (new_token == tokens[i]), false)); } // Phase 4: Compose the final string, converting tokens to lower/uppercase and inserting spaces as needed. std::string new_str; bool in_quote = false; for (uint32_t i = 0; i < toks.size(); i++) { std::string new_token(toks[i].m_token); if (!new_token.size()) continue; if (!first_line) new_token = fix_capitilization(toks, i); // Add a space if the previous string is not empty - excluding special cases where a space isn't necessary. if (new_str.size() && (new_token != "..") && (new_token != ",") && (new_token != "!") && (new_token != "?") && (new_token != "+") && (!((new_token == ")") && (new_str.back() == '?'))) && (new_token != ";") && (new_str.back() != ';') && (new_token != "-") && (new_str.back() != '-') && (new_str.back() != '#') && (new_str.back() != '+') && (!(in_quote && (new_token == "\"") && new_str.size() && is_sentence_ender(new_str.back()))) ) { new_str.push_back(' '); //new_str.push_back('*'); } // Append the token string to the output string new_str += new_token; for (uint8_t c : new_token) if (c == '\"') in_quote = !in_quote; } return new_str; } static void decode_hatch_desc(const udb_rec* pRec, std::string& db_str, std::string& loc_str, std::string& desc_str) { for (uint32_t i = 0; i < UDB_REC_TEXT_SIZE; i++) { if (pRec->get_text()[i] == 0) break; db_str.push_back(pRec->get_text()[i]); } std::string orig_desc(db_str); string_vec desc; for (; ; ) { size_t pos = orig_desc.find_first_of(':'); if (pos == std::string::npos) { desc.push_back(string_trim(orig_desc)); break; } else { std::string s(orig_desc); s.erase(pos, s.size() - pos); desc.push_back(string_trim(s)); orig_desc.erase(0, pos + 1); } } for (uint32_t i = 0; i < desc.size(); i++) { std::string str(decode_hatch(desc[i], !i)); if (!str.size()) continue; if (desc_str.size()) { if (desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?') desc_str += "."; desc_str += " "; } if (!i) { loc_str = string_upper(str); } else { if (uislower(str[0])) str[0] = utoupper(str[0]); else if ((str[0] == '\"') && (str.size() >= 2) && (uislower(str[1]))) str[1] = utoupper(str[1]); else if ((str[0] == '\'') && (str.size() >= 2) && (uislower(str[1]))) str[1] = utoupper(str[1]); else if ((str[0] == '(') && (str.size() >= 2) && (uislower(str[1]))) str[1] = utoupper(str[1]); desc_str += str; } } if (desc_str.size() && desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?') { if ((desc_str.back() == ')') && (!string_ends_in(desc_str, "(s)"))) { desc_str.pop_back(); if (desc_str.back() == ' ') desc_str.pop_back(); if (desc_str.size() && desc_str.back() != '.' && desc_str.back() != '!' && desc_str.back() != '?') desc_str += "."; desc_str += ")"; } else { desc_str += "."; } } db_str = dos_to_utf8(db_str); loc_str = dos_to_utf8(loc_str); desc_str = dos_to_utf8(desc_str); } template static void check_for_hatch_tab_dups(const T& tab) { std::unordered_set ids; for (const auto& x : tab) if (!ids.insert(x.m_ref).second) panic("Duplicate hatch ref table id"); } static void init_dict() { string_vec dict; uprintf("Reading dictionary\n"); bool utf8_flag = false; if (!read_text_file("uppercase_dict.txt", dict, utf8_flag)) panic("Failed reading uppercase_dict.txt"); for (auto str : dict) { string_trim(str); if (str.size() && uisupper(str[0])) { g_dictionary.insert(std::make_pair(string_lower(str), str)); } } uprintf("Done reading dictionary, %u uppercase words\n", g_dictionary.size()); } void udb_init() { assert(sizeof(udb_rec) == UDB_RECORD_SIZE); check_for_hatch_tab_dups(g_hatch_refs); check_for_hatch_tab_dups(g_hatch_refs_93); check_for_hatch_tab_dups(g_hatch_refs_96); check_for_hatch_tab_dups(g_hatch_refs_97); check_for_hatch_tab_dups(g_hatch_refs_98); for (uint32_t i = 0; i < std::size(g_hatch_refs); i++) g_hatch_refs_tab[g_hatch_refs[i].m_ref] = g_hatch_refs[i].m_pDesc; init_hatch_abbreviations_map(); init_hatch_cap_exception_tokens(); init_dict(); } bool udb_dump() { uint8_vec udb; if (!read_binary_file("u.rnd", udb)) return false; const uint32_t TOTAL_RECS = 18123; if ((udb.size() / UDB_RECORD_SIZE) < TOTAL_RECS) panic("Invalid file size"); string_vec output; const udb_rec* pRecs = reinterpret_cast(&udb.front()); for (uint32_t rec_index = 1; rec_index < TOTAL_RECS; rec_index++) //for (uint32_t rec_index = 18038; rec_index <= 18038; rec_index++) { const udb_rec* pRec = pRecs + rec_index; std::string db_str, loc_str, desc_str; decode_hatch_desc(pRec, db_str, loc_str, desc_str); event_date ed; pRec->get_date(ed); std::string date_str(ed.get_string()); { uprintf("\n----------%u: Date: %s, Strangeness: %u, Credibility: %u\n", rec_index, date_str.c_str(), pRec->get_strangeness(), pRec->get_credibility()); std::string time; if (pRec->get_time(time)) uprintf("Time: %s\n", time.c_str()); if (pRec->get_duration()) uprintf("Duration: %u mins\n", pRec->get_duration()); if (pRec->get_elevation() != -99) uprintf("Elevation: %im\n", pRec->get_elevation()); if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999)) uprintf("Altitude: %im\n", pRec->get_rel_altitude()); uprintf("Location: %s\n", loc_str.c_str()); std::string country_name, state_or_prov_name; pRec->get_geo(country_name, state_or_prov_name); const uint32_t continent_code = pRec->get_continent_code(); uprintf("Country: %s, State/Province: %s (%s), Continent: %s\n", country_name.c_str(), state_or_prov_name.c_str(), pRec->get_state_or_prov().c_str(), (continent_code < std::size(g_hatch_continents)) ? g_hatch_continents[continent_code] : "?"); uprintf("Latitude/Longitude: %f %f, %s %s\n", pRec->get_latitude(), pRec->get_longitude(), pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str()); const uint32_t locale = pRec->get_locale(); if (locale < std::size(g_hatch_locales)) uprintf("Locale: %s\n", g_hatch_locales[locale]); uprintf("UDB Desc: %s\n", db_str.c_str()); uprintf("Decoded Desc: %s\n", desc_str.c_str()); uint32_t total_flags = 0; for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++) { if (!f) // map continue; if (pRec->get_flag(f)) total_flags++; } if (total_flags) { uprintf("Flags: "); uint32_t num_flags_printed = 0; for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++) { if (!f) // map continue; if (pRec->get_flag(f)) { uprintf("%s", g_pHatch_flag_descs[f]); num_flags_printed++; if (num_flags_printed < total_flags) { uprintf(", "); if ((num_flags_printed % 2) == 0) uprintf("\n"); } } } uprintf("\n"); } uprintf("Ref: %s\n", pRec->get_full_refs().c_str()); } output.push_back(string_format("Date: %s\nLocation: \"%s\"\nDescription: \"%s\"\n", date_str.c_str(), loc_str.c_str(), desc_str.c_str())); } string_vec toks; for (const auto& str : g_unique_tokens) toks.push_back(str); write_text_file("unique_tokens.txt", toks, false); write_text_file("output.txt", output, true); return true; } static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event& event) { std::string db_str, loc_str, desc_str; decode_hatch_desc(pRec, db_str, loc_str, desc_str); pRec->get_date(event.m_begin_date); if (event.m_begin_date.m_year <= 0) return false; std::string time; if (pRec->get_time(time)) event.m_time_str = time; event.m_date_str = event.m_begin_date.get_string(); event.m_locations.push_back(loc_str); event.m_desc = desc_str; // TODO event.m_type.push_back("sighting"); event.m_source_id = string_format("Hatch_UDB_%u", rec_index); event.m_source = "Hatch"; for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++) if ((f != cFlagMAP) && (pRec->get_flag(f))) event.m_attributes.push_back(g_pHatch_flag_descs[f]); event.m_refs.push_back(pRec->get_full_refs()); event.m_udb_data.push_back(std::make_pair("Lat/Long", string_format("%f %f", pRec->get_latitude(), pRec->get_longitude()))); event.m_udb_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%f,%f)", pRec->get_latitude(), pRec->get_longitude()))); //event.m_udb_data.push_back(std::make_pair("lat/long DMS", string_format("%s %s", pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str()))); event.m_udb_data.push_back(std::make_pair("HatchDesc", db_str)); event.m_udb_data.push_back(std::make_pair("Duration", string_format("%u", pRec->get_duration()))); std::string country_name, state_or_prov_name; pRec->get_geo(country_name, state_or_prov_name); event.m_udb_data.push_back(std::make_pair("Country", country_name)); event.m_udb_data.push_back(std::make_pair("State/Prov", state_or_prov_name)); event.m_udb_data.push_back(std::make_pair("Strangeness", string_format("%u", pRec->get_strangeness()))); event.m_udb_data.push_back(std::make_pair("Credibility", string_format("%u", pRec->get_credibility()))); const uint32_t locale = pRec->get_locale(); if (locale < std::size(g_hatch_locales)) event.m_udb_data.push_back(std::make_pair("Locale", g_hatch_locales[locale])); if (pRec->get_elevation() != -99) event.m_udb_data.push_back(std::make_pair("Elev", string_format("%i", pRec->get_elevation()))); if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999)) event.m_udb_data.push_back(std::make_pair("RelAlt", string_format("%i", pRec->get_rel_altitude()))); return true; } bool udb_convert() { uint8_vec udb; if (!read_binary_file("u.rnd", udb)) return false; const uint32_t TOTAL_RECS = 18123; if ((udb.size() / UDB_RECORD_SIZE) < TOTAL_RECS) panic("Invalid file size"); const udb_rec* pRecs = reinterpret_cast(&udb.front()); ufo_timeline timeline; for (uint32_t rec_index = 1; rec_index < TOTAL_RECS; rec_index++) { const udb_rec* pRec = pRecs + rec_index; timeline_event event; if (!convert_rec(rec_index, pRec, event)) continue; timeline.get_events().push_back(event); } if (!timeline.get_events().size()) panic("Empty timeline)"); timeline.set_name("Hatch_UDB_Timeline"); return timeline.write_file("hatch_udb.json", true); }