mirror of
https://github.com/richgel999/ufo_data.git
synced 2025-04-18 14:25:56 -04:00
Changes I've been sitting on since December
Change the language standard from MSVC's default stdcpp14 to stdcpp17. Haven't evaluated what issues may or may not be present when going to stdcpp20. Enable string pooling and multiprocessor compilation. Use the C++17 `[[fallthrough]]` and `[[maybe_unused]]` attributes. Use std::size for getting lengths of C arrays at compile time. Resave converters.cpp and ufojson_core.cpp as UTF-8 with BOM. Address various signed/unsigned warnings. Add nipcap_date_is_year_valid helper to deal with year values coming in as `int` while the constants are `uint32_t` (signed/unsigned mismatches). Fix constructor member initialization order issue in pjson.h. Explicitly handle some cJSONValueType values which have no conversions to silence unhandled-enum warnings. Fix missing comma in g_cap_exceptions list.
This commit is contained in:
parent
7af9ce6128
commit
9ffdf1ac1d
155
converters.cpp
155
converters.cpp
@ -1,4 +1,4 @@
|
||||
// converters.cpp
|
||||
// converters.cpp
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#include "ufojson_core.h"
|
||||
#include "markdown_proc.h"
|
||||
@ -44,15 +44,15 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
fputc(UTF8_BOM0, pOut_file);
|
||||
fputc(UTF8_BOM1, pOut_file);
|
||||
fputc(UTF8_BOM2, pOut_file);
|
||||
|
||||
|
||||
fprintf(pOut_file, "{\n");
|
||||
fprintf(pOut_file, "\"%s Timeline\" : [\n", pSource_override ? pSource_override : "Magonia");
|
||||
|
||||
//const uint32_t TOTAL_RECS = 923;
|
||||
|
||||
|
||||
uint32_t cur_line = 0;
|
||||
uint32_t rec_index = first_rec_index;
|
||||
|
||||
|
||||
while (cur_line < lines.size())
|
||||
{
|
||||
if (!lines[cur_line].size())
|
||||
@ -66,7 +66,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
panic("Out of lines");
|
||||
|
||||
std::string first_line(lines[cur_line++]);
|
||||
|
||||
|
||||
std::string date_str(first_line);
|
||||
if (date_str.size() > TOTAL_COLS)
|
||||
date_str.resize(TOTAL_COLS);
|
||||
@ -120,7 +120,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
|
||||
if (buf.size() < TOTAL_COLS)
|
||||
break;
|
||||
|
||||
|
||||
if (desc_lines.size() == 1)
|
||||
{
|
||||
if (buf.size() >= TOTAL_COLS)
|
||||
@ -214,9 +214,9 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
int year = -1, month = -1, day = -1;
|
||||
date_prefix_t date_prefix = cNoPrefix;
|
||||
std::string date_suffix;
|
||||
|
||||
|
||||
std::string temp_date_str(date_str);
|
||||
|
||||
|
||||
if (string_ends_in(temp_date_str, "'s"))
|
||||
{
|
||||
temp_date_str.resize(temp_date_str.size() - 2);
|
||||
@ -387,7 +387,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
{
|
||||
if (date_suffix.size())
|
||||
panic("Invalid date suffix");
|
||||
|
||||
|
||||
fprintf(pOut_file, "%i/%i", month, year);
|
||||
}
|
||||
else
|
||||
@ -415,7 +415,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
|
||||
else
|
||||
fprintf(pOut_file, " \"source_id\" : \"Magonia_%u\",\n", rec_index);
|
||||
|
||||
fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? pSource_override : u8"ValléeMagonia");
|
||||
fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? pSource_override : u8"ValléeMagonia");
|
||||
|
||||
if (pType_override)
|
||||
fprintf(pOut_file, " \"type\" : \"%s\"\n", pType_override);
|
||||
@ -898,7 +898,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
|
||||
panic("Encountered empty line");
|
||||
if (rec.size() < 54)
|
||||
panic("Line too small");
|
||||
|
||||
|
||||
std::string date_str(rec);
|
||||
date_str = string_slice(date_str, 0, 16);
|
||||
string_trim(date_str);
|
||||
@ -908,13 +908,13 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
|
||||
|
||||
rec = string_slice(rec, 52);
|
||||
string_trim(rec);
|
||||
|
||||
|
||||
fprintf(pOut_file, "{\n");
|
||||
fprintf(pOut_file, " \"date\" : \"%s\",\n", date_str.c_str());
|
||||
|
||||
|
||||
fprintf(pOut_file, " \"location\" : \"%s\",\n", escape_string_for_json(location_str).c_str());
|
||||
fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(rec).c_str());
|
||||
|
||||
|
||||
if (pType)
|
||||
fprintf(pOut_file, " \"type\" : \"%s\",\n", pType);
|
||||
|
||||
@ -923,7 +923,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
|
||||
|
||||
fprintf(pOut_file, " \"source_id\" : \"%s_%u\",\n", pSource, total_recs);
|
||||
fprintf(pOut_file, " \"source\" : \"%s\"\n", pSource);
|
||||
|
||||
|
||||
fprintf(pOut_file, "}");
|
||||
if (cur_line < lines.size())
|
||||
fprintf(pOut_file, ",");
|
||||
@ -1052,7 +1052,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
|
||||
std::vector<uint32_t> list;
|
||||
list.push_back(l);
|
||||
|
||||
|
||||
auto res = openai_res_hash.insert(std::make_pair(rec["event_crc32"].get<uint32_t>(), list));
|
||||
if (!res.second)
|
||||
(res.first)->second.push_back(l);
|
||||
@ -1174,7 +1174,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t dash_pos = line.find(u8"—");
|
||||
size_t dash_pos = line.find(u8"—");
|
||||
if (dash_pos == std::string::npos)
|
||||
panic("Failed finding dash\n");
|
||||
|
||||
@ -1206,7 +1206,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
if (temp[0] == '#')
|
||||
break;
|
||||
|
||||
size_t d = temp.find(u8"—");
|
||||
size_t d = temp.find(u8"—");
|
||||
|
||||
const uint32_t DASH_THRESH_POS = 42;
|
||||
if ((d != std::string::npos) && (d < DASH_THRESH_POS))
|
||||
@ -1306,7 +1306,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
|
||||
if (json_alt_date.size())
|
||||
fprintf(pOut_file, " \"alt_date\" : \"%s\",\n", json_alt_date.c_str());
|
||||
|
||||
|
||||
fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(desc).c_str());
|
||||
fprintf(pOut_file, " \"source_id\" : \"Eberhart_%u\",\n", event_num);
|
||||
|
||||
@ -1359,9 +1359,9 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
{
|
||||
if (total_useful_locs_printed)
|
||||
fprintf(pOut_file, ", ");
|
||||
|
||||
|
||||
fprintf(pOut_file, "\"%s\"", escape_string_for_json(loc[k]).c_str());
|
||||
|
||||
|
||||
total_useful_locs_printed++;
|
||||
}
|
||||
else
|
||||
@ -1378,7 +1378,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!ref.size())
|
||||
{
|
||||
fprintf(pOut_file, " \"ref\" : \"[Eberhart](http://www.cufos.org/pdfs/UFOsandIntelligence.pdf)\"\n");
|
||||
@ -1497,7 +1497,7 @@ bool convert_johnson()
|
||||
(string_find_first(l, "Written by Donald Johnson") != -1) ||
|
||||
(string_find_first(l, "Written by Donald A Johnson") != -1) ||
|
||||
(string_find_first(l, "Compiled from the UFOCAT computer database") != -1) ||
|
||||
(string_find_first(l, u8"© Donald A. Johnson") != -1) ||
|
||||
(string_find_first(l, u8"© Donald A. Johnson") != -1) ||
|
||||
(string_begins_with(l, "Themes: ")))
|
||||
{
|
||||
found_end = true;
|
||||
@ -1964,6 +1964,7 @@ static bool test_eberhart_date()
|
||||
return true;
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static void print_nocr(const std::string& s)
|
||||
{
|
||||
std::string new_string;
|
||||
@ -1993,8 +1994,8 @@ static void converters_test()
|
||||
uprintf("%s\n", wchar_to_utf8(utf8_to_wchar(blah, CP_ACP)).c_str());
|
||||
#endif
|
||||
|
||||
//fprintf(u8"“frightening vision”");
|
||||
//ufprintf(stderr, u8"“frightening vision”");
|
||||
//fprintf(u8"“frightening vision”");
|
||||
//ufprintf(stderr, u8"“frightening vision”");
|
||||
assert(crc32((const uint8_t*)"TEST", 4) == 0xeeea93b8);
|
||||
assert(crc32((const uint8_t*)"408tdsfjdsfjsdh893!;", 20) == 0xa044e016);
|
||||
if (!test_eberhart_date()) return panic("test_eberhart_date failed!");
|
||||
@ -2007,11 +2008,11 @@ static void converters_test()
|
||||
|
||||
//bufprintf(pIn, "A\nB \nC\n_This is a blah_[XXXX](YYYY(S))");
|
||||
|
||||
//const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr) of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, “The Ghost Rockets,” UFOs 1947–1987, Fortean Tomes, 1987, pp. 33–34))";
|
||||
// const char* p = u8R"(Blah
|
||||
//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls “Selenites.” (Maria Avxentevskaya, “[How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI) [Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)” <br/><br/>Real Clear Science, December 2, 2017) )";
|
||||
//const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr) of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, “The Ghost Rockets,” UFOs 1947–1987, Fortean Tomes, 1987, pp. 33–34))";
|
||||
// const char* p = u8R"(Blah
|
||||
//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls “Selenites.” (Maria Avxentevskaya, “[How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI) [Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)” <br/><br/>Real Clear Science, December 2, 2017) )";
|
||||
|
||||
//const char* p = u8R"(Pierre Lagrange, “[_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi)” Actualité, no. 440 (October 2017): 28–29; Wikipedia, “[Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)”))";
|
||||
//const char* p = u8R"(Pierre Lagrange, “[_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi)” Actualité, no. 440 (October 2017): 28–29; Wikipedia, “[Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)”))";
|
||||
const char* p = "<br/>blah<br/>_[Agobard,](www.blah.com)_<br/> blah<br/>blah <br/>[_Agobard_,](www.blah.com)<br/>";
|
||||
|
||||
//const char* p = "***[sssss](www.dddd.com)*** _Blah_ *Cool*_Zeek_";
|
||||
@ -2103,12 +2104,12 @@ enum
|
||||
cSlashFlag = 256
|
||||
};
|
||||
|
||||
static const struct
|
||||
static constexpr struct
|
||||
{
|
||||
const char* m_pStr;
|
||||
uint32_t m_flag;
|
||||
uint32_t m_month;
|
||||
date_prefix_t m_date_prefix;
|
||||
uint32_t m_month = 0;
|
||||
date_prefix_t m_date_prefix = cNoPrefix;
|
||||
} g_special_phrases[] =
|
||||
{
|
||||
{ "january", cMonthFlag, 1 },
|
||||
@ -2173,7 +2174,7 @@ static const struct
|
||||
{ "/", cSlashFlag }
|
||||
};
|
||||
|
||||
const uint32_t NUM_SPECIAL_PHRASES = sizeof(g_special_phrases) / sizeof(g_special_phrases[0]);
|
||||
constexpr int NUM_SPECIAL_PHRASES = static_cast<int>(std::size(g_special_phrases));
|
||||
|
||||
enum
|
||||
{
|
||||
@ -2253,12 +2254,18 @@ static int get_special_from_token(int64_t tok)
|
||||
return (int)spec;
|
||||
}
|
||||
|
||||
static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date)
|
||||
static constexpr bool nipcap_date_is_year_valid(
|
||||
int year)
|
||||
{
|
||||
assert(cSpecialTotal == NUM_SPECIAL_PHRASES);
|
||||
|
||||
const uint32_t MIN_YEAR = 1860;
|
||||
const uint32_t MAX_YEAR = 2012;
|
||||
return static_cast<uint32_t>(year) >= MIN_YEAR
|
||||
&& static_cast<uint32_t>(year) <= MAX_YEAR;
|
||||
}
|
||||
|
||||
static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date)
|
||||
{
|
||||
static_assert(cSpecialTotal == NUM_SPECIAL_PHRASES);
|
||||
|
||||
string_trim(date);
|
||||
|
||||
@ -2318,7 +2325,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
int month = convert_hex_digit(date[4]) * 10 + convert_hex_digit(date[5]);
|
||||
int day = convert_hex_digit(date[6]) * 10 + convert_hex_digit(date[7]);
|
||||
|
||||
if ((year < MIN_YEAR) || (year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(year))
|
||||
return false;
|
||||
|
||||
if (month > 12)
|
||||
@ -2351,7 +2358,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
return false;
|
||||
}
|
||||
|
||||
// Tokenize the input then only parse those cases we explictly support. Everything else is an error.
|
||||
// Tokenize the input then only parse those cases we explicitly support. Everything else is an error.
|
||||
|
||||
std::vector<int64_t> tokens;
|
||||
std::vector<int> digits;
|
||||
@ -2432,7 +2439,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
else if (digits[0] == 4)
|
||||
{
|
||||
year = (int)tokens[0];
|
||||
if ((year < MIN_YEAR) || (year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(year))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
@ -2462,7 +2469,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
{
|
||||
if (digits[0] == 4)
|
||||
{
|
||||
// YYMMXX
|
||||
// YYMMXX
|
||||
int year = 1900 + (int)(tokens[0] / 100);
|
||||
int month = (int)(tokens[0] % 100);
|
||||
|
||||
@ -2474,10 +2481,10 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
}
|
||||
else if (digits[0] == 6)
|
||||
{
|
||||
// YYYYMMXX
|
||||
// YYYYMMXX
|
||||
|
||||
int year = (int)(tokens[0] / 100);
|
||||
if ((year < MIN_YEAR) || (year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(year))
|
||||
return false;
|
||||
|
||||
int month = (int)(tokens[0] % 100);
|
||||
@ -2505,7 +2512,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
{
|
||||
// YYYYXXXX
|
||||
begin_date.m_year = (int)tokens[0];
|
||||
if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(begin_date.m_year))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
@ -2555,7 +2562,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
{
|
||||
// YYYYMMDD
|
||||
begin_date.m_year = (int)(tokens[0] / 10000);
|
||||
if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(begin_date.m_year))
|
||||
return false;
|
||||
|
||||
begin_date.m_month = (int)((tokens[0] / 100) % 100);
|
||||
@ -2577,7 +2584,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
}
|
||||
|
||||
if ((tokens.size() == 2) && (tokens[1] < 0) &&
|
||||
((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd) ||
|
||||
(((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd)) ||
|
||||
(get_special_from_token(tokens[1]) == cSpecialMid))
|
||||
)
|
||||
{
|
||||
@ -2649,7 +2656,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
|
||||
{
|
||||
// YYYYMMDD-YYYYMMDD
|
||||
end_date.m_year = (int)(tokens[2] / 10000);
|
||||
if ((end_date.m_year < MIN_YEAR) || (end_date.m_year > MAX_YEAR))
|
||||
if (!nipcap_date_is_year_valid(end_date.m_year))
|
||||
return false;
|
||||
|
||||
end_date.m_month = (int)((tokens[2] / 100) % 100);
|
||||
@ -3317,7 +3324,7 @@ bool convert_nicap(unordered_string_set& unique_urls)
|
||||
|
||||
if ((prev_orig_desc.size()) && (orig_desc == prev_orig_desc) && (js["date"] == prev_date))
|
||||
{
|
||||
// It's a repeated record, with just a different category.
|
||||
// It's a repeated record, with just a different category.
|
||||
std::string new_desc(js_doc_array.back()["desc"]);
|
||||
|
||||
new_desc += string_format(" (NICAP: %s)", g_nicap_categories[cat_index - 1]);
|
||||
@ -3391,7 +3398,7 @@ bool convert_nuk()
|
||||
{
|
||||
std::string title;
|
||||
string_vec col_titles;
|
||||
|
||||
|
||||
std::vector<string_vec> rows;
|
||||
|
||||
bool success = load_column_text("nuktest_usa.txt", rows, title, col_titles, false, "USA");
|
||||
@ -3428,9 +3435,9 @@ bool convert_nuk()
|
||||
event.m_locations.push_back(x[cColLat] + " " + x[cColLong]);
|
||||
|
||||
std::string attr;
|
||||
|
||||
|
||||
std::string t(string_upper(x[cColType]));
|
||||
|
||||
|
||||
bool salvo = false;
|
||||
if (string_ends_in(t, "_SALVO"))
|
||||
{
|
||||
@ -3491,9 +3498,9 @@ bool convert_nuk()
|
||||
panic("Invalid type");
|
||||
|
||||
event.m_desc = string_format("Nuclear test: %s. Country: %s", attr.c_str(), x[cColCountry].c_str());
|
||||
|
||||
|
||||
if ((x[cColName].size()) && (x[cColName] != "-"))
|
||||
event.m_desc += string_format(u8" Name: “%s”", x[cColName].c_str());
|
||||
event.m_desc += string_format(u8" Name: “%s”", x[cColName].c_str());
|
||||
|
||||
if (x[cColY].size())
|
||||
event.m_desc += string_format(" Yield: %sKT", x[cColY].c_str());
|
||||
@ -3510,13 +3517,13 @@ bool convert_nuk()
|
||||
|
||||
std::string latitude_dms = get_deg_to_dms(lat) + ((lat <= 0) ? " S" : " N");
|
||||
std::string longitude_dms = get_deg_to_dms(lon) + ((lon <= 0) ? " W" : " E");
|
||||
|
||||
|
||||
event.m_key_value_data.push_back(string_pair("LatLongDMS", latitude_dms + " " + longitude_dms));
|
||||
}
|
||||
|
||||
if (x[cColDepth].size())
|
||||
event.m_key_value_data.push_back(string_pair("NukeDepth", x[cColDepth]));
|
||||
|
||||
|
||||
if (x[cColMb].size())
|
||||
event.m_key_value_data.push_back(string_pair("NukeMb", x[cColMb]));
|
||||
|
||||
@ -3534,7 +3541,7 @@ bool convert_nuk()
|
||||
|
||||
event.m_key_value_data.push_back(string_pair("NukeSource", x[cColSource]));
|
||||
event.m_key_value_data.push_back(string_pair("NukeCountry", x[cColCountry]));
|
||||
|
||||
|
||||
if (x[cColLat].size() && x[cColLong].size())
|
||||
{
|
||||
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%s,%s)", x[cColLat].c_str(), x[cColLong].c_str())));
|
||||
@ -3545,9 +3552,9 @@ bool convert_nuk()
|
||||
|
||||
event.m_source = "NukeExplosions";
|
||||
event.m_source_id = event.m_source + string_format("_%u", event_id);
|
||||
|
||||
|
||||
timeline.get_events().push_back(event);
|
||||
|
||||
|
||||
event_id++;
|
||||
}
|
||||
|
||||
@ -3555,7 +3562,7 @@ bool convert_nuk()
|
||||
panic("Empty timeline)");
|
||||
|
||||
timeline.set_name("Nuclear Test Timeline");
|
||||
|
||||
|
||||
return timeline.write_file("nuclear_tests.json", true);
|
||||
}
|
||||
|
||||
@ -3563,7 +3570,7 @@ bool convert_anon()
|
||||
{
|
||||
string_vec lines;
|
||||
bool utf8_flag = false;
|
||||
|
||||
|
||||
const char* pFilename = "anon_pdf.md";
|
||||
if (!read_text_file(pFilename, lines, true, &utf8_flag))
|
||||
panic("Failed reading text file %s", pFilename);
|
||||
@ -3582,10 +3589,10 @@ bool convert_anon()
|
||||
|
||||
if (s.size() < 27)
|
||||
panic("Invalid string");
|
||||
|
||||
//[0x00000026] 0xe2 'â' char
|
||||
//[0x00000027] 0x80 '€' char
|
||||
//[0x00000028] 0x94 '”' char
|
||||
|
||||
//[0x00000026] 0xe2 'â' char
|
||||
//[0x00000027] 0x80 '€' char
|
||||
//[0x00000028] 0x94 '”' char
|
||||
|
||||
const int8_t c = -30;// (int8_t)0xE2;
|
||||
size_t dash_pos = s.find_first_of(c);
|
||||
@ -3794,7 +3801,7 @@ bool convert_anon()
|
||||
break;
|
||||
|
||||
string_trim(ns);
|
||||
|
||||
|
||||
line_index++;
|
||||
|
||||
event_strs.push_back(ns);
|
||||
@ -4056,13 +4063,13 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
|
||||
}
|
||||
}
|
||||
|
||||
if ((day_index < 0) && ((month_tok_index + 1) < tokens.size()))
|
||||
if ((day_index < 0) && ((month_tok_index + 1) < static_cast<int>(tokens.size())))
|
||||
{
|
||||
std::string& suffix_str = tokens[month_tok_index + 1];
|
||||
if (isdigit(suffix_str[0]))
|
||||
{
|
||||
bool is_time = false;
|
||||
if ((month_tok_index + 2) < tokens.size())
|
||||
if ((month_tok_index + 2) < static_cast<int>(tokens.size()))
|
||||
{
|
||||
is_time = (tokens[month_tok_index + 2] == ":");
|
||||
}
|
||||
@ -4203,7 +4210,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
|
||||
|
||||
std::string ref(string_slice(rec_text, s, l));
|
||||
|
||||
if ((e < rec_text.size()) && ((rec_text[e] == '.') || (rec_text[e] == ']')))
|
||||
if ((e < static_cast<int>(rec_text.size())) && ((rec_text[e] == '.') || (rec_text[e] == ']')))
|
||||
{
|
||||
while (s > 0)
|
||||
{
|
||||
@ -4214,7 +4221,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
|
||||
}
|
||||
}
|
||||
|
||||
if ((e < rec_text.size()) && (rec_text[e] == ']'))
|
||||
if ((e < static_cast<int>(rec_text.size())) && (rec_text[e] == ']'))
|
||||
{
|
||||
e++;
|
||||
l++;
|
||||
@ -4335,7 +4342,7 @@ bool convert_rr0()
|
||||
tm.write_file("rr0.json");
|
||||
|
||||
uprintf("Processed %u years\n", total_years);
|
||||
|
||||
|
||||
return total_years >= NUM_EXPECTED_RR0_YEARS;
|
||||
}
|
||||
|
||||
@ -4439,7 +4446,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
|
||||
str = string_lower(str);
|
||||
|
||||
int year = -1, year_tok_index = -1;
|
||||
for (year_tok_index = 0; year_tok_index < tokens.size(); year_tok_index++)
|
||||
for (year_tok_index = 0; year_tok_index < static_cast<int>(tokens.size()); year_tok_index++)
|
||||
{
|
||||
int y = atoi(tokens[year_tok_index].c_str());
|
||||
if ((y > 0) && (y >= first_year) && (y <= last_year))
|
||||
@ -4501,13 +4508,13 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
|
||||
}
|
||||
|
||||
if ((day_index < 0) &&
|
||||
((month_tok_index + 1) < tokens.size()))
|
||||
((month_tok_index + 1) < static_cast<int>(tokens.size())))
|
||||
{
|
||||
std::string& suffix_str = tokens[month_tok_index + 1];
|
||||
if (isdigit(suffix_str[0]))
|
||||
{
|
||||
bool is_time = false;
|
||||
if ((month_tok_index + 2) < tokens.size())
|
||||
if ((month_tok_index + 2) < static_cast<int>(tokens.size()))
|
||||
{
|
||||
is_time = (tokens[month_tok_index + 2] == ":");
|
||||
}
|
||||
@ -4642,7 +4649,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
|
||||
evt.m_source = "Overmeire";
|
||||
evt.m_source_id = string_format("Overmeire_%zu", tm.get_events().size());
|
||||
evt.m_refs.push_back("[_Mini catalogue chronologique des observations OVNI_, by Godelieve Van Overmeire](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html)");
|
||||
|
||||
|
||||
std::string trial_date(string_format("#%u", year));
|
||||
if (cur_date.m_month >= 1)
|
||||
{
|
||||
@ -4652,7 +4659,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
|
||||
}
|
||||
if (trial_date != strs[0])
|
||||
evt.m_desc += " (" + string_slice(strs[0], 1) + ")";
|
||||
|
||||
|
||||
tm.get_events().push_back(evt);
|
||||
|
||||
prev_year = year;
|
||||
|
@ -8168,6 +8168,7 @@ class lexer : public lexer_base<BasicJsonType>
|
||||
}
|
||||
}
|
||||
}
|
||||
JSON_HEDLEY_FALL_THROUGH;
|
||||
|
||||
// multi-line comments skip input until */ is read
|
||||
case '*':
|
||||
@ -8203,6 +8204,7 @@ class lexer : public lexer_base<BasicJsonType>
|
||||
}
|
||||
}
|
||||
}
|
||||
JSON_HEDLEY_FALL_THROUGH;
|
||||
|
||||
// unexpected character after reading '/'
|
||||
default:
|
||||
|
2
stem.c
2
stem.c
@ -329,7 +329,7 @@ static void step5()
|
||||
if (b[k] == 'e')
|
||||
{
|
||||
int a = m();
|
||||
if (a > 1 || a == 1 && !cvc(k - 1)) k--;
|
||||
if (a > 1 || (a == 1 && !cvc(k - 1))) k--;
|
||||
}
|
||||
if (b[k] == 'l' && doublec(k) && m() > 1) k--;
|
||||
}
|
||||
|
28
udb.cpp
28
udb.cpp
@ -33,6 +33,7 @@ private:
|
||||
uint8_t m_time;
|
||||
uint8_t m_ymdt; // 2-bit fields: TDMY accuracy, T lowest, 0=invalid, 1=?, 2=~, 3=accurate
|
||||
uint8_t m_duration;
|
||||
[[maybe_unused]] // -Wunused-private-field
|
||||
uint8_t m_unknown1;
|
||||
|
||||
int16_t m_enc_longtitude;
|
||||
@ -41,11 +42,13 @@ private:
|
||||
int16_t m_elevation;
|
||||
int16_t m_rel_altitude;
|
||||
|
||||
[[maybe_unused]] // -Wunused-private-field
|
||||
uint8_t m_unknown2;
|
||||
uint8_t m_continent_country; // nibbles
|
||||
|
||||
uint8_t m_state_or_prov[3];
|
||||
|
||||
[[maybe_unused]] // -Wunused-private-field
|
||||
uint8_t m_unknown3;
|
||||
|
||||
#if 0
|
||||
@ -653,8 +656,9 @@ static std::string decode_hatch(const std::string& str, bool first_line)
|
||||
string_vec tokens;
|
||||
std::string cur_token;
|
||||
|
||||
bool inside_space = false;
|
||||
int prev_c = -1;
|
||||
// written to, but never read from
|
||||
[[maybe_unused]] bool inside_space = false;
|
||||
[[maybe_unused]] int prev_c = -1;
|
||||
|
||||
// Phase 1: Tokenize the input string based off examination of (mostly) individual chars, previous chars and upcoming individual chars.
|
||||
for (uint32_t i = 0; i < str.size(); i++)
|
||||
@ -1562,11 +1566,11 @@ static void init_dict()
|
||||
}
|
||||
}
|
||||
|
||||
uprintf("Done reading dictionary, %u uppercase words\n", g_dictionary.size());
|
||||
uprintf("Done reading dictionary, %zu uppercase words\n", g_dictionary.size());
|
||||
}
|
||||
|
||||
void udb_init()
|
||||
{
|
||||
{
|
||||
assert(sizeof(udb_rec) == UDB_RECORD_SIZE);
|
||||
|
||||
check_for_hatch_tab_dups(g_hatch_refs);
|
||||
@ -1703,10 +1707,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
|
||||
decode_hatch_desc(pRec, db_str, loc_str, desc_str);
|
||||
|
||||
pRec->get_date(event.m_begin_date);
|
||||
|
||||
|
||||
if (event.m_begin_date.m_year <= 0)
|
||||
return false;
|
||||
|
||||
|
||||
std::string time;
|
||||
if (pRec->get_time(time))
|
||||
{
|
||||
@ -1719,21 +1723,21 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
|
||||
event.m_locations.push_back(loc_str);
|
||||
|
||||
event.m_desc = desc_str;
|
||||
|
||||
|
||||
// TODO
|
||||
event.m_type.push_back("sighting");
|
||||
|
||||
event.m_source_id = string_format("Hatch_UDB_%u", rec_index);
|
||||
event.m_source = "Hatch";
|
||||
|
||||
|
||||
for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++)
|
||||
if ((f != cFlagMAP) && (pRec->get_flag(f)))
|
||||
event.m_attributes.push_back(g_pHatch_flag_descs[f]);
|
||||
|
||||
event.m_refs.push_back(pRec->get_full_refs());
|
||||
|
||||
|
||||
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%f,%f)", pRec->get_latitude(), pRec->get_longitude())));
|
||||
|
||||
|
||||
event.m_key_value_data.push_back(std::make_pair("LatLong", string_format("%f %f", pRec->get_latitude(), pRec->get_longitude())));
|
||||
event.m_key_value_data.push_back(std::make_pair("LatLongDMS", string_format("%s %s", pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str())));
|
||||
|
||||
@ -1756,10 +1760,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
|
||||
|
||||
if (pRec->get_elevation() != -99)
|
||||
event.m_key_value_data.push_back(std::make_pair("Elev", string_format("%i", pRec->get_elevation())));
|
||||
|
||||
|
||||
if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999))
|
||||
event.m_key_value_data.push_back(std::make_pair("RelAlt", string_format("%i", pRec->get_rel_altitude())));
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
13
udb_tables.h
13
udb_tables.h
@ -1,5 +1,5 @@
|
||||
// udb_tables.h
|
||||
// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from
|
||||
// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from
|
||||
// the "uDb" project by Jérôme Beau, available on github here: https://github.com/RR0/uDb
|
||||
#pragma once
|
||||
|
||||
@ -60,7 +60,7 @@ static const char* g_hatch_continents[]
|
||||
struct hatch_state
|
||||
{
|
||||
const char* m_pCode;
|
||||
const char* m_pFull;
|
||||
const char* m_pFull = nullptr;
|
||||
};
|
||||
|
||||
static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std::string& state_or_prov,
|
||||
@ -677,7 +677,7 @@ static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std::
|
||||
|
||||
break;
|
||||
}
|
||||
case 6: // Asia Pacific
|
||||
case 6: // Asia Pacific
|
||||
{
|
||||
switch (country_code)
|
||||
{
|
||||
@ -1599,7 +1599,7 @@ struct hatch_abbrev
|
||||
{
|
||||
const char* pAbbrev;
|
||||
const char* pExpansion;
|
||||
bool m_forbid_firstline;
|
||||
bool m_forbid_firstline = false;
|
||||
};
|
||||
|
||||
static const hatch_abbrev g_hatch_abbreviations[] =
|
||||
@ -2956,7 +2956,7 @@ static const hatch_abbrev g_hatch_abbreviations[] =
|
||||
{ "Var.", "various", true },
|
||||
{ "Img", "image", true },
|
||||
{ "FLUCTs", "fluctuates", true },
|
||||
{ "rtps", "reports", true }, // "separate rtps"
|
||||
{ "rtps", "reports", true }, // "separate rtps"
|
||||
{ "Math.", "Mathematics", true },
|
||||
{ "indp.", "independent", true },
|
||||
{ "frag", "fragment", true },
|
||||
@ -3106,6 +3106,7 @@ static const char* g_cap_exceptions[] =
|
||||
"McChord",
|
||||
"Hetch Hetchy Aqueduct",
|
||||
"LaPaz",
|
||||
// #REVIEW Does this need to be double question mark? clang trips on "trigraph ignored" -Wtrigraphs
|
||||
"Sea Island'(??)",
|
||||
"Loren Gross",
|
||||
"Test Pilot",
|
||||
@ -3121,7 +3122,7 @@ static const char* g_cap_exceptions[] =
|
||||
"no UFO",
|
||||
"Blackcomb Mountain",
|
||||
"Harding Mall",
|
||||
"Hawkes Bay"
|
||||
"Hawkes Bay",
|
||||
"Hells Canyon",
|
||||
"Highway Patrol",
|
||||
"Hogg Mountain",
|
||||
|
BIN
ufojson.aps
BIN
ufojson.aps
Binary file not shown.
233
ufojson.cpp
233
ufojson.cpp
@ -11,6 +11,7 @@
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static void detect_bad_urls()
|
||||
{
|
||||
string_vec unique_urls;
|
||||
@ -112,7 +113,7 @@ static bool invoke_openai(const char* pPrompt_text, json& result)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return success;
|
||||
}
|
||||
|
||||
static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, json& result)
|
||||
@ -125,7 +126,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
|
||||
|
||||
if ((desc.size() >= 2) && (desc.back() == '('))
|
||||
desc.pop_back();
|
||||
|
||||
|
||||
const uint32_t MAX_SIZE = 4096; // ~1024 tokens
|
||||
if (desc.size() > MAX_SIZE)
|
||||
{
|
||||
@ -143,7 +144,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
|
||||
}
|
||||
|
||||
uprintf("Desc: %s\n\n", desc.c_str());
|
||||
|
||||
|
||||
std::string prompt_str(pPrompt_text);
|
||||
prompt_str += desc;
|
||||
prompt_str += "\"";
|
||||
@ -151,12 +152,13 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
|
||||
return invoke_openai(prompt_str.c_str(), result);
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static void process_timeline_using_openai(const ufo_timeline &timeline)
|
||||
{
|
||||
bool utf8_flag;
|
||||
json existing_results;
|
||||
load_json_object("openai_results.json", utf8_flag, existing_results);
|
||||
|
||||
|
||||
json final_result = json::object();
|
||||
|
||||
final_result["results"] = json::array();
|
||||
@ -251,6 +253,7 @@ static void process_timeline_using_openai(const ufo_timeline &timeline)
|
||||
uprintf("Success\n");
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static void process_timeline_using_python(const ufo_timeline& timeline)
|
||||
{
|
||||
json final_result = json::object();
|
||||
@ -275,7 +278,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline)
|
||||
remove("locations.json");
|
||||
|
||||
Sleep(50);
|
||||
|
||||
|
||||
int status = system("python.exe pextractlocs.py");
|
||||
if (status != EXIT_SUCCESS)
|
||||
panic("Failed running python.exe");
|
||||
@ -295,7 +298,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline)
|
||||
if (it->is_string())
|
||||
uprintf("%s\n", it->get<std::string>().c_str());
|
||||
}
|
||||
|
||||
|
||||
json new_obj = json::object();
|
||||
new_obj.emplace("index", i);
|
||||
new_obj.emplace("date", event.m_date_str);
|
||||
@ -348,6 +351,7 @@ static bool is_important_country(const std::string& s)
|
||||
return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE") || (s == "BR") || (s == "IT");
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static bool is_favored_country(const std::string& s)
|
||||
{
|
||||
return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE");
|
||||
@ -375,11 +379,13 @@ static int get_favored_country_rank(const std::string& s)
|
||||
return 7;
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static bool is_country_fcode(const std::string &fcode)
|
||||
{
|
||||
return ((fcode == "PCL") || (fcode == "PCLD") || (fcode == "PCLF") || (fcode == "PCLH") || (fcode == "PCLI") || (fcode == "PCLIX") || (fcode == "PCLS") || (fcode == "TERR"));
|
||||
}
|
||||
|
||||
[[maybe_unused]] // currently unused...
|
||||
static void process_geodata()
|
||||
{
|
||||
string_vec lines;
|
||||
@ -395,7 +401,7 @@ static void process_geodata()
|
||||
geonames.resize(13000000);
|
||||
|
||||
uint32_t total_geonames = 0;
|
||||
|
||||
|
||||
uint32_t max_col_sizes[gn_total];
|
||||
clear_obj(max_col_sizes);
|
||||
|
||||
@ -405,7 +411,7 @@ static void process_geodata()
|
||||
uint32_t total_accepted = 0;
|
||||
|
||||
json output_json = json::array();
|
||||
|
||||
|
||||
for (const auto& str : lines)
|
||||
{
|
||||
tab_locs.resize(0);
|
||||
@ -444,7 +450,7 @@ static void process_geodata()
|
||||
#endif
|
||||
|
||||
max_col_sizes[i] = std::max(max_col_sizes[i], (uint32_t)g.m_fields[i].size());
|
||||
|
||||
|
||||
cur_ofs = tab_locs[i] + 1;
|
||||
}
|
||||
|
||||
@ -453,7 +459,7 @@ static void process_geodata()
|
||||
if (g.m_fields[gn_population].size())
|
||||
{
|
||||
int pop = atoi(g.m_fields[gn_population].c_str());
|
||||
|
||||
|
||||
const int MIN_POP = 10;
|
||||
if (pop >= MIN_POP)
|
||||
has_min_pop = true;
|
||||
@ -468,7 +474,7 @@ static void process_geodata()
|
||||
switch (feature_class)
|
||||
{
|
||||
case 'T': // mountain,hill,rock,...
|
||||
if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") ||
|
||||
if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") ||
|
||||
(code == "ISL") || (code == "ISLS") || (code == "PEN") || (code == "VALS") || (code == "VALX"))
|
||||
{
|
||||
accept_flag = true;
|
||||
@ -477,7 +483,7 @@ static void process_geodata()
|
||||
case 'S': // spot, building, farm
|
||||
if ((code == "AIRB") || (code == "AIRF") || (code == "AIRP") || (code == "AIRQ") || (code == "BRKS") || (code == "CTRA") ||
|
||||
(code == "CTRS") || (code == "INSM") || (code == "ITTR") || (code == "PSN") || (code == "STNE") || (code == "USGE") ||
|
||||
(code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") ||
|
||||
(code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") ||
|
||||
(code == "PS") || (code == "PSH") || (code == "STNB") || (code == "STNS") || (code == "UNIV"))
|
||||
{
|
||||
accept_flag = true;
|
||||
@ -495,6 +501,7 @@ static void process_geodata()
|
||||
break;
|
||||
case 'H': // stream, lake, ...
|
||||
if ((code == "BAY") || (code == "BAYS") || (code == "CHN") || (code == "CHNL") || (code == "CHNM") || (code == "CHNN") ||
|
||||
// #REVIEW "CNL" is repeated twice, was something else meant here?
|
||||
(code == "CNL") || (code == "CNL") || (code == "LK") || (code == "LKN") || (code == "LKS") || (code == "RSV") || (code == "SD") || (code == "STRT"))
|
||||
{
|
||||
accept_flag = true;
|
||||
@ -527,7 +534,7 @@ static void process_geodata()
|
||||
obj["id"] = g.m_fields[gn_geonameid].size() ? atoi(g.m_fields[gn_geonameid].c_str()) : -1;
|
||||
obj["name"] = g.m_fields[gn_name];
|
||||
obj["plainname"] = g.m_fields[gn_asciiname];
|
||||
|
||||
|
||||
if (g.m_fields[gn_alternatenames].size())
|
||||
obj["altnames"] = g.m_fields[gn_alternatenames];
|
||||
|
||||
@ -539,10 +546,10 @@ static void process_geodata()
|
||||
|
||||
if (g.m_fields[gn_country_code].size())
|
||||
obj["ccode"] = g.m_fields[gn_country_code];
|
||||
|
||||
|
||||
if (g.m_fields[gn_cc2].size())
|
||||
obj["cc2"] = g.m_fields[gn_cc2];
|
||||
|
||||
|
||||
if (g.m_fields[gn_admin1_code].size())
|
||||
obj["a1"] = g.m_fields[gn_admin1_code];
|
||||
|
||||
@ -572,7 +579,7 @@ static void process_geodata()
|
||||
{
|
||||
rejected_class_counts[feature_class] = rejected_class_counts[feature_class] + 1;
|
||||
}
|
||||
|
||||
|
||||
total_geonames++;
|
||||
|
||||
if ((total_geonames % 1000000) == 0)
|
||||
@ -596,11 +603,12 @@ static void process_geodata()
|
||||
uprintf("%c %u\n", s.first, s.second);
|
||||
}
|
||||
|
||||
#if 0 // unused code...
|
||||
static const struct
|
||||
{
|
||||
const char* m_pCode;
|
||||
int m_level;
|
||||
} g_geocode_levels[] =
|
||||
} g_geocode_levels[] =
|
||||
{
|
||||
{ "ADM1", 1 },
|
||||
{ "ADM1H", 1 },
|
||||
@ -643,6 +651,7 @@ static int find_geocode_admin_level(const char* pCode)
|
||||
|
||||
return -1;
|
||||
}
|
||||
#endif // 0 // unused code...
|
||||
|
||||
struct country_info
|
||||
{
|
||||
@ -707,12 +716,12 @@ public:
|
||||
load_hierarchy();
|
||||
|
||||
uprintf("Reading world_features.json\n");
|
||||
|
||||
|
||||
if (!read_text_file("world_features.json", m_filebuf, nullptr))
|
||||
panic("Failed reading file");
|
||||
|
||||
uprintf("Deserializing JSON file\n");
|
||||
|
||||
|
||||
bool status = m_doc.deserialize_in_place((char*)&m_filebuf[0]);
|
||||
if (!status)
|
||||
panic("Failed parsing JSON document!");
|
||||
@ -731,16 +740,16 @@ public:
|
||||
//tm.start();
|
||||
|
||||
uint8_vec name_buf;
|
||||
|
||||
|
||||
m_geoid_to_rec.clear();
|
||||
m_geoid_to_rec.reserve(MAX_EXPECTED_RECS);
|
||||
|
||||
|
||||
for (uint32_t rec_index = 0; rec_index < root_arr.size(); rec_index++)
|
||||
{
|
||||
const auto& arr_entry = root_arr[rec_index];
|
||||
if (!arr_entry.is_object())
|
||||
panic("Invalid JSON");
|
||||
|
||||
|
||||
int geoid = arr_entry.find_int32("id");
|
||||
assert(geoid > 0);
|
||||
auto ins_res = m_geoid_to_rec.insert(std::make_pair(geoid, (int)rec_index));
|
||||
@ -770,7 +779,7 @@ public:
|
||||
const auto pPlainName = arr_entry.find_value_variant("plainname");
|
||||
if ((pPlainName == nullptr) || (!pPlainName->is_string()))
|
||||
panic("Missing/invalid plainname field");
|
||||
|
||||
|
||||
{
|
||||
const char* pName_str = pPlainName->get_string_ptr();
|
||||
size_t name_size = strlen(pName_str);
|
||||
@ -823,12 +832,12 @@ public:
|
||||
}
|
||||
|
||||
std::string fclass = arr_entry.find_string_obj("fclass");
|
||||
|
||||
|
||||
if (fclass == "A")
|
||||
{
|
||||
std::string fcode(arr_entry.find_string_obj("fcode"));
|
||||
|
||||
if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4"))
|
||||
|
||||
if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4"))
|
||||
{
|
||||
std::string ccode(arr_entry.find_string_obj("ccode"));
|
||||
|
||||
@ -846,7 +855,7 @@ public:
|
||||
break;
|
||||
desc += "." + a[i];
|
||||
}
|
||||
|
||||
|
||||
m_admin_map[desc].push_back(std::pair<int, int>(rec_index, get_admin_level(fcode)));
|
||||
}
|
||||
}
|
||||
@ -878,7 +887,7 @@ public:
|
||||
{
|
||||
std::vector< std::pair<int, int> >& recs = it->second;
|
||||
|
||||
std::sort(recs.begin(), recs.end(),
|
||||
std::sort(recs.begin(), recs.end(),
|
||||
[](const std::pair<int, int>& a, const std::pair<int, int>& b) -> bool
|
||||
{
|
||||
return a.second < b.second;
|
||||
@ -890,7 +899,7 @@ public:
|
||||
{
|
||||
const int cur_rec_index = recs[i].first;
|
||||
const pjson::value_variant* pCur = &m_doc[cur_rec_index];
|
||||
|
||||
|
||||
uprintf("admlevel: %u, rec: %u geoid: %u name: %s fcode: %s\n",
|
||||
recs[i].second,
|
||||
cur_rec_index, pCur->find_int32("id"), pCur->find_string_obj("name").c_str(), pCur->find_string_obj("fcode").c_str());
|
||||
@ -922,7 +931,7 @@ public:
|
||||
c = utolower(c);
|
||||
|
||||
const uint32_t hash_val = (hash_hsieh((const uint8_t *)key.c_str(), key.size()) * HASH_FMAGIC) >> HASH_SHIFT;
|
||||
|
||||
|
||||
results.resize(0);
|
||||
alt_results.resize(0);
|
||||
|
||||
@ -934,7 +943,7 @@ public:
|
||||
const pjson::value_variant* pObj = &m_doc[rec_index];
|
||||
|
||||
const char *pName = pObj->find_string_ptr("name");
|
||||
|
||||
|
||||
const char* pPlainName = pObj->find_string_ptr("plainname");
|
||||
|
||||
if ((_stricmp(pKey, pName) != 0) && (_stricmp(pKey, pPlainName) != 0))
|
||||
@ -1010,7 +1019,7 @@ public:
|
||||
|
||||
if (num_parent_admins > num_child_admins)
|
||||
return false;
|
||||
|
||||
|
||||
// Example: Anderson, Shasta County, California
|
||||
if (num_parent_admins == num_child_admins)
|
||||
{
|
||||
@ -1022,7 +1031,7 @@ public:
|
||||
for (uint32_t admin_index = 0; admin_index < num_parent_admins; admin_index++)
|
||||
{
|
||||
std::string id(string_format("a%u", admin_index + 1));
|
||||
|
||||
|
||||
std::string admin_parent(pParent->find_string_obj(id.c_str()));
|
||||
std::string admin_child(pChild->find_string_obj(id.c_str()));
|
||||
|
||||
@ -1066,16 +1075,16 @@ public:
|
||||
cRankVillageNoPopAlt, // alt
|
||||
|
||||
cRankAdminNoPop, // not a numbered admin
|
||||
|
||||
|
||||
cRankPopVillageAlt, // prim, 1-100
|
||||
cRankTownAlt, // alt, 100+
|
||||
|
||||
cRankCityLevel0Alt, // alt or alt, 1k+
|
||||
cRankCityLevel1Alt, // alt or alt, 10k+
|
||||
|
||||
|
||||
cRankAdminCapital4Alt, // alt cap4
|
||||
cRankAdmin4Alt, // alt admin4
|
||||
|
||||
|
||||
cRankAdminCapital3Alt, // alt cap3
|
||||
cRankAdmin3Alt, // alt amind3
|
||||
|
||||
@ -1085,10 +1094,10 @@ public:
|
||||
cRankVillageNoPop, // prim no pop
|
||||
|
||||
cRankAdmin, // not numbered, has pop
|
||||
|
||||
|
||||
cRankPopVillage, // prim, 1-100
|
||||
cRankTown, // prim, 100+
|
||||
|
||||
|
||||
cRankAdminCapital2Alt, // alt county seat
|
||||
cRankAdmin2Alt, // alt county
|
||||
|
||||
@ -1097,9 +1106,9 @@ public:
|
||||
|
||||
cRankPark, // prim or alt
|
||||
cRankReserve, // prim or alt
|
||||
|
||||
|
||||
cRankAdminCapital1Alt, // alt state cap
|
||||
|
||||
|
||||
cRankCityLevel0, // prim or alt, 1k+
|
||||
cRankCityLevel1, // prim or alt, 10k+
|
||||
|
||||
@ -1110,19 +1119,19 @@ public:
|
||||
cRankCityLevel3, // prim or alt, 1m+
|
||||
|
||||
cRankBaseOrAirport, // prim or alt
|
||||
|
||||
|
||||
cRankAdminCapital2, // prim county seat
|
||||
cRankAdmin2, // prim county
|
||||
|
||||
cRankAdmin2, // prim county
|
||||
|
||||
cRankAdminCapital1, // prim state cap
|
||||
|
||||
|
||||
cRankAdmin1Alt, // alt state
|
||||
|
||||
|
||||
cRankPoliticalCapital, // prim or alt
|
||||
cRankGovernmentCapital, // prim or alt
|
||||
|
||||
|
||||
cRankAdmin1, // prim state
|
||||
|
||||
|
||||
// all countries prim or alt
|
||||
cRankCountryLevel0,
|
||||
cRankCountryLevel1,
|
||||
@ -1134,10 +1143,10 @@ public:
|
||||
cRankCountryLevel7,
|
||||
cRankCountryLevel8,
|
||||
cRankCountryLevel9,
|
||||
|
||||
|
||||
cRankTotal,
|
||||
};
|
||||
|
||||
|
||||
int get_rank(const pjson::value_variant* p, bool alt_match) const
|
||||
{
|
||||
int country_index = get_country_index(p);
|
||||
@ -1265,11 +1274,11 @@ public:
|
||||
|
||||
struct resolve_results
|
||||
{
|
||||
resolve_results()
|
||||
resolve_results()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_candidates.resize(0);
|
||||
@ -1282,16 +1291,16 @@ public:
|
||||
}
|
||||
|
||||
geo_result m_best_result;
|
||||
|
||||
|
||||
uint32_t m_num_input_tokens;
|
||||
bool m_strong_match;
|
||||
|
||||
|
||||
geo_result_vec m_candidates;
|
||||
std::vector< std::pair<uint32_t, float> > m_sorted_results;
|
||||
uint32_t m_best_sorted_result_index;
|
||||
float m_best_score;
|
||||
};
|
||||
|
||||
|
||||
bool resolve(const std::string& str, resolve_results &resolve_res) const
|
||||
{
|
||||
uprintf("--- Candidates for query: %s\n", str.c_str());
|
||||
@ -1359,7 +1368,7 @@ public:
|
||||
p->find_string_ptr("fcode"),
|
||||
p->find_int32("pop"));
|
||||
#endif
|
||||
|
||||
|
||||
temp_results[toks_index].push_back({ p, false });
|
||||
}
|
||||
|
||||
@ -1389,7 +1398,7 @@ public:
|
||||
uprintf("No results\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//uprintf("Candidates for query: %s\n", str.c_str());
|
||||
|
||||
std::vector<uint32_t> valid_candidates;
|
||||
@ -1405,7 +1414,7 @@ public:
|
||||
|
||||
std::vector< std::pair<uint32_t, float> > candidate_results[TOTAL_FAVORED_COUNTRY_RANKS];
|
||||
uint32_t total_country_rankings = 0;
|
||||
uint32_t total_candidates = 0;
|
||||
[[maybe_unused]] uint32_t total_candidates = 0;
|
||||
|
||||
for (uint32_t candidate_index_iter = 0; candidate_index_iter < valid_candidates.size(); candidate_index_iter++)
|
||||
{
|
||||
@ -1449,11 +1458,11 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
candidate_score += p->find_float("pop") / 40000000.0f;
|
||||
|
||||
const int country_rank = get_favored_country_rank(ccode);
|
||||
assert(country_rank < TOTAL_FAVORED_COUNTRY_RANKS);
|
||||
assert(static_cast<uint32_t>(country_rank) < TOTAL_FAVORED_COUNTRY_RANKS);
|
||||
|
||||
if (!candidate_results[country_rank].size())
|
||||
total_country_rankings++;
|
||||
@ -1462,7 +1471,7 @@ public:
|
||||
|
||||
total_candidates++;
|
||||
}
|
||||
|
||||
|
||||
// 1. If there's just one country rank group, choose the best score in that country rank group.
|
||||
// 2. If they matched against a country, choose the highest ranking country, prioritizing the favored countries first.
|
||||
// 3. Check for states, state capitals or other significant admin districts in the favored countries, in order
|
||||
@ -1512,7 +1521,7 @@ public:
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if (total_country_rankings == 1)
|
||||
{
|
||||
// Only one ranked country group in the candidate results, so just choose the one with the highest score.
|
||||
@ -1525,9 +1534,9 @@ public:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
assert(pBest_ranking_vec);
|
||||
|
||||
|
||||
uint32_t candidate_index = (*pBest_ranking_vec)[0].first;
|
||||
|
||||
best_score = (*pBest_ranking_vec)[0].second;
|
||||
@ -1539,7 +1548,7 @@ public:
|
||||
else
|
||||
{
|
||||
// Multiple ranked country groups.
|
||||
|
||||
|
||||
// Check for US states (primary or alt)
|
||||
{
|
||||
uint32_t r_index = 0;
|
||||
@ -1564,7 +1573,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!pBest_result)
|
||||
{
|
||||
// First check for any country hits from any ranked country group.
|
||||
@ -1588,7 +1597,7 @@ public:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (pBest_result)
|
||||
break;
|
||||
}
|
||||
@ -1610,7 +1619,7 @@ public:
|
||||
//const bool was_alt = temp_results[last_tok_index][candidate_index].m_alt;
|
||||
|
||||
const int rank = get_rank(p, temp_results[last_tok_index][candidate_index].m_alt);
|
||||
|
||||
|
||||
if ((rank == cRankAdmin1Alt) || (rank == cRankAdmin1) || (rank == cRankPoliticalCapital) || (rank == cRankGovernmentCapital))
|
||||
{
|
||||
pBest_result = &temp_results[last_tok_index][candidate_index];
|
||||
@ -1620,7 +1629,7 @@ public:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (pBest_result)
|
||||
break;
|
||||
}
|
||||
@ -1686,7 +1695,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!pBest_result)
|
||||
{
|
||||
// Fall back to choosing the highest score
|
||||
@ -1698,13 +1707,13 @@ public:
|
||||
{
|
||||
const uint32_t candidate_index = r[i].first;
|
||||
const float score = r[i].second;
|
||||
|
||||
|
||||
if (score > best_score)
|
||||
{
|
||||
best_score = score;
|
||||
|
||||
pBest_result = &temp_results[last_tok_index][candidate_index];
|
||||
|
||||
|
||||
pBest_ranking_vec = &r;
|
||||
best_ranking_index = i;
|
||||
}
|
||||
@ -1730,10 +1739,9 @@ public:
|
||||
resolve_res.m_best_sorted_result_index = best_ranking_index;
|
||||
resolve_res.m_best_score = best_score;
|
||||
|
||||
const pjson::value_variant* pVariant = pBest_result->m_pVariant;
|
||||
(pVariant);
|
||||
[[maybe_unused]] const pjson::value_variant* pVariant = pBest_result->m_pVariant;
|
||||
|
||||
#if 0
|
||||
#if 0
|
||||
uprintf("Result: score:%f, alt: %u, id: %u, name: \"%s\", lat: %f, long: %f, ccode=%s, a1=%s, a2=%s, a3=%s, a4=%s, fclass: %s, fcode: %s, pop: %i\n",
|
||||
best_score,
|
||||
pBest_result->m_alt,
|
||||
@ -1759,7 +1767,7 @@ public:
|
||||
std::string ccode(p->find_string_obj("ccode"));
|
||||
std::string fclass(p->find_string_obj("fclass"));
|
||||
std::string fcode(p->find_string_obj("fcode"));
|
||||
|
||||
|
||||
std::string a[4] = { p->find_string_obj("a1"), p->find_string_obj("a2"), p->find_string_obj("a3"), p->find_string_obj("a4") };
|
||||
|
||||
uint32_t num_admins = count_admins(p);
|
||||
@ -1778,9 +1786,9 @@ public:
|
||||
if (find_res != m_admin_map.end())
|
||||
{
|
||||
const std::vector< std::pair<int, int> >& recs = find_res->second;
|
||||
|
||||
|
||||
assert(recs.size());
|
||||
|
||||
|
||||
int cur_level = recs[0].second;
|
||||
for (uint32_t j = 0; j < recs.size(); j++)
|
||||
{
|
||||
@ -1788,7 +1796,7 @@ public:
|
||||
break;
|
||||
|
||||
int rec_index = recs[j].first;
|
||||
|
||||
|
||||
const pjson::value_variant* q = &m_doc[rec_index];
|
||||
|
||||
if (i == (int)(num_admins - 1))
|
||||
@ -1832,7 +1840,7 @@ private:
|
||||
std::vector<uint_vec> m_name_hashtab;
|
||||
|
||||
std::unordered_map<int, int> m_geoid_to_rec;
|
||||
|
||||
|
||||
country_info_vec m_countries;
|
||||
std::unordered_map<int, int> m_rec_index_to_country_index;
|
||||
std::unordered_map<int, int> m_geoid_to_country_index;
|
||||
@ -1857,7 +1865,7 @@ private:
|
||||
return find_res->second;
|
||||
}
|
||||
|
||||
static void extract_tab_fields(const std::string& str, string_vec& fields)
|
||||
static void extract_tab_fields(const std::string& str, string_vec& fields)
|
||||
{
|
||||
std::vector<int> tab_locs;
|
||||
tab_locs.resize(0);
|
||||
@ -2055,6 +2063,7 @@ static const char* s_kwic_stop_words[] =
|
||||
"when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours",
|
||||
"yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems"
|
||||
};
|
||||
[[maybe_unused]]
|
||||
const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(s_kwic_stop_words);
|
||||
|
||||
static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::event_urls_map_t &event_urls, bool book_flag = false, const char *pOutput_filename_base = nullptr, const char *pTitle = nullptr, const char *pHeader = nullptr)
|
||||
@ -2072,7 +2081,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
|
||||
typedef std::unordered_map<std::string, word_usage_vec> word_map_t;
|
||||
word_map_t word_map;
|
||||
word_map.reserve(timeline.size() * 20);
|
||||
|
||||
|
||||
std::unordered_set<std::string> stop_word_set;
|
||||
for (const auto& str : s_kwic_stop_words)
|
||||
stop_word_set.insert(str);
|
||||
@ -2161,7 +2170,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
|
||||
kwic_file_strings_header[i].push_back(string_format("# <a name=\"Top\">%s, KWIC Index Page: %s</a>", pTitle, name.c_str()));
|
||||
else
|
||||
kwic_file_strings_header[i].push_back(string_format("# <a name=\"Top\">UFO Event Timeline, KWIC Index Page: %s</a>", name.c_str()));
|
||||
|
||||
|
||||
if (!book_flag)
|
||||
{
|
||||
kwic_file_strings_header[i].push_back("");
|
||||
@ -2245,7 +2254,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
|
||||
for (l = 0; l < (int)event_char_offsets.size(); l++)
|
||||
if (str_ofs == event_char_offsets[l])
|
||||
break;
|
||||
if (l == event_char_offsets.size())
|
||||
if (l == static_cast<int>(event_char_offsets.size()))
|
||||
l = 0;
|
||||
|
||||
const int PRE_CONTEXT_CHARS = 35;
|
||||
@ -2259,7 +2268,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
|
||||
// in bytes
|
||||
int start_ofs = event_char_offsets[s];
|
||||
int prefix_bytes = event_char_offsets[l] - start_ofs;
|
||||
int end_ofs = (e >= event_char_offsets.size()) ? (int)str.size() : event_char_offsets[e];
|
||||
int end_ofs = (e >= static_cast<int>(event_char_offsets.size())) ? (int)str.size() : event_char_offsets[e];
|
||||
int len = end_ofs - start_ofs;
|
||||
|
||||
std::string context_str(string_slice(str, start_ofs, len));
|
||||
@ -2340,10 +2349,10 @@ static bool load_book_json(
|
||||
json js;
|
||||
if (!load_json_object(pSource_filename, utf8_flag, js))
|
||||
return false;
|
||||
|
||||
|
||||
const uint32_t first_event_index = (uint32_t)timeline.size();
|
||||
timeline.get_events().resize(first_event_index + js.size());
|
||||
|
||||
|
||||
for (uint32_t i = 0; i < js.size(); i++)
|
||||
{
|
||||
auto obj = js[i];
|
||||
@ -2413,7 +2422,7 @@ static bool load_book_json(
|
||||
|
||||
event_urls.insert(std::make_pair((int)(i + first_event_index), url));
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2555,7 +2564,7 @@ static bool create_crashconf_kwic_index()
|
||||
{
|
||||
ufo_timeline timeline;
|
||||
ufo_timeline::event_urls_map_t event_urls;
|
||||
|
||||
|
||||
std::string header("This is an automatically generated [KWIC Index](https://en.wikipedia.org/wiki/Key_Word_in_Context) of the 2003-2009 Crash Retrieval Conference proceedings, created by [Richard Geldreich Jr.](https://twitter.com/richgel999).\n\nHere are links to each year's proceedings and each presentation:\n");
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CRASHCONF_URLS; i++)
|
||||
@ -2587,13 +2596,14 @@ static bool create_crashconf_kwic_index()
|
||||
return create_kwic_index(timeline, event_urls, true, "crashconf_kwic_", "Crash Retrieval Conference Proceedings", header.c_str());
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static int md_trim(const string_vec& args)
|
||||
{
|
||||
if (args.size() != 3)
|
||||
panic("Expecting 2 filenames\n");
|
||||
|
||||
string_vec src_file_lines;
|
||||
|
||||
|
||||
if (!read_text_file(args[1].c_str(), src_file_lines, true, nullptr))
|
||||
panic("Failed reading source file %s\n", args[1].c_str());
|
||||
|
||||
@ -2606,7 +2616,7 @@ static int md_trim(const string_vec& args)
|
||||
const std::string& str = src_file_lines[i];
|
||||
if (!str.size())
|
||||
continue;
|
||||
|
||||
|
||||
if (string_find_first(str, "---------------") >= 0)
|
||||
{
|
||||
found_header = true;
|
||||
@ -2635,7 +2645,7 @@ static int md_trim(const string_vec& args)
|
||||
if (!str.size())
|
||||
continue;
|
||||
|
||||
if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) ||
|
||||
if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) ||
|
||||
(string_find_first(str, "[Contact](Contact.html)") >= 0) ||
|
||||
(string_find_first(str, "[Home](/)") >= 0))
|
||||
{
|
||||
@ -2660,7 +2670,7 @@ static int md_trim(const string_vec& args)
|
||||
panic("Failed writing output file %s\n", args[2].c_str());
|
||||
|
||||
uprintf("Wrote file %s\n", args[2].c_str());
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
@ -2676,7 +2686,7 @@ static bool translate_record(const string_vec& in, string_vec& out)
|
||||
|
||||
string_vec prompt;
|
||||
prompt.push_back("Precisely translate this UFO/saucer event record from French to English. Preserve all formatting and new lines, especially the first 2 lines, which contain the date and location. If the record is all-caps, correct it so it's not.");
|
||||
|
||||
|
||||
prompt.push_back("\"");
|
||||
for (const auto& str : in)
|
||||
prompt.push_back(str);
|
||||
@ -2689,6 +2699,7 @@ static bool translate_record(const string_vec& in, string_vec& out)
|
||||
#endif
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static int md_translate(const string_vec& args)
|
||||
{
|
||||
if (args.size() != 3)
|
||||
@ -2763,10 +2774,10 @@ static int md_translate(const string_vec& args)
|
||||
uprintf("%s\n", cur_rec[i].c_str());
|
||||
|
||||
tran_recs.push_back(cur_rec);
|
||||
|
||||
|
||||
cur_rec.resize(0);
|
||||
}
|
||||
|
||||
|
||||
cur_rec.push_back(src_file_lines[cur_line]);
|
||||
}
|
||||
|
||||
@ -2792,7 +2803,7 @@ static int md_translate(const string_vec& args)
|
||||
if (!translate_record(tran_recs[rec_index], tran_rec))
|
||||
{
|
||||
uprintf("Failed translating record %u!\n", rec_index);
|
||||
|
||||
|
||||
if (tran_recs[rec_index].size())
|
||||
out_lines.push_back(tran_recs[rec_index][0]);
|
||||
out_lines.push_back("FAILED!\n");
|
||||
@ -2837,14 +2848,14 @@ static int md_translate(const string_vec& args)
|
||||
int wmain(int argc, wchar_t* argv[])
|
||||
{
|
||||
assert(cTotalPrefixes == sizeof(g_date_prefix_strings) / sizeof(g_date_prefix_strings[0]));
|
||||
|
||||
|
||||
string_vec args;
|
||||
convert_args_to_utf8(args, argc, argv);
|
||||
|
||||
// Set ANSI Latin 1; Western European (Windows) code page for output.
|
||||
SetConsoleOutputCP(1252);
|
||||
//SetConsoleOutputCP(CP_UTF8);
|
||||
|
||||
|
||||
converters_init();
|
||||
init_norm();
|
||||
udb_init();
|
||||
@ -2870,7 +2881,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
uprintf("Skipping file %s - already exists\n", out_filename.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
string_vec a = { "", in_filename, out_filename };
|
||||
int status = md_translate(a);
|
||||
if (status != EXIT_SUCCESS)
|
||||
@ -2878,7 +2889,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
}
|
||||
exit(0);
|
||||
#endif
|
||||
|
||||
|
||||
bool status = false, utf8_flag = false;
|
||||
|
||||
unordered_string_set unique_urls;
|
||||
@ -2891,7 +2902,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
std::string title_str("All events");
|
||||
bool conversion_flag = false;
|
||||
bool crashconf_flag = false;
|
||||
|
||||
|
||||
int arg_index = 1;
|
||||
while (arg_index < argc)
|
||||
{
|
||||
@ -2900,7 +2911,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
arg_index++;
|
||||
|
||||
const uint32_t num_args_remaining = argc - arg_index;
|
||||
|
||||
|
||||
if (t == '-')
|
||||
{
|
||||
if (arg == "-convert")
|
||||
@ -2963,7 +2974,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
uprintf("Processing successful\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
if (conversion_flag)
|
||||
{
|
||||
uprintf("Convert Overmeire:\n");
|
||||
@ -3096,7 +3107,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
panic("convert_anon failed!");
|
||||
uprintf("Success\n");
|
||||
} // if (conversion_flag)
|
||||
|
||||
|
||||
uprintf("Total unique URL's: %u\n", (uint32_t)unique_urls.size());
|
||||
|
||||
string_vec urls;
|
||||
@ -3138,7 +3149,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
status = timeline.load_json("nicap_db.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading nicap_db.json");
|
||||
|
||||
|
||||
status = timeline.load_json("trace.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading trace.json");
|
||||
@ -3154,7 +3165,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
status = timeline.load_json("ufo_evidence_hall.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading ufo_evidence_hall.json");
|
||||
|
||||
|
||||
status = timeline.load_json("nuclear_tests.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading nuclear_tests.json");
|
||||
@ -3178,7 +3189,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
status = timeline.load_json("ancient.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading hostile.json");
|
||||
|
||||
|
||||
status = timeline.load_json("pre_roswell_chap1.json", utf8_flag, nullptr, false);
|
||||
if (!status)
|
||||
panic("Failed loading pre_roswell_chap1.json");
|
||||
@ -3290,7 +3301,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
panic("Date failed sanity check");
|
||||
|
||||
}
|
||||
|
||||
|
||||
uprintf("Load success, %zu total events\n", timeline.get_events().size());
|
||||
|
||||
timeline.sort();
|
||||
@ -3298,7 +3309,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
if (filter_strings.size())
|
||||
{
|
||||
ufo_timeline new_timeline;
|
||||
|
||||
|
||||
for (uint32_t i = 0; i < timeline.size(); i++)
|
||||
{
|
||||
const timeline_event& event = timeline[i];
|
||||
@ -3337,7 +3348,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
}
|
||||
|
||||
if ( ((filter_all_flag) && (total_matched == filter_strings.size())) ||
|
||||
((!filter_all_flag) && (total_matched > 0)) )
|
||||
((!filter_all_flag) && (total_matched > 0)) )
|
||||
{
|
||||
new_timeline.get_events().push_back(event);
|
||||
}
|
||||
@ -3350,7 +3361,7 @@ int wmain(int argc, wchar_t* argv[])
|
||||
|
||||
timeline.get_events().swap(new_timeline.get_events());
|
||||
}
|
||||
|
||||
|
||||
uprintf("Writing timeline markdown\n");
|
||||
|
||||
ufo_timeline::event_urls_map_t event_urls;
|
||||
|
@ -5,6 +5,13 @@ VisualStudioVersion = 17.4.33213.308
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ufojson", "ufojson.vcxproj", "{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{2495626B-DF4D-491A-84F3-58EB01E0CAAE}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
.gitignore = .gitignore
|
||||
LICENSE = LICENSE
|
||||
README.md = README.md
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
|
@ -24,6 +24,7 @@
|
||||
<ProjectGuid>{e4a0dd72-979a-469b-9b0a-4abe0b7c93d7}</ProjectGuid>
|
||||
<RootNamespace>ufojson</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
<PreferredToolArchitecture>x64</PreferredToolArchitecture>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Label="Configuration">
|
||||
@ -51,11 +52,16 @@
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<DiagnosticsFormat>Caret</DiagnosticsFormat>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<!-- <TreatWarningAsError>true</TreatWarningAsError> -->
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
@ -87,6 +93,7 @@
|
||||
<ClCompile Include="stem.c" />
|
||||
<ClCompile Include="udb.cpp" />
|
||||
<ClInclude Include="converters.h" />
|
||||
<ClInclude Include="pjson.h" />
|
||||
<ClInclude Include="stem.h" />
|
||||
<ClInclude Include="udb_tables.h" />
|
||||
<ClCompile Include="ufojson.cpp" />
|
||||
|
@ -48,5 +48,6 @@
|
||||
<ClInclude Include="ufojson_core.h" />
|
||||
<ClInclude Include="utf8.h" />
|
||||
<ClInclude Include="utils.h" />
|
||||
<ClInclude Include="pjson.h" />
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -1,4 +1,4 @@
|
||||
// ufojson_core.cpp
|
||||
// ufojson_core.cpp
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#include "ufojson_core.h"
|
||||
#include "markdown_proc.h"
|
||||
@ -394,7 +394,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates)
|
||||
|
||||
string_trim(temp);
|
||||
}
|
||||
|
||||
|
||||
if (!temp.size())
|
||||
return false;
|
||||
|
||||
@ -443,7 +443,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates)
|
||||
|
||||
m_year = atoi(date_strs[2].c_str());
|
||||
}
|
||||
|
||||
|
||||
if (fix_20century_dates)
|
||||
{
|
||||
if ((m_year >= 1) && (m_year <= 99))
|
||||
@ -627,7 +627,7 @@ bool event_date::parse_eberhart_date_range(std::string date,
|
||||
return false;
|
||||
|
||||
d.m_plural = true;
|
||||
|
||||
|
||||
s.pop_back();
|
||||
s.pop_back();
|
||||
|
||||
@ -1342,6 +1342,11 @@ static void get_date_range(const event_date& evt, event_date& begin, event_date&
|
||||
end.m_day = 31;
|
||||
}
|
||||
break;
|
||||
|
||||
case cNoPrefix:
|
||||
case cTotalPrefixes:
|
||||
assert(!"unreachable");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1978,7 +1983,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo
|
||||
auto rocket_range = obj.find("rocket_range");
|
||||
auto source_id = obj.find("source_id");
|
||||
auto source = obj.find("source");
|
||||
|
||||
|
||||
if (desc == obj.end())
|
||||
panic("Missing desc");
|
||||
|
||||
@ -2003,7 +2008,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo
|
||||
m_date_str = (*date);
|
||||
if (!m_begin_date.parse(m_date_str.c_str(), fix_20century_dates))
|
||||
panic("Failed parsing date %s\n", m_date_str.c_str());
|
||||
|
||||
|
||||
if (end_date != obj.end())
|
||||
{
|
||||
m_end_date_str = (*end_date);
|
||||
@ -2356,12 +2361,12 @@ void ufo_timeline::create_plaintext()
|
||||
|
||||
string_vec words;
|
||||
get_string_words(te.m_plain_desc, words, nullptr, "-");
|
||||
|
||||
|
||||
for (uint32_t j = 0; j < te.m_plain_refs.size(); j++)
|
||||
{
|
||||
string_vec temp_words;
|
||||
get_string_words(te.m_plain_refs[j], temp_words, nullptr, "-");
|
||||
|
||||
|
||||
words.insert(words.end(), temp_words.begin(), temp_words.end());
|
||||
}
|
||||
|
||||
@ -2379,12 +2384,12 @@ void ufo_timeline::create_plaintext()
|
||||
std::string tmp(ustrlwr(words[j]));
|
||||
if (!tmp.size() || is_stop_word(tmp))
|
||||
continue;
|
||||
|
||||
|
||||
std::string nrm_tmp(normalize_word(tmp));
|
||||
|
||||
if (!nrm_tmp.size() || is_stop_word(nrm_tmp))
|
||||
continue;
|
||||
|
||||
|
||||
new_words.push_back(nrm_tmp);
|
||||
}
|
||||
|
||||
@ -2413,7 +2418,7 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
|
||||
last_event_index = std::max(last_event_index, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (first_event_index > last_event_index)
|
||||
panic("Can't find events");
|
||||
|
||||
@ -2424,28 +2429,28 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
|
||||
|
||||
FILE* pTimeline_file = ufopen(pTimeline_filename, "w");
|
||||
if (!pTimeline_file)
|
||||
panic("Failed creating file %s", pTimeline_file);
|
||||
panic("Failed creating file %s", pTimeline_filename);
|
||||
|
||||
fputc(UTF8_BOM0, pTimeline_file);
|
||||
fputc(UTF8_BOM1, pTimeline_file);
|
||||
fputc(UTF8_BOM2, pTimeline_file);
|
||||
fprintf(pTimeline_file, "<meta charset=\"utf-8\">\n");
|
||||
|
||||
|
||||
if ((pDate_range_desc) && (strlen(pDate_range_desc)))
|
||||
fprintf(pTimeline_file, "\n# <a name=\"Top\">UFO/UAP Event Chronology, %s, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "</a>\n\n", pDate_range_desc);
|
||||
else
|
||||
fprintf(pTimeline_file, "\n# <a name=\"Top\">UFO/UAP Event Chronology, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "</a>\n\n");
|
||||
|
||||
fputs(
|
||||
u8R"(An automated compilation by <a href="https://twitter.com/richgel999">Richard Geldreich, Jr.</a> using public data from <a href="https://en.wikipedia.org/wiki/Jacques_Vall%C3%A9e">Dr. Jacques Vallée</a>,
|
||||
u8R"(An automated compilation by <a href="https://twitter.com/richgel999">Richard Geldreich, Jr.</a> using public data from <a href="https://en.wikipedia.org/wiki/Jacques_Vall%C3%A9e">Dr. Jacques Vallée</a>,
|
||||
<a href="https://www.academia.edu/9813787/GOVERNMENT_INVOLVEMENT_IN_THE_UFO_COVER_UP_CHRONOLOGY_based">Pea Research</a>, <a href="http://www.cufos.org/UFO_Timeline.html">George M. Eberhart</a>,
|
||||
<a href="https://en.wikipedia.org/wiki/Richard_H._Hall">Richard H. Hall</a>, <a href="https://web.archive.org/web/20160821221627/http://www.ufoinfo.com/onthisday/sametimenextyear.html">Dr. Donald A. Johnson</a>,
|
||||
<a href="https://medium.com/@richgel99/1958-keziah-poster-recreation-completed-82fdb55750d8">Fred Keziah</a>, <a href="https://github.com/richgel999/uap_resources/blob/main/bluebook_uncensored_unknowns_don_berliner.pdf">Don Berliner</a>,
|
||||
<a href="https://www.openminds.tv/larry-hatch-ufo-database-creator-remembered/42142">Larry Hatch</a>, [NICAP](https://www.nicap.org/), [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jérôme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group.
|
||||
<a href="https://www.openminds.tv/larry-hatch-ufo-database-creator-remembered/42142">Larry Hatch</a>, [NICAP](https://www.nicap.org/), [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jérôme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group.
|
||||
|
||||
## Some non-summarized events fall under one of these copyrights:
|
||||
- Richard Geldreich, Jr. - Copyright (c) 2023 (events marked \"maj2\" unless otherwise attributed)
|
||||
- Dr. Jacques F. Vallée - Copyright (c) 1993
|
||||
- Dr. Jacques F. Vallée - Copyright (c) 1993
|
||||
- LeRoy Pea - Copyright (c) 9/8/1988 (updated 3/17/2005)
|
||||
- George M. Eberhart - Copyright (c) 2022
|
||||
- Dr. Donald A. Johnson - Copyright (c) 2012
|
||||
@ -2453,18 +2458,18 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
|
||||
- Larry Hatch - Copyright (c) 1992-2002
|
||||
- Thomas R. Adams - Copyright (c) 1991
|
||||
- Richard Dolan - Copyright (c) 2002
|
||||
- Jérôme Beau - Copyright (c) 2000-2023
|
||||
- Jérôme Beau - Copyright (c) 2000-2023
|
||||
|
||||
## Update History:
|
||||
- v1.46: Adding ~3700 events, translated from the French chronology [_Mini catalogue chronologique des observations OVNI_](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html) by Belgian ufologist [Godelieve Van Overmeire, 1935-2021](http://cobeps.org/fr/godelieve-van-overmeire). Note these events are from the old HTML version on archive.org, not the larger [(10k event) PDF version](http://www.cobeps.org/pdf/Chronologie-OVNI-VOG.pdf). It is unclear if these events are copyrighted. I didn't see a copyright in either the HTML or PDF versions.
|
||||
- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jérôme Beau. Its license is [here](https://rr0.org/Copyright.html).
|
||||
- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jérôme Beau. Its license is [here](https://rr0.org/Copyright.html).
|
||||
- v1.40: Added digitized events/newspaper clippings from [Frank Scully's papers at the American Heritage Center in Laramie, WY](https://archiveswest.orbiscascade.org/ark:80444/xv506256), summarized the events from the timeline on the [Disclosure Diaries](https://www.disclosurediaries.com/) website, and added more misc. events. Fixed auto-translation issue in the search page.
|
||||
- v1.38: Added a [client-side search engine](search.html). There are a bunch of features I'm going to add to this engine, for now it can only search for keywords in the desc, location and and reference fields.
|
||||
- v1.37: Updated intro text, added total number of events to each event year, added a few 1800's events.
|
||||
- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 1941–1973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY).
|
||||
- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 1941–1973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY).
|
||||
- v1.34: Added more modern events, 1917 Mystery Airplane newspaper articles.
|
||||
- v1.33: More events: Events from George D. Fawcett, short AI summaries of Stringfield's 1978 MUFON symposium presentation, and short AI summaries of the pre-industrial era sighting events from the book [_Wonders in the Sky: Unexplained Aerial Objects from Antiquity to Modern Times_](https://www.amazon.com/Wonders-Sky-Unexplained-Objects-Antiquity/dp/1585428205).
|
||||
- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes
|
||||
- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes
|
||||
- v1.28: Added KWIC (Key Word in Context) index.
|
||||
- v1.27: Imported Anonymous PDF's contents, originally from [here](https://pdfhost.io/v/gR8lAdgVd_Uap_Timeline_Prepared_By_Another), with fixed URL's
|
||||
- v1.23-1.24: Added a handful of key historical events, such as Edward Tauss the head of CIA UFO disinformation in the 50's
|
||||
@ -2482,7 +2487,7 @@ Best viewed on a desktop/laptop, not a mobile device. On Windows, Firefox works
|
||||
|
||||
I've split up the timeline into 4 parts, to reduce their sizes: distant past up to 1949, 1950-1959, 1960-1979, and 1980-present.
|
||||
|
||||
The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of
|
||||
The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of
|
||||
UFO/UAP related _events_, not necessarily or exclusively UFO _sightings_. **This is not exclusively a UFO sightings timeline or database.**
|
||||
|
||||
Some sighting reports or events appear multiple times in this timeline because they appear in more than one data source. I view this as a useful feature.
|
||||
@ -2492,7 +2497,7 @@ Currently, the events are not sorted by time of day, only by date. Some sources
|
||||
A few events don't have firm dates, for example "Summer of 1947", or "Late July 1952". In these instances the compilation code uses fixed dates I selected for date sorting purposes. (See the code for the specific dates.)
|
||||
|
||||
## Source Code:
|
||||
This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called “ufojson”. It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file);
|
||||
This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called “ufojson”. It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file);
|
||||
|
||||
fputs("\n", pTimeline_file);
|
||||
|
||||
@ -2569,7 +2574,7 @@ u8R"(## Year Ranges
|
||||
for (uint32_t i = first_event_index; i <= last_event_index; i++)
|
||||
{
|
||||
int year = timeline_events[i].m_begin_date.m_year;
|
||||
|
||||
|
||||
year_histogram[year] = year_histogram[year] + 1;
|
||||
}
|
||||
|
||||
@ -2600,7 +2605,7 @@ u8R"(## Year Ranges
|
||||
|
||||
//std::string url( string_format("[%s #%u](%s#%08X)", timeline_events[i].m_date_str.c_str(), i, html_filename.c_str(), hash) );
|
||||
//<a href = "https://www.example.com">link to Example.com< / a> inside the pre section.
|
||||
std::string url( string_format("<a href=\"%s#%08X\">%s #%u</a>",
|
||||
std::string url( string_format("<a href=\"%s#%08X\">%s #%u</a>",
|
||||
html_filename.c_str(), hash,
|
||||
timeline_events[i].m_date_str.c_str(), i) );
|
||||
|
||||
@ -2670,6 +2675,6 @@ bool ufo_timeline::load_json(const char* pFilename, bool& utf8_flag, const char*
|
||||
timeline_events[first_event_index + i].from_json(obj, pSource_override, fix_20century_dates);
|
||||
}
|
||||
|
||||
return true;
|
||||
return success;
|
||||
}
|
||||
|
||||
|
@ -67,42 +67,42 @@ struct event_date
|
||||
bool m_estimated; // (estimated)
|
||||
|
||||
event_date();
|
||||
|
||||
|
||||
event_date(const event_date& other);
|
||||
|
||||
|
||||
bool sanity_check() const;
|
||||
|
||||
|
||||
bool operator== (const event_date& rhs) const;
|
||||
|
||||
|
||||
bool operator!= (const event_date& rhs) const;
|
||||
|
||||
|
||||
event_date& operator =(const event_date& rhs);
|
||||
|
||||
|
||||
void clear();
|
||||
|
||||
|
||||
bool is_valid() const;
|
||||
|
||||
|
||||
std::string get_string() const;
|
||||
|
||||
// Parses basic dates (not ranges).
|
||||
|
||||
// Parses basic dates (not ranges).
|
||||
// Date can end in "(approximate)", "(estimated)", "?", or "'s".
|
||||
// 2 digit dates converted to 1900+.
|
||||
// Supports year, month/year, or month/day/year.
|
||||
bool parse(const char* pStr, bool fix_20century_dates);
|
||||
|
||||
|
||||
// More advanced date range parsing, used for converting the Eberhart timeline.
|
||||
// Note this doesn't support "'s", "(approximate)", "(estimated)", or converting 2 digit years to 1900'.
|
||||
static bool parse_eberhart_date_range(std::string date,
|
||||
event_date& begin_date,
|
||||
event_date& end_date, event_date& alt_date,
|
||||
int required_year = -1);
|
||||
|
||||
|
||||
// Note the returned date may be invalid. It's only intended for sorting/comparison purposes against other sort dates.
|
||||
void get_sort_date(int& year, int& month, int& day) const;
|
||||
|
||||
|
||||
// Compares two timeline dates. true if lhs < rhs
|
||||
static bool compare(const event_date& lhs, const event_date& rhs);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
static bool check_date_prefix(const event_date& date);
|
||||
@ -112,7 +112,7 @@ struct timeline_event
|
||||
{
|
||||
std::string m_date_str;
|
||||
std::string m_time_str; // military, but currently it's in any format (not parsed yet)
|
||||
|
||||
|
||||
std::string m_alt_date_str;
|
||||
std::string m_end_date_str;
|
||||
|
||||
@ -123,7 +123,7 @@ struct timeline_event
|
||||
std::string m_desc; // Markdown
|
||||
string_vec m_type;
|
||||
string_vec m_refs; // Markdown
|
||||
|
||||
|
||||
string_vec m_locations;
|
||||
string_vec m_attributes;
|
||||
string_vec m_see_also;
|
||||
@ -145,15 +145,15 @@ struct timeline_event
|
||||
std::string m_plain_desc; // Computed, ignored for comparison purposes, not deserialized from JSON
|
||||
string_vec m_plain_refs; // Computed, ignored for comparison purposes, not deserialized from JSON
|
||||
std::string m_search_words; // Computed, ignored for comparison purposes, not deserialized from JSON
|
||||
|
||||
|
||||
bool operator==(const timeline_event& rhs) const;
|
||||
bool operator!=(const timeline_event& rhs) const;
|
||||
bool operator< (const timeline_event& rhs) const;
|
||||
|
||||
void print(FILE* pFile) const;
|
||||
|
||||
|
||||
void from_json(const json& obj, const char* pSource_override, bool fix_20century_dates);
|
||||
|
||||
|
||||
void to_json(json& j) const;
|
||||
|
||||
uint32_t get_crc32() const;
|
||||
|
122
utils.cpp
122
utils.cpp
@ -114,6 +114,7 @@ std::string dos_to_utf8(const std::string& str)
|
||||
return wchar_to_utf8(wstr);
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
|
||||
{
|
||||
uint32_t buf_size = 8192;
|
||||
@ -129,7 +130,7 @@ bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (res <= buf.size() - 1)
|
||||
if (res <= static_cast<int>(buf.size() - 1))
|
||||
break;
|
||||
|
||||
buf_size *= 2;
|
||||
@ -142,6 +143,7 @@ bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
|
||||
return true;
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
void ufprintf(FILE* pFile, const char* pFmt, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
@ -155,11 +157,12 @@ void ufprintf(FILE* pFile, const char* pFmt, ...)
|
||||
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
|
||||
|
||||
// Not thread safe, but we don't care
|
||||
_setmode(_fileno(pFile), _O_U16TEXT);
|
||||
(void)_setmode(_fileno(pFile), _O_U16TEXT);
|
||||
fputws(&wbuf[0], pFile);
|
||||
_setmode(_fileno(pFile), _O_TEXT);
|
||||
(void)_setmode(_fileno(pFile), _O_TEXT);
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
void uprintf(const char* pFmt, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
@ -173,11 +176,12 @@ void uprintf(const char* pFmt, ...)
|
||||
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
|
||||
|
||||
// Not thread safe, but we don't care
|
||||
_setmode(_fileno(stdout), _O_U16TEXT);
|
||||
(void)_setmode(_fileno(stdout), _O_U16TEXT);
|
||||
fputws(&wbuf[0], stdout);
|
||||
_setmode(_fileno(stdout), _O_TEXT);
|
||||
(void)_setmode(_fileno(stdout), _O_TEXT);
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
std::string string_format(const char* pMsg, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
@ -195,6 +199,7 @@ std::string string_format(const char* pMsg, ...)
|
||||
return res;
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
void panic(const char* pMsg, ...)
|
||||
{
|
||||
char buf[4096];
|
||||
@ -256,8 +261,8 @@ int string_ifind_first(const std::string& str, const char* pPhrase)
|
||||
const size_t str_size = str.size();
|
||||
const size_t phrase_size = strlen(pPhrase);
|
||||
|
||||
assert((int)str_size == str_size);
|
||||
assert((int)phrase_size == phrase_size);
|
||||
assert(str_size == str_size);
|
||||
assert(phrase_size == phrase_size);
|
||||
assert(phrase_size);
|
||||
|
||||
if ((!str_size) || (!phrase_size) || (phrase_size > str_size))
|
||||
@ -270,7 +275,7 @@ int string_ifind_first(const std::string& str, const char* pPhrase)
|
||||
if (_strnicmp(str.c_str() + ofs, pPhrase, phrase_size) == 0)
|
||||
return (int)ofs;
|
||||
}
|
||||
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -342,7 +347,7 @@ std::string encode_url(const std::string& url)
|
||||
//const bool is_upper = (c >= 'A') && (c <= 'Z');
|
||||
//const bool is_lower = (c >= 'a') && (c <= 'z');
|
||||
|
||||
// Escape some problematic charactes that confuse some Markdown parsers (even after using Markdown '\' escapes)
|
||||
// Escape some problematic characters that confuse some Markdown parsers (even after using Markdown '\' escapes)
|
||||
if ((c == ')') || (c == '(') || (c == '_') || (c == '*'))
|
||||
{
|
||||
res.push_back('%');
|
||||
@ -451,7 +456,7 @@ bool read_binary_file(const char* pFilename, uint8_vec& buf)
|
||||
}
|
||||
_fseeki64(pFile, 0, SEEK_SET);
|
||||
|
||||
if (len > MAX_BINARY_FILE_LEN)
|
||||
if (static_cast<uint64_t>(len) > MAX_BINARY_FILE_LEN)
|
||||
return false;
|
||||
buf.resize(len);
|
||||
|
||||
@ -475,7 +480,7 @@ bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines, b
|
||||
|
||||
if (pUTF8_flag)
|
||||
*pUTF8_flag = false;
|
||||
|
||||
|
||||
while (!feof(pFile))
|
||||
{
|
||||
char buf[16384];
|
||||
@ -677,7 +682,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
|
||||
|
||||
std::string col_seps = lines[3];
|
||||
if ((!col_seps.size()) || (col_seps[0] != '-') || (col_seps.back() != '-'))
|
||||
panic("Invalid column seperator line");
|
||||
panic("Invalid column separator line");
|
||||
|
||||
for (uint32_t i = 0; i < col_seps.size(); i++)
|
||||
{
|
||||
@ -720,13 +725,13 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
|
||||
for (uint32_t i = 0; i < column_info.size(); i++)
|
||||
{
|
||||
col_titles[i] = col_line;
|
||||
|
||||
|
||||
if (column_info[i].first)
|
||||
col_titles[i].erase(0, column_info[i].first);
|
||||
|
||||
if (column_info[i].second > col_titles[i].size())
|
||||
panic("invalid columns");
|
||||
|
||||
|
||||
col_titles[i].erase(column_info[i].second, col_titles[i].size() - column_info[i].second);
|
||||
string_trim(col_titles[i]);
|
||||
}
|
||||
@ -737,7 +742,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
|
||||
|
||||
uint32_t cur_line = 4;
|
||||
|
||||
uint32_t cur_record_index = 0;
|
||||
[[maybe_unused]] uint32_t cur_record_index = 0;
|
||||
|
||||
while (cur_line < lines.size())
|
||||
{
|
||||
@ -804,7 +809,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
|
||||
l = ansi_to_utf8(l);
|
||||
|
||||
rows.push_back(col_lines);
|
||||
|
||||
|
||||
cur_record_index++;
|
||||
}
|
||||
|
||||
@ -850,11 +855,11 @@ bool invoke_curl(const std::string& args, string_vec& reply)
|
||||
uprintf("PDF file detected\n");
|
||||
|
||||
std::string filename(args);
|
||||
for (size_t i = filename.size() - 1; i >= 0; i--)
|
||||
for (int i = static_cast<int>(filename.size() - 1); i >= 0; i--)
|
||||
{
|
||||
if (filename[i] == '/')
|
||||
{
|
||||
filename.erase(0, i + 1);
|
||||
filename.erase(0, static_cast<size_t>(i + 1));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -879,8 +884,14 @@ bool invoke_curl(const std::string& args, string_vec& reply)
|
||||
new_link_deescaped.push_back(c);
|
||||
}
|
||||
|
||||
rename("__temp.html", new_link_deescaped.c_str());
|
||||
uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str());
|
||||
if (rename("__temp.html", new_link_deescaped.c_str()) == 0)
|
||||
{
|
||||
uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
uprintf("FAILED to rename __temp.html to %s\n", new_link_deescaped.c_str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -939,10 +950,10 @@ std::string string_slice(const std::string& str, size_t ofs, size_t len)
|
||||
std::string res(str);
|
||||
if (ofs)
|
||||
res.erase(0, ofs);
|
||||
|
||||
|
||||
if (len)
|
||||
res.resize(len);
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -996,7 +1007,7 @@ bool invoke_openai(const string_vec &prompt, string_vec &reply)
|
||||
// Invoke openai.exe
|
||||
const uint32_t MAX_TRIES = 3;
|
||||
uint32_t num_tries;
|
||||
|
||||
|
||||
for (num_tries = 0; num_tries < MAX_TRIES; ++num_tries)
|
||||
{
|
||||
if (num_tries)
|
||||
@ -1062,11 +1073,11 @@ bool load_json_object(const char* pFilename, bool& utf8_flag, json &result_obj)
|
||||
if (!result_obj.is_object() && !result_obj.is_array())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return success;
|
||||
}
|
||||
|
||||
void string_tokenize(
|
||||
const std::string &str,
|
||||
const std::string &str,
|
||||
const std::string &whitespace,
|
||||
const std::string &break_chars,
|
||||
string_vec &tokens,
|
||||
@ -1078,7 +1089,7 @@ void string_tokenize(
|
||||
|
||||
std::string cur_token;
|
||||
uint32_t cur_ofs = 0;
|
||||
|
||||
|
||||
for (uint32_t i = 0; i < str.size(); i++)
|
||||
{
|
||||
uint8_t c = str[i];
|
||||
@ -1129,6 +1140,7 @@ void string_tokenize(
|
||||
}
|
||||
}
|
||||
|
||||
// #NOTE In C++20, there's a PI constant in <numbers> https://en.cppreference.com/w/cpp/numeric/constants
|
||||
const double PI = 3.141592653589793238463;
|
||||
|
||||
double deg2rad(double deg)
|
||||
@ -1144,7 +1156,7 @@ double rad2deg(double rad)
|
||||
// input in degrees
|
||||
double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit)
|
||||
{
|
||||
if ((lat1 == lat2) && (lon1 == lon2))
|
||||
if ((lat1 == lat2) && (lon1 == lon2))
|
||||
return 0;
|
||||
|
||||
double theta = lon1 - lon2;
|
||||
@ -1154,7 +1166,7 @@ double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit
|
||||
|
||||
dist = dist * 60 * 1.1515;
|
||||
|
||||
switch (unit)
|
||||
switch (unit)
|
||||
{
|
||||
case 'M':
|
||||
break;
|
||||
@ -1185,37 +1197,37 @@ std::string remove_bom(std::string str)
|
||||
return str;
|
||||
}
|
||||
|
||||
int get_next_utf8_code_point_len(const uint8_t* pStr)
|
||||
int get_next_utf8_code_point_len(const uint8_t* pStr)
|
||||
{
|
||||
if (pStr == nullptr || *pStr == 0)
|
||||
if (pStr == nullptr || *pStr == 0)
|
||||
{
|
||||
// Return 0 if the input is null or points to a null terminator
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const uint8_t firstByte = *pStr;
|
||||
|
||||
if ((firstByte & 0x80) == 0)
|
||||
{
|
||||
if ((firstByte & 0x80) == 0)
|
||||
{
|
||||
// Starts with 0, ASCII character
|
||||
return 1;
|
||||
}
|
||||
else if ((firstByte & 0xE0) == 0xC0)
|
||||
{
|
||||
else if ((firstByte & 0xE0) == 0xC0)
|
||||
{
|
||||
// Starts with 110
|
||||
return 2;
|
||||
}
|
||||
else if ((firstByte & 0xF0) == 0xE0)
|
||||
{
|
||||
else if ((firstByte & 0xF0) == 0xE0)
|
||||
{
|
||||
// Starts with 1110
|
||||
return 3;
|
||||
}
|
||||
else if ((firstByte & 0xF8) == 0xF0)
|
||||
{
|
||||
else if ((firstByte & 0xF8) == 0xF0)
|
||||
{
|
||||
// Starts with 11110
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// Invalid UTF-8 byte sequence
|
||||
return -1;
|
||||
@ -1239,9 +1251,9 @@ void get_string_words(
|
||||
std::string whitespace(" \t\n\r,;:.!?()[]*/\"");
|
||||
if (pAdditional_whitespace)
|
||||
whitespace += std::string(pAdditional_whitespace);
|
||||
|
||||
|
||||
int word_start_ofs = -1;
|
||||
|
||||
|
||||
uint32_t cur_ofs = 0;
|
||||
while ((cur_ofs < str.size()) && (pStr[cur_ofs]))
|
||||
{
|
||||
@ -1303,7 +1315,7 @@ void get_string_words(
|
||||
else if (pStr[cur_ofs + 2] == 0x9D)
|
||||
is_whitespace = true;
|
||||
}
|
||||
|
||||
|
||||
if (is_whitespace)
|
||||
{
|
||||
if (cur_token.size())
|
||||
@ -1331,7 +1343,7 @@ void get_string_words(
|
||||
cur_token.push_back(pStr[cur_ofs + i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cur_ofs += l;
|
||||
}
|
||||
|
||||
@ -1347,7 +1359,7 @@ void get_string_words(
|
||||
void get_utf8_code_point_offsets(const char* pStr, int_vec& offsets)
|
||||
{
|
||||
uint32_t cur_ofs = 0;
|
||||
|
||||
|
||||
offsets.resize(0);
|
||||
|
||||
while (pStr[cur_ofs])
|
||||
@ -1439,14 +1451,14 @@ static const char* g_stop_words[] =
|
||||
"when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours",
|
||||
"yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems"
|
||||
};
|
||||
static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words);
|
||||
[[maybe_unused]] static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words);
|
||||
|
||||
std::set<std::string> g_stop_words_set;
|
||||
|
||||
void init_norm()
|
||||
{
|
||||
g_stop_words_set.clear();
|
||||
for (const auto& str : g_stop_words)
|
||||
for (const char* str : g_stop_words)
|
||||
g_stop_words_set.insert(str);
|
||||
|
||||
for (uint32_t i = 0; i < std::size(g_char_norm_up); i++)
|
||||
@ -1507,7 +1519,7 @@ void init_norm()
|
||||
}
|
||||
}
|
||||
|
||||
// Resulting characters are guaranteed to be <128 - useful for searching purposes.
|
||||
// Resulting characters are guaranteed to be <128 - useful for searching purposes.
|
||||
// Unrecognized Unicode characters are deleted.
|
||||
void normalize_diacritics(const char* pStr, std::string& res)
|
||||
{
|
||||
@ -1610,10 +1622,10 @@ std::string normalize_word(const std::string& str)
|
||||
|
||||
if (str.size() > MAX_STRING_SIZE)
|
||||
panic("String too long");
|
||||
|
||||
|
||||
char buf[MAX_STRING_SIZE + 1];
|
||||
strcpy_s(buf, sizeof(buf), str.c_str());
|
||||
|
||||
|
||||
// Convert utf8 string to lower
|
||||
utf8lwr(buf);
|
||||
|
||||
@ -1622,7 +1634,7 @@ std::string normalize_word(const std::string& str)
|
||||
norm.reserve(strlen(buf));
|
||||
|
||||
normalize_diacritics(buf, norm);
|
||||
|
||||
|
||||
// Remove any non-letter or non-digit characters (we assume this is a word, so whitespace gets removed too)
|
||||
std::string temp;
|
||||
temp.reserve(norm.size());
|
||||
@ -1676,10 +1688,10 @@ std::string string_replace(const std::string& str, const std::string& find, cons
|
||||
assert(find.size());
|
||||
if (!find.size() || !str.size())
|
||||
return str;
|
||||
|
||||
|
||||
const uint8_t* pStr = (const uint8_t *)str.c_str();
|
||||
const size_t str_size = str.size();
|
||||
|
||||
|
||||
const uint8_t* pFind = (const uint8_t*)find.c_str();
|
||||
const size_t find_size = find.size();
|
||||
|
||||
@ -1695,7 +1707,7 @@ std::string string_replace(const std::string& str, const std::string& find, cons
|
||||
assert(0);
|
||||
str_char_size = 1;
|
||||
}
|
||||
|
||||
|
||||
const size_t str_remaining = str_size - str_ofs;
|
||||
if ((str_remaining >= find_size) && (memcmp(pStr + str_ofs, pFind, find_size) == 0))
|
||||
{
|
||||
@ -1718,7 +1730,7 @@ bool does_file_exist(const char* pFilename)
|
||||
FILE* pFile = ufopen(pFilename, "rb");
|
||||
if (!pFile)
|
||||
return false;
|
||||
|
||||
|
||||
fclose(pFile);
|
||||
return true;
|
||||
}
|
||||
|
18
utils.h
18
utils.h
@ -15,17 +15,15 @@
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <cstdint>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <varargs.h>
|
||||
#include <string>
|
||||
|
||||
#include <unordered_set>
|
||||
@ -52,8 +50,6 @@ const uint32_t ANSI_SOFT_HYPHEN = 0xAD;
|
||||
|
||||
// Overwrites every byte of `obj` with zero.
// NOTE(review): a raw byte-wise clear presumably expects plain-data types only
// (no virtual functions or owning members) - confirm against callers.
template<typename T>
inline void clear_obj(T& obj)
{
    memset(&obj, 0, sizeof(obj));
}
|
||||
|
||||
void panic(const char* pMsg, ...);
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
||||
inline bool string_is_digits(const std::string& s)
|
||||
@ -87,18 +83,18 @@ inline std::string ansi_to_utf8(const std::string& str) { return wchar_to_utf8(u
|
||||
// Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expected thing for chars<32, and we need them.
|
||||
std::string dos_to_utf8(const std::string& str);
|
||||
|
||||
// utf8 string format
|
||||
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args);
|
||||
// utf8 string format
|
||||
bool vformat(std::vector<char>& buf, _Printf_format_string_ const char* pFmt, va_list args);
|
||||
|
||||
// utf8 printf to FILE*
|
||||
void ufprintf(FILE* pFile, const char* pFmt, ...);
|
||||
void ufprintf(FILE* pFile, _Printf_format_string_ const char* pFmt, ...);
|
||||
|
||||
// utf8 print to stdout
|
||||
void uprintf(const char* pFmt, ...);
|
||||
void uprintf(_Printf_format_string_ const char* pFmt, ...);
|
||||
|
||||
std::string string_format(const char* pMsg, ...);
|
||||
std::string string_format(_Printf_format_string_ const char* pMsg, ...);
|
||||
|
||||
void panic(const char* pMsg, ...);
|
||||
[[noreturn]] void panic(_Printf_format_string_ const char* pMsg, ...);
|
||||
|
||||
// Open a file given a utf8 filename
|
||||
FILE* ufopen(const char* pFilename, const char* pMode);
|
||||
|
Loading…
x
Reference in New Issue
Block a user