Merge 9ffdf1ac1dac49a0a21589363c4da467b8c5b2c0 into 79c1b569e1f8c3cdff386816c712479842e59e59

This commit is contained in:
Sean 2024-02-05 20:53:51 -08:00 committed by GitHub
commit 5af670f831
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 727 additions and 728 deletions

56
.gitignore vendored
View File

@ -1,58 +1,8 @@
.vs/ufojson/v16/.suo
.vs/ufojson/v16/Browse.VC.db
.vs/ufojson/v16/Browse.VC.db-shm
.vs/ufojson/v16/Browse.VC.db-wal
.vs/ufojson/v16/Browse.VC.opendb
x64/Release/array.obj
x64/Release/buffer.obj
x64/Release/converters.obj
x64/Release/markdown.obj
x64/Release/markdown_proc.obj
x64/Release/renderers.obj
x64/Release/udb.obj
x64/Release/ufojson.Build.CppClean.log
x64/Release/ufojson.exe
x64/Release/ufojson.exe.recipe
x64/Release/ufojson.iobj
x64/Release/ufojson.ipdb
x64/Release/ufojson.log
x64/Release/ufojson.obj
x64/Release/ufojson.pdb
x64/Release/ufojson.tlog/CL.command.1.tlog
x64/Release/ufojson.tlog/CL.read.1.tlog
x64/Release/ufojson.tlog/CL.write.1.tlog
x64/Release/ufojson.tlog/link.command.1.tlog
x64/Release/ufojson.tlog/link.read.1.tlog
x64/Release/ufojson.tlog/link.write.1.tlog
x64/Release/ufojson.tlog/ufojson.lastbuildstate
x64/Release/ufojson.tlog/ufojson.write.1u.tlog
x64/Release/ufojson.vcxproj.FileListAbsolute.txt
x64/Release/ufojson_core.obj
x64/Release/utils.obj
x64/Release/vc142.pdb
.vs/
x64/
bin/anon_pdf.html/anon_pdf.html
bin/anon_pdf.html/assets/css/github-markdown.css
bin/anon_pdf.html/assets/css/hljs-github.min.css
bin/anon_pdf.html/assets/css/pilcrow.css
x64/Debug/ufojson.exe.recipe
x64/Debug/ufojson.vcxproj.FileListAbsolute.txt
x64/Debug/vc142.idb
x64/Debug/ufojson.tlog/CL.command.1.tlog
x64/Debug/ufojson.tlog/CL.read.1.tlog
x64/Debug/ufojson.tlog/CL.write.1.tlog
x64/Debug/ufojson.tlog/link.command.1.tlog
x64/Debug/ufojson.tlog/link.read.1.tlog
x64/Debug/ufojson.tlog/link.write.1.tlog
x64/Debug/ufojson.tlog/ufojson.lastbuildstate
.vs/ufojson/v16/ipch/AutoPCH/244e4210feda603/UFOJSON_CORE.ipch
.vs/ufojson/v16/ipch/AutoPCH/4c74a4fe4641f0f1/UFOJSON_CORE.ipch
.vs/ufojson/v16/ipch/AutoPCH/7aa2c43b33bd39c1/MARKDOWN_PROC.ipch
.vs/ufojson/v16/ipch/AutoPCH/83d2b3da9d0674cb/UFOJSON.ipch
.vs/ufojson/v16/ipch/AutoPCH/95d0a4c3d9175a86/UTILS.ipch
.vs/ufojson/v16/ipch/AutoPCH/9e020baee8628884/UTILS.ipch
.vs/ufojson/v16/ipch/AutoPCH/a92e92f6e8cde4c8/CONVERTERS.ipch
.vs/ufojson/v16/ipch/AutoPCH/b2fb0e8685a195af/MARKDOWN.ipch
.vs/ufojson/v16/ipch/AutoPCH/be6e1f435bd2965e/CONVERTERS.ipch
.vs/ufojson/v16/ipch/AutoPCH/cc2b4f538f55c179/UFOJSON.ipch
.vs/ufojson/v16/ipch/AutoPCH/fbf69d6913f7d0e8/UDB.ipch
*.user

View File

@ -1,4 +1,4 @@
// converters.cpp
// converters.cpp
// Copyright (C) 2023 Richard Geldreich, Jr.
#include "ufojson_core.h"
#include "markdown_proc.h"
@ -44,15 +44,15 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
fputc(UTF8_BOM0, pOut_file);
fputc(UTF8_BOM1, pOut_file);
fputc(UTF8_BOM2, pOut_file);
fprintf(pOut_file, "{\n");
fprintf(pOut_file, "\"%s Timeline\" : [\n", pSource_override ? pSource_override : "Magonia");
//const uint32_t TOTAL_RECS = 923;
uint32_t cur_line = 0;
uint32_t rec_index = first_rec_index;
while (cur_line < lines.size())
{
if (!lines[cur_line].size())
@ -66,7 +66,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
panic("Out of lines");
std::string first_line(lines[cur_line++]);
std::string date_str(first_line);
if (date_str.size() > TOTAL_COLS)
date_str.resize(TOTAL_COLS);
@ -120,7 +120,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
if (buf.size() < TOTAL_COLS)
break;
if (desc_lines.size() == 1)
{
if (buf.size() >= TOTAL_COLS)
@ -214,9 +214,9 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
int year = -1, month = -1, day = -1;
date_prefix_t date_prefix = cNoPrefix;
std::string date_suffix;
std::string temp_date_str(date_str);
if (string_ends_in(temp_date_str, "'s"))
{
temp_date_str.resize(temp_date_str.size() - 2);
@ -387,7 +387,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
{
if (date_suffix.size())
panic("Invalid date suffix");
fprintf(pOut_file, "%i/%i", month, year);
}
else
@ -415,7 +415,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const
else
fprintf(pOut_file, " \"source_id\" : \"Magonia_%u\",\n", rec_index);
fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? pSource_override : u8"ValléeMagonia");
fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? pSource_override : u8"ValléeMagonia");
if (pType_override)
fprintf(pOut_file, " \"type\" : \"%s\"\n", pType_override);
@ -898,7 +898,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
panic("Encountered empty line");
if (rec.size() < 54)
panic("Line too small");
std::string date_str(rec);
date_str = string_slice(date_str, 0, 16);
string_trim(date_str);
@ -908,13 +908,13 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
rec = string_slice(rec, 52);
string_trim(rec);
fprintf(pOut_file, "{\n");
fprintf(pOut_file, " \"date\" : \"%s\",\n", date_str.c_str());
fprintf(pOut_file, " \"location\" : \"%s\",\n", escape_string_for_json(location_str).c_str());
fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(rec).c_str());
if (pType)
fprintf(pOut_file, " \"type\" : \"%s\",\n", pType);
@ -923,7 +923,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c
fprintf(pOut_file, " \"source_id\" : \"%s_%u\",\n", pSource, total_recs);
fprintf(pOut_file, " \"source\" : \"%s\"\n", pSource);
fprintf(pOut_file, "}");
if (cur_line < lines.size())
fprintf(pOut_file, ",");
@ -1052,7 +1052,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
std::vector<uint32_t> list;
list.push_back(l);
auto res = openai_res_hash.insert(std::make_pair(rec["event_crc32"].get<uint32_t>(), list));
if (!res.second)
(res.first)->second.push_back(l);
@ -1174,7 +1174,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
continue;
}
size_t dash_pos = line.find(u8"");
size_t dash_pos = line.find(u8"");
if (dash_pos == std::string::npos)
panic("Failed finding dash\n");
@ -1206,7 +1206,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
if (temp[0] == '#')
break;
size_t d = temp.find(u8"");
size_t d = temp.find(u8"");
const uint32_t DASH_THRESH_POS = 42;
if ((d != std::string::npos) && (d < DASH_THRESH_POS))
@ -1306,7 +1306,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
if (json_alt_date.size())
fprintf(pOut_file, " \"alt_date\" : \"%s\",\n", json_alt_date.c_str());
fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(desc).c_str());
fprintf(pOut_file, " \"source_id\" : \"Eberhart_%u\",\n", event_num);
@ -1359,9 +1359,9 @@ bool convert_eberhart(unordered_string_set& unique_urls)
{
if (total_useful_locs_printed)
fprintf(pOut_file, ", ");
fprintf(pOut_file, "\"%s\"", escape_string_for_json(loc[k]).c_str());
total_useful_locs_printed++;
}
else
@ -1378,7 +1378,7 @@ bool convert_eberhart(unordered_string_set& unique_urls)
break;
}
}
if (!ref.size())
{
fprintf(pOut_file, " \"ref\" : \"[Eberhart](http://www.cufos.org/pdfs/UFOsandIntelligence.pdf)\"\n");
@ -1497,7 +1497,7 @@ bool convert_johnson()
(string_find_first(l, "Written by Donald Johnson") != -1) ||
(string_find_first(l, "Written by Donald A Johnson") != -1) ||
(string_find_first(l, "Compiled from the UFOCAT computer database") != -1) ||
(string_find_first(l, u8"© Donald A. Johnson") != -1) ||
(string_find_first(l, u8"© Donald A. Johnson") != -1) ||
(string_begins_with(l, "Themes: ")))
{
found_end = true;
@ -1964,6 +1964,7 @@ static bool test_eberhart_date()
return true;
}
[[maybe_unused]] // currently unused...
static void print_nocr(const std::string& s)
{
std::string new_string;
@ -1993,8 +1994,8 @@ static void converters_test()
uprintf("%s\n", wchar_to_utf8(utf8_to_wchar(blah, CP_ACP)).c_str());
#endif
//fprintf(u8"“frightening vision”");
//ufprintf(stderr, u8"“frightening vision”");
//fprintf(u8"“frightening vision”");
//ufprintf(stderr, u8"“frightening vision”");
assert(crc32((const uint8_t*)"TEST", 4) == 0xeeea93b8);
assert(crc32((const uint8_t*)"408tdsfjdsfjsdh893!;", 20) == 0xa044e016);
if (!test_eberhart_date()) return panic("test_eberhart_date failed!");
@ -2007,11 +2008,11 @@ static void converters_test()
//bufprintf(pIn, "A\nB \nC\n_This is a blah_[XXXX](YYYY(S))");
//const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr) of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, “The Ghost Rockets,” UFOs 19471987, Fortean Tomes, 1987, pp. 3334))";
// const char* p = u8R"(Blah
//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls “Selenites.” (Maria Avxentevskaya, “[How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI) [Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)” <br/><br/>Real Clear Science, December 2, 2017) )";
//const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr) of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, “The Ghost Rockets,” UFOs 19471987, Fortean Tomes, 1987, pp. 3334))";
// const char* p = u8R"(Blah
//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls “Selenites.” (Maria Avxentevskaya, “[How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI) [Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)” <br/><br/>Real Clear Science, December 2, 2017) )";
//const char* p = u8R"(Pierre Lagrange, “[_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi)” Actualité, no. 440 (October 2017): 2829; Wikipedia, “[Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)”))";
//const char* p = u8R"(Pierre Lagrange, “[_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi)” Actualité, no. 440 (October 2017): 2829; Wikipedia, “[Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)”))";
const char* p = "<br/>blah<br/>_[Agobard,](www.blah.com)_<br/> blah<br/>blah <br/>[_Agobard_,](www.blah.com)<br/>";
//const char* p = "***[sssss](www.dddd.com)*** _Blah_ *Cool*_Zeek_";
@ -2103,12 +2104,12 @@ enum
cSlashFlag = 256
};
static const struct
static constexpr struct
{
const char* m_pStr;
uint32_t m_flag;
uint32_t m_month;
date_prefix_t m_date_prefix;
uint32_t m_month = 0;
date_prefix_t m_date_prefix = cNoPrefix;
} g_special_phrases[] =
{
{ "january", cMonthFlag, 1 },
@ -2173,7 +2174,7 @@ static const struct
{ "/", cSlashFlag }
};
const uint32_t NUM_SPECIAL_PHRASES = sizeof(g_special_phrases) / sizeof(g_special_phrases[0]);
constexpr int NUM_SPECIAL_PHRASES = static_cast<int>(std::size(g_special_phrases));
enum
{
@ -2253,12 +2254,18 @@ static int get_special_from_token(int64_t tok)
return (int)spec;
}
static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date)
static constexpr bool nipcap_date_is_year_valid(
int year)
{
assert(cSpecialTotal == NUM_SPECIAL_PHRASES);
const uint32_t MIN_YEAR = 1860;
const uint32_t MAX_YEAR = 2012;
return static_cast<uint32_t>(year) >= MIN_YEAR
&& static_cast<uint32_t>(year) <= MAX_YEAR;
}
static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date)
{
static_assert(cSpecialTotal == NUM_SPECIAL_PHRASES);
string_trim(date);
@ -2318,7 +2325,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
int month = convert_hex_digit(date[4]) * 10 + convert_hex_digit(date[5]);
int day = convert_hex_digit(date[6]) * 10 + convert_hex_digit(date[7]);
if ((year < MIN_YEAR) || (year > MAX_YEAR))
if (!nipcap_date_is_year_valid(year))
return false;
if (month > 12)
@ -2351,7 +2358,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
return false;
}
// Tokenize the input then only parse those cases we explictly support. Everything else is an error.
// Tokenize the input then only parse those cases we explicitly support. Everything else is an error.
std::vector<int64_t> tokens;
std::vector<int> digits;
@ -2432,7 +2439,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
else if (digits[0] == 4)
{
year = (int)tokens[0];
if ((year < MIN_YEAR) || (year > MAX_YEAR))
if (!nipcap_date_is_year_valid(year))
return false;
}
else
@ -2462,7 +2469,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
{
if (digits[0] == 4)
{
// YYMMXX
// YYMMXX
int year = 1900 + (int)(tokens[0] / 100);
int month = (int)(tokens[0] % 100);
@ -2474,10 +2481,10 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
}
else if (digits[0] == 6)
{
// YYYYMMXX
// YYYYMMXX
int year = (int)(tokens[0] / 100);
if ((year < MIN_YEAR) || (year > MAX_YEAR))
if (!nipcap_date_is_year_valid(year))
return false;
int month = (int)(tokens[0] % 100);
@ -2505,7 +2512,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
{
// YYYYXXXX
begin_date.m_year = (int)tokens[0];
if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR))
if (!nipcap_date_is_year_valid(begin_date.m_year))
return false;
}
else
@ -2555,7 +2562,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
{
// YYYYMMDD
begin_date.m_year = (int)(tokens[0] / 10000);
if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR))
if (!nipcap_date_is_year_valid(begin_date.m_year))
return false;
begin_date.m_month = (int)((tokens[0] / 100) % 100);
@ -2577,7 +2584,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
}
if ((tokens.size() == 2) && (tokens[1] < 0) &&
((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd) ||
(((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd)) ||
(get_special_from_token(tokens[1]) == cSpecialMid))
)
{
@ -2649,7 +2656,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_
{
// YYYYMMDD-YYYYMMDD
end_date.m_year = (int)(tokens[2] / 10000);
if ((end_date.m_year < MIN_YEAR) || (end_date.m_year > MAX_YEAR))
if (!nipcap_date_is_year_valid(end_date.m_year))
return false;
end_date.m_month = (int)((tokens[2] / 100) % 100);
@ -3317,7 +3324,7 @@ bool convert_nicap(unordered_string_set& unique_urls)
if ((prev_orig_desc.size()) && (orig_desc == prev_orig_desc) && (js["date"] == prev_date))
{
// It's a repeated record, with just a different category.
// It's a repeated record, with just a different category.
std::string new_desc(js_doc_array.back()["desc"]);
new_desc += string_format(" (NICAP: %s)", g_nicap_categories[cat_index - 1]);
@ -3391,7 +3398,7 @@ bool convert_nuk()
{
std::string title;
string_vec col_titles;
std::vector<string_vec> rows;
bool success = load_column_text("nuktest_usa.txt", rows, title, col_titles, false, "USA");
@ -3428,9 +3435,9 @@ bool convert_nuk()
event.m_locations.push_back(x[cColLat] + " " + x[cColLong]);
std::string attr;
std::string t(string_upper(x[cColType]));
bool salvo = false;
if (string_ends_in(t, "_SALVO"))
{
@ -3491,9 +3498,9 @@ bool convert_nuk()
panic("Invalid type");
event.m_desc = string_format("Nuclear test: %s. Country: %s", attr.c_str(), x[cColCountry].c_str());
if ((x[cColName].size()) && (x[cColName] != "-"))
event.m_desc += string_format(u8" Name: “%s”", x[cColName].c_str());
event.m_desc += string_format(u8" Name: “%s”", x[cColName].c_str());
if (x[cColY].size())
event.m_desc += string_format(" Yield: %sKT", x[cColY].c_str());
@ -3510,13 +3517,13 @@ bool convert_nuk()
std::string latitude_dms = get_deg_to_dms(lat) + ((lat <= 0) ? " S" : " N");
std::string longitude_dms = get_deg_to_dms(lon) + ((lon <= 0) ? " W" : " E");
event.m_key_value_data.push_back(string_pair("LatLongDMS", latitude_dms + " " + longitude_dms));
}
if (x[cColDepth].size())
event.m_key_value_data.push_back(string_pair("NukeDepth", x[cColDepth]));
if (x[cColMb].size())
event.m_key_value_data.push_back(string_pair("NukeMb", x[cColMb]));
@ -3534,7 +3541,7 @@ bool convert_nuk()
event.m_key_value_data.push_back(string_pair("NukeSource", x[cColSource]));
event.m_key_value_data.push_back(string_pair("NukeCountry", x[cColCountry]));
if (x[cColLat].size() && x[cColLong].size())
{
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%s,%s)", x[cColLat].c_str(), x[cColLong].c_str())));
@ -3545,9 +3552,9 @@ bool convert_nuk()
event.m_source = "NukeExplosions";
event.m_source_id = event.m_source + string_format("_%u", event_id);
timeline.get_events().push_back(event);
event_id++;
}
@ -3555,7 +3562,7 @@ bool convert_nuk()
panic("Empty timeline)");
timeline.set_name("Nuclear Test Timeline");
return timeline.write_file("nuclear_tests.json", true);
}
@ -3563,7 +3570,7 @@ bool convert_anon()
{
string_vec lines;
bool utf8_flag = false;
const char* pFilename = "anon_pdf.md";
if (!read_text_file(pFilename, lines, true, &utf8_flag))
panic("Failed reading text file %s", pFilename);
@ -3582,10 +3589,10 @@ bool convert_anon()
if (s.size() < 27)
panic("Invalid string");
//[0x00000026] 0xe2 'â' char
//[0x00000027] 0x80 '€' char
//[0x00000028] 0x94 '”' char
//[0x00000026] 0xe2 'â' char
//[0x00000027] 0x80 '€' char
//[0x00000028] 0x94 '”' char
const int8_t c = -30;// (int8_t)0xE2;
size_t dash_pos = s.find_first_of(c);
@ -3794,7 +3801,7 @@ bool convert_anon()
break;
string_trim(ns);
line_index++;
event_strs.push_back(ns);
@ -4056,13 +4063,13 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
}
}
if ((day_index < 0) && ((month_tok_index + 1) < tokens.size()))
if ((day_index < 0) && ((month_tok_index + 1) < static_cast<int>(tokens.size())))
{
std::string& suffix_str = tokens[month_tok_index + 1];
if (isdigit(suffix_str[0]))
{
bool is_time = false;
if ((month_tok_index + 2) < tokens.size())
if ((month_tok_index + 2) < static_cast<int>(tokens.size()))
{
is_time = (tokens[month_tok_index + 2] == ":");
}
@ -4203,7 +4210,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
std::string ref(string_slice(rec_text, s, l));
if ((e < rec_text.size()) && ((rec_text[e] == '.') || (rec_text[e] == ']')))
if ((e < static_cast<int>(rec_text.size())) && ((rec_text[e] == '.') || (rec_text[e] == ']')))
{
while (s > 0)
{
@ -4214,7 +4221,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm)
}
}
if ((e < rec_text.size()) && (rec_text[e] == ']'))
if ((e < static_cast<int>(rec_text.size())) && (rec_text[e] == ']'))
{
e++;
l++;
@ -4335,7 +4342,7 @@ bool convert_rr0()
tm.write_file("rr0.json");
uprintf("Processed %u years\n", total_years);
return total_years >= NUM_EXPECTED_RR0_YEARS;
}
@ -4439,7 +4446,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
str = string_lower(str);
int year = -1, year_tok_index = -1;
for (year_tok_index = 0; year_tok_index < tokens.size(); year_tok_index++)
for (year_tok_index = 0; year_tok_index < static_cast<int>(tokens.size()); year_tok_index++)
{
int y = atoi(tokens[year_tok_index].c_str());
if ((y > 0) && (y >= first_year) && (y <= last_year))
@ -4501,13 +4508,13 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
}
if ((day_index < 0) &&
((month_tok_index + 1) < tokens.size()))
((month_tok_index + 1) < static_cast<int>(tokens.size())))
{
std::string& suffix_str = tokens[month_tok_index + 1];
if (isdigit(suffix_str[0]))
{
bool is_time = false;
if ((month_tok_index + 2) < tokens.size())
if ((month_tok_index + 2) < static_cast<int>(tokens.size()))
{
is_time = (tokens[month_tok_index + 2] == ":");
}
@ -4642,7 +4649,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
evt.m_source = "Overmeire";
evt.m_source_id = string_format("Overmeire_%zu", tm.get_events().size());
evt.m_refs.push_back("[_Mini catalogue chronologique des observations OVNI_, by Godelieve Van Overmeire](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html)");
std::string trial_date(string_format("#%u", year));
if (cur_date.m_month >= 1)
{
@ -4652,7 +4659,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm)
}
if (trial_date != strs[0])
evt.m_desc += " (" + string_slice(strs[0], 1) + ")";
tm.get_events().push_back(evt);
prev_year = year;

View File

@ -8168,6 +8168,7 @@ class lexer : public lexer_base<BasicJsonType>
}
}
}
JSON_HEDLEY_FALL_THROUGH;
// multi-line comments skip input until */ is read
case '*':
@ -8203,6 +8204,7 @@ class lexer : public lexer_base<BasicJsonType>
}
}
}
JSON_HEDLEY_FALL_THROUGH;
// unexpected character after reading '/'
default:

542
pjson.h

File diff suppressed because it is too large Load Diff

2
stem.c
View File

@ -329,7 +329,7 @@ static void step5()
if (b[k] == 'e')
{
int a = m();
if (a > 1 || a == 1 && !cvc(k - 1)) k--;
if (a > 1 || (a == 1 && !cvc(k - 1))) k--;
}
if (b[k] == 'l' && doublec(k) && m() > 1) k--;
}

28
udb.cpp
View File

@ -33,6 +33,7 @@ private:
uint8_t m_time;
uint8_t m_ymdt; // 2-bit fields: TDMY accuracy, T lowest, 0=invalid, 1=?, 2=~, 3=accurate
uint8_t m_duration;
[[maybe_unused]] // -Wunused-private-field
uint8_t m_unknown1;
int16_t m_enc_longtitude;
@ -41,11 +42,13 @@ private:
int16_t m_elevation;
int16_t m_rel_altitude;
[[maybe_unused]] // -Wunused-private-field
uint8_t m_unknown2;
uint8_t m_continent_country; // nibbles
uint8_t m_state_or_prov[3];
[[maybe_unused]] // -Wunused-private-field
uint8_t m_unknown3;
#if 0
@ -653,8 +656,9 @@ static std::string decode_hatch(const std::string& str, bool first_line)
string_vec tokens;
std::string cur_token;
bool inside_space = false;
int prev_c = -1;
// written to, but never read from
[[maybe_unused]] bool inside_space = false;
[[maybe_unused]] int prev_c = -1;
// Phase 1: Tokenize the input string based off examination of (mostly) individual chars, previous chars and upcoming individual chars.
for (uint32_t i = 0; i < str.size(); i++)
@ -1562,11 +1566,11 @@ static void init_dict()
}
}
uprintf("Done reading dictionary, %u uppercase words\n", g_dictionary.size());
uprintf("Done reading dictionary, %zu uppercase words\n", g_dictionary.size());
}
void udb_init()
{
{
assert(sizeof(udb_rec) == UDB_RECORD_SIZE);
check_for_hatch_tab_dups(g_hatch_refs);
@ -1703,10 +1707,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
decode_hatch_desc(pRec, db_str, loc_str, desc_str);
pRec->get_date(event.m_begin_date);
if (event.m_begin_date.m_year <= 0)
return false;
std::string time;
if (pRec->get_time(time))
{
@ -1719,21 +1723,21 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
event.m_locations.push_back(loc_str);
event.m_desc = desc_str;
// TODO
event.m_type.push_back("sighting");
event.m_source_id = string_format("Hatch_UDB_%u", rec_index);
event.m_source = "Hatch";
for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++)
if ((f != cFlagMAP) && (pRec->get_flag(f)))
event.m_attributes.push_back(g_pHatch_flag_descs[f]);
event.m_refs.push_back(pRec->get_full_refs());
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%f,%f)", pRec->get_latitude(), pRec->get_longitude())));
event.m_key_value_data.push_back(std::make_pair("LatLong", string_format("%f %f", pRec->get_latitude(), pRec->get_longitude())));
event.m_key_value_data.push_back(std::make_pair("LatLongDMS", string_format("%s %s", pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str())));
@ -1756,10 +1760,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event&
if (pRec->get_elevation() != -99)
event.m_key_value_data.push_back(std::make_pair("Elev", string_format("%i", pRec->get_elevation())));
if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999))
event.m_key_value_data.push_back(std::make_pair("RelAlt", string_format("%i", pRec->get_rel_altitude())));
return true;
}

View File

@ -1,5 +1,5 @@
// udb_tables.h
// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from
// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from
// the "uDb" project by Jérôme Beau, available on github here: https://github.com/RR0/uDb
#pragma once
@ -60,7 +60,7 @@ static const char* g_hatch_continents[]
struct hatch_state
{
const char* m_pCode;
const char* m_pFull;
const char* m_pFull = nullptr;
};
static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std::string& state_or_prov,
@ -677,7 +677,7 @@ static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std::
break;
}
case 6: // Asia Pacific
case 6: // Asia Pacific
{
switch (country_code)
{
@ -1599,7 +1599,7 @@ struct hatch_abbrev
{
const char* pAbbrev;
const char* pExpansion;
bool m_forbid_firstline;
bool m_forbid_firstline = false;
};
static const hatch_abbrev g_hatch_abbreviations[] =
@ -2956,7 +2956,7 @@ static const hatch_abbrev g_hatch_abbreviations[] =
{ "Var.", "various", true },
{ "Img", "image", true },
{ "FLUCTs", "fluctuates", true },
{ "rtps", "reports", true }, // "separate rtps"
{ "rtps", "reports", true }, // "separate rtps"
{ "Math.", "Mathematics", true },
{ "indp.", "independent", true },
{ "frag", "fragment", true },
@ -3106,6 +3106,7 @@ static const char* g_cap_exceptions[] =
"McChord",
"Hetch Hetchy Aqueduct",
"LaPaz",
// #REVIEW Does this need to be double question mark? clang trips on "trigraph ignored" -Wtrigraphs
"Sea Island'(??)",
"Loren Gross",
"Test Pilot",
@ -3121,7 +3122,7 @@ static const char* g_cap_exceptions[] =
"no UFO",
"Blackcomb Mountain",
"Harding Mall",
"Hawkes Bay"
"Hawkes Bay",
"Hells Canyon",
"Highway Patrol",
"Hogg Mountain",

Binary file not shown.

View File

@ -11,6 +11,7 @@
//-------------------------------------------------------------------
[[maybe_unused]] // currently unused...
static void detect_bad_urls()
{
string_vec unique_urls;
@ -112,7 +113,7 @@ static bool invoke_openai(const char* pPrompt_text, json& result)
return false;
}
return true;
return success;
}
static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, json& result)
@ -125,7 +126,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
if ((desc.size() >= 2) && (desc.back() == '('))
desc.pop_back();
const uint32_t MAX_SIZE = 4096; // ~1024 tokens
if (desc.size() > MAX_SIZE)
{
@ -143,7 +144,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
}
uprintf("Desc: %s\n\n", desc.c_str());
std::string prompt_str(pPrompt_text);
prompt_str += desc;
prompt_str += "\"";
@ -151,12 +152,13 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text,
return invoke_openai(prompt_str.c_str(), result);
}
[[maybe_unused]] // currently unused...
static void process_timeline_using_openai(const ufo_timeline &timeline)
{
bool utf8_flag;
json existing_results;
load_json_object("openai_results.json", utf8_flag, existing_results);
json final_result = json::object();
final_result["results"] = json::array();
@ -251,6 +253,7 @@ static void process_timeline_using_openai(const ufo_timeline &timeline)
uprintf("Success\n");
}
[[maybe_unused]] // currently unused...
static void process_timeline_using_python(const ufo_timeline& timeline)
{
json final_result = json::object();
@ -275,7 +278,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline)
remove("locations.json");
Sleep(50);
int status = system("python.exe pextractlocs.py");
if (status != EXIT_SUCCESS)
panic("Failed running python.exe");
@ -295,7 +298,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline)
if (it->is_string())
uprintf("%s\n", it->get<std::string>().c_str());
}
json new_obj = json::object();
new_obj.emplace("index", i);
new_obj.emplace("date", event.m_date_str);
@ -348,6 +351,7 @@ static bool is_important_country(const std::string& s)
return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE") || (s == "BR") || (s == "IT");
}
[[maybe_unused]] // currently unused...
static bool is_favored_country(const std::string& s)
{
return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE");
@ -375,11 +379,13 @@ static int get_favored_country_rank(const std::string& s)
return 7;
}
[[maybe_unused]] // currently unused...
static bool is_country_fcode(const std::string &fcode)
{
return ((fcode == "PCL") || (fcode == "PCLD") || (fcode == "PCLF") || (fcode == "PCLH") || (fcode == "PCLI") || (fcode == "PCLIX") || (fcode == "PCLS") || (fcode == "TERR"));
}
[[maybe_unused]] // currently unused...
static void process_geodata()
{
string_vec lines;
@ -395,7 +401,7 @@ static void process_geodata()
geonames.resize(13000000);
uint32_t total_geonames = 0;
uint32_t max_col_sizes[gn_total];
clear_obj(max_col_sizes);
@ -405,7 +411,7 @@ static void process_geodata()
uint32_t total_accepted = 0;
json output_json = json::array();
for (const auto& str : lines)
{
tab_locs.resize(0);
@ -444,7 +450,7 @@ static void process_geodata()
#endif
max_col_sizes[i] = std::max(max_col_sizes[i], (uint32_t)g.m_fields[i].size());
cur_ofs = tab_locs[i] + 1;
}
@ -453,7 +459,7 @@ static void process_geodata()
if (g.m_fields[gn_population].size())
{
int pop = atoi(g.m_fields[gn_population].c_str());
const int MIN_POP = 10;
if (pop >= MIN_POP)
has_min_pop = true;
@ -468,7 +474,7 @@ static void process_geodata()
switch (feature_class)
{
case 'T': // mountain,hill,rock,...
if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") ||
if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") ||
(code == "ISL") || (code == "ISLS") || (code == "PEN") || (code == "VALS") || (code == "VALX"))
{
accept_flag = true;
@ -477,7 +483,7 @@ static void process_geodata()
case 'S': // spot, building, farm
if ((code == "AIRB") || (code == "AIRF") || (code == "AIRP") || (code == "AIRQ") || (code == "BRKS") || (code == "CTRA") ||
(code == "CTRS") || (code == "INSM") || (code == "ITTR") || (code == "PSN") || (code == "STNE") || (code == "USGE") ||
(code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") ||
(code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") ||
(code == "PS") || (code == "PSH") || (code == "STNB") || (code == "STNS") || (code == "UNIV"))
{
accept_flag = true;
@ -495,6 +501,7 @@ static void process_geodata()
break;
case 'H': // stream, lake, ...
if ((code == "BAY") || (code == "BAYS") || (code == "CHN") || (code == "CHNL") || (code == "CHNM") || (code == "CHNN") ||
// #REVIEW "CNL" is repeated twice, was something else meant here?
(code == "CNL") || (code == "CNL") || (code == "LK") || (code == "LKN") || (code == "LKS") || (code == "RSV") || (code == "SD") || (code == "STRT"))
{
accept_flag = true;
@ -527,7 +534,7 @@ static void process_geodata()
obj["id"] = g.m_fields[gn_geonameid].size() ? atoi(g.m_fields[gn_geonameid].c_str()) : -1;
obj["name"] = g.m_fields[gn_name];
obj["plainname"] = g.m_fields[gn_asciiname];
if (g.m_fields[gn_alternatenames].size())
obj["altnames"] = g.m_fields[gn_alternatenames];
@ -539,10 +546,10 @@ static void process_geodata()
if (g.m_fields[gn_country_code].size())
obj["ccode"] = g.m_fields[gn_country_code];
if (g.m_fields[gn_cc2].size())
obj["cc2"] = g.m_fields[gn_cc2];
if (g.m_fields[gn_admin1_code].size())
obj["a1"] = g.m_fields[gn_admin1_code];
@ -572,7 +579,7 @@ static void process_geodata()
{
rejected_class_counts[feature_class] = rejected_class_counts[feature_class] + 1;
}
total_geonames++;
if ((total_geonames % 1000000) == 0)
@ -596,11 +603,12 @@ static void process_geodata()
uprintf("%c %u\n", s.first, s.second);
}
#if 0 // unused code...
static const struct
{
const char* m_pCode;
int m_level;
} g_geocode_levels[] =
} g_geocode_levels[] =
{
{ "ADM1", 1 },
{ "ADM1H", 1 },
@ -643,6 +651,7 @@ static int find_geocode_admin_level(const char* pCode)
return -1;
}
#endif // 0 // unused code...
struct country_info
{
@ -707,12 +716,12 @@ public:
load_hierarchy();
uprintf("Reading world_features.json\n");
if (!read_text_file("world_features.json", m_filebuf, nullptr))
panic("Failed reading file");
uprintf("Deserializing JSON file\n");
bool status = m_doc.deserialize_in_place((char*)&m_filebuf[0]);
if (!status)
panic("Failed parsing JSON document!");
@ -731,16 +740,16 @@ public:
//tm.start();
uint8_vec name_buf;
m_geoid_to_rec.clear();
m_geoid_to_rec.reserve(MAX_EXPECTED_RECS);
for (uint32_t rec_index = 0; rec_index < root_arr.size(); rec_index++)
{
const auto& arr_entry = root_arr[rec_index];
if (!arr_entry.is_object())
panic("Invalid JSON");
int geoid = arr_entry.find_int32("id");
assert(geoid > 0);
auto ins_res = m_geoid_to_rec.insert(std::make_pair(geoid, (int)rec_index));
@ -770,7 +779,7 @@ public:
const auto pPlainName = arr_entry.find_value_variant("plainname");
if ((pPlainName == nullptr) || (!pPlainName->is_string()))
panic("Missing/invalid plainname field");
{
const char* pName_str = pPlainName->get_string_ptr();
size_t name_size = strlen(pName_str);
@ -823,12 +832,12 @@ public:
}
std::string fclass = arr_entry.find_string_obj("fclass");
if (fclass == "A")
{
std::string fcode(arr_entry.find_string_obj("fcode"));
if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4"))
if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4"))
{
std::string ccode(arr_entry.find_string_obj("ccode"));
@ -846,7 +855,7 @@ public:
break;
desc += "." + a[i];
}
m_admin_map[desc].push_back(std::pair<int, int>(rec_index, get_admin_level(fcode)));
}
}
@ -878,7 +887,7 @@ public:
{
std::vector< std::pair<int, int> >& recs = it->second;
std::sort(recs.begin(), recs.end(),
std::sort(recs.begin(), recs.end(),
[](const std::pair<int, int>& a, const std::pair<int, int>& b) -> bool
{
return a.second < b.second;
@ -890,7 +899,7 @@ public:
{
const int cur_rec_index = recs[i].first;
const pjson::value_variant* pCur = &m_doc[cur_rec_index];
uprintf("admlevel: %u, rec: %u geoid: %u name: %s fcode: %s\n",
recs[i].second,
cur_rec_index, pCur->find_int32("id"), pCur->find_string_obj("name").c_str(), pCur->find_string_obj("fcode").c_str());
@ -922,7 +931,7 @@ public:
c = utolower(c);
const uint32_t hash_val = (hash_hsieh((const uint8_t *)key.c_str(), key.size()) * HASH_FMAGIC) >> HASH_SHIFT;
results.resize(0);
alt_results.resize(0);
@ -934,7 +943,7 @@ public:
const pjson::value_variant* pObj = &m_doc[rec_index];
const char *pName = pObj->find_string_ptr("name");
const char* pPlainName = pObj->find_string_ptr("plainname");
if ((_stricmp(pKey, pName) != 0) && (_stricmp(pKey, pPlainName) != 0))
@ -1010,7 +1019,7 @@ public:
if (num_parent_admins > num_child_admins)
return false;
// Example: Anderson, Shasta County, California
if (num_parent_admins == num_child_admins)
{
@ -1022,7 +1031,7 @@ public:
for (uint32_t admin_index = 0; admin_index < num_parent_admins; admin_index++)
{
std::string id(string_format("a%u", admin_index + 1));
std::string admin_parent(pParent->find_string_obj(id.c_str()));
std::string admin_child(pChild->find_string_obj(id.c_str()));
@ -1066,16 +1075,16 @@ public:
cRankVillageNoPopAlt, // alt
cRankAdminNoPop, // not a numbered admin
cRankPopVillageAlt, // prim, 1-100
cRankTownAlt, // alt, 100+
cRankCityLevel0Alt, // alt or alt, 1k+
cRankCityLevel1Alt, // alt or alt, 10k+
cRankAdminCapital4Alt, // alt cap4
cRankAdmin4Alt, // alt admin4
cRankAdminCapital3Alt, // alt cap3
cRankAdmin3Alt, // alt admin3
@ -1085,10 +1094,10 @@ public:
cRankVillageNoPop, // prim no pop
cRankAdmin, // not numbered, has pop
cRankPopVillage, // prim, 1-100
cRankTown, // prim, 100+
cRankAdminCapital2Alt, // alt county seat
cRankAdmin2Alt, // alt county
@ -1097,9 +1106,9 @@ public:
cRankPark, // prim or alt
cRankReserve, // prim or alt
cRankAdminCapital1Alt, // alt state cap
cRankCityLevel0, // prim or alt, 1k+
cRankCityLevel1, // prim or alt, 10k+
@ -1110,19 +1119,19 @@ public:
cRankCityLevel3, // prim or alt, 1m+
cRankBaseOrAirport, // prim or alt
cRankAdminCapital2, // prim county seat
cRankAdmin2, // prim county
cRankAdmin2, // prim county
cRankAdminCapital1, // prim state cap
cRankAdmin1Alt, // alt state
cRankPoliticalCapital, // prim or alt
cRankGovernmentCapital, // prim or alt
cRankAdmin1, // prim state
// all countries prim or alt
cRankCountryLevel0,
cRankCountryLevel1,
@ -1134,10 +1143,10 @@ public:
cRankCountryLevel7,
cRankCountryLevel8,
cRankCountryLevel9,
cRankTotal,
};
int get_rank(const pjson::value_variant* p, bool alt_match) const
{
int country_index = get_country_index(p);
@ -1265,11 +1274,11 @@ public:
struct resolve_results
{
resolve_results()
resolve_results()
{
clear();
}
void clear()
{
m_candidates.resize(0);
@ -1282,16 +1291,16 @@ public:
}
geo_result m_best_result;
uint32_t m_num_input_tokens;
bool m_strong_match;
geo_result_vec m_candidates;
std::vector< std::pair<uint32_t, float> > m_sorted_results;
uint32_t m_best_sorted_result_index;
float m_best_score;
};
bool resolve(const std::string& str, resolve_results &resolve_res) const
{
uprintf("--- Candidates for query: %s\n", str.c_str());
@ -1359,7 +1368,7 @@ public:
p->find_string_ptr("fcode"),
p->find_int32("pop"));
#endif
temp_results[toks_index].push_back({ p, false });
}
@ -1389,7 +1398,7 @@ public:
uprintf("No results\n");
return false;
}
//uprintf("Candidates for query: %s\n", str.c_str());
std::vector<uint32_t> valid_candidates;
@ -1405,7 +1414,7 @@ public:
std::vector< std::pair<uint32_t, float> > candidate_results[TOTAL_FAVORED_COUNTRY_RANKS];
uint32_t total_country_rankings = 0;
uint32_t total_candidates = 0;
[[maybe_unused]] uint32_t total_candidates = 0;
for (uint32_t candidate_index_iter = 0; candidate_index_iter < valid_candidates.size(); candidate_index_iter++)
{
@ -1449,11 +1458,11 @@ public:
}
}
}
candidate_score += p->find_float("pop") / 40000000.0f;
const int country_rank = get_favored_country_rank(ccode);
assert(country_rank < TOTAL_FAVORED_COUNTRY_RANKS);
assert(static_cast<uint32_t>(country_rank) < TOTAL_FAVORED_COUNTRY_RANKS);
if (!candidate_results[country_rank].size())
total_country_rankings++;
@ -1462,7 +1471,7 @@ public:
total_candidates++;
}
// 1. If there's just one country rank group, choose the best score in that country rank group.
// 2. If they matched against a country, choose the highest ranking country, prioritizing the favored countries first.
// 3. Check for states, state capitals or other significant admin districts in the favored countries, in order
@ -1512,7 +1521,7 @@ public:
}
}
#endif
if (total_country_rankings == 1)
{
// Only one ranked country group in the candidate results, so just choose the one with the highest score.
@ -1525,9 +1534,9 @@ public:
break;
}
}
assert(pBest_ranking_vec);
uint32_t candidate_index = (*pBest_ranking_vec)[0].first;
best_score = (*pBest_ranking_vec)[0].second;
@ -1539,7 +1548,7 @@ public:
else
{
// Multiple ranked country groups.
// Check for US states (primary or alt)
{
uint32_t r_index = 0;
@ -1564,7 +1573,7 @@ public:
}
}
}
if (!pBest_result)
{
// First check for any country hits from any ranked country group.
@ -1588,7 +1597,7 @@ public:
break;
}
}
if (pBest_result)
break;
}
@ -1610,7 +1619,7 @@ public:
//const bool was_alt = temp_results[last_tok_index][candidate_index].m_alt;
const int rank = get_rank(p, temp_results[last_tok_index][candidate_index].m_alt);
if ((rank == cRankAdmin1Alt) || (rank == cRankAdmin1) || (rank == cRankPoliticalCapital) || (rank == cRankGovernmentCapital))
{
pBest_result = &temp_results[last_tok_index][candidate_index];
@ -1620,7 +1629,7 @@ public:
break;
}
}
if (pBest_result)
break;
}
@ -1686,7 +1695,7 @@ public:
}
}
}
if (!pBest_result)
{
// Fall back to choosing the highest score
@ -1698,13 +1707,13 @@ public:
{
const uint32_t candidate_index = r[i].first;
const float score = r[i].second;
if (score > best_score)
{
best_score = score;
pBest_result = &temp_results[last_tok_index][candidate_index];
pBest_ranking_vec = &r;
best_ranking_index = i;
}
@ -1730,10 +1739,9 @@ public:
resolve_res.m_best_sorted_result_index = best_ranking_index;
resolve_res.m_best_score = best_score;
const pjson::value_variant* pVariant = pBest_result->m_pVariant;
(pVariant);
[[maybe_unused]] const pjson::value_variant* pVariant = pBest_result->m_pVariant;
#if 0
#if 0
uprintf("Result: score:%f, alt: %u, id: %u, name: \"%s\", lat: %f, long: %f, ccode=%s, a1=%s, a2=%s, a3=%s, a4=%s, fclass: %s, fcode: %s, pop: %i\n",
best_score,
pBest_result->m_alt,
@ -1759,7 +1767,7 @@ public:
std::string ccode(p->find_string_obj("ccode"));
std::string fclass(p->find_string_obj("fclass"));
std::string fcode(p->find_string_obj("fcode"));
std::string a[4] = { p->find_string_obj("a1"), p->find_string_obj("a2"), p->find_string_obj("a3"), p->find_string_obj("a4") };
uint32_t num_admins = count_admins(p);
@ -1778,9 +1786,9 @@ public:
if (find_res != m_admin_map.end())
{
const std::vector< std::pair<int, int> >& recs = find_res->second;
assert(recs.size());
int cur_level = recs[0].second;
for (uint32_t j = 0; j < recs.size(); j++)
{
@ -1788,7 +1796,7 @@ public:
break;
int rec_index = recs[j].first;
const pjson::value_variant* q = &m_doc[rec_index];
if (i == (int)(num_admins - 1))
@ -1832,7 +1840,7 @@ private:
std::vector<uint_vec> m_name_hashtab;
std::unordered_map<int, int> m_geoid_to_rec;
country_info_vec m_countries;
std::unordered_map<int, int> m_rec_index_to_country_index;
std::unordered_map<int, int> m_geoid_to_country_index;
@ -1857,7 +1865,7 @@ private:
return find_res->second;
}
static void extract_tab_fields(const std::string& str, string_vec& fields)
static void extract_tab_fields(const std::string& str, string_vec& fields)
{
std::vector<int> tab_locs;
tab_locs.resize(0);
@ -2055,6 +2063,7 @@ static const char* s_kwic_stop_words[] =
"when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours",
"yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems"
};
[[maybe_unused]]
const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(s_kwic_stop_words);
static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::event_urls_map_t &event_urls, bool book_flag = false, const char *pOutput_filename_base = nullptr, const char *pTitle = nullptr, const char *pHeader = nullptr)
@ -2072,7 +2081,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
typedef std::unordered_map<std::string, word_usage_vec> word_map_t;
word_map_t word_map;
word_map.reserve(timeline.size() * 20);
std::unordered_set<std::string> stop_word_set;
for (const auto& str : s_kwic_stop_words)
stop_word_set.insert(str);
@ -2161,7 +2170,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
kwic_file_strings_header[i].push_back(string_format("# <a name=\"Top\">%s, KWIC Index Page: %s</a>", pTitle, name.c_str()));
else
kwic_file_strings_header[i].push_back(string_format("# <a name=\"Top\">UFO Event Timeline, KWIC Index Page: %s</a>", name.c_str()));
if (!book_flag)
{
kwic_file_strings_header[i].push_back("");
@ -2245,7 +2254,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
for (l = 0; l < (int)event_char_offsets.size(); l++)
if (str_ofs == event_char_offsets[l])
break;
if (l == event_char_offsets.size())
if (l == static_cast<int>(event_char_offsets.size()))
l = 0;
const int PRE_CONTEXT_CHARS = 35;
@ -2259,7 +2268,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::
// in bytes
int start_ofs = event_char_offsets[s];
int prefix_bytes = event_char_offsets[l] - start_ofs;
int end_ofs = (e >= event_char_offsets.size()) ? (int)str.size() : event_char_offsets[e];
int end_ofs = (e >= static_cast<int>(event_char_offsets.size())) ? (int)str.size() : event_char_offsets[e];
int len = end_ofs - start_ofs;
std::string context_str(string_slice(str, start_ofs, len));
@ -2340,10 +2349,10 @@ static bool load_book_json(
json js;
if (!load_json_object(pSource_filename, utf8_flag, js))
return false;
const uint32_t first_event_index = (uint32_t)timeline.size();
timeline.get_events().resize(first_event_index + js.size());
for (uint32_t i = 0; i < js.size(); i++)
{
auto obj = js[i];
@ -2413,7 +2422,7 @@ static bool load_book_json(
event_urls.insert(std::make_pair((int)(i + first_event_index), url));
}
return true;
}
@ -2555,7 +2564,7 @@ static bool create_crashconf_kwic_index()
{
ufo_timeline timeline;
ufo_timeline::event_urls_map_t event_urls;
std::string header("This is an automatically generated [KWIC Index](https://en.wikipedia.org/wiki/Key_Word_in_Context) of the 2003-2009 Crash Retrieval Conference proceedings, created by [Richard Geldreich Jr.](https://twitter.com/richgel999).\n\nHere are links to each year's proceedings and each presentation:\n");
for (uint32_t i = 0; i < NUM_CRASHCONF_URLS; i++)
@ -2587,13 +2596,14 @@ static bool create_crashconf_kwic_index()
return create_kwic_index(timeline, event_urls, true, "crashconf_kwic_", "Crash Retrieval Conference Proceedings", header.c_str());
}
[[maybe_unused]]
static int md_trim(const string_vec& args)
{
if (args.size() != 3)
panic("Expecting 2 filenames\n");
string_vec src_file_lines;
if (!read_text_file(args[1].c_str(), src_file_lines, true, nullptr))
panic("Failed reading source file %s\n", args[1].c_str());
@ -2606,7 +2616,7 @@ static int md_trim(const string_vec& args)
const std::string& str = src_file_lines[i];
if (!str.size())
continue;
if (string_find_first(str, "---------------") >= 0)
{
found_header = true;
@ -2635,7 +2645,7 @@ static int md_trim(const string_vec& args)
if (!str.size())
continue;
if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) ||
if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) ||
(string_find_first(str, "[Contact](Contact.html)") >= 0) ||
(string_find_first(str, "[Home](/)") >= 0))
{
@ -2660,7 +2670,7 @@ static int md_trim(const string_vec& args)
panic("Failed writing output file %s\n", args[2].c_str());
uprintf("Wrote file %s\n", args[2].c_str());
return EXIT_SUCCESS;
}
@ -2676,7 +2686,7 @@ static bool translate_record(const string_vec& in, string_vec& out)
string_vec prompt;
prompt.push_back("Precisely translate this UFO/saucer event record from French to English. Preserve all formatting and new lines, especially the first 2 lines, which contain the date and location. If the record is all-caps, correct it so it's not.");
prompt.push_back("\"");
for (const auto& str : in)
prompt.push_back(str);
@ -2689,6 +2699,7 @@ static bool translate_record(const string_vec& in, string_vec& out)
#endif
}
[[maybe_unused]]
static int md_translate(const string_vec& args)
{
if (args.size() != 3)
@ -2763,10 +2774,10 @@ static int md_translate(const string_vec& args)
uprintf("%s\n", cur_rec[i].c_str());
tran_recs.push_back(cur_rec);
cur_rec.resize(0);
}
cur_rec.push_back(src_file_lines[cur_line]);
}
@ -2792,7 +2803,7 @@ static int md_translate(const string_vec& args)
if (!translate_record(tran_recs[rec_index], tran_rec))
{
uprintf("Failed translating record %u!\n", rec_index);
if (tran_recs[rec_index].size())
out_lines.push_back(tran_recs[rec_index][0]);
out_lines.push_back("FAILED!\n");
@ -2837,14 +2848,14 @@ static int md_translate(const string_vec& args)
int wmain(int argc, wchar_t* argv[])
{
assert(cTotalPrefixes == sizeof(g_date_prefix_strings) / sizeof(g_date_prefix_strings[0]));
string_vec args;
convert_args_to_utf8(args, argc, argv);
// Set ANSI Latin 1; Western European (Windows) code page for output.
SetConsoleOutputCP(1252);
//SetConsoleOutputCP(CP_UTF8);
converters_init();
init_norm();
udb_init();
@ -2870,7 +2881,7 @@ int wmain(int argc, wchar_t* argv[])
uprintf("Skipping file %s - already exists\n", out_filename.c_str());
continue;
}
string_vec a = { "", in_filename, out_filename };
int status = md_translate(a);
if (status != EXIT_SUCCESS)
@ -2878,7 +2889,7 @@ int wmain(int argc, wchar_t* argv[])
}
exit(0);
#endif
bool status = false, utf8_flag = false;
unordered_string_set unique_urls;
@ -2891,7 +2902,7 @@ int wmain(int argc, wchar_t* argv[])
std::string title_str("All events");
bool conversion_flag = false;
bool crashconf_flag = false;
int arg_index = 1;
while (arg_index < argc)
{
@ -2900,7 +2911,7 @@ int wmain(int argc, wchar_t* argv[])
arg_index++;
const uint32_t num_args_remaining = argc - arg_index;
if (t == '-')
{
if (arg == "-convert")
@ -2963,7 +2974,7 @@ int wmain(int argc, wchar_t* argv[])
uprintf("Processing successful\n");
return EXIT_SUCCESS;
}
if (conversion_flag)
{
uprintf("Convert Overmeire:\n");
@ -3096,7 +3107,7 @@ int wmain(int argc, wchar_t* argv[])
panic("convert_anon failed!");
uprintf("Success\n");
} // if (conversion_flag)
uprintf("Total unique URL's: %u\n", (uint32_t)unique_urls.size());
string_vec urls;
@ -3138,7 +3149,7 @@ int wmain(int argc, wchar_t* argv[])
status = timeline.load_json("nicap_db.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading nicap_db.json");
status = timeline.load_json("trace.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading trace.json");
@ -3154,7 +3165,7 @@ int wmain(int argc, wchar_t* argv[])
status = timeline.load_json("ufo_evidence_hall.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading ufo_evidence_hall.json");
status = timeline.load_json("nuclear_tests.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading nuclear_tests.json");
@ -3178,7 +3189,7 @@ int wmain(int argc, wchar_t* argv[])
status = timeline.load_json("ancient.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading hostile.json");
status = timeline.load_json("pre_roswell_chap1.json", utf8_flag, nullptr, false);
if (!status)
panic("Failed loading pre_roswell_chap1.json");
@ -3290,7 +3301,7 @@ int wmain(int argc, wchar_t* argv[])
panic("Date failed sanity check");
}
uprintf("Load success, %zu total events\n", timeline.get_events().size());
timeline.sort();
@ -3298,7 +3309,7 @@ int wmain(int argc, wchar_t* argv[])
if (filter_strings.size())
{
ufo_timeline new_timeline;
for (uint32_t i = 0; i < timeline.size(); i++)
{
const timeline_event& event = timeline[i];
@ -3337,7 +3348,7 @@ int wmain(int argc, wchar_t* argv[])
}
if ( ((filter_all_flag) && (total_matched == filter_strings.size())) ||
((!filter_all_flag) && (total_matched > 0)) )
((!filter_all_flag) && (total_matched > 0)) )
{
new_timeline.get_events().push_back(event);
}
@ -3350,7 +3361,7 @@ int wmain(int argc, wchar_t* argv[])
timeline.get_events().swap(new_timeline.get_events());
}
uprintf("Writing timeline markdown\n");
ufo_timeline::event_urls_map_t event_urls;

View File

@ -5,16 +5,29 @@ VisualStudioVersion = 17.4.33213.308
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ufojson", "ufojson.vcxproj", "{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{2495626B-DF4D-491A-84F3-58EB01E0CAAE}"
ProjectSection(SolutionItems) = preProject
.gitignore = .gitignore
LICENSE = LICENSE
README.md = README.md
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
DebugClangCL|x64 = DebugClangCL|x64
Release|x64 = Release|x64
ReleaseClangCL|x64 = ReleaseClangCL|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.Debug|x64.ActiveCfg = Debug|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.Debug|x64.Build.0 = Debug|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.DebugClangCL|x64.ActiveCfg = DebugClangCL|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.DebugClangCL|x64.Build.0 = DebugClangCL|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.Release|x64.ActiveCfg = Release|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.Release|x64.Build.0 = Release|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.ReleaseClangCL|x64.ActiveCfg = ReleaseClangCL|x64
{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}.ReleaseClangCL|x64.Build.0 = ReleaseClangCL|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -5,10 +5,18 @@
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="DebugClangCL|x64">
<Configuration>DebugClangCL</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="ReleaseClangCL|x64">
<Configuration>ReleaseClangCL</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
@ -16,59 +24,63 @@
<ProjectGuid>{e4a0dd72-979a-469b-9b0a-4abe0b7c93d7}</ProjectGuid>
<RootNamespace>ufojson</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
<PreferredToolArchitecture>x64</PreferredToolArchitecture>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<PropertyGroup Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<PlatformToolset Condition="'$(VisualStudioVersion)'=='17.0'">v143</PlatformToolset>
<PlatformToolset Condition=" $(Configuration.EndsWith('ClangCL')) ">ClangCL</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseDebugLibraries Condition=" $(Configuration.StartsWith('Debug')) ">true</UseDebugLibraries>
<WholeProgramOptimization Condition=" $(Configuration.StartsWith('Release')) ">true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<PropertyGroup>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerWorkingDirectory>bin</LocalDebuggerWorkingDirectory>
<LocalDebuggerCommandArguments>-convert</LocalDebuggerCommandArguments>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<LanguageStandard>stdcpp17</LanguageStandard>
<ConformanceMode>true</ConformanceMode>
<DiagnosticsFormat>Caret</DiagnosticsFormat>
<WarningLevel>Level4</WarningLevel>
<!-- <TreatWarningAsError>true</TreatWarningAsError> -->
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<FunctionLevelLinking>true</FunctionLevelLinking>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ItemDefinitionGroup Condition=" $(Configuration.StartsWith('Debug')) ">
<ClCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition=" $(Configuration.StartsWith('Release')) ">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
@ -81,12 +93,10 @@
<ClCompile Include="stem.c" />
<ClCompile Include="udb.cpp" />
<ClInclude Include="converters.h" />
<ClInclude Include="pjson.h" />
<ClInclude Include="stem.h" />
<ClInclude Include="udb_tables.h" />
<ClCompile Include="ufojson.cpp">
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Level4</WarningLevel>
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Level4</WarningLevel>
</ClCompile>
<ClCompile Include="ufojson.cpp" />
<ClCompile Include="ufojson_core.cpp" />
<ClCompile Include="utils.cpp" />
</ItemGroup>
@ -105,4 +115,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -1,96 +1,53 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Source Files\libsoldout">
<Filter Include="libsoldout">
<UniqueIdentifier>{931391b6-b6ef-48f8-82ca-8723e5dd5d93}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ufojson.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="libsoldout\array.c">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClCompile>
<ClCompile Include="libsoldout\buffer.c">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClCompile>
<ClCompile Include="libsoldout\markdown.c">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClCompile>
<ClCompile Include="libsoldout\renderers.c">
<Filter>Source Files\libsoldout</Filter>
</ClCompile>
<ClCompile Include="utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="markdown_proc.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="udb.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="ufojson_core.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="converters.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="stem.c">
<Filter>Source Files</Filter>
<Filter>libsoldout</Filter>
</ClCompile>
<ClCompile Include="converters.cpp" />
<ClCompile Include="markdown_proc.cpp" />
<ClCompile Include="stem.c" />
<ClCompile Include="udb.cpp" />
<ClCompile Include="ufojson.cpp" />
<ClCompile Include="ufojson_core.cpp" />
<ClCompile Include="utils.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="libsoldout\array.h">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClInclude>
<ClInclude Include="libsoldout\buffer.h">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClInclude>
<ClInclude Include="libsoldout\markdown.h">
<Filter>Source Files\libsoldout</Filter>
<Filter>libsoldout</Filter>
</ClInclude>
<ClInclude Include="libsoldout\renderers.h">
<Filter>Source Files\libsoldout</Filter>
</ClInclude>
<ClInclude Include="resource.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="utils.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="markdown_proc.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="udb.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="ufojson_core.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="udb_tables.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="converters.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="utf8.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="stem.h">
<Filter>Source Files</Filter>
<Filter>libsoldout</Filter>
</ClInclude>
<ClInclude Include="resource.h" />
<ClInclude Include="converters.h" />
<ClInclude Include="markdown_proc.h" />
<ClInclude Include="stem.h" />
<ClInclude Include="udb.h" />
<ClInclude Include="udb_tables.h" />
<ClInclude Include="ufojson_core.h" />
<ClInclude Include="utf8.h" />
<ClInclude Include="utils.h" />
<ClInclude Include="pjson.h" />
</ItemGroup>
</Project>

View File

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LocalDebuggerWorkingDirectory>bin</LocalDebuggerWorkingDirectory>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerCommandArguments>-convert</LocalDebuggerCommandArguments>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LocalDebuggerWorkingDirectory>bin</LocalDebuggerWorkingDirectory>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerCommandArguments>-convert</LocalDebuggerCommandArguments>
</PropertyGroup>
</Project>

View File

@ -1,4 +1,4 @@
// ufojson_core.cpp
// ufojson_core.cpp
// Copyright (C) 2023 Richard Geldreich, Jr.
#include "ufojson_core.h"
#include "markdown_proc.h"
@ -394,7 +394,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates)
string_trim(temp);
}
if (!temp.size())
return false;
@ -443,7 +443,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates)
m_year = atoi(date_strs[2].c_str());
}
if (fix_20century_dates)
{
if ((m_year >= 1) && (m_year <= 99))
@ -627,7 +627,7 @@ bool event_date::parse_eberhart_date_range(std::string date,
return false;
d.m_plural = true;
s.pop_back();
s.pop_back();
@ -1342,6 +1342,11 @@ static void get_date_range(const event_date& evt, event_date& begin, event_date&
end.m_day = 31;
}
break;
case cNoPrefix:
case cTotalPrefixes:
assert(!"unreachable");
break;
}
}
else
@ -1978,7 +1983,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo
auto rocket_range = obj.find("rocket_range");
auto source_id = obj.find("source_id");
auto source = obj.find("source");
if (desc == obj.end())
panic("Missing desc");
@ -2003,7 +2008,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo
m_date_str = (*date);
if (!m_begin_date.parse(m_date_str.c_str(), fix_20century_dates))
panic("Failed parsing date %s\n", m_date_str.c_str());
if (end_date != obj.end())
{
m_end_date_str = (*end_date);
@ -2356,12 +2361,12 @@ void ufo_timeline::create_plaintext()
string_vec words;
get_string_words(te.m_plain_desc, words, nullptr, "-");
for (uint32_t j = 0; j < te.m_plain_refs.size(); j++)
{
string_vec temp_words;
get_string_words(te.m_plain_refs[j], temp_words, nullptr, "-");
words.insert(words.end(), temp_words.begin(), temp_words.end());
}
@ -2379,12 +2384,12 @@ void ufo_timeline::create_plaintext()
std::string tmp(ustrlwr(words[j]));
if (!tmp.size() || is_stop_word(tmp))
continue;
std::string nrm_tmp(normalize_word(tmp));
if (!nrm_tmp.size() || is_stop_word(nrm_tmp))
continue;
new_words.push_back(nrm_tmp);
}
@ -2413,7 +2418,7 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
last_event_index = std::max(last_event_index, i);
}
}
if (first_event_index > last_event_index)
panic("Can't find events");
@ -2424,28 +2429,28 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
FILE* pTimeline_file = ufopen(pTimeline_filename, "w");
if (!pTimeline_file)
panic("Failed creating file %s", pTimeline_file);
panic("Failed creating file %s", pTimeline_filename);
fputc(UTF8_BOM0, pTimeline_file);
fputc(UTF8_BOM1, pTimeline_file);
fputc(UTF8_BOM2, pTimeline_file);
fprintf(pTimeline_file, "<meta charset=\"utf-8\">\n");
if ((pDate_range_desc) && (strlen(pDate_range_desc)))
fprintf(pTimeline_file, "\n# <a name=\"Top\">UFO/UAP Event Chronology, %s, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "</a>\n\n", pDate_range_desc);
else
fprintf(pTimeline_file, "\n# <a name=\"Top\">UFO/UAP Event Chronology, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "</a>\n\n");
fputs(
u8R"(An automated compilation by <a href="https://twitter.com/richgel999">Richard Geldreich, Jr.</a> using public data from <a href="https://en.wikipedia.org/wiki/Jacques_Vall%C3%A9e">Dr. Jacques Vallée</a>,
u8R"(An automated compilation by <a href="https://twitter.com/richgel999">Richard Geldreich, Jr.</a> using public data from <a href="https://en.wikipedia.org/wiki/Jacques_Vall%C3%A9e">Dr. Jacques Vallée</a>,
<a href="https://www.academia.edu/9813787/GOVERNMENT_INVOLVEMENT_IN_THE_UFO_COVER_UP_CHRONOLOGY_based">Pea Research</a>, <a href="http://www.cufos.org/UFO_Timeline.html">George M. Eberhart</a>,
<a href="https://en.wikipedia.org/wiki/Richard_H._Hall">Richard H. Hall</a>, <a href="https://web.archive.org/web/20160821221627/http://www.ufoinfo.com/onthisday/sametimenextyear.html">Dr. Donald A. Johnson</a>,
<a href="https://medium.com/@richgel99/1958-keziah-poster-recreation-completed-82fdb55750d8">Fred Keziah</a>, <a href="https://github.com/richgel999/uap_resources/blob/main/bluebook_uncensored_unknowns_don_berliner.pdf">Don Berliner</a>,
<a href="https://www.openminds.tv/larry-hatch-ufo-database-creator-remembered/42142">Larry Hatch</a>, [NICAP](https://www.nicap.org/), [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jérôme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group.
<a href="https://www.openminds.tv/larry-hatch-ufo-database-creator-remembered/42142">Larry Hatch</a>, [NICAP](https://www.nicap.org/), [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jérôme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group.
## Some non-summarized events fall under one of these copyrights:
- Richard Geldreich, Jr. - Copyright (c) 2023 (events marked \"maj2\" unless otherwise attributed)
- Dr. Jacques F. Vallée - Copyright (c) 1993
- Dr. Jacques F. Vallée - Copyright (c) 1993
- LeRoy Pea - Copyright (c) 9/8/1988 (updated 3/17/2005)
- George M. Eberhart - Copyright (c) 2022
- Dr. Donald A. Johnson - Copyright (c) 2012
@ -2453,18 +2458,18 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD
- Larry Hatch - Copyright (c) 1992-2002
- Thomas R. Adams - Copyright (c) 1991
- Richard Dolan - Copyright (c) 2002
- Jérôme Beau - Copyright (c) 2000-2023
- Jérôme Beau - Copyright (c) 2000-2023
## Update History:
- v1.46: Adding ~3700 events, translated from the French chronology [_Mini catalogue chronologique des observations OVNI_](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html) by Belgian ufologist [Godelieve Van Overmeire, 1935-2021](http://cobeps.org/fr/godelieve-van-overmeire). Note these events are from the old HTML version on archive.org, not the larger [(10k event) PDF version](http://www.cobeps.org/pdf/Chronologie-OVNI-VOG.pdf). It is unclear if these events are copyrighted. I didn't see a copyright in either the HTML or PDF versions.
- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jérôme Beau. Its license is [here](https://rr0.org/Copyright.html).
- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jérôme Beau. Its license is [here](https://rr0.org/Copyright.html).
- v1.40: Added digitized events/newspaper clippings from [Frank Scully's papers at the American Heritage Center in Laramie, WY](https://archiveswest.orbiscascade.org/ark:80444/xv506256), summarized the events from the timeline on the [Disclosure Diaries](https://www.disclosurediaries.com/) website, and added more misc. events. Fixed auto-translation issue in the search page.
- v1.38: Added a [client-side search engine](search.html). There are a bunch of features I'm going to add to this engine, for now it can only search for keywords in the desc, location and and reference fields.
- v1.37: Updated intro text, added total number of events to each event year, added a few 1800's events.
- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 19411973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY).
- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 19411973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY).
- v1.34: Added more modern events, 1917 Mystery Airplane newspaper articles.
- v1.33: More events: Events from George D. Fawcett, short AI summaries of Stringfield's 1978 MUFON symposium presentation, and short AI summaries of the pre-industrial era sighting events from the book [_Wonders in the Sky: Unexplained Aerial Objects from Antiquity to Modern Times_](https://www.amazon.com/Wonders-Sky-Unexplained-Objects-Antiquity/dp/1585428205).
- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes
- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes
- v1.28: Added KWIC (Key Word in Context) index.
- v1.27: Imported Anonymous PDF's contents, originally from [here](https://pdfhost.io/v/gR8lAdgVd_Uap_Timeline_Prepared_By_Another), with fixed URL's
- v1.23-1.24: Added a handful of key historical events, such as Edward Tauss the head of CIA UFO disinformation in the 50's
@ -2482,7 +2487,7 @@ Best viewed on a desktop/laptop, not a mobile device. On Windows, Firefox works
I've split up the timeline into 4 parts, to reduce their sizes: distant past up to 1949, 1950-1959, 1960-1979, and 1980-present.
The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of
The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of
UFO/UAP related _events_, not necessarily or exclusively UFO _sightings_. **This is not exclusively a UFO sightings timeline or database.**
Some sighting reports or events appear multiple times in this timeline because they appear in more than one data source. I view this as a useful feature.
@ -2492,7 +2497,7 @@ Currently, the events are not sorted by time of day, only by date. Some sources
A few events don't have firm dates, for example "Summer of 1947", or "Late July 1952". In these instances the compilation code uses fixed dates I selected for date sorting purposes. (See the code for the specific dates.)
## Source Code:
This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called “ufojson”. It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file);
This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called “ufojson”. It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file);
fputs("\n", pTimeline_file);
@ -2569,7 +2574,7 @@ u8R"(## Year Ranges
for (uint32_t i = first_event_index; i <= last_event_index; i++)
{
int year = timeline_events[i].m_begin_date.m_year;
year_histogram[year] = year_histogram[year] + 1;
}
@ -2600,7 +2605,7 @@ u8R"(## Year Ranges
//std::string url( string_format("[%s #%u](%s#%08X)", timeline_events[i].m_date_str.c_str(), i, html_filename.c_str(), hash) );
//<a href = "https://www.example.com">link to Example.com< / a> inside the pre section.
std::string url( string_format("<a href=\"%s#%08X\">%s #%u</a>",
std::string url( string_format("<a href=\"%s#%08X\">%s #%u</a>",
html_filename.c_str(), hash,
timeline_events[i].m_date_str.c_str(), i) );
@ -2670,6 +2675,6 @@ bool ufo_timeline::load_json(const char* pFilename, bool& utf8_flag, const char*
timeline_events[first_event_index + i].from_json(obj, pSource_override, fix_20century_dates);
}
return true;
return success;
}

View File

@ -67,42 +67,42 @@ struct event_date
bool m_estimated; // (estimated)
event_date();
event_date(const event_date& other);
bool sanity_check() const;
bool operator== (const event_date& rhs) const;
bool operator!= (const event_date& rhs) const;
event_date& operator =(const event_date& rhs);
void clear();
bool is_valid() const;
std::string get_string() const;
// Parses basic dates (not ranges).
// Parses basic dates (not ranges).
// Date can end in "(approximate)", "(estimated)", "?", or "'s".
// 2 digit dates converted to 1900+.
// Supports year, month/year, or month/day/year.
bool parse(const char* pStr, bool fix_20century_dates);
// More advanced date range parsing, used for converting the Eberhart timeline.
// Note this doesn't support "'s", "(approximate)", "(estimated)", or converting 2 digit years to 1900'.
static bool parse_eberhart_date_range(std::string date,
event_date& begin_date,
event_date& end_date, event_date& alt_date,
int required_year = -1);
// Note the returned date may be invalid. It's only intended for sorting/comparison purposes against other sort dates.
void get_sort_date(int& year, int& month, int& day) const;
// Compares two timeline dates. true if lhs < rhs
static bool compare(const event_date& lhs, const event_date& rhs);
private:
static bool check_date_prefix(const event_date& date);
@ -112,7 +112,7 @@ struct timeline_event
{
std::string m_date_str;
std::string m_time_str; // military, but currently it's in any format (not parsed yet)
std::string m_alt_date_str;
std::string m_end_date_str;
@ -123,7 +123,7 @@ struct timeline_event
std::string m_desc; // Markdown
string_vec m_type;
string_vec m_refs; // Markdown
string_vec m_locations;
string_vec m_attributes;
string_vec m_see_also;
@ -145,15 +145,15 @@ struct timeline_event
std::string m_plain_desc; // Computed, ignored for comparison purposes, not deserialized from JSON
string_vec m_plain_refs; // Computed, ignored for comparison purposes, not deserialized from JSON
std::string m_search_words; // Computed, ignored for comparison purposes, not deserialized from JSON
bool operator==(const timeline_event& rhs) const;
bool operator!=(const timeline_event& rhs) const;
bool operator< (const timeline_event& rhs) const;
void print(FILE* pFile) const;
void from_json(const json& obj, const char* pSource_override, bool fix_20century_dates);
void to_json(json& j) const;
uint32_t get_crc32() const;

122
utils.cpp
View File

@ -114,6 +114,7 @@ std::string dos_to_utf8(const std::string& str)
return wchar_to_utf8(wstr);
}
_Use_decl_annotations_
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
{
uint32_t buf_size = 8192;
@ -129,7 +130,7 @@ bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
return false;
}
if (res <= buf.size() - 1)
if (res <= static_cast<int>(buf.size() - 1))
break;
buf_size *= 2;
@ -142,6 +143,7 @@ bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
return true;
}
_Use_decl_annotations_
void ufprintf(FILE* pFile, const char* pFmt, ...)
{
std::vector<char> buf;
@ -155,11 +157,12 @@ void ufprintf(FILE* pFile, const char* pFmt, ...)
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
// Not thread safe, but we don't care
_setmode(_fileno(pFile), _O_U16TEXT);
(void)_setmode(_fileno(pFile), _O_U16TEXT);
fputws(&wbuf[0], pFile);
_setmode(_fileno(pFile), _O_TEXT);
(void)_setmode(_fileno(pFile), _O_TEXT);
}
_Use_decl_annotations_
void uprintf(const char* pFmt, ...)
{
std::vector<char> buf;
@ -173,11 +176,12 @@ void uprintf(const char* pFmt, ...)
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
// Not thread safe, but we don't care
_setmode(_fileno(stdout), _O_U16TEXT);
(void)_setmode(_fileno(stdout), _O_U16TEXT);
fputws(&wbuf[0], stdout);
_setmode(_fileno(stdout), _O_TEXT);
(void)_setmode(_fileno(stdout), _O_TEXT);
}
_Use_decl_annotations_
std::string string_format(const char* pMsg, ...)
{
std::vector<char> buf;
@ -195,6 +199,7 @@ std::string string_format(const char* pMsg, ...)
return res;
}
_Use_decl_annotations_
void panic(const char* pMsg, ...)
{
char buf[4096];
@ -256,8 +261,8 @@ int string_ifind_first(const std::string& str, const char* pPhrase)
const size_t str_size = str.size();
const size_t phrase_size = strlen(pPhrase);
assert((int)str_size == str_size);
assert((int)phrase_size == phrase_size);
assert(str_size == str_size);
assert(phrase_size == phrase_size);
assert(phrase_size);
if ((!str_size) || (!phrase_size) || (phrase_size > str_size))
@ -270,7 +275,7 @@ int string_ifind_first(const std::string& str, const char* pPhrase)
if (_strnicmp(str.c_str() + ofs, pPhrase, phrase_size) == 0)
return (int)ofs;
}
return -1;
}
@ -342,7 +347,7 @@ std::string encode_url(const std::string& url)
//const bool is_upper = (c >= 'A') && (c <= 'Z');
//const bool is_lower = (c >= 'a') && (c <= 'z');
// Escape some problematic charactes that confuse some Markdown parsers (even after using Markdown '\' escapes)
// Escape some problematic characters that confuse some Markdown parsers (even after using Markdown '\' escapes)
if ((c == ')') || (c == '(') || (c == '_') || (c == '*'))
{
res.push_back('%');
@ -451,7 +456,7 @@ bool read_binary_file(const char* pFilename, uint8_vec& buf)
}
_fseeki64(pFile, 0, SEEK_SET);
if (len > MAX_BINARY_FILE_LEN)
if (static_cast<uint64_t>(len) > MAX_BINARY_FILE_LEN)
return false;
buf.resize(len);
@ -475,7 +480,7 @@ bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines, b
if (pUTF8_flag)
*pUTF8_flag = false;
while (!feof(pFile))
{
char buf[16384];
@ -677,7 +682,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
std::string col_seps = lines[3];
if ((!col_seps.size()) || (col_seps[0] != '-') || (col_seps.back() != '-'))
panic("Invalid column seperator line");
panic("Invalid column separator line");
for (uint32_t i = 0; i < col_seps.size(); i++)
{
@ -720,13 +725,13 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
for (uint32_t i = 0; i < column_info.size(); i++)
{
col_titles[i] = col_line;
if (column_info[i].first)
col_titles[i].erase(0, column_info[i].first);
if (column_info[i].second > col_titles[i].size())
panic("invalid columns");
col_titles[i].erase(column_info[i].second, col_titles[i].size() - column_info[i].second);
string_trim(col_titles[i]);
}
@ -737,7 +742,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
uint32_t cur_line = 4;
uint32_t cur_record_index = 0;
[[maybe_unused]] uint32_t cur_record_index = 0;
while (cur_line < lines.size())
{
@ -804,7 +809,7 @@ bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std:
l = ansi_to_utf8(l);
rows.push_back(col_lines);
cur_record_index++;
}
@ -850,11 +855,11 @@ bool invoke_curl(const std::string& args, string_vec& reply)
uprintf("PDF file detected\n");
std::string filename(args);
for (size_t i = filename.size() - 1; i >= 0; i--)
for (int i = static_cast<int>(filename.size() - 1); i >= 0; i--)
{
if (filename[i] == '/')
{
filename.erase(0, i + 1);
filename.erase(0, static_cast<size_t>(i + 1));
break;
}
}
@ -879,8 +884,14 @@ bool invoke_curl(const std::string& args, string_vec& reply)
new_link_deescaped.push_back(c);
}
rename("__temp.html", new_link_deescaped.c_str());
uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str());
if (rename("__temp.html", new_link_deescaped.c_str()) == 0)
{
uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str());
}
else
{
uprintf("FAILED to rename __temp.html to %s\n", new_link_deescaped.c_str());
}
return true;
}
@ -939,10 +950,10 @@ std::string string_slice(const std::string& str, size_t ofs, size_t len)
std::string res(str);
if (ofs)
res.erase(0, ofs);
if (len)
res.resize(len);
return res;
}
@ -996,7 +1007,7 @@ bool invoke_openai(const string_vec &prompt, string_vec &reply)
// Invoke openai.exe
const uint32_t MAX_TRIES = 3;
uint32_t num_tries;
for (num_tries = 0; num_tries < MAX_TRIES; ++num_tries)
{
if (num_tries)
@ -1062,11 +1073,11 @@ bool load_json_object(const char* pFilename, bool& utf8_flag, json &result_obj)
if (!result_obj.is_object() && !result_obj.is_array())
return false;
return true;
return success;
}
void string_tokenize(
const std::string &str,
const std::string &str,
const std::string &whitespace,
const std::string &break_chars,
string_vec &tokens,
@ -1078,7 +1089,7 @@ void string_tokenize(
std::string cur_token;
uint32_t cur_ofs = 0;
for (uint32_t i = 0; i < str.size(); i++)
{
uint8_t c = str[i];
@ -1129,6 +1140,7 @@ void string_tokenize(
}
}
// #NOTE In C++20, there's a PI constant in <numbers> https://en.cppreference.com/w/cpp/numeric/constants
const double PI = 3.141592653589793238463;
double deg2rad(double deg)
@ -1144,7 +1156,7 @@ double rad2deg(double rad)
// input in degrees
double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit)
{
if ((lat1 == lat2) && (lon1 == lon2))
if ((lat1 == lat2) && (lon1 == lon2))
return 0;
double theta = lon1 - lon2;
@ -1154,7 +1166,7 @@ double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit
dist = dist * 60 * 1.1515;
switch (unit)
switch (unit)
{
case 'M':
break;
@ -1185,37 +1197,37 @@ std::string remove_bom(std::string str)
return str;
}
int get_next_utf8_code_point_len(const uint8_t* pStr)
int get_next_utf8_code_point_len(const uint8_t* pStr)
{
if (pStr == nullptr || *pStr == 0)
if (pStr == nullptr || *pStr == 0)
{
// Return 0 if the input is null or points to a null terminator
return 0;
return 0;
}
const uint8_t firstByte = *pStr;
if ((firstByte & 0x80) == 0)
{
if ((firstByte & 0x80) == 0)
{
// Starts with 0, ASCII character
return 1;
}
else if ((firstByte & 0xE0) == 0xC0)
{
else if ((firstByte & 0xE0) == 0xC0)
{
// Starts with 110
return 2;
}
else if ((firstByte & 0xF0) == 0xE0)
{
else if ((firstByte & 0xF0) == 0xE0)
{
// Starts with 1110
return 3;
}
else if ((firstByte & 0xF8) == 0xF0)
{
else if ((firstByte & 0xF8) == 0xF0)
{
// Starts with 11110
return 4;
}
else
else
{
// Invalid UTF-8 byte sequence
return -1;
@ -1239,9 +1251,9 @@ void get_string_words(
std::string whitespace(" \t\n\r,;:.!?()[]*/\"");
if (pAdditional_whitespace)
whitespace += std::string(pAdditional_whitespace);
int word_start_ofs = -1;
uint32_t cur_ofs = 0;
while ((cur_ofs < str.size()) && (pStr[cur_ofs]))
{
@ -1303,7 +1315,7 @@ void get_string_words(
else if (pStr[cur_ofs + 2] == 0x9D)
is_whitespace = true;
}
if (is_whitespace)
{
if (cur_token.size())
@ -1331,7 +1343,7 @@ void get_string_words(
cur_token.push_back(pStr[cur_ofs + i]);
}
}
cur_ofs += l;
}
@ -1347,7 +1359,7 @@ void get_string_words(
void get_utf8_code_point_offsets(const char* pStr, int_vec& offsets)
{
uint32_t cur_ofs = 0;
offsets.resize(0);
while (pStr[cur_ofs])
@ -1439,14 +1451,14 @@ static const char* g_stop_words[] =
"when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours",
"yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems"
};
static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words);
[[maybe_unused]] static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words);
std::set<std::string> g_stop_words_set;
void init_norm()
{
g_stop_words_set.clear();
for (const auto& str : g_stop_words)
for (const char* str : g_stop_words)
g_stop_words_set.insert(str);
for (uint32_t i = 0; i < std::size(g_char_norm_up); i++)
@ -1507,7 +1519,7 @@ void init_norm()
}
}
// Resulting characters are guaranteed to be <128 - useful for searching purposes.
// Resulting characters are guaranteed to be <128 - useful for searching purposes.
// Unrecognized Unicode characters are deleted.
void normalize_diacritics(const char* pStr, std::string& res)
{
@ -1610,10 +1622,10 @@ std::string normalize_word(const std::string& str)
if (str.size() > MAX_STRING_SIZE)
panic("String too long");
char buf[MAX_STRING_SIZE + 1];
strcpy_s(buf, sizeof(buf), str.c_str());
// Convert utf8 string to lower
utf8lwr(buf);
@ -1622,7 +1634,7 @@ std::string normalize_word(const std::string& str)
norm.reserve(strlen(buf));
normalize_diacritics(buf, norm);
// Remove any non-letter or non-digit characters (we assume this is a word, so whitespace gets removed too)
std::string temp;
temp.reserve(norm.size());
@ -1676,10 +1688,10 @@ std::string string_replace(const std::string& str, const std::string& find, cons
assert(find.size());
if (!find.size() || !str.size())
return str;
const uint8_t* pStr = (const uint8_t *)str.c_str();
const size_t str_size = str.size();
const uint8_t* pFind = (const uint8_t*)find.c_str();
const size_t find_size = find.size();
@ -1695,7 +1707,7 @@ std::string string_replace(const std::string& str, const std::string& find, cons
assert(0);
str_char_size = 1;
}
const size_t str_remaining = str_size - str_ofs;
if ((str_remaining >= find_size) && (memcmp(pStr + str_ofs, pFind, find_size) == 0))
{
@ -1718,7 +1730,7 @@ bool does_file_exist(const char* pFilename)
FILE* pFile = ufopen(pFilename, "rb");
if (!pFile)
return false;
fclose(pFile);
return true;
}

18
utils.h
View File

@ -15,17 +15,15 @@
#include <fcntl.h>
#include <io.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <cstdint>
#include <stdlib.h>
#include <stdio.h>
#include <algorithm>
#include <map>
#include <set>
#include <varargs.h>
#include <string>
#include <unordered_set>
@ -52,8 +50,6 @@ const uint32_t ANSI_SOFT_HYPHEN = 0xAD;
template<typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(T)); }
void panic(const char* pMsg, ...);
//------------------------------------------------------------------
inline bool string_is_digits(const std::string& s)
@ -87,18 +83,18 @@ inline std::string ansi_to_utf8(const std::string& str) { return wchar_to_utf8(u
// Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expecting thing for chars<32, and we need them.
std::string dos_to_utf8(const std::string& str);
// utf8 string format
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args);
// utf8 string format
bool vformat(std::vector<char>& buf, _Printf_format_string_ const char* pFmt, va_list args);
// utf8 printf to FILE*
void ufprintf(FILE* pFile, const char* pFmt, ...);
void ufprintf(FILE* pFile, _Printf_format_string_ const char* pFmt, ...);
// utf8 print to stdout
void uprintf(const char* pFmt, ...);
void uprintf(_Printf_format_string_ const char* pFmt, ...);
std::string string_format(const char* pMsg, ...);
std::string string_format(_Printf_format_string_ const char* pMsg, ...);
void panic(const char* pMsg, ...);
[[noreturn]] void panic(_Printf_format_string_ const char* pMsg, ...);
// Open a file given a utf8 filename
FILE* ufopen(const char* pFilename, const char* pMode);