2023-02-20 17:58:51 -05:00
|
|
|
|
// ufojson.cpp
|
|
|
|
|
// Copyright (C) 2023 Richard Geldreich, Jr.
|
2023-02-03 13:31:18 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
#include "utils.h"
|
|
|
|
|
#include "markdown_proc.h"
|
|
|
|
|
#include "ufojson_core.h"
|
|
|
|
|
#include "udb.h"
|
|
|
|
|
#include "converters.h"
|
2023-02-03 13:31:18 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
//-------------------------------------------------------------------
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
static void detect_bad_urls()
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
string_vec unique_urls;
|
2023-02-09 17:26:42 -05:00
|
|
|
|
bool utf8_flag = false;
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!read_text_file("unique_urls.txt", unique_urls, utf8_flag))
|
|
|
|
|
panic("Can't read file unique_urls.txt");
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
uint32_t total_processed = 0;
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
string_vec failed_urls;
|
|
|
|
|
for (const auto& url : unique_urls)
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
printf("-------------- URL : %u\n", total_processed);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
string_vec reply;
|
|
|
|
|
bool status = invoke_curl(url, reply);
|
|
|
|
|
bool not_found = false;
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (status)
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
for (auto str : reply)
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
str = string_lower(str);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if ((string_find_first(str, "404 not found") != -1) ||
|
|
|
|
|
(string_find_first(str, "cannot find the requested page") != -1))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
not_found = true;
|
2023-02-09 17:26:42 -05:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if ((!status) || (not_found))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
failed_urls.push_back(url);
|
|
|
|
|
printf("FAILED: %s\n", url.c_str());
|
2023-02-09 17:26:42 -05:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
printf("SUCCEEDED: %s\n", url.c_str());
|
2023-02-09 17:26:42 -05:00
|
|
|
|
}
|
2023-02-20 17:58:51 -05:00
|
|
|
|
|
|
|
|
|
total_processed++;
|
|
|
|
|
if ((total_processed % 25) == 24)
|
2023-02-09 17:26:42 -05:00
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!write_text_file("failed_urls.txt", failed_urls, false))
|
|
|
|
|
panic("Unable to create file failed_urs.txt");
|
2023-02-09 17:26:42 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
printf("Total urls: %zu, failed: %zu\n", unique_urls.size(), failed_urls.size());
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!write_text_file("failed_urls.txt", failed_urls, false))
|
|
|
|
|
panic("Unable to create file failed_urs.txt");
|
2023-02-09 17:26:42 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Windows defaults to code page 437:
|
|
|
|
|
// https://www.ascii-codes.com/
|
|
|
|
|
// We want code page 1252
|
|
|
|
|
// http://www.alanwood.net/demos/ansi.html
|
|
|
|
|
|
|
|
|
|
// Main code
|
|
|
|
|
int wmain(int argc, wchar_t* argv[])
|
|
|
|
|
{
|
|
|
|
|
assert(cTotalPrefixes == sizeof(g_date_prefix_strings) / sizeof(g_date_prefix_strings[0]));
|
2023-02-20 17:58:51 -05:00
|
|
|
|
|
2023-02-09 17:26:42 -05:00
|
|
|
|
string_vec args;
|
|
|
|
|
convert_args_to_utf8(args, argc, argv);
|
|
|
|
|
|
|
|
|
|
// Set ANSI Latin 1; Western European (Windows) code page for output.
|
|
|
|
|
SetConsoleOutputCP(1252);
|
2023-02-20 17:58:51 -05:00
|
|
|
|
//SetConsoleOutputCP(CP_UTF8);
|
|
|
|
|
|
|
|
|
|
converters_init();
|
|
|
|
|
|
|
|
|
|
udb_init();
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
//udb_dump();
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
detect_bad_urls();
|
|
|
|
|
return 0;
|
|
|
|
|
#endif
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
std::vector<string_vec> rows;
|
|
|
|
|
std::string title;
|
|
|
|
|
string_vec col_titles;
|
|
|
|
|
load_column_text("ufoevid13.txt", rows, title, col_titles);
|
|
|
|
|
return 0;
|
|
|
|
|
#endif
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
bool status = false, utf8_flag = false;
|
|
|
|
|
|
|
|
|
|
unordered_string_set unique_urls;
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Hatch UDB:\n");
|
|
|
|
|
if (!udb_convert())
|
|
|
|
|
panic("udb_convert() failed!");
|
|
|
|
|
uprintf("Success\n");
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
|
|
|
|
#if 1
|
|
|
|
|
uprintf("Convert NICAP:\n");
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!convert_nicap(unique_urls))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("convert_nicap() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Johnson:\n");
|
|
|
|
|
if (!convert_johnson())
|
|
|
|
|
panic("convert_johnson() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Eberhart:\n");
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!convert_eberhart(unique_urls))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("convert_eberthart() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Trace:\n");
|
|
|
|
|
if (!convert_magnonia("trace.txt", "trace.json", "Trace", " [Trace Cases](https://www.thenightskyii.org/trace.html)"))
|
|
|
|
|
panic("convert_magnonia() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Magnonia:\n");
|
|
|
|
|
if (!convert_magnonia("magnonia.txt", "magnonia.json"))
|
|
|
|
|
panic("convert_magnonia() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Hall:\n");
|
|
|
|
|
if (!convert_hall())
|
|
|
|
|
panic("convert_hall() failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
|
|
|
|
|
uprintf("Convert Bluebook Unknowns:\n");
|
|
|
|
|
if (!convert_bluebook_unknowns())
|
|
|
|
|
panic("convert_bluebook_unknowns failed!");
|
|
|
|
|
uprintf("Success\n");
|
|
|
|
|
#endif
|
2023-02-20 17:58:51 -05:00
|
|
|
|
|
|
|
|
|
uprintf("Total unique URL's: %u\n", (uint32_t)unique_urls.size());
|
|
|
|
|
|
|
|
|
|
string_vec urls;
|
|
|
|
|
for (const auto& s : unique_urls)
|
|
|
|
|
urls.push_back(s);
|
|
|
|
|
std::sort(urls.begin(), urls.end());
|
|
|
|
|
write_text_file("unique_urls.txt", urls, false);
|
|
|
|
|
uprintf("Wrote unique_urls.txt\n");
|
|
|
|
|
|
|
|
|
|
ufo_timeline timeline;
|
|
|
|
|
|
|
|
|
|
status = timeline.load_json("maj2.json", utf8_flag, "Maj2", true);
|
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading maj2.json");
|
|
|
|
|
for (uint32_t i = 0; i < timeline.size(); i++)
|
|
|
|
|
timeline[i].m_source_id = string_format("%s_%u", timeline[i].m_source.c_str(), i);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("hatch_udb.json", utf8_flag, nullptr, false);
|
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading hatch_udb.json");
|
|
|
|
|
|
|
|
|
|
status = timeline.load_json("nicap_db.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading nicap_db.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("trace.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading trace.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("magnonia.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading magnolia.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("bb_unknowns.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading bb_unknowns.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("ufo_evidence_hall.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading ufo_evidence_hall.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("eberhart.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading eberhart.json");
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
status = timeline.load_json("johnson.json", utf8_flag, nullptr, false);
|
2023-02-09 17:26:42 -05:00
|
|
|
|
if (!status)
|
|
|
|
|
panic("Failed loading johnson.json");
|
2023-02-20 17:58:51 -05:00
|
|
|
|
|
2023-02-09 17:26:42 -05:00
|
|
|
|
for (uint32_t i = 0; i < timeline.size(); i++)
|
|
|
|
|
{
|
|
|
|
|
if (!timeline[i].m_begin_date.sanity_check())
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
if (timeline[i].m_end_date.is_valid())
|
|
|
|
|
{
|
|
|
|
|
if (!timeline[i].m_end_date.sanity_check())
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (timeline[i].m_alt_date.is_valid())
|
|
|
|
|
{
|
|
|
|
|
if (!timeline[i].m_alt_date.sanity_check())
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// roundtrip test
|
|
|
|
|
event_date test_date;
|
|
|
|
|
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!test_date.parse(timeline[i].m_date_str.c_str(), false))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
if (test_date != timeline[i].m_begin_date)
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
std::string test_str(timeline[i].m_begin_date.get_string());
|
|
|
|
|
if (test_str != timeline[i].m_date_str)
|
|
|
|
|
{
|
|
|
|
|
fprintf(stderr, "Date failed roundtrip: %s %s %s\n", timeline[i].m_source_id.c_str(), timeline[i].m_date_str.c_str(), test_str.c_str());
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (timeline[i].m_end_date.is_valid())
|
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!test_date.parse(timeline[i].m_end_date_str.c_str(), false))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
if (test_date != timeline[i].m_end_date)
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
std::string test_str2(timeline[i].m_end_date.get_string());
|
|
|
|
|
if (test_str2 != timeline[i].m_end_date_str)
|
|
|
|
|
{
|
|
|
|
|
fprintf(stderr, "Date failed roundtrip: %s %s %s\n", timeline[i].m_source_id.c_str(), timeline[i].m_end_date_str.c_str(), test_str2.c_str());
|
|
|
|
|
panic("End date failed sanity check");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (timeline[i].m_end_date_str.size())
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
if (timeline[i].m_alt_date.is_valid())
|
|
|
|
|
{
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!test_date.parse(timeline[i].m_alt_date_str.c_str(), false))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
if (test_date != timeline[i].m_alt_date)
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
std::string test_str3(timeline[i].m_alt_date.get_string());
|
|
|
|
|
if (test_str3 != timeline[i].m_alt_date_str)
|
|
|
|
|
{
|
|
|
|
|
fprintf(stderr, "Date failed roundtrip: %s %s %s\n", timeline[i].m_source_id.c_str(), timeline[i].m_alt_date_str.c_str(), test_str3.c_str());
|
|
|
|
|
panic("Alt date failed sanity check");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (timeline[i].m_alt_date_str.size())
|
|
|
|
|
panic("Date failed sanity check");
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uprintf("Load success, %zu total events\n", timeline.get_events().size());
|
|
|
|
|
|
|
|
|
|
timeline.sort();
|
|
|
|
|
|
|
|
|
|
// Write majestic.json, then load it and verify that it saved and loaded correctly.
|
|
|
|
|
{
|
|
|
|
|
timeline.set_name("Majestic Timeline");
|
|
|
|
|
timeline.write_file("majestic.json", true);
|
|
|
|
|
|
|
|
|
|
ufo_timeline timeline_comp;
|
|
|
|
|
bool utf8_flag_comp;
|
2023-02-20 17:58:51 -05:00
|
|
|
|
if (!timeline_comp.load_json("majestic.json", utf8_flag_comp, nullptr, false))
|
2023-02-09 17:26:42 -05:00
|
|
|
|
panic("Failed loading majestic.json");
|
|
|
|
|
|
|
|
|
|
if (timeline.get_events().size() != timeline_comp.get_events().size())
|
|
|
|
|
panic("Failed loading timeline events JSON");
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < timeline.get_events().size(); i++)
|
|
|
|
|
if (timeline[i] != timeline_comp[i])
|
|
|
|
|
panic("Failed comparing majestic.json");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
timeline.write_markdown("timeline.md");
|
2023-02-03 13:31:18 -05:00
|
|
|
|
|
|
|
|
|
uprintf("Processing successful\n");
|
|
|
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
|
}
|