mirror of
https://github.com/richgel999/ufo_data.git
synced 2025-08-11 00:00:32 -04:00
v1.22 update - added find, basic command line parsing, minor fixes to maj timeline data, stable sorting
This commit is contained in:
parent
8e6da6d1b5
commit
4fb5c81d2a
25 changed files with 937182 additions and 1221708 deletions
112
converters.cpp
112
converters.cpp
|
@ -2,6 +2,7 @@
|
|||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#include "ufojson_core.h"
|
||||
#include "markdown_proc.h"
|
||||
#include <unordered_map>
|
||||
|
||||
#define USE_OPENAI (0)
|
||||
|
||||
|
@ -877,6 +878,42 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
|||
if (!read_text_file("ufo600_906_2.md", lines, true, nullptr))
|
||||
panic("Can't read file ufo_evidence_hall.txt");
|
||||
|
||||
json eberhart_openai;
|
||||
bool utf8_flag;
|
||||
if (!load_json_object("eberhart_openai.json", utf8_flag, eberhart_openai))
|
||||
panic("Failed loading eberhart_openai.json");
|
||||
|
||||
if (eberhart_openai.find("results") == eberhart_openai.end())
|
||||
panic("Couldn't find results");
|
||||
|
||||
const auto& openai_res = eberhart_openai["results"];
|
||||
if (!openai_res.is_array())
|
||||
panic("Couldn't find results");
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint32_t> > openai_res_hash;
|
||||
|
||||
for (uint32_t l = 0; l < openai_res.size(); l++)
|
||||
{
|
||||
const auto& rec = openai_res[l];
|
||||
if (!rec.contains("event_crc32") || !rec.contains("event_date_str") || !rec.contains("locations"))
|
||||
panic("Invalid OpenAI JSON data");
|
||||
|
||||
std::vector<uint32_t> list;
|
||||
list.push_back(l);
|
||||
|
||||
auto res = openai_res_hash.insert(std::make_pair(rec["event_crc32"].get<uint32_t>(), list));
|
||||
if (!res.second)
|
||||
(res.first)->second.push_back(l);
|
||||
}
|
||||
|
||||
string_vec useful_locs;
|
||||
if (!read_text_file("eberhart_useful_locations.txt", useful_locs, true, nullptr))
|
||||
panic("failed reading eberhart_useful_locations");
|
||||
|
||||
unordered_string_set useful_locs_set;
|
||||
for (const auto& str : useful_locs)
|
||||
useful_locs_set.insert(str);
|
||||
|
||||
string_vec trimmed_lines;
|
||||
for (uint32_t i = 0; i < lines.size(); i++)
|
||||
{
|
||||
|
@ -961,6 +998,9 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
|||
cur_line = 0;
|
||||
|
||||
uint32_t event_num = 0, total_unattributed = 0;
|
||||
uint32_t total_openai_recs_found = 0;
|
||||
|
||||
string_vec location_strs;
|
||||
|
||||
while (cur_line < lines.size())
|
||||
{
|
||||
|
@ -1114,12 +1154,79 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
|||
|
||||
if (json_alt_date.size())
|
||||
fprintf(pOut_file, " \"alt_date\" : \"%s\",\n", json_alt_date.c_str());
|
||||
|
||||
|
||||
fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(desc).c_str());
|
||||
fprintf(pOut_file, " \"source_id\" : \"Eberhart_%u\",\n", event_num);
|
||||
|
||||
fprintf(pOut_file, " \"source\" : \"EberhartUFOI\",\n");
|
||||
|
||||
uint32_t hash = crc32((const uint8_t*)desc.c_str(), desc.size());
|
||||
hash = crc32((const uint8_t*)&begin_date.m_year, sizeof(begin_date.m_year), hash);
|
||||
hash = crc32((const uint8_t*)&begin_date.m_month, sizeof(begin_date.m_month), hash);
|
||||
hash = crc32((const uint8_t*)&begin_date.m_day, sizeof(begin_date.m_day), hash);
|
||||
|
||||
auto find_res = openai_res_hash.find(hash);
|
||||
if (find_res != openai_res_hash.end())
|
||||
{
|
||||
const std::vector<uint32_t>& list = find_res->second;
|
||||
|
||||
for (uint32_t l = 0; l < list.size(); l++)
|
||||
{
|
||||
const uint32_t rec_index = list[l];
|
||||
|
||||
const auto& rec = openai_res[rec_index];
|
||||
|
||||
if (!rec.contains("event_crc32") || !rec.contains("event_date_str") || !rec.contains("locations"))
|
||||
panic("Invalid OpenAI JSON data");
|
||||
|
||||
if (rec["event_crc32"] != hash)
|
||||
panic("hash failed");
|
||||
|
||||
if (rec["event_date_str"] != json_date)
|
||||
continue;
|
||||
|
||||
const auto& loc = rec["locations"];
|
||||
if (loc.size())
|
||||
{
|
||||
uint32_t total_useful_locs = 0;
|
||||
for (uint32_t k = 0; k < loc.size(); k++)
|
||||
{
|
||||
if (useful_locs_set.find(loc[k]) != useful_locs_set.end())
|
||||
total_useful_locs++;
|
||||
}
|
||||
|
||||
if (total_useful_locs)
|
||||
{
|
||||
fprintf(pOut_file, " \"location\" : [ ");
|
||||
|
||||
uint32_t total_useful_locs_printed = 0;
|
||||
|
||||
for (uint32_t k = 0; k < loc.size(); k++)
|
||||
{
|
||||
if (useful_locs_set.find(loc[k]) != useful_locs_set.end())
|
||||
{
|
||||
if (total_useful_locs_printed)
|
||||
fprintf(pOut_file, ", ");
|
||||
|
||||
fprintf(pOut_file, "\"%s\"", escape_string_for_json(loc[k]).c_str());
|
||||
|
||||
total_useful_locs_printed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
location_strs.push_back(loc[k]);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(pOut_file, " ],\n");
|
||||
}
|
||||
}
|
||||
|
||||
total_openai_recs_found++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ref.size())
|
||||
{
|
||||
fprintf(pOut_file, " \"ref\" : \"[Eberhart](http://www.cufos.org/pdfs/UFOsandIntelligence.pdf)\"\n");
|
||||
|
@ -1139,11 +1246,14 @@ bool convert_eberhart(unordered_string_set& unique_urls)
|
|||
event_num++;
|
||||
}
|
||||
|
||||
write_text_file("rejected_location_strs.txt", location_strs, true);
|
||||
|
||||
fprintf(pOut_file, "] }\n");
|
||||
fclose(pOut_file);
|
||||
|
||||
uprintf("Total records: %u\n", event_num);
|
||||
uprintf("Total unattributed: %u\n", total_unattributed);
|
||||
uprintf("Total OpenAI recs found: %u\n", total_openai_recs_found);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue