From 9ffdf1ac1dac49a0a21589363c4da467b8c5b2c0 Mon Sep 17 00:00:00 2001 From: kornman00 Date: Sat, 20 Jan 2024 14:34:26 -0800 Subject: [PATCH] Changes I've been sitting on since December Change the language standard from MSVC's default stdcpp14 to stdcpp17. Haven't evaluated what issues may or may not be present when going to stdcpp20. Enable string pooling and multiprocessor compilation. Use C++17 `[[fallthrough]]` and `[[maybe_unused]]` attributes. Use std::size for getting lengths of C arrays at compile time. Resave converters.cpp and ufojson_core.cpp as UTF-8 with BOM. Address various signed/unsigned warnings. Add nipcap_date_is_year_valid helper to deal with year values coming in as `int` but the constants being `uint32_t` (signed/unsigned mismatches). Fix constructor member initialization order issue in pjson.h. Explicitly handle some cJSONValueType values which have no conversions to silence unhandled-enum warnings. Fix missing comma in g_cap_exceptions list. --- converters.cpp | 155 ++++++------ json/json.hpp | 2 + pjson.h | 542 ++++++++++++++++++++++------------------ stem.c | 2 +- udb.cpp | 28 ++- udb_tables.h | 13 +- ufojson.aps | Bin 1260 -> 0 bytes ufojson.cpp | 233 +++++++++-------- ufojson.sln | 7 + ufojson.vcxproj | 11 +- ufojson.vcxproj.filters | 1 + ufojson_core.cpp | 55 ++-- ufojson_core.h | 38 +-- utils.cpp | 122 +++++---- utils.h | 18 +- 15 files changed, 662 insertions(+), 565 deletions(-) delete mode 100644 ufojson.aps diff --git a/converters.cpp b/converters.cpp index 71f866b..5933d5c 100644 --- a/converters.cpp +++ b/converters.cpp @@ -1,4 +1,4 @@ -// converters.cpp +﻿// converters.cpp // Copyright (C) 2023 Richard Geldreich, Jr. 
#include "ufojson_core.h" #include "markdown_proc.h" @@ -44,15 +44,15 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const fputc(UTF8_BOM0, pOut_file); fputc(UTF8_BOM1, pOut_file); fputc(UTF8_BOM2, pOut_file); - + fprintf(pOut_file, "{\n"); fprintf(pOut_file, "\"%s Timeline\" : [\n", pSource_override ? pSource_override : "Magonia"); //const uint32_t TOTAL_RECS = 923; - + uint32_t cur_line = 0; uint32_t rec_index = first_rec_index; - + while (cur_line < lines.size()) { if (!lines[cur_line].size()) @@ -66,7 +66,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const panic("Out of lines"); std::string first_line(lines[cur_line++]); - + std::string date_str(first_line); if (date_str.size() > TOTAL_COLS) date_str.resize(TOTAL_COLS); @@ -120,7 +120,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const if (buf.size() < TOTAL_COLS) break; - + if (desc_lines.size() == 1) { if (buf.size() >= TOTAL_COLS) @@ -214,9 +214,9 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const int year = -1, month = -1, day = -1; date_prefix_t date_prefix = cNoPrefix; std::string date_suffix; - + std::string temp_date_str(date_str); - + if (string_ends_in(temp_date_str, "'s")) { temp_date_str.resize(temp_date_str.size() - 2); @@ -387,7 +387,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const { if (date_suffix.size()) panic("Invalid date suffix"); - + fprintf(pOut_file, "%i/%i", month, year); } else @@ -415,7 +415,7 @@ bool convert_magonia(const char* pSrc_filename, const char* pDst_filename, const else fprintf(pOut_file, " \"source_id\" : \"Magonia_%u\",\n", rec_index); - fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? pSource_override : u8"ValleMagonia"); + fprintf(pOut_file, u8" \"source\" : \"%s\",\n", pSource_override ? 
pSource_override : u8"ValléeMagonia"); if (pType_override) fprintf(pOut_file, " \"type\" : \"%s\"\n", pType_override); @@ -898,7 +898,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c panic("Encountered empty line"); if (rec.size() < 54) panic("Line too small"); - + std::string date_str(rec); date_str = string_slice(date_str, 0, 16); string_trim(date_str); @@ -908,13 +908,13 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c rec = string_slice(rec, 52); string_trim(rec); - + fprintf(pOut_file, "{\n"); fprintf(pOut_file, " \"date\" : \"%s\",\n", date_str.c_str()); - + fprintf(pOut_file, " \"location\" : \"%s\",\n", escape_string_for_json(location_str).c_str()); fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(rec).c_str()); - + if (pType) fprintf(pOut_file, " \"type\" : \"%s\",\n", pType); @@ -923,7 +923,7 @@ bool convert_dolan(const char *pSrc_filename, const char *pDst_filename, const c fprintf(pOut_file, " \"source_id\" : \"%s_%u\",\n", pSource, total_recs); fprintf(pOut_file, " \"source\" : \"%s\"\n", pSource); - + fprintf(pOut_file, "}"); if (cur_line < lines.size()) fprintf(pOut_file, ","); @@ -1052,7 +1052,7 @@ bool convert_eberhart(unordered_string_set& unique_urls) std::vector list; list.push_back(l); - + auto res = openai_res_hash.insert(std::make_pair(rec["event_crc32"].get(), list)); if (!res.second) (res.first)->second.push_back(l); @@ -1174,7 +1174,7 @@ bool convert_eberhart(unordered_string_set& unique_urls) continue; } - size_t dash_pos = line.find(u8""); + size_t dash_pos = line.find(u8"—"); if (dash_pos == std::string::npos) panic("Failed finding dash\n"); @@ -1206,7 +1206,7 @@ bool convert_eberhart(unordered_string_set& unique_urls) if (temp[0] == '#') break; - size_t d = temp.find(u8""); + size_t d = temp.find(u8"—"); const uint32_t DASH_THRESH_POS = 42; if ((d != std::string::npos) && (d < DASH_THRESH_POS)) @@ -1306,7 +1306,7 @@ bool 
convert_eberhart(unordered_string_set& unique_urls) if (json_alt_date.size()) fprintf(pOut_file, " \"alt_date\" : \"%s\",\n", json_alt_date.c_str()); - + fprintf(pOut_file, " \"desc\" : \"%s\",\n", escape_string_for_json(desc).c_str()); fprintf(pOut_file, " \"source_id\" : \"Eberhart_%u\",\n", event_num); @@ -1359,9 +1359,9 @@ bool convert_eberhart(unordered_string_set& unique_urls) { if (total_useful_locs_printed) fprintf(pOut_file, ", "); - + fprintf(pOut_file, "\"%s\"", escape_string_for_json(loc[k]).c_str()); - + total_useful_locs_printed++; } else @@ -1378,7 +1378,7 @@ bool convert_eberhart(unordered_string_set& unique_urls) break; } } - + if (!ref.size()) { fprintf(pOut_file, " \"ref\" : \"[Eberhart](http://www.cufos.org/pdfs/UFOsandIntelligence.pdf)\"\n"); @@ -1497,7 +1497,7 @@ bool convert_johnson() (string_find_first(l, "Written by Donald Johnson") != -1) || (string_find_first(l, "Written by Donald A Johnson") != -1) || (string_find_first(l, "Compiled from the UFOCAT computer database") != -1) || - (string_find_first(l, u8" Donald A. Johnson") != -1) || + (string_find_first(l, u8"© Donald A. Johnson") != -1) || (string_begins_with(l, "Themes: "))) { found_end = true; @@ -1964,6 +1964,7 @@ static bool test_eberhart_date() return true; } +[[maybe_unused]] // currently unused... 
static void print_nocr(const std::string& s) { std::string new_string; @@ -1993,8 +1994,8 @@ static void converters_test() uprintf("%s\n", wchar_to_utf8(utf8_to_wchar(blah, CP_ACP)).c_str()); #endif - //fprintf(u8"frightening vision"); - //ufprintf(stderr, u8"frightening vision"); + //fprintf(u8"“frightening vision”"); + //ufprintf(stderr, u8"“frightening vision”"); assert(crc32((const uint8_t*)"TEST", 4) == 0xeeea93b8); assert(crc32((const uint8_t*)"408tdsfjdsfjsdh893!;", 20) == 0xa044e016); if (!test_eberhart_date()) return panic("test_eberhart_date failed!"); @@ -2007,11 +2008,11 @@ static void converters_test() //bufprintf(pIn, "A\nB \nC\n_This is a blah_[XXXX](YYYY(S))"); - //const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr)of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, The Ghost Rockets, UFOs 19471987, Fortean Tomes, 1987, pp. 3334))"; -// const char* p = u8R"(Blah -//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls Selenites. 
(Maria Avxentevskaya, [How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI)[Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)

Real Clear Science, December 2, 2017) )"; + //const char* p = u8R"(Chemist [Gustaf Ljunggren](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Gustaf_Ljunggren_(chemist)&sa=D&source=editors&ust=1674889728009134&usg=AOvVaw2v_Cymx15I5Ic1eNEYeeBr) of the Swedish National Defense Research Institute summarizes for the Swedish Defense staff his analysis of 27 finds of mysterious substances, allegedly from ghost rockets. None are radioactive and all have mundane explanations. (Anders Liljegren and Clas Svahn, “The Ghost Rockets,” UFOs 1947–1987, Fortean Tomes, 1987, pp. 33–34))"; +// const char* p = u8R"(Blah +//English clergyman and philosopher [_John Wilkins_](https://www.google.com/url?q=https://en.wikipedia.org/wiki/John_Wilkins&sa=D&source=editors&ust=1674889727243386&usg=AOvVaw1hw56rPPqRvDJzjdV0g8Zb) writes The Discovery of a World in the Moone, in which he highlights the similarities of the Earth and the Moon (seas, mountains, atmosphere) and concludes that the Moon is likely to be inhabited by living beings, whom the calls “Selenites.” (Maria Avxentevskaya, “[How 17th Century](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727243765&usg=AOvVaw13_nH4qqo0LYqJqnhq4_eI) [Dreamers Planned to Reach the Moon,](https://www.google.com/url?q=https://www.realclearscience.com/articles/2017/12/02/how_17th_century_dreamers_planned_to_reach_the_moon_110476.html&sa=D&source=editors&ust=1674889727244030&usg=AOvVaw2K5FMN315Pjxq_xO7wp7Ga)”

Real Clear Science, December 2, 2017) )"; - //const char* p = u8R"(Pierre Lagrange, [_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi) Actualit, no. 440 (October 2017): 2829; Wikipedia, [Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)))"; + //const char* p = u8R"(Pierre Lagrange, “[_Agobard, la Magonie et les ovnis_,](https://www.google.com/url?q=https://pierrelagrangesociologie.files.wordpress.com/2020/08/lagrange-agobard-magonie-ufologie-lhistoire-440-2017-10-p28-29.pdf&sa=D&source=editors&ust=1674889727239396&usg=AOvVaw1U01Ykx3tRTQS4QKENJuGi)” Actualité, no. 440 (October 2017): 28–29; Wikipedia, “[Magonia (mythology)](https://www.google.com/url?q=https://en.wikipedia.org/wiki/Magonia_(mythology)&sa=D&source=editors&ust=1674889727239728&usg=AOvVaw0JOQanVKKoRClyKQPK5SJi)”))"; const char* p = "
blah
_[Agobard,](www.blah.com)_
blah
blah
[_Agobard_,](www.blah.com)
"; //const char* p = "***[sssss](www.dddd.com)*** _Blah_ *Cool*_Zeek_"; @@ -2103,12 +2104,12 @@ enum cSlashFlag = 256 }; -static const struct +static constexpr struct { const char* m_pStr; uint32_t m_flag; - uint32_t m_month; - date_prefix_t m_date_prefix; + uint32_t m_month = 0; + date_prefix_t m_date_prefix = cNoPrefix; } g_special_phrases[] = { { "january", cMonthFlag, 1 }, @@ -2173,7 +2174,7 @@ static const struct { "/", cSlashFlag } }; -const uint32_t NUM_SPECIAL_PHRASES = sizeof(g_special_phrases) / sizeof(g_special_phrases[0]); +constexpr int NUM_SPECIAL_PHRASES = static_cast(std::size(g_special_phrases)); enum { @@ -2253,12 +2254,18 @@ static int get_special_from_token(int64_t tok) return (int)spec; } -static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date) +static constexpr bool nipcap_date_is_year_valid( + int year) { - assert(cSpecialTotal == NUM_SPECIAL_PHRASES); - const uint32_t MIN_YEAR = 1860; const uint32_t MAX_YEAR = 2012; + return static_cast(year) >= MIN_YEAR + && static_cast(year) <= MAX_YEAR; +} + +static bool convert_nipcap_date(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date) +{ + static_assert(cSpecialTotal == NUM_SPECIAL_PHRASES); string_trim(date); @@ -2318,7 +2325,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ int month = convert_hex_digit(date[4]) * 10 + convert_hex_digit(date[5]); int day = convert_hex_digit(date[6]) * 10 + convert_hex_digit(date[7]); - if ((year < MIN_YEAR) || (year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(year)) return false; if (month > 12) @@ -2351,7 +2358,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ return false; } - // Tokenize the input then only parse those cases we explictly support. Everything else is an error. + // Tokenize the input then only parse those cases we explicitly support. Everything else is an error. 
std::vector tokens; std::vector digits; @@ -2432,7 +2439,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ else if (digits[0] == 4) { year = (int)tokens[0]; - if ((year < MIN_YEAR) || (year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(year)) return false; } else @@ -2462,7 +2469,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ { if (digits[0] == 4) { - // YYMMXX + // YYMMXX int year = 1900 + (int)(tokens[0] / 100); int month = (int)(tokens[0] % 100); @@ -2474,10 +2481,10 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ } else if (digits[0] == 6) { - // YYYYMMXX + // YYYYMMXX int year = (int)(tokens[0] / 100); - if ((year < MIN_YEAR) || (year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(year)) return false; int month = (int)(tokens[0] % 100); @@ -2505,7 +2512,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ { // YYYYXXXX begin_date.m_year = (int)tokens[0]; - if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(begin_date.m_year)) return false; } else @@ -2555,7 +2562,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ { // YYYYMMDD begin_date.m_year = (int)(tokens[0] / 10000); - if ((begin_date.m_year < MIN_YEAR) || (begin_date.m_year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(begin_date.m_year)) return false; begin_date.m_month = (int)((tokens[0] / 100) % 100); @@ -2577,7 +2584,7 @@ static bool convert_nipcap_date(std::string date, event_date& begin_date, event_ } if ((tokens.size() == 2) && (tokens[1] < 0) && - ((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd) || + (((get_special_from_token(tokens[1]) >= cSpecialLate) && (get_special_from_token(tokens[1]) <= cSpecialEnd)) || (get_special_from_token(tokens[1]) == cSpecialMid)) ) { @@ -2649,7 +2656,7 @@ static bool 
convert_nipcap_date(std::string date, event_date& begin_date, event_ { // YYYYMMDD-YYYYMMDD end_date.m_year = (int)(tokens[2] / 10000); - if ((end_date.m_year < MIN_YEAR) || (end_date.m_year > MAX_YEAR)) + if (!nipcap_date_is_year_valid(end_date.m_year)) return false; end_date.m_month = (int)((tokens[2] / 100) % 100); @@ -3317,7 +3324,7 @@ bool convert_nicap(unordered_string_set& unique_urls) if ((prev_orig_desc.size()) && (orig_desc == prev_orig_desc) && (js["date"] == prev_date)) { - // It's a repeated record, with just a different category. + // It's a repeated record, with just a different category. std::string new_desc(js_doc_array.back()["desc"]); new_desc += string_format(" (NICAP: %s)", g_nicap_categories[cat_index - 1]); @@ -3391,7 +3398,7 @@ bool convert_nuk() { std::string title; string_vec col_titles; - + std::vector rows; bool success = load_column_text("nuktest_usa.txt", rows, title, col_titles, false, "USA"); @@ -3428,9 +3435,9 @@ bool convert_nuk() event.m_locations.push_back(x[cColLat] + " " + x[cColLong]); std::string attr; - + std::string t(string_upper(x[cColType])); - + bool salvo = false; if (string_ends_in(t, "_SALVO")) { @@ -3491,9 +3498,9 @@ bool convert_nuk() panic("Invalid type"); event.m_desc = string_format("Nuclear test: %s. Country: %s", attr.c_str(), x[cColCountry].c_str()); - + if ((x[cColName].size()) && (x[cColName] != "-")) - event.m_desc += string_format(u8" Name: %s", x[cColName].c_str()); + event.m_desc += string_format(u8" Name: “%s”", x[cColName].c_str()); if (x[cColY].size()) event.m_desc += string_format(" Yield: %sKT", x[cColY].c_str()); @@ -3510,13 +3517,13 @@ bool convert_nuk() std::string latitude_dms = get_deg_to_dms(lat) + ((lat <= 0) ? " S" : " N"); std::string longitude_dms = get_deg_to_dms(lon) + ((lon <= 0) ? 
" W" : " E"); - + event.m_key_value_data.push_back(string_pair("LatLongDMS", latitude_dms + " " + longitude_dms)); } if (x[cColDepth].size()) event.m_key_value_data.push_back(string_pair("NukeDepth", x[cColDepth])); - + if (x[cColMb].size()) event.m_key_value_data.push_back(string_pair("NukeMb", x[cColMb])); @@ -3534,7 +3541,7 @@ bool convert_nuk() event.m_key_value_data.push_back(string_pair("NukeSource", x[cColSource])); event.m_key_value_data.push_back(string_pair("NukeCountry", x[cColCountry])); - + if (x[cColLat].size() && x[cColLong].size()) { event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%s,%s)", x[cColLat].c_str(), x[cColLong].c_str()))); @@ -3545,9 +3552,9 @@ bool convert_nuk() event.m_source = "NukeExplosions"; event.m_source_id = event.m_source + string_format("_%u", event_id); - + timeline.get_events().push_back(event); - + event_id++; } @@ -3555,7 +3562,7 @@ bool convert_nuk() panic("Empty timeline)"); timeline.set_name("Nuclear Test Timeline"); - + return timeline.write_file("nuclear_tests.json", true); } @@ -3563,7 +3570,7 @@ bool convert_anon() { string_vec lines; bool utf8_flag = false; - + const char* pFilename = "anon_pdf.md"; if (!read_text_file(pFilename, lines, true, &utf8_flag)) panic("Failed reading text file %s", pFilename); @@ -3582,10 +3589,10 @@ bool convert_anon() if (s.size() < 27) panic("Invalid string"); - - //[0x00000026] 0xe2 '' char - //[0x00000027] 0x80 '' char - //[0x00000028] 0x94 '' char + + //[0x00000026] 0xe2 'â' char + //[0x00000027] 0x80 '€' char + //[0x00000028] 0x94 '”' char const int8_t c = -30;// (int8_t)0xE2; size_t dash_pos = s.find_first_of(c); @@ -3794,7 +3801,7 @@ bool convert_anon() break; string_trim(ns); - + line_index++; event_strs.push_back(ns); @@ -4056,13 +4063,13 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm) } } - if ((day_index < 0) && ((month_tok_index + 1) < tokens.size())) + if ((day_index 
< 0) && ((month_tok_index + 1) < static_cast(tokens.size()))) { std::string& suffix_str = tokens[month_tok_index + 1]; if (isdigit(suffix_str[0])) { bool is_time = false; - if ((month_tok_index + 2) < tokens.size()) + if ((month_tok_index + 2) < static_cast(tokens.size())) { is_time = (tokens[month_tok_index + 2] == ":"); } @@ -4203,7 +4210,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm) std::string ref(string_slice(rec_text, s, l)); - if ((e < rec_text.size()) && ((rec_text[e] == '.') || (rec_text[e] == ']'))) + if ((e < static_cast(rec_text.size())) && ((rec_text[e] == '.') || (rec_text[e] == ']'))) { while (s > 0) { @@ -4214,7 +4221,7 @@ static int md_convert(const char* pSrc_filename, int year, ufo_timeline& tm) } } - if ((e < rec_text.size()) && (rec_text[e] == ']')) + if ((e < static_cast(rec_text.size())) && (rec_text[e] == ']')) { e++; l++; @@ -4335,7 +4342,7 @@ bool convert_rr0() tm.write_file("rr0.json"); uprintf("Processed %u years\n", total_years); - + return total_years >= NUM_EXPECTED_RR0_YEARS; } @@ -4439,7 +4446,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm) str = string_lower(str); int year = -1, year_tok_index = -1; - for (year_tok_index = 0; year_tok_index < tokens.size(); year_tok_index++) + for (year_tok_index = 0; year_tok_index < static_cast(tokens.size()); year_tok_index++) { int y = atoi(tokens[year_tok_index].c_str()); if ((y > 0) && (y >= first_year) && (y <= last_year)) @@ -4501,13 +4508,13 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm) } if ((day_index < 0) && - ((month_tok_index + 1) < tokens.size())) + ((month_tok_index + 1) < static_cast(tokens.size()))) { std::string& suffix_str = tokens[month_tok_index + 1]; if (isdigit(suffix_str[0])) { bool is_time = false; - if ((month_tok_index + 2) < tokens.size()) + if ((month_tok_index + 2) < static_cast(tokens.size())) { is_time = (tokens[month_tok_index + 2] == ":"); } @@ -4642,7 
+4649,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm) evt.m_source = "Overmeire"; evt.m_source_id = string_format("Overmeire_%zu", tm.get_events().size()); evt.m_refs.push_back("[_Mini catalogue chronologique des observations OVNI_, by Godelieve Van Overmeire](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html)"); - + std::string trial_date(string_format("#%u", year)); if (cur_date.m_month >= 1) { @@ -4652,7 +4659,7 @@ static bool overmeire_convert(const std::string& in_filename, ufo_timeline& tm) } if (trial_date != strs[0]) evt.m_desc += " (" + string_slice(strs[0], 1) + ")"; - + tm.get_events().push_back(evt); prev_year = year; diff --git a/json/json.hpp b/json/json.hpp index c10acf1..2ae5efe 100644 --- a/json/json.hpp +++ b/json/json.hpp @@ -8168,6 +8168,7 @@ class lexer : public lexer_base } } } + JSON_HEDLEY_FALL_THROUGH; // multi-line comments skip input until */ is read case '*': @@ -8203,6 +8204,7 @@ class lexer : public lexer_base } } } + JSON_HEDLEY_FALL_THROUGH; // unexpected character after reading '/' default: diff --git a/pjson.h b/pjson.h index e90e8ff..dec0c8e 100644 --- a/pjson.h +++ b/pjson.h @@ -39,7 +39,7 @@ namespace pjson class value_variant; struct value_variant_data; struct key_value_t; - + typedef std::vector char_vec_t; typedef std::string string_t; @@ -48,7 +48,7 @@ namespace pjson inline void* pjson_malloc(size_t size) { return malloc(size); } inline void* pjson_realloc(void* p, size_t size) { return realloc(p, size); } inline void pjson_free(void* p) { free(p); } - + // Misc. 
Helpers template inline void swap(T& l, T& r) { T temp(l); l = r; r = temp; } @@ -68,9 +68,9 @@ namespace pjson static const uint8 s_parse_flags[256]; }; typedef globals_struct<> globals; - + template - const uint8 globals_struct::s_str_serialize_flags[256] = + const uint8 globals_struct::s_str_serialize_flags[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 @@ -82,19 +82,19 @@ namespace pjson 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 // 128-255 - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 }; template - const double globals_struct::s_pow10_table[63] = + const double globals_struct::s_pow10_table[63] = { 1.e-031,1.e-030,1.e-029,1.e-028,1.e-027,1.e-026,1.e-025,1.e-024,1.e-023,1.e-022,1.e-021,1.e-020,1.e-019,1.e-018,1.e-017,1.e-016, 1.e-015,1.e-014,1.e-013,1.e-012,1.e-011,1.e-010,1.e-009,1.e-008,1.e-007,1.e-006,1.e-005,1.e-004,1.e-003,1.e-002,1.e-001,1.e+000, 1.e+001,1.e+002,1.e+003,1.e+004,1.e+005,1.e+006,1.e+007,1.e+008,1.e+009,1.e+010,1.e+011,1.e+012,1.e+013,1.e+014,1.e+015,1.e+016, - 1.e+017,1.e+018,1.e+019,1.e+020,1.e+021,1.e+022,1.e+023,1.e+024,1.e+025,1.e+026,1.e+027,1.e+028,1.e+029,1.e+030,1.e+031 + 1.e+017,1.e+018,1.e+019,1.e+020,1.e+021,1.e+022,1.e+023,1.e+024,1.e+025,1.e+026,1.e+027,1.e+028,1.e+029,1.e+030,1.e+031 }; // bit 0 (1) - set if: \0 cr lf " \ @@ -103,7 +103,7 @@ namespace pjson // bit 3 (8) - set if: 0-9 // bit 4 (0x10) - set if: 0-9 e E . 
template - const uint8 globals_struct::s_parse_flags[256] = + const uint8 globals_struct::s_parse_flags[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 4, 4, 7, 4, 4, // 0 @@ -116,9 +116,9 @@ namespace pjson 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 // 128-255 - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 }; @@ -126,15 +126,15 @@ namespace pjson struct pool_allocator { - inline pool_allocator(uint initial_size = 0, uint min_chunk_size = PJSON_DEFAULT_MIN_CHUNK_SIZE, size_t max_bytes_to_preserve_across_resets = PJSON_DEFAULT_MAX_BYTES_TO_PRESERVE_ACROSS_RESETS) : - m_pActive_chunks(NULL), - m_pFree_chunks(NULL), + inline pool_allocator(uint initial_size = 0, uint min_chunk_size = PJSON_DEFAULT_MIN_CHUNK_SIZE, size_t max_bytes_to_preserve_across_resets = PJSON_DEFAULT_MAX_BYTES_TO_PRESERVE_ACROSS_RESETS) : + m_pActive_chunks(nullptr), + m_pFree_chunks(nullptr), m_total_free_bytes(0), - m_initial_size(initial_size), + m_initial_size(initial_size), m_min_chunk_size(min_chunk_size), - m_cur_grow_size(min_chunk_size), - m_max_to_preserve_across_resets(max_bytes_to_preserve_across_resets) - { + m_max_to_preserve_across_resets(max_bytes_to_preserve_across_resets), + m_cur_grow_size(min_chunk_size) + { if (initial_size) { m_pActive_chunks = static_cast(pjson_malloc(sizeof(chunk) + initial_size)); @@ -154,17 +154,17 @@ namespace pjson { free_chunk_chain(m_pActive_chunks); m_pActive_chunks = NULL; - + free_chunk_chain(m_pFree_chunks); m_pFree_chunks = NULL; m_total_free_bytes = 0; - + m_cur_grow_size = 
m_min_chunk_size; } inline size_t get_total_free_bytes() const { return m_total_free_bytes; } - + inline uint get_min_chunk_size() const { return m_min_chunk_size; } inline size_t get_max_bytes_to_preserve_across_resets() const { return m_max_to_preserve_across_resets; } @@ -172,7 +172,7 @@ namespace pjson inline void set_max_bytes_to_preserve_across_resets(size_t s) { m_max_to_preserve_across_resets = s; } inline uint get_cur_grow_size() const { return m_cur_grow_size; } - + inline void* Alloc(size_t size) { size = (size + 3) & ~3; @@ -209,7 +209,7 @@ namespace pjson { if (!p) return Alloc(new_size); - + new_size = (new_size + 3) & ~3; cur_size = (cur_size + 3) & ~3; if (new_size == cur_size) @@ -228,7 +228,7 @@ namespace pjson return p; } } - else + else { PJSON_ASSERT(m_pActive_chunks->m_ofs >= (cur_size - new_size)); m_pActive_chunks->m_ofs -= (cur_size - new_size); @@ -249,7 +249,7 @@ namespace pjson { if (!m_pActive_chunks) return; - + chunk* pCur_active_tail = m_pActive_chunks; size_t total_allocated_bytes = 0; for ( ; ; ) @@ -261,7 +261,7 @@ namespace pjson pCur_active_tail = pCur_active_tail->m_pNext; } pCur_active_tail->m_pNext = m_pFree_chunks; - + m_pFree_chunks = m_pActive_chunks; m_pActive_chunks = NULL; @@ -323,8 +323,8 @@ namespace pjson } private: - pool_allocator(const pool_allocator&); - pool_allocator& operator= (const pool_allocator&); + pool_allocator(const pool_allocator&) = delete; + pool_allocator& operator= (const pool_allocator&) = delete; struct chunk { @@ -337,10 +337,11 @@ namespace pjson chunk* m_pFree_chunks; size_t m_total_free_bytes; + [[maybe_unused]] // -Wunused-private-field uint m_initial_size; uint m_min_chunk_size; size_t m_max_to_preserve_across_resets; - + uint m_cur_grow_size; inline void free_chunk_chain(chunk* pChunk) @@ -359,20 +360,20 @@ namespace pjson template struct simple_vector_default_copy_construction_policy { - inline static void copy_construct(void *pDst, const T& init, pool_allocator& alloc) { alloc; new 
(pDst) T(init); } - inline static void assign(void *pDst, const T& src, pool_allocator& alloc) { alloc; *static_cast(pDst) = src; } + inline static void copy_construct(void *pDst, const T& init, [[maybe_unused]] pool_allocator& alloc) { new (pDst) T(init); } + inline static void assign(void *pDst, const T& src, [[maybe_unused]] pool_allocator& alloc) { *static_cast(pDst) = src; } }; template struct simple_vector_allocator_copy_construction_policy { - inline static void copy_construct(void *pDst, const T& init, pool_allocator& alloc) { alloc; new (pDst) T(init, alloc); } + inline static void copy_construct(void *pDst, const T& init, [[maybe_unused]] pool_allocator& alloc) { new (pDst) T(init, alloc); } inline static void assign(void *pDst, const T& src, pool_allocator& alloc) { static_cast(pDst)->assign(src, alloc); } }; template inline T* construct(T* p) { return new (static_cast(p)) T; } template inline void construct_array(T* p, uint n) { T* q = p + n; for ( ; p != q; ++p) new (static_cast(p)) T; } - + template struct elemental_vector { @@ -381,7 +382,7 @@ namespace pjson typedef const T& const_reference; typedef T* pointer; typedef const T* const_pointer; - + T* m_p; uint32 m_size; }; @@ -393,12 +394,12 @@ namespace pjson inline simple_vector() { construct(); } inline simple_vector(const simple_vector& other, pool_allocator& alloc) { construct(other, alloc); } - + // Manual constructor methods inline void construct() { base::m_p = NULL; base::m_size = 0; } inline void construct(uint size, pool_allocator& alloc) { construct(); enlarge(size, alloc, false); } inline void construct(const T* p, uint size, pool_allocator& alloc) - { + { base::m_size = size; base::m_p = NULL; if (size) @@ -417,17 +418,17 @@ namespace pjson memcpy(base::m_p, p, num_bytes); } } - inline void construct(const simple_vector& other, pool_allocator& alloc) - { - construct(other.m_p, other.m_size, alloc); + inline void construct(const simple_vector& other, pool_allocator& alloc) + { + 
construct(other.m_p, other.m_size, alloc); } - + inline uint size() const { return base::m_size; } inline uint size_in_bytes() const { return base::m_size * sizeof(T); } - + inline const T& operator[] (uint i) const { PJSON_ASSERT(i < base::m_size); return base::m_p[i]; } inline T& operator[] (uint i) { PJSON_ASSERT(i < base::m_size); return base::m_p[i]; } - + inline const T* get_ptr() const { return base::m_p; } inline T* get_ptr() { return base::m_p; } @@ -435,17 +436,17 @@ namespace pjson inline T* get_ptr(T* pDef) { return base::m_p ? base::m_p : pDef; } inline void clear() { base::m_p = NULL; base::m_size = 0; } - + inline void resize(uint new_size, pool_allocator& alloc) { if (new_size > base::m_size) { grow(new_size, alloc); - + if (UseConstructor) construct_array(base::m_p + base::m_size, new_size - base::m_size); } - + base::m_size = new_size; } @@ -453,9 +454,9 @@ namespace pjson { base::m_size = new_size; } - - inline T* enlarge_no_construct(uint n, pool_allocator& alloc) - { + + inline T* enlarge_no_construct(uint n, pool_allocator& alloc) + { PJSON_ASSERT(n); uint cur_size = base::m_size, new_size = base::m_size + n; grow(new_size, alloc); @@ -464,7 +465,7 @@ namespace pjson } inline T* enlarge(uint n, pool_allocator& alloc) - { + { T* p = enlarge_no_construct(n, alloc); if (UseConstructor) construct_array(p, n); @@ -500,7 +501,7 @@ namespace pjson inline void assign(const T* p, uint n, pool_allocator& alloc) { PJSON_ASSERT(!base::m_p || ((p + n) <= base::m_p) || (p >= (base::m_p + base::m_size))); - + const uint num_to_assign = PJSON_MIN(base::m_size, n); if (num_to_assign) { @@ -520,10 +521,10 @@ namespace pjson } inline void assign(const simple_vector& other, pool_allocator& alloc) - { + { assign(other.m_p, other.m_size, alloc); } - + inline void erase(uint start, uint n) { PJSON_ASSERT((start + n) <= base::m_size); @@ -539,19 +540,19 @@ namespace pjson base::m_size -= n; } - inline void swap(simple_vector& other) - { + inline void 
swap(simple_vector& other) + { pjson::swap(base::m_p, other.m_p); pjson::swap(base::m_size, other.m_size); } - + inline void grow(uint new_size, pool_allocator& alloc) { if (new_size > base::m_size) base::m_p = static_cast(alloc.Realloc(base::m_p, sizeof(T) * new_size, base::m_size * sizeof(T))); } }; - + enum json_value_type_t { cJSONValueTypeNull = 0, @@ -564,13 +565,13 @@ namespace pjson cJSONValueTypeArray, cJSONValueTypeObject, }; - + // ---- struct value_variant_data typedef simple_vector string_vec_t; typedef simple_vector > key_value_vec_t; typedef simple_vector > value_variant_vec_t; - + #pragma pack(push, 4) struct value_variant_data { @@ -596,16 +597,16 @@ namespace pjson inline const key_value_vec_t& get_object() const { return (const key_value_vec_t&)m_data.m_object; } inline key_value_vec_t& get_object() { return (key_value_vec_t&)m_data.m_object; } }; - + // ---- struct key_value_t struct key_value_t { inline key_value_t() { } inline key_value_t(const key_value_t& other, pool_allocator& alloc); - + inline void assign(const key_value_t& src, pool_allocator& alloc); - + inline const string_vec_t& get_key() const { return m_key; } inline string_vec_t& get_key() { return m_key; } @@ -618,7 +619,7 @@ namespace pjson #pragma pack(pop) // ---- class char_vector_print_helper - + class char_vector_print_helper { char_vector_print_helper(const char_vector_print_helper&); @@ -630,7 +631,7 @@ namespace pjson inline void resize(size_t new_size) { m_buf.resize(new_size); } inline size_t size() const { return m_buf.size(); } inline char* get_ptr() const { return &m_buf[0]; } - + inline const char_vec_t& get_buf() const { return m_buf; } inline char_vec_t& get_buf() { return m_buf; } @@ -641,14 +642,14 @@ namespace pjson void print_escaped(const string_vec_t& str) { const char* pStr = str.m_p; - uint len = str.m_size; len; - + [[maybe_unused]] uint len = str.m_size; + static const char* s_to_hex = "0123456789abcdef"; print_char('\"'); while (*pStr) { uint8 c = 
*pStr++; - if ((c >= ' ') && (c != '\"') && (c != '\\')) + if ((c >= ' ') && (c != '\"') && (c != '\\')) print_char(c); else { @@ -668,7 +669,7 @@ namespace pjson } print_char('\"'); } - + private: char_vec_t& m_buf; }; @@ -686,7 +687,7 @@ namespace pjson inline void resize(size_t new_size) { PJSON_ASSERT(new_size <= (size_t)(m_pEnd - m_pStart)); m_pDst = m_pStart + new_size; } inline size_t size() const { return m_pDst - m_pStart; } inline char* get_ptr() const { return m_pStart; } - + inline void puts(const char* pStr, size_t l) { memcpy(m_pDst, pStr, l = PJSON_MIN(l, (size_t)(m_pEnd - m_pDst))); m_pDst += l; } inline void print_tabs(size_t n) { n = PJSON_MIN(n, (size_t)(m_pEnd - m_pDst)); memset(m_pDst, '\t', n); m_pDst += n; } inline void print_char(char c) { if (m_pDst < m_pEnd) *m_pDst++ = c; } @@ -699,10 +700,10 @@ namespace pjson char* pDst = m_pDst; char* pEnd = m_pEnd; uint len = str.m_size; - + // If len!=0, it includes the terminating null, so this expression is conservative. 
if (static_cast(pEnd - pDst) < (len + 2)) { m_pDst = pEnd; return; } - + *pDst++ = '\"'; uint8 c = 0; if (pStr) c = pStr[0]; @@ -713,13 +714,13 @@ namespace pjson pDst[2] = c; c = pStr[3]; if (globals::s_str_serialize_flags[c]) { pStr += 3, pDst += 3; break; } pDst[3] = c; c = pStr[4]; pStr += 4, pDst += 4; } - + while (c) { if ((pEnd - pDst) < 7) - { - m_pDst = pEnd; - return; + { + m_pDst = pEnd; + return; } if (!globals::s_str_serialize_flags[c]) *pDst++ = c; @@ -728,7 +729,7 @@ namespace pjson pDst[0] = '\\'; switch (c) { - case '\b': pDst[1] = 'b'; break; + case '\b': pDst[1] = 'b'; break; case '\r': pDst[1] = 'r'; break; case '\t': pDst[1] = 't'; break; case '\f': pDst[1] = 'f'; break; @@ -767,7 +768,7 @@ namespace pjson }; // ---- class value_variant - + #pragma pack(push, 4) class value_variant : public value_variant_data { @@ -784,12 +785,12 @@ namespace pjson inline value_variant(uint32 nVal) { m_type = cJSONValueTypeInt; m_data.m_nVal = nVal; } inline value_variant(int64 nVal) { m_type = cJSONValueTypeInt; m_data.m_nVal = nVal; } inline value_variant(double flVal) { m_type = cJSONValueTypeDouble; m_data.m_flVal = flVal; } - + inline value_variant(const char* pStr, pool_allocator& alloc) - { + { m_type = cJSONValueTypeString; if (!pStr) pStr = ""; - get_string().construct(pStr, static_cast(strlen(pStr)) + 1, alloc); + get_string().construct(pStr, static_cast(strlen(pStr)) + 1, alloc); } inline value_variant(json_value_type_t type) @@ -834,7 +835,7 @@ namespace pjson inline bool is_object_or_array() const { return m_type >= cJSONValueTypeArray; } inline bool is_object() const { return m_type == cJSONValueTypeObject; } inline bool is_array() const { return m_type == cJSONValueTypeArray; } - + inline void clear() { set_to_null(); } inline void assume_ownership(value_variant& src_val) { set_to_null(); swap(src_val); } @@ -850,9 +851,9 @@ namespace pjson inline value_variant& set(int64 nVal) { m_data.m_nVal = nVal; m_type = cJSONValueTypeInt; return *this; } 
inline value_variant& set(uint32 nVal) { set(static_cast(nVal)); return *this; } inline value_variant& set(double flVal) { m_data.m_flVal = flVal; m_type = cJSONValueTypeDouble; return *this; } - - inline value_variant& set(const char* pStr, pool_allocator& alloc) - { + + inline value_variant& set(const char* pStr, pool_allocator& alloc) + { if (!pStr) pStr = ""; uint l = static_cast(strlen(pStr)) + 1; if (!is_string()) @@ -866,7 +867,7 @@ namespace pjson } inline value_variant& set_assume_ownership(char* pStr, uint len) - { + { m_type = cJSONValueTypeString; string_vec_t& str = get_string(); str.m_p = pStr; @@ -874,8 +875,8 @@ namespace pjson return *this; } - inline value_variant& set(const value_variant* pVals, uint n, pool_allocator& alloc) - { + inline value_variant& set(const value_variant* pVals, uint n, pool_allocator& alloc) + { if (!is_array()) { m_type = cJSONValueTypeArray; @@ -886,8 +887,8 @@ namespace pjson return *this; } - inline value_variant& set_assume_ownership(value_variant* pVals, uint n) - { + inline value_variant& set_assume_ownership(value_variant* pVals, uint n) + { m_type = cJSONValueTypeArray; value_variant_vec_t& arr = get_array(); arr.m_p = pVals; @@ -895,8 +896,8 @@ namespace pjson return *this; } - inline value_variant& set(const key_value_t* pKey_values, uint n, pool_allocator& alloc) - { + inline value_variant& set(const key_value_t* pKey_values, uint n, pool_allocator& alloc) + { if (!is_object()) { m_type = cJSONValueTypeObject; @@ -907,8 +908,8 @@ namespace pjson return *this; } - inline value_variant& set_assume_ownership(key_value_t* pKey_values, uint n) - { + inline value_variant& set_assume_ownership(key_value_t* pKey_values, uint n) + { m_type = cJSONValueTypeObject; key_value_vec_t& obj = get_object(); obj.m_p = pKey_values; @@ -928,14 +929,14 @@ namespace pjson inline bool get_numeric_value(float& val, float def = 0.0f) const { if (is_double()) { val = static_cast(m_data.m_flVal); return true; } else return 
convert_to_float(val, def); } inline bool get_numeric_value(double& val, double def = 0.0f) const { if (is_double()) { val = m_data.m_flVal; return true; } else return convert_to_double(val, def); } inline bool get_string_value(string_t& val, const char* pDef = "") const { if (is_string()) { val = get_string_ptr(); return true; } else return convert_to_string(val, pDef); } - + inline bool as_bool(bool def = false) const { bool result; get_bool_value(result, def); return result; } inline int as_int32(int32 def = 0) const { int32 result; get_numeric_value(result, def); return result; } inline int64 as_int64(int64 def = 0) const { int64 result; get_numeric_value(result, def); return result; } inline float as_float(float def = 0.0f) const { float result; get_numeric_value(result, def); return result; } inline double as_double(double def = 0.0f) const { double result; get_numeric_value(result, def); return result; } - // Returns value as a string, or the default string if the value cannot be converted. + // Returns value as a string, or the default string if the value cannot be converted. inline string_t as_string(const char* pDef = "") const { string_t result; get_string_value(result, pDef); return result; } // Returns pointer to null terminated string or NULL if the value is not a string. @@ -980,7 +981,7 @@ namespace pjson PJSON_ASSERT(0); return -1; } - + //const uint n = get_array().size(); const uint n = get_object().size(); const key_value_vec_t &obj = get_object(); @@ -997,47 +998,47 @@ namespace pjson return find_key(pName) >= 0; } - inline bool find_bool(const char *pName, bool def = false) const - { + inline bool find_bool(const char *pName, bool def = false) const + { int index = find_key(pName); return (index < 0) ? def : get_object()[index].get_value().as_bool(def); } - - inline int find_int32(const char *pName, int32 def = 0) const - { + + inline int find_int32(const char *pName, int32 def = 0) const + { int index = find_key(pName); return (index < 0) ? 
def : get_object()[index].get_value().as_int32(def); } - - inline int64 find_int64(const char *pName, int64 def = 0) const - { + + inline int64 find_int64(const char *pName, int64 def = 0) const + { int index = find_key(pName); return (index < 0) ? def : get_object()[index].get_value().as_int64(def); } - - inline float find_float(const char *pName, float def = 0.0f) const - { + + inline float find_float(const char *pName, float def = 0.0f) const + { int index = find_key(pName); return (index < 0) ? def : get_object()[index].get_value().as_float(def); } - inline double find_double(const char *pName, double def = 0.0f) const - { + inline double find_double(const char *pName, double def = 0.0f) const + { int index = find_key(pName); return (index < 0) ? def : get_object()[index].get_value().as_double(def); } - - inline const char* find_string_ptr(const char *pName, const char *pDef = "") const - { + + inline const char* find_string_ptr(const char *pName, const char *pDef = "") const + { int index = find_key(pName); - if (index < 0) + if (index < 0) return pDef; const char *p = get_object()[index].get_value().as_string_ptr(); return p ? p : pDef; } inline std::string find_string_obj(const char* pName, const char* pDef = "") const { return find_string_ptr(pName, pDef); } - + inline value_variant& get_value_at_index(uint index) { PJSON_ASSERT(is_object_or_array()); return is_object() ? get_object()[index].get_value() : get_array()[index]; } inline const value_variant& get_value_at_index(uint index) const { PJSON_ASSERT(is_object_or_array()); return is_object() ? 
get_object()[index].get_value() : get_array()[index]; } @@ -1047,7 +1048,7 @@ namespace pjson inline json_value_type_t get_value_type_at_index(uint index) const { return get_value_at_index(index).get_type(); } inline bool is_child_at_index(uint index) const { return get_value_type_at_index(index) >= cJSONValueTypeArray; } - + inline bool has_children() const { if (is_object()) @@ -1068,7 +1069,7 @@ namespace pjson } return false; } - + inline void clear_object_or_array() { PJSON_ASSERT(is_object_or_array()); @@ -1089,7 +1090,7 @@ namespace pjson inline void set_key_name_at_index(uint index, const char *pKey, uint key_len, pool_allocator& alloc) { - PJSON_ASSERT(is_object()); + PJSON_ASSERT(is_object()); string_vec_t& str = get_object()[index].get_key(); str.assign(pKey, key_len + 1, alloc); } @@ -1098,7 +1099,7 @@ namespace pjson { set_key_name_at_index(index, pKey, static_cast(strlen(pKey)) + 1, alloc); } - + inline value_variant& add_key_value(const char* pKey, uint key_len, const value_variant& val, pool_allocator& alloc) { PJSON_ASSERT(is_object()); @@ -1120,7 +1121,7 @@ namespace pjson get_array().enlarge_no_construct(1, alloc)->construct(val, alloc); return *this; } - + bool serialize(char* pBuf, size_t buf_size, size_t* pSize = NULL, bool formatted = true, bool null_terminate = true) const { serialize_helper helper(pBuf, buf_size); @@ -1136,7 +1137,7 @@ namespace pjson serialize_internal(helper, formatted, null_terminate, 0); return true; } - + protected: // Manual constructor inline void construct(json_value_type_t type) @@ -1191,6 +1192,14 @@ namespace pjson val = (atof(get_string_ptr()) != 0.0f); return true; } + + case cJSONValueTypeNull: + case cJSONValueTypeArray: + case cJSONValueTypeObject: + { + // no conversion + break; + } } val = def; return false; @@ -1236,13 +1245,24 @@ namespace pjson return true; } double flVal = floor(atof(get_string_ptr())); - if ((flVal >= std::numeric_limits::min()) && (flVal <= std::numeric_limits::max())) + // Before 
max was casted to double, this was tripping under clang: + // implicit conversion from 'long long' to 'double' changes value from 9223372036854775807 to 9223372036854775808 [-Wimplicit-const-int-float-conversion] + if ((flVal >= std::numeric_limits::min()) && + (flVal <= static_cast(std::numeric_limits::max()))) { val = static_cast(flVal); return true; } break; } + + case cJSONValueTypeNull: + case cJSONValueTypeArray: + case cJSONValueTypeObject: + { + // no conversion + break; + } } val = def; return false; @@ -1278,6 +1298,14 @@ namespace pjson val = static_cast(atof(get_string_ptr())); return true; } + + case cJSONValueTypeNull: + case cJSONValueTypeArray: + case cJSONValueTypeObject: + { + // no conversion + break; + } } val = def; return false; @@ -1313,11 +1341,19 @@ namespace pjson val = atof(get_string_ptr()); return true; } + + case cJSONValueTypeNull: + case cJSONValueTypeArray: + case cJSONValueTypeObject: + { + // no conversion + break; + } } val = def; return false; } - + inline bool convert_to_string(char* pBuf, size_t buf_size) const { switch (m_type) @@ -1339,22 +1375,22 @@ namespace pjson { char* pDst = pBuf; int64 n = m_data.m_nVal; - + uint64 s = static_cast(n >> 63); *pDst = '-'; pDst -= s; n = (n ^ s) - s; - + char* pLeft = pDst; - do + do { *pDst++ = '0' + (n % 10); n /= 10; } while (n); - + *pDst = '\0'; - + do { char c = *--pDst; @@ -1367,6 +1403,14 @@ namespace pjson { return 0 == _gcvt_s(pBuf, buf_size, m_data.m_flVal, 15); } + + case cJSONValueTypeString: + case cJSONValueTypeArray: + case cJSONValueTypeObject: + { + // no conversion + break; + } } return false; } @@ -1395,7 +1439,7 @@ namespace pjson { char buf[64]; const uint size = get_array().size(); - + if (!size) { static const char* g_empty_object_strs[4] = { "[]", "[ ]", "{}", "{ }" }; @@ -1409,7 +1453,7 @@ namespace pjson out.puts("[ ", 2); const uint cMaxLineLen = 100; - + for (uint i = 0; i < size; i++) { const value_variant& child_val = get_value_at_index(i); @@ -1494,7 
+1538,7 @@ namespace pjson if (formatted) out.print_char('\n'); } - else + else { if (is_string()) out.print_escaped(get_string()); @@ -1508,7 +1552,7 @@ namespace pjson if (null_terminate) out.print_char('\0'); } - + template value_variant(T*); template value_variant(const T*); template value_variant& operator= (T*); @@ -1516,20 +1560,20 @@ namespace pjson }; #pragma pack(pop) - inline key_value_t::key_value_t(const key_value_t& other, pool_allocator& alloc) : + inline key_value_t::key_value_t(const key_value_t& other, pool_allocator& alloc) : m_key(other.get_key(), alloc) - { + { get_value().construct(other.get_value(), alloc); } - inline void key_value_t::assign(const key_value_t& src, pool_allocator& alloc) - { - get_key().assign(src.get_key(), alloc); - get_value().assign(src.get_value(), alloc); + inline void key_value_t::assign(const key_value_t& src, pool_allocator& alloc) + { + get_key().assign(src.get_key(), alloc); + get_value().assign(src.get_value(), alloc); } // ---- class error_info - + class error_info { public: @@ -1541,11 +1585,11 @@ namespace pjson }; // ---- class growable_stack - + class growable_stack { public: - inline growable_stack(uint initial_size) : + inline growable_stack(uint initial_size) : m_pBuf(NULL), m_size(initial_size), m_ofs(0) @@ -1581,11 +1625,11 @@ namespace pjson const size_t bytes_needed = sizeof(T) * num; T* pResult = reinterpret_cast(m_pBuf + m_ofs); m_ofs += bytes_needed; - + if (m_ofs > m_size) { m_ofs -= bytes_needed; - + m_size = PJSON_MAX(1, m_size * 2); while(m_size <= (m_ofs + bytes_needed)) m_size *= 2; @@ -1594,7 +1638,7 @@ namespace pjson pResult = reinterpret_cast(m_pBuf + m_ofs); m_ofs += bytes_needed; } - + PJSON_ASSERT(m_ofs <= m_size); return pResult; } @@ -1607,7 +1651,7 @@ namespace pjson m_ofs -= bytes_needed; return reinterpret_cast(m_pBuf + m_ofs); } - + private: uint8* m_pBuf; size_t m_size; @@ -1615,7 +1659,7 @@ namespace pjson }; // ---- class document - + class document : public value_variant { 
document(const document&); @@ -1648,14 +1692,14 @@ namespace pjson m_parse_stats.clear(); #endif } - + // The buffer must be null terminated, and must stay resident in memory as long as this document lasts. The buffer will be modified. bool deserialize_in_place(char* pStr) { return deserialize_start((uint8*)pStr); } - -#if PJSON_PARSE_STATS + +#if PJSON_PARSE_STATS struct parse_stats_t { size_t m_num_string, m_num_string_chars; @@ -1676,7 +1720,7 @@ namespace pjson #endif const error_info& get_error_info() const { return m_error_info; } - + private: pool_allocator m_allocator; uint m_initial_stack_size; @@ -1684,15 +1728,15 @@ namespace pjson error_info m_error_info; const uint8* m_pStart; const uint8* m_pStr; - - inline bool set_error(const uint8* pStr, const char* pMsg) - { - m_pStr = pStr; - m_error_info.set(m_pStr - m_pStart, pMsg); - return false; + + inline bool set_error(const uint8* pStr, const char* pMsg) + { + m_pStr = pStr; + m_error_info.set(m_pStr - m_pStart, pMsg); + return false; } -#if PJSON_PARSE_STATS +#if PJSON_PARSE_STATS parse_stats_t m_parse_stats; #endif @@ -1703,7 +1747,7 @@ namespace pjson #define PJSON_INCREMENT_STAT(x) do { } while(0) #define PJSON_UPDATE_STAT(x, n) do { } while(0) #endif - + #define PJSON_SKIP_WHITESPACE \ while (globals::s_parse_flags[*pStr] & 4) \ { \ @@ -1726,7 +1770,7 @@ namespace pjson { if ((c == ' ') || (c == '\t')) { - do + do { PJSON_INCREMENT_STAT(m_num_whitespace_chars); } while (*++p == c); @@ -1751,27 +1795,27 @@ namespace pjson } return p; } - + bool deserialize_internal() { static const uint8 g_utf8_first_byte[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - + m_stack.reset(); memcpy(m_stack.push(1), static_cast(this), sizeof(value_variant)); const uint8* pStr = ++m_pStr; PJSON_UPDATE_STAT(m_num_control, 1); - + bool cur_is_object = is_object(); uint8 cur_end_char = get_end_char(); uint cur_num_elements = 0; - + for ( ; ; ) { PJSON_SKIP_WHITESPACE; uint8 c = *pStr; - + if (c == ',') { if 
(!cur_num_elements) @@ -1784,12 +1828,12 @@ namespace pjson } else if ((cur_num_elements) && (c != cur_end_char)) return set_error(pStr, "Expected comma or object/array end character"); - + while (c == cur_end_char) { PJSON_UPDATE_STAT(m_num_control, 1); ++pStr; - + for ( ; ; ) { uint n = cur_num_elements, num_bytes = cur_num_elements * (cur_is_object ? sizeof(key_value_t) : sizeof(value_variant)); @@ -1797,7 +1841,7 @@ namespace pjson PJSON_INCREMENT_STAT(m_num_value_pop); PJSON_UPDATE_STAT(m_value_pop_bytes, num_bytes); - // The top of the stack (after popping the current array/object) could contain either a value_variant (if cur_is_object is set), + // The top of the stack (after popping the current array/object) could contain either a value_variant (if cur_is_object is set), // or a key_value_t, which ends in a value_variant. So all we need to do is look at the very end, which always has a value_variant. value_variant* pCur_variant = m_stack.get_top_obj(); @@ -1805,12 +1849,12 @@ namespace pjson cur_is_object = (arr.m_p != NULL); cur_end_char = cur_is_object ? 
'}' : ']'; cur_num_elements = arr.m_size; - + arr.m_size = n; arr.m_p = NULL; if (num_bytes) memcpy(arr.m_p = static_cast(m_allocator.Alloc(num_bytes)), pSrc, num_bytes); - + if (m_stack.get_ofs() <= sizeof(value_variant)) { PJSON_ASSERT(m_stack.get_ofs() == sizeof(value_variant)); @@ -1818,20 +1862,20 @@ namespace pjson m_pStr = pStr; return true; } - - PJSON_SKIP_WHITESPACE; + + PJSON_SKIP_WHITESPACE; if (*pStr == ',') { PJSON_UPDATE_STAT(m_num_control, 1); ++pStr; - + PJSON_SKIP_WHITESPACE; c = *pStr; break; } - + if (*pStr++ != cur_end_char) return set_error(pStr, "Unexpected character within object or array"); PJSON_UPDATE_STAT(m_num_control, 1); @@ -1841,14 +1885,14 @@ namespace pjson ++cur_num_elements; value_variant* pChild_variant; - + if (!cur_is_object) pChild_variant = m_stack.push(1); else { if (c != '\"') return set_error(pStr, "Expected quoted key string"); - + ++pStr; PJSON_INCREMENT_STAT(m_num_string); PJSON_INCREMENT_STAT(m_num_string_chars); uint8* pBuf = (uint8*)pStr; @@ -1868,7 +1912,7 @@ namespace pjson uint8* pDst = (uint8*)pStr - 1; - if (c != '\"') PJSON_INCREMENT_STAT(m_num_escape_breaks); + if (c != '\"') PJSON_INCREMENT_STAT(m_num_escape_breaks); while (c != '\"') { @@ -1878,7 +1922,7 @@ namespace pjson c = *pStr++; PJSON_INCREMENT_STAT(m_num_string_chars); if (c == 'u') { - PJSON_INCREMENT_STAT(m_num_unicode_escapes); + PJSON_INCREMENT_STAT(m_num_unicode_escapes); uint u = 0; for (uint i = 0; i < 4; i++) { @@ -1898,10 +1942,10 @@ namespace pjson pDst += len; uint8* q = pDst; - switch (len) + switch (len) { - case 3: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; // falls through - case 2: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; // falls through + case 3: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; [[fallthrough]]; + case 2: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; [[fallthrough]]; case 1: *--q = static_cast(u | g_utf8_first_byte[len]); } } @@ -1925,26 +1969,26 @@ namespace pjson c = *pStr++; 
PJSON_INCREMENT_STAT(m_num_string_chars); while (!(globals::s_parse_flags[c] & 1)) { - pDst[0] = c; + pDst[0] = c; c = pStr[0]; if (globals::s_parse_flags[c] & 1) { ++pDst; ++pStr; PJSON_INCREMENT_STAT(m_num_string_chars); break; } - pDst[1] = c; + pDst[1] = c; c = pStr[1]; if (globals::s_parse_flags[c] & 1) { pDst += 2; pStr += 2; PJSON_UPDATE_STAT(m_num_string_chars, 2); break; } - pDst[2] = c; + pDst[2] = c; c = pStr[2]; if (globals::s_parse_flags[c] & 1) { pDst += 3; pStr += 3; PJSON_UPDATE_STAT(m_num_string_chars, 3); break; } - pDst[3] = c; + pDst[3] = c; pDst += 4; c = pStr[3]; pStr += 4; PJSON_UPDATE_STAT(m_num_string_chars, 4); } } - + *pDst++ = '\0'; key_value_t* pKey_value = m_stack.push(1); pChild_variant = &pKey_value->get_value(); pKey_value->get_key().m_p = (char*)pBuf; pKey_value->get_key().m_size = static_cast(pDst - pBuf); - + PJSON_SKIP_WHITESPACE; if (*pStr != ':') @@ -1952,10 +1996,10 @@ namespace pjson ++pStr; PJSON_INCREMENT_STAT(m_num_control); PJSON_SKIP_WHITESPACE; - + c = *pStr; } - + switch (c) { case '{': @@ -1966,7 +2010,7 @@ namespace pjson pChild_variant->m_type = (c == '{') ? 
cJSONValueTypeObject : cJSONValueTypeArray; pChild_variant->m_data.m_object.m_size = cur_num_elements; pChild_variant->m_data.m_object.m_p = (key_value_t*)cur_is_object; - + cur_is_object = (c == '{'); cur_num_elements = 0; cur_end_char = c + 2; @@ -1975,9 +2019,9 @@ namespace pjson case '\"': { ++pStr; PJSON_INCREMENT_STAT(m_num_string); PJSON_INCREMENT_STAT(m_num_string_chars); - + uint8* pBuf = (uint8*)pStr; - + c = *pStr++; PJSON_INCREMENT_STAT(m_num_string_chars); if (!(globals::s_parse_flags[c] & 1)) { @@ -1993,17 +2037,17 @@ namespace pjson uint8* pDst = (uint8*)pStr - 1; - if (c != '\"') PJSON_INCREMENT_STAT(m_num_escape_breaks); + if (c != '\"') PJSON_INCREMENT_STAT(m_num_escape_breaks); while (c != '\"') { if (globals::s_parse_flags[c] & 2) return set_error(pStr, "Missing end quote"); - + c = *pStr++; PJSON_INCREMENT_STAT(m_num_string_chars); if (c == 'u') { - PJSON_INCREMENT_STAT(m_num_unicode_escapes); + PJSON_INCREMENT_STAT(m_num_unicode_escapes); uint u = 0; for (uint i = 0; i < 4; i++) { @@ -2023,10 +2067,10 @@ namespace pjson pDst += len; uint8* q = pDst; - switch (len) + switch (len) { - case 3: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; // falls through - case 2: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; // falls through + case 3: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; [[fallthrough]]; + case 2: *--q = static_cast((u | 0x80) & 0xBF); u >>= 6; [[fallthrough]]; case 1: *--q = static_cast(u | g_utf8_first_byte[len]); } } @@ -2050,16 +2094,16 @@ namespace pjson c = *pStr++; PJSON_INCREMENT_STAT(m_num_string_chars); while (!(globals::s_parse_flags[c] & 1)) { - pDst[0] = c; + pDst[0] = c; c = pStr[0]; if (globals::s_parse_flags[c] & 1) { ++pDst; ++pStr; PJSON_INCREMENT_STAT(m_num_string_chars); break; } - pDst[1] = c; + pDst[1] = c; c = pStr[1]; if (globals::s_parse_flags[c] & 1) { pDst += 2; pStr += 2; PJSON_UPDATE_STAT(m_num_string_chars, 2); break; } - pDst[2] = c; + pDst[2] = c; c = pStr[2]; if (globals::s_parse_flags[c] & 1) { 
pDst += 3; pStr += 3; PJSON_UPDATE_STAT(m_num_string_chars, 3); break; } - pDst[3] = c; + pDst[3] = c; pDst += 4; c = pStr[3]; - pStr += 4; PJSON_UPDATE_STAT(m_num_string_chars, 4); + pStr += 4; PJSON_UPDATE_STAT(m_num_string_chars, 4); } } @@ -2073,11 +2117,11 @@ namespace pjson break; } - case 'n': + case 'n': { if ((pStr[1] == 'u') && (pStr[2] == 'l') && (pStr[3] == 'l')) { - pStr += 4; PJSON_UPDATE_STAT(m_num_bool_chars, 4); + pStr += 4; PJSON_UPDATE_STAT(m_num_bool_chars, 4); pChild_variant->construct(cJSONValueTypeNull); } else @@ -2088,7 +2132,7 @@ namespace pjson { if ((pStr[1] == 'r') && (pStr[2] == 'u') && (pStr[3] == 'e')) { - pStr += 4; PJSON_UPDATE_STAT(m_num_bool_chars, 4); + pStr += 4; PJSON_UPDATE_STAT(m_num_bool_chars, 4); pChild_variant->construct(cJSONValueTypeBool); pChild_variant->m_data.m_nVal = 1; } @@ -2100,7 +2144,7 @@ namespace pjson { if ((pStr[1] == 'a') && (pStr[2] == 'l') && (pStr[3] == 's') && (pStr[4] == 'e')) { - pStr += 5; PJSON_UPDATE_STAT(m_num_bool_chars, 5); + pStr += 5; PJSON_UPDATE_STAT(m_num_bool_chars, 5); pChild_variant->construct(cJSONValueTypeBool); } else @@ -2110,37 +2154,37 @@ namespace pjson case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': case '.': { - PJSON_INCREMENT_STAT(m_num_numeric); - if (c == '-') PJSON_INCREMENT_STAT(m_num_numeric_chars); - + PJSON_INCREMENT_STAT(m_num_numeric); + if (c == '-') PJSON_INCREMENT_STAT(m_num_numeric_chars); + uint32 n32 = 0; int is_neg = (c == '-'); c = *(pStr += is_neg); - - if (globals::s_parse_flags[c] & 8) - { - n32 = c - '0'; c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = 
*++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); - if (globals::s_parse_flags[c] & 8) - { - n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + + if (globals::s_parse_flags[c] & 8) + { + n32 = c - '0'; c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); + if (globals::s_parse_flags[c] & 8) + { + n32 = (n32 * 10U) + (c - '0'); c = *++pStr; PJSON_UPDATE_STAT(m_num_numeric_chars, 1); } } } @@ -2158,16 +2202,16 @@ namespace pjson else { uint64 n64 = n32; - while (globals::s_parse_flags[c] & 8) + while (globals::s_parse_flags[c] & 8) { n64 = n64 * 10U + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; - + if ((!(globals::s_parse_flags[c] & 8)) || (n64 > 0xCCCCCCCCCCCCCCBULL)) break; - + n64 = n64 * 10U + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = 
*++pStr; - if (n64 > 0xCCCCCCCCCCCCCCBULL) + if (n64 > 0xCCCCCCCCCCCCCCBULL) break; } @@ -2181,14 +2225,14 @@ namespace pjson double f = static_cast(n64); int scale = 0, escalesign = 1, escale = 0; - while (globals::s_parse_flags[c] & 8) + while (globals::s_parse_flags[c] & 8) { f = f * 10.0f + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; - if (!(globals::s_parse_flags[c] & 8)) + if (!(globals::s_parse_flags[c] & 8)) break; - + f = f * 10.0f + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; - } + } if (c == '.') { @@ -2197,16 +2241,16 @@ namespace pjson { scale--; f = f * 10.0f + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; - if (!(globals::s_parse_flags[c] & 8)) + if (!(globals::s_parse_flags[c] & 8)) break; - + scale--; f = f * 10.0f + (c - '0'); PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; } } if ((c == 'e') || (c == 'E')) { - PJSON_INCREMENT_STAT(m_num_numeric_chars); + PJSON_INCREMENT_STAT(m_num_numeric_chars); c = *++pStr; if (c == '-') { @@ -2242,7 +2286,7 @@ namespace pjson pChild_variant->m_data.m_flVal = v; } } - + break; } case '\0': @@ -2250,7 +2294,7 @@ namespace pjson default: return set_error(pStr, "Unrecognized character"); } - } + } } bool deserialize_start(uint8* pStr) @@ -2264,9 +2308,9 @@ namespace pjson m_allocator.reset(); m_pStart = pStr; - + m_pStr = skip_whitespace(pStr); - + if (!*m_pStr) return set_error(m_pStr, "Nothing to deserialize"); diff --git a/stem.c b/stem.c index 657af42..6e774f2 100644 --- a/stem.c +++ b/stem.c @@ -329,7 +329,7 @@ static void step5() if (b[k] == 'e') { int a = m(); - if (a > 1 || a == 1 && !cvc(k - 1)) k--; + if (a > 1 || (a == 1 && !cvc(k - 1))) k--; } if (b[k] == 'l' && doublec(k) && m() > 1) k--; } diff --git a/udb.cpp b/udb.cpp index d411175..5657f4e 100644 --- a/udb.cpp +++ b/udb.cpp @@ -33,6 +33,7 @@ private: uint8_t m_time; uint8_t m_ymdt; // 2-bit fields: TDMY accuracy, T lowest, 0=invalid, 1=?, 2=~, 3=accurate uint8_t m_duration; 
+ [[maybe_unused]] // -Wunused-private-field uint8_t m_unknown1; int16_t m_enc_longtitude; @@ -41,11 +42,13 @@ private: int16_t m_elevation; int16_t m_rel_altitude; + [[maybe_unused]] // -Wunused-private-field uint8_t m_unknown2; uint8_t m_continent_country; // nibbles uint8_t m_state_or_prov[3]; + [[maybe_unused]] // -Wunused-private-field uint8_t m_unknown3; #if 0 @@ -653,8 +656,9 @@ static std::string decode_hatch(const std::string& str, bool first_line) string_vec tokens; std::string cur_token; - bool inside_space = false; - int prev_c = -1; + // written to, but never read from + [[maybe_unused]] bool inside_space = false; + [[maybe_unused]] int prev_c = -1; // Phase 1: Tokenize the input string based off examination of (mostly) individual chars, previous chars and upcoming individual chars. for (uint32_t i = 0; i < str.size(); i++) @@ -1562,11 +1566,11 @@ static void init_dict() } } - uprintf("Done reading dictionary, %u uppercase words\n", g_dictionary.size()); + uprintf("Done reading dictionary, %zu uppercase words\n", g_dictionary.size()); } void udb_init() -{ +{ assert(sizeof(udb_rec) == UDB_RECORD_SIZE); check_for_hatch_tab_dups(g_hatch_refs); @@ -1703,10 +1707,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event& decode_hatch_desc(pRec, db_str, loc_str, desc_str); pRec->get_date(event.m_begin_date); - + if (event.m_begin_date.m_year <= 0) return false; - + std::string time; if (pRec->get_time(time)) { @@ -1719,21 +1723,21 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event& event.m_locations.push_back(loc_str); event.m_desc = desc_str; - + // TODO event.m_type.push_back("sighting"); event.m_source_id = string_format("Hatch_UDB_%u", rec_index); event.m_source = "Hatch"; - + for (uint32_t f = 0; f < udb_rec::cMaxFlags; f++) if ((f != cFlagMAP) && (pRec->get_flag(f))) event.m_attributes.push_back(g_pHatch_flag_descs[f]); event.m_refs.push_back(pRec->get_full_refs()); - + 
event.m_key_value_data.push_back(std::make_pair("LocationLink", string_format("[Google Maps](https://www.google.com/maps/place/%f,%f)", pRec->get_latitude(), pRec->get_longitude()))); - + event.m_key_value_data.push_back(std::make_pair("LatLong", string_format("%f %f", pRec->get_latitude(), pRec->get_longitude()))); event.m_key_value_data.push_back(std::make_pair("LatLongDMS", string_format("%s %s", pRec->get_latitude_dms().c_str(), pRec->get_longitude_dms().c_str()))); @@ -1756,10 +1760,10 @@ static bool convert_rec(uint32_t rec_index, const udb_rec* pRec, timeline_event& if (pRec->get_elevation() != -99) event.m_key_value_data.push_back(std::make_pair("Elev", string_format("%i", pRec->get_elevation()))); - + if ((pRec->get_rel_altitude() != 0) && (pRec->get_rel_altitude() != 999)) event.m_key_value_data.push_back(std::make_pair("RelAlt", string_format("%i", pRec->get_rel_altitude()))); - + return true; } diff --git a/udb_tables.h b/udb_tables.h index 3f9c34b..f99927d 100644 --- a/udb_tables.h +++ b/udb_tables.h @@ -1,5 +1,5 @@ // udb_tables.h -// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from +// Some portions of this specific file (get_hatch_geo, g_hatch_continents) use strings from // the "uDb" project by Jérôme Beau, available on github here: https://github.com/RR0/uDb #pragma once @@ -60,7 +60,7 @@ static const char* g_hatch_continents[] struct hatch_state { const char* m_pCode; - const char* m_pFull; + const char* m_pFull = nullptr; }; static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std::string& state_or_prov, @@ -677,7 +677,7 @@ static void get_hatch_geo(uint32_t cont_code, uint32_t country_code, const std:: break; } - case 6: // Asia Pacific + case 6: // Asia Pacific { switch (country_code) { @@ -1599,7 +1599,7 @@ struct hatch_abbrev { const char* pAbbrev; const char* pExpansion; - bool m_forbid_firstline; + bool m_forbid_firstline = false; }; static const hatch_abbrev 
g_hatch_abbreviations[] = @@ -2956,7 +2956,7 @@ static const hatch_abbrev g_hatch_abbreviations[] = { "Var.", "various", true }, { "Img", "image", true }, { "FLUCTs", "fluctuates", true }, - { "rtps", "reports", true }, // "separate rtps" + { "rtps", "reports", true }, // "separate rtps" { "Math.", "Mathematics", true }, { "indp.", "independent", true }, { "frag", "fragment", true }, @@ -3106,6 +3106,7 @@ static const char* g_cap_exceptions[] = "McChord", "Hetch Hetchy Aqueduct", "LaPaz", + // #REVIEW Does this need to be double question mark? clang trips on "trigraph ignored" -Wtrigraphs "Sea Island'(??)", "Loren Gross", "Test Pilot", @@ -3121,7 +3122,7 @@ static const char* g_cap_exceptions[] = "no UFO", "Blackcomb Mountain", "Harding Mall", - "Hawkes Bay" + "Hawkes Bay", "Hells Canyon", "Highway Patrol", "Hogg Mountain", diff --git a/ufojson.aps b/ufojson.aps deleted file mode 100644 index 2a1f290f8cfe78930367e27ac36e24cb62900888..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1260 zcmb7DO>fgc5PgPR!pDUJSKN|AD^bfS6+*oQ+Z7g$UBymXBuiH0xHMH8OG(`DQ+ntR z0)7GFR`Em3TYqGe7Fw~^taoSLyqTTd0RWYf=cRDDHePYpqI|>=KJcICN@Z_0ss*ke z1Ll4>N-o3fB#lPH`A{s*X6Zzm#=uho_b34@9CNne!hnN5Oll;j4#_!9W@$E!6YUgu z`Jn$JB7jx@*W*cio{bW9b1|NfOgr6t{)8B&`amuMvCbtNZ>U?Z!?z`_j1FuRnhmVv z&SFKKLqBq@;~?r;zSr*=Ry621eG6^9jpnM|@VecOYgXnxT_=E{C4u3&LCfx2&$m4{vfTp@ZM=YDn?~dZoxnD*wbS0l4wT|NV*bs? zQH@{xuh-#wVG*^j>yTHY*)Xw>5F;eG(lVni<0|G_1FxHJtQ^gT zQTQLyn&J!RAugce0ArjNo+`HR74Pv5+hjYUenR#X3$m*C#QmJBkSEkR+qA~?%!-Uk ztwxK6U4DA;iM`15g7c!_D|0L}17qJl|BpN84|v(OYdC$Amt0s|JkzRn_pjQwORc~I zZ+}EF?|E?(i(|@Ape%|XNb0Mr4qH|iz`QLHw_SOx39Qw%oWnY!7 ZfOvBO!%2*Mb2UcXOF(*wk@r$#{s7QO+K>PM diff --git a/ufojson.cpp b/ufojson.cpp index 8f11e15..34174bc 100644 --- a/ufojson.cpp +++ b/ufojson.cpp @@ -11,6 +11,7 @@ //------------------------------------------------------------------- +[[maybe_unused]] // currently unused... 
static void detect_bad_urls() { string_vec unique_urls; @@ -112,7 +113,7 @@ static bool invoke_openai(const char* pPrompt_text, json& result) return false; } - return true; + return success; } static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, json& result) @@ -125,7 +126,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, if ((desc.size() >= 2) && (desc.back() == '(')) desc.pop_back(); - + const uint32_t MAX_SIZE = 4096; // ~1024 tokens if (desc.size() > MAX_SIZE) { @@ -143,7 +144,7 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, } uprintf("Desc: %s\n\n", desc.c_str()); - + std::string prompt_str(pPrompt_text); prompt_str += desc; prompt_str += "\""; @@ -151,12 +152,13 @@ static bool invoke_openai(const timeline_event &event, const char *pPrompt_text, return invoke_openai(prompt_str.c_str(), result); } +[[maybe_unused]] // currently unused... static void process_timeline_using_openai(const ufo_timeline &timeline) { bool utf8_flag; json existing_results; load_json_object("openai_results.json", utf8_flag, existing_results); - + json final_result = json::object(); final_result["results"] = json::array(); @@ -251,6 +253,7 @@ static void process_timeline_using_openai(const ufo_timeline &timeline) uprintf("Success\n"); } +[[maybe_unused]] // currently unused... 
static void process_timeline_using_python(const ufo_timeline& timeline) { json final_result = json::object(); @@ -275,7 +278,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline) remove("locations.json"); Sleep(50); - + int status = system("python.exe pextractlocs.py"); if (status != EXIT_SUCCESS) panic("Failed running python.exe"); @@ -295,7 +298,7 @@ static void process_timeline_using_python(const ufo_timeline& timeline) if (it->is_string()) uprintf("%s\n", it->get().c_str()); } - + json new_obj = json::object(); new_obj.emplace("index", i); new_obj.emplace("date", event.m_date_str); @@ -348,6 +351,7 @@ static bool is_important_country(const std::string& s) return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE") || (s == "BR") || (s == "IT"); } +[[maybe_unused]] // currently unused... static bool is_favored_country(const std::string& s) { return (s == "US") || (s == "GB") || (s == "AU") || (s == "CA") || (s == "NZ") || (s == "FR") || (s == "DE"); @@ -375,11 +379,13 @@ static int get_favored_country_rank(const std::string& s) return 7; } +[[maybe_unused]] // currently unused... static bool is_country_fcode(const std::string &fcode) { return ((fcode == "PCL") || (fcode == "PCLD") || (fcode == "PCLF") || (fcode == "PCLH") || (fcode == "PCLI") || (fcode == "PCLIX") || (fcode == "PCLS") || (fcode == "TERR")); } +[[maybe_unused]] // currently unused... 
static void process_geodata() { string_vec lines; @@ -395,7 +401,7 @@ static void process_geodata() geonames.resize(13000000); uint32_t total_geonames = 0; - + uint32_t max_col_sizes[gn_total]; clear_obj(max_col_sizes); @@ -405,7 +411,7 @@ static void process_geodata() uint32_t total_accepted = 0; json output_json = json::array(); - + for (const auto& str : lines) { tab_locs.resize(0); @@ -444,7 +450,7 @@ static void process_geodata() #endif max_col_sizes[i] = std::max(max_col_sizes[i], (uint32_t)g.m_fields[i].size()); - + cur_ofs = tab_locs[i] + 1; } @@ -453,7 +459,7 @@ static void process_geodata() if (g.m_fields[gn_population].size()) { int pop = atoi(g.m_fields[gn_population].c_str()); - + const int MIN_POP = 10; if (pop >= MIN_POP) has_min_pop = true; @@ -468,7 +474,7 @@ static void process_geodata() switch (feature_class) { case 'T': // mountain,hill,rock,... - if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") || + if ((code == "MT") || (code == "MTS") || (code == "ATOL") || (code == "CAPE") || (code == "CNYN") || (code == "DSRT") || (code == "ISL") || (code == "ISLS") || (code == "PEN") || (code == "VALS") || (code == "VALX")) { accept_flag = true; @@ -477,7 +483,7 @@ static void process_geodata() case 'S': // spot, building, farm if ((code == "AIRB") || (code == "AIRF") || (code == "AIRP") || (code == "AIRQ") || (code == "BRKS") || (code == "CTRA") || (code == "CTRS") || (code == "INSM") || (code == "ITTR") || (code == "PSN") || (code == "STNE") || (code == "USGE") || - (code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") || + (code == "OBS") || (code == "OBSR") || (code == "MFGM") || (code == "FT") || (code == "ASTR") || (code == "FCL") || (code == "PS") || (code == "PSH") || (code == "STNB") || (code == "STNS") || (code == "UNIV")) { accept_flag = true; @@ -495,6 +501,7 @@ static void process_geodata() break; case 'H': // stream, 
lake, ... if ((code == "BAY") || (code == "BAYS") || (code == "CHN") || (code == "CHNL") || (code == "CHNM") || (code == "CHNN") || + // #REVIEW "CNL" is repeated twice, was something else meant here? (code == "CNL") || (code == "CNL") || (code == "LK") || (code == "LKN") || (code == "LKS") || (code == "RSV") || (code == "SD") || (code == "STRT")) { accept_flag = true; @@ -527,7 +534,7 @@ static void process_geodata() obj["id"] = g.m_fields[gn_geonameid].size() ? atoi(g.m_fields[gn_geonameid].c_str()) : -1; obj["name"] = g.m_fields[gn_name]; obj["plainname"] = g.m_fields[gn_asciiname]; - + if (g.m_fields[gn_alternatenames].size()) obj["altnames"] = g.m_fields[gn_alternatenames]; @@ -539,10 +546,10 @@ static void process_geodata() if (g.m_fields[gn_country_code].size()) obj["ccode"] = g.m_fields[gn_country_code]; - + if (g.m_fields[gn_cc2].size()) obj["cc2"] = g.m_fields[gn_cc2]; - + if (g.m_fields[gn_admin1_code].size()) obj["a1"] = g.m_fields[gn_admin1_code]; @@ -572,7 +579,7 @@ static void process_geodata() { rejected_class_counts[feature_class] = rejected_class_counts[feature_class] + 1; } - + total_geonames++; if ((total_geonames % 1000000) == 0) @@ -596,11 +603,12 @@ static void process_geodata() uprintf("%c %u\n", s.first, s.second); } +#if 0 // unused code... static const struct { const char* m_pCode; int m_level; -} g_geocode_levels[] = +} g_geocode_levels[] = { { "ADM1", 1 }, { "ADM1H", 1 }, @@ -643,6 +651,7 @@ static int find_geocode_admin_level(const char* pCode) return -1; } +#endif // 0 // unused code... 
struct country_info { @@ -707,12 +716,12 @@ public: load_hierarchy(); uprintf("Reading world_features.json\n"); - + if (!read_text_file("world_features.json", m_filebuf, nullptr)) panic("Failed reading file"); uprintf("Deserializing JSON file\n"); - + bool status = m_doc.deserialize_in_place((char*)&m_filebuf[0]); if (!status) panic("Failed parsing JSON document!"); @@ -731,16 +740,16 @@ public: //tm.start(); uint8_vec name_buf; - + m_geoid_to_rec.clear(); m_geoid_to_rec.reserve(MAX_EXPECTED_RECS); - + for (uint32_t rec_index = 0; rec_index < root_arr.size(); rec_index++) { const auto& arr_entry = root_arr[rec_index]; if (!arr_entry.is_object()) panic("Invalid JSON"); - + int geoid = arr_entry.find_int32("id"); assert(geoid > 0); auto ins_res = m_geoid_to_rec.insert(std::make_pair(geoid, (int)rec_index)); @@ -770,7 +779,7 @@ public: const auto pPlainName = arr_entry.find_value_variant("plainname"); if ((pPlainName == nullptr) || (!pPlainName->is_string())) panic("Missing/invalid plainname field"); - + { const char* pName_str = pPlainName->get_string_ptr(); size_t name_size = strlen(pName_str); @@ -823,12 +832,12 @@ public: } std::string fclass = arr_entry.find_string_obj("fclass"); - + if (fclass == "A") { std::string fcode(arr_entry.find_string_obj("fcode")); - - if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4")) + + if ((fcode == "ADM1") || (fcode == "ADM2") || (fcode == "ADM3") || (fcode == "ADM4")) { std::string ccode(arr_entry.find_string_obj("ccode")); @@ -846,7 +855,7 @@ public: break; desc += "." 
+ a[i]; } - + m_admin_map[desc].push_back(std::pair(rec_index, get_admin_level(fcode))); } } @@ -878,7 +887,7 @@ public: { std::vector< std::pair >& recs = it->second; - std::sort(recs.begin(), recs.end(), + std::sort(recs.begin(), recs.end(), [](const std::pair& a, const std::pair& b) -> bool { return a.second < b.second; @@ -890,7 +899,7 @@ public: { const int cur_rec_index = recs[i].first; const pjson::value_variant* pCur = &m_doc[cur_rec_index]; - + uprintf("admlevel: %u, rec: %u geoid: %u name: %s fcode: %s\n", recs[i].second, cur_rec_index, pCur->find_int32("id"), pCur->find_string_obj("name").c_str(), pCur->find_string_obj("fcode").c_str()); @@ -922,7 +931,7 @@ public: c = utolower(c); const uint32_t hash_val = (hash_hsieh((const uint8_t *)key.c_str(), key.size()) * HASH_FMAGIC) >> HASH_SHIFT; - + results.resize(0); alt_results.resize(0); @@ -934,7 +943,7 @@ public: const pjson::value_variant* pObj = &m_doc[rec_index]; const char *pName = pObj->find_string_ptr("name"); - + const char* pPlainName = pObj->find_string_ptr("plainname"); if ((_stricmp(pKey, pName) != 0) && (_stricmp(pKey, pPlainName) != 0)) @@ -1010,7 +1019,7 @@ public: if (num_parent_admins > num_child_admins) return false; - + // Example: Anderson, Shasta County, California if (num_parent_admins == num_child_admins) { @@ -1022,7 +1031,7 @@ public: for (uint32_t admin_index = 0; admin_index < num_parent_admins; admin_index++) { std::string id(string_format("a%u", admin_index + 1)); - + std::string admin_parent(pParent->find_string_obj(id.c_str())); std::string admin_child(pChild->find_string_obj(id.c_str())); @@ -1066,16 +1075,16 @@ public: cRankVillageNoPopAlt, // alt cRankAdminNoPop, // not a numbered admin - + cRankPopVillageAlt, // prim, 1-100 cRankTownAlt, // alt, 100+ cRankCityLevel0Alt, // alt or alt, 1k+ cRankCityLevel1Alt, // alt or alt, 10k+ - + cRankAdminCapital4Alt, // alt cap4 cRankAdmin4Alt, // alt admin4 - + cRankAdminCapital3Alt, // alt cap3 cRankAdmin3Alt, // alt amind3 @@ 
-1085,10 +1094,10 @@ public: cRankVillageNoPop, // prim no pop cRankAdmin, // not numbered, has pop - + cRankPopVillage, // prim, 1-100 cRankTown, // prim, 100+ - + cRankAdminCapital2Alt, // alt county seat cRankAdmin2Alt, // alt county @@ -1097,9 +1106,9 @@ public: cRankPark, // prim or alt cRankReserve, // prim or alt - + cRankAdminCapital1Alt, // alt state cap - + cRankCityLevel0, // prim or alt, 1k+ cRankCityLevel1, // prim or alt, 10k+ @@ -1110,19 +1119,19 @@ public: cRankCityLevel3, // prim or alt, 1m+ cRankBaseOrAirport, // prim or alt - + cRankAdminCapital2, // prim county seat - cRankAdmin2, // prim county - + cRankAdmin2, // prim county + cRankAdminCapital1, // prim state cap - + cRankAdmin1Alt, // alt state - + cRankPoliticalCapital, // prim or alt cRankGovernmentCapital, // prim or alt - + cRankAdmin1, // prim state - + // all countries prim or alt cRankCountryLevel0, cRankCountryLevel1, @@ -1134,10 +1143,10 @@ public: cRankCountryLevel7, cRankCountryLevel8, cRankCountryLevel9, - + cRankTotal, }; - + int get_rank(const pjson::value_variant* p, bool alt_match) const { int country_index = get_country_index(p); @@ -1265,11 +1274,11 @@ public: struct resolve_results { - resolve_results() + resolve_results() { clear(); } - + void clear() { m_candidates.resize(0); @@ -1282,16 +1291,16 @@ public: } geo_result m_best_result; - + uint32_t m_num_input_tokens; bool m_strong_match; - + geo_result_vec m_candidates; std::vector< std::pair > m_sorted_results; uint32_t m_best_sorted_result_index; float m_best_score; }; - + bool resolve(const std::string& str, resolve_results &resolve_res) const { uprintf("--- Candidates for query: %s\n", str.c_str()); @@ -1359,7 +1368,7 @@ public: p->find_string_ptr("fcode"), p->find_int32("pop")); #endif - + temp_results[toks_index].push_back({ p, false }); } @@ -1389,7 +1398,7 @@ public: uprintf("No results\n"); return false; } - + //uprintf("Candidates for query: %s\n", str.c_str()); std::vector valid_candidates; @@ -1405,7 +1414,7 
@@ public: std::vector< std::pair > candidate_results[TOTAL_FAVORED_COUNTRY_RANKS]; uint32_t total_country_rankings = 0; - uint32_t total_candidates = 0; + [[maybe_unused]] uint32_t total_candidates = 0; for (uint32_t candidate_index_iter = 0; candidate_index_iter < valid_candidates.size(); candidate_index_iter++) { @@ -1449,11 +1458,11 @@ public: } } } - + candidate_score += p->find_float("pop") / 40000000.0f; const int country_rank = get_favored_country_rank(ccode); - assert(country_rank < TOTAL_FAVORED_COUNTRY_RANKS); + assert(static_cast(country_rank) < TOTAL_FAVORED_COUNTRY_RANKS); if (!candidate_results[country_rank].size()) total_country_rankings++; @@ -1462,7 +1471,7 @@ public: total_candidates++; } - + // 1. If there's just one country rank group, choose the best score in that country rank group. // 2. If they matched against a country, choose the highest ranking country, prioritizing the favored countries first. // 3. Check for states, state capitals or other significant admin districts in the favored countries, in order @@ -1512,7 +1521,7 @@ public: } } #endif - + if (total_country_rankings == 1) { // Only one ranked country group in the candidate results, so just choose the one with the highest score. @@ -1525,9 +1534,9 @@ public: break; } } - + assert(pBest_ranking_vec); - + uint32_t candidate_index = (*pBest_ranking_vec)[0].first; best_score = (*pBest_ranking_vec)[0].second; @@ -1539,7 +1548,7 @@ public: else { // Multiple ranked country groups. - + // Check for US states (primary or alt) { uint32_t r_index = 0; @@ -1564,7 +1573,7 @@ public: } } } - + if (!pBest_result) { // First check for any country hits from any ranked country group. 
@@ -1588,7 +1597,7 @@ public: break; } } - + if (pBest_result) break; } @@ -1610,7 +1619,7 @@ public: //const bool was_alt = temp_results[last_tok_index][candidate_index].m_alt; const int rank = get_rank(p, temp_results[last_tok_index][candidate_index].m_alt); - + if ((rank == cRankAdmin1Alt) || (rank == cRankAdmin1) || (rank == cRankPoliticalCapital) || (rank == cRankGovernmentCapital)) { pBest_result = &temp_results[last_tok_index][candidate_index]; @@ -1620,7 +1629,7 @@ public: break; } } - + if (pBest_result) break; } @@ -1686,7 +1695,7 @@ public: } } } - + if (!pBest_result) { // Fall back to choosing the highest score @@ -1698,13 +1707,13 @@ public: { const uint32_t candidate_index = r[i].first; const float score = r[i].second; - + if (score > best_score) { best_score = score; pBest_result = &temp_results[last_tok_index][candidate_index]; - + pBest_ranking_vec = &r; best_ranking_index = i; } @@ -1730,10 +1739,9 @@ public: resolve_res.m_best_sorted_result_index = best_ranking_index; resolve_res.m_best_score = best_score; - const pjson::value_variant* pVariant = pBest_result->m_pVariant; - (pVariant); + [[maybe_unused]] const pjson::value_variant* pVariant = pBest_result->m_pVariant; -#if 0 +#if 0 uprintf("Result: score:%f, alt: %u, id: %u, name: \"%s\", lat: %f, long: %f, ccode=%s, a1=%s, a2=%s, a3=%s, a4=%s, fclass: %s, fcode: %s, pop: %i\n", best_score, pBest_result->m_alt, @@ -1759,7 +1767,7 @@ public: std::string ccode(p->find_string_obj("ccode")); std::string fclass(p->find_string_obj("fclass")); std::string fcode(p->find_string_obj("fcode")); - + std::string a[4] = { p->find_string_obj("a1"), p->find_string_obj("a2"), p->find_string_obj("a3"), p->find_string_obj("a4") }; uint32_t num_admins = count_admins(p); @@ -1778,9 +1786,9 @@ public: if (find_res != m_admin_map.end()) { const std::vector< std::pair >& recs = find_res->second; - + assert(recs.size()); - + int cur_level = recs[0].second; for (uint32_t j = 0; j < recs.size(); j++) { @@ -1788,7 +1796,7 
@@ public: break; int rec_index = recs[j].first; - + const pjson::value_variant* q = &m_doc[rec_index]; if (i == (int)(num_admins - 1)) @@ -1832,7 +1840,7 @@ private: std::vector m_name_hashtab; std::unordered_map m_geoid_to_rec; - + country_info_vec m_countries; std::unordered_map m_rec_index_to_country_index; std::unordered_map m_geoid_to_country_index; @@ -1857,7 +1865,7 @@ private: return find_res->second; } - static void extract_tab_fields(const std::string& str, string_vec& fields) + static void extract_tab_fields(const std::string& str, string_vec& fields) { std::vector tab_locs; tab_locs.resize(0); @@ -2055,6 +2063,7 @@ static const char* s_kwic_stop_words[] = "when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours", "yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems" }; +[[maybe_unused]] const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(s_kwic_stop_words); static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline::event_urls_map_t &event_urls, bool book_flag = false, const char *pOutput_filename_base = nullptr, const char *pTitle = nullptr, const char *pHeader = nullptr) @@ -2072,7 +2081,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline:: typedef std::unordered_map word_map_t; word_map_t word_map; word_map.reserve(timeline.size() * 20); - + std::unordered_set stop_word_set; for (const auto& str : s_kwic_stop_words) stop_word_set.insert(str); @@ -2161,7 +2170,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline:: kwic_file_strings_header[i].push_back(string_format("# %s, KWIC Index Page: %s", pTitle, name.c_str())); else kwic_file_strings_header[i].push_back(string_format("# UFO Event Timeline, KWIC Index Page: %s", name.c_str())); - + if (!book_flag) { kwic_file_strings_header[i].push_back(""); @@ -2245,7 +2254,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const 
ufo_timeline:: for (l = 0; l < (int)event_char_offsets.size(); l++) if (str_ofs == event_char_offsets[l]) break; - if (l == event_char_offsets.size()) + if (l == static_cast(event_char_offsets.size())) l = 0; const int PRE_CONTEXT_CHARS = 35; @@ -2259,7 +2268,7 @@ static bool create_kwic_index(const ufo_timeline &timeline, const ufo_timeline:: // in bytes int start_ofs = event_char_offsets[s]; int prefix_bytes = event_char_offsets[l] - start_ofs; - int end_ofs = (e >= event_char_offsets.size()) ? (int)str.size() : event_char_offsets[e]; + int end_ofs = (e >= static_cast(event_char_offsets.size())) ? (int)str.size() : event_char_offsets[e]; int len = end_ofs - start_ofs; std::string context_str(string_slice(str, start_ofs, len)); @@ -2340,10 +2349,10 @@ static bool load_book_json( json js; if (!load_json_object(pSource_filename, utf8_flag, js)) return false; - + const uint32_t first_event_index = (uint32_t)timeline.size(); timeline.get_events().resize(first_event_index + js.size()); - + for (uint32_t i = 0; i < js.size(); i++) { auto obj = js[i]; @@ -2413,7 +2422,7 @@ static bool load_book_json( event_urls.insert(std::make_pair((int)(i + first_event_index), url)); } - + return true; } @@ -2555,7 +2564,7 @@ static bool create_crashconf_kwic_index() { ufo_timeline timeline; ufo_timeline::event_urls_map_t event_urls; - + std::string header("This is an automatically generated [KWIC Index](https://en.wikipedia.org/wiki/Key_Word_in_Context) of the 2003-2009 Crash Retrieval Conference proceedings, created by [Richard Geldreich Jr.](https://twitter.com/richgel999).\n\nHere are links to each year's proceedings and each presentation:\n"); for (uint32_t i = 0; i < NUM_CRASHCONF_URLS; i++) @@ -2587,13 +2596,14 @@ static bool create_crashconf_kwic_index() return create_kwic_index(timeline, event_urls, true, "crashconf_kwic_", "Crash Retrieval Conference Proceedings", header.c_str()); } +[[maybe_unused]] static int md_trim(const string_vec& args) { if (args.size() != 3) 
panic("Expecting 2 filenames\n"); string_vec src_file_lines; - + if (!read_text_file(args[1].c_str(), src_file_lines, true, nullptr)) panic("Failed reading source file %s\n", args[1].c_str()); @@ -2606,7 +2616,7 @@ static int md_trim(const string_vec& args) const std::string& str = src_file_lines[i]; if (!str.size()) continue; - + if (string_find_first(str, "---------------") >= 0) { found_header = true; @@ -2635,7 +2645,7 @@ static int md_trim(const string_vec& args) if (!str.size()) continue; - if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) || + if ( (string_find_first(str, "[Chronologie](annees.html)") >= 0) || (string_find_first(str, "[Contact](Contact.html)") >= 0) || (string_find_first(str, "[Home](/)") >= 0)) { @@ -2660,7 +2670,7 @@ static int md_trim(const string_vec& args) panic("Failed writing output file %s\n", args[2].c_str()); uprintf("Wrote file %s\n", args[2].c_str()); - + return EXIT_SUCCESS; } @@ -2676,7 +2686,7 @@ static bool translate_record(const string_vec& in, string_vec& out) string_vec prompt; prompt.push_back("Precisely translate this UFO/saucer event record from French to English. Preserve all formatting and new lines, especially the first 2 lines, which contain the date and location. 
If the record is all-caps, correct it so it's not."); - + prompt.push_back("\""); for (const auto& str : in) prompt.push_back(str); @@ -2689,6 +2699,7 @@ static bool translate_record(const string_vec& in, string_vec& out) #endif } +[[maybe_unused]] static int md_translate(const string_vec& args) { if (args.size() != 3) @@ -2763,10 +2774,10 @@ static int md_translate(const string_vec& args) uprintf("%s\n", cur_rec[i].c_str()); tran_recs.push_back(cur_rec); - + cur_rec.resize(0); } - + cur_rec.push_back(src_file_lines[cur_line]); } @@ -2792,7 +2803,7 @@ static int md_translate(const string_vec& args) if (!translate_record(tran_recs[rec_index], tran_rec)) { uprintf("Failed translating record %u!\n", rec_index); - + if (tran_recs[rec_index].size()) out_lines.push_back(tran_recs[rec_index][0]); out_lines.push_back("FAILED!\n"); @@ -2837,14 +2848,14 @@ static int md_translate(const string_vec& args) int wmain(int argc, wchar_t* argv[]) { assert(cTotalPrefixes == sizeof(g_date_prefix_strings) / sizeof(g_date_prefix_strings[0])); - + string_vec args; convert_args_to_utf8(args, argc, argv); // Set ANSI Latin 1; Western European (Windows) code page for output. 
SetConsoleOutputCP(1252); //SetConsoleOutputCP(CP_UTF8); - + converters_init(); init_norm(); udb_init(); @@ -2870,7 +2881,7 @@ int wmain(int argc, wchar_t* argv[]) uprintf("Skipping file %s - already exists\n", out_filename.c_str()); continue; } - + string_vec a = { "", in_filename, out_filename }; int status = md_translate(a); if (status != EXIT_SUCCESS) @@ -2878,7 +2889,7 @@ int wmain(int argc, wchar_t* argv[]) } exit(0); #endif - + bool status = false, utf8_flag = false; unordered_string_set unique_urls; @@ -2891,7 +2902,7 @@ int wmain(int argc, wchar_t* argv[]) std::string title_str("All events"); bool conversion_flag = false; bool crashconf_flag = false; - + int arg_index = 1; while (arg_index < argc) { @@ -2900,7 +2911,7 @@ int wmain(int argc, wchar_t* argv[]) arg_index++; const uint32_t num_args_remaining = argc - arg_index; - + if (t == '-') { if (arg == "-convert") @@ -2963,7 +2974,7 @@ int wmain(int argc, wchar_t* argv[]) uprintf("Processing successful\n"); return EXIT_SUCCESS; } - + if (conversion_flag) { uprintf("Convert Overmeire:\n"); @@ -3096,7 +3107,7 @@ int wmain(int argc, wchar_t* argv[]) panic("convert_anon failed!"); uprintf("Success\n"); } // if (conversion_flag) - + uprintf("Total unique URL's: %u\n", (uint32_t)unique_urls.size()); string_vec urls; @@ -3138,7 +3149,7 @@ int wmain(int argc, wchar_t* argv[]) status = timeline.load_json("nicap_db.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading nicap_db.json"); - + status = timeline.load_json("trace.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading trace.json"); @@ -3154,7 +3165,7 @@ int wmain(int argc, wchar_t* argv[]) status = timeline.load_json("ufo_evidence_hall.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading ufo_evidence_hall.json"); - + status = timeline.load_json("nuclear_tests.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading nuclear_tests.json"); @@ -3178,7 +3189,7 @@ int wmain(int argc, wchar_t* argv[]) 
status = timeline.load_json("ancient.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading hostile.json"); - + status = timeline.load_json("pre_roswell_chap1.json", utf8_flag, nullptr, false); if (!status) panic("Failed loading pre_roswell_chap1.json"); @@ -3290,7 +3301,7 @@ int wmain(int argc, wchar_t* argv[]) panic("Date failed sanity check"); } - + uprintf("Load success, %zu total events\n", timeline.get_events().size()); timeline.sort(); @@ -3298,7 +3309,7 @@ int wmain(int argc, wchar_t* argv[]) if (filter_strings.size()) { ufo_timeline new_timeline; - + for (uint32_t i = 0; i < timeline.size(); i++) { const timeline_event& event = timeline[i]; @@ -3337,7 +3348,7 @@ int wmain(int argc, wchar_t* argv[]) } if ( ((filter_all_flag) && (total_matched == filter_strings.size())) || - ((!filter_all_flag) && (total_matched > 0)) ) + ((!filter_all_flag) && (total_matched > 0)) ) { new_timeline.get_events().push_back(event); } @@ -3350,7 +3361,7 @@ int wmain(int argc, wchar_t* argv[]) timeline.get_events().swap(new_timeline.get_events()); } - + uprintf("Writing timeline markdown\n"); ufo_timeline::event_urls_map_t event_urls; diff --git a/ufojson.sln b/ufojson.sln index c42fd54..6ffa5e1 100644 --- a/ufojson.sln +++ b/ufojson.sln @@ -5,6 +5,13 @@ VisualStudioVersion = 17.4.33213.308 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ufojson", "ufojson.vcxproj", "{E4A0DD72-979A-469B-9B0A-4ABE0B7C93D7}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{2495626B-DF4D-491A-84F3-58EB01E0CAAE}" + ProjectSection(SolutionItems) = preProject + .gitignore = .gitignore + LICENSE = LICENSE + README.md = README.md + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 diff --git a/ufojson.vcxproj b/ufojson.vcxproj index 851d763..dc88754 100644 --- a/ufojson.vcxproj +++ b/ufojson.vcxproj @@ -24,6 +24,7 @@ 
{e4a0dd72-979a-469b-9b0a-4abe0b7c93d7} ufojson 10.0 + x64 @@ -51,11 +52,16 @@ - Level4 - true + stdcpp17 true + Caret + Level4 + + true _CONSOLE;%(PreprocessorDefinitions) + true true + true Console @@ -87,6 +93,7 @@ + diff --git a/ufojson.vcxproj.filters b/ufojson.vcxproj.filters index e897b2a..e9283a0 100644 --- a/ufojson.vcxproj.filters +++ b/ufojson.vcxproj.filters @@ -48,5 +48,6 @@ + \ No newline at end of file diff --git a/ufojson_core.cpp b/ufojson_core.cpp index ce7856e..c99d626 100644 --- a/ufojson_core.cpp +++ b/ufojson_core.cpp @@ -1,4 +1,4 @@ -// ufojson_core.cpp +// ufojson_core.cpp // Copyright (C) 2023 Richard Geldreich, Jr. #include "ufojson_core.h" #include "markdown_proc.h" @@ -394,7 +394,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates) string_trim(temp); } - + if (!temp.size()) return false; @@ -443,7 +443,7 @@ bool event_date::parse(const char* pStr, bool fix_20century_dates) m_year = atoi(date_strs[2].c_str()); } - + if (fix_20century_dates) { if ((m_year >= 1) && (m_year <= 99)) @@ -627,7 +627,7 @@ bool event_date::parse_eberhart_date_range(std::string date, return false; d.m_plural = true; - + s.pop_back(); s.pop_back(); @@ -1342,6 +1342,11 @@ static void get_date_range(const event_date& evt, event_date& begin, event_date& end.m_day = 31; } break; + + case cNoPrefix: + case cTotalPrefixes: + assert(!"unreachable"); + break; } } else @@ -1978,7 +1983,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo auto rocket_range = obj.find("rocket_range"); auto source_id = obj.find("source_id"); auto source = obj.find("source"); - + if (desc == obj.end()) panic("Missing desc"); @@ -2003,7 +2008,7 @@ void timeline_event::from_json(const json& obj, const char* pSource_override, bo m_date_str = (*date); if (!m_begin_date.parse(m_date_str.c_str(), fix_20century_dates)) panic("Failed parsing date %s\n", m_date_str.c_str()); - + if (end_date != obj.end()) { m_end_date_str = (*end_date); @@ -2356,12 
+2361,12 @@ void ufo_timeline::create_plaintext() string_vec words; get_string_words(te.m_plain_desc, words, nullptr, "-"); - + for (uint32_t j = 0; j < te.m_plain_refs.size(); j++) { string_vec temp_words; get_string_words(te.m_plain_refs[j], temp_words, nullptr, "-"); - + words.insert(words.end(), temp_words.begin(), temp_words.end()); } @@ -2379,12 +2384,12 @@ void ufo_timeline::create_plaintext() std::string tmp(ustrlwr(words[j])); if (!tmp.size() || is_stop_word(tmp)) continue; - + std::string nrm_tmp(normalize_word(tmp)); if (!nrm_tmp.size() || is_stop_word(nrm_tmp)) continue; - + new_words.push_back(nrm_tmp); } @@ -2413,7 +2418,7 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD last_event_index = std::max(last_event_index, i); } } - + if (first_event_index > last_event_index) panic("Can't find events"); @@ -2424,28 +2429,28 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD FILE* pTimeline_file = ufopen(pTimeline_filename, "w"); if (!pTimeline_file) - panic("Failed creating file %s", pTimeline_file); + panic("Failed creating file %s", pTimeline_filename); fputc(UTF8_BOM0, pTimeline_file); fputc(UTF8_BOM1, pTimeline_file); fputc(UTF8_BOM2, pTimeline_file); fprintf(pTimeline_file, "\n"); - + if ((pDate_range_desc) && (strlen(pDate_range_desc))) fprintf(pTimeline_file, "\n# UFO/UAP Event Chronology, %s, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "\n\n", pDate_range_desc); else fprintf(pTimeline_file, "\n# UFO/UAP Event Chronology, v" TIMELINE_VERSION " - Compiled " COMPILATION_DATE "\n\n"); fputs( - u8R"(An automated compilation by Richard Geldreich, Jr. using public data from Dr. Jacques Valle, + u8R"(An automated compilation by Richard Geldreich, Jr. using public data from Dr. Jacques Vallée, Pea Research, George M. Eberhart, Richard H. Hall, Dr. Donald A. Johnson, Fred Keziah, Don Berliner, -Larry Hatch, [NICAP](https://www.nicap.org/), [Thomas R. 
Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jrme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group. +Larry Hatch, [NICAP](https://www.nicap.org/), [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4), [George D. Fawcett](https://archive.ph/eQwIL), [Chris Aubeck](https://books.google.com/books/about/Return_to_Magonia.html?id=JBGNjgEACAAJ&source=kp_author_description), [Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X), [Richard Dolan](https://richarddolanmembers.com/), [Jérôme Beau](https://rr0.org/), [Godelieve Van Overmeire](http://cobeps.org/fr/godelieve-van-overmeire), and an anonymous individual or group. ## Some non-summarized events fall under one of these copyrights: - Richard Geldreich, Jr. - Copyright (c) 2023 (events marked \"maj2\" unless otherwise attributed) -- Dr. Jacques F. Valle - Copyright (c) 1993 +- Dr. Jacques F. Vallée - Copyright (c) 1993 - LeRoy Pea - Copyright (c) 9/8/1988 (updated 3/17/2005) - George M. Eberhart - Copyright (c) 2022 - Dr. Donald A. Johnson - Copyright (c) 2012 @@ -2453,18 +2458,18 @@ bool ufo_timeline::write_markdown(const char* pTimeline_filename, const char *pD - Larry Hatch - Copyright (c) 1992-2002 - Thomas R. 
Adams - Copyright (c) 1991 - Richard Dolan - Copyright (c) 2002 -- Jrme Beau - Copyright (c) 2000-2023 +- Jérôme Beau - Copyright (c) 2000-2023 ## Update History: - v1.46: Adding ~3700 events, translated from the French chronology [_Mini catalogue chronologique des observations OVNI_](https://web.archive.org/web/20060107070423/http://users.skynet.be/sky84985/chrono.html) by Belgian ufologist [Godelieve Van Overmeire, 1935-2021](http://cobeps.org/fr/godelieve-van-overmeire). Note these events are from the old HTML version on archive.org, not the larger [(10k event) PDF version](http://www.cobeps.org/pdf/Chronologie-OVNI-VOG.pdf). It is unclear if these events are copyrighted. I didn't see a copyright in either the HTML or PDF versions. -- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jrme Beau. Its license is [here](https://rr0.org/Copyright.html). +- v1.43: Added ~3160 events, translated from a French chronology to English using OpenAI, from [rr0.org](https://rr0.org/). I believe this chronology was composed by Jérôme Beau. Its license is [here](https://rr0.org/Copyright.html). - v1.40: Added digitized events/newspaper clippings from [Frank Scully's papers at the American Heritage Center in Laramie, WY](https://archiveswest.orbiscascade.org/ark:80444/xv506256), summarized the events from the timeline on the [Disclosure Diaries](https://www.disclosurediaries.com/) website, and added more misc. events. Fixed auto-translation issue in the search page. - v1.38: Added a [client-side search engine](search.html). There are a bunch of features I'm going to add to this engine, for now it can only search for keywords in the desc, location and and reference fields. - v1.37: Updated intro text, added total number of events to each event year, added a few 1800's events. 
-- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 19411973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY). +- v1.36: Extracted and summarized the events in the book [_It Didn't Start with Roswell_ by Philip L. Rife](https://www.amazon.com/Didnt-Start-Roswell-Encounters-Coverups/dp/059517339X). Also extracted the military UFO events from Richard Dolan's book [_UFOs and the National Security State: Chronology of a Cover-up, 1941–1973_](https://www.amazon.com/UFOs-National-Security-State-Chronology-ebook/dp/B0C94W38QY). - v1.34: Added more modern events, 1917 Mystery Airplane newspaper articles. - v1.33: More events: Events from George D. Fawcett, short AI summaries of Stringfield's 1978 MUFON symposium presentation, and short AI summaries of the pre-industrial era sighting events from the book [_Wonders in the Sky: Unexplained Aerial Objects from Antiquity to Modern Times_](https://www.amazon.com/Wonders-Sky-Unexplained-Objects-Antiquity/dp/1585428205). -- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes +- v1.30: Added 203 Mystery Helicopter/mutilation related events (1970's-1980's) compiled by author/researcher [Thomas R. 
Adams](https://www.lulu.com/shop/ray-boeche/bloodless-cuts/hardcover/product-22167360.html?page=1&pageSize=4) (1945-2015) (or see [here](http://copycateffect.blogspot.com/2018/06/Adams-Massey-Obits.html)), from his book [_The Choppers - and the Choppers, Mystery Helicopters and Animal Mutilations_](http://www.ignaciodarnaude.com/avistamientos_ovnis/Adams,Thomas,Choppers%20and%20the%20Choppers-1.pdf), minor fixes - v1.28: Added KWIC (Key Word in Context) index. - v1.27: Imported Anonymous PDF's contents, originally from [here](https://pdfhost.io/v/gR8lAdgVd_Uap_Timeline_Prepared_By_Another), with fixed URL's - v1.23-1.24: Added a handful of key historical events, such as Edward Tauss the head of CIA UFO disinformation in the 50's @@ -2482,7 +2487,7 @@ Best viewed on a desktop/laptop, not a mobile device. On Windows, Firefox works I've split up the timeline into 4 parts, to reduce their sizes: distant past up to 1949, 1950-1959, 1960-1979, and 1980-present. -The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of +The majority of the events in this chronology are sighting related, however it's important to be aware that this is a timeline of UFO/UAP related _events_, not necessarily or exclusively UFO _sightings_. **This is not exclusively a UFO sightings timeline or database.** Some sighting reports or events appear multiple times in this timeline because they appear in more than one data source. I view this as a useful feature. @@ -2492,7 +2497,7 @@ Currently, the events are not sorted by time of day, only by date. Some sources A few events don't have firm dates, for example "Summer of 1947", or "Late July 1952". In these instances the compilation code uses fixed dates I selected for date sorting purposes. (See the code for the specific dates.) ## Source Code: -This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called ufojson. 
It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file); +This website is created automatically using a [C++](https://en.wikipedia.org/wiki/C%2B%2B) command line tool called “ufojson”. It parses the raw text and [Markdown](https://en.wikipedia.org/wiki/Markdown) source data to [JSON format](https://www.json.org/json-en.html), which is then converted to a single large web page using [pandoc](https://pandoc.org/). This tool's source code and all of the raw source and JSON data is located [here on github](https://github.com/richgel999/ufo_data).)", pTimeline_file); fputs("\n", pTimeline_file); @@ -2569,7 +2574,7 @@ u8R"(## Year Ranges for (uint32_t i = first_event_index; i <= last_event_index; i++) { int year = timeline_events[i].m_begin_date.m_year; - + year_histogram[year] = year_histogram[year] + 1; } @@ -2600,7 +2605,7 @@ u8R"(## Year Ranges //std::string url( string_format("[%s #%u](%s#%08X)", timeline_events[i].m_date_str.c_str(), i, html_filename.c_str(), hash) ); //link to Example.com< / a> inside the pre section. 
- std::string url( string_format("%s #%u", + std::string url( string_format("%s #%u", html_filename.c_str(), hash, timeline_events[i].m_date_str.c_str(), i) ); @@ -2670,6 +2675,6 @@ bool ufo_timeline::load_json(const char* pFilename, bool& utf8_flag, const char* timeline_events[first_event_index + i].from_json(obj, pSource_override, fix_20century_dates); } - return true; + return success; } diff --git a/ufojson_core.h b/ufojson_core.h index d399b8a..e122090 100644 --- a/ufojson_core.h +++ b/ufojson_core.h @@ -67,42 +67,42 @@ struct event_date bool m_estimated; // (estimated) event_date(); - + event_date(const event_date& other); - + bool sanity_check() const; - + bool operator== (const event_date& rhs) const; - + bool operator!= (const event_date& rhs) const; - + event_date& operator =(const event_date& rhs); - + void clear(); - + bool is_valid() const; - + std::string get_string() const; - - // Parses basic dates (not ranges). + + // Parses basic dates (not ranges). // Date can end in "(approximate)", "(estimated)", "?", or "'s". // 2 digit dates converted to 1900+. // Supports year, month/year, or month/day/year. bool parse(const char* pStr, bool fix_20century_dates); - + // More advanced date range parsing, used for converting the Eberhart timeline. // Note this doesn't support "'s", "(approximate)", "(estimated)", or converting 2 digit years to 1900'. static bool parse_eberhart_date_range(std::string date, event_date& begin_date, event_date& end_date, event_date& alt_date, int required_year = -1); - + // Note the returned date may be invalid. It's only intended for sorting/comparison purposes against other sort dates. void get_sort_date(int& year, int& month, int& day) const; - + // Compares two timeline dates. 
true if lhs < rhs static bool compare(const event_date& lhs, const event_date& rhs); - + private: static bool check_date_prefix(const event_date& date); @@ -112,7 +112,7 @@ struct timeline_event { std::string m_date_str; std::string m_time_str; // military, but currently it's in any format (not parsed yet) - + std::string m_alt_date_str; std::string m_end_date_str; @@ -123,7 +123,7 @@ struct timeline_event std::string m_desc; // Markdown string_vec m_type; string_vec m_refs; // Markdown - + string_vec m_locations; string_vec m_attributes; string_vec m_see_also; @@ -145,15 +145,15 @@ struct timeline_event std::string m_plain_desc; // Computed, ignored for comparison purposes, not deserialized from JSON string_vec m_plain_refs; // Computed, ignored for comparison purposes, not deserialized from JSON std::string m_search_words; // Computed, ignored for comparison purposes, not deserialized from JSON - + bool operator==(const timeline_event& rhs) const; bool operator!=(const timeline_event& rhs) const; bool operator< (const timeline_event& rhs) const; void print(FILE* pFile) const; - + void from_json(const json& obj, const char* pSource_override, bool fix_20century_dates); - + void to_json(json& j) const; uint32_t get_crc32() const; diff --git a/utils.cpp b/utils.cpp index 08a22a7..c27c09c 100644 --- a/utils.cpp +++ b/utils.cpp @@ -114,6 +114,7 @@ std::string dos_to_utf8(const std::string& str) return wchar_to_utf8(wstr); } +_Use_decl_annotations_ bool vformat(std::vector& buf, const char* pFmt, va_list args) { uint32_t buf_size = 8192; @@ -129,7 +130,7 @@ bool vformat(std::vector& buf, const char* pFmt, va_list args) return false; } - if (res <= buf.size() - 1) + if (res <= static_cast(buf.size() - 1)) break; buf_size *= 2; @@ -142,6 +143,7 @@ bool vformat(std::vector& buf, const char* pFmt, va_list args) return true; } +_Use_decl_annotations_ void ufprintf(FILE* pFile, const char* pFmt, ...) 
{ std::vector buf; @@ -155,11 +157,12 @@ void ufprintf(FILE* pFile, const char* pFmt, ...) std::wstring wbuf(utf8_to_wchar(std::string(&buf[0]))); // Not thread safe, but we don't care - _setmode(_fileno(pFile), _O_U16TEXT); + (void)_setmode(_fileno(pFile), _O_U16TEXT); fputws(&wbuf[0], pFile); - _setmode(_fileno(pFile), _O_TEXT); + (void)_setmode(_fileno(pFile), _O_TEXT); } +_Use_decl_annotations_ void uprintf(const char* pFmt, ...) { std::vector buf; @@ -173,11 +176,12 @@ void uprintf(const char* pFmt, ...) std::wstring wbuf(utf8_to_wchar(std::string(&buf[0]))); // Not thread safe, but we don't care - _setmode(_fileno(stdout), _O_U16TEXT); + (void)_setmode(_fileno(stdout), _O_U16TEXT); fputws(&wbuf[0], stdout); - _setmode(_fileno(stdout), _O_TEXT); + (void)_setmode(_fileno(stdout), _O_TEXT); } +_Use_decl_annotations_ std::string string_format(const char* pMsg, ...) { std::vector buf; @@ -195,6 +199,7 @@ std::string string_format(const char* pMsg, ...) return res; } +_Use_decl_annotations_ void panic(const char* pMsg, ...) 
{ char buf[4096]; @@ -256,8 +261,8 @@ int string_ifind_first(const std::string& str, const char* pPhrase) const size_t str_size = str.size(); const size_t phrase_size = strlen(pPhrase); - assert((int)str_size == str_size); - assert((int)phrase_size == phrase_size); + assert(str_size == str_size); + assert(phrase_size == phrase_size); assert(phrase_size); if ((!str_size) || (!phrase_size) || (phrase_size > str_size)) @@ -270,7 +275,7 @@ int string_ifind_first(const std::string& str, const char* pPhrase) if (_strnicmp(str.c_str() + ofs, pPhrase, phrase_size) == 0) return (int)ofs; } - + return -1; } @@ -342,7 +347,7 @@ std::string encode_url(const std::string& url) //const bool is_upper = (c >= 'A') && (c <= 'Z'); //const bool is_lower = (c >= 'a') && (c <= 'z'); - // Escape some problematic charactes that confuse some Markdown parsers (even after using Markdown '\' escapes) + // Escape some problematic characters that confuse some Markdown parsers (even after using Markdown '\' escapes) if ((c == ')') || (c == '(') || (c == '_') || (c == '*')) { res.push_back('%'); @@ -451,7 +456,7 @@ bool read_binary_file(const char* pFilename, uint8_vec& buf) } _fseeki64(pFile, 0, SEEK_SET); - if (len > MAX_BINARY_FILE_LEN) + if (static_cast(len) > MAX_BINARY_FILE_LEN) return false; buf.resize(len); @@ -475,7 +480,7 @@ bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines, b if (pUTF8_flag) *pUTF8_flag = false; - + while (!feof(pFile)) { char buf[16384]; @@ -677,7 +682,7 @@ bool load_column_text(const char* pFilename, std::vector& rows, std: std::string col_seps = lines[3]; if ((!col_seps.size()) || (col_seps[0] != '-') || (col_seps.back() != '-')) - panic("Invalid column seperator line"); + panic("Invalid column separator line"); for (uint32_t i = 0; i < col_seps.size(); i++) { @@ -720,13 +725,13 @@ bool load_column_text(const char* pFilename, std::vector& rows, std: for (uint32_t i = 0; i < column_info.size(); i++) { col_titles[i] = col_line; - + if 
(column_info[i].first) col_titles[i].erase(0, column_info[i].first); if (column_info[i].second > col_titles[i].size()) panic("invalid columns"); - + col_titles[i].erase(column_info[i].second, col_titles[i].size() - column_info[i].second); string_trim(col_titles[i]); } @@ -737,7 +742,7 @@ bool load_column_text(const char* pFilename, std::vector& rows, std: uint32_t cur_line = 4; - uint32_t cur_record_index = 0; + [[maybe_unused]] uint32_t cur_record_index = 0; while (cur_line < lines.size()) { @@ -804,7 +809,7 @@ bool load_column_text(const char* pFilename, std::vector& rows, std: l = ansi_to_utf8(l); rows.push_back(col_lines); - + cur_record_index++; } @@ -850,11 +855,11 @@ bool invoke_curl(const std::string& args, string_vec& reply) uprintf("PDF file detected\n"); std::string filename(args); - for (size_t i = filename.size() - 1; i >= 0; i--) + for (int i = static_cast(filename.size() - 1); i >= 0; i--) { if (filename[i] == '/') { - filename.erase(0, i + 1); + filename.erase(0, static_cast(i + 1)); break; } } @@ -879,8 +884,14 @@ bool invoke_curl(const std::string& args, string_vec& reply) new_link_deescaped.push_back(c); } - rename("__temp.html", new_link_deescaped.c_str()); - uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str()); + if (rename("__temp.html", new_link_deescaped.c_str()) == 0) + { + uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str()); + } + else + { + uprintf("FAILED to rename __temp.html to %s\n", new_link_deescaped.c_str()); + } return true; } @@ -939,10 +950,10 @@ std::string string_slice(const std::string& str, size_t ofs, size_t len) std::string res(str); if (ofs) res.erase(0, ofs); - + if (len) res.resize(len); - + return res; } @@ -996,7 +1007,7 @@ bool invoke_openai(const string_vec &prompt, string_vec &reply) // Invoke openai.exe const uint32_t MAX_TRIES = 3; uint32_t num_tries; - + for (num_tries = 0; num_tries < MAX_TRIES; ++num_tries) { if (num_tries) @@ -1062,11 +1073,11 @@ bool load_json_object(const char* 
pFilename, bool& utf8_flag, json &result_obj) if (!result_obj.is_object() && !result_obj.is_array()) return false; - return true; + return success; } void string_tokenize( - const std::string &str, + const std::string &str, const std::string &whitespace, const std::string &break_chars, string_vec &tokens, @@ -1078,7 +1089,7 @@ void string_tokenize( std::string cur_token; uint32_t cur_ofs = 0; - + for (uint32_t i = 0; i < str.size(); i++) { uint8_t c = str[i]; @@ -1129,6 +1140,7 @@ void string_tokenize( } } +// #NOTE In C++20, there's a PI constant in https://en.cppreference.com/w/cpp/numeric/constants const double PI = 3.141592653589793238463; double deg2rad(double deg) @@ -1144,7 +1156,7 @@ double rad2deg(double rad) // input in degrees double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit) { - if ((lat1 == lat2) && (lon1 == lon2)) + if ((lat1 == lat2) && (lon1 == lon2)) return 0; double theta = lon1 - lon2; @@ -1154,7 +1166,7 @@ double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit dist = dist * 60 * 1.1515; - switch (unit) + switch (unit) { case 'M': break; @@ -1185,37 +1197,37 @@ std::string remove_bom(std::string str) return str; } -int get_next_utf8_code_point_len(const uint8_t* pStr) +int get_next_utf8_code_point_len(const uint8_t* pStr) { - if (pStr == nullptr || *pStr == 0) + if (pStr == nullptr || *pStr == 0) { // Return 0 if the input is null or points to a null terminator - return 0; + return 0; } const uint8_t firstByte = *pStr; - if ((firstByte & 0x80) == 0) - { + if ((firstByte & 0x80) == 0) + { // Starts with 0, ASCII character return 1; } - else if ((firstByte & 0xE0) == 0xC0) - { + else if ((firstByte & 0xE0) == 0xC0) + { // Starts with 110 return 2; } - else if ((firstByte & 0xF0) == 0xE0) - { + else if ((firstByte & 0xF0) == 0xE0) + { // Starts with 1110 return 3; } - else if ((firstByte & 0xF8) == 0xF0) - { + else if ((firstByte & 0xF8) == 0xF0) + { // Starts with 11110 return 4; } - else + 
else { // Invalid UTF-8 byte sequence return -1; @@ -1239,9 +1251,9 @@ void get_string_words( std::string whitespace(" \t\n\r,;:.!?()[]*/\""); if (pAdditional_whitespace) whitespace += std::string(pAdditional_whitespace); - + int word_start_ofs = -1; - + uint32_t cur_ofs = 0; while ((cur_ofs < str.size()) && (pStr[cur_ofs])) { @@ -1303,7 +1315,7 @@ void get_string_words( else if (pStr[cur_ofs + 2] == 0x9D) is_whitespace = true; } - + if (is_whitespace) { if (cur_token.size()) @@ -1331,7 +1343,7 @@ void get_string_words( cur_token.push_back(pStr[cur_ofs + i]); } } - + cur_ofs += l; } @@ -1347,7 +1359,7 @@ void get_string_words( void get_utf8_code_point_offsets(const char* pStr, int_vec& offsets) { uint32_t cur_ofs = 0; - + offsets.resize(0); while (pStr[cur_ofs]) @@ -1439,14 +1451,14 @@ static const char* g_stop_words[] = "when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours", "yourself", "yourselves", "although", "also", "already", "another", "seemed", "seem", "seems" }; -static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words); +[[maybe_unused]] static const uint32_t NUM_STOP_WORDS = (uint32_t)std::size(g_stop_words); std::set g_stop_words_set; void init_norm() { g_stop_words_set.clear(); - for (const auto& str : g_stop_words) + for (const char* str : g_stop_words) g_stop_words_set.insert(str); for (uint32_t i = 0; i < std::size(g_char_norm_up); i++) @@ -1507,7 +1519,7 @@ void init_norm() } } -// Resulting characters are guaranteed to be <128 - useful for searching purposes. +// Resulting characters are guaranteed to be <128 - useful for searching purposes. // Unrecognized Unicode characters are deleted. 
void normalize_diacritics(const char* pStr, std::string& res) { @@ -1610,10 +1622,10 @@ std::string normalize_word(const std::string& str) if (str.size() > MAX_STRING_SIZE) panic("String too long"); - + char buf[MAX_STRING_SIZE + 1]; strcpy_s(buf, sizeof(buf), str.c_str()); - + // Convert utf8 string to lower utf8lwr(buf); @@ -1622,7 +1634,7 @@ std::string normalize_word(const std::string& str) norm.reserve(strlen(buf)); normalize_diacritics(buf, norm); - + // Remove any non-letter or non-digit characters (we assume this is a word, so whitespace gets removed too) std::string temp; temp.reserve(norm.size()); @@ -1676,10 +1688,10 @@ std::string string_replace(const std::string& str, const std::string& find, cons assert(find.size()); if (!find.size() || !str.size()) return str; - + const uint8_t* pStr = (const uint8_t *)str.c_str(); const size_t str_size = str.size(); - + const uint8_t* pFind = (const uint8_t*)find.c_str(); const size_t find_size = find.size(); @@ -1695,7 +1707,7 @@ std::string string_replace(const std::string& str, const std::string& find, cons assert(0); str_char_size = 1; } - + const size_t str_remaining = str_size - str_ofs; if ((str_remaining >= find_size) && (memcmp(pStr + str_ofs, pFind, find_size) == 0)) { @@ -1718,7 +1730,7 @@ bool does_file_exist(const char* pFilename) FILE* pFile = ufopen(pFilename, "rb"); if (!pFile) return false; - + fclose(pFile); return true; } diff --git a/utils.h b/utils.h index d158c24..26f91b0 100644 --- a/utils.h +++ b/utils.h @@ -15,17 +15,15 @@ #include #include +#include #include #include #include #include -#include -#include #include #include #include -#include #include #include @@ -52,8 +50,6 @@ const uint32_t ANSI_SOFT_HYPHEN = 0xAD; template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(T)); } -void panic(const char* pMsg, ...); - //------------------------------------------------------------------ inline bool string_is_digits(const std::string& s) @@ -87,18 +83,18 @@ inline std::string 
ansi_to_utf8(const std::string& str) { return wchar_to_utf8(u // Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expecting thing for chars<32, and we need them. std::string dos_to_utf8(const std::string& str); -// utf8 string format -bool vformat(std::vector& buf, const char* pFmt, va_list args); +// utf8 string format +bool vformat(std::vector& buf, _Printf_format_string_ const char* pFmt, va_list args); // utf8 printf to FILE* -void ufprintf(FILE* pFile, const char* pFmt, ...); +void ufprintf(FILE* pFile, _Printf_format_string_ const char* pFmt, ...); // utf8 print to stdout -void uprintf(const char* pFmt, ...); +void uprintf(_Printf_format_string_ const char* pFmt, ...); -std::string string_format(const char* pMsg, ...); +std::string string_format(_Printf_format_string_ const char* pMsg, ...); -void panic(const char* pMsg, ...); +[[noreturn]] void panic(_Printf_format_string_ const char* pMsg, ...); // Open a file given a utf8 filename FILE* ufopen(const char* pFilename, const char* pMode);