ufo_data/utils.h

286 lines
7.7 KiB
C
Raw Normal View History

2023-02-20 17:59:08 -05:00
// utils.h
// Copyright (C) 2023 Richard Geldreich, Jr.
#pragma once
#ifdef _MSC_VER
#pragma warning (disable:4100) // unreferenced formal parameter
#pragma warning (disable:4505) // unreferenced function with internal linkage has been removed)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#endif
#include <fcntl.h>
#include <io.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <cstdint>
#include <stdlib.h>
#include <stdio.h>
#include <algorithm>
#include <map>
#include <set>
#include <varargs.h>
#include <string>
#include <unordered_set>
#include "pjson.h"
2023-02-20 17:59:08 -05:00
#include "libsoldout/markdown.h"
#include "json/json.hpp"
using json = nlohmann::json;
typedef std::vector<std::string> string_vec;
typedef std::unordered_set<std::string> unordered_string_set;
typedef std::vector<uint8_t> uint8_vec;
typedef std::pair<std::string, std::string> string_pair;
typedef std::vector<int> int_vec;
typedef std::vector<uint32_t> uint_vec;
2023-02-20 17:59:08 -05:00
const uint32_t UTF8_BOM0 = 0xEF, UTF8_BOM1 = 0xBB, UTF8_BOM2 = 0xBF;
// Code page 1242 (ANSI) soft hyphen character.
// See http://www.alanwood.net/demos/ansi.html
const uint32_t ANSI_SOFT_HYPHEN = 0xAD;
template<typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(T)); }
2023-02-20 17:59:08 -05:00
void panic(const char* pMsg, ...);
//------------------------------------------------------------------
inline bool string_is_digits(const std::string& s)
{
for (char c : s)
if (!isdigit((uint8_t)c))
return false;
return true;
}
inline bool string_is_alpha(const std::string& s)
{
for (char c : s)
if (!isalpha((uint8_t)c))
return false;
return true;
}
std::string combine_strings(std::string a, const std::string& b);
// Convert an UTF8 string to a wide Unicode String
std::wstring utf8_to_wchar(const std::string& str, UINT code_page = CP_UTF8);
// Convert a wide Unicode string to an UTF8 string
std::string wchar_to_utf8(const std::wstring& wstr, UINT code_page = CP_UTF8);
inline std::string ansi_to_utf8(const std::string& str) { return wchar_to_utf8(utf8_to_wchar(str, 1252)); }
// Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expecting thing for chars<32, and we need them.
std::string dos_to_utf8(const std::string& str);
// utf8 string format
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args);
// utf8 printf to FILE*
void ufprintf(FILE* pFile, const char* pFmt, ...);
// utf8 print to stdout
void uprintf(const char* pFmt, ...);
std::string string_format(const char* pMsg, ...);
void panic(const char* pMsg, ...);
// Open a file given a utf8 filename
FILE* ufopen(const char* pFilename, const char* pMode);
// like tolower() but doesn't assert on negative values and doesn't factor in locale
inline char utolower(char c)
{
if ((c >= 'A') && (c <= 'Z'))
return (c - 'A') + 'a';
return c;
}
2023-08-07 19:28:52 -04:00
inline uint8_t utolower(uint8_t c)
{
if ((c >= 'A') && (c <= 'Z'))
return (c - 'A') + 'a';
return c;
}
2023-02-20 17:59:08 -05:00
// like toupper() but doesn't assert on negative values and doesn't factor in locale
inline char utoupper(char c)
{
if ((c >= 'a') && (c <= 'z'))
return (c - 'a') + 'A';
return c;
}
2023-08-07 19:28:52 -04:00
inline uint8_t utoupper(uint8_t c)
{
if ((c >= 'a') && (c <= 'z'))
return (c - 'a') + 'A';
return c;
}
2023-02-20 17:59:08 -05:00
// like isdigit() but doesn't assert on negative values and doesn't factor in locale
inline bool uisdigit(char c)
{
return (c >= '0') && (c <= '9');
}
2023-08-07 19:28:52 -04:00
inline bool uisdigit(uint8_t c)
{
return (c >= '0') && (c <= '9');
}
2023-02-20 17:59:08 -05:00
// like isupper() but doesn't assert on negative values and doesn't factor in locale
inline bool uisupper(char c)
{
return (c >= 'A') && (c <= 'Z');
}
2023-08-07 19:28:52 -04:00
inline bool uisupper(uint8_t c)
{
return (c >= 'A') && (c <= 'Z');
}
2023-02-20 17:59:08 -05:00
// like islower() but doesn't assert on negative values and doesn't factor in locale
inline bool uislower(char c)
{
return (c >= 'a') && (c <= 'z');
}
2023-08-07 19:28:52 -04:00
inline bool uislower(uint8_t c)
{
return (c >= 'a') && (c <= 'z');
}
2023-02-20 17:59:08 -05:00
// like isalpha() but doesn't assert on negative values and doesn't factor in locale
inline bool uisalpha(char c)
{
return uisupper(c) || uislower(c);
}
2023-08-07 19:28:52 -04:00
inline bool uisalpha(uint8_t c)
{
return uisupper(c) || uislower(c);
}
2023-02-20 17:59:08 -05:00
inline int convert_hex_digit(int d)
{
if ((d >= 'a') && (d <= 'f'))
return (d - 'a') + 10;
else if ((d >= 'A') && (d <= 'F'))
return (d - 'A') + 10;
else if ((d >= '0') && (d <= '9'))
return d - '0';
return -1;
}
inline std::string string_lower(std::string str)
{
for (char& c : str)
2023-08-07 19:28:52 -04:00
c = (char)utolower((uint8_t)c);
2023-02-20 17:59:08 -05:00
return str;
}
inline std::string string_upper(std::string str)
{
for (char& c : str)
2023-08-07 19:28:52 -04:00
c = (char)utoupper((uint8_t)c);
2023-02-20 17:59:08 -05:00
return str;
}
std::string& string_trim(std::string& str);
std::string& string_trim_end(std::string& str);
// Case sensitive, returns -1 if can't find
int string_find_first(const std::string& str, const char* pPhrase);
int string_ifind_first(const std::string& str, const char* pPhrase);
2023-02-20 17:59:08 -05:00
int string_icompare(const std::string& a, const char* pB);
// Case insensitive
bool string_begins_with(const std::string& str, const char* pPhrase);
// Case insensitive
bool string_ends_in(const std::string& str, const char* pPhrase);
std::string string_slice(const std::string& str, size_t ofs, size_t len = UINT32_MAX);
2023-02-20 17:59:08 -05:00
inline char to_hex(uint32_t val)
{
assert(val <= 15);
return (char)((val <= 9) ? ('0' + val) : ('A' + val - 10));
}
std::string encode_url(const std::string& url);
uint32_t crc32(const uint8_t* pBuf, size_t size, uint32_t init_crc = 0);
uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
2023-02-20 17:59:08 -05:00
bool read_binary_file(const char* pFilename, uint8_vec& buf);
bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines, bool* pUTF8_flag);
2023-02-20 17:59:08 -05:00
bool read_text_file(const char* pFilename, std::vector<uint8_t>& buf, bool *pUTF8_flag);
2023-02-20 17:59:08 -05:00
2023-10-05 14:07:39 -04:00
bool write_text_file(const char* pFilename, const string_vec& lines, bool utf8_bom = true);
2023-02-20 17:59:08 -05:00
bool serialize_to_json_file(const char* pFilename, const json& j, bool utf8_bom);
bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std::string& title, string_vec& col_titles, bool empty_line_seps, const char *pExtra_col_text);
2023-02-20 17:59:08 -05:00
bool invoke_curl(const std::string& args, string_vec& reply);
void convert_args_to_utf8(string_vec& args, int argc, wchar_t* argv[]);
bool invoke_openai(const std::string& prompt, std::string& reply);
2023-10-05 14:07:39 -04:00
bool invoke_openai(const string_vec& prompt, string_vec& reply);
std::string get_deg_to_dms(double deg);
bool load_json_object(const char* pFilename, bool& utf8_flag, json& result_obj);
inline bool load_json_object(const char* pFilename, json& result_obj) { bool utf8_flag = false; return load_json_object(pFilename, utf8_flag, result_obj); }
void string_tokenize(const std::string& str, const std::string& whitespace, const std::string& break_chars, string_vec& tokens, uint_vec* pOffsets_vec = nullptr);
double deg2rad(double deg);
double rad2deg(double rad);
// input in degrees
double geo_distance(double lat1, double lon1, double lat2, double lon2, int unit = 'M');
2023-08-07 19:28:52 -04:00
std::string remove_bom(std::string str);
int get_next_utf8_code_point_len(const uint8_t* pStr);
2023-10-05 14:07:39 -04:00
void get_string_words(const std::string& str, string_vec& words, uint_vec* pOffsets_vec, const char *pAdditional_whitespace = nullptr);
void get_utf8_code_point_offsets(const char* pStr, int_vec& offsets);
void init_norm();
void normalize_diacritics(const char* pStr, std::string& res);
std::string normalize_word(const std::string& str);
bool is_stop_word(const std::string& word);
std::string ustrlwr(const std::string& s);
std::string string_replace(const std::string& str, const std::string& find, const std::string& repl);
bool does_file_exist(const char* pFilename);