mirror of
https://github.com/richgel999/ufo_data.git
synced 2024-12-24 06:59:35 -05:00
new files
This commit is contained in:
parent
3f5d0d8f18
commit
650909370f
3168
converters.cpp
Normal file
3168
converters.cpp
Normal file
File diff suppressed because it is too large
Load Diff
13
converters.h
Normal file
13
converters.h
Normal file
@ -0,0 +1,13 @@
|
||||
// converters.h
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#pragma once
|
||||
#include "utils.h"
|
||||
|
||||
void converters_init();
|
||||
|
||||
bool convert_magnonia(const char* pSrc_filename, const char* pDst_filename, const char* pSource_override = nullptr, const char* pRef_override = nullptr);
|
||||
bool convert_bluebook_unknowns();
|
||||
bool convert_hall();
|
||||
bool convert_eberhart(unordered_string_set& unique_urls);
|
||||
bool convert_johnson();
|
||||
bool convert_nicap(unordered_string_set& unique_urls);
|
608
markdown_proc.cpp
Normal file
608
markdown_proc.cpp
Normal file
@ -0,0 +1,608 @@
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
// markdown_proc.cpp
|
||||
#include "markdown_proc.h"
|
||||
|
||||
// Returns true when character code c must be preceded by a backslash in emitted markdown.
// Note '-', '.' and '!' are deliberately not in the set.
static bool markdown_should_escape(int c)
{
    static const char s_escaped_chars[] =
    {
        '\\', '`', '*', '_', '{', '}', '[', ']', '<', '>', '(', ')', '#', '|'
    };

    for (const char escaped : s_escaped_chars)
    {
        if (c == escaped)
            return true;
    }

    return false;
}
|
||||
|
||||
static std::string escape_markdown(const std::string& str)
|
||||
{
|
||||
std::string out;
|
||||
|
||||
for (uint32_t i = 0; i < str.size(); i++)
|
||||
{
|
||||
uint8_t c = str[i];
|
||||
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Nothing to set up explicitly: the text, details and links members default-construct empty.
markdown_text_processor::markdown_text_processor() = default;
|
||||
|
||||
void markdown_text_processor::clear()
|
||||
{
|
||||
m_text.clear();
|
||||
m_details.clear();
|
||||
m_links.clear();
|
||||
}
|
||||
|
||||
void markdown_text_processor::fix_redirect_urls()
|
||||
{
|
||||
for (uint32_t link_index = 0; link_index < m_links.size(); link_index++)
|
||||
{
|
||||
const char* pPrefix = "https://www.google.com/url?q=";
|
||||
|
||||
if (!string_begins_with(m_links[link_index], pPrefix))
|
||||
continue;
|
||||
|
||||
size_t p;
|
||||
if ((p = m_links[link_index].find("&sa=D&source=editors&ust=")) == std::string::npos)
|
||||
continue;
|
||||
|
||||
size_t r = m_links[link_index].find("&usg=");
|
||||
if ((r == std::string::npos) || (r < p))
|
||||
continue;
|
||||
|
||||
if ((r - p) != 41)
|
||||
continue;
|
||||
|
||||
if ((m_links[link_index].size() - r) != 33)
|
||||
continue;
|
||||
|
||||
if ((m_links[link_index].size() - p) != 74)
|
||||
continue;
|
||||
|
||||
std::string new_link(m_links[link_index]);
|
||||
new_link.erase(p, new_link.size() - p);
|
||||
|
||||
new_link.erase(0, strlen(pPrefix));
|
||||
|
||||
// De-escape the string
|
||||
std::string new_link_deescaped;
|
||||
for (uint32_t i = 0; i < new_link.size(); i++)
|
||||
{
|
||||
uint8_t c = new_link[i];
|
||||
if ((c == '%') && ((i + 2) < new_link.size()))
|
||||
{
|
||||
int da = convert_hex_digit(new_link[i + 1]);
|
||||
int db = convert_hex_digit(new_link[i + 2]);
|
||||
if (da >= 0 && db >= 0)
|
||||
{
|
||||
int val = da * 16 + db;
|
||||
new_link_deescaped.push_back((uint8_t)val);
|
||||
}
|
||||
|
||||
i += 2;
|
||||
}
|
||||
else
|
||||
new_link_deescaped.push_back(c);
|
||||
}
|
||||
|
||||
//printf("%s\n", new_link.c_str());
|
||||
|
||||
m_links[link_index] = new_link_deescaped;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < m_links.size(); i++)
|
||||
m_links[i] = encode_url(m_links[i]);
|
||||
}
|
||||
|
||||
void markdown_text_processor::init_from_markdown(const char* pText)
|
||||
{
|
||||
struct buf* pIn = bufnew(4096);
|
||||
bufputs(pIn, pText);
|
||||
|
||||
struct buf* pOut = bufnew(4096);
|
||||
markdown(pOut, pIn, &mkd_parse);
|
||||
|
||||
std::string buf;
|
||||
buf.append((char*)pOut->data, pOut->size);
|
||||
|
||||
init_from_codes(buf);
|
||||
|
||||
bufrelease(pIn);
|
||||
bufrelease(pOut);
|
||||
}
|
||||
|
||||
bool markdown_text_processor::split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const
|
||||
{
|
||||
assert((this != &a) && (this != &b));
|
||||
|
||||
if (m_details[ofs].m_emphasis != 0)
|
||||
return false;
|
||||
|
||||
a.m_text = m_text;
|
||||
a.m_details = m_details;
|
||||
a.m_links = m_links;
|
||||
|
||||
b.m_text = m_text;
|
||||
b.m_details = m_details;
|
||||
b.m_links = m_links;
|
||||
|
||||
a.m_text.erase(ofs, a.m_text.size() - ofs);
|
||||
a.m_details.erase(a.m_details.begin() + ofs, a.m_details.end());
|
||||
|
||||
b.m_text.erase(0, ofs);
|
||||
b.m_details.erase(b.m_details.begin(), b.m_details.begin() + ofs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t markdown_text_processor::count_char_in_text(uint8_t c) const
|
||||
{
|
||||
uint32_t num = 0;
|
||||
for (uint32_t i = 0; i < m_text.size(); i++)
|
||||
{
|
||||
if ((uint8_t)m_text[i] == c)
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
// If the text ends with a parenthesized group -- optionally followed by a single '.'
// and any run of trailing spaces/newlines -- splits this object just before that
// group's opening '(' via split_in_half(). a and b are always cleared first.
// Returns false when no such trailing group exists or the split is refused.
bool markdown_text_processor::split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const
{
    a.clear();
    b.clear();

    if (m_text.empty())
        return false;

    int pos = (int)m_text.size() - 1;

    // Step back over trailing newlines and spaces.
    for (;;)
    {
        const char ch = m_text[pos];
        if ((ch != '\n') && (ch != ' '))
            break;

        if (pos == 0)
            return false;

        pos--;
    }

    // Allow one '.' after the closing parenthesis.
    if (m_text[pos] == '.')
    {
        if (pos == 0)
            return false;

        pos--;
    }

    if (m_text[pos] != ')')
        return false;

    // Walk backwards to the '(' that balances the final ')'.
    int depth = 0;
    for (; pos >= 0; pos--)
    {
        const char ch = m_text[pos];

        if (ch == ')')
        {
            depth++;
        }
        else if (ch == '(')
        {
            if (--depth == 0)
                break;
        }
    }

    if (pos < 0)
        return false;

    return split_in_half(pos, a, b);
}
|
||||
|
||||
void markdown_text_processor::convert_to_plain(std::string& out, bool trim_end) const
|
||||
{
|
||||
for (uint32_t i = 0; i < m_text.size(); i++)
|
||||
{
|
||||
uint8_t c = m_text[i];
|
||||
|
||||
assert((c == '\n') || (c == '\t') || (c >= 32));
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
if (trim_end)
|
||||
{
|
||||
while (out.size() && out.back() == '\n')
|
||||
out.pop_back();
|
||||
|
||||
string_trim_end(out);
|
||||
}
|
||||
}
|
||||
|
||||
void markdown_text_processor::convert_to_markdown(std::string& out, bool trim_end) const
|
||||
{
|
||||
int emphasis = 0, emphasis_amount = 0;
|
||||
int cur_link_index = -1;
|
||||
|
||||
for (uint32_t text_ofs = 0; text_ofs < m_text.size(); text_ofs++)
|
||||
{
|
||||
if (m_details[text_ofs].m_link_index != -1)
|
||||
{
|
||||
// Inside link at current position
|
||||
|
||||
if (cur_link_index == -1)
|
||||
{
|
||||
// Not currently inside a link, so start a new link
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
out.push_back('[');
|
||||
|
||||
// Beginning new link
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
else if (cur_link_index != m_details[text_ofs].m_link_index)
|
||||
{
|
||||
// Switching to different link, so flush current link and start a new one
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
out.push_back('[');
|
||||
}
|
||||
else
|
||||
{
|
||||
// Currently inside a link which hasn't changed
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
|
||||
cur_link_index = m_details[text_ofs].m_link_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not inside link at current position
|
||||
|
||||
if (cur_link_index != -1)
|
||||
{
|
||||
// Flush current link
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
cur_link_index = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_details[text_ofs].m_linebreak)
|
||||
{
|
||||
out.push_back(' ');
|
||||
|
||||
// One space will already be in the text.
|
||||
//out.push_back(' ');
|
||||
}
|
||||
|
||||
uint8_t c = m_text[text_ofs];
|
||||
if (markdown_should_escape(c))
|
||||
{
|
||||
// Markdown escape
|
||||
out.push_back('\\');
|
||||
}
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush last emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
emphasis = 0;
|
||||
emphasis_amount = 0;
|
||||
|
||||
if (cur_link_index != -1)
|
||||
{
|
||||
// Flush last link
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
cur_link_index = -1;
|
||||
}
|
||||
|
||||
if (m_details.size() > m_text.size())
|
||||
{
|
||||
if (m_details.size() != m_text.size() + 1)
|
||||
panic("details array too large");
|
||||
|
||||
if (m_details.back().m_html.size())
|
||||
{
|
||||
for (uint32_t i = 0; i < m_details.back().m_html.size(); i++)
|
||||
out += m_details.back().m_html[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (trim_end)
|
||||
{
|
||||
while (out.size() && out.back() == '\n')
|
||||
out.pop_back();
|
||||
|
||||
string_trim_end(out);
|
||||
}
|
||||
}
|
||||
|
||||
void markdown_text_processor::ensure_detail_ofs(uint32_t ofs)
|
||||
{
|
||||
if (m_details.size() <= ofs)
|
||||
m_details.resize(ofs + 1);
|
||||
}
|
||||
|
||||
void markdown_text_processor::init_from_codes(const std::string& buf)
|
||||
{
|
||||
m_text.resize(0);
|
||||
m_details.resize(0);
|
||||
m_links.resize(0);
|
||||
|
||||
parse_block(buf);
|
||||
}
|
||||
|
||||
void markdown_text_processor::parse_block(const std::string& buf)
|
||||
{
|
||||
uint32_t cur_ofs = 0;
|
||||
while (cur_ofs < buf.size())
|
||||
{
|
||||
uint8_t sig = (uint8_t)buf[cur_ofs];
|
||||
|
||||
if (sig != markdown::cCodeSig)
|
||||
panic("Expected code block signature");
|
||||
|
||||
cur_ofs++;
|
||||
if (cur_ofs == buf.size())
|
||||
panic("Premature end of buffer");
|
||||
|
||||
uint8_t code_type = (uint8_t)buf[cur_ofs];
|
||||
cur_ofs++;
|
||||
|
||||
switch (code_type)
|
||||
{
|
||||
case markdown::cCodeLink:
|
||||
{
|
||||
const uint32_t link_size = markdown::get_len32(buf, cur_ofs);
|
||||
const uint32_t content_size = markdown::get_len32(buf, cur_ofs);
|
||||
|
||||
std::string link(markdown::get_string(buf, cur_ofs, link_size));
|
||||
std::string content(markdown::get_string(buf, cur_ofs, content_size));
|
||||
|
||||
const uint32_t link_index = (uint32_t)m_links.size();
|
||||
m_links.push_back(link);
|
||||
|
||||
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
parse_block(content);
|
||||
|
||||
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
||||
if (end_text_ofs)
|
||||
{
|
||||
ensure_detail_ofs(end_text_ofs - 1);
|
||||
|
||||
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
||||
m_details[i].m_link_index = link_index;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeEmphasis:
|
||||
{
|
||||
if (cur_ofs >= buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
const uint8_t c = (uint8_t)buf[cur_ofs++];
|
||||
|
||||
if (cur_ofs >= buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
const uint32_t amount = (uint8_t)buf[cur_ofs++];
|
||||
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
parse_block(text);
|
||||
|
||||
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
if (end_text_ofs)
|
||||
{
|
||||
ensure_detail_ofs(end_text_ofs - 1);
|
||||
|
||||
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
||||
{
|
||||
m_details[i].m_emphasis = c;
|
||||
m_details[i].m_emphasis_amount = (uint8_t)amount;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeText:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
for (size_t i = 0; i < text.size(); i++)
|
||||
{
|
||||
// value 1 is written by the markdown parser when it wants to delete a \n
|
||||
if (text[i] != 1)
|
||||
m_text.push_back(text[i]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeParagraph:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
parse_block(text);
|
||||
|
||||
m_text += "\n";
|
||||
m_text += "\n";
|
||||
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
m_details[m_text.size() - 1].m_end_paragraph = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeLinebreak:
|
||||
{
|
||||
m_text += "\n";
|
||||
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
m_details[m_text.size() - 1].m_linebreak = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeHTML:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
uint32_t ofs = (uint32_t)m_text.size();
|
||||
ensure_detail_ofs(ofs);
|
||||
m_details[ofs].m_html.push_back(text);
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
panic("Invalid code");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_text.size())
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
}
|
||||
|
||||
void markdown_text_processor::handle_html(std::string& out, uint32_t text_ofs) const
|
||||
{
|
||||
// Any HTML appears before this character
|
||||
for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
|
||||
void markdown_text_processor::handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const
|
||||
{
|
||||
if (m_details[text_ofs].m_emphasis != 0)
|
||||
{
|
||||
// Desired emphasis
|
||||
if ((m_details[text_ofs].m_emphasis == emphasis) && (m_details[text_ofs].m_emphasis_amount == emphasis_amount))
|
||||
{
|
||||
// No change to emphasis
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Change to emphasis
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush out current emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
|
||||
emphasis = m_details[text_ofs].m_emphasis;
|
||||
emphasis_amount = m_details[text_ofs].m_emphasis_amount;
|
||||
|
||||
// Start new emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
}
|
||||
else if (m_details[text_ofs].m_emphasis == 0)
|
||||
{
|
||||
// Desires no emphasis
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush out current emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
emphasis = 0;
|
||||
emphasis_amount = 0;
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
}
|
390
markdown_proc.h
Normal file
390
markdown_proc.h
Normal file
@ -0,0 +1,390 @@
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
// markdown_proc.h
|
||||
#pragma once
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include "libsoldout/markdown.h"
|
||||
|
||||
struct markdown
|
||||
{
|
||||
enum
|
||||
{
|
||||
cCodeSig = 0xFE,
|
||||
|
||||
cCodeLink = 1,
|
||||
cCodeEmphasis,
|
||||
cCodeText,
|
||||
cCodeParagraph,
|
||||
cCodeLinebreak,
|
||||
cCodeHTML
|
||||
};
|
||||
|
||||
static void bufappend(struct buf* out, struct buf* in)
|
||||
{
|
||||
assert(in != out);
|
||||
|
||||
if (in && in->size)
|
||||
bufput(out, in->data, in->size);
|
||||
}
|
||||
|
||||
static void writelen(struct buf* ob, uint32_t size)
|
||||
{
|
||||
bufputc(ob, (uint8_t)(size & 0xFF));
|
||||
bufputc(ob, (uint8_t)((size >> 8) & 0xFF));
|
||||
bufputc(ob, (uint8_t)((size >> 16) & 0xFF));
|
||||
bufputc(ob, (uint8_t)((size >> 24) & 0xFF));
|
||||
}
|
||||
|
||||
static std::string get_string(const std::string& buf, uint32_t& cur_ofs, uint32_t text_size)
|
||||
{
|
||||
std::string text;
|
||||
if (cur_ofs + text_size > buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
text.append(buf.c_str() + cur_ofs, text_size);
|
||||
cur_ofs += text_size;
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
static uint32_t get_len32(const std::string& buf, uint32_t& ofs)
|
||||
{
|
||||
if ((ofs + 4) > buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
uint32_t l = (uint8_t)buf[ofs] |
|
||||
(((uint8_t)buf[ofs + 1]) << 8) |
|
||||
(((uint8_t)buf[ofs + 2]) << 16) |
|
||||
(((uint8_t)buf[ofs + 3]) << 24);
|
||||
|
||||
ofs += 4;
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
static void prolog(struct buf* ob, void* opaque)
|
||||
{
|
||||
}
|
||||
|
||||
static void epilog(struct buf* ob, void* opaque)
|
||||
{
|
||||
}
|
||||
|
||||
/* block level callbacks - NULL skips the block */
|
||||
static void blockcode(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "blockcode: \"%.*s\" ", (int)text->size, text->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void blockquote(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "blockquote: \"%.*s\" ", (int)text->size, text->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void blockhtml(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "blockhtml: \"%.*s\" ", (int)text->size, text->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void header(struct buf* ob, struct buf* text, int level, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "header: %i \"%.*s\" ", level, (int)text->size, text->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void hrule(struct buf* ob, void* opaque)
|
||||
{
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void list(struct buf* ob, struct buf* text, int flags, void* opaque)
|
||||
{
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void listitem(struct buf* ob, struct buf* text, int flags, void* opaque)
|
||||
{
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void paragraph(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "paragraph: \"%.*s\" ", (int)text->size, text->data);
|
||||
#endif
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeParagraph);
|
||||
writelen(ob, (uint32_t)text->size);
|
||||
bufappend(ob, text);
|
||||
}
|
||||
|
||||
static void table(struct buf* ob, struct buf* head_row, struct buf* rows, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "table: \"%.*s\" \"%.*s\" ", (int)head_row->size, head_row->data, (int)rows->size, rows->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void table_cell(struct buf* ob, struct buf* text, int flags, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "table_cell: \"%.*s\" %i ", (int)text->size, text->data, flags);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static void table_row(struct buf* ob, struct buf* cells, int flags, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "table_row: \"%.*s\" %i ", (int)cells->size, cells->data, flags);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
}
|
||||
|
||||
static int autolink(struct buf* ob, struct buf* link, enum mkd_autolink type, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "autolink: %u \"%.*s\" ", type, (int)link->size, link->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int codespan(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "codespan: \"%.*s\" ", (int)text->size, text->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int double_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "double_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
||||
#endif
|
||||
if (!text || !text->size)
|
||||
return 1;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeEmphasis);
|
||||
bufputc(ob, c);
|
||||
bufputc(ob, 2);
|
||||
writelen(ob, (uint32_t)text->size);
|
||||
bufappend(ob, text);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
||||
#endif
|
||||
|
||||
if (!text || !text->size)
|
||||
return 1;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeEmphasis);
|
||||
bufputc(ob, c);
|
||||
bufputc(ob, 1);
|
||||
writelen(ob, (uint32_t)text->size);
|
||||
bufappend(ob, text);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int image(struct buf* ob, struct buf* link, struct buf* title, struct buf* alt, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "image: \"%.*s\" \"%.*s\" \"%.*s\" ",
|
||||
(int)link->size, link->data,
|
||||
(int)title->size, title->data,
|
||||
(int)alt->size, alt->data);
|
||||
#endif
|
||||
panic("unsupported markdown feature");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int linebreak(struct buf* ob, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
bufprintf(ob, "linebreak ");
|
||||
#endif
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeLinebreak);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int link(struct buf* ob, struct buf* link, struct buf* title, struct buf* content, void* opaque)
|
||||
{
|
||||
#if 0
|
||||
printf("link: {%.*s} {%.*s} {%.*s}\n",
|
||||
link ? (int)link->size : 0,
|
||||
link ? link->data : nullptr,
|
||||
title ? (int)title->size : 0,
|
||||
title ? title->data : nullptr,
|
||||
content ? (int)content->size : 0,
|
||||
content ? content->data : nullptr);
|
||||
#endif
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeLink);
|
||||
writelen(ob, (uint32_t)link->size);
|
||||
writelen(ob, (uint32_t)content->size);
|
||||
|
||||
bufappend(ob, link);
|
||||
bufappend(ob, content);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int raw_html_tag(struct buf* ob, struct buf* tag, void* opaque)
|
||||
{
|
||||
//bufprintf(ob, "raw_html_tag: \"%.*s\" ", (int)tag->size, tag->data);
|
||||
|
||||
if (!tag || !tag->size)
|
||||
return 1;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeHTML);
|
||||
writelen(ob, (uint32_t)tag->size);
|
||||
bufappend(ob, tag);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int triple_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
||||
{
|
||||
//bufprintf(ob, "triple_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
||||
|
||||
if (!text || !text->size)
|
||||
return 1;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeEmphasis);
|
||||
bufputc(ob, c);
|
||||
bufputc(ob, 3);
|
||||
writelen(ob, (uint32_t)text->size);
|
||||
bufappend(ob, text);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void normal_text(struct buf* ob, struct buf* text, void* opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
bufputc(ob, (uint8_t)cCodeSig);
|
||||
bufputc(ob, (uint8_t)cCodeText);
|
||||
writelen(ob, (uint32_t)text->size);
|
||||
for (uint32_t i = 0; i < text->size; i++)
|
||||
{
|
||||
uint8_t c = text->data[i];
|
||||
if (c == '\n')
|
||||
bufputc(ob, ' ');
|
||||
else if (c != 1)
|
||||
{
|
||||
assert(c >= 32 || c == '\t');
|
||||
bufputc(ob, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const struct mkd_renderer mkd_parse =
|
||||
{
|
||||
markdown::prolog,
|
||||
markdown::epilog,
|
||||
|
||||
markdown::blockcode,
|
||||
markdown::blockquote,
|
||||
markdown::blockhtml,
|
||||
markdown::header,
|
||||
markdown::hrule,
|
||||
markdown::list,
|
||||
markdown::listitem,
|
||||
markdown::paragraph,
|
||||
markdown::table,
|
||||
markdown::table_cell,
|
||||
markdown::table_row,
|
||||
|
||||
markdown::autolink,
|
||||
markdown::codespan,
|
||||
markdown::double_emphasis,
|
||||
markdown::emphasis,
|
||||
markdown::image,
|
||||
markdown::linebreak,
|
||||
markdown::link,
|
||||
markdown::raw_html_tag,
|
||||
markdown::triple_emphasis,
|
||||
|
||||
//markdown::entity,
|
||||
nullptr,
|
||||
markdown::normal_text,
|
||||
|
||||
64,
|
||||
"*_",
|
||||
nullptr
|
||||
};
|
||||
|
||||
class markdown_text_processor
|
||||
{
|
||||
public:
|
||||
struct detail
|
||||
{
|
||||
detail() : m_link_index(-1), m_emphasis(0), m_emphasis_amount(0), m_linebreak(false), m_end_paragraph(false) { }
|
||||
|
||||
string_vec m_html;
|
||||
|
||||
int m_link_index;
|
||||
|
||||
char m_emphasis;
|
||||
uint8_t m_emphasis_amount;
|
||||
bool m_linebreak;
|
||||
bool m_end_paragraph;
|
||||
};
|
||||
|
||||
std::string m_text;
|
||||
std::vector<detail> m_details;
|
||||
string_vec m_links;
|
||||
|
||||
markdown_text_processor();
|
||||
|
||||
void clear();
|
||||
|
||||
void fix_redirect_urls();
|
||||
void init_from_markdown(const char* pText);
|
||||
bool split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const;
|
||||
uint32_t count_char_in_text(uint8_t c) const;
|
||||
bool split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const;
|
||||
void convert_to_plain(std::string& out, bool trim_end) const;
|
||||
void convert_to_markdown(std::string& out, bool trim_end) const;
|
||||
|
||||
private:
|
||||
void ensure_detail_ofs(uint32_t ofs);
|
||||
void init_from_codes(const std::string& buf);
|
||||
void parse_block(const std::string& buf);
|
||||
void handle_html(std::string& out, uint32_t text_ofs) const;
|
||||
void handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const;
|
||||
};
|
14
resource.h
Normal file
14
resource.h
Normal file
@ -0,0 +1,14 @@
|
||||
//{{NO_DEPENDENCIES}}
|
||||
// Microsoft Visual C++ generated include file.
|
||||
// Used by ufojson.rc
|
||||
|
||||
// Next default values for new objects
|
||||
//
|
||||
#ifdef APSTUDIO_INVOKED
|
||||
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||
#define _APS_NEXT_RESOURCE_VALUE 101
|
||||
#define _APS_NEXT_COMMAND_VALUE 40001
|
||||
#define _APS_NEXT_CONTROL_VALUE 1001
|
||||
#define _APS_NEXT_SYMED_VALUE 101
|
||||
#endif
|
||||
#endif
|
8
udb.h
Normal file
8
udb.h
Normal file
@ -0,0 +1,8 @@
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#pragma once
|
||||
|
||||
#include "ufojson_core.h"
|
||||
|
||||
void udb_init();
|
||||
bool udb_dump();
|
||||
bool udb_convert();
|
3373
udb_tables.h
Normal file
3373
udb_tables.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
ufojson.aps
Normal file
BIN
ufojson.aps
Normal file
Binary file not shown.
1654
ufojson_core.cpp
Normal file
1654
ufojson_core.cpp
Normal file
File diff suppressed because it is too large
Load Diff
217
ufojson_core.h
Normal file
217
ufojson_core.h
Normal file
@ -0,0 +1,217 @@
|
||||
// ufojson_core.h
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#pragma once
|
||||
#include "utils.h"
|
||||
|
||||
#define TIMELINE_VERSION "1.13"
|
||||
|
||||
// Note that May ends in a period.
|
||||
extern const char* g_months[12];
|
||||
extern const char* g_full_months[12];
|
||||
|
||||
const uint32_t NUM_DATE_PREFIX_STRINGS = 24;
|
||||
extern const char* g_date_prefix_strings[NUM_DATE_PREFIX_STRINGS];
|
||||
|
||||
// Qualifier that can precede a date. Values are consecutive starting at 0, with
// cNoPrefix (-1) meaning "no qualifier" and cTotalPrefixes giving the number of qualifiers.
enum date_prefix_t
{
    cNoPrefix = -1,

    // "Early <season>"
    cEarlySpring = 0,
    cEarlySummer,
    cEarlyAutumn,
    cEarlyFall,
    cEarlyWinter,

    // "Mid <season>"
    cMidSpring,
    cMidSummer,
    cMidAutumn,
    cMidFall,
    cMidWinter,

    // "Late <season>"
    cLateSpring,
    cLateSummer,
    cLateAutumn,
    cLateFall,
    cLateWinter,

    // Bare season
    cSpring,
    cSummer,
    cAutumn,
    cFall,
    cWinter,

    // Qualifiers without a season
    cEarly,
    cMiddleOf,
    cLate,
    cEndOf,

    cTotalPrefixes
};
|
||||
|
||||
bool is_season(date_prefix_t prefix);
|
||||
int determine_month(const std::string& date);
|
||||
|
||||
struct event_date
|
||||
{
|
||||
date_prefix_t m_prefix;
|
||||
|
||||
int m_year;
|
||||
int m_month; // 1 based: [1,12] (NOT ZERO BASED), -1=invalid
|
||||
int m_day; // 1 based: [1,31], -1=invalid
|
||||
|
||||
bool m_fuzzy; // ?
|
||||
bool m_plural; // 's
|
||||
|
||||
bool m_approx; // (approximate)
|
||||
bool m_estimated; // (estimated)
|
||||
|
||||
event_date();
|
||||
|
||||
event_date(const event_date& other);
|
||||
|
||||
bool sanity_check() const;
|
||||
|
||||
bool operator== (const event_date& rhs) const;
|
||||
|
||||
bool operator!= (const event_date& rhs) const;
|
||||
|
||||
event_date& operator =(const event_date& rhs);
|
||||
|
||||
void clear();
|
||||
|
||||
bool is_valid() const;
|
||||
|
||||
std::string get_string() const;
|
||||
|
||||
// Parses basic dates (not ranges).
|
||||
// Date can end in "(approximate)", "(estimated)", "?", or "'s".
|
||||
// 2 digit dates converted to 1900+.
|
||||
// Supports year, month/year, or month/day/year.
|
||||
bool parse(const char* pStr, bool fix_20century_dates);
|
||||
|
||||
// More advanced date range parsing, used for converting the Eberhart timeline.
|
||||
// Note this doesn't support "'s", "(approximate)", "(estimated)", or converting 2 digit years to 1900'.
|
||||
static bool parse_eberhart_date_range(std::string date,
|
||||
event_date& begin_date,
|
||||
event_date& end_date, event_date& alt_date,
|
||||
int required_year = -1);
|
||||
|
||||
// Note the returned date may be invalid. It's only intended for sorting/comparison purposes against other sort dates.
|
||||
void get_sort_date(int& year, int& month, int& day) const;
|
||||
|
||||
// Compares two timeline dates. true if lhs < rhs
|
||||
static bool compare(const event_date& lhs, const event_date& rhs);
|
||||
|
||||
private:
|
||||
|
||||
static bool check_date_prefix(const event_date& date);
|
||||
};
|
||||
|
||||
struct timeline_event
|
||||
{
|
||||
std::string m_date_str;
|
||||
std::string m_time_str; // military
|
||||
|
||||
std::string m_alt_date_str;
|
||||
|
||||
std::string m_end_date_str;
|
||||
|
||||
event_date m_begin_date;
|
||||
event_date m_end_date;
|
||||
event_date m_alt_date;
|
||||
|
||||
std::string m_desc;
|
||||
string_vec m_type;
|
||||
string_vec m_refs;
|
||||
string_vec m_locations;
|
||||
string_vec m_attributes;
|
||||
string_vec m_see_also;
|
||||
|
||||
std::string m_rocket_type;
|
||||
std::string m_rocket_altitude;
|
||||
std::string m_rocket_range;
|
||||
|
||||
std::string m_atomic_type;
|
||||
std::string m_atomic_kt;
|
||||
std::string m_atomic_mt;
|
||||
|
||||
std::string m_source_id;
|
||||
|
||||
std::string m_source;
|
||||
|
||||
std::vector<string_pair> m_udb_data;
|
||||
|
||||
bool operator==(const timeline_event& rhs) const;
|
||||
|
||||
bool operator!=(const timeline_event& rhs) const;
|
||||
|
||||
bool operator< (const timeline_event& rhs) const;
|
||||
|
||||
void print(FILE* pFile) const;
|
||||
|
||||
void from_json(const json& obj, const char* pSource_override, bool fix_20century_dates);
|
||||
|
||||
void to_json(json& j) const;
|
||||
};
|
||||
|
||||
typedef std::vector<timeline_event> timeline_event_vec;
|
||||
|
||||
class ufo_timeline
|
||||
{
|
||||
public:
|
||||
ufo_timeline() :
|
||||
m_name("Unnamed Timeline")
|
||||
{
|
||||
}
|
||||
|
||||
size_t size() const { return m_events.size(); }
|
||||
|
||||
timeline_event& operator[] (size_t i) { return m_events[i]; }
|
||||
const timeline_event& operator[] (size_t i) const { return m_events[i]; }
|
||||
|
||||
const std::string& get_name() const { return m_name; }
|
||||
void set_name(const std::string& str) { m_name = str; }
|
||||
|
||||
const timeline_event_vec& get_events() const { return m_events; }
|
||||
timeline_event_vec& get_events() { return m_events; }
|
||||
|
||||
void sort()
|
||||
{
|
||||
std::sort(m_events.begin(), m_events.end());
|
||||
}
|
||||
|
||||
void create_json(json& j) const
|
||||
{
|
||||
j = json::object();
|
||||
|
||||
const char* pTimeline_name = m_name.c_str();
|
||||
|
||||
j[pTimeline_name] = json::array();
|
||||
|
||||
auto& ar = j[pTimeline_name];
|
||||
|
||||
for (size_t i = 0; i < m_events.size(); i++)
|
||||
{
|
||||
json obj;
|
||||
m_events[i].to_json(obj);
|
||||
|
||||
ar.push_back(obj);
|
||||
}
|
||||
}
|
||||
|
||||
bool write_file(const char* pFilename, bool utf8_bom = true)
|
||||
{
|
||||
json j;
|
||||
create_json(j);
|
||||
|
||||
return serialize_to_json_file(pFilename, j, utf8_bom);
|
||||
}
|
||||
|
||||
bool load_json(const char* pFilename, bool& utf8_flag, const char* pSource_override, bool fix_20century_dates);
|
||||
|
||||
bool write_markdown(const char* pTimeline_filename);
|
||||
|
||||
private:
|
||||
timeline_event_vec m_events;
|
||||
std::string m_name;
|
||||
};
|
814
utils.cpp
Normal file
814
utils.cpp
Normal file
@ -0,0 +1,814 @@
|
||||
// utils.cpp
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#include "utils.h"

#include <cstdarg>
|
||||
|
||||
// Joins two text fragments (e.g. consecutive lines of wrapped text).
//
// - If either fragment is empty, the other is returned unchanged.
// - If a ends in '-' and the character before it is NOT a digit, the hyphen
//   is treated as a line-break hyphen: it is removed and b is appended directly.
// - If a ends in '-' preceded by a digit (e.g. "1947-"), the hyphen is kept
//   and b is appended directly. (Previously b was silently dropped in this case.)
// - Otherwise b is appended after a single space (none is added if a already ends in one).
std::string combine_strings(std::string a, const std::string& b)
{
    if (a.empty())
        return b;

    if (b.empty())
        return a;

    if (a.back() == '-')
    {
        const bool hyphen_follows_digit = (a.size() >= 2) && isdigit((uint8_t)a[a.size() - 2]);

        // Only a hyphen that doesn't follow a digit is dropped.
        if (!hyphen_follows_digit)
            a.pop_back();
    }
    else if (a.back() != ' ')
    {
        a += " ";
    }

    a += b;

    return a;
}
|
||||
|
||||
std::wstring utf8_to_wchar(const std::string& str, UINT code_page)
|
||||
{
|
||||
if (str.empty())
|
||||
return std::wstring();
|
||||
|
||||
int size_needed = MultiByteToWideChar(code_page, 0, &str[0], (int)str.size(), NULL, 0);
|
||||
if (!size_needed)
|
||||
return std::wstring();
|
||||
|
||||
std::wstring wstrTo(size_needed, 0);
|
||||
int res = MultiByteToWideChar(code_page, 0, &str[0], (int)str.size(), &wstrTo[0], size_needed);
|
||||
if (!res)
|
||||
return std::wstring();
|
||||
|
||||
return wstrTo;
|
||||
}
|
||||
|
||||
std::string wchar_to_utf8(const std::wstring& wstr, UINT code_page)
|
||||
{
|
||||
if (wstr.empty())
|
||||
return std::string();
|
||||
|
||||
int size_needed = WideCharToMultiByte(code_page, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
|
||||
if (!size_needed)
|
||||
return std::string();
|
||||
|
||||
std::string strTo(size_needed, 0);
|
||||
int res = WideCharToMultiByte(code_page, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
|
||||
if (!res)
|
||||
return std::string();
|
||||
|
||||
return strTo;
|
||||
}
|
||||
|
||||
// Unicode code points used for code page 437 bytes 0..31, indexed directly
// by the byte value. Byte 0 is mapped to a space.
static uint16_t g_codepage_437_to_unicode_0_31[32] =
{
    // 0x00 - 0x07
    ' ',    0x263A, 0x263B, 0x2665,
    0x2666, 0x2663, 0x2660, 0x2022,
    // 0x08 - 0x0F
    0x25D8, 0x25CB, 0x25D9, 0x2642,
    0x2640, 0x266A, 0x266B, 0x263C,
    // 0x10 - 0x17
    0x25BA, 0x25C4, 0x2195, 0x203C,
    0x00B6, 0x00A7, 0x25AC, 0x21A8,
    // 0x18 - 0x1F
    0x2191, 0x2193, 0x2192, 0x2190,
    0x221F, 0x2194, 0x25B2, 0x25BC
};
|
||||
|
||||
// Unicode code points for code page 437 bytes 127..255, indexed by (byte - 127).
// Entry 0 is byte 0x7F; the remaining 128 entries are bytes 0x80..0xFF, 8 per row.
//
// The rows for 0xA0..0xAF previously repeated the 0x90..0x9F rows, so the
// accented vowels, enye, ordinals, inverted punctuation, fractions and
// guillemets in that range were decoded as the wrong characters.
static uint16_t g_codepage_437_to_unicode_128_255[129] =
{
    0x2302,                                                         // 0x7F
    0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, // 0x80
    0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, // 0x88
    0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, // 0x90
    0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, // 0x98
    0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, // 0xA0
    0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, // 0xA8
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, // 0xB0
    0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, // 0xB8
    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, // 0xC0
    0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, // 0xC8
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, // 0xD0
    0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, // 0xD8
    0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, // 0xE0
    0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, // 0xE8
    0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, // 0xF0
    0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0  // 0xF8
};
|
||||
|
||||
// Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expecting thing for chars<32, and we need them.
|
||||
std::string dos_to_utf8(const std::string& str)
|
||||
{
|
||||
std::wstring wstr;
|
||||
|
||||
for (uint8_t c : str)
|
||||
{
|
||||
if (c < 32)
|
||||
wstr.push_back(g_codepage_437_to_unicode_0_31[c]);
|
||||
else if (c >= 127)
|
||||
wstr.push_back(g_codepage_437_to_unicode_128_255[c - 127]);
|
||||
else
|
||||
wstr.push_back(c);
|
||||
}
|
||||
|
||||
return wchar_to_utf8(wstr);
|
||||
}
|
||||
|
||||
// printf-style formatting into a growable buffer.
//
// buf:   receives the zero-terminated formatted text. On success buf.size()
//        is the size of the working buffer, not the text length.
// pFmt:  printf-style format string.
// args:  the arguments. The caller still owns args and must va_end() it.
//
// Starts with an 8KB buffer and doubles it until the output fits. Returns
// false (after asserting) if vsnprintf() fails or the output would need a
// buffer larger than 16MB.
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args)
{
    uint32_t buf_size = 8192;

    for (; ; )
    {
        buf.resize(buf_size);

        // vsnprintf() consumes the va_list it is given, so every attempt must
        // work on its own copy. Reusing args after the first call is undefined.
        va_list args_copy;
        va_copy(args_copy, args);
        const int res = vsnprintf(&buf[0], buf.size(), pFmt, args_copy);
        va_end(args_copy);

        if (res < 0)
        {
            assert(false);
            return false;
        }

        // res excludes the terminator, so it must be strictly less than the buffer size.
        if ((size_t)res < buf.size())
            break;

        buf_size *= 2;
        if (buf_size > 16 * 1024 * 1024)
        {
            assert(false);
            return false;
        }
    }

    return true;
}
|
||||
|
||||
void ufprintf(FILE* pFile, const char* pFmt, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
|
||||
va_list args;
|
||||
va_start(args, pFmt);
|
||||
if (!vformat(buf, pFmt, args))
|
||||
return;
|
||||
va_end(args);
|
||||
|
||||
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
|
||||
|
||||
// Not thread safe, but we don't care
|
||||
_setmode(_fileno(pFile), _O_U16TEXT);
|
||||
fputws(&wbuf[0], pFile);
|
||||
_setmode(_fileno(pFile), _O_TEXT);
|
||||
}
|
||||
|
||||
void uprintf(const char* pFmt, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
|
||||
va_list args;
|
||||
va_start(args, pFmt);
|
||||
if (!vformat(buf, pFmt, args))
|
||||
return;
|
||||
va_end(args);
|
||||
|
||||
std::wstring wbuf(utf8_to_wchar(std::string(&buf[0])));
|
||||
|
||||
// Not thread safe, but we don't care
|
||||
_setmode(_fileno(stdout), _O_U16TEXT);
|
||||
fputws(&wbuf[0], stdout);
|
||||
_setmode(_fileno(stdout), _O_TEXT);
|
||||
}
|
||||
|
||||
std::string string_format(const char* pMsg, ...)
|
||||
{
|
||||
std::vector<char> buf;
|
||||
|
||||
va_list args;
|
||||
va_start(args, pMsg);
|
||||
if (!vformat(buf, pMsg, args))
|
||||
return "";
|
||||
va_end(args);
|
||||
|
||||
std::string res;
|
||||
if (buf.size())
|
||||
res.assign(&buf[0]);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void panic(const char* pMsg, ...)
|
||||
{
|
||||
char buf[4096];
|
||||
|
||||
va_list args;
|
||||
va_start(args, pMsg);
|
||||
vsnprintf(buf, sizeof(buf), pMsg, args);
|
||||
va_end(args);
|
||||
|
||||
ufprintf(stderr, "%s", buf);
|
||||
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
FILE* ufopen(const char* pFilename, const char* pMode)
|
||||
{
|
||||
std::wstring wfilename(utf8_to_wchar(pFilename));
|
||||
std::wstring wmode(utf8_to_wchar(pMode));
|
||||
|
||||
if (!wfilename.size() || !wmode.size())
|
||||
return nullptr;
|
||||
|
||||
FILE* pRes = nullptr;
|
||||
_wfopen_s(&pRes, &wfilename[0], &wmode[0]);
|
||||
return pRes;
|
||||
}
|
||||
|
||||
// Removes leading and trailing whitespace (as classified by isspace()) from
// str in place and returns a reference to str.
std::string& string_trim(std::string& str)
{
    while (!str.empty() && isspace((uint8_t)str.back()))
        str.pop_back();

    // Count the leading whitespace first and erase it in one operation,
    // instead of shifting the whole string once per removed character.
    size_t num_leading = 0;
    while ((num_leading < str.size()) && isspace((uint8_t)str[num_leading]))
        num_leading++;

    if (num_leading)
        str.erase(0, num_leading);

    return str;
}
|
||||
|
||||
// Removes trailing whitespace (as classified by isspace()) from str in place
// and returns a reference to str. Leading whitespace is left alone.
std::string& string_trim_end(std::string& str)
{
    size_t keep = str.size();

    while ((keep > 0) && isspace((uint8_t)str[keep - 1]))
        keep--;

    str.resize(keep);

    return str;
}
|
||||
|
||||
// Case sensitive search for the first occurrence of pPhrase in str.
// Returns the offset of the match, or -1 if the phrase isn't found.
int string_find_first(const std::string& str, const char* pPhrase)
{
    const size_t pos = str.find(pPhrase);

    return (pos != std::string::npos) ? (int)pos : -1;
}
|
||||
|
||||
int string_icompare(const std::string& a, const char* pB)
|
||||
{
|
||||
const size_t a_len = a.size();
|
||||
const size_t b_len = strlen(pB);
|
||||
|
||||
const size_t min_len = std::min(a_len, b_len);
|
||||
|
||||
for (size_t i = 0; i < min_len; i++)
|
||||
{
|
||||
const int ac = (uint8_t)utolower(a[i]);
|
||||
const int bc = (uint8_t)utolower(pB[i]);
|
||||
|
||||
if (ac != bc)
|
||||
return (ac < bc) ? -1 : 1;
|
||||
}
|
||||
|
||||
if (a_len == b_len)
|
||||
return 0;
|
||||
|
||||
return (a_len < b_len) ? -1 : 1;
|
||||
}
|
||||
|
||||
bool string_begins_with(const std::string& str, const char* pPhrase)
|
||||
{
|
||||
const size_t str_len = str.size();
|
||||
|
||||
const size_t phrase_len = strlen(pPhrase);
|
||||
assert(phrase_len);
|
||||
|
||||
if (str_len >= phrase_len)
|
||||
{
|
||||
if (_strnicmp(pPhrase, str.c_str(), phrase_len) == 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool string_ends_in(const std::string& str, const char* pPhrase)
|
||||
{
|
||||
const size_t str_len = str.size();
|
||||
|
||||
const size_t phrase_len = strlen(pPhrase);
|
||||
assert(phrase_len);
|
||||
|
||||
if (str_len >= phrase_len)
|
||||
{
|
||||
if (_stricmp(pPhrase, str.c_str() + str_len - phrase_len) == 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string encode_url(const std::string& url)
|
||||
{
|
||||
//const char* pValid_chars = ";,/?:@&=+$-_.!~*'()#";
|
||||
//const size_t valid_chars_len = strlen(pValid_chars);
|
||||
|
||||
std::string res;
|
||||
for (uint32_t i = 0; i < url.size(); i++)
|
||||
{
|
||||
uint8_t c = (uint8_t)url[i];
|
||||
|
||||
//const bool is_digit = (c >= 0) && (c <= '9');
|
||||
//const bool is_upper = (c >= 'A') && (c <= 'Z');
|
||||
//const bool is_lower = (c >= 'a') && (c <= 'z');
|
||||
|
||||
// Escape some problematic charactes that confuse some Markdown parsers (even after using Markdown '\' escapes)
|
||||
if ((c == ')') || (c == '(') || (c == '_') || (c == '*'))
|
||||
{
|
||||
res.push_back('%');
|
||||
res.push_back(to_hex(c / 16));
|
||||
res.push_back(to_hex(c % 16));
|
||||
continue;
|
||||
}
|
||||
|
||||
res.push_back(c);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Bitwise (table-less) CRC-32 using the reflected polynomial 0xEDB88320.
// init_crc is complemented on entry and the result is complemented on exit,
// so the CRC returned for one buffer can be passed as init_crc to continue
// over a following buffer.
uint32_t crc32(const uint8_t* pBuf, size_t size, uint32_t init_crc)
{
    uint32_t crc = ~init_crc;

    for (size_t ofs = 0; ofs < size; ofs++)
    {
        crc ^= pBuf[ofs];

        // Process the 8 bits of this byte, least significant first.
        for (int bit = 0; bit < 8; bit++)
        {
            const bool low_bit_set = (crc & 1) != 0;

            crc >>= 1;
            if (low_bit_set)
                crc ^= 0xEDB88320;
        }
    }

    return ~crc;
}
|
||||
|
||||
bool read_binary_file(const char* pFilename, uint8_vec& buf)
|
||||
{
|
||||
const uint64_t MAX_BINARY_FILE_LEN = 168ULL * 1024ULL * (1024ULL * 1024ULL);
|
||||
|
||||
FILE* pFile = ufopen(pFilename, "rb");
|
||||
if (!pFile)
|
||||
return false;
|
||||
|
||||
_fseeki64(pFile, 0, SEEK_END);
|
||||
int64_t len = _ftelli64(pFile);
|
||||
if (len < 0)
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
_fseeki64(pFile, 0, SEEK_SET);
|
||||
|
||||
if (len > MAX_BINARY_FILE_LEN)
|
||||
return false;
|
||||
buf.resize(len);
|
||||
|
||||
if (fread(&buf[0], len, 1, pFile) != 1)
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(pFile);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines, bool* pUTF8_flag)
|
||||
{
|
||||
FILE* pFile = ufopen(pFilename, "r");
|
||||
if (!pFile)
|
||||
return false;
|
||||
|
||||
bool first_line = true;
|
||||
|
||||
if (pUTF8_flag)
|
||||
*pUTF8_flag = false;
|
||||
|
||||
while (!feof(pFile))
|
||||
{
|
||||
char buf[16384];
|
||||
|
||||
char* p = fgets(buf, sizeof(buf), pFile);
|
||||
if (!p)
|
||||
{
|
||||
if (feof(pFile))
|
||||
break;
|
||||
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string str(p);
|
||||
|
||||
if (first_line)
|
||||
{
|
||||
first_line = false;
|
||||
if ((str.size() >= 3) &&
|
||||
((uint8_t)str[0] == UTF8_BOM0) &&
|
||||
((uint8_t)str[1] == UTF8_BOM1) &&
|
||||
((uint8_t)str[2] == UTF8_BOM2))
|
||||
{
|
||||
if (pUTF8_flag)
|
||||
*pUTF8_flag = true;
|
||||
str.erase(0, 3);
|
||||
}
|
||||
}
|
||||
|
||||
while (str.size() && ((str.back() == '\n') || (str.back() == '\r')))
|
||||
str.pop_back();
|
||||
|
||||
if (trim_lines)
|
||||
string_trim_end(str);
|
||||
|
||||
lines.push_back(str);
|
||||
}
|
||||
|
||||
fclose(pFile);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_text_file(const char* pFilename, std::vector<uint8_t>& buf, bool& utf8_flag)
|
||||
{
|
||||
utf8_flag = false;
|
||||
|
||||
FILE* pFile = ufopen(pFilename, "rb");
|
||||
if (!pFile)
|
||||
{
|
||||
ufprintf(stderr, "Failed reading file %s!\n", pFilename);
|
||||
return false;
|
||||
}
|
||||
|
||||
fseek(pFile, 0, SEEK_END);
|
||||
uint64_t filesize = _ftelli64(pFile);
|
||||
fseek(pFile, 0, SEEK_SET);
|
||||
|
||||
buf.resize(filesize + 1);
|
||||
fread(&buf[0], 1, filesize, pFile);
|
||||
|
||||
fclose(pFile);
|
||||
|
||||
if ((buf.size() >= 3) &&
|
||||
((uint8_t)buf[0] == UTF8_BOM0) &&
|
||||
((uint8_t)buf[1] == UTF8_BOM1) &&
|
||||
((uint8_t)buf[2] == UTF8_BOM2))
|
||||
{
|
||||
utf8_flag = true;
|
||||
|
||||
buf.erase(buf.begin(), buf.begin() + 3);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_text_file(const char* pFilename, string_vec& lines, bool utf8_bom)
|
||||
{
|
||||
FILE* pFile = ufopen(pFilename, "wb");
|
||||
if (!pFile)
|
||||
return false;
|
||||
|
||||
if (utf8_bom)
|
||||
{
|
||||
if ((fputc(UTF8_BOM0, pFile) == EOF) || (fputc(UTF8_BOM1, pFile) == EOF) || (fputc(UTF8_BOM2, pFile) == EOF))
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < lines.size(); i++)
|
||||
{
|
||||
if ((fwrite(lines[i].c_str(), lines[i].size(), 1, pFile) != 1) || (fwrite("\r\n", 2, 1, pFile) != 1))
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fclose(pFile) == EOF)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool serialize_to_json_file(const char* pFilename, const json& j, bool utf8_bom)
|
||||
{
|
||||
FILE* pFile = ufopen(pFilename, "wb");
|
||||
if (!pFile)
|
||||
return false;
|
||||
|
||||
if (utf8_bom)
|
||||
{
|
||||
if ((fputc(UTF8_BOM0, pFile) == EOF) || (fputc(UTF8_BOM1, pFile) == EOF) || (fputc(UTF8_BOM2, pFile) == EOF))
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::string d(j.dump(2));
|
||||
|
||||
if (d.size())
|
||||
{
|
||||
if (fwrite(&d[0], d.size(), 1, pFile) != 1)
|
||||
{
|
||||
fclose(pFile);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(pFile);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: This doesn't actually handle utf8. It assumes ANSI (code page 252) text input.
|
||||
static std::string extract_column_text(const std::string& str, uint32_t ofs, uint32_t len)
|
||||
{
|
||||
if (ofs >= str.size())
|
||||
return "";
|
||||
|
||||
const uint32_t max_len = std::min((uint32_t)str.size() - ofs, len);
|
||||
|
||||
std::string res(str);
|
||||
if (ofs)
|
||||
res.erase(0, ofs);
|
||||
|
||||
if (max_len < res.size())
|
||||
res.erase(max_len, res.size());
|
||||
|
||||
string_trim(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Note: This doesn't actually handle utf8. It assumes ANSI (code page 252) text input.
|
||||
bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std::string& title, string_vec& col_titles)
|
||||
{
|
||||
string_vec lines;
|
||||
bool utf8_flag = false;
|
||||
if (!read_text_file(pFilename, lines, utf8_flag))
|
||||
panic("Failed reading text file %s", pFilename);
|
||||
|
||||
if (utf8_flag)
|
||||
panic("load_column_text() doesn't support utf8 yet");
|
||||
|
||||
if (!lines.size() || !lines[0].size())
|
||||
panic("Expected title");
|
||||
|
||||
if (lines.size() < 6)
|
||||
panic("File too small");
|
||||
|
||||
for (uint32_t i = 0; i < lines.size(); i++)
|
||||
{
|
||||
if (lines[i].find_first_of(9) != std::string::npos)
|
||||
panic("Tab in file");
|
||||
|
||||
string_trim(lines[i]);
|
||||
}
|
||||
|
||||
title = lines[0];
|
||||
|
||||
if (lines[1].size())
|
||||
panic("Expected empty line");
|
||||
|
||||
std::string col_line = lines[2];
|
||||
|
||||
std::string col_seps = lines[3];
|
||||
if ((!col_seps.size()) || (col_seps[0] != '-') || (col_seps.back() != '-'))
|
||||
panic("Invalid column seperator line");
|
||||
|
||||
for (uint32_t i = 0; i < col_seps.size(); i++)
|
||||
{
|
||||
const uint8_t c = col_seps[i];
|
||||
if ((c != ' ') && (c != '-'))
|
||||
panic("Invalid column separator line");
|
||||
}
|
||||
|
||||
int col_sep_start = 0;
|
||||
std::vector< std::pair<uint32_t, uint32_t> > column_info; // start offset and len of each column in chars
|
||||
|
||||
for (uint32_t i = 1; i < col_seps.size(); i++)
|
||||
{
|
||||
const uint8_t c = col_seps[i];
|
||||
if (c == ' ')
|
||||
{
|
||||
if (col_sep_start != -1)
|
||||
{
|
||||
column_info.push_back(std::make_pair(col_sep_start, i - col_sep_start));
|
||||
col_sep_start = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (col_sep_start == -1)
|
||||
col_sep_start = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (col_sep_start != -1)
|
||||
{
|
||||
column_info.push_back(std::make_pair(col_sep_start, (uint32_t)col_seps.size() - col_sep_start));
|
||||
col_sep_start = -1;
|
||||
}
|
||||
|
||||
if (!column_info.size())
|
||||
panic("No columns found");
|
||||
|
||||
col_titles.resize(column_info.size());
|
||||
for (uint32_t i = 0; i < column_info.size(); i++)
|
||||
{
|
||||
col_titles[i] = col_line;
|
||||
|
||||
if (column_info[i].first)
|
||||
col_titles[i].erase(0, column_info[i].first);
|
||||
|
||||
col_titles[i].erase(column_info[i].second, col_titles[i].size() - column_info[i].second);
|
||||
string_trim(col_titles[i]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < column_info.size() - 1; i++)
|
||||
column_info[i].second = column_info[i + 1].first - column_info[i].first;
|
||||
column_info.back().second = 32000;
|
||||
|
||||
uint32_t cur_line = 4;
|
||||
|
||||
uint32_t cur_record_index = 0;
|
||||
|
||||
while (cur_line < lines.size())
|
||||
{
|
||||
string_vec rec_lines;
|
||||
rec_lines.push_back(lines[cur_line++]);
|
||||
|
||||
while (cur_line < lines.size())
|
||||
{
|
||||
if (!lines[cur_line].size())
|
||||
break;
|
||||
|
||||
rec_lines.push_back(lines[cur_line++]);
|
||||
}
|
||||
|
||||
// cur_line should be blank, or we're at the end of the file
|
||||
if (cur_line < lines.size())
|
||||
{
|
||||
cur_line++;
|
||||
if (cur_line < lines.size())
|
||||
{
|
||||
if (!lines[cur_line].size())
|
||||
panic("Expected non-empty line");
|
||||
}
|
||||
}
|
||||
|
||||
uprintf("%u:\n", cur_record_index);
|
||||
//for (uint32_t i = 0; i < rec_lines.size(); i++)
|
||||
// uprintf("%s\n", rec_lines[i].c_str());
|
||||
|
||||
string_vec col_lines(column_info.size());
|
||||
|
||||
for (uint32_t col_index = 0; col_index < column_info.size(); col_index++)
|
||||
{
|
||||
for (uint32_t l = 0; l < rec_lines.size(); l++)
|
||||
{
|
||||
std::string col_text(extract_column_text(rec_lines[l], column_info[col_index].first, column_info[col_index].second));
|
||||
|
||||
if (col_text.size())
|
||||
{
|
||||
if (col_lines[col_index].size())
|
||||
{
|
||||
if ((col_lines[col_index].back() != '-') && ((uint8_t)col_lines[col_index].back() != ANSI_SOFT_HYPHEN))
|
||||
col_lines[col_index].push_back(' ');
|
||||
else
|
||||
{
|
||||
if ((col_lines[col_index].size() >= 2) && (!isdigit((uint8_t)col_lines[col_index][col_lines[col_index].size() - 2])))
|
||||
col_lines[col_index].pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
col_lines[col_index] += col_text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from ANSI (code page 252) to UTF8.
|
||||
for (auto& l : col_lines)
|
||||
l = ansi_to_utf8(l);
|
||||
|
||||
for (uint32_t col_index = 0; col_index < column_info.size(); col_index++)
|
||||
{
|
||||
uprintf("%s\n", col_lines[col_index].c_str());
|
||||
}
|
||||
|
||||
uprintf("\n");
|
||||
|
||||
cur_record_index++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool invoke_curl(const std::string& args, string_vec& reply)
|
||||
{
|
||||
reply.clear();
|
||||
|
||||
remove("__temp.html");
|
||||
|
||||
// Invoke curl.exe
|
||||
std::string cmd(string_format("curl.exe \"%s\" -o __temp.html", args.c_str()));
|
||||
uprintf("Command: %s\n", cmd.c_str());
|
||||
|
||||
int status = system(cmd.c_str());
|
||||
uprintf("curl returned status %i\n", status);
|
||||
|
||||
if (status != EXIT_SUCCESS)
|
||||
return false;
|
||||
|
||||
// Read output file.
|
||||
|
||||
FILE* pFile = ufopen("__temp.html", "rb");
|
||||
if (!pFile)
|
||||
{
|
||||
Sleep(50);
|
||||
pFile = ufopen("__temp.html", "rb");
|
||||
if (!pFile)
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t buf[6] = { 0,0,0,0,0,0 };
|
||||
fread(buf, 5, 1, pFile);
|
||||
fclose(pFile);
|
||||
|
||||
// Try to detect some common binary file types
|
||||
|
||||
// PDF
|
||||
if (memcmp(buf, "%PDF-", 5) == 0)
|
||||
{
|
||||
uprintf("PDF file detected\n");
|
||||
|
||||
std::string filename(args);
|
||||
for (size_t i = filename.size() - 1; i >= 0; i--)
|
||||
{
|
||||
if (filename[i] == '/')
|
||||
{
|
||||
filename.erase(0, i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::string new_link_deescaped;
|
||||
for (uint32_t i = 0; i < filename.size(); i++)
|
||||
{
|
||||
uint8_t c = filename[i];
|
||||
if ((c == '%') && ((i + 2) < filename.size()))
|
||||
{
|
||||
int da = convert_hex_digit(filename[i + 1]);
|
||||
int db = convert_hex_digit(filename[i + 2]);
|
||||
if (da >= 0 && db >= 0)
|
||||
{
|
||||
int val = da * 16 + db;
|
||||
new_link_deescaped.push_back((uint8_t)val);
|
||||
}
|
||||
|
||||
i += 2;
|
||||
}
|
||||
else
|
||||
new_link_deescaped.push_back(c);
|
||||
}
|
||||
|
||||
rename("__temp.html", new_link_deescaped.c_str());
|
||||
uprintf("Renamed __temp.html to %s\n", new_link_deescaped.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// JPEG
|
||||
if (memcmp(buf, "\xFF\xD8\xFF\xE0", 4) == 0)
|
||||
{
|
||||
uprintf("JPEG file detected\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!read_text_file("__temp.html", reply))
|
||||
{
|
||||
// Wait a bit and try again, rarely needed under Windows.
|
||||
Sleep(50);
|
||||
if (!read_text_file("__temp.html", reply))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts the argc wide-character command line arguments in argv to strings
// using wchar_to_utf8(). args is resized to argc entries.
void convert_args_to_utf8(string_vec& args, int argc, wchar_t* argv[])
{
    args.resize(argc);

    int arg_index = 0;
    for (std::string& arg : args)
        arg = wchar_to_utf8(argv[arg_index++]);
}
|
207
utils.h
Normal file
207
utils.h
Normal file
@ -0,0 +1,207 @@
|
||||
// utils.h
|
||||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
#pragma once
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable:4100) // unreferenced formal parameter
|
||||
#pragma warning (disable:4505) // unreferenced function with internal linkage has been removed)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <cstdint>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <varargs.h>
|
||||
#include <string>
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
//#include "pjson.h"
|
||||
|
||||
#include "libsoldout/markdown.h"
|
||||
|
||||
#include "json/json.hpp"
|
||||
using json = nlohmann::json;
|
||||
|
||||
typedef std::vector<std::string> string_vec;
|
||||
typedef std::unordered_set<std::string> unordered_string_set;
|
||||
typedef std::vector<uint8_t> uint8_vec;
|
||||
typedef std::pair<std::string, std::string> string_pair;
|
||||
|
||||
const uint32_t UTF8_BOM0 = 0xEF, UTF8_BOM1 = 0xBB, UTF8_BOM2 = 0xBF;
|
||||
|
||||
// Code page 1252 (ANSI) soft hyphen character.
|
||||
// See http://www.alanwood.net/demos/ansi.html
|
||||
const uint32_t ANSI_SOFT_HYPHEN = 0xAD;
|
||||
|
||||
void panic(const char* pMsg, ...);
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Returns true if every character of s is a digit according to isdigit().
// An empty string returns true.
inline bool string_is_digits(const std::string& s)
{
    return std::all_of(s.begin(), s.end(),
        [](char c) { return isdigit((uint8_t)c) != 0; });
}
|
||||
|
||||
// Returns true if every character of s is alphabetic according to isalpha().
// An empty string returns true.
inline bool string_is_alpha(const std::string& s)
{
    return std::all_of(s.begin(), s.end(),
        [](char c) { return isalpha((uint8_t)c) != 0; });
}
|
||||
|
||||
std::string combine_strings(std::string a, const std::string& b);
|
||||
|
||||
// Convert an UTF8 string to a wide Unicode String
|
||||
std::wstring utf8_to_wchar(const std::string& str, UINT code_page = CP_UTF8);
|
||||
|
||||
// Convert a wide Unicode string to an UTF8 string
|
||||
std::string wchar_to_utf8(const std::wstring& wstr, UINT code_page = CP_UTF8);
|
||||
|
||||
// Converts code page 1252 text to utf8 by going through a wide string.
inline std::string ansi_to_utf8(const std::string& str)
{
    const std::wstring wide(utf8_to_wchar(str, 1252));
    return wchar_to_utf8(wide);
}
|
||||
|
||||
// Code page 437 to utf8. WideCharToMultiByte etc. doesn't do the expected thing for chars<32, and we need them.
|
||||
std::string dos_to_utf8(const std::string& str);
|
||||
|
||||
// utf8 string format
|
||||
bool vformat(std::vector<char>& buf, const char* pFmt, va_list args);
|
||||
|
||||
// utf8 printf to FILE*
|
||||
void ufprintf(FILE* pFile, const char* pFmt, ...);
|
||||
|
||||
// utf8 print to stdout
|
||||
void uprintf(const char* pFmt, ...);
|
||||
|
||||
std::string string_format(const char* pMsg, ...);
|
||||
|
||||
void panic(const char* pMsg, ...);
|
||||
|
||||
// Open a file given a utf8 filename
|
||||
FILE* ufopen(const char* pFilename, const char* pMode);
|
||||
|
||||
// Like tolower(), but only maps 'A'-'Z', so it doesn't assert on negative
// values and doesn't factor in the locale. Other characters are returned unchanged.
inline char utolower(char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');

    return is_upper ? (char)((c - 'A') + 'a') : c;
}
|
||||
|
||||
// Like toupper(), but only maps 'a'-'z', so it doesn't assert on negative
// values and doesn't factor in the locale. Other characters are returned unchanged.
inline char utoupper(char c)
{
    const bool is_lower = (c >= 'a') && (c <= 'z');

    return is_lower ? (char)((c - 'a') + 'A') : c;
}
|
||||
|
||||
// Like isdigit(), but only tests for '0'-'9', so it doesn't assert on
// negative values and doesn't factor in the locale.
inline bool uisdigit(char c)
{
    if (c < '0')
        return false;

    return c <= '9';
}
|
||||
|
||||
// Like isupper(), but only tests for 'A'-'Z', so it doesn't assert on
// negative values and doesn't factor in the locale.
inline bool uisupper(char c)
{
    if (c < 'A')
        return false;

    return c <= 'Z';
}
|
||||
|
||||
// Like islower(), but only tests for 'a'-'z', so it doesn't assert on
// negative values and doesn't factor in the locale.
inline bool uislower(char c)
{
    if (c < 'a')
        return false;

    return c <= 'z';
}
|
||||
|
||||
// like isalpha() but doesn't assert on negative values and doesn't factor in locale
|
||||
inline bool uisalpha(char c)
|
||||
{
|
||||
return uisupper(c) || uislower(c);
|
||||
}
|
||||
|
||||
// Converts a hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to its
// value in [0,15]. Returns -1 for any other input.
inline int convert_hex_digit(int d)
{
    if ((d >= '0') && (d <= '9'))
        return d - '0';

    if ((d >= 'A') && (d <= 'F'))
        return 10 + (d - 'A');

    if ((d >= 'a') && (d <= 'f'))
        return 10 + (d - 'a');

    return -1;
}
|
||||
|
||||
inline std::string string_lower(std::string str)
|
||||
{
|
||||
for (char& c : str)
|
||||
c = utolower(c);
|
||||
return str;
|
||||
}
|
||||
|
||||
inline std::string string_upper(std::string str)
|
||||
{
|
||||
for (char& c : str)
|
||||
c = utoupper(c);
|
||||
return str;
|
||||
}
|
||||
|
||||
std::string& string_trim(std::string& str);
|
||||
|
||||
std::string& string_trim_end(std::string& str);
|
||||
|
||||
// Case sensitive, returns -1 if can't find
|
||||
int string_find_first(const std::string& str, const char* pPhrase);
|
||||
|
||||
int string_icompare(const std::string& a, const char* pB);
|
||||
|
||||
// Case insensitive
|
||||
bool string_begins_with(const std::string& str, const char* pPhrase);
|
||||
|
||||
// Case insensitive
|
||||
bool string_ends_in(const std::string& str, const char* pPhrase);
|
||||
|
||||
// Converts a value in [0,15] to its uppercase hexadecimal digit character.
// Values above 15 trip the assert.
inline char to_hex(uint32_t val)
{
    assert(val <= 15);

    if (val <= 9)
        return (char)('0' + val);

    return (char)('A' + val - 10);
}
|
||||
|
||||
std::string encode_url(const std::string& url);
|
||||
|
||||
uint32_t crc32(const uint8_t* pBuf, size_t size, uint32_t init_crc = 0);
|
||||
|
||||
bool read_binary_file(const char* pFilename, uint8_vec& buf);
|
||||
|
||||
bool read_text_file(const char* pFilename, string_vec& lines, bool trim_lines = true, bool* pUTF8_flag = nullptr);
|
||||
|
||||
bool read_text_file(const char* pFilename, std::vector<uint8_t>& buf, bool& utf8_flag);
|
||||
|
||||
bool write_text_file(const char* pFilename, string_vec& lines, bool utf8_bom = true);
|
||||
|
||||
bool serialize_to_json_file(const char* pFilename, const json& j, bool utf8_bom);
|
||||
|
||||
bool load_column_text(const char* pFilename, std::vector<string_vec>& rows, std::string& title, string_vec& col_titles);
|
||||
|
||||
bool invoke_curl(const std::string& args, string_vec& reply);
|
||||
|
||||
void convert_args_to_utf8(string_vec& args, int argc, wchar_t* argv[]);
|
Loading…
Reference in New Issue
Block a user