mirror of
https://github.com/richgel999/ufo_data.git
synced 2025-06-19 03:39:14 -04:00
new files
This commit is contained in:
parent
3f5d0d8f18
commit
650909370f
13 changed files with 12276 additions and 0 deletions
608
markdown_proc.cpp
Normal file
608
markdown_proc.cpp
Normal file
|
@ -0,0 +1,608 @@
|
|||
// Copyright (C) 2023 Richard Geldreich, Jr.
|
||||
// markdown_proc.cpp
|
||||
#include "markdown_proc.h"
|
||||
|
||||
static bool markdown_should_escape(int c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '\\':
|
||||
case '`':
|
||||
case '*':
|
||||
case '_':
|
||||
case '{':
|
||||
case '}':
|
||||
case '[':
|
||||
case ']':
|
||||
case '<':
|
||||
case '>':
|
||||
case '(':
|
||||
case ')':
|
||||
case '#':
|
||||
//case '-':
|
||||
//case '.':
|
||||
//case '!':
|
||||
case '|':
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::string escape_markdown(const std::string& str)
|
||||
{
|
||||
std::string out;
|
||||
|
||||
for (uint32_t i = 0; i < str.size(); i++)
|
||||
{
|
||||
uint8_t c = str[i];
|
||||
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
markdown_text_processor::markdown_text_processor()
|
||||
{
|
||||
}
|
||||
|
||||
void markdown_text_processor::clear()
|
||||
{
|
||||
m_text.clear();
|
||||
m_details.clear();
|
||||
m_links.clear();
|
||||
}
|
||||
|
||||
void markdown_text_processor::fix_redirect_urls()
|
||||
{
|
||||
for (uint32_t link_index = 0; link_index < m_links.size(); link_index++)
|
||||
{
|
||||
const char* pPrefix = "https://www.google.com/url?q=";
|
||||
|
||||
if (!string_begins_with(m_links[link_index], pPrefix))
|
||||
continue;
|
||||
|
||||
size_t p;
|
||||
if ((p = m_links[link_index].find("&sa=D&source=editors&ust=")) == std::string::npos)
|
||||
continue;
|
||||
|
||||
size_t r = m_links[link_index].find("&usg=");
|
||||
if ((r == std::string::npos) || (r < p))
|
||||
continue;
|
||||
|
||||
if ((r - p) != 41)
|
||||
continue;
|
||||
|
||||
if ((m_links[link_index].size() - r) != 33)
|
||||
continue;
|
||||
|
||||
if ((m_links[link_index].size() - p) != 74)
|
||||
continue;
|
||||
|
||||
std::string new_link(m_links[link_index]);
|
||||
new_link.erase(p, new_link.size() - p);
|
||||
|
||||
new_link.erase(0, strlen(pPrefix));
|
||||
|
||||
// De-escape the string
|
||||
std::string new_link_deescaped;
|
||||
for (uint32_t i = 0; i < new_link.size(); i++)
|
||||
{
|
||||
uint8_t c = new_link[i];
|
||||
if ((c == '%') && ((i + 2) < new_link.size()))
|
||||
{
|
||||
int da = convert_hex_digit(new_link[i + 1]);
|
||||
int db = convert_hex_digit(new_link[i + 2]);
|
||||
if (da >= 0 && db >= 0)
|
||||
{
|
||||
int val = da * 16 + db;
|
||||
new_link_deescaped.push_back((uint8_t)val);
|
||||
}
|
||||
|
||||
i += 2;
|
||||
}
|
||||
else
|
||||
new_link_deescaped.push_back(c);
|
||||
}
|
||||
|
||||
//printf("%s\n", new_link.c_str());
|
||||
|
||||
m_links[link_index] = new_link_deescaped;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < m_links.size(); i++)
|
||||
m_links[i] = encode_url(m_links[i]);
|
||||
}
|
||||
|
||||
void markdown_text_processor::init_from_markdown(const char* pText)
|
||||
{
|
||||
struct buf* pIn = bufnew(4096);
|
||||
bufputs(pIn, pText);
|
||||
|
||||
struct buf* pOut = bufnew(4096);
|
||||
markdown(pOut, pIn, &mkd_parse);
|
||||
|
||||
std::string buf;
|
||||
buf.append((char*)pOut->data, pOut->size);
|
||||
|
||||
init_from_codes(buf);
|
||||
|
||||
bufrelease(pIn);
|
||||
bufrelease(pOut);
|
||||
}
|
||||
|
||||
bool markdown_text_processor::split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const
|
||||
{
|
||||
assert((this != &a) && (this != &b));
|
||||
|
||||
if (m_details[ofs].m_emphasis != 0)
|
||||
return false;
|
||||
|
||||
a.m_text = m_text;
|
||||
a.m_details = m_details;
|
||||
a.m_links = m_links;
|
||||
|
||||
b.m_text = m_text;
|
||||
b.m_details = m_details;
|
||||
b.m_links = m_links;
|
||||
|
||||
a.m_text.erase(ofs, a.m_text.size() - ofs);
|
||||
a.m_details.erase(a.m_details.begin() + ofs, a.m_details.end());
|
||||
|
||||
b.m_text.erase(0, ofs);
|
||||
b.m_details.erase(b.m_details.begin(), b.m_details.begin() + ofs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t markdown_text_processor::count_char_in_text(uint8_t c) const
|
||||
{
|
||||
uint32_t num = 0;
|
||||
for (uint32_t i = 0; i < m_text.size(); i++)
|
||||
{
|
||||
if ((uint8_t)m_text[i] == c)
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
bool markdown_text_processor::split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const
|
||||
{
|
||||
a.clear();
|
||||
b.clear();
|
||||
|
||||
if (!m_text.size())
|
||||
return false;
|
||||
|
||||
int ofs = (int)m_text.size() - 1;
|
||||
while ((m_text[ofs] == '\n') || (m_text[ofs] == ' '))
|
||||
{
|
||||
if (!ofs)
|
||||
return false;
|
||||
ofs--;
|
||||
}
|
||||
|
||||
if (m_text[ofs] == '.')
|
||||
{
|
||||
if (!ofs)
|
||||
return false;
|
||||
|
||||
ofs--;
|
||||
}
|
||||
|
||||
if (m_text[ofs] != ')')
|
||||
return false;
|
||||
|
||||
int level = 0;
|
||||
while (ofs >= 0)
|
||||
{
|
||||
uint8_t c = (uint8_t)m_text[ofs];
|
||||
|
||||
if (c == ')')
|
||||
level++;
|
||||
else if (c == '(')
|
||||
{
|
||||
level--;
|
||||
if (!level)
|
||||
break;
|
||||
}
|
||||
|
||||
ofs--;
|
||||
}
|
||||
if (ofs < 0)
|
||||
return false;
|
||||
|
||||
return split_in_half(ofs, a, b);
|
||||
}
|
||||
|
||||
void markdown_text_processor::convert_to_plain(std::string& out, bool trim_end) const
|
||||
{
|
||||
for (uint32_t i = 0; i < m_text.size(); i++)
|
||||
{
|
||||
uint8_t c = m_text[i];
|
||||
|
||||
assert((c == '\n') || (c == '\t') || (c >= 32));
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
if (trim_end)
|
||||
{
|
||||
while (out.size() && out.back() == '\n')
|
||||
out.pop_back();
|
||||
|
||||
string_trim_end(out);
|
||||
}
|
||||
}
|
||||
|
||||
void markdown_text_processor::convert_to_markdown(std::string& out, bool trim_end) const
|
||||
{
|
||||
int emphasis = 0, emphasis_amount = 0;
|
||||
int cur_link_index = -1;
|
||||
|
||||
for (uint32_t text_ofs = 0; text_ofs < m_text.size(); text_ofs++)
|
||||
{
|
||||
if (m_details[text_ofs].m_link_index != -1)
|
||||
{
|
||||
// Inside link at current position
|
||||
|
||||
if (cur_link_index == -1)
|
||||
{
|
||||
// Not currently inside a link, so start a new link
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
out.push_back('[');
|
||||
|
||||
// Beginning new link
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
else if (cur_link_index != m_details[text_ofs].m_link_index)
|
||||
{
|
||||
// Switching to different link, so flush current link and start a new one
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
out.push_back('[');
|
||||
}
|
||||
else
|
||||
{
|
||||
// Currently inside a link which hasn't changed
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
|
||||
cur_link_index = m_details[text_ofs].m_link_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not inside link at current position
|
||||
|
||||
if (cur_link_index != -1)
|
||||
{
|
||||
// Flush current link
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
cur_link_index = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
handle_html(out, text_ofs);
|
||||
|
||||
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_details[text_ofs].m_linebreak)
|
||||
{
|
||||
out.push_back(' ');
|
||||
|
||||
// One space will already be in the text.
|
||||
//out.push_back(' ');
|
||||
}
|
||||
|
||||
uint8_t c = m_text[text_ofs];
|
||||
if (markdown_should_escape(c))
|
||||
{
|
||||
// Markdown escape
|
||||
out.push_back('\\');
|
||||
}
|
||||
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush last emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
emphasis = 0;
|
||||
emphasis_amount = 0;
|
||||
|
||||
if (cur_link_index != -1)
|
||||
{
|
||||
// Flush last link
|
||||
out += "](";
|
||||
|
||||
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
||||
{
|
||||
uint8_t c = m_links[cur_link_index][j];
|
||||
if (markdown_should_escape(c))
|
||||
out.push_back('\\');
|
||||
out.push_back(c);
|
||||
}
|
||||
|
||||
out.push_back(')');
|
||||
cur_link_index = -1;
|
||||
}
|
||||
|
||||
if (m_details.size() > m_text.size())
|
||||
{
|
||||
if (m_details.size() != m_text.size() + 1)
|
||||
panic("details array too large");
|
||||
|
||||
if (m_details.back().m_html.size())
|
||||
{
|
||||
for (uint32_t i = 0; i < m_details.back().m_html.size(); i++)
|
||||
out += m_details.back().m_html[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (trim_end)
|
||||
{
|
||||
while (out.size() && out.back() == '\n')
|
||||
out.pop_back();
|
||||
|
||||
string_trim_end(out);
|
||||
}
|
||||
}
|
||||
|
||||
void markdown_text_processor::ensure_detail_ofs(uint32_t ofs)
|
||||
{
|
||||
if (m_details.size() <= ofs)
|
||||
m_details.resize(ofs + 1);
|
||||
}
|
||||
|
||||
void markdown_text_processor::init_from_codes(const std::string& buf)
|
||||
{
|
||||
m_text.resize(0);
|
||||
m_details.resize(0);
|
||||
m_links.resize(0);
|
||||
|
||||
parse_block(buf);
|
||||
}
|
||||
|
||||
void markdown_text_processor::parse_block(const std::string& buf)
|
||||
{
|
||||
uint32_t cur_ofs = 0;
|
||||
while (cur_ofs < buf.size())
|
||||
{
|
||||
uint8_t sig = (uint8_t)buf[cur_ofs];
|
||||
|
||||
if (sig != markdown::cCodeSig)
|
||||
panic("Expected code block signature");
|
||||
|
||||
cur_ofs++;
|
||||
if (cur_ofs == buf.size())
|
||||
panic("Premature end of buffer");
|
||||
|
||||
uint8_t code_type = (uint8_t)buf[cur_ofs];
|
||||
cur_ofs++;
|
||||
|
||||
switch (code_type)
|
||||
{
|
||||
case markdown::cCodeLink:
|
||||
{
|
||||
const uint32_t link_size = markdown::get_len32(buf, cur_ofs);
|
||||
const uint32_t content_size = markdown::get_len32(buf, cur_ofs);
|
||||
|
||||
std::string link(markdown::get_string(buf, cur_ofs, link_size));
|
||||
std::string content(markdown::get_string(buf, cur_ofs, content_size));
|
||||
|
||||
const uint32_t link_index = (uint32_t)m_links.size();
|
||||
m_links.push_back(link);
|
||||
|
||||
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
parse_block(content);
|
||||
|
||||
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
||||
if (end_text_ofs)
|
||||
{
|
||||
ensure_detail_ofs(end_text_ofs - 1);
|
||||
|
||||
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
||||
m_details[i].m_link_index = link_index;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeEmphasis:
|
||||
{
|
||||
if (cur_ofs >= buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
const uint8_t c = (uint8_t)buf[cur_ofs++];
|
||||
|
||||
if (cur_ofs >= buf.size())
|
||||
panic("Buffer too small");
|
||||
|
||||
const uint32_t amount = (uint8_t)buf[cur_ofs++];
|
||||
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
parse_block(text);
|
||||
|
||||
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
||||
|
||||
if (end_text_ofs)
|
||||
{
|
||||
ensure_detail_ofs(end_text_ofs - 1);
|
||||
|
||||
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
||||
{
|
||||
m_details[i].m_emphasis = c;
|
||||
m_details[i].m_emphasis_amount = (uint8_t)amount;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeText:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
for (size_t i = 0; i < text.size(); i++)
|
||||
{
|
||||
// value 1 is written by the markdown parser when it wants to delete a \n
|
||||
if (text[i] != 1)
|
||||
m_text.push_back(text[i]);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeParagraph:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
parse_block(text);
|
||||
|
||||
m_text += "\n";
|
||||
m_text += "\n";
|
||||
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
m_details[m_text.size() - 1].m_end_paragraph = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeLinebreak:
|
||||
{
|
||||
m_text += "\n";
|
||||
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
m_details[m_text.size() - 1].m_linebreak = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case markdown::cCodeHTML:
|
||||
{
|
||||
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
||||
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
||||
|
||||
uint32_t ofs = (uint32_t)m_text.size();
|
||||
ensure_detail_ofs(ofs);
|
||||
m_details[ofs].m_html.push_back(text);
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
panic("Invalid code");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_text.size())
|
||||
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
||||
}
|
||||
|
||||
void markdown_text_processor::handle_html(std::string& out, uint32_t text_ofs) const
|
||||
{
|
||||
// Any HTML appears before this character
|
||||
for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
|
||||
void markdown_text_processor::handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const
|
||||
{
|
||||
if (m_details[text_ofs].m_emphasis != 0)
|
||||
{
|
||||
// Desired emphasis
|
||||
if ((m_details[text_ofs].m_emphasis == emphasis) && (m_details[text_ofs].m_emphasis_amount == emphasis_amount))
|
||||
{
|
||||
// No change to emphasis
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Change to emphasis
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush out current emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
|
||||
emphasis = m_details[text_ofs].m_emphasis;
|
||||
emphasis_amount = m_details[text_ofs].m_emphasis_amount;
|
||||
|
||||
// Start new emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
}
|
||||
else if (m_details[text_ofs].m_emphasis == 0)
|
||||
{
|
||||
// Desires no emphasis
|
||||
if (emphasis != 0)
|
||||
{
|
||||
// Flush out current emphasis
|
||||
for (int j = 0; j < emphasis_amount; j++)
|
||||
out.push_back((uint8_t)emphasis);
|
||||
}
|
||||
emphasis = 0;
|
||||
emphasis_amount = 0;
|
||||
|
||||
// Any HTML appears before this character
|
||||
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
||||
// out += m_details[text_ofs].m_html[i];
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue