mirror of
https://github.com/richgel999/ufo_data.git
synced 2024-12-25 15:39:34 -05:00
1109 lines
28 KiB
C++
1109 lines
28 KiB
C++
// Copyright (C) 2023 Richard Geldreich, Jr.
|
|
// markdown_proc.cpp
|
|
#include "markdown_proc.h"
|
|
|
|
struct markdown
|
|
{
|
|
enum
|
|
{
|
|
cCodeSig = 0xFE,
|
|
|
|
cCodeLink = 1,
|
|
cCodeEmphasis,
|
|
cCodeText,
|
|
cCodeParagraph,
|
|
cCodeLinebreak,
|
|
cCodeHTML
|
|
};
|
|
|
|
static void bufappend(struct buf* out, struct buf* in)
|
|
{
|
|
assert(in != out);
|
|
|
|
if (in && in->size)
|
|
bufput(out, in->data, in->size);
|
|
}
|
|
|
|
static void writelen(struct buf* ob, uint32_t size)
|
|
{
|
|
bufputc(ob, (uint8_t)(size & 0xFF));
|
|
bufputc(ob, (uint8_t)((size >> 8) & 0xFF));
|
|
bufputc(ob, (uint8_t)((size >> 16) & 0xFF));
|
|
bufputc(ob, (uint8_t)((size >> 24) & 0xFF));
|
|
}
|
|
|
|
static std::string get_string(const std::string& buf, uint32_t& cur_ofs, uint32_t text_size)
|
|
{
|
|
std::string text;
|
|
if (cur_ofs + text_size > buf.size())
|
|
panic("Buffer too small");
|
|
|
|
text.append(buf.c_str() + cur_ofs, text_size);
|
|
cur_ofs += text_size;
|
|
|
|
return text;
|
|
}
|
|
|
|
static uint32_t get_len32(const std::string& buf, uint32_t& ofs)
|
|
{
|
|
if ((ofs + 4) > buf.size())
|
|
panic("Buffer too small");
|
|
|
|
uint32_t l = (uint8_t)buf[ofs] |
|
|
(((uint8_t)buf[ofs + 1]) << 8) |
|
|
(((uint8_t)buf[ofs + 2]) << 16) |
|
|
(((uint8_t)buf[ofs + 3]) << 24);
|
|
|
|
ofs += 4;
|
|
|
|
return l;
|
|
}
|
|
|
|
static void prolog(struct buf* ob, void* opaque)
|
|
{
|
|
}
|
|
|
|
static void epilog(struct buf* ob, void* opaque)
|
|
{
|
|
}
|
|
|
|
/* block level callbacks - NULL skips the block */
|
|
static void blockcode(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "blockcode: \"%.*s\" ", (int)text->size, text->data);
|
|
#endif
|
|
panic("unsupported markdown feature");
|
|
}
|
|
|
|
static void blockquote(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "blockquote: \"%.*s\" ", (int)text->size, text->data);
|
|
#endif
|
|
// TODO: unsupported block quotes (here for when we're converting to plain text)
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void blockhtml(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "blockhtml: \"%.*s\" ", (int)text->size, text->data);
|
|
#endif
|
|
// TODO: Not fully supported - just dropping it
|
|
//panic("unsupported markdown feature");
|
|
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
}
|
|
|
|
static void header(struct buf* ob, struct buf* text, int level, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "header: %i \"%.*s\" ", level, (int)text->size, text->data);
|
|
#endif
|
|
// TODO: Not fully supported
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void hrule(struct buf* ob, void* opaque)
|
|
{
|
|
// TODO
|
|
//panic("unsupported markdown feature");
|
|
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
}
|
|
|
|
static void list(struct buf* ob, struct buf* text, int flags, void* opaque)
|
|
{
|
|
// TODO: not fully supporting lists (here for when we're converting to plain text)
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void listitem(struct buf* ob, struct buf* text, int flags, void* opaque)
|
|
{
|
|
// TODO: not fully supporting lists (here for when we're converting to plain text)
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void paragraph(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "paragraph: \"%.*s\" ", (int)text->size, text->data);
|
|
#endif
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void table(struct buf* ob, struct buf* head_row, struct buf* rows, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "table: \"%.*s\" \"%.*s\" ", (int)head_row->size, head_row->data, (int)rows->size, rows->data);
|
|
#endif
|
|
//panic("unsupported markdown feature");
|
|
|
|
// TODO: not fully supported, just for plaintext conversion
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
}
|
|
|
|
static void table_cell(struct buf* ob, struct buf* text, int flags, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "table_cell: \"%.*s\" %i ", (int)text->size, text->data, flags);
|
|
#endif
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
// TODO: not fully supported, just for plaintext conversion
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeParagraph);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
}
|
|
|
|
static void table_row(struct buf* ob, struct buf* cells, int flags, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "table_row: \"%.*s\" %i ", (int)cells->size, cells->data, flags);
|
|
#endif
|
|
//panic("unsupported markdown feature");
|
|
// TODO: not fully supported, just for plaintext conversion
|
|
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
}
|
|
|
|
static int autolink(struct buf* ob, struct buf* link, enum mkd_autolink type, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "autolink: %u \"%.*s\" ", type, (int)link->size, link->data);
|
|
#endif
|
|
panic("unsupported markdown feature");
|
|
return 1;
|
|
}
|
|
|
|
static int codespan(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "codespan: \"%.*s\" ", (int)text->size, text->data);
|
|
#endif
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeText);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int double_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "double_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
|
#endif
|
|
if (!text || !text->size)
|
|
return 1;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeEmphasis);
|
|
bufputc(ob, c);
|
|
bufputc(ob, 2);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
|
#endif
|
|
|
|
if (!text || !text->size)
|
|
return 1;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeEmphasis);
|
|
bufputc(ob, c);
|
|
bufputc(ob, 1);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int image(struct buf* ob, struct buf* link, struct buf* title, struct buf* alt, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "image: \"%.*s\" \"%.*s\" \"%.*s\" ",
|
|
(int)link->size, link->data,
|
|
(int)title->size, title->data,
|
|
(int)alt->size, alt->data);
|
|
#endif
|
|
//panic("unsupported markdown feature");
|
|
if (opaque)
|
|
*(bool*)opaque = true;
|
|
|
|
if (alt)
|
|
{
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeText);
|
|
writelen(ob, (uint32_t)alt->size);
|
|
bufappend(ob, alt);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int linebreak(struct buf* ob, void* opaque)
|
|
{
|
|
#if 0
|
|
bufprintf(ob, "linebreak ");
|
|
#endif
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeLinebreak);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int link(struct buf* ob, struct buf* link, struct buf* title, struct buf* content, void* opaque)
|
|
{
|
|
#if 0
|
|
printf("link: {%.*s} {%.*s} {%.*s}\n",
|
|
link ? (int)link->size : 0,
|
|
link ? link->data : nullptr,
|
|
title ? (int)title->size : 0,
|
|
title ? title->data : nullptr,
|
|
content ? (int)content->size : 0,
|
|
content ? content->data : nullptr);
|
|
#endif
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeLink);
|
|
writelen(ob, (uint32_t)link->size);
|
|
writelen(ob, (uint32_t)content->size);
|
|
|
|
bufappend(ob, link);
|
|
bufappend(ob, content);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int raw_html_tag(struct buf* ob, struct buf* tag, void* opaque)
|
|
{
|
|
//bufprintf(ob, "raw_html_tag: \"%.*s\" ", (int)tag->size, tag->data);
|
|
|
|
if (!tag || !tag->size)
|
|
return 1;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeHTML);
|
|
writelen(ob, (uint32_t)tag->size);
|
|
bufappend(ob, tag);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int triple_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
|
|
{
|
|
//bufprintf(ob, "triple_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
|
|
|
|
if (!text || !text->size)
|
|
return 1;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeEmphasis);
|
|
bufputc(ob, c);
|
|
bufputc(ob, 3);
|
|
writelen(ob, (uint32_t)text->size);
|
|
bufappend(ob, text);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void normal_text(struct buf* ob, struct buf* text, void* opaque)
|
|
{
|
|
if (!text || !text->size)
|
|
return;
|
|
|
|
bufputc(ob, (uint8_t)cCodeSig);
|
|
bufputc(ob, (uint8_t)cCodeText);
|
|
writelen(ob, (uint32_t)text->size);
|
|
for (uint32_t i = 0; i < text->size; i++)
|
|
{
|
|
uint8_t c = text->data[i];
|
|
if (c == '\n')
|
|
bufputc(ob, ' ');
|
|
else if (c != 1)
|
|
{
|
|
assert(c >= 32 || c == '\t');
|
|
bufputc(ob, c);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
static struct mkd_renderer g_mkd_parse =
|
|
{
|
|
markdown::prolog,
|
|
markdown::epilog,
|
|
|
|
markdown::blockcode,
|
|
markdown::blockquote,
|
|
markdown::blockhtml,
|
|
markdown::header,
|
|
markdown::hrule,
|
|
markdown::list,
|
|
markdown::listitem,
|
|
markdown::paragraph,
|
|
markdown::table,
|
|
markdown::table_cell,
|
|
markdown::table_row,
|
|
|
|
markdown::autolink,
|
|
markdown::codespan,
|
|
markdown::double_emphasis,
|
|
markdown::emphasis,
|
|
markdown::image,
|
|
markdown::linebreak,
|
|
markdown::link,
|
|
markdown::raw_html_tag,
|
|
markdown::triple_emphasis,
|
|
|
|
//markdown::entity,
|
|
nullptr,
|
|
markdown::normal_text,
|
|
|
|
64,
|
|
"*_",
|
|
nullptr
|
|
};
|
|
|
|
// Returns true if character c must be preceded by a backslash when written out as markdown.
// '-', '.' and '!' are deliberately not part of the set.
static bool markdown_should_escape(int c)
{
    static const char s_escaped_chars[] =
    {
        '\\', '`', '*', '_', '{', '}', '[', ']', '<', '>', '(', ')', '#', '|'
    };

    for (const char special : s_escaped_chars)
    {
        if (c == special)
            return true;
    }

    return false;
}
|
|
|
|
static std::string escape_markdown(const std::string& str)
|
|
{
|
|
std::string out;
|
|
|
|
for (uint32_t i = 0; i < str.size(); i++)
|
|
{
|
|
uint8_t c = str[i];
|
|
|
|
if (markdown_should_escape(c))
|
|
out.push_back('\\');
|
|
|
|
out.push_back(c);
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
// Constructs a processor with the "unsupported feature used" flag cleared; all other members
// are default-constructed.
markdown_text_processor::markdown_text_processor()
    : m_used_unsupported_feature(false)
{
}
|
|
|
|
void markdown_text_processor::clear()
|
|
{
|
|
m_used_unsupported_feature = false;
|
|
m_text.clear();
|
|
m_details.clear();
|
|
m_links.clear();
|
|
}
|
|
|
|
void markdown_text_processor::fix_redirect_urls()
|
|
{
|
|
for (uint32_t link_index = 0; link_index < m_links.size(); link_index++)
|
|
{
|
|
const char* pPrefix = "https://www.google.com/url?q=";
|
|
|
|
if (!string_begins_with(m_links[link_index], pPrefix))
|
|
continue;
|
|
|
|
size_t p;
|
|
if ((p = m_links[link_index].find("&sa=D&source=editors&ust=")) == std::string::npos)
|
|
continue;
|
|
|
|
size_t r = m_links[link_index].find("&usg=");
|
|
if ((r == std::string::npos) || (r < p))
|
|
continue;
|
|
|
|
if ((r - p) != 41)
|
|
continue;
|
|
|
|
if ((m_links[link_index].size() - r) != 33)
|
|
continue;
|
|
|
|
if ((m_links[link_index].size() - p) != 74)
|
|
continue;
|
|
|
|
std::string new_link(m_links[link_index]);
|
|
new_link.erase(p, new_link.size() - p);
|
|
|
|
new_link.erase(0, strlen(pPrefix));
|
|
|
|
// De-escape the string
|
|
std::string new_link_deescaped;
|
|
for (uint32_t i = 0; i < new_link.size(); i++)
|
|
{
|
|
uint8_t c = new_link[i];
|
|
if ((c == '%') && ((i + 2) < new_link.size()))
|
|
{
|
|
int da = convert_hex_digit(new_link[i + 1]);
|
|
int db = convert_hex_digit(new_link[i + 2]);
|
|
if (da >= 0 && db >= 0)
|
|
{
|
|
int val = da * 16 + db;
|
|
new_link_deescaped.push_back((uint8_t)val);
|
|
}
|
|
|
|
i += 2;
|
|
}
|
|
else
|
|
new_link_deescaped.push_back(c);
|
|
}
|
|
|
|
//printf("%s\n", new_link.c_str());
|
|
|
|
m_links[link_index] = new_link_deescaped;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < m_links.size(); i++)
|
|
m_links[i] = encode_url(m_links[i]);
|
|
}
|
|
|
|
void markdown_text_processor::init_from_markdown(const char* pText)
|
|
{
|
|
struct buf* pIn = bufnew(4096);
|
|
bufputs(pIn, pText);
|
|
|
|
struct buf* pOut = bufnew(4096);
|
|
|
|
m_used_unsupported_feature = false;
|
|
g_mkd_parse.opaque = &m_used_unsupported_feature;
|
|
markdown(pOut, pIn, &g_mkd_parse);
|
|
|
|
std::string buf;
|
|
buf.append((char*)pOut->data, pOut->size);
|
|
|
|
init_from_codes(buf);
|
|
|
|
bufrelease(pIn);
|
|
bufrelease(pOut);
|
|
}
|
|
|
|
bool markdown_text_processor::split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const
|
|
{
|
|
assert((this != &a) && (this != &b));
|
|
|
|
if (m_details[ofs].m_emphasis != 0)
|
|
return false;
|
|
|
|
a.m_text = m_text;
|
|
a.m_details = m_details;
|
|
a.m_links = m_links;
|
|
|
|
b.m_text = m_text;
|
|
b.m_details = m_details;
|
|
b.m_links = m_links;
|
|
|
|
a.m_text.erase(ofs, a.m_text.size() - ofs);
|
|
a.m_details.erase(a.m_details.begin() + ofs, a.m_details.end());
|
|
|
|
b.m_text.erase(0, ofs);
|
|
b.m_details.erase(b.m_details.begin(), b.m_details.begin() + ofs);
|
|
|
|
return true;
|
|
}
|
|
|
|
uint32_t markdown_text_processor::count_char_in_text(uint8_t c) const
|
|
{
|
|
uint32_t num = 0;
|
|
for (uint32_t i = 0; i < m_text.size(); i++)
|
|
{
|
|
if ((uint8_t)m_text[i] == c)
|
|
num++;
|
|
}
|
|
return num;
|
|
}
|
|
|
|
// If the text ends with a parenthesized group - optionally followed by a single '.' and any
// amount of trailing spaces/newlines - splits this object at the '(' that opens the group
// (nested parentheses are balanced) via split_in_half().
//
// a and b are always cleared first. Returns false when no such trailing group exists or when
// split_in_half() refuses the split.
bool markdown_text_processor::split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const
{
    a.clear();
    b.clear();

    if (m_text.empty())
        return false;

    int pos = (int)m_text.size() - 1;

    // Step back over trailing newlines and spaces.
    for (;;)
    {
        const char ch = m_text[pos];
        if ((ch != '\n') && (ch != ' '))
            break;

        if (pos == 0)
            return false;

        pos--;
    }

    // Allow one period after the closing parenthesis.
    if (m_text[pos] == '.')
    {
        if (pos == 0)
            return false;

        pos--;
    }

    if (m_text[pos] != ')')
        return false;

    // Walk backwards until the parenthesis that opens this group is found.
    int depth = 0;
    for (; pos >= 0; pos--)
    {
        const uint8_t ch = (uint8_t)m_text[pos];

        if (ch == ')')
        {
            depth++;
        }
        else if (ch == '(')
        {
            if (--depth == 0)
                break;
        }
    }

    if (pos < 0)
        return false;

    return split_in_half(pos, a, b);
}
|
|
|
|
void markdown_text_processor::convert_to_plain(std::string& out, bool trim_end) const
|
|
{
|
|
for (uint32_t i = 0; i < m_text.size(); i++)
|
|
{
|
|
uint8_t c = m_text[i];
|
|
|
|
assert((c == '\n') || (c == '\t') || (c >= 32));
|
|
|
|
out.push_back(c);
|
|
}
|
|
|
|
if (trim_end)
|
|
{
|
|
while (out.size() && out.back() == '\n')
|
|
out.pop_back();
|
|
|
|
string_trim_end(out);
|
|
}
|
|
}
|
|
|
|
void markdown_text_processor::convert_to_markdown(std::string& out, bool trim_end) const
|
|
{
|
|
if (m_used_unsupported_feature)
|
|
printf("markdown_text_processor::convert_to_markdown: Warning, one or more Markdown features were used in this text and won't be losslessly converted.\n");
|
|
|
|
int emphasis = 0, emphasis_amount = 0;
|
|
int cur_link_index = -1;
|
|
|
|
for (uint32_t text_ofs = 0; text_ofs < m_text.size(); text_ofs++)
|
|
{
|
|
if (m_details[text_ofs].m_link_index != -1)
|
|
{
|
|
// Inside link at current position
|
|
|
|
if (cur_link_index == -1)
|
|
{
|
|
// Not currently inside a link, so start a new link
|
|
|
|
handle_html(out, text_ofs);
|
|
|
|
out.push_back('[');
|
|
|
|
// Beginning new link
|
|
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
|
}
|
|
else if (cur_link_index != m_details[text_ofs].m_link_index)
|
|
{
|
|
// Switching to different link, so flush current link and start a new one
|
|
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
|
|
|
out += "](";
|
|
|
|
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
|
{
|
|
uint8_t c = m_links[cur_link_index][j];
|
|
if (markdown_should_escape(c))
|
|
out.push_back('\\');
|
|
out.push_back(c);
|
|
}
|
|
|
|
out.push_back(')');
|
|
|
|
handle_html(out, text_ofs);
|
|
|
|
out.push_back('[');
|
|
}
|
|
else
|
|
{
|
|
// Currently inside a link which hasn't changed
|
|
|
|
handle_html(out, text_ofs);
|
|
|
|
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
|
}
|
|
|
|
cur_link_index = m_details[text_ofs].m_link_index;
|
|
}
|
|
else
|
|
{
|
|
// Not inside link at current position
|
|
|
|
if (cur_link_index != -1)
|
|
{
|
|
// Flush current link
|
|
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
|
|
|
out += "](";
|
|
|
|
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
|
{
|
|
uint8_t c = m_links[cur_link_index][j];
|
|
if (markdown_should_escape(c))
|
|
out.push_back('\\');
|
|
out.push_back(c);
|
|
}
|
|
|
|
out.push_back(')');
|
|
|
|
handle_html(out, text_ofs);
|
|
|
|
cur_link_index = -1;
|
|
}
|
|
else
|
|
{
|
|
handle_html(out, text_ofs);
|
|
|
|
handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
|
|
}
|
|
}
|
|
|
|
if (m_details[text_ofs].m_linebreak)
|
|
{
|
|
out.push_back(' ');
|
|
|
|
// One space will already be in the text.
|
|
//out.push_back(' ');
|
|
}
|
|
|
|
uint8_t c = m_text[text_ofs];
|
|
if (markdown_should_escape(c))
|
|
{
|
|
// Markdown escape
|
|
out.push_back('\\');
|
|
}
|
|
|
|
out.push_back(c);
|
|
}
|
|
|
|
if (emphasis != 0)
|
|
{
|
|
// Flush last emphasis
|
|
for (int j = 0; j < emphasis_amount; j++)
|
|
out.push_back((uint8_t)emphasis);
|
|
}
|
|
emphasis = 0;
|
|
emphasis_amount = 0;
|
|
|
|
if (cur_link_index != -1)
|
|
{
|
|
// Flush last link
|
|
out += "](";
|
|
|
|
for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
|
|
{
|
|
uint8_t c = m_links[cur_link_index][j];
|
|
if (markdown_should_escape(c))
|
|
out.push_back('\\');
|
|
out.push_back(c);
|
|
}
|
|
|
|
out.push_back(')');
|
|
cur_link_index = -1;
|
|
}
|
|
|
|
if (m_details.size() > m_text.size())
|
|
{
|
|
if (m_details.size() != m_text.size() + 1)
|
|
panic("details array too large");
|
|
|
|
if (m_details.back().m_html.size())
|
|
{
|
|
for (uint32_t i = 0; i < m_details.back().m_html.size(); i++)
|
|
out += m_details.back().m_html[i];
|
|
}
|
|
}
|
|
|
|
if (trim_end)
|
|
{
|
|
while (out.size() && out.back() == '\n')
|
|
out.pop_back();
|
|
|
|
string_trim_end(out);
|
|
}
|
|
}
|
|
|
|
void markdown_text_processor::ensure_detail_ofs(uint32_t ofs)
|
|
{
|
|
if (m_details.size() <= ofs)
|
|
m_details.resize(ofs + 1);
|
|
}
|
|
|
|
void markdown_text_processor::init_from_codes(const std::string& buf)
|
|
{
|
|
m_text.resize(0);
|
|
m_details.resize(0);
|
|
m_links.resize(0);
|
|
|
|
parse_block(buf);
|
|
}
|
|
|
|
void markdown_text_processor::parse_block(const std::string& buf)
|
|
{
|
|
uint32_t cur_ofs = 0;
|
|
while (cur_ofs < buf.size())
|
|
{
|
|
uint8_t sig = (uint8_t)buf[cur_ofs];
|
|
|
|
if (sig != markdown::cCodeSig)
|
|
panic("Expected code block signature");
|
|
|
|
cur_ofs++;
|
|
if (cur_ofs == buf.size())
|
|
panic("Premature end of buffer");
|
|
|
|
uint8_t code_type = (uint8_t)buf[cur_ofs];
|
|
cur_ofs++;
|
|
|
|
switch (code_type)
|
|
{
|
|
case markdown::cCodeLink:
|
|
{
|
|
const uint32_t link_size = markdown::get_len32(buf, cur_ofs);
|
|
const uint32_t content_size = markdown::get_len32(buf, cur_ofs);
|
|
|
|
std::string link(markdown::get_string(buf, cur_ofs, link_size));
|
|
std::string content(markdown::get_string(buf, cur_ofs, content_size));
|
|
|
|
const uint32_t link_index = (uint32_t)m_links.size();
|
|
m_links.push_back(link);
|
|
|
|
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
|
|
|
parse_block(content);
|
|
|
|
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
|
if (end_text_ofs)
|
|
{
|
|
ensure_detail_ofs(end_text_ofs - 1);
|
|
|
|
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
|
m_details[i].m_link_index = link_index;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case markdown::cCodeEmphasis:
|
|
{
|
|
if (cur_ofs >= buf.size())
|
|
panic("Buffer too small");
|
|
|
|
const uint8_t c = (uint8_t)buf[cur_ofs++];
|
|
|
|
if (cur_ofs >= buf.size())
|
|
panic("Buffer too small");
|
|
|
|
const uint32_t amount = (uint8_t)buf[cur_ofs++];
|
|
|
|
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
|
|
|
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
|
|
|
const uint32_t start_text_ofs = (uint32_t)m_text.size();
|
|
|
|
parse_block(text);
|
|
|
|
const uint32_t end_text_ofs = (uint32_t)m_text.size();
|
|
|
|
if (end_text_ofs)
|
|
{
|
|
ensure_detail_ofs(end_text_ofs - 1);
|
|
|
|
for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
|
|
{
|
|
m_details[i].m_emphasis = c;
|
|
m_details[i].m_emphasis_amount = (uint8_t)amount;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
case markdown::cCodeText:
|
|
{
|
|
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
|
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
|
|
|
for (size_t i = 0; i < text.size(); i++)
|
|
{
|
|
// value 1 is written by the markdown parser when it wants to delete a \n
|
|
if (text[i] != 1)
|
|
m_text.push_back(text[i]);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case markdown::cCodeParagraph:
|
|
{
|
|
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
|
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
|
|
|
parse_block(text);
|
|
|
|
m_text += "\n";
|
|
m_text += "\n";
|
|
|
|
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
|
m_details[m_text.size() - 1].m_end_paragraph = true;
|
|
|
|
break;
|
|
}
|
|
case markdown::cCodeLinebreak:
|
|
{
|
|
m_text += "\n";
|
|
|
|
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
|
m_details[m_text.size() - 1].m_linebreak = true;
|
|
|
|
break;
|
|
}
|
|
case markdown::cCodeHTML:
|
|
{
|
|
const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
|
|
std::string text(markdown::get_string(buf, cur_ofs, text_size));
|
|
|
|
uint32_t ofs = (uint32_t)m_text.size();
|
|
ensure_detail_ofs(ofs);
|
|
m_details[ofs].m_html.push_back(text);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
panic("Invalid code");
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (m_text.size())
|
|
ensure_detail_ofs((uint32_t)m_text.size() - 1);
|
|
}
|
|
|
|
void markdown_text_processor::handle_html(std::string& out, uint32_t text_ofs) const
|
|
{
|
|
// Any HTML appears before this character
|
|
for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
|
out += m_details[text_ofs].m_html[i];
|
|
}
|
|
|
|
void markdown_text_processor::handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const
|
|
{
|
|
if (m_details[text_ofs].m_emphasis != 0)
|
|
{
|
|
// Desired emphasis
|
|
if ((m_details[text_ofs].m_emphasis == emphasis) && (m_details[text_ofs].m_emphasis_amount == emphasis_amount))
|
|
{
|
|
// No change to emphasis
|
|
|
|
// Any HTML appears before this character
|
|
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
|
// out += m_details[text_ofs].m_html[i];
|
|
}
|
|
else
|
|
{
|
|
// Change to emphasis
|
|
if (emphasis != 0)
|
|
{
|
|
// Flush out current emphasis
|
|
for (int j = 0; j < emphasis_amount; j++)
|
|
out.push_back((uint8_t)emphasis);
|
|
}
|
|
|
|
// Any HTML appears before this character
|
|
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
|
// out += m_details[text_ofs].m_html[i];
|
|
|
|
emphasis = m_details[text_ofs].m_emphasis;
|
|
emphasis_amount = m_details[text_ofs].m_emphasis_amount;
|
|
|
|
// Start new emphasis
|
|
for (int j = 0; j < emphasis_amount; j++)
|
|
out.push_back((uint8_t)emphasis);
|
|
}
|
|
}
|
|
else if (m_details[text_ofs].m_emphasis == 0)
|
|
{
|
|
// Desires no emphasis
|
|
if (emphasis != 0)
|
|
{
|
|
// Flush out current emphasis
|
|
for (int j = 0; j < emphasis_amount; j++)
|
|
out.push_back((uint8_t)emphasis);
|
|
}
|
|
emphasis = 0;
|
|
emphasis_amount = 0;
|
|
|
|
// Any HTML appears before this character
|
|
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
|
|
// out += m_details[text_ofs].m_html[i];
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
const char* pText =
|
|
u8R"(
|
|
|
|
<ul>test</ul>
|
|
|
|
_text1_
|
|
**text2**
|
|
**_text3_**
|
|
|
|
![alt text](https://github.com/n48.png "Logo Title")
|
|
|
|
# Heading 1
|
|
## Heading 2
|
|
### Heading 3
|
|
|
|
1. XXXXX
|
|
1. Item 1
|
|
2. Item 2
|
|
2. YYYYY
|
|
3. ZZZZZ
|
|
|
|
| Tables | Are | Cool |
|
|
| ------------- |:-------------:| -----:|
|
|
| col 3 is | right-aligned | $1600 |
|
|
| col 2 is | centered | $12 |
|
|
| zebra stripes | are neat | $1 |
|
|
|
|
* [blahblah](www.blah1.com)
|
|
* [blahblah2](www.blah2.com)
|
|
|
|
`
|
|
this is code 1
|
|
this is code 2
|
|
`
|
|
|
|
```
|
|
this is code 3
|
|
this is code 4
|
|
```
|
|
|
|
> blockquote 1
|
|
> blockquote 2
|
|
|
|
---
|
|
|
|
* AAA
|
|
* BBB
|
|
* ZZZZ1
|
|
* ZZZZ2
|
|
* CCC)";
|
|
|
|
markdown_text_processor tp;
|
|
tp.init_from_markdown(pText);
|
|
|
|
std::string desc;
|
|
tp.convert_to_plain(desc, true);
|
|
|
|
uprintf("%s\n", desc.c_str());
|
|
|
|
return 0;
|
|
#endif
|