new files

2025-08-10 07:40:11 -04:00 · 2023-02-20 17:59:08 -05:00 · 2023-02-20 17:59:08 -05:00 · 650909370f
commit 650909370f
parent 3f5d0d8f18
13 changed files with 12276 additions and 0 deletions
--- a/markdown_proc.cpp
+++ b/markdown_proc.cpp
@ -0,0 +1,608 @@
+// Copyright (C) 2023 Richard Geldreich, Jr.
+// markdown_proc.cpp
+#include "markdown_proc.h"
+
+static bool markdown_should_escape(int c)
+{
+    switch (c)
+    {
+    case '\\':
+    case '`':
+    case '*':
+    case '_':
+    case '{':
+    case '}':
+    case '[':
+    case ']':
+    case '<':
+    case '>':
+    case '(':
+    case ')':
+    case '#':
+        //case '-':
+        //case '.':
+        //case '!':
+    case '|':
+        return true;
+    default:
+        break;
+    }
+
+    return false;
+}
+
+static std::string escape_markdown(const std::string& str)
+{
+    std::string out;
+
+    for (uint32_t i = 0; i < str.size(); i++)
+    {
+        uint8_t c = str[i];
+
+        if (markdown_should_escape(c))
+            out.push_back('\\');
+
+        out.push_back(c);
+    }
+
+    return out;
+}
+
+markdown_text_processor::markdown_text_processor()
+{
+}
+
+void markdown_text_processor::clear()
+{
+    m_text.clear();
+    m_details.clear();
+    m_links.clear();
+}
+
+void markdown_text_processor::fix_redirect_urls()
+{
+    for (uint32_t link_index = 0; link_index < m_links.size(); link_index++)
+    {
+        const char* pPrefix = "https://www.google.com/url?q=";
+
+        if (!string_begins_with(m_links[link_index], pPrefix))
+            continue;
+
+        size_t p;
+        if ((p = m_links[link_index].find("&sa=D&source=editors&ust=")) == std::string::npos)
+            continue;
+
+        size_t r = m_links[link_index].find("&usg=");
+        if ((r == std::string::npos) || (r < p))
+            continue;
+
+        if ((r - p) != 41)
+            continue;
+
+        if ((m_links[link_index].size() - r) != 33)
+            continue;
+
+        if ((m_links[link_index].size() - p) != 74)
+            continue;
+
+        std::string new_link(m_links[link_index]);
+        new_link.erase(p, new_link.size() - p);
+
+        new_link.erase(0, strlen(pPrefix));
+
+        // De-escape the string
+        std::string new_link_deescaped;
+        for (uint32_t i = 0; i < new_link.size(); i++)
+        {
+            uint8_t c = new_link[i];
+            if ((c == '%') && ((i + 2) < new_link.size()))
+            {
+                int da = convert_hex_digit(new_link[i + 1]);
+                int db = convert_hex_digit(new_link[i + 2]);
+                if (da >= 0 && db >= 0)
+                {
+                    int val = da * 16 + db;
+                    new_link_deescaped.push_back((uint8_t)val);
+                }
+
+                i += 2;
+            }
+            else
+                new_link_deescaped.push_back(c);
+        }
+
+        //printf("%s\n", new_link.c_str());
+
+        m_links[link_index] = new_link_deescaped;
+    }
+
+    for (uint32_t i = 0; i < m_links.size(); i++)
+        m_links[i] = encode_url(m_links[i]);
+}
+
+void markdown_text_processor::init_from_markdown(const char* pText)
+{
+    struct buf* pIn = bufnew(4096);
+    bufputs(pIn, pText);
+
+    struct buf* pOut = bufnew(4096);
+    markdown(pOut, pIn, &mkd_parse);
+
+    std::string buf;
+    buf.append((char*)pOut->data, pOut->size);
+
+    init_from_codes(buf);
+
+    bufrelease(pIn);
+    bufrelease(pOut);
+}
+
+bool markdown_text_processor::split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const
+{
+    assert((this != &a) && (this != &b));
+
+    if (m_details[ofs].m_emphasis != 0)
+        return false;
+
+    a.m_text = m_text;
+    a.m_details = m_details;
+    a.m_links = m_links;
+
+    b.m_text = m_text;
+    b.m_details = m_details;
+    b.m_links = m_links;
+
+    a.m_text.erase(ofs, a.m_text.size() - ofs);
+    a.m_details.erase(a.m_details.begin() + ofs, a.m_details.end());
+
+    b.m_text.erase(0, ofs);
+    b.m_details.erase(b.m_details.begin(), b.m_details.begin() + ofs);
+
+    return true;
+}
+
+uint32_t markdown_text_processor::count_char_in_text(uint8_t c) const
+{
+    uint32_t num = 0;
+    for (uint32_t i = 0; i < m_text.size(); i++)
+    {
+        if ((uint8_t)m_text[i] == c)
+            num++;
+    }
+    return num;
+}
+
+bool markdown_text_processor::split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const
+{
+    a.clear();
+    b.clear();
+
+    if (!m_text.size())
+        return false;
+
+    int ofs = (int)m_text.size() - 1;
+    while ((m_text[ofs] == '\n') || (m_text[ofs] == ' '))
+    {
+        if (!ofs)
+            return false;
+        ofs--;
+    }
+
+    if (m_text[ofs] == '.')
+    {
+        if (!ofs)
+            return false;
+
+        ofs--;
+    }
+
+    if (m_text[ofs] != ')')
+        return false;
+
+    int level = 0;
+    while (ofs >= 0)
+    {
+        uint8_t c = (uint8_t)m_text[ofs];
+
+        if (c == ')')
+            level++;
+        else if (c == '(')
+        {
+            level--;
+            if (!level)
+                break;
+        }
+
+        ofs--;
+    }
+    if (ofs < 0)
+        return false;
+
+    return split_in_half(ofs, a, b);
+}
+
+void markdown_text_processor::convert_to_plain(std::string& out, bool trim_end) const
+{
+    for (uint32_t i = 0; i < m_text.size(); i++)
+    {
+        uint8_t c = m_text[i];
+
+        assert((c == '\n') || (c == '\t') || (c >= 32));
+
+        out.push_back(c);
+    }
+
+    if (trim_end)
+    {
+        while (out.size() && out.back() == '\n')
+            out.pop_back();
+
+        string_trim_end(out);
+    }
+}
+
+void markdown_text_processor::convert_to_markdown(std::string& out, bool trim_end) const
+{
+    int emphasis = 0, emphasis_amount = 0;
+    int cur_link_index = -1;
+
+    for (uint32_t text_ofs = 0; text_ofs < m_text.size(); text_ofs++)
+    {
+        if (m_details[text_ofs].m_link_index != -1)
+        {
+            // Inside link at current position
+
+            if (cur_link_index == -1)
+            {
+                // Not currently inside a link, so start a new link
+
+                handle_html(out, text_ofs);
+
+                out.push_back('[');
+
+                // Beginning new link
+                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
+            }
+            else if (cur_link_index != m_details[text_ofs].m_link_index)
+            {
+                // Switching to different link, so flush current link and start a new one
+                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
+
+                out += "](";
+
+                for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
+                {
+                    uint8_t c = m_links[cur_link_index][j];
+                    if (markdown_should_escape(c))
+                        out.push_back('\\');
+                    out.push_back(c);
+                }
+
+                out.push_back(')');
+
+                handle_html(out, text_ofs);
+
+                out.push_back('[');
+            }
+            else
+            {
+                // Currently inside a link which hasn't changed
+
+                handle_html(out, text_ofs);
+
+                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
+            }
+
+            cur_link_index = m_details[text_ofs].m_link_index;
+        }
+        else
+        {
+            // Not inside link at current position
+
+            if (cur_link_index != -1)
+            {
+                // Flush current link
+                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
+
+                out += "](";
+
+                for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
+                {
+                    uint8_t c = m_links[cur_link_index][j];
+                    if (markdown_should_escape(c))
+                        out.push_back('\\');
+                    out.push_back(c);
+                }
+
+                out.push_back(')');
+
+                handle_html(out, text_ofs);
+
+                cur_link_index = -1;
+            }
+            else
+            {
+                handle_html(out, text_ofs);
+
+                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
+            }
+        }
+
+        if (m_details[text_ofs].m_linebreak)
+        {
+            out.push_back(' ');
+
+            // One space will already be in the text.
+            //out.push_back(' ');
+        }
+
+        uint8_t c = m_text[text_ofs];
+        if (markdown_should_escape(c))
+        {
+            // Markdown escape
+            out.push_back('\\');
+        }
+
+        out.push_back(c);
+    }
+
+    if (emphasis != 0)
+    {
+        // Flush last emphasis
+        for (int j = 0; j < emphasis_amount; j++)
+            out.push_back((uint8_t)emphasis);
+    }
+    emphasis = 0;
+    emphasis_amount = 0;
+
+    if (cur_link_index != -1)
+    {
+        // Flush last link
+        out += "](";
+
+        for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
+        {
+            uint8_t c = m_links[cur_link_index][j];
+            if (markdown_should_escape(c))
+                out.push_back('\\');
+            out.push_back(c);
+        }
+
+        out.push_back(')');
+        cur_link_index = -1;
+    }
+
+    if (m_details.size() > m_text.size())
+    {
+        if (m_details.size() != m_text.size() + 1)
+            panic("details array too large");
+
+        if (m_details.back().m_html.size())
+        {
+            for (uint32_t i = 0; i < m_details.back().m_html.size(); i++)
+                out += m_details.back().m_html[i];
+        }
+    }
+
+    if (trim_end)
+    {
+        while (out.size() && out.back() == '\n')
+            out.pop_back();
+
+        string_trim_end(out);
+    }
+}
+
+void markdown_text_processor::ensure_detail_ofs(uint32_t ofs)
+{
+    if (m_details.size() <= ofs)
+        m_details.resize(ofs + 1);
+}
+
+void markdown_text_processor::init_from_codes(const std::string& buf)
+{
+    m_text.resize(0);
+    m_details.resize(0);
+    m_links.resize(0);
+
+    parse_block(buf);
+}
+
+void markdown_text_processor::parse_block(const std::string& buf)
+{
+    uint32_t cur_ofs = 0;
+    while (cur_ofs < buf.size())
+    {
+        uint8_t sig = (uint8_t)buf[cur_ofs];
+
+        if (sig != markdown::cCodeSig)
+            panic("Expected code block signature");
+
+        cur_ofs++;
+        if (cur_ofs == buf.size())
+            panic("Premature end of buffer");
+
+        uint8_t code_type = (uint8_t)buf[cur_ofs];
+        cur_ofs++;
+
+        switch (code_type)
+        {
+        case markdown::cCodeLink:
+        {
+            const uint32_t link_size = markdown::get_len32(buf, cur_ofs);
+            const uint32_t content_size = markdown::get_len32(buf, cur_ofs);
+
+            std::string link(markdown::get_string(buf, cur_ofs, link_size));
+            std::string content(markdown::get_string(buf, cur_ofs, content_size));
+
+            const uint32_t link_index = (uint32_t)m_links.size();
+            m_links.push_back(link);
+
+            const uint32_t start_text_ofs = (uint32_t)m_text.size();
+
+            parse_block(content);
+
+            const uint32_t end_text_ofs = (uint32_t)m_text.size();
+            if (end_text_ofs)
+            {
+                ensure_detail_ofs(end_text_ofs - 1);
+
+                for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
+                    m_details[i].m_link_index = link_index;
+            }
+
+            break;
+        }
+        case markdown::cCodeEmphasis:
+        {
+            if (cur_ofs >= buf.size())
+                panic("Buffer too small");
+
+            const uint8_t c = (uint8_t)buf[cur_ofs++];
+
+            if (cur_ofs >= buf.size())
+                panic("Buffer too small");
+
+            const uint32_t amount = (uint8_t)buf[cur_ofs++];
+
+            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
+
+            std::string text(markdown::get_string(buf, cur_ofs, text_size));
+
+            const uint32_t start_text_ofs = (uint32_t)m_text.size();
+
+            parse_block(text);
+
+            const uint32_t end_text_ofs = (uint32_t)m_text.size();
+
+            if (end_text_ofs)
+            {
+                ensure_detail_ofs(end_text_ofs - 1);
+
+                for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
+                {
+                    m_details[i].m_emphasis = c;
+                    m_details[i].m_emphasis_amount = (uint8_t)amount;
+                }
+            }
+
+            break;
+        }
+        case markdown::cCodeText:
+        {
+            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
+            std::string text(markdown::get_string(buf, cur_ofs, text_size));
+
+            for (size_t i = 0; i < text.size(); i++)
+            {
+                // value 1 is written by the markdown parser when it wants to delete a \n
+                if (text[i] != 1)
+                    m_text.push_back(text[i]);
+            }
+
+            break;
+        }
+        case markdown::cCodeParagraph:
+        {
+            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
+            std::string text(markdown::get_string(buf, cur_ofs, text_size));
+
+            parse_block(text);
+
+            m_text += "\n";
+            m_text += "\n";
+
+            ensure_detail_ofs((uint32_t)m_text.size() - 1);
+            m_details[m_text.size() - 1].m_end_paragraph = true;
+
+            break;
+        }
+        case markdown::cCodeLinebreak:
+        {
+            m_text += "\n";
+
+            ensure_detail_ofs((uint32_t)m_text.size() - 1);
+            m_details[m_text.size() - 1].m_linebreak = true;
+
+            break;
+        }
+        case markdown::cCodeHTML:
+        {
+            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
+            std::string text(markdown::get_string(buf, cur_ofs, text_size));
+
+            uint32_t ofs = (uint32_t)m_text.size();
+            ensure_detail_ofs(ofs);
+            m_details[ofs].m_html.push_back(text);
+
+            break;
+        }
+        default:
+            panic("Invalid code");
+            break;
+        }
+    }
+
+    if (m_text.size())
+        ensure_detail_ofs((uint32_t)m_text.size() - 1);
+}
+
+void markdown_text_processor::handle_html(std::string& out, uint32_t text_ofs) const
+{
+    // Any HTML appears before this character
+    for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
+        out += m_details[text_ofs].m_html[i];
+}
+
+void markdown_text_processor::handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const
+{
+    if (m_details[text_ofs].m_emphasis != 0)
+    {
+        // Desired emphasis
+        if ((m_details[text_ofs].m_emphasis == emphasis) && (m_details[text_ofs].m_emphasis_amount == emphasis_amount))
+        {
+            // No change to emphasis
+
+            // Any HTML appears before this character
+            //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
+            //    out += m_details[text_ofs].m_html[i];
+        }
+        else
+        {
+            // Change to emphasis
+            if (emphasis != 0)
+            {
+                // Flush out current emphasis
+                for (int j = 0; j < emphasis_amount; j++)
+                    out.push_back((uint8_t)emphasis);
+            }
+
+            // Any HTML appears before this character
+            //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
+            //    out += m_details[text_ofs].m_html[i];
+
+            emphasis = m_details[text_ofs].m_emphasis;
+            emphasis_amount = m_details[text_ofs].m_emphasis_amount;
+
+            // Start new emphasis
+            for (int j = 0; j < emphasis_amount; j++)
+                out.push_back((uint8_t)emphasis);
+        }
+    }
+    else if (m_details[text_ofs].m_emphasis == 0)
+    {
+        // Desires no emphasis
+        if (emphasis != 0)
+        {
+            // Flush out current emphasis
+            for (int j = 0; j < emphasis_amount; j++)
+                out.push_back((uint8_t)emphasis);
+        }
+        emphasis = 0;
+        emphasis_amount = 0;
+
+        // Any HTML appears before this character
+        //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
+        //    out += m_details[text_ofs].m_html[i];
+    }
+}