ufo_data/markdown_proc.cpp

// Copyright (C) 2023 Richard Geldreich, Jr.
// markdown_proc.cpp
#include "markdown_proc.h"

struct markdown
{
    enum
    {
        cCodeSig = 0xFE,

        cCodeLink = 1,
        cCodeEmphasis,
        cCodeText,
        cCodeParagraph,
        cCodeLinebreak,
        cCodeHTML
    };

    static void bufappend(struct buf* out, struct buf* in)
    {
        assert(in != out);

        if (in && in->size)
            bufput(out, in->data, in->size);
    }

    static void writelen(struct buf* ob, uint32_t size)
    {
        bufputc(ob, (uint8_t)(size & 0xFF));
        bufputc(ob, (uint8_t)((size >> 8) & 0xFF));
        bufputc(ob, (uint8_t)((size >> 16) & 0xFF));
        bufputc(ob, (uint8_t)((size >> 24) & 0xFF));
    }

    static std::string get_string(const std::string& buf, uint32_t& cur_ofs, uint32_t text_size)
    {
        std::string text;
        if (cur_ofs + text_size > buf.size())
            panic("Buffer too small");

        text.append(buf.c_str() + cur_ofs, text_size);
        cur_ofs += text_size;

        return text;
    }

    static uint32_t get_len32(const std::string& buf, uint32_t& ofs)
    {
        if ((ofs + 4) > buf.size())
            panic("Buffer too small");

        uint32_t l = (uint8_t)buf[ofs] |
            (((uint8_t)buf[ofs + 1]) << 8) |
            (((uint8_t)buf[ofs + 2]) << 16) |
            (((uint8_t)buf[ofs + 3]) << 24);

        ofs += 4;

        return l;
    }

    static void prolog(struct buf* ob, void* opaque)
    {
    }

    static void epilog(struct buf* ob, void* opaque)
    {
    }

    /* block level callbacks - NULL skips the block */
    static void blockcode(struct buf* ob, struct buf* text, void* opaque)
    {
#if 0
        bufprintf(ob, "blockcode: \"%.*s\" ", (int)text->size, text->data);
#endif
        panic("unsupported markdown feature");
    }

    static void blockquote(struct buf* ob, struct buf* text, void* opaque)
    {
#if 0
        bufprintf(ob, "blockquote: \"%.*s\" ", (int)text->size, text->data);
#endif
        // TODO: unsupported block quotes (here for when we're converting to plain text)
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void blockhtml(struct buf* ob, struct buf* text, void* opaque)
    {
#if 0
        bufprintf(ob, "blockhtml: \"%.*s\" ", (int)text->size, text->data);
#endif
        // TODO: Not fully supported - just dropping it
        //panic("unsupported markdown feature");

        if (opaque)
            *(bool*)opaque = true;
    }

    static void header(struct buf* ob, struct buf* text, int level, void* opaque)
    {
#if 0
        bufprintf(ob, "header: %i \"%.*s\" ", level, (int)text->size, text->data);
#endif
        // TODO: Not fully supported
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void hrule(struct buf* ob, void* opaque)
    {
        // TODO
        //panic("unsupported markdown feature");
        
        if (opaque)
            *(bool*)opaque = true;
    }

    static void list(struct buf* ob, struct buf* text, int flags, void* opaque)
    {
        // TODO: not fully supporting lists (here for when we're converting to plain text)
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void listitem(struct buf* ob, struct buf* text, int flags, void* opaque)
    {
        // TODO: not fully supporting lists (here for when we're converting to plain text)
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void paragraph(struct buf* ob, struct buf* text, void* opaque)
    {
#if 0
        bufprintf(ob, "paragraph: \"%.*s\" ", (int)text->size, text->data);
#endif
        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void table(struct buf* ob, struct buf* head_row, struct buf* rows, void* opaque)
    {
#if 0
        bufprintf(ob, "table: \"%.*s\" \"%.*s\" ", (int)head_row->size, head_row->data, (int)rows->size, rows->data);
#endif
        //panic("unsupported markdown feature");

        // TODO: not fully supported, just for plaintext conversion
        if (opaque)
            *(bool*)opaque = true;
    }

    static void table_cell(struct buf* ob, struct buf* text, int flags, void* opaque)
    {
#if 0
        bufprintf(ob, "table_cell: \"%.*s\" %i ", (int)text->size, text->data, flags);
#endif
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        // TODO: not fully supported, just for plaintext conversion
        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeParagraph);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);
    }

    static void table_row(struct buf* ob, struct buf* cells, int flags, void* opaque)
    {
#if 0
        bufprintf(ob, "table_row: \"%.*s\" %i ", (int)cells->size, cells->data, flags);
#endif
        //panic("unsupported markdown feature");
        // TODO: not fully supported, just for plaintext conversion

        if (opaque)
            *(bool*)opaque = true;
    }

    static int autolink(struct buf* ob, struct buf* link, enum mkd_autolink type, void* opaque)
    {
#if 0
        bufprintf(ob, "autolink: %u \"%.*s\" ", type, (int)link->size, link->data);
#endif
        panic("unsupported markdown feature");
        return 1;
    }

    static int codespan(struct buf* ob, struct buf* text, void* opaque)
    {
#if 0
        bufprintf(ob, "codespan: \"%.*s\" ", (int)text->size, text->data);
#endif
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeText);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);

        return 1;
    }

    static int double_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
    {
#if 0
        bufprintf(ob, "double_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
#endif
        if (!text || !text->size)
            return 1;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeEmphasis);
        bufputc(ob, c);
        bufputc(ob, 2);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);

        return 1;
    }

    static int emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
    {
#if 0
        bufprintf(ob, "emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
#endif

        if (!text || !text->size)
            return 1;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeEmphasis);
        bufputc(ob, c);
        bufputc(ob, 1);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);

        return 1;
    }

    static int image(struct buf* ob, struct buf* link, struct buf* title, struct buf* alt, void* opaque)
    {
#if 0
        bufprintf(ob, "image: \"%.*s\" \"%.*s\" \"%.*s\" ",
            (int)link->size, link->data,
            (int)title->size, title->data,
            (int)alt->size, alt->data);
#endif
        //panic("unsupported markdown feature");
        if (opaque)
            *(bool*)opaque = true;

        if (alt)
        {
            bufputc(ob, (uint8_t)cCodeSig);
            bufputc(ob, (uint8_t)cCodeText);
            writelen(ob, (uint32_t)alt->size);
            bufappend(ob, alt);
        }

        return 1;
    }

    static int linebreak(struct buf* ob, void* opaque)
    {
#if 0
        bufprintf(ob, "linebreak ");
#endif

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeLinebreak);

        return 1;
    }

    static int link(struct buf* ob, struct buf* link, struct buf* title, struct buf* content, void* opaque)
    {
#if 0
        printf("link: {%.*s} {%.*s} {%.*s}\n",
            link ? (int)link->size : 0,
            link ? link->data : nullptr,
            title ? (int)title->size : 0,
            title ? title->data : nullptr,
            content ? (int)content->size : 0,
            content ? content->data : nullptr);
#endif
        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeLink);
        writelen(ob, (uint32_t)link->size);
        writelen(ob, (uint32_t)content->size);

        bufappend(ob, link);
        bufappend(ob, content);

        return 1;
    }

    static int raw_html_tag(struct buf* ob, struct buf* tag, void* opaque)
    {
        //bufprintf(ob, "raw_html_tag: \"%.*s\" ", (int)tag->size, tag->data);

        if (!tag || !tag->size)
            return 1;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeHTML);
        writelen(ob, (uint32_t)tag->size);
        bufappend(ob, tag);

        return 1;
    }

    static int triple_emphasis(struct buf* ob, struct buf* text, char c, void* opaque)
    {
        //bufprintf(ob, "triple_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);

        if (!text || !text->size)
            return 1;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeEmphasis);
        bufputc(ob, c);
        bufputc(ob, 3);
        writelen(ob, (uint32_t)text->size);
        bufappend(ob, text);

        return 1;
    }

    static void normal_text(struct buf* ob, struct buf* text, void* opaque)
    {
        if (!text || !text->size)
            return;

        bufputc(ob, (uint8_t)cCodeSig);
        bufputc(ob, (uint8_t)cCodeText);
        writelen(ob, (uint32_t)text->size);
        for (uint32_t i = 0; i < text->size; i++)
        {
            uint8_t c = text->data[i];
            if (c == '\n')
                bufputc(ob, ' ');
            else if (c != 1)
            {
                assert(c >= 32 || c == '\t');
                bufputc(ob, c);
            }
        }
    }
};

// Renderer table passed to the markdown parser by init_from_markdown(), which also
// points its `opaque` field at the processor's "unsupported feature" flag before parsing.
// The entries are positional, so their order must match the mkd_renderer declaration.
static struct mkd_renderer g_mkd_parse =
{
    // Document-level callbacks (both are empty in this file).
    markdown::prolog,
    markdown::epilog,

    // Block-level callbacks.
    markdown::blockcode,
    markdown::blockquote,
    markdown::blockhtml,
    markdown::header,
    markdown::hrule,
    markdown::list,
    markdown::listitem,
    markdown::paragraph,
    markdown::table,
    markdown::table_cell,
    markdown::table_row,

    // Span-level callbacks.
    markdown::autolink,
    markdown::codespan,
    markdown::double_emphasis,
    markdown::emphasis,
    markdown::image,
    markdown::linebreak,
    markdown::link,
    markdown::raw_html_tag,
    markdown::triple_emphasis,

    // Low-level callbacks: no entity callback is installed, only normal_text.
    //markdown::entity,
    nullptr,
    markdown::normal_text,

    // NOTE(review): presumably the parser's nesting limit, the set of emphasis marker
    // characters, and the initial `opaque` pointer - confirm against mkd_renderer.
    64,
    "*_",
    nullptr
};

// Returns true if character `c` must be preceded by a backslash when written out as
// markdown text. '-', '.' and '!' are not in the set (their cases are disabled).
static bool markdown_should_escape(int c)
{
    switch (c)
    {
    case '\\': case '`':
    case '*':  case '_':
    case '{':  case '}':
    case '[':  case ']':
    case '<':  case '>':
    case '(':  case ')':
    case '#':  case '|':
        return true;
    }

    return false;
}

// Returns a copy of `str` in which every character accepted by
// markdown_should_escape() is prefixed with a backslash.
static std::string escape_markdown(const std::string& str)
{
    std::string escaped;

    for (const char ch : str)
    {
        const uint8_t c = (uint8_t)ch;

        if (markdown_should_escape(c))
            escaped.push_back('\\');

        escaped.push_back(c);
    }

    return escaped;
}

markdown_text_processor::markdown_text_processor() : 
    m_used_unsupported_feature(false)
{
}

// Resets the processor: drops all text, per-character details and links, and
// clears the "unsupported feature used" flag.
void markdown_text_processor::clear()
{
    m_links.clear();
    m_details.clear();
    m_text.clear();

    m_used_unsupported_feature = false;
}

// Rewrites links of the form
//   https://www.google.com/url?q=<target>&sa=D&source=editors&ust=...&usg=...
// to the percent-decoded <target>, then passes every link through encode_url().
// Links which don't match the expected layout exactly are left unchanged before encoding.
void markdown_text_processor::fix_redirect_urls()
{
    const char* const pPrefix = "https://www.google.com/url?q=";
    const size_t prefix_len = strlen(pPrefix);

    for (uint32_t link_index = 0; link_index < m_links.size(); link_index++)
    {
        std::string& link = m_links[link_index];

        if (!string_begins_with(link, pPrefix))
            continue;

        const size_t p = link.find("&sa=D&source=editors&ust=");
        if (p == std::string::npos)
            continue;

        const size_t r = link.find("&usg=");
        if ((r == std::string::npos) || (r < p))
            continue;

        // Only accept the exact suffix layout: 41 characters from "&sa=" up to "&usg=",
        // and 33 characters from "&usg=" to the end (so 74 characters from "&sa=" to the end).
        if ((r - p) != 41)
            continue;

        if ((link.size() - r) != 33)
            continue;

        // The prefix contains no '&' and the link starts with it, so p >= prefix_len.
        const std::string new_link(link.substr(prefix_len, p - prefix_len));

        // De-escape the string
        std::string new_link_deescaped;
        new_link_deescaped.reserve(new_link.size());

        for (size_t i = 0; i < new_link.size(); i++)
        {
            const char c = new_link[i];

            if ((c == '%') && ((i + 2) < new_link.size()))
            {
                const int da = convert_hex_digit(new_link[i + 1]);
                const int db = convert_hex_digit(new_link[i + 2]);
                if ((da >= 0) && (db >= 0))
                {
                    new_link_deescaped.push_back((char)(uint8_t)(da * 16 + db));
                    i += 2;
                    continue;
                }

                // Not a valid %XX escape: fall through and keep the characters verbatim
                // instead of silently deleting them.
            }

            new_link_deescaped.push_back(c);
        }

        link = new_link_deescaped;
    }

    for (uint32_t i = 0; i < m_links.size(); i++)
        m_links[i] = encode_url(m_links[i]);
}

void markdown_text_processor::init_from_markdown(const char* pText)
{
    struct buf* pIn = bufnew(4096);
    bufputs(pIn, pText);

    struct buf* pOut = bufnew(4096);
    
    m_used_unsupported_feature = false;
    g_mkd_parse.opaque = &m_used_unsupported_feature;
    markdown(pOut, pIn, &g_mkd_parse);

    std::string buf;
    buf.append((char*)pOut->data, pOut->size);

    init_from_codes(buf);

    bufrelease(pIn);
    bufrelease(pOut);
}

// Splits this text at character offset `ofs`: `a` receives the characters before
// `ofs`, `b` the characters from `ofs` onwards. Both receive the complete link table.
// Returns false (leaving `a` and `b` untouched) if `ofs` is out of range or if the
// character at `ofs` is emphasized. `a` and `b` must not be this object.
bool markdown_text_processor::split_in_half(uint32_t ofs, markdown_text_processor& a, markdown_text_processor& b) const
{
    assert((this != &a) && (this != &b));

    // Reject offsets the erase calls below could not handle.
    if ((ofs > m_text.size()) || (ofs > m_details.size()))
        return false;

    // There may be no detail entry at `ofs` when splitting exactly at the end.
    if ((ofs < m_details.size()) && (m_details[ofs].m_emphasis != 0))
        return false;

    a.m_text = m_text;
    a.m_details = m_details;
    a.m_links = m_links;

    b.m_text = m_text;
    b.m_details = m_details;
    b.m_links = m_links;

    a.m_text.erase(ofs, a.m_text.size() - ofs);
    a.m_details.erase(a.m_details.begin() + ofs, a.m_details.end());

    b.m_text.erase(0, ofs);
    b.m_details.erase(b.m_details.begin(), b.m_details.begin() + ofs);

    return true;
}

// Returns how many bytes of the text are equal to `c`.
uint32_t markdown_text_processor::count_char_in_text(uint8_t c) const
{
    uint32_t matches = 0;

    for (const char ch : m_text)
        matches += ((uint8_t)ch == c) ? 1u : 0u;

    return matches;
}

// If the text ends with a parenthesized group - optionally followed by one '.' and
// then any run of spaces/newlines - splits the text at the '(' which opens that
// group via split_in_half(). `a` and `b` are always cleared first.
// Returns false if no such trailing group exists or if split_in_half() refuses.
bool markdown_text_processor::split_last_parens(markdown_text_processor& a, markdown_text_processor& b) const
{
    a.clear();
    b.clear();

    if (m_text.empty())
        return false;

    int pos = (int)m_text.size() - 1;

    // Step backwards over trailing newlines and spaces.
    for (; (m_text[pos] == '\n') || (m_text[pos] == ' '); pos--)
    {
        if (pos == 0)
            return false;
    }

    // Step over a single trailing period.
    if (m_text[pos] == '.')
    {
        if (pos == 0)
            return false;

        pos--;
    }

    if (m_text[pos] != ')')
        return false;

    // Walk backwards to the '(' which balances the final ')'.
    int depth = 0;
    for (; pos >= 0; pos--)
    {
        const uint8_t ch = (uint8_t)m_text[pos];

        if (ch == ')')
            depth++;
        else if ((ch == '(') && (--depth == 0))
            break;
    }

    if (pos < 0)
        return false;

    return split_in_half(pos, a, b);
}

// Appends the raw text (no markdown syntax, links or HTML) to `out`. When
// `trim_end` is set, trailing newlines are removed from `out` and it is then
// passed through string_trim_end().
void markdown_text_processor::convert_to_plain(std::string& out, bool trim_end) const
{
    for (const char ch : m_text)
    {
        const uint8_t c = (uint8_t)ch;

        // Only printable bytes, tabs and newlines are expected in the text.
        assert((c >= 32) || (c == '\t') || (c == '\n'));

        out.push_back(c);
    }

    if (!trim_end)
        return;

    while (!out.empty() && (out.back() == '\n'))
        out.pop_back();

    string_trim_end(out);
}

void markdown_text_processor::convert_to_markdown(std::string& out, bool trim_end) const
{
    if (m_used_unsupported_feature)
        printf("markdown_text_processor::convert_to_markdown: Warning, one or more Markdown features were used in this text and won't be losslessly converted.\n");

    int emphasis = 0, emphasis_amount = 0;
    int cur_link_index = -1;

    for (uint32_t text_ofs = 0; text_ofs < m_text.size(); text_ofs++)
    {
        if (m_details[text_ofs].m_link_index != -1)
        {
            // Inside link at current position

            if (cur_link_index == -1)
            {
                // Not currently inside a link, so start a new link

                handle_html(out, text_ofs);

                out.push_back('[');

                // Beginning new link
                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
            }
            else if (cur_link_index != m_details[text_ofs].m_link_index)
            {
                // Switching to different link, so flush current link and start a new one
                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);

                out += "](";

                for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
                {
                    uint8_t c = m_links[cur_link_index][j];
                    if (markdown_should_escape(c))
                        out.push_back('\\');
                    out.push_back(c);
                }

                out.push_back(')');

                handle_html(out, text_ofs);

                out.push_back('[');
            }
            else
            {
                // Currently inside a link which hasn't changed

                handle_html(out, text_ofs);

                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
            }

            cur_link_index = m_details[text_ofs].m_link_index;
        }
        else
        {
            // Not inside link at current position

            if (cur_link_index != -1)
            {
                // Flush current link
                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);

                out += "](";

                for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
                {
                    uint8_t c = m_links[cur_link_index][j];
                    if (markdown_should_escape(c))
                        out.push_back('\\');
                    out.push_back(c);
                }

                out.push_back(')');

                handle_html(out, text_ofs);

                cur_link_index = -1;
            }
            else
            {
                handle_html(out, text_ofs);

                handle_emphasis(out, text_ofs, emphasis, emphasis_amount);
            }
        }

        if (m_details[text_ofs].m_linebreak)
        {
            out.push_back(' ');

            // One space will already be in the text.
            //out.push_back(' ');
        }

        uint8_t c = m_text[text_ofs];
        if (markdown_should_escape(c))
        {
            // Markdown escape
            out.push_back('\\');
        }

        out.push_back(c);
    }

    if (emphasis != 0)
    {
        // Flush last emphasis
        for (int j = 0; j < emphasis_amount; j++)
            out.push_back((uint8_t)emphasis);
    }
    emphasis = 0;
    emphasis_amount = 0;

    if (cur_link_index != -1)
    {
        // Flush last link
        out += "](";

        for (uint32_t j = 0; j < m_links[cur_link_index].size(); j++)
        {
            uint8_t c = m_links[cur_link_index][j];
            if (markdown_should_escape(c))
                out.push_back('\\');
            out.push_back(c);
        }

        out.push_back(')');
        cur_link_index = -1;
    }

    if (m_details.size() > m_text.size())
    {
        if (m_details.size() != m_text.size() + 1)
            panic("details array too large");

        if (m_details.back().m_html.size())
        {
            for (uint32_t i = 0; i < m_details.back().m_html.size(); i++)
                out += m_details.back().m_html[i];
        }
    }

    if (trim_end)
    {
        while (out.size() && out.back() == '\n')
            out.pop_back();

        string_trim_end(out);
    }
}

// Grows the details array, if necessary, so that index `ofs` is valid.
void markdown_text_processor::ensure_detail_ofs(uint32_t ofs)
{
    if (ofs < m_details.size())
        return;

    m_details.resize(ofs + 1);
}

// Discards the current text, details and links, then rebuilds them by decoding the
// record stream in `buf`.
void markdown_text_processor::init_from_codes(const std::string& buf)
{
    m_links.clear();
    m_details.clear();
    m_text.clear();

    parse_block(buf);
}

void markdown_text_processor::parse_block(const std::string& buf)
{
    uint32_t cur_ofs = 0;
    while (cur_ofs < buf.size())
    {
        uint8_t sig = (uint8_t)buf[cur_ofs];

        if (sig != markdown::cCodeSig)
            panic("Expected code block signature");

        cur_ofs++;
        if (cur_ofs == buf.size())
            panic("Premature end of buffer");

        uint8_t code_type = (uint8_t)buf[cur_ofs];
        cur_ofs++;

        switch (code_type)
        {
        case markdown::cCodeLink:
        {
            const uint32_t link_size = markdown::get_len32(buf, cur_ofs);
            const uint32_t content_size = markdown::get_len32(buf, cur_ofs);

            std::string link(markdown::get_string(buf, cur_ofs, link_size));
            std::string content(markdown::get_string(buf, cur_ofs, content_size));

            const uint32_t link_index = (uint32_t)m_links.size();
            m_links.push_back(link);

            const uint32_t start_text_ofs = (uint32_t)m_text.size();

            parse_block(content);

            const uint32_t end_text_ofs = (uint32_t)m_text.size();
            if (end_text_ofs)
            {
                ensure_detail_ofs(end_text_ofs - 1);

                for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
                    m_details[i].m_link_index = link_index;
            }

            break;
        }
        case markdown::cCodeEmphasis:
        {
            if (cur_ofs >= buf.size())
                panic("Buffer too small");

            const uint8_t c = (uint8_t)buf[cur_ofs++];

            if (cur_ofs >= buf.size())
                panic("Buffer too small");

            const uint32_t amount = (uint8_t)buf[cur_ofs++];

            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);

            std::string text(markdown::get_string(buf, cur_ofs, text_size));

            const uint32_t start_text_ofs = (uint32_t)m_text.size();

            parse_block(text);

            const uint32_t end_text_ofs = (uint32_t)m_text.size();

            if (end_text_ofs)
            {
                ensure_detail_ofs(end_text_ofs - 1);

                for (uint32_t i = start_text_ofs; i < end_text_ofs; i++)
                {
                    m_details[i].m_emphasis = c;
                    m_details[i].m_emphasis_amount = (uint8_t)amount;
                }
            }

            break;
        }
        case markdown::cCodeText:
        {
            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
            std::string text(markdown::get_string(buf, cur_ofs, text_size));

            for (size_t i = 0; i < text.size(); i++)
            {
                // value 1 is written by the markdown parser when it wants to delete a \n
                if (text[i] != 1)
                    m_text.push_back(text[i]);
            }

            break;
        }
        case markdown::cCodeParagraph:
        {
            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
            std::string text(markdown::get_string(buf, cur_ofs, text_size));

            parse_block(text);

            m_text += "\n";
            m_text += "\n";

            ensure_detail_ofs((uint32_t)m_text.size() - 1);
            m_details[m_text.size() - 1].m_end_paragraph = true;

            break;
        }
        case markdown::cCodeLinebreak:
        {
            m_text += "\n";

            ensure_detail_ofs((uint32_t)m_text.size() - 1);
            m_details[m_text.size() - 1].m_linebreak = true;

            break;
        }
        case markdown::cCodeHTML:
        {
            const uint32_t text_size = markdown::get_len32(buf, cur_ofs);
            std::string text(markdown::get_string(buf, cur_ofs, text_size));

            uint32_t ofs = (uint32_t)m_text.size();
            ensure_detail_ofs(ofs);
            m_details[ofs].m_html.push_back(text);

            break;
        }
        default:
            panic("Invalid code");
            break;
        }
    }

    if (m_text.size())
        ensure_detail_ofs((uint32_t)m_text.size() - 1);
}

// Appends to `out` all raw HTML recorded at `text_ofs`; it belongs in front of the
// character at that offset.
void markdown_text_processor::handle_html(std::string& out, uint32_t text_ofs) const
{
    for (const auto& html : m_details[text_ofs].m_html)
        out += html;
}

void markdown_text_processor::handle_emphasis(std::string& out, uint32_t text_ofs, int& emphasis, int& emphasis_amount) const
{
    if (m_details[text_ofs].m_emphasis != 0)
    {
        // Desired emphasis
        if ((m_details[text_ofs].m_emphasis == emphasis) && (m_details[text_ofs].m_emphasis_amount == emphasis_amount))
        {
            // No change to emphasis

            // Any HTML appears before this character
            //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
            //    out += m_details[text_ofs].m_html[i];
        }
        else
        {
            // Change to emphasis
            if (emphasis != 0)
            {
                // Flush out current emphasis
                for (int j = 0; j < emphasis_amount; j++)
                    out.push_back((uint8_t)emphasis);
            }

            // Any HTML appears before this character
            //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
            //    out += m_details[text_ofs].m_html[i];

            emphasis = m_details[text_ofs].m_emphasis;
            emphasis_amount = m_details[text_ofs].m_emphasis_amount;

            // Start new emphasis
            for (int j = 0; j < emphasis_amount; j++)
                out.push_back((uint8_t)emphasis);
        }
    }
    else if (m_details[text_ofs].m_emphasis == 0)
    {
        // Desires no emphasis
        if (emphasis != 0)
        {
            // Flush out current emphasis
            for (int j = 0; j < emphasis_amount; j++)
                out.push_back((uint8_t)emphasis);
        }
        emphasis = 0;
        emphasis_amount = 0;

        // Any HTML appears before this character
        //for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
        //    out += m_details[text_ofs].m_html[i];
    }
}

#if 0
const char* pText =
u8R"(

<ul>test</ul>

_text1_  
**text2**  
**_text3_**  

![alt text](https://github.com/n48.png "Logo Title")

# Heading 1
## Heading 2
### Heading 3

1. XXXXX  
  1. Item 1
  2. Item 2
2. YYYYY
3. ZZZZZ

| Tables        | Are           | Cool  |
| ------------- |:-------------:| -----:|
| col 3 is      | right-aligned | $1600 |
| col 2 is      | centered      |   $12 |
| zebra stripes | are neat      |    $1 |

* [blahblah](www.blah1.com)
* [blahblah2](www.blah2.com)

`
this is code 1
this is code 2
`

```
this is code 3
this is code 4
```

> blockquote 1  
> blockquote 2

---

* AAA
* BBB
  * ZZZZ1
  * ZZZZ2
* CCC)";

markdown_text_processor tp;
tp.init_from_markdown(pText);

std::string desc;
tp.convert_to_plain(desc, true);

uprintf("%s\n", desc.c_str());

return 0;
#endif