2023-02-20 17:59:08 -05:00
// Copyright (C) 2023 Richard Geldreich, Jr.
// markdown_proc.cpp
# include "markdown_proc.h"
2023-10-05 14:07:39 -04:00
struct markdown
{
enum
{
cCodeSig = 0xFE ,
cCodeLink = 1 ,
cCodeEmphasis ,
cCodeText ,
cCodeParagraph ,
cCodeLinebreak ,
cCodeHTML
} ;
static void bufappend ( struct buf * out , struct buf * in )
{
assert ( in ! = out ) ;
if ( in & & in - > size )
bufput ( out , in - > data , in - > size ) ;
}
static void writelen ( struct buf * ob , uint32_t size )
{
bufputc ( ob , ( uint8_t ) ( size & 0xFF ) ) ;
bufputc ( ob , ( uint8_t ) ( ( size > > 8 ) & 0xFF ) ) ;
bufputc ( ob , ( uint8_t ) ( ( size > > 16 ) & 0xFF ) ) ;
bufputc ( ob , ( uint8_t ) ( ( size > > 24 ) & 0xFF ) ) ;
}
static std : : string get_string ( const std : : string & buf , uint32_t & cur_ofs , uint32_t text_size )
{
std : : string text ;
if ( cur_ofs + text_size > buf . size ( ) )
panic ( " Buffer too small " ) ;
text . append ( buf . c_str ( ) + cur_ofs , text_size ) ;
cur_ofs + = text_size ;
return text ;
}
static uint32_t get_len32 ( const std : : string & buf , uint32_t & ofs )
{
if ( ( ofs + 4 ) > buf . size ( ) )
panic ( " Buffer too small " ) ;
uint32_t l = ( uint8_t ) buf [ ofs ] |
( ( ( uint8_t ) buf [ ofs + 1 ] ) < < 8 ) |
( ( ( uint8_t ) buf [ ofs + 2 ] ) < < 16 ) |
( ( ( uint8_t ) buf [ ofs + 3 ] ) < < 24 ) ;
ofs + = 4 ;
return l ;
}
static void prolog ( struct buf * ob , void * opaque )
{
}
static void epilog ( struct buf * ob , void * opaque )
{
}
/* block level callbacks - NULL skips the block */
static void blockcode ( struct buf * ob , struct buf * text , void * opaque )
{
#if 0
bufprintf ( ob , " blockcode: \" %.*s \" " , ( int ) text - > size , text - > data ) ;
# endif
panic ( " unsupported markdown feature " ) ;
}
static void blockquote ( struct buf * ob , struct buf * text , void * opaque )
{
#if 0
bufprintf ( ob , " blockquote: \" %.*s \" " , ( int ) text - > size , text - > data ) ;
# endif
// TODO: unsupported block quotes (here for when we're converting to plain text)
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void blockhtml ( struct buf * ob , struct buf * text , void * opaque )
{
#if 0
bufprintf ( ob , " blockhtml: \" %.*s \" " , ( int ) text - > size , text - > data ) ;
# endif
// TODO: Not fully supported - just dropping it
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
}
static void header ( struct buf * ob , struct buf * text , int level , void * opaque )
{
#if 0
bufprintf ( ob , " header: %i \" %.*s \" " , level , ( int ) text - > size , text - > data ) ;
# endif
// TODO: Not fully supported
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void hrule ( struct buf * ob , void * opaque )
{
// TODO
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
}
static void list ( struct buf * ob , struct buf * text , int flags , void * opaque )
{
// TODO: not fully supporting lists (here for when we're converting to plain text)
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void listitem ( struct buf * ob , struct buf * text , int flags , void * opaque )
{
// TODO: not fully supporting lists (here for when we're converting to plain text)
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void paragraph ( struct buf * ob , struct buf * text , void * opaque )
{
#if 0
bufprintf ( ob , " paragraph: \" %.*s \" " , ( int ) text - > size , text - > data ) ;
# endif
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void table ( struct buf * ob , struct buf * head_row , struct buf * rows , void * opaque )
{
#if 0
bufprintf ( ob , " table: \" %.*s \" \" %.*s \" " , ( int ) head_row - > size , head_row - > data , ( int ) rows - > size , rows - > data ) ;
# endif
//panic("unsupported markdown feature");
// TODO: not fully supported, just for plaintext conversion
if ( opaque )
* ( bool * ) opaque = true ;
}
static void table_cell ( struct buf * ob , struct buf * text , int flags , void * opaque )
{
#if 0
bufprintf ( ob , " table_cell: \" %.*s \" %i " , ( int ) text - > size , text - > data , flags ) ;
# endif
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
// TODO: not fully supported, just for plaintext conversion
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeParagraph ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
}
static void table_row ( struct buf * ob , struct buf * cells , int flags , void * opaque )
{
#if 0
bufprintf ( ob , " table_row: \" %.*s \" %i " , ( int ) cells - > size , cells - > data , flags ) ;
# endif
//panic("unsupported markdown feature");
// TODO: not fully supported, just for plaintext conversion
if ( opaque )
* ( bool * ) opaque = true ;
}
static int autolink ( struct buf * ob , struct buf * link , enum mkd_autolink type , void * opaque )
{
#if 0
bufprintf ( ob , " autolink: %u \" %.*s \" " , type , ( int ) link - > size , link - > data ) ;
# endif
panic ( " unsupported markdown feature " ) ;
return 1 ;
}
static int codespan ( struct buf * ob , struct buf * text , void * opaque )
{
#if 0
bufprintf ( ob , " codespan: \" %.*s \" " , ( int ) text - > size , text - > data ) ;
# endif
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeText ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
return 1 ;
}
static int double_emphasis ( struct buf * ob , struct buf * text , char c , void * opaque )
{
#if 0
bufprintf ( ob , " double_emphasis: %u ('%c') [%.*s] " , c , c , ( int ) text - > size , text - > data ) ;
# endif
if ( ! text | | ! text - > size )
return 1 ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeEmphasis ) ;
bufputc ( ob , c ) ;
bufputc ( ob , 2 ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
return 1 ;
}
static int emphasis ( struct buf * ob , struct buf * text , char c , void * opaque )
{
#if 0
bufprintf ( ob , " emphasis: %u ('%c') [%.*s] " , c , c , ( int ) text - > size , text - > data ) ;
# endif
if ( ! text | | ! text - > size )
return 1 ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeEmphasis ) ;
bufputc ( ob , c ) ;
bufputc ( ob , 1 ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
return 1 ;
}
static int image ( struct buf * ob , struct buf * link , struct buf * title , struct buf * alt , void * opaque )
{
#if 0
bufprintf ( ob , " image: \" %.*s \" \" %.*s \" \" %.*s \" " ,
( int ) link - > size , link - > data ,
( int ) title - > size , title - > data ,
( int ) alt - > size , alt - > data ) ;
# endif
//panic("unsupported markdown feature");
if ( opaque )
* ( bool * ) opaque = true ;
if ( alt )
{
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeText ) ;
writelen ( ob , ( uint32_t ) alt - > size ) ;
bufappend ( ob , alt ) ;
}
return 1 ;
}
static int linebreak ( struct buf * ob , void * opaque )
{
#if 0
bufprintf ( ob , " linebreak " ) ;
# endif
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeLinebreak ) ;
return 1 ;
}
static int link ( struct buf * ob , struct buf * link , struct buf * title , struct buf * content , void * opaque )
{
#if 0
printf ( " link: {%.*s} {%.*s} {%.*s} \n " ,
link ? ( int ) link - > size : 0 ,
link ? link - > data : nullptr ,
title ? ( int ) title - > size : 0 ,
title ? title - > data : nullptr ,
content ? ( int ) content - > size : 0 ,
content ? content - > data : nullptr ) ;
# endif
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeLink ) ;
writelen ( ob , ( uint32_t ) link - > size ) ;
writelen ( ob , ( uint32_t ) content - > size ) ;
bufappend ( ob , link ) ;
bufappend ( ob , content ) ;
return 1 ;
}
static int raw_html_tag ( struct buf * ob , struct buf * tag , void * opaque )
{
//bufprintf(ob, "raw_html_tag: \"%.*s\" ", (int)tag->size, tag->data);
if ( ! tag | | ! tag - > size )
return 1 ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeHTML ) ;
writelen ( ob , ( uint32_t ) tag - > size ) ;
bufappend ( ob , tag ) ;
return 1 ;
}
static int triple_emphasis ( struct buf * ob , struct buf * text , char c , void * opaque )
{
//bufprintf(ob, "triple_emphasis: %u ('%c') [%.*s] ", c, c, (int)text->size, text->data);
if ( ! text | | ! text - > size )
return 1 ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeEmphasis ) ;
bufputc ( ob , c ) ;
bufputc ( ob , 3 ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
bufappend ( ob , text ) ;
return 1 ;
}
static void normal_text ( struct buf * ob , struct buf * text , void * opaque )
{
if ( ! text | | ! text - > size )
return ;
bufputc ( ob , ( uint8_t ) cCodeSig ) ;
bufputc ( ob , ( uint8_t ) cCodeText ) ;
writelen ( ob , ( uint32_t ) text - > size ) ;
for ( uint32_t i = 0 ; i < text - > size ; i + + )
{
uint8_t c = text - > data [ i ] ;
if ( c = = ' \n ' )
bufputc ( ob , ' ' ) ;
else if ( c ! = 1 )
{
assert ( c > = 32 | | c = = ' \t ' ) ;
bufputc ( ob , c ) ;
}
}
}
} ;
static struct mkd_renderer g_mkd_parse =
{
markdown : : prolog ,
markdown : : epilog ,
markdown : : blockcode ,
markdown : : blockquote ,
markdown : : blockhtml ,
markdown : : header ,
markdown : : hrule ,
markdown : : list ,
markdown : : listitem ,
markdown : : paragraph ,
markdown : : table ,
markdown : : table_cell ,
markdown : : table_row ,
markdown : : autolink ,
markdown : : codespan ,
markdown : : double_emphasis ,
markdown : : emphasis ,
markdown : : image ,
markdown : : linebreak ,
markdown : : link ,
markdown : : raw_html_tag ,
markdown : : triple_emphasis ,
//markdown::entity,
nullptr ,
markdown : : normal_text ,
64 ,
" *_ " ,
nullptr
} ;
2023-02-20 17:59:08 -05:00
// Returns true if character 'c' must be preceded by a backslash when it is written
// out as literal Markdown text. '-', '.' and '!' are deliberately not escaped.
static bool markdown_should_escape(int c)
{
    static const char s_escaped_chars[] =
    {
        '\\', '`', '*', '_', '{', '}', '[', ']', '<', '>', '(', ')', '#', '|'
    };

    for (size_t i = 0; i < sizeof(s_escaped_chars); i++)
    {
        if (c == s_escaped_chars[i])
            return true;
    }

    return false;
}
static std : : string escape_markdown ( const std : : string & str )
{
std : : string out ;
for ( uint32_t i = 0 ; i < str . size ( ) ; i + + )
{
uint8_t c = str [ i ] ;
if ( markdown_should_escape ( c ) )
out . push_back ( ' \\ ' ) ;
out . push_back ( c ) ;
}
return out ;
}
2023-10-05 14:07:39 -04:00
// Constructs an empty processor with the "unsupported feature" flag cleared.
markdown_text_processor::markdown_text_processor()
    : m_used_unsupported_feature(false)
{
}
void markdown_text_processor : : clear ( )
{
2023-10-05 14:07:39 -04:00
m_used_unsupported_feature = false ;
2023-02-20 17:59:08 -05:00
m_text . clear ( ) ;
m_details . clear ( ) ;
m_links . clear ( ) ;
}
void markdown_text_processor : : fix_redirect_urls ( )
{
for ( uint32_t link_index = 0 ; link_index < m_links . size ( ) ; link_index + + )
{
const char * pPrefix = " https://www.google.com/url?q= " ;
if ( ! string_begins_with ( m_links [ link_index ] , pPrefix ) )
continue ;
size_t p ;
if ( ( p = m_links [ link_index ] . find ( " &sa=D&source=editors&ust= " ) ) = = std : : string : : npos )
continue ;
size_t r = m_links [ link_index ] . find ( " &usg= " ) ;
if ( ( r = = std : : string : : npos ) | | ( r < p ) )
continue ;
if ( ( r - p ) ! = 41 )
continue ;
if ( ( m_links [ link_index ] . size ( ) - r ) ! = 33 )
continue ;
if ( ( m_links [ link_index ] . size ( ) - p ) ! = 74 )
continue ;
std : : string new_link ( m_links [ link_index ] ) ;
new_link . erase ( p , new_link . size ( ) - p ) ;
new_link . erase ( 0 , strlen ( pPrefix ) ) ;
// De-escape the string
std : : string new_link_deescaped ;
for ( uint32_t i = 0 ; i < new_link . size ( ) ; i + + )
{
uint8_t c = new_link [ i ] ;
if ( ( c = = ' % ' ) & & ( ( i + 2 ) < new_link . size ( ) ) )
{
int da = convert_hex_digit ( new_link [ i + 1 ] ) ;
int db = convert_hex_digit ( new_link [ i + 2 ] ) ;
if ( da > = 0 & & db > = 0 )
{
int val = da * 16 + db ;
new_link_deescaped . push_back ( ( uint8_t ) val ) ;
}
i + = 2 ;
}
else
new_link_deescaped . push_back ( c ) ;
}
//printf("%s\n", new_link.c_str());
m_links [ link_index ] = new_link_deescaped ;
}
for ( uint32_t i = 0 ; i < m_links . size ( ) ; i + + )
m_links [ i ] = encode_url ( m_links [ i ] ) ;
}
void markdown_text_processor : : init_from_markdown ( const char * pText )
{
struct buf * pIn = bufnew ( 4096 ) ;
bufputs ( pIn , pText ) ;
struct buf * pOut = bufnew ( 4096 ) ;
2023-10-05 14:07:39 -04:00
m_used_unsupported_feature = false ;
g_mkd_parse . opaque = & m_used_unsupported_feature ;
markdown ( pOut , pIn , & g_mkd_parse ) ;
2023-02-20 17:59:08 -05:00
std : : string buf ;
buf . append ( ( char * ) pOut - > data , pOut - > size ) ;
init_from_codes ( buf ) ;
bufrelease ( pIn ) ;
bufrelease ( pOut ) ;
}
bool markdown_text_processor : : split_in_half ( uint32_t ofs , markdown_text_processor & a , markdown_text_processor & b ) const
{
assert ( ( this ! = & a ) & & ( this ! = & b ) ) ;
if ( m_details [ ofs ] . m_emphasis ! = 0 )
return false ;
a . m_text = m_text ;
a . m_details = m_details ;
a . m_links = m_links ;
b . m_text = m_text ;
b . m_details = m_details ;
b . m_links = m_links ;
a . m_text . erase ( ofs , a . m_text . size ( ) - ofs ) ;
a . m_details . erase ( a . m_details . begin ( ) + ofs , a . m_details . end ( ) ) ;
b . m_text . erase ( 0 , ofs ) ;
b . m_details . erase ( b . m_details . begin ( ) , b . m_details . begin ( ) + ofs ) ;
return true ;
}
uint32_t markdown_text_processor : : count_char_in_text ( uint8_t c ) const
{
uint32_t num = 0 ;
for ( uint32_t i = 0 ; i < m_text . size ( ) ; i + + )
{
if ( ( uint8_t ) m_text [ i ] = = c )
num + + ;
}
return num ;
}
bool markdown_text_processor : : split_last_parens ( markdown_text_processor & a , markdown_text_processor & b ) const
{
a . clear ( ) ;
b . clear ( ) ;
if ( ! m_text . size ( ) )
return false ;
int ofs = ( int ) m_text . size ( ) - 1 ;
while ( ( m_text [ ofs ] = = ' \n ' ) | | ( m_text [ ofs ] = = ' ' ) )
{
if ( ! ofs )
return false ;
ofs - - ;
}
if ( m_text [ ofs ] = = ' . ' )
{
if ( ! ofs )
return false ;
ofs - - ;
}
if ( m_text [ ofs ] ! = ' ) ' )
return false ;
int level = 0 ;
while ( ofs > = 0 )
{
uint8_t c = ( uint8_t ) m_text [ ofs ] ;
if ( c = = ' ) ' )
level + + ;
else if ( c = = ' ( ' )
{
level - - ;
if ( ! level )
break ;
}
ofs - - ;
}
if ( ofs < 0 )
return false ;
return split_in_half ( ofs , a , b ) ;
}
void markdown_text_processor : : convert_to_plain ( std : : string & out , bool trim_end ) const
{
for ( uint32_t i = 0 ; i < m_text . size ( ) ; i + + )
{
uint8_t c = m_text [ i ] ;
assert ( ( c = = ' \n ' ) | | ( c = = ' \t ' ) | | ( c > = 32 ) ) ;
out . push_back ( c ) ;
}
if ( trim_end )
{
while ( out . size ( ) & & out . back ( ) = = ' \n ' )
out . pop_back ( ) ;
string_trim_end ( out ) ;
}
}
void markdown_text_processor : : convert_to_markdown ( std : : string & out , bool trim_end ) const
{
2023-10-05 14:07:39 -04:00
if ( m_used_unsupported_feature )
printf ( " markdown_text_processor::convert_to_markdown: Warning, one or more Markdown features were used in this text and won't be losslessly converted. \n " ) ;
2023-02-20 17:59:08 -05:00
int emphasis = 0 , emphasis_amount = 0 ;
int cur_link_index = - 1 ;
for ( uint32_t text_ofs = 0 ; text_ofs < m_text . size ( ) ; text_ofs + + )
{
if ( m_details [ text_ofs ] . m_link_index ! = - 1 )
{
// Inside link at current position
if ( cur_link_index = = - 1 )
{
// Not currently inside a link, so start a new link
handle_html ( out , text_ofs ) ;
out . push_back ( ' [ ' ) ;
// Beginning new link
handle_emphasis ( out , text_ofs , emphasis , emphasis_amount ) ;
}
else if ( cur_link_index ! = m_details [ text_ofs ] . m_link_index )
{
// Switching to different link, so flush current link and start a new one
handle_emphasis ( out , text_ofs , emphasis , emphasis_amount ) ;
out + = " ]( " ;
for ( uint32_t j = 0 ; j < m_links [ cur_link_index ] . size ( ) ; j + + )
{
uint8_t c = m_links [ cur_link_index ] [ j ] ;
if ( markdown_should_escape ( c ) )
out . push_back ( ' \\ ' ) ;
out . push_back ( c ) ;
}
out . push_back ( ' ) ' ) ;
handle_html ( out , text_ofs ) ;
out . push_back ( ' [ ' ) ;
}
else
{
// Currently inside a link which hasn't changed
handle_html ( out , text_ofs ) ;
handle_emphasis ( out , text_ofs , emphasis , emphasis_amount ) ;
}
cur_link_index = m_details [ text_ofs ] . m_link_index ;
}
else
{
// Not inside link at current position
if ( cur_link_index ! = - 1 )
{
// Flush current link
handle_emphasis ( out , text_ofs , emphasis , emphasis_amount ) ;
out + = " ]( " ;
for ( uint32_t j = 0 ; j < m_links [ cur_link_index ] . size ( ) ; j + + )
{
uint8_t c = m_links [ cur_link_index ] [ j ] ;
if ( markdown_should_escape ( c ) )
out . push_back ( ' \\ ' ) ;
out . push_back ( c ) ;
}
out . push_back ( ' ) ' ) ;
handle_html ( out , text_ofs ) ;
cur_link_index = - 1 ;
}
else
{
handle_html ( out , text_ofs ) ;
handle_emphasis ( out , text_ofs , emphasis , emphasis_amount ) ;
}
}
if ( m_details [ text_ofs ] . m_linebreak )
{
out . push_back ( ' ' ) ;
// One space will already be in the text.
//out.push_back(' ');
}
uint8_t c = m_text [ text_ofs ] ;
if ( markdown_should_escape ( c ) )
{
// Markdown escape
out . push_back ( ' \\ ' ) ;
}
out . push_back ( c ) ;
}
if ( emphasis ! = 0 )
{
// Flush last emphasis
for ( int j = 0 ; j < emphasis_amount ; j + + )
out . push_back ( ( uint8_t ) emphasis ) ;
}
emphasis = 0 ;
emphasis_amount = 0 ;
if ( cur_link_index ! = - 1 )
{
// Flush last link
out + = " ]( " ;
for ( uint32_t j = 0 ; j < m_links [ cur_link_index ] . size ( ) ; j + + )
{
uint8_t c = m_links [ cur_link_index ] [ j ] ;
if ( markdown_should_escape ( c ) )
out . push_back ( ' \\ ' ) ;
out . push_back ( c ) ;
}
out . push_back ( ' ) ' ) ;
cur_link_index = - 1 ;
}
if ( m_details . size ( ) > m_text . size ( ) )
{
if ( m_details . size ( ) ! = m_text . size ( ) + 1 )
panic ( " details array too large " ) ;
if ( m_details . back ( ) . m_html . size ( ) )
{
for ( uint32_t i = 0 ; i < m_details . back ( ) . m_html . size ( ) ; i + + )
out + = m_details . back ( ) . m_html [ i ] ;
}
}
if ( trim_end )
{
while ( out . size ( ) & & out . back ( ) = = ' \n ' )
out . pop_back ( ) ;
string_trim_end ( out ) ;
}
}
void markdown_text_processor : : ensure_detail_ofs ( uint32_t ofs )
{
if ( m_details . size ( ) < = ofs )
m_details . resize ( ofs + 1 ) ;
}
void markdown_text_processor : : init_from_codes ( const std : : string & buf )
{
m_text . resize ( 0 ) ;
m_details . resize ( 0 ) ;
m_links . resize ( 0 ) ;
parse_block ( buf ) ;
}
void markdown_text_processor : : parse_block ( const std : : string & buf )
{
uint32_t cur_ofs = 0 ;
while ( cur_ofs < buf . size ( ) )
{
uint8_t sig = ( uint8_t ) buf [ cur_ofs ] ;
if ( sig ! = markdown : : cCodeSig )
panic ( " Expected code block signature " ) ;
cur_ofs + + ;
if ( cur_ofs = = buf . size ( ) )
panic ( " Premature end of buffer " ) ;
uint8_t code_type = ( uint8_t ) buf [ cur_ofs ] ;
cur_ofs + + ;
switch ( code_type )
{
case markdown : : cCodeLink :
{
const uint32_t link_size = markdown : : get_len32 ( buf , cur_ofs ) ;
const uint32_t content_size = markdown : : get_len32 ( buf , cur_ofs ) ;
std : : string link ( markdown : : get_string ( buf , cur_ofs , link_size ) ) ;
std : : string content ( markdown : : get_string ( buf , cur_ofs , content_size ) ) ;
const uint32_t link_index = ( uint32_t ) m_links . size ( ) ;
m_links . push_back ( link ) ;
const uint32_t start_text_ofs = ( uint32_t ) m_text . size ( ) ;
parse_block ( content ) ;
const uint32_t end_text_ofs = ( uint32_t ) m_text . size ( ) ;
if ( end_text_ofs )
{
ensure_detail_ofs ( end_text_ofs - 1 ) ;
for ( uint32_t i = start_text_ofs ; i < end_text_ofs ; i + + )
m_details [ i ] . m_link_index = link_index ;
}
break ;
}
case markdown : : cCodeEmphasis :
{
if ( cur_ofs > = buf . size ( ) )
panic ( " Buffer too small " ) ;
const uint8_t c = ( uint8_t ) buf [ cur_ofs + + ] ;
if ( cur_ofs > = buf . size ( ) )
panic ( " Buffer too small " ) ;
const uint32_t amount = ( uint8_t ) buf [ cur_ofs + + ] ;
const uint32_t text_size = markdown : : get_len32 ( buf , cur_ofs ) ;
std : : string text ( markdown : : get_string ( buf , cur_ofs , text_size ) ) ;
const uint32_t start_text_ofs = ( uint32_t ) m_text . size ( ) ;
parse_block ( text ) ;
const uint32_t end_text_ofs = ( uint32_t ) m_text . size ( ) ;
if ( end_text_ofs )
{
ensure_detail_ofs ( end_text_ofs - 1 ) ;
for ( uint32_t i = start_text_ofs ; i < end_text_ofs ; i + + )
{
m_details [ i ] . m_emphasis = c ;
m_details [ i ] . m_emphasis_amount = ( uint8_t ) amount ;
}
}
break ;
}
case markdown : : cCodeText :
{
const uint32_t text_size = markdown : : get_len32 ( buf , cur_ofs ) ;
std : : string text ( markdown : : get_string ( buf , cur_ofs , text_size ) ) ;
for ( size_t i = 0 ; i < text . size ( ) ; i + + )
{
// value 1 is written by the markdown parser when it wants to delete a \n
if ( text [ i ] ! = 1 )
m_text . push_back ( text [ i ] ) ;
}
break ;
}
case markdown : : cCodeParagraph :
{
const uint32_t text_size = markdown : : get_len32 ( buf , cur_ofs ) ;
std : : string text ( markdown : : get_string ( buf , cur_ofs , text_size ) ) ;
parse_block ( text ) ;
m_text + = " \n " ;
m_text + = " \n " ;
ensure_detail_ofs ( ( uint32_t ) m_text . size ( ) - 1 ) ;
m_details [ m_text . size ( ) - 1 ] . m_end_paragraph = true ;
break ;
}
case markdown : : cCodeLinebreak :
{
m_text + = " \n " ;
ensure_detail_ofs ( ( uint32_t ) m_text . size ( ) - 1 ) ;
m_details [ m_text . size ( ) - 1 ] . m_linebreak = true ;
break ;
}
case markdown : : cCodeHTML :
{
const uint32_t text_size = markdown : : get_len32 ( buf , cur_ofs ) ;
std : : string text ( markdown : : get_string ( buf , cur_ofs , text_size ) ) ;
uint32_t ofs = ( uint32_t ) m_text . size ( ) ;
ensure_detail_ofs ( ofs ) ;
m_details [ ofs ] . m_html . push_back ( text ) ;
break ;
}
default :
panic ( " Invalid code " ) ;
break ;
}
}
if ( m_text . size ( ) )
ensure_detail_ofs ( ( uint32_t ) m_text . size ( ) - 1 ) ;
}
void markdown_text_processor : : handle_html ( std : : string & out , uint32_t text_ofs ) const
{
// Any HTML appears before this character
for ( uint32_t i = 0 ; i < m_details [ text_ofs ] . m_html . size ( ) ; i + + )
out + = m_details [ text_ofs ] . m_html [ i ] ;
}
void markdown_text_processor : : handle_emphasis ( std : : string & out , uint32_t text_ofs , int & emphasis , int & emphasis_amount ) const
{
if ( m_details [ text_ofs ] . m_emphasis ! = 0 )
{
// Desired emphasis
if ( ( m_details [ text_ofs ] . m_emphasis = = emphasis ) & & ( m_details [ text_ofs ] . m_emphasis_amount = = emphasis_amount ) )
{
// No change to emphasis
// Any HTML appears before this character
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
// out += m_details[text_ofs].m_html[i];
}
else
{
// Change to emphasis
if ( emphasis ! = 0 )
{
// Flush out current emphasis
for ( int j = 0 ; j < emphasis_amount ; j + + )
out . push_back ( ( uint8_t ) emphasis ) ;
}
// Any HTML appears before this character
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
// out += m_details[text_ofs].m_html[i];
emphasis = m_details [ text_ofs ] . m_emphasis ;
emphasis_amount = m_details [ text_ofs ] . m_emphasis_amount ;
// Start new emphasis
for ( int j = 0 ; j < emphasis_amount ; j + + )
out . push_back ( ( uint8_t ) emphasis ) ;
}
}
else if ( m_details [ text_ofs ] . m_emphasis = = 0 )
{
// Desires no emphasis
if ( emphasis ! = 0 )
{
// Flush out current emphasis
for ( int j = 0 ; j < emphasis_amount ; j + + )
out . push_back ( ( uint8_t ) emphasis ) ;
}
emphasis = 0 ;
emphasis_amount = 0 ;
// Any HTML appears before this character
//for (uint32_t i = 0; i < m_details[text_ofs].m_html.size(); i++)
// out += m_details[text_ofs].m_html[i];
}
}
2023-10-05 14:07:39 -04:00
#if 0
const char * pText =
u8R " (
< ul > test < / ul >
_text1_
* * text2 * *
* * _text3_ * *
! [ alt text ] ( https : //github.com/n48.png "Logo Title")
# Heading 1
# # Heading 2
# ## Heading 3
1. XXXXX
1. Item 1
2. Item 2
2. YYYYY
3. ZZZZZ
| Tables | Are | Cool |
| - - - - - - - - - - - - - | : - - - - - - - - - - - - - : | - - - - - : |
| col 3 is | right - aligned | $ 1600 |
| col 2 is | centered | $ 12 |
| zebra stripes | are neat | $ 1 |
* [ blahblah ] ( www . blah1 . com )
* [ blahblah2 ] ( www . blah2 . com )
`
this is code 1
this is code 2
`
` ` `
this is code 3
this is code 4
` ` `
> blockquote 1
> blockquote 2
- - -
* AAA
* BBB
* ZZZZ1
* ZZZZ2
* CCC ) " ;
markdown_text_processor tp ;
tp . init_from_markdown ( pText ) ;
std : : string desc ;
tp . convert_to_plain ( desc , true ) ;
uprintf ( " %s \n " , desc . c_str ( ) ) ;
return 0 ;
# endif