%{
/**********************************************************************

  markdown_parser.leg - markdown parser in C using a PEG grammar.
  (c) 2008 John MacFarlane (jgm at berkeley dot edu).

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License or the MIT
  license.  See LICENSE for details.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

 ***********************************************************************/

#include <stdbool.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "markdown_peg.h"
#include "utility_functions.h"

/**********************************************************************

  Definitions for leg parser generator.
  YY_INPUT is the function the parser calls to get new input.
  We take all new input from (static) charbuf.

 ***********************************************************************/

# define YYSTYPE element *
#ifdef __DEBUG__
# define YY_DEBUG 1
#endif

/* Hands the parser one byte from charbuf per call: result is 1 when a byte
   was stored in buf[0], and 0 when charbuf is NULL or at its terminating NUL.
   The byte is copied directly rather than routed through an int compared
   with EOF, so an input byte of '\377' can never be mistaken for end of
   input on platforms where plain char is signed.  max_size is unused. */
#define YY_INPUT(buf, result, max_size)              \
{                                                    \
    if (charbuf && *charbuf != '\0') {               \
        *(buf)= *charbuf++;                          \
        result= 1;                                   \
    } else {                                         \
        result= 0;                                   \
    }                                                \
}

#define YY_RULE(T) T

/**********************************************************************

  PEG grammar and parser actions for markdown syntax.

 ***********************************************************************/

%}

# Top-level rule: an optional byte-order mark followed by any number of
# blocks.  Blocks are consed in reverse, so the list is reversed before it
# is stored in parse_result.
Doc =       BOM? a:StartList ( Block { a = cons($$, a); } )*
            { parse_result = reverse(a); }

# Alternatives are tried in order; Plain is the fallback.
Block =     BlankLine*
            ( BlockQuote
            | Verbatim
            | Note
            | Reference
            | HorizontalRule
            | Heading
            | OrderedList
            | BulletList
            | HtmlBlock
            | StyleBlock
            | Para
            | Plain )

Para =      NonindentSpace a:Inlines BlankLine+
            { $$ = a; $$->key = PARA; }

Plain =     a:Inlines
            { $$ = a; $$->key = PLAIN; }

AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline

# The heading level is derived from the number of '#' characters captured.
AtxStart =  < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
            { $$ = mk_element(H1 + (strlen(yytext) - 1)); }

# s only carries the heading key; it is freed once the key has been copied
# into the new list element.
AtxHeading = s:AtxStart Sp? a:StartList ( AtxInline { a = cons($$, a); } )+ (Sp? '#'* Sp)? Newline
            { $$ = mk_list(s->key, a);
              free(s); }

SetextHeading = SetextHeading1 | SetextHeading2

SetextBottom1 = '='+ Newline

SetextBottom2 = '-'+ Newline

# The lookahead checks for the underline before the heading text is parsed.
SetextHeading1 =  &(RawLine SetextBottom1)
                  a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
                  SetextBottom1 { $$ = mk_list(H1, a); }

SetextHeading2 =  &(RawLine SetextBottom2)
                  a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
                  SetextBottom2 { $$ = mk_list(H2, a); }

Heading = SetextHeading | AtxHeading

BlockQuote = a:BlockQuoteRaw
             {  $$ = mk_element(BLOCKQUOTE);
                $$->children = a;
             }

# Collects the quoted lines (with the leading '>' and one optional space
# removed) into a single RAW string element.
BlockQuoteRaw =  a:StartList
                 (( '>' ' '? Line { a = cons($$, a); } )
                  ( !'>' !BlankLine Line { a = cons($$, a); } )*
                  ( BlankLine { a = cons(mk_str("\n"), a); } )*
                 )+
                 {   $$ = mk_str_from_list(a, true);
                     $$->key = RAW;
                 }

NonblankIndentedLine = !BlankLine IndentedLine

VerbatimChunk = a:StartList
                ( BlankLine { a = cons(mk_str("\n"), a); } )*
                ( NonblankIndentedLine { a = cons($$, a); } )+
                { $$ = mk_str_from_list(a, false); }

Verbatim =     a:StartList ( VerbatimChunk { a = cons($$, a); } )+
               { $$ = mk_str_from_list(a, false);
                 $$->key = VERBATIM; }

HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' (Sp '*')*
                 | '-' Sp '-' Sp '-' (Sp '-')*
                 | '_' Sp '_' Sp '_' (Sp '_')*)
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE); }

Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+

BulletList = &Bullet (ListTight | ListLoose)
             { $$->key = BULLETLIST; }

ListTight = a:StartList
            ( ListItemTight { a = cons($$, a); } )+
            BlankLine* !(Bullet | Enumerator)
            { $$ = mk_list(LIST, a); }

ListLoose = a:StartList
            ( b:ListItem BlankLine*
              {   element *li;
                  char *grown;
                  li = b->children;
                  /* Room for the two appended newlines plus the NUL.  The
                     realloc result is checked before it replaces the old
                     pointer, so a failed allocation is reported instead of
                     being passed to strcat as NULL. */
                  grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                  if (grown == NULL) {
                      fprintf(stderr, "markdown_parser: out of memory\n");
                      exit(EXIT_FAILURE);
                  }
                  li->contents.str = grown;
                  strcat(li->contents.str, "\n\n");  /* In loose list, \n\n added to end of each element */
                  a = cons(b, a);
              } )+
            { $$ = mk_list(LIST, a); }

# A list item is stored as a LISTITEM element whose child is one RAW string.
ListItem =  ( Bullet | Enumerator )
            a:StartList
            ListBlock { a = cons($$, a); }
            ( ListContinuationBlock { a = cons($$, a); } )*
            {  element *raw;
               raw = mk_str_from_list(a, false);
               raw->key = RAW;
               $$ = mk_element(LISTITEM);
               $$->children = raw;
            }

ListItemTight =
            ( Bullet | Enumerator )
            a:StartList
            ListBlock { a = cons($$, a); }
            ( !BlankLine
              ListContinuationBlock { a = cons($$, a); } )*
            !ListContinuationBlock
            {  element *raw;
               raw = mk_str_from_list(a, false);
               raw->key = RAW;
               $$ = mk_element(LISTITEM);
               $$->children = raw;
            }

ListBlock = a:StartList
            !BlankLine Line { a = cons($$, a); }
            ( ListBlockLine { a = cons($$, a); } )*
            { $$ = mk_str_from_list(a, false); }

ListContinuationBlock = a:StartList
                        ( < BlankLine* >
                          {   if (strlen(yytext) == 0)
                                   a = cons(mk_str("\001"), a); /* block separator */
                              else
                                   a = cons(mk_str(yytext), a); } )
                        ( Indent ListBlock { a = cons($$, a); } )+
                        {  $$ = mk_str_from_list(a, false); }

Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

OrderedList = &Enumerator (ListTight | ListLoose)
              { $$->key = ORDEREDLIST; }

ListBlockLine = !BlankLine
                !( Indent? (Bullet | Enumerator) )
                !HorizontalRule
                OptionallyIndentedLine

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
HtmlBlockOpenAddress     = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress    = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
# Each HtmlBlockXxx rule matches an opening tag, then any mix of nested
# blocks of the same tag and arbitrary characters, up to the closing tag.
HtmlBlockAddress         = HtmlBlockOpenAddress
                           ( HtmlBlockAddress | !HtmlBlockCloseAddress . )*
                           HtmlBlockCloseAddress

# blockquote
HtmlBlockOpenBlockquote  = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
HtmlBlockBlockquote      = HtmlBlockOpenBlockquote
                           ( HtmlBlockBlockquote | !HtmlBlockCloseBlockquote . )*
                           HtmlBlockCloseBlockquote

# center
HtmlBlockOpenCenter      = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter     = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
HtmlBlockCenter          = HtmlBlockOpenCenter
                           ( HtmlBlockCenter | !HtmlBlockCloseCenter . )*
                           HtmlBlockCloseCenter

# dir
HtmlBlockOpenDir         = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir        = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
HtmlBlockDir             = HtmlBlockOpenDir
                           ( HtmlBlockDir | !HtmlBlockCloseDir . )*
                           HtmlBlockCloseDir

# div
HtmlBlockOpenDiv         = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv        = '<' Spnl '/' ("div" | "DIV") Spnl '>'
HtmlBlockDiv             = HtmlBlockOpenDiv
                           ( HtmlBlockDiv | !HtmlBlockCloseDiv . )*
                           HtmlBlockCloseDiv

# dl
HtmlBlockOpenDl          = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl         = '<' Spnl '/' ("dl" | "DL") Spnl '>'
HtmlBlockDl              = HtmlBlockOpenDl
                           ( HtmlBlockDl | !HtmlBlockCloseDl . )*
                           HtmlBlockCloseDl

# fieldset
HtmlBlockOpenFieldset    = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset   = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
HtmlBlockFieldset        = HtmlBlockOpenFieldset
                           ( HtmlBlockFieldset | !HtmlBlockCloseFieldset . )*
                           HtmlBlockCloseFieldset

# form
HtmlBlockOpenForm        = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm       = '<' Spnl '/' ("form" | "FORM") Spnl '>'
HtmlBlockForm            = HtmlBlockOpenForm
                           ( HtmlBlockForm | !HtmlBlockCloseForm . )*
                           HtmlBlockCloseForm

# h1 (opening tag only)
HtmlBlockOpenH1          = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1       = '<' Spnl '/' ("h1" | "H1") Spnl '>'
# Each HtmlBlockXxx rule matches an opening tag, then any mix of nested
# blocks of the same tag and arbitrary characters, up to the closing tag.
HtmlBlockH1            = HtmlBlockOpenH1
                         ( HtmlBlockH1 | !HtmlBlockCloseH1 . )*
                         HtmlBlockCloseH1

# h2
HtmlBlockOpenH2        = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2       = '<' Spnl '/' ("h2" | "H2") Spnl '>'
HtmlBlockH2            = HtmlBlockOpenH2
                         ( HtmlBlockH2 | !HtmlBlockCloseH2 . )*
                         HtmlBlockCloseH2

# h3
HtmlBlockOpenH3        = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3       = '<' Spnl '/' ("h3" | "H3") Spnl '>'
HtmlBlockH3            = HtmlBlockOpenH3
                         ( HtmlBlockH3 | !HtmlBlockCloseH3 . )*
                         HtmlBlockCloseH3

# h4
HtmlBlockOpenH4        = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4       = '<' Spnl '/' ("h4" | "H4") Spnl '>'
HtmlBlockH4            = HtmlBlockOpenH4
                         ( HtmlBlockH4 | !HtmlBlockCloseH4 . )*
                         HtmlBlockCloseH4

# h5
HtmlBlockOpenH5        = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5       = '<' Spnl '/' ("h5" | "H5") Spnl '>'
HtmlBlockH5            = HtmlBlockOpenH5
                         ( HtmlBlockH5 | !HtmlBlockCloseH5 . )*
                         HtmlBlockCloseH5

# h6
HtmlBlockOpenH6        = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6       = '<' Spnl '/' ("h6" | "H6") Spnl '>'
HtmlBlockH6            = HtmlBlockOpenH6
                         ( HtmlBlockH6 | !HtmlBlockCloseH6 . )*
                         HtmlBlockCloseH6

# menu
HtmlBlockOpenMenu      = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu     = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
HtmlBlockMenu          = HtmlBlockOpenMenu
                         ( HtmlBlockMenu | !HtmlBlockCloseMenu . )*
                         HtmlBlockCloseMenu

# noframes
HtmlBlockOpenNoframes  = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
HtmlBlockNoframes      = HtmlBlockOpenNoframes
                         ( HtmlBlockNoframes | !HtmlBlockCloseNoframes . )*
                         HtmlBlockCloseNoframes

# noscript
HtmlBlockOpenNoscript  = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
HtmlBlockNoscript      = HtmlBlockOpenNoscript
                         ( HtmlBlockNoscript | !HtmlBlockCloseNoscript . )*
                         HtmlBlockCloseNoscript

# ol (opening tag only)
HtmlBlockOpenOl        = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl

HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP

HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre

HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable

HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl

HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd

HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt

HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset

HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi

HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody

HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd

HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot

HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh

HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr

# Unlike the other tags, a script block does not recurse into nested
# script blocks: everything up to the first closing tag is consumed.
HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript

HtmlBlockInTags = HtmlBlockAddress
                | HtmlBlockBlockquote
                | HtmlBlockCenter
                | HtmlBlockDir
                | HtmlBlockDiv
                | HtmlBlockDl
                | HtmlBlockFieldset
                | HtmlBlockForm
                | HtmlBlockH1
                | HtmlBlockH2
                | HtmlBlockH3
                | HtmlBlockH4
                | HtmlBlockH5
                | HtmlBlockH6
                | HtmlBlockMenu
                | HtmlBlockNoframes
                | HtmlBlockNoscript
                | HtmlBlockOl
                | HtmlBlockP
                | HtmlBlockPre
                | HtmlBlockTable
                | HtmlBlockUl
                | HtmlBlockDd
                | HtmlBlockDt
                | HtmlBlockFrameset
                | HtmlBlockLi
                | HtmlBlockTbody
                | HtmlBlockTd
                | HtmlBlockTfoot
                | HtmlBlockTh
                | HtmlBlockThead
                | HtmlBlockTr
                | HtmlBlockScript

# With EXT_FILTER_HTML set the block is replaced by an empty LIST element;
# otherwise the captured text is kept verbatim as an HTMLBLOCK.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            {   if (extension(EXT_FILTER_HTML)) {
                    $$ = mk_list(LIST, NULL);
                } else {
                    $$ = mk_str(yytext);
                    $$->key = HTMLBLOCK;
                }
            }

HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"

StyleOpen =     '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose =    '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags =   StyleOpen (!StyleClose .)* StyleClose

# With EXT_FILTER_STYLES set the block is replaced by an empty LIST element.
StyleBlock =    < InStyleTags >
                BlankLine*
                {   if (extension(EXT_FILTER_STYLES)) {
                        $$ = mk_list(LIST, NULL);
                    } else {
                        $$ = mk_str(yytext);
                        $$->key = HTMLBLOCK;
                    }
                }

# An Endline is only kept when another Inline follows it; a trailing
# Endline is consumed without being added to the list.
Inlines  =  a:StartList ( !Endline Inline { a = cons($$, a); }
                        | c:Endline &Inline { a = cons(c, a); } )+ Endline?
            { $$ = mk_list(LIST, a); }

Inline  = Str
        | Endline
        | UlOrStarLine
        | Space
        | Strong
        | Emph
        | Image
        | Link
        | NoteReference
        | InlineNote
        | Code
        | RawHtml
        | Entity
        | EscapedChar
        | Smart
        | Symbol

# Any run of spaces and tabs becomes a single-space SPACE element.
Space = Spacechar+
        { $$ = mk_str(" ");
          $$->key = SPACE; }

# A lone chunk is returned as is; several chunks are wrapped in a LIST.
Str = a:StartList < NormalChar+ > { a = cons(mk_str(yytext), a); }
      ( StrChunk { a = cons($$, a); } )*
      { if (a->next == NULL) { $$ = a; } else { $$ = mk_list(LIST, a); } }

StrChunk = < (NormalChar | '_'+ &Alphanumeric)+ > { $$ = mk_str(yytext); } |
           AposChunk

AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric
      { $$ = mk_element(APOSTROPHE); }

EscapedChar =   '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
                { $$ = mk_str(yytext); }

Entity =    ( HexEntity | DecEntity | CharEntity )
            { $$ = mk_str(yytext); $$->key = HTML; }

Endline =   LineBreak | TerminalEndline | NormalEndline

NormalEndline =   Sp Newline !BlankLine !'>' !AtxStart
                  !(Line ('='+ | '-'+) Newline)
                  { $$ = mk_str("\n");
                    $$->key = SPACE; }

TerminalEndline = Sp Newline Eof
                  { $$ = NULL; }

# A hard line break needs at least two spaces before the newline.  The
# literal had been collapsed to one space, which turned every line ending
# in a single space into a LINEBREAK.
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK); }

Symbol =    < SpecialChar >
            { $$ = mk_str(yytext); }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine =  (UlLine | StarLine) { $$ = mk_str(yytext); }
StarLine =      < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine   =      < "____" '_'* > | < Spacechar '_'+ &Spacechar >

Emph =      EmphStar | EmphUl

Whitespace = Spacechar | Newline

EmphStar =  '*' !Whitespace
            a:StartList
            ( !'*' b:Inline { a = cons(b, a); }
            | b:StrongStar  { a = cons(b, a); }
            )+
            '*'
            { $$ = mk_list(EMPH, a); }

EmphUl =    '_' !Whitespace
            a:StartList
            ( !'_' b:Inline { a = cons(b, a); }
            | b:StrongUl    { a = cons(b, a); }
            )+
            '_'
            { $$ = mk_list(EMPH, a); }

Strong = StrongStar | StrongUl

StrongStar =    "**" !Whitespace
                a:StartList
                ( !"**" b:Inline { a = cons(b, a); })+
                "**"
                { $$ = mk_list(STRONG, a); }

StrongUl   =    "__" !Whitespace
                a:StartList
                ( !"__" b:Inline { a = cons(b, a); })+
                "__"
                { $$ = mk_list(STRONG, a); }

# A resolved link is re-keyed as IMAGE; an unresolved reference (a LIST
# of literal pieces) just gets the leading "!" put back in front.
Image = '!' ( ExplicitLink | ReferenceLink )
        {   if ($$->key == LINK) {
                $$->key = IMAGE;
            } else {
                element *result;
                result = $$;
                $$->children = cons(mk_str("!"), result->children);
            } }

Link =  ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle

# [text][label]: when the label is unknown, the original source text is
# rebuilt as a LIST of literal strings around the two parsed labels.
ReferenceLinkDouble =  a:Label < Spnl > !"[]" b:Label
                       {
                           link match;
                           if (find_reference(&match, b->children)) {
                               $$ = mk_link(a->children, match.url, match.title);
                               free(a);
                               free_element_list(b);
                           } else {
                               element *result;
                               result = mk_element(LIST);
                               result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
                                                   cons(mk_str("["), cons(b, mk_str("]")))))));
                               $$ = result;
                           }
                       }

# [label] or [label][]: same fallback as above for an unknown label.
ReferenceLinkSingle =  a:Label < (Spnl "[]")? >
                       {
                           link match;
                           if (find_reference(&match, a->children)) {
                               $$ = mk_link(a->children, match.url, match.title);
                               free(a);
                           }
                           else {
                               element *result;
                               result = mk_element(LIST);
                               result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
                               $$ = result;
                           }
                       }

ExplicitLink =  l:Label '(' Sp s:Source Spnl t:Title Sp ')'
                { $$ = mk_link(l->children, s->contents.str, t->contents.str);
                  free_element(s);
                  free_element(t);
                  free(l); }

Source  = ( '<' < SourceContents > '>' | < SourceContents > )
          { $$ = mk_str(yytext); }

SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*

Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext); }

TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'

AutoLink = AutoLinkUrl | AutoLinkEmail

AutoLinkUrl =   '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
                {   $$ = mk_link(mk_str(yytext), yytext, ""); }

# The link text is the bare address; the URL is the address prefixed with
# "mailto:".  The buffer size is computed once, the allocation is checked,
# and the write is bounded by that size.
AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>'
                {   size_t len = strlen(yytext) + sizeof "mailto:";  /* sizeof counts the NUL */
                    char *mailto = malloc(len);
                    if (mailto == NULL) {
                        fprintf(stderr, "markdown_parser: out of memory\n");
                        exit(EXIT_FAILURE);
                    }
                    snprintf(mailto, len, "mailto:%s", yytext);
                    $$ = mk_link(mk_str(yytext), mailto, "");
                    free(mailto);
                }

Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc t:RefTitle BlankLine+
            { $$ = mk_link(l->children, s->contents.str, t->contents.str);
              free_element(s);
              free_element(t);
              free(l);
              $$->key = REFERENCE; }

# With EXT_NOTES set, a label may not start with '^'.
Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } )
        a:StartList
        ( !']' Inline { a = cons($$, a); } )*
        ']'
        { $$ = mk_list(LIST, a); }

RefSrc = < Nonspacechar+ >
         { $$ = mk_str(yytext);
           $$->key = HTML; }

RefTitle =  ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
            { $$ = mk_str(yytext); }

EmptyTitle = < "" >

RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')'

References = a:StartList
             ( b:Reference { a = cons(b, a); } | SkipBlock )*
             { references = reverse(a); }

Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# One alternative per delimiter length (1 to 5 backticks); the span ends
# at a delimiter of the same length.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
       | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
       | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
       | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
       | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
       )
       { $$ = mk_str(yytext); $$->key = CODE; }

RawHtml =   < (HtmlComment | HtmlBlockScript | HtmlTag) >
            {   if (extension(EXT_FILTER_HTML)) {
                    $$ = mk_list(LIST, NULL);
                } else {
                    $$ = mk_str(yytext);
                    $$->key = HTML;
                }
            }

BlankLine =     Sp Newline

Quoted =        '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment =   "<!--" (!"-->" .)* "-->"
HtmlTag =       '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'
Eof =           !.
Spacechar =     ' ' | '\t'
Nonspacechar =  !Spacechar !Newline .
Newline =       '\n' | '\r' '\n'?
Sp =            Spacechar*
Spnl =          Sp (Newline Sp)?
SpecialChar =   '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar
NormalChar =    !( SpecialChar | Spacechar | Newline ) .

# ASCII letters and digits plus every byte from \200 through \377.
Alphanumeric = [0-9A-Za-z]
             | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207'
             | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217'
             | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227'
             | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237'
             | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247'
             | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257'
             | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267'
             | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277'
             | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307'
             | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317'
             | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327'
             | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337'
             | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347'
             | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357'
             | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367'
             | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377'
AlphanumericAscii = [A-Za-z0-9]
Digit = [0-9]
BOM = "\357\273\277"

HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
# The capture previously carried a stray second '>' terminator; it now
# closes exactly once, after the ';', like the other two entity rules.
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >

# Up to three leading spaces, longest alternative first.  The three
# alternatives had all been collapsed to a single space.
NonindentSpace =    "   " | "  " | " " | ""
# One tab or four spaces.  The space alternative had been collapsed to a
# single space, which made any line with one leading space count as indented.
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent? Line

# StartList starts a list data structure that can be added to with cons:
StartList = &.
            { $$ = NULL; }

Line =  RawLine
        { $$ = mk_str(yytext); }

# Either a line including its terminating newline, or the remaining
# non-empty text at end of input.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

SkipBlock = HtmlBlock
          | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine*
          | BlankLine+
          | RawLine

# Syntax extensions

ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
                    | &{ extension(EXT_NOTES) } ( '^' )

Smart = &{ extension(EXT_SMART) }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE); }

Ellipsis = ("..." | ". . .")
           { $$ = mk_element(ELLIPSIS); }

Dash = EmDash | EnDash

EnDash = '-' &Digit
         { $$ = mk_element(ENDASH); }

EmDash = ("---" | "--")
         { $$ = mk_element(EMDASH); }

SingleQuoteStart = '\'' !(Spacechar | Newline)

SingleQuoteEnd = '\'' !Alphanumeric

SingleQuoted = SingleQuoteStart
               a:StartList
               ( !SingleQuoteEnd b:Inline { a = cons(b, a); } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, a); }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

DoubleQuoted =  DoubleQuoteStart
                a:StartList
                ( !DoubleQuoteEnd b:Inline { a = cons(b, a); } )+
                DoubleQuoteEnd
                { $$ = mk_list(DOUBLEQUOTED, a); }

# A reference with a matching note becomes a NOTE element that shares the
# matched note's children.  Otherwise the reference is emitted as literal
# "[^label]" text, built in a buffer whose allocation is checked and whose
# write is bounded by the computed size.
NoteReference = &{ extension(EXT_NOTES) }
                ref:RawNoteReference
                {   element *match;
                    if (find_note(&match, ref->contents.str)) {
                        $$ = mk_element(NOTE);
                        assert(match->children != NULL);
                        $$->children = match->children;
                        $$->contents.str = 0;
                    } else {
                        size_t len = strlen(ref->contents.str) + sizeof "[^]";  /* sizeof counts the NUL */
                        char *s = malloc(len);
                        if (s == NULL) {
                            fprintf(stderr, "markdown_parser: out of memory\n");
                            exit(EXIT_FAILURE);
                        }
                        snprintf(s, len, "[^%s]", ref->contents.str);
                        $$ = mk_str(s);
                        free(s);
                    }
                }

RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
                   { $$ = mk_str(yytext); }

# The note's label is copied into contents.str of the resulting NOTE list.
Note =          &{ extension(EXT_NOTES) }
                NonindentSpace ref:RawNoteReference ':' Sp
                a:StartList
                ( RawNoteBlock { a = cons($$, a); } )
                ( &Indent RawNoteBlock { a = cons($$, a); } )*
                {   $$ = mk_list(NOTE, a);
                    $$->contents.str = strdup(ref->contents.str);
                }

InlineNote =    &{ extension(EXT_NOTES) }
                "^["
                a:StartList
                ( !']' Inline { a = cons($$, a); } )+
                ']'
                { $$ = mk_list(NOTE, a);
                  $$->contents.str = 0; }

Notes =         a:StartList
                ( b:Note { a = cons(b, a); } | SkipBlock )*
                { notes = reverse(a); }

RawNoteBlock =  a:StartList
                    ( !BlankLine OptionallyIndentedLine { a = cons($$, a); } )+
                ( < BlankLine* > { a = cons(mk_str(yytext), a); } )
                {   $$ = mk_str_from_list(a, true);
                    $$->key = RAW;
                }

%%