%{
/**********************************************************************

  markdown_parser.leg - markdown parser in C using a PEG grammar.
  (c) 2008 John MacFarlane (jgm at berkeley dot edu).

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License or the MIT
  license.  See LICENSE for details.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

 ***********************************************************************/

#include <stdbool.h>
#include <assert.h>
#include "markdown_peg.h"
#include "utility_functions.h"



/**********************************************************************

  Definitions for leg parser generator.
  YY_INPUT is the macro the generated parser invokes to get new input.
  All new input is taken from (static) charbuf, one character per call.

 ***********************************************************************/



/* Every rule's semantic value is a pointer to an element. */
#define YYSTYPE element *

/* A build that defines __DEBUG__ also defines YY_DEBUG for the generated parser. */
#if defined(__DEBUG__)
#define YY_DEBUG 1
#endif

/* YY_INPUT hands the generated parser its next input byte.
 *
 *   buf      - destination for the byte
 *   result   - set to 1 when a byte was stored, 0 at end of input
 *   max_size - capacity of buf; unused, one byte is delivered per call
 *
 * Input comes from charbuf, which is advanced past each byte consumed.
 * A NULL charbuf or a NUL byte is reported as end of input.  The byte is
 * read as unsigned char so that 0xFF, which is -1 where plain char is
 * signed, is not mistaken for EOF and does not truncate the input.
 */
#define YY_INPUT(buf, result, max_size)              \
{                                                    \
    int yyc;                                         \
    if (charbuf && *charbuf != '\0') {               \
        yyc= (unsigned char) *charbuf++;             \
    } else {                                         \
        yyc= EOF;                                    \
    }                                                \
    result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1);     \
}

/* YY_RULE(T) expands to T unchanged.
 * NOTE(review): presumably this overrides the generator's default qualifier
 * on rule functions so they get external linkage -- confirm against the
 * leg version used to build this file. */
#define YY_RULE(T)	T


/**********************************************************************

  PEG grammar and parser actions for markdown syntax.

 ***********************************************************************/

%}

# Doc: a whole document.  An optional byte-order mark is skipped, every
# Block is consed onto a list, and the list is passed through reverse()
# before being stored in parse_result.
Doc =       BOM?
            blocks:StartList
            ( Block { blocks = cons($$, blocks); } )*
            { parse_result = reverse(blocks); }

# Block: skips any leading blank lines, then matches one block-level
# construct.  The alternatives are tried in the order listed and the first
# one that matches is used, so Para and Plain act as the fallbacks.
Block =     BlankLine*
            ( BlockQuote
            | Verbatim
            | Note
            | Reference
            | HorizontalRule
            | Heading
            | OrderedList
            | BulletList
            | HtmlBlock
            | StyleBlock
            | Para
            | Plain )

# Para: up to three leading spaces, a run of inlines, and at least one
# blank line.  The inline list is re-keyed as PARA.
Para =      NonindentSpace body:Inlines BlankLine+
            { body->key = PARA;
              $$ = body; }

# Plain: a run of inlines with no terminating blank line required.
# The inline list is re-keyed as PLAIN.
Plain =     body:Inlines
            { body->key = PLAIN;
              $$ = body; }

# AtxInline: one Inline inside an ATX heading.  It may not start at a
# newline, nor at the optional closing run of '#' that ends the line.
AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline

# AtxStart: the one to six '#' that open an ATX heading.  The result is an
# element whose key is H1 plus the number of '#' beyond the first.
AtxStart =  < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
            { int extra = (int) strlen(yytext) - 1;
              $$ = mk_element(H1 + extra); }

# AtxHeading: the opening '#' marker, one or more heading inlines, an
# optional closing run of '#', and a newline.  The list takes its key from
# the marker element, which is then freed.
AtxHeading = marker:AtxStart Sp?
             inls:StartList ( AtxInline { inls = cons($$, inls); } )+
             (Sp? '#'* Sp)?  Newline
             { $$ = mk_list(marker->key, inls);
               free(marker); }

# A setext heading is either the '=' underlined or the '-' underlined form.
SetextHeading = SetextHeading1 | SetextHeading2

# Underline used by SetextHeading1: one or more '=' and a newline.
SetextBottom1 = '='+ Newline

# Underline used by SetextHeading2: one or more '-' and a newline.
SetextBottom2 = '-'+ Newline

# SetextHeading1: a line of inlines underlined with '='.  The lookahead
# first checks that a raw line followed by the underline is present.
SetextHeading1 =  &(RawLine SetextBottom1)
                  inls:StartList
                  ( !Endline Inline { inls = cons($$, inls); } )+
                  Sp? Newline SetextBottom1
                  { $$ = mk_list(H1, inls); }

# SetextHeading2: a line of inlines underlined with '-'.  The lookahead
# first checks that a raw line followed by the underline is present.
SetextHeading2 =  &(RawLine SetextBottom2)
                  inls:StartList
                  ( !Endline Inline { inls = cons($$, inls); } )+
                  Sp? Newline SetextBottom2
                  { $$ = mk_list(H2, inls); }

# Heading: the setext form is tried before the ATX form.
Heading = SetextHeading | AtxHeading

# BlockQuote: wraps the raw text gathered by BlockQuoteRaw as the child
# of a new BLOCKQUOTE element.
BlockQuote = raw:BlockQuoteRaw
             {  element *quote = mk_element(BLOCKQUOTE);
                quote->children = raw;
                $$ = quote;
             }

# BlockQuoteRaw: one or more groups, each made of a line introduced by
# '>' (one following space is dropped), any further non-blank lines that
# do not start with '>', and any blank lines (each recorded as "\n").
# The pieces are joined into a single string element keyed RAW.
BlockQuoteRaw =  lines:StartList
                 (
                   ( '>' ' '? Line { lines = cons($$, lines); } )
                   ( !'>' !BlankLine Line { lines = cons($$, lines); } )*
                   ( BlankLine { lines = cons(mk_str("\n"), lines); } )*
                 )+
                 {   element *joined = mk_str_from_list(lines, true);
                     joined->key = RAW;
                     $$ = joined;
                 }

# NonblankIndentedLine: an indented line that is not a blank line.
NonblankIndentedLine = !BlankLine IndentedLine

# VerbatimChunk: any number of blank lines (each recorded as "\n")
# followed by one or more non-blank indented lines, joined into one string.
VerbatimChunk = lines:StartList
                ( BlankLine { lines = cons(mk_str("\n"), lines); } )*
                ( NonblankIndentedLine { lines = cons($$, lines); } )+
                { $$ = mk_str_from_list(lines, false); }

# Verbatim: one or more verbatim chunks joined into a single string
# element keyed VERBATIM.
Verbatim =     chunks:StartList ( VerbatimChunk { chunks = cons($$, chunks); } )+
               { element *joined = mk_str_from_list(chunks, false);
                 joined->key = VERBATIM;
                 $$ = joined; }

# HorizontalRule: up to three leading spaces, then three or more '*', '-'
# or '_' (all the same character, optionally separated by spaces or tabs),
# trailing whitespace, a newline, and at least one blank line.
HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' (Sp '*')*
                 | '-' Sp '-' Sp '-' (Sp '-')*
                 | '_' Sp '_' Sp '_' (Sp '_')*)
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE); }

# Bullet: a bullet-list marker -- up to three leading spaces, one of '+',
# '*' or '-', and at least one space or tab.  A line that parses as a
# HorizontalRule is never a bullet.
Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+

# BulletList: a list whose first item starts with a Bullet.  The tight
# form is tried before the loose form; the resulting element is re-keyed
# as BULLETLIST.
BulletList = &Bullet (ListTight | ListLoose)
             { $$->key = BULLETLIST; }

# ListTight: one or more tight items, then optional blank lines that must
# not be followed by another bullet or enumerator.
ListTight = items:StartList
            ( ListItemTight { items = cons($$, items); } )+
            BlankLine* !(Bullet | Enumerator)
            { $$ = mk_list(LIST, items); }

# ListLoose: one or more list items, each optionally followed by blank
# lines.  "\n\n" is appended to the raw text held by each item's child.
ListLoose = a:StartList
            ( b:ListItem BlankLine*
              {   /* In loose list, \n\n added to end of each element.
                     The grown buffer is installed only when realloc succeeds,
                     so a failed allocation leaves the item text untouched
                     instead of losing it and writing through NULL. */
                  element *li = b->children;
                  size_t len = strlen(li->contents.str);
                  char *grown = realloc(li->contents.str, len + 3);
                  if (grown != NULL) {
                      memcpy(grown + len, "\n\n", 3);
                      li->contents.str = grown;
                  }
                  a = cons(b, a);
              } )+
            { $$ = mk_list(LIST, a); }

# ListItem: a list marker, a first block, and any continuation blocks.
# The pieces are joined into one string keyed RAW, which becomes the only
# child of a new LISTITEM element.
ListItem =  ( Bullet | Enumerator )
            parts:StartList
            ListBlock { parts = cons($$, parts); }
            ( ListContinuationBlock { parts = cons($$, parts); } )*
            {  element *raw = mk_str_from_list(parts, false);
               element *item;
               raw->key = RAW;
               item = mk_element(LISTITEM);
               item->children = raw;
               $$ = item;
            }

# ListItemTight: like ListItem, except that continuation blocks may not
# begin at a blank line and no continuation block may follow the item.
ListItemTight =
            ( Bullet | Enumerator )
            parts:StartList
            ListBlock { parts = cons($$, parts); }
            ( !BlankLine
              ListContinuationBlock { parts = cons($$, parts); } )*
            !ListContinuationBlock
            {  element *raw = mk_str_from_list(parts, false);
               element *item;
               raw->key = RAW;
               item = mk_element(LISTITEM);
               item->children = raw;
               $$ = item;
            }

# ListBlock: a non-blank first line followed by any number of
# ListBlockLine lines, joined into one string.
ListBlock = lines:StartList
            !BlankLine Line { lines = cons($$, lines); }
            ( ListBlockLine { lines = cons($$, lines); } )*
            { $$ = mk_str_from_list(lines, false); }

# ListContinuationBlock: optional blank lines followed by one or more
# indented list blocks.  The matched blank lines are kept as text; when
# there are none, the string "\001" is stored instead as a block separator.
ListContinuationBlock = parts:StartList
                        ( < BlankLine* >
                          {   char *sep = (yytext[0] == '\0') ? "\001" : yytext;
                              parts = cons(mk_str(sep), parts); } )
                        ( Indent ListBlock { parts = cons($$, parts); } )+
                        {  $$ = mk_str_from_list(parts, false); }

# Enumerator: an ordered-list marker -- up to three leading spaces, one or
# more digits, a '.', and at least one space or tab.
Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

# OrderedList: a list whose first item starts with an Enumerator.  The
# tight form is tried before the loose form; the resulting element is
# re-keyed as ORDEREDLIST.
OrderedList = &Enumerator (ListTight | ListLoose)
              { $$->key = ORDEREDLIST; }

# ListBlockLine: a follow-on line of a list block.  It must not be blank,
# must not start a new list item (with or without an indent), and must
# not be a horizontal rule.
ListBlockLine = !BlankLine
                !( Indent? (Bullet | Enumerator) )
                !HorizontalRule
                OptionallyIndentedLine

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
#
# Each tag gets three rules:
#   HtmlBlockOpenX   - the opening tag, with optional attributes
#   HtmlBlockCloseX  - the closing tag
#   HtmlBlockX       - opening tag, then any mix of nested HtmlBlockX and
#                      other characters, then the closing tag
# Tag names are matched only in all-lowercase or all-uppercase form.
# HtmlBlockScript is the exception: its body does not nest.

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress

HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote

HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter

HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir

HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'
HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv

HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'
HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl

HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset

HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'
HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm

HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'
HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1

HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'
HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2

HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'
HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3

HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'
HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4

HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'
HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5

HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'
HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6

HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu

HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes

HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript

HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl

HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP

HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre

HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable

HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl

HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd

HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt

HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset

HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi

HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody

HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd

HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot

HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh

HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr

# Script blocks: everything up to the first closing script tag, no nesting.
HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript


# HtmlBlockInTags: any one of the per-tag HTML blocks, tried in the order
# listed.
HtmlBlockInTags = HtmlBlockAddress
                | HtmlBlockBlockquote
                | HtmlBlockCenter
                | HtmlBlockDir
                | HtmlBlockDiv
                | HtmlBlockDl
                | HtmlBlockFieldset
                | HtmlBlockForm
                | HtmlBlockH1
                | HtmlBlockH2
                | HtmlBlockH3
                | HtmlBlockH4
                | HtmlBlockH5
                | HtmlBlockH6
                | HtmlBlockMenu
                | HtmlBlockNoframes
                | HtmlBlockNoscript
                | HtmlBlockOl
                | HtmlBlockP
                | HtmlBlockPre
                | HtmlBlockTable
                | HtmlBlockUl
                | HtmlBlockDd
                | HtmlBlockDt
                | HtmlBlockFrameset
                | HtmlBlockLi
                | HtmlBlockTbody
                | HtmlBlockTd
                | HtmlBlockTfoot
                | HtmlBlockTh
                | HtmlBlockThead
                | HtmlBlockTr
                | HtmlBlockScript

# HtmlBlock: a tagged HTML block, an HTML comment, or a self-closing block
# tag, followed by at least one blank line.  The captured text becomes an
# HTMLBLOCK string element; with EXT_FILTER_HTML enabled an empty LIST is
# produced instead.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            {   if (!extension(EXT_FILTER_HTML)) {
                    $$ = mk_str(yytext);
                    $$->key = HTMLBLOCK;
                } else {
                    $$ = mk_list(LIST, NULL);
                }
            }

# HtmlBlockSelfClosing: a block-level tag closed with "/>", with optional
# attributes.
HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

# HtmlBlockType: the block-level tag names accepted by HtmlBlockSelfClosing,
# each in all-lowercase and all-uppercase form.
HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"

# Style element: opening tag (with optional attributes), closing tag, and
# the whole element, whose body runs up to the first closing tag.
StyleOpen =     '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose =    '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags =   StyleOpen (!StyleClose .)* StyleClose
# StyleBlock: a style element followed by any number of blank lines.  The
# captured text becomes an HTMLBLOCK string element; with EXT_FILTER_STYLES
# enabled an empty LIST is produced instead.
StyleBlock =    < InStyleTags >
                BlankLine*
                {   if (!extension(EXT_FILTER_STYLES)) {
                        $$ = mk_str(yytext);
                        $$->key = HTMLBLOCK;
                    } else {
                        $$ = mk_list(LIST, NULL);
                    }
                }

# Inlines: one or more inline elements.  A line ending is kept only when
# another inline follows it; one trailing line ending is consumed and
# dropped.
Inlines  =  inls:StartList
            ( !Endline Inline       { inls = cons($$, inls); }
            | eol:Endline &Inline   { inls = cons(eol, inls); }
            )+
            Endline?
            { $$ = mk_list(LIST, inls); }

# Inline: a single inline element.  The alternatives are tried in the
# order listed and the first one that matches is used; Symbol is the
# last resort.
Inline  = Str
        | Endline
        | UlOrStarLine
        | Space
        | Strong
        | Emph
        | Image
        | Link
        | NoteReference
        | InlineNote
        | Code
        | RawHtml
        | Entity
        | EscapedChar
        | Smart
        | Symbol

# Space: any run of spaces and tabs, collapsed to a single " " keyed SPACE.
Space = Spacechar+
        { element *blank = mk_str(" ");
          blank->key = SPACE;
          $$ = blank; }

# Str: a run of normal characters followed by any number of StrChunks.
# A lone piece is returned as it is; several pieces are wrapped in a LIST.
Str = chunks:StartList
      < NormalChar+ > { chunks = cons(mk_str(yytext), chunks); }
      ( StrChunk { chunks = cons($$, chunks); } )*
      { if (chunks->next != NULL) {
            $$ = mk_list(LIST, chunks);
        } else {
            $$ = chunks;
        } }

# StrChunk: a continuation of a Str -- either more normal characters mixed
# with runs of '_' that are directly followed by an alphanumeric, or an
# AposChunk.
StrChunk = < (NormalChar | '_'+ &Alphanumeric)+ > { $$ = mk_str(yytext); } |
           AposChunk

# AposChunk: only with EXT_SMART enabled -- an apostrophe directly followed
# by an alphanumeric, produced as an APOSTROPHE element.
AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric
      { $$ = mk_element(APOSTROPHE); }

# EscapedChar: a backslash followed by one of the escapable punctuation
# characters; only the character after the backslash is kept.
EscapedChar =   '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
                { $$ = mk_str(yytext); }

# Entity: a hexadecimal, decimal or named character entity, passed through
# verbatim as a string element keyed HTML.
Entity =    ( HexEntity | DecEntity | CharEntity )
            { element *ent = mk_str(yytext);
              ent->key = HTML;
              $$ = ent; }

# Endline: a line ending inside inline content.  A hard line break is tried
# first, then the final line ending of the input, then an ordinary one.
Endline =   LineBreak | TerminalEndline | NormalEndline

# NormalEndline: optional whitespace and a newline, provided the next line
# is not blank, does not start with '>' or an ATX heading marker, and is
# not a line followed by a setext underline.  Produces "\n" keyed SPACE.
NormalEndline =   Sp Newline !BlankLine !'>' !AtxStart
                  !(Line ('='+ | '-'+) Newline)
                  { element *brk = mk_str("\n");
                    brk->key = SPACE;
                    $$ = brk; }

# TerminalEndline: optional whitespace and a newline at the very end of the
# input.  Its value is NULL rather than an element.
TerminalEndline = Sp Newline Eof
                  { $$ = NULL; }

# LineBreak: two spaces followed by a NormalEndline, produced as a
# LINEBREAK element.
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK); }

# Symbol: a single special character taken literally as a string element.
Symbol =    < SpecialChar >
            { $$ = mk_str(yytext); }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
# such a run is captured whole and returned as a plain string element.
UlOrStarLine =  (UlLine | StarLine) { $$ = mk_str(yytext); }
# StarLine: four or more '*', or a space/tab, one or more '*', and a
# following space/tab that is checked but not consumed.
StarLine =      < "****" '*'* > | < Spacechar '*'+ &Spacechar >
# UlLine: the same two patterns with '_' in place of '*'.
UlLine   =      < "____" '_'* > | < Spacechar '_'+ &Spacechar >

# Emph: emphasis delimited by '*' (tried first) or by '_'.
Emph =      EmphStar | EmphUl

# Whitespace: a space, a tab, or a newline.
Whitespace = Spacechar | Newline

# EmphStar: '*' not followed by whitespace, then one or more pieces -- any
# inline that does not start at '*', or a nested StrongStar -- and a
# closing '*'.  Produces an EMPH list.
EmphStar =  '*' !Whitespace
            inner:StartList
            ( !'*' part:Inline      { inner = cons(part, inner); }
            | part:StrongStar       { inner = cons(part, inner); }
            )+
            '*'
            { $$ = mk_list(EMPH, inner); }

# EmphUl: '_' not followed by whitespace, then one or more pieces -- any
# inline that does not start at '_', or a nested StrongUl -- and a
# closing '_'.  Produces an EMPH list.
EmphUl =    '_' !Whitespace
            inner:StartList
            ( !'_' part:Inline      { inner = cons(part, inner); }
            | part:StrongUl         { inner = cons(part, inner); }
            )+
            '_'
            { $$ = mk_list(EMPH, inner); }

# Strong: strong emphasis delimited by "**" (tried first) or by "__".
Strong = StrongStar | StrongUl

# StrongStar: "**" not followed by whitespace, one or more inlines none of
# which starts at "**", and a closing "**".  Produces a STRONG list.
StrongStar =    "**" !Whitespace
                inner:StartList
                ( !"**" part:Inline { inner = cons(part, inner); } )+
                "**"
                { $$ = mk_list(STRONG, inner); }

# StrongUl: "__" not followed by whitespace, one or more inlines none of
# which starts at "__", and a closing "__".  Produces a STRONG list.
StrongUl   =    "__" !Whitespace
                inner:StartList
                ( !"__" part:Inline { inner = cons(part, inner); } )+
                "__"
                { $$ = mk_list(STRONG, inner); }

# Image: '!' followed by an explicit or reference link.  A result keyed
# LINK is re-keyed as IMAGE; any other result gets a literal "!" put in
# front of its children.
Image = '!' ( ExplicitLink | ReferenceLink )
        { if ($$->key != LINK) {
              $$->children = cons(mk_str("!"), $$->children);
          } else {
              $$->key = IMAGE;
          } }

# Link: an explicit link, then a reference link, then an autolink.
Link =  ExplicitLink | ReferenceLink | AutoLink

# ReferenceLink: the two-label form is tried before the one-label form.
ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle

# ReferenceLinkDouble: two labels in a row, the second not being "[]".
# The second label is looked up with find_reference.  On a hit the result
# is a link built from the first label's children.  On a miss the result
# is a LIST that spells the source back out: "[", first label, "]", the
# captured whitespace, "[", second label, "]".
ReferenceLinkDouble =  txt:Label < Spnl > !"[]" lbl:Label
                       {   link match;
                           if (!find_reference(&match, lbl->children)) {
                               element *tail;
                               element *result;
                               result = mk_element(LIST);
                               tail = cons(lbl, mk_str("]"));
                               tail = cons(mk_str("["), tail);
                               tail = cons(mk_str(yytext), tail);
                               tail = cons(mk_str("]"), tail);
                               tail = cons(txt, tail);
                               tail = cons(mk_str("["), tail);
                               result->children = tail;
                               $$ = result;
                           } else {
                               $$ = mk_link(txt->children, match.url, match.title);
                               free(txt);
                               free_element_list(lbl);
                           }
                       }

# ReferenceLinkSingle: one label, optionally followed by "[]".  The label
# is looked up with find_reference.  On a hit the result is a link built
# from the label's children.  On a miss the result is a LIST that spells
# the source back out: "[", the label, "]", and the captured text.
ReferenceLinkSingle =  txt:Label < (Spnl "[]")? >
                       {   link match;
                           if (!find_reference(&match, txt->children)) {
                               element *tail;
                               element *result;
                               result = mk_element(LIST);
                               tail = cons(mk_str("]"), mk_str(yytext));
                               tail = cons(txt, tail);
                               tail = cons(mk_str("["), tail);
                               result->children = tail;
                               $$ = result;
                           } else {
                               $$ = mk_link(txt->children, match.url, match.title);
                               free(txt);
                           }
                       }

# ExplicitLink: a label followed by "(source title)".  The link is built
# from the label's children and the source and title strings; the source
# and title elements and the label's own node are then freed.
ExplicitLink =  lbl:Label '(' Sp src:Source Spnl ttl:Title Sp ')'
                { element *made = mk_link(lbl->children, src->contents.str, ttl->contents.str);
                  free_element(src);
                  free_element(ttl);
                  free(lbl);
                  $$ = made; }

# Source: the link target, optionally wrapped in '<' '>'; only the
# contents are captured.
Source  = ( '<' < SourceContents > '>' | < SourceContents > )
          { $$ = mk_str(yytext); }

# SourceContents: runs of non-space characters other than '(', ')' and
# '>', with balanced parenthesised groups allowed.  May be empty.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*

# Title: a single-quoted title, a double-quoted title, or nothing (which
# captures the empty string).
Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext); }

# TitleSingle: a title in single quotes.  A quote ends the title only when
# it is followed by optional whitespace and then ')' or a newline.
TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

# TitleDouble: the same, in double quotes.
TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'

# AutoLink: a URL or an e-mail address written between '<' and '>'.
AutoLink = AutoLinkUrl | AutoLinkEmail

# AutoLinkUrl: letters, "://", and one or more characters up to '>' on the
# same line.  The captured text is used both as link text and as target.
AutoLinkUrl =   '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
                {   $$ = mk_link(mk_str(yytext), yytext, ""); }

# AutoLinkEmail: an address between '<' and '>', with an optional
# "mailto:" prefix that is not captured.  The address is the link text and
# "mailto:" plus the address is the target.
AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>'
                {   /* sizeof counts the terminating NUL of the prefix */
                    size_t addrlen = strlen(yytext);
                    char *target = malloc(addrlen + sizeof "mailto:");
                    sprintf(target, "mailto:%s", yytext);
                    $$ = mk_link(mk_str(yytext), target, "");
                    free(target);
                }

# Reference: a link definition -- up to three leading spaces, a label that
# is not "[]", ':', a source, an optional title, and at least one blank
# line.  Built like a link and then re-keyed as REFERENCE; the source and
# title elements and the label's own node are freed.
Reference = NonindentSpace !"[]" lbl:Label ':' Spnl src:RefSrc ttl:RefTitle BlankLine+
            { element *def = mk_link(lbl->children, src->contents.str, ttl->contents.str);
              free_element(src);
              free_element(ttl);
              free(lbl);
              def->key = REFERENCE;
              $$ = def; }

# Label: bracketed inline content.  With EXT_NOTES enabled the text may not
# begin with '^'; with it disabled any following character is accepted.
# The inlines between the brackets are returned as a LIST.
Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } )
        inls:StartList
        ( !']' Inline { inls = cons($$, inls); } )*
        ']'
        { $$ = mk_list(LIST, inls); }

# RefSrc: the target of a link definition -- a run of non-space characters,
# stored as a string element keyed HTML.
RefSrc = < Nonspacechar+ > 
         { $$ = mk_str(yytext); 
           $$->key = HTML; }

# RefTitle: the optional title of a link definition, in single quotes,
# double quotes or parentheses; when absent the empty string is captured.
RefTitle =  ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
            { $$ = mk_str(yytext); }

# EmptyTitle: matches nothing and captures the empty string.
EmptyTitle = < "" >

# The three title forms below share one shape: the title stops at a closing
# delimiter that is followed by optional whitespace and a newline, and it
# never runs past a newline.
RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')'

# References: scans the input collecting every Reference and skipping
# everything else.  The collected list is passed through reverse() and
# stored in references.
References = refs:StartList
             ( r:Reference { refs = cons(r, refs); } | SkipBlock )*
             { references = reverse(refs); }

# TicksN: exactly N backticks, that is, N backticks not followed by another.
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# Code: an inline code span delimited by one to five backticks on each
# side.  Inside, backtick runs of a different length are allowed, and a
# newline is allowed unless it is followed by a blank line.  Whitespace next
# to the delimiters is not captured.  Produces a string element keyed CODE.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
       | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
       | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
       | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
       | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
       )
       { $$ = mk_str(yytext); $$->key = CODE; }

# RawHtml: an HTML comment, a script block or a single tag appearing
# inline.  The captured text becomes a string element keyed HTML; with
# EXT_FILTER_HTML enabled an empty LIST is produced instead.
RawHtml =   < (HtmlComment | HtmlBlockScript | HtmlTag) >
            {   if (!extension(EXT_FILTER_HTML)) {
                    $$ = mk_str(yytext);
                    $$->key = HTML;
                } else {
                    $$ = mk_list(LIST, NULL);
                }
            }

# Lexical building blocks shared by the rules above.

# BlankLine: optional spaces/tabs and a newline.
BlankLine =     Sp Newline

# Quoted: a double- or single-quoted string with no escape handling.
Quoted =        '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
# HtmlAttribute: a name of ASCII alphanumerics and '-', optionally followed
# by '=' and a quoted or bare value.
HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment =   "<!--" (!"-->" .)* "-->"
# HtmlTag: any opening, closing or self-closing tag.
HtmlTag =       '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'
# Eof: succeeds only when no input character remains.
Eof =           !.
Spacechar =     ' ' | '\t'
Nonspacechar =  !Spacechar !Newline .
# Newline: LF, CR, or CR LF.
Newline =       '\n' | '\r' '\n'?
Sp =            Spacechar*
# Spnl: optional spaces/tabs that may span at most one newline.
Spnl =          Sp (Newline Sp)?
SpecialChar =   '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar
NormalChar =    !( SpecialChar | Spacechar | Newline ) .
# Alphanumeric: ASCII letters and digits plus every byte from \200 to \377.
Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377'
AlphanumericAscii = [A-Za-z0-9]
Digit = [0-9]
# BOM: the three bytes \357 \273 \277 that Doc skips at the start of input.
BOM = "\357\273\277"

# Character entities.  Each rule captures the whole entity, from '&'
# through ';', so that Entity can build its element from the captured text.
# DecEntity previously carried a second, unmatched '>' capture marker after
# the digits; the capture now has a single start and a single end like its
# two siblings.  The captured text is unchanged.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >

# NonindentSpace: zero to three spaces, longest alternative first.
NonindentSpace =    "   " | "  " | " " | ""
# Indent: one tab or four spaces.
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent? Line

# StartList starts a list data structure that can be added to with cons:
# it consumes nothing, needs at least one character of input to remain,
# and yields NULL as the empty list.
StartList = &.
            { $$ = NULL; }

# Line: a raw line turned into a string element.
Line =  RawLine
        { $$ = mk_str(yytext); }
# RawLine: everything up to and including the next newline, or, when no
# newline remains, the non-empty rest of the input (captured up to Eof).
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

# SkipBlock: input passed over by the References and Notes scans -- an HTML
# block; a run of non-blank lines, none starting with '#' or being a setext
# underline, plus trailing blank lines; a run of blank lines; or, failing
# all of those, a single raw line.
SkipBlock = HtmlBlock
          | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine*
          | BlankLine+
          | RawLine

# Syntax extensions

# ExtendedSpecialChar: characters that become special only when an
# extension is on -- '.', '-', single and double quote with EXT_SMART,
# '^' with EXT_NOTES.
ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
                    | &{ extension(EXT_NOTES) } ( '^' )

# Smart: typographic replacements, available only with EXT_SMART enabled.
Smart = &{ extension(EXT_SMART) }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

# Apostrophe: a lone single quote, produced as an APOSTROPHE element.
Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE); }

# Ellipsis: three dots, with or without single spaces between them.
Ellipsis = ("..." | ". . .")
           { $$ = mk_element(ELLIPSIS); }

# Dash: the em-dash form is tried before the en-dash form.
Dash = EmDash | EnDash

# EnDash: a single '-' directly followed by a digit.
EnDash = '-' &Digit
         { $$ = mk_element(ENDASH); }

# EmDash: three hyphens or two hyphens.
EmDash = ("---" | "--")
         { $$ = mk_element(EMDASH); }

# SingleQuoteStart: a single quote not followed by a space, tab or newline.
SingleQuoteStart = '\'' !(Spacechar | Newline)

# SingleQuoteEnd: a single quote not followed by an alphanumeric.
SingleQuoteEnd = '\'' !Alphanumeric

# SingleQuoted: one or more inlines between an opening and a closing
# single quote, produced as a SINGLEQUOTED list.
SingleQuoted = SingleQuoteStart
               inner:StartList
               ( !SingleQuoteEnd part:Inline { inner = cons(part, inner); } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, inner); }

# DoubleQuoteStart and DoubleQuoteEnd are both a plain double quote.
DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

# DoubleQuoted: one or more inlines between two double quotes, produced as
# a DOUBLEQUOTED list.
DoubleQuoted =  DoubleQuoteStart
                inner:StartList
                ( !DoubleQuoteEnd part:Inline { inner = cons(part, inner); } )+
                DoubleQuoteEnd
                { $$ = mk_list(DOUBLEQUOTED, inner); }

# NoteReference: only with EXT_NOTES enabled -- a "[^label]" reference.
# When find_note locates the label, the result is a NOTE element sharing
# the children of the matched note.  Otherwise the reference is put back
# as the literal text "[^label]".
# NOTE(review): the element bound to ref is not released on either path;
# whether that is a leak depends on ownership rules not visible here.
NoteReference = &{ extension(EXT_NOTES) }
                ref:RawNoteReference
                {   element *match;
                    if (!find_note(&match, ref->contents.str)) {
                        /* room for "[^", the label, "]" and the NUL */
                        size_t lablen = strlen(ref->contents.str);
                        char *literal = malloc(lablen + 4);
                        sprintf(literal, "[^%s]", ref->contents.str);
                        $$ = mk_str(literal);
                        free(literal);
                    } else {
                        $$ = mk_element(NOTE);
                        assert(match->children != NULL);
                        $$->children = match->children;
                        $$->contents.str = 0;
                    }
                }

# RawNoteReference: "[^", a non-empty label that stays on one line and
# contains no ']', and a closing ']'.  Only the label is captured.
RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
                   { $$ = mk_str(yytext); }

# Note: only with EXT_NOTES enabled -- a note definition.  Up to three
# leading spaces, "[^label]:", a first raw block, and any further raw
# blocks that start with an indent.  Produces a NOTE list whose
# contents.str is a copy of the label.
Note =          &{ extension(EXT_NOTES) }
                NonindentSpace ref:RawNoteReference ':' Sp
                blks:StartList
                ( RawNoteBlock { blks = cons($$, blks); } )
                ( &Indent RawNoteBlock { blks = cons($$, blks); } )*
                {   element *def = mk_list(NOTE, blks);
                    def->contents.str = strdup(ref->contents.str);
                    $$ = def;
                }

# InlineNote: only with EXT_NOTES enabled -- "^[" followed by one or more
# inlines and a closing ']'.  Produces a NOTE list with no label string.
InlineNote =    &{ extension(EXT_NOTES) }
                "^["
                inls:StartList
                ( !']' Inline { inls = cons($$, inls); } )+
                ']'
                { element *def = mk_list(NOTE, inls);
                  def->contents.str = 0;
                  $$ = def; }

# Notes: scans the input collecting every Note and skipping everything
# else.  The collected list is passed through reverse() and stored in notes.
Notes =         found:StartList
                ( n:Note { found = cons(n, found); } | SkipBlock )*
                { notes = reverse(found); }

# RawNoteBlock: one or more non-blank, optionally indented lines followed
# by any blank lines, which are kept as text.  The pieces are joined into
# a single string element keyed RAW.
RawNoteBlock =  lines:StartList
                ( !BlankLine OptionallyIndentedLine { lines = cons($$, lines); } )+
                ( < BlankLine* > { lines = cons(mk_str(yytext), lines); } )
                {   element *joined = mk_str_from_list(lines, true);
                    joined->key = RAW;
                    $$ = joined;
                }

%%