adding pegmarkdown support library.

git-svn-id: http://svn.code.sf.net/p/retroshare/code/branches/v0.5-gxs-b1@5953 b45a01b8-16f6-495d-af2f-9b41ad6348cc
This commit is contained in:
drbob 2012-12-09 12:12:57 +00:00
parent 4413536926
commit 3be22536df
107 changed files with 20038 additions and 0 deletions

View File

@ -2,6 +2,7 @@
#include <algorithm> #include <algorithm>
#include <math.h> #include <math.h>
#include <sstream> #include <sstream>
#include <stdio.h>
#include "p3posted.h" #include "p3posted.h"
#include "gxs/rsgxsflags.h" #include "gxs/rsgxsflags.h"

View File

@ -0,0 +1,208 @@
/*
* GLibFacade.c
* MultiMarkdown
*
* Created by Daniel Jalkut on 7/26/11.
* Modified by Fletcher T. Penney on 9/15/11.
* Modified by Dan Lowe on 1/3/12.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*/
#include "GLibFacade.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
/*
* The following section came from:
*
* http://lists-archives.org/mingw-users/12649-asprintf-missing-vsnprintf-
* behaving-differently-and-_vsncprintf-undefined.html
*
* and
*
* http://groups.google.com/group/jansson-users/browse_thread/thread/
* 76a88d63d9519978/041a7d0570de2d48?lnk=raot
*/
/* Solaris and Windows do not provide vasprintf() or asprintf(). */
#if defined(__WIN32) || (defined(__SVR4) && defined(__sun))
/*
 * vasprintf - format into a freshly malloc()ed, NUL-terminated buffer.
 *
 * sptr: receives the new buffer, which the caller must free(); it is set
 *       to NULL when the call fails.
 * fmt:  printf-style format string.
 * argv: arguments consumed by fmt.
 *
 * Returns the number of characters stored (not counting the terminator),
 * or -1 if the output could not be measured or the buffer not allocated.
 */
int vasprintf( char **sptr, const char *fmt, va_list argv )
{
    va_list sizingArgs;
    int wanted;

    *sptr = NULL;

    /* A va_list may only be walked once, so measure the output with a copy
     * and keep the caller's list untouched for the real formatting pass. */
    va_copy( sizingArgs, argv );
    wanted = vsnprintf( NULL, 0, fmt, sizingArgs );
    va_end( sizingArgs );
    if( wanted < 0 )
        return -1;

    /* An empty result still gets a (one byte) buffer so that success is
     * always reported with a valid string. */
    *sptr = malloc( (size_t)wanted + 1 );
    if( *sptr == NULL )
        return -1;

    return vsnprintf( *sptr, (size_t)wanted + 1, fmt, argv );
}

/*
 * asprintf - variadic front end for vasprintf().
 *
 * sptr: receives the new buffer (caller frees), or NULL on failure.
 * fmt:  printf-style format string followed by its arguments.
 *
 * Returns whatever vasprintf() returns.
 */
int asprintf( char **sptr, const char *fmt, ... )
{
    int retval;
    va_list argv;

    va_start( argv, fmt );
    retval = vasprintf( sptr, fmt, argv );
    va_end( argv );
    return retval;
}
#endif
/* GString */
#define kStringBufferStartingSize 1024
#define kStringBufferGrowthMultiplier 2
/*
 * g_string_new - create a growable string initialised from a C string.
 *
 * startingString: initial contents; NULL is treated as the empty string.
 *
 * The buffer starts at kStringBufferStartingSize bytes and is multiplied by
 * kStringBufferGrowthMultiplier until the initial contents and their
 * terminator fit.
 *
 * Returns a heap-allocated GString; release it with g_string_free().
 * Never returns NULL: the process is aborted if memory cannot be obtained.
 */
GString* g_string_new(char *startingString)
{
    if (startingString == NULL) startingString = "";

    size_t startingStringSize = strlen(startingString);
    size_t startingBufferSize = kStringBufferStartingSize;
    while (startingBufferSize < (startingStringSize + 1))
    {
        startingBufferSize *= kStringBufferGrowthMultiplier;
    }

    GString* newString = malloc(sizeof *newString);
    char* buffer = malloc(startingBufferSize);
    if ((newString == NULL) || (buffer == NULL))
    {
        /* Callers never test the result, so fail loudly here instead of
         * dereferencing a NULL pointer below. */
        abort();
    }

    /* The length is already known: copy the bytes and terminate explicitly. */
    memcpy(buffer, startingString, startingStringSize);
    buffer[startingStringSize] = '\0';

    newString->str = buffer;
    newString->currentStringBufferSize = startingBufferSize;
    newString->currentStringLength = startingStringSize;
    return newString;
}
/*
 * g_string_free - release a GString.
 *
 * ripString:         the string to destroy; NULL is accepted and ignored.
 * freeCharacterData: when true the character buffer is freed as well;
 *                    when false ownership of the buffer passes to the
 *                    caller, who must free() it.
 *
 * Returns the character buffer when it was kept, otherwise NULL.
 */
char* g_string_free(GString* ripString, bool freeCharacterData)
{
    if (ripString == NULL)
    {
        return NULL;
    }

    char* returnedString = NULL;
    if (freeCharacterData)
    {
        /* free(NULL) is a no-op, so no guard is needed. */
        free(ripString->str);
    }
    else
    {
        returnedString = ripString->str;
    }

    free(ripString);
    return returnedString;
}
/*
 * ensureStringBufferCanHold - grow baseString's buffer if necessary.
 *
 * baseString:    string whose buffer is checked.
 * newStringSize: number of characters (terminator NOT included) the buffer
 *                must be able to hold on return.
 *
 * The buffer size is multiplied by kStringBufferGrowthMultiplier until it is
 * large enough. The buffer may move, so callers must re-read baseString->str
 * afterwards. Aborts the process if the memory cannot be obtained.
 */
static void ensureStringBufferCanHold(GString* baseString, size_t newStringSize)
{
    size_t newBufferSizeNeeded = newStringSize + 1;
    size_t newBufferSize = baseString->currentStringBufferSize;

    if (newBufferSizeNeeded <= newBufferSize)
    {
        return;
    }

    /* Multiplying zero would never make progress. */
    if (newBufferSize == 0)
    {
        newBufferSize = 1;
    }

    while (newBufferSizeNeeded > newBufferSize)
    {
        if (newBufferSize > ((size_t)-1) / kStringBufferGrowthMultiplier)
        {
            /* The next multiplication would wrap; settle for the exact size. */
            newBufferSize = newBufferSizeNeeded;
            break;
        }
        newBufferSize *= kStringBufferGrowthMultiplier;
    }

    /* Keep the old pointer until realloc is known to have succeeded. */
    char* grownBuffer = realloc(baseString->str, newBufferSize);
    if (grownBuffer == NULL)
    {
        /* This function cannot report failure and every caller writes to
         * the buffer immediately afterwards, so stop here. */
        abort();
    }

    baseString->str = grownBuffer;
    baseString->currentStringBufferSize = newBufferSize;
}
/*
 * g_string_append - append a C string to the end of baseString.
 *
 * baseString:     string to extend.
 * appendedString: text to add; NULL or "" leaves baseString unchanged.
 */
void g_string_append(GString* baseString, char* appendedString)
{
    if (appendedString == NULL)
    {
        return;
    }

    size_t appendedStringLength = strlen(appendedString);
    if (appendedStringLength == 0)
    {
        return;
    }

    size_t newStringLength = baseString->currentStringLength + appendedStringLength;
    ensureStringBufferCanHold(baseString, newStringLength);

    /* We already know where the current string ends and how long the new
     * text is, so copy the bytes directly and terminate explicitly. */
    memcpy(baseString->str + baseString->currentStringLength, appendedString, appendedStringLength);
    baseString->str[newStringLength] = '\0';
    baseString->currentStringLength = newStringLength;
}
/*
 * g_string_append_c - append a single character to baseString.
 *
 * baseString:        string to extend.
 * appendedCharacter: character stored at the current end of the string.
 */
void g_string_append_c(GString* baseString, char appendedCharacter)
{
    /* Make room for one more character (plus the terminator) first; the
     * buffer may move, so the write position is computed afterwards. */
    size_t lengthAfterAppend = baseString->currentStringLength + 1;
    ensureStringBufferCanHold(baseString, lengthAfterAppend);

    char* writePosition = baseString->str + baseString->currentStringLength;
    writePosition[0] = appendedCharacter;
    writePosition[1] = '\0';
    baseString->currentStringLength++;
}
/*
 * g_string_append_printf - append printf-style formatted text to baseString.
 *
 * baseString: string to extend.
 * format:     printf-style format string, followed by its arguments.
 *
 * If the text cannot be formatted, baseString is left unchanged.
 */
void g_string_append_printf(GString* baseString, char* format, ...)
{
    char* formattedString = NULL;
    va_list args;

    va_start(args, format);
    int written = vasprintf(&formattedString, format, args);
    va_end(args); /* every va_start needs a matching va_end */

    /* On failure the buffer pointer cannot be trusted, so it is neither
     * used nor freed. */
    if ((written < 0) || (formattedString == NULL))
    {
        return;
    }

    g_string_append(baseString, formattedString);
    free(formattedString);
}
/*
 * g_string_prepend - insert a C string at the front of baseString.
 *
 * baseString:      string to extend.
 * prependedString: text placed before the existing contents; NULL or ""
 *                  leaves baseString unchanged.
 */
void g_string_prepend(GString* baseString, char* prependedString)
{
    if ((prependedString == NULL) || (strlen(prependedString) == 0))
    {
        return;
    }

    size_t prefixLength = strlen(prependedString);
    size_t combinedLength = baseString->currentStringLength + prefixLength;
    ensureStringBufferCanHold(baseString, combinedLength);

    /* Slide the existing characters right to open a gap at the front... */
    memmove(baseString->str + prefixLength, baseString->str, baseString->currentStringLength);
    /* ...and drop the prefix into it. The count equals the prefix length,
     * so no terminator is written over the text that was just moved. */
    strncpy(baseString->str, prependedString, prefixLength);

    baseString->currentStringLength = combinedLength;
    baseString->str[baseString->currentStringLength] = '\0';
}
/* GSList */
/*
 * g_slist_free - release every node of a list.
 *
 * ripList: head of the list; NULL (an empty list) is fine.
 *
 * Only the nodes themselves are freed. The data pointers they carry are
 * left alone, so whoever owns that data remains responsible for it.
 */
void g_slist_free(GSList* ripList)
{
    GSList* followingItem;

    for (GSList* currentItem = ripList; currentItem != NULL; currentItem = followingItem)
    {
        /* Fetch the link before the node that holds it goes away. */
        followingItem = currentItem->next;
        free(currentItem);
    }
}
/*
 * g_slist_reverse - reverse a list in place.
 *
 * theList: head of the list to reverse; may be NULL.
 *
 * Returns the new head (the former last node), or NULL for an empty list.
 * No nodes are allocated or freed; only the next links are rewired.
 *
 * Per the original author's note, this is currently only used for
 * markdown_output.c endnotes printing.
 */
GSList* g_slist_reverse(GSList* theList)
{
    GSList* reversedHead = NULL;
    GSList* current = theList;

    /* Pop each node off the front of the input and push it onto the front
     * of the reversed list. */
    while (current != NULL)
    {
        GSList* remaining = current->next;

        current->next = reversedHead;
        reversedHead = current;
        current = remaining;
    }

    return reversedHead;
}
/*
 * g_slist_prepend - put a new element at the front of a list.
 *
 * targetElement:  current head of the list (NULL for an empty list).
 * newElementData: pointer stored in the new node; it is not copied.
 *
 * Returns the new head of the list. Never returns NULL: the process is
 * aborted if the node cannot be allocated, because returning NULL would
 * make callers that store the result lose the existing list.
 */
GSList* g_slist_prepend(GSList* targetElement, void* newElementData)
{
    GSList* newElement = malloc(sizeof *newElement);
    if (newElement == NULL)
    {
        abort();
    }

    newElement->data = newElementData;
    newElement->next = targetElement;
    return newElement;
}

View File

@ -0,0 +1,67 @@
/*
* GLibFacade.h
* MultiMarkdown
*
* Created by Daniel Jalkut on 7/26/11.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*/
#ifndef __MARKDOWN_GLIB_FACADE__
#define __MARKDOWN_GLIB_FACADE__
/* peg_markdown uses the link symbol for its own purposes */
#define link MARKDOWN_LINK_IGNORED
#include <unistd.h>
#undef link
#include <stdbool.h>
#include <ctype.h>
typedef int gboolean;
typedef char gchar;
/* This style of bool is used in shared source code */
#define FALSE false
#define TRUE true
/* We implement minimal mirror implementations of GLib's GString and GSList
* sufficient to cover the functionality required by MultiMarkdown.
*
* NOTE: These are 100% clean, from-scratch implementations using only the
* GLib function prototypes as a guide for behavior.
*/
/* Growable, NUL-terminated character buffer; a minimal stand-in for GLib's
* GString. Create with g_string_new() and release with g_string_free().
*/
typedef struct
{
/* Current UTF8 byte stream this string represents */
char* str;
/* Number of bytes currently allocated for str */
int currentStringBufferSize;
/* Number of bytes of str in use, not counting the terminating '\0'. */
/* This is also the offset at which new characters and strings are appended. */
int currentStringLength;
} GString;
GString* g_string_new(char *startingString);
char* g_string_free(GString* ripString, bool freeCharacterData);
void g_string_append_c(GString* baseString, char appendedCharacter);
void g_string_append(GString* baseString, char *appendedString);
void g_string_prepend(GString* baseString, char* prependedString);
void g_string_append_printf(GString* baseString, char* format, ...);
/* Just implement a very simple singly linked list. */
/* An empty list is represented by a NULL GSList pointer. */
typedef struct _GSList
{
/* Caller-supplied payload; g_slist_free() does not release it */
void* data;
/* Next node in the list, or NULL at the end */
struct _GSList* next;
} GSList;
void g_slist_free(GSList* ripList);
GSList* g_slist_prepend(GSList* targetElement, void* newElementData);
GSList* g_slist_reverse(GSList* theList);
#endif

View File

@ -0,0 +1,88 @@
markdown in c, implemented using PEG grammar
Copyright (c) 2008-2011 John MacFarlane
ODF output code (c) 2011 Fletcher T. Penney
peg-markdown is released under both the GPL and MIT licenses.
You may pick the license that best fits your needs.
The GPL
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
The MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
peg-0.1.4 (included for convenience - http://piumarta.com/software/peg/)
Copyright (c) 2007 by Ian Piumarta
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the 'Software'),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, provided that the above copyright notice(s) and this
permission notice appear in all copies of the Software. Acknowledgement
of the use of this Software in supporting documentation would be
appreciated but is not required.
THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
my_getopt (included for convenience - http://www.geocities.com/bsittler/)
Copyright 1997, 2000, 2001, 2002, 2006, Benjamin Sittler
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,42 @@
# Build rules for the peg-markdown command line program.
#
# Targets:
#   ALL (default)  build $(PROGRAM)
#   clean          remove the generated parser, the program and the objects
#   distclean      clean, then also clean the bundled peg/leg sources
#   test           run the Markdown test suite against $(PROGRAM)
#   leak-check     run $(PROGRAM) on README under valgrind

# Executables carry a .exe suffix when building under MinGW.
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
ifneq (,$(findstring MINGW,$(uname_S)))
X = .exe
endif

export X

PROGRAM=markdown$(X)
CFLAGS ?= -Wall -O3 -ansi -D_GNU_SOURCE # -flto for newer GCC versions
OBJS=markdown_parser.o markdown_output.o markdown_lib.o utility_functions.o parsing_functions.o odf.o
PEGDIR=peg-0.1.9
LEG=$(PEGDIR)/leg$(X)
PKG_CONFIG = pkg-config

# None of these names is a file; keep make from being confused by stray
# files with the same name.
.PHONY: ALL clean distclean test leak-check

ALL : $(PROGRAM)

# Build the leg parser generator. $(MAKE) (rather than a literal "make")
# passes command line flags and the jobserver on to the sub-make.
$(LEG): $(PEGDIR)
	CC=gcc $(MAKE) -C $(PEGDIR)

%.o : %.c markdown_peg.h
	$(CC) -c `$(PKG_CONFIG) --cflags glib-2.0` $(CFLAGS) -o $@ $<

$(PROGRAM) : markdown.c $(OBJS)
	$(CC) `$(PKG_CONFIG) --cflags glib-2.0` $(CFLAGS) -o $@ $< $(OBJS) `$(PKG_CONFIG) --libs glib-2.0`

# The C parser is generated from the PEG grammar by leg.
markdown_parser.c : markdown_parser.leg $(LEG) markdown_peg.h parsing_functions.c utility_functions.c
	$(LEG) -o $@ $<

clean:
	rm -f markdown_parser.c $(PROGRAM) $(OBJS)

distclean: clean
	$(MAKE) -C $(PEGDIR) clean

# "&&" keeps the test script from running in the wrong directory when the
# cd fails.
test: $(PROGRAM)
	cd MarkdownTest_1.0.3 && \
	./MarkdownTest.pl --script=../$(PROGRAM) --tidy

leak-check: $(PROGRAM)
	valgrind --leak-check=full ./$(PROGRAM) README

View File

@ -0,0 +1,176 @@
#!/usr/bin/perl
#
# MarkdownTester -- Run tests for Markdown implementations
#
# Copyright (c) 2004-2005 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
# Test driver: for every "<name>.text" file in the test directory, run the
# Markdown implementation under test on it and compare the output with the
# expected "<name>.html" file. Prints OK/FAILED per test, a diff for each
# failure, and a pass/fail summary with timing at the end.
use strict;
use warnings;
use Getopt::Long;
use Benchmark;
our $VERSION = '1.0.2';
# Sat 24 Dec 2005
my $time_start = new Benchmark;
# Defaults; each can be overridden from the command line below.
my $test_dir = "Tests";
my $script = "./Markdown.pl";
my $use_tidy = 0;
my ($flag_version);
GetOptions (
"script=s" => \$script,
"testdir=s" => \$test_dir,
"tidy" => \$use_tidy,
"version" => \$flag_version,
);
# --version: print the program's base name and version, then exit via die.
if($flag_version) {
my $progname = $0;
$progname =~ s{.*/}{};
die "$progname version $VERSION\n";
}
# Refuse to start unless the test directory and an executable script exist.
unless (-d $test_dir) { die "'$test_dir' is not a directory.\n"; }
unless (-f $script) { die "$script does not exist.\n"; }
unless (-x $script) { die "$script is not executable.\n"; }
my $tests_passed = 0;
my $tests_failed = 0;
TEST:
foreach my $testfile (glob "$test_dir/*.text") {
# The test name is the file name without its directory and .text suffix.
my $testname = $testfile;
$testname =~ s{.*/(.+)\.text$}{$1}i;
print "$testname ... ";
# Look for a corresponding .html file for each .text file:
my $resultfile = $testfile;
$resultfile =~ s{\.text$}{\.html}i;
unless (-f $resultfile) {
print "'$resultfile' does not exist.\n\n";
next TEST;
}
# open(TEST, $testfile) || die("Can't open testfile: $!");
open(RESULT, $resultfile) || die("Can't open resultfile: $!");
# Clearing the input record separator makes <RESULT> read the whole file.
undef $/;
# my $t_input = <TEST>;
my $t_result = <RESULT>;
# Run the script under test on the input file and capture its stdout.
my $t_output = `'$script' '$testfile'`;
# Normalize the output and expected result strings:
$t_result =~ s/\s+\z//; # trim trailing whitespace
$t_output =~ s/\s+\z//; # trim trailing whitespace
if ($use_tidy) {
# Escape the strings, pass them through to CLI tidy tool for tag-level equivalency
$t_result =~ s{'}{'\\''}g; # escape ' chars for shell
$t_output =~ s{'}{'\\''}g;
$t_result = `echo '$t_result' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
$t_output = `echo '$t_output' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
}
if ($t_output eq $t_result) {
print "OK\n";
$tests_passed++;
}
else {
print "FAILED\n\n";
# This part added by JM to print diffs
# Both strings are written to scratch files (tmp1 = actual output,
# tmp2 = expected result) in the current directory, diffed, then removed.
open(OUT, '>tmp1') or die $!;
print OUT $t_output or die $!;
open(RES, '>tmp2') or die $!;
print RES $t_result or die $!;
print `diff tmp1 tmp2`;
close RES;
close OUT;
print "\n";
# NOTE(review): "tmp?" matches any file whose name is "tmp" plus one
# character, not only tmp1 and tmp2.
`rm tmp?`;
# End of added part
$tests_failed++;
}
}
# Summary and elapsed time for the whole run.
print "\n\n";
print "$tests_passed passed; $tests_failed failed.\n";
my $time_end = new Benchmark;
my $time_diff = timediff($time_end, $time_start);
print "Benchmark: ", timestr($time_diff), "\n";
__END__
=pod
=head1 NAME
B<MarkdownTest>
=head1 SYNOPSIS
B<MarkdownTest.pl> [ B<--options> ] [ I<file> ... ]
=head1 DESCRIPTION
=head1 OPTIONS
Use "--" to end switch parsing. For example, to open a file named "-z", use:
MarkdownTest.pl -- -z
=over 4
=item B<--script>
Specify the path to the Markdown script to test. Defaults to
"./Markdown.pl". Example:
./MarkdownTest.pl --script ./PHP-Markdown/php-markdown
=item B<--testdir>
Specify the path to a directory containing test data. Defaults to "Tests".
=item B<--tidy>
Flag to turn on using the command line 'tidy' tool to normalize HTML
output before comparing script output to the expected test result.
Assumes that the 'tidy' command is available in your PATH. Defaults to
off.
=back
=head1 BUGS
=head1 VERSION HISTORY
1.0 Mon 13 Dec 2004-2005
1.0.1 Mon 19 Sep 2005
+ Better handling of case when foo.text exists, but foo.html doesn't.
It now prints a message and moves on, rather than dying.
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2004-2005 John Gruber
<http://daringfireball.net/>
All rights reserved.
This is free software; you may redistribute it and/or modify it under
the same terms as Perl itself.
=cut

View File

@ -0,0 +1,17 @@
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
<p>This &amp; that.</p>
<p>4 &lt; 5.</p>
<p>6 > 5.</p>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>

View File

@ -0,0 +1,21 @@
AT&T has an ampersand in their name.
AT&amp;T is another way to write it.
This & that.
4 < 5.
6 > 5.
Here's a [link] [1] with an ampersand in the URL.
Here's a link with an amersand in the link text: [AT&T] [2].
Here's an inline [link](/script?foo=1&bar=2).
Here's an inline [link](</script?foo=1&bar=2>).
[1]: http://example.com/?foo=1&bar=2
[2]: http://att.com/ "AT&T"

View File

@ -0,0 +1,18 @@
<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a></p>
<ul>
<li>In a list?</li>
<li><a href="http://example.com/">http://example.com/</a></li>
<li>It should.</li>
</ul>
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>

View File

@ -0,0 +1,13 @@
Link: <http://example.com/>.
With an ampersand: <http://example.com/?foo=1&bar=2>
* In a list?
* <http://example.com/>
* It should.
> Blockquoted: <http://example.com/>
Auto-links should not occur here: `<http://example.com/>`
or here: <http://example.com/>

View File

@ -0,0 +1,118 @@
<p>These should all get escaped:</p>
<p>Backslash: \</p>
<p>Backtick: `</p>
<p>Asterisk: *</p>
<p>Underscore: _</p>
<p>Left brace: {</p>
<p>Right brace: }</p>
<p>Left bracket: [</p>
<p>Right bracket: ]</p>
<p>Left paren: (</p>
<p>Right paren: )</p>
<p>Greater-than: ></p>
<p>Hash: #</p>
<p>Period: .</p>
<p>Bang: !</p>
<p>Plus: +</p>
<p>Minus: -</p>
<p>These should not, because they occur within a code block:</p>
<pre><code>Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \&gt;
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
</code></pre>
<p>Nor should these, which occur in code spans:</p>
<p>Backslash: <code>\\</code></p>
<p>Backtick: <code>\`</code></p>
<p>Asterisk: <code>\*</code></p>
<p>Underscore: <code>\_</code></p>
<p>Left brace: <code>\{</code></p>
<p>Right brace: <code>\}</code></p>
<p>Left bracket: <code>\[</code></p>
<p>Right bracket: <code>\]</code></p>
<p>Left paren: <code>\(</code></p>
<p>Right paren: <code>\)</code></p>
<p>Greater-than: <code>\&gt;</code></p>
<p>Hash: <code>\#</code></p>
<p>Period: <code>\.</code></p>
<p>Bang: <code>\!</code></p>
<p>Plus: <code>\+</code></p>
<p>Minus: <code>\-</code></p>
<p>These should get escaped, even though they're matching pairs for
other Markdown constructs:</p>
<p>*asterisks*</p>
<p>_underscores_</p>
<p>`backticks`</p>
<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p>
<p>This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.</p>
<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p>

View File

@ -0,0 +1,120 @@
These should all get escaped:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
These should not, because they occur within a code block:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Nor should these, which occur in code spans:
Backslash: `\\`
Backtick: `` \` ``
Asterisk: `\*`
Underscore: `\_`
Left brace: `\{`
Right brace: `\}`
Left bracket: `\[`
Right bracket: `\]`
Left paren: `\(`
Right paren: `\)`
Greater-than: `\>`
Hash: `\#`
Period: `\.`
Bang: `\!`
Plus: `\+`
Minus: `\-`
These should get escaped, even though they're matching pairs for
other Markdown constructs:
\*asterisks\*
\_underscores\_
\`backticks\`
This is a code span with a literal backslash-backtick sequence: `` \` ``
This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.
This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.

View File

@ -0,0 +1,15 @@
<blockquote>
<p>Example:</p>
<pre><code>sub status {
print "working";
}
</code></pre>
<p>Or:</p>
<pre><code>sub status {
return "working";
}
</code></pre>
</blockquote>

View File

@ -0,0 +1,11 @@
> Example:
>
> sub status {
> print "working";
> }
>
> Or:
>
> sub status {
> return "working";
> }

View File

@ -0,0 +1,18 @@
<pre><code>code block on the first line
</code></pre>
<p>Regular text.</p>
<pre><code>code block indented by spaces
</code></pre>
<p>Regular text.</p>
<pre><code>the lines in this block
all contain trailing spaces
</code></pre>
<p>Regular Text.</p>
<pre><code>code block on the last line
</code></pre>

View File

@ -0,0 +1,14 @@
code block on the first line
Regular text.
code block indented by spaces
Regular text.
the lines in this block
all contain trailing spaces
Regular Text.
code block on the last line

View File

@ -0,0 +1,6 @@
<p><code>&lt;test a="</code> content of attribute <code>"&gt;</code></p>
<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p>
<p>Here's how you put <code>`backticks`</code> in a code span.</p>

View File

@ -0,0 +1,6 @@
`<test a="` content of attribute `">`
Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
Here's how you put `` `backticks` `` in a code span.

View File

@ -0,0 +1,8 @@
<p>In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.</p>
<p>Here's one with a bullet.
* criminey.</p>

View File

@ -0,0 +1,8 @@
In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.
Here's one with a bullet.
* criminey.

View File

@ -0,0 +1,71 @@
<p>Dashes:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>---
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>- - -
</code></pre>
<p>Asterisks:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>***
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>* * *
</code></pre>
<p>Underscores:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>___
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>_ _ _
</code></pre>

View File

@ -0,0 +1,67 @@
Dashes:
---
---
---
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
***
***
***
***
* * *
* * *
* * *
* * *
* * *
Underscores:
___
___
___
___
___
_ _ _
_ _ _
_ _ _
_ _ _
_ _ _

View File

@ -0,0 +1,15 @@
<p>Simple block on one line:</p>
<div>foo</div>
<p>And nested without indentation:</p>
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View File

@ -0,0 +1,15 @@
Simple block on one line:
<div>foo</div>
And nested without indentation:
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View File

@ -0,0 +1,72 @@
<p>Here's a simple block:</p>
<div>
foo
</div>
<p>This should be a code block, though:</p>
<pre><code>&lt;div&gt;
foo
&lt;/div&gt;
</code></pre>
<p>As should this:</p>
<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
<div>
<div>
foo
</div>
</div>
</div>
<p>This should just be an HTML comment:</p>
<!-- Comment -->
<p>Multiline:</p>
<!--
Blah
Blah
-->
<p>Code block:</p>
<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr's:</p>
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View File

@ -0,0 +1,69 @@
Here's a simple block:
<div>
foo
</div>
This should be a code block, though:
<div>
foo
</div>
As should this:
<div>foo</div>
Now, nested:
<div>
<div>
<div>
foo
</div>
</div>
</div>
This should just be an HTML comment:
<!-- Comment -->
Multiline:
<!--
Blah
Blah
-->
Code block:
<!-- Comment -->
Just plain comment, with trailing spaces on the line:
<!-- foo -->
Code:
<hr />
Hr's:
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View File

@ -0,0 +1,13 @@
<p>Paragraph one.</p>
<!-- This is a simple comment -->
<!--
This is another comment.
-->
<p>Paragraph two.</p>
<!-- one comment block -- -- with two comments -->
<p>The end.</p>

View File

@ -0,0 +1,13 @@
Paragraph one.
<!-- This is a simple comment -->
<!--
This is another comment.
-->
Paragraph two.
<!-- one comment block -- -- with two comments -->
The end.

View File

@ -0,0 +1,11 @@
<p>Just a <a href="/url/">URL</a>.</p>
<p><a href="/url/" title="title">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p>
<p><a href="">Empty</a>.</p>

View File

@ -0,0 +1,12 @@
Just a [URL](/url/).
[URL and title](/url/ "title").
[URL and title](/url/ "title preceded by two spaces").
[URL and title](/url/ "title preceded by a tab").
[URL and title](/url/ "title has spaces afterward" ).
[Empty]().

View File

@ -0,0 +1,52 @@
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>With <a href="/url/">embedded [brackets]</a>.</p>
<p>Indented <a href="/url">once</a>.</p>
<p>Indented <a href="/url">twice</a>.</p>
<p>Indented <a href="/url">thrice</a>.</p>
<p>Indented [four][] times.</p>
<pre><code>[four]: /url
</code></pre>
<hr />
<p><a href="foo">this</a> should work</p>
<p>So should <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>But not [that] [].</p>
<p>Nor [that][].</p>
<p>Nor [that].</p>
<p>[Something in brackets like <a href="foo">this</a> should work]</p>
<p>[Same with <a href="foo">this</a>.]</p>
<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>
<p>Backslashing should suppress [this] and [this].</p>
<hr />
<p>Here's one where the <a href="/url/">link
breaks</a> across lines.</p>
<p>Here's another where the <a href="/url/">link
breaks</a> across lines, but with a line-ending space.</p>

View File

@ -0,0 +1,71 @@
Foo [bar] [1].
Foo [bar][1].
Foo [bar]
[1].
[1]: /url/ "Title"
With [embedded [brackets]] [b].
Indented [once][].
Indented [twice][].
Indented [thrice][].
Indented [four][] times.
[once]: /url
[twice]: /url
[thrice]: /url
[four]: /url
[b]: /url/
* * *
[this] [this] should work
So should [this][this].
And [this] [].
And [this][].
And [this].
But not [that] [].
Nor [that][].
Nor [that].
[Something in brackets like [this][] should work]
[Same with [this].]
In this case, [this](/somethingelse/) points to something else.
Backslashing should suppress \[this] and [this\].
[this]: foo
* * *
Here's one where the [link
breaks] across lines.
Here's another where the [link
breaks] across lines, but with a line-ending space.
[link breaks]: /url/

View File

@ -0,0 +1,9 @@
<p>This is the <a href="/simple">simple case</a>.</p>
<p>This one has a <a href="/foo">line
break</a>.</p>
<p>This one has a <a href="/foo">line
break</a> with a line-ending space.</p>
<p><a href="/that">this</a> and the <a href="/other">other</a></p>

View File

@ -0,0 +1,20 @@
This is the [simple case].
[simple case]: /simple
This one has a [line
break].
This one has a [line
break] with a line-ending space.
[line break]: /foo
[this] [that] and the [other]
[this]: /this
[that]: /that
[other]: /other

View File

@ -0,0 +1,3 @@
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>

View File

@ -0,0 +1,7 @@
Foo [bar][].
Foo [bar](/url/ "Title with "quotes" inside").
[bar]: /url/ "Title with "quotes" inside"

View File

@ -0,0 +1,314 @@
<h1>Markdown: Basics</h1>
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
<h2>Getting the Gist of Markdown's Formatting Syntax</h2>
<p>This page offers a brief overview of what it's like to use Markdown.
The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.</p>
<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.</p>
<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>
<h2>Paragraphs, Headers, Blockquotes</h2>
<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.</p>
<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>
<p>Markdown:</p>
<pre><code>A First Level Header
====================
A Second Level Header
---------------------
Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.
The quick brown fox jumped over the lazy
dog's back.
### Header 3
&gt; This is a blockquote.
&gt;
&gt; This is the second paragraph in the blockquote.
&gt;
&gt; ## This is an H2 in a blockquote
</code></pre>
<p>Output:</p>
<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
&lt;h2&gt;A Second Level Header&lt;/h2&gt;
&lt;p&gt;Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.&lt;/p&gt;
&lt;p&gt;The quick brown fox jumped over the lazy
dog's back.&lt;/p&gt;
&lt;h3&gt;Header 3&lt;/h3&gt;
&lt;blockquote&gt;
&lt;p&gt;This is a blockquote.&lt;/p&gt;
&lt;p&gt;This is the second paragraph in the blockquote.&lt;/p&gt;
&lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
&lt;/blockquote&gt;
</code></pre>
<h3>Phrase Emphasis</h3>
<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>
<p>Markdown:</p>
<pre><code>Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
</code></pre>
<h2>Lists</h2>
<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
<code>+</code>, and <code>-</code>) as list markers. These three markers are
interchangable; this:</p>
<pre><code>* Candy.
* Gum.
* Booze.
</code></pre>
<p>this:</p>
<pre><code>+ Candy.
+ Gum.
+ Booze.
</code></pre>
<p>and this:</p>
<pre><code>- Candy.
- Gum.
- Booze.
</code></pre>
<p>all produce the same output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;Candy.&lt;/li&gt;
&lt;li&gt;Gum.&lt;/li&gt;
&lt;li&gt;Booze.&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:</p>
<pre><code>1. Red
2. Green
3. Blue
</code></pre>
<p>Output:</p>
<pre><code>&lt;ol&gt;
&lt;li&gt;Red&lt;/li&gt;
&lt;li&gt;Green&lt;/li&gt;
&lt;li&gt;Blue&lt;/li&gt;
&lt;/ol&gt;
</code></pre>
<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:</p>
<pre><code>* A list item.
With multiple paragraphs.
* Another item in the list.
</code></pre>
<p>Output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<h3>Links</h3>
<p>Markdown supports two styles for creating links: <em>inline</em> and
<em>reference</em>. With both styles, you use square brackets to delimit the
text you want to turn into a link.</p>
<p>Inline-style links use parentheses immediately after the link text.
For example:</p>
<pre><code>This is an [example link](http://example.com/).
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Optionally, you may include a title attribute in the parentheses:</p>
<pre><code>This is an [example link](http://example.com/ "With a Title").
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:</p>
<pre><code>I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from &lt;a href="http://search.yahoo.com/"
title="Yahoo Search"&gt;Yahoo&lt;/a&gt; or &lt;a href="http://search.msn.com/"
title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>The title attribute is optional. Link names may contain letters,
numbers and spaces, but are <em>not</em> case sensitive:</p>
<pre><code>I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
&lt;a href="http://www.nytimes.com/"&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<h3>Images</h3>
<p>Image syntax is very much like link syntax.</p>
<p>Inline (titles are optional):</p>
<pre><code>![alt text](/path/to/img.jpg "Title")
</code></pre>
<p>Reference-style:</p>
<pre><code>![alt text][id]
[id]: /path/to/img.jpg "Title"
</code></pre>
<p>Both of the above examples produce the same output:</p>
<pre><code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
</code></pre>
<h3>Code</h3>
<p>In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
<code>&gt;</code>) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:</p>
<pre><code>I strongly recommend against using any `&lt;blink&gt;` tags.
I wish SmartyPants used named entities like `&amp;mdash;`
instead of decimal-encoded entites like `&amp;#8212;`.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I strongly recommend against using any
&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
&lt;p&gt;I wish SmartyPants used named entities like
&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
</code></pre>
<p>To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
and <code>&gt;</code> characters will be escaped automatically.</p>
<p>Markdown:</p>
<pre><code>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
&amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
&amp;lt;/blockquote&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>

View File

@ -0,0 +1,306 @@
Markdown: Basics
================
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
Getting the Gist of Markdown's Formatting Syntax
------------------------------------------------
This page offers a brief overview of what it's like to use Markdown.
The [syntax page] [s] provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.
It's also helpful to simply try Markdown out; the [Dingus] [d] is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.
**Note:** This document is itself written using Markdown; you
can [see the source for it by adding '.text' to the URL] [src].
[s]: /projects/markdown/syntax "Markdown Syntax"
[d]: /projects/markdown/dingus "Markdown Dingus"
[src]: /projects/markdown/basics.text
## Paragraphs, Headers, Blockquotes ##
A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.
Markdown offers two styles of headers: *Setext* and *atx*.
Setext-style headers for `<h1>` and `<h2>` are created by
"underlining" with equal signs (`=`) and hyphens (`-`), respectively.
To create an atx-style header, you put 1-6 hash marks (`#`) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.
Blockquotes are indicated using email-style '`>`' angle brackets.
Markdown:
A First Level Header
====================
A Second Level Header
---------------------
Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.
The quick brown fox jumped over the lazy
dog's back.
### Header 3
> This is a blockquote.
>
> This is the second paragraph in the blockquote.
>
> ## This is an H2 in a blockquote
Output:
<h1>A First Level Header</h1>
<h2>A Second Level Header</h2>
<p>Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.</p>
<p>The quick brown fox jumped over the lazy
dog's back.</p>
<h3>Header 3</h3>
<blockquote>
<p>This is a blockquote.</p>
<p>This is the second paragraph in the blockquote.</p>
<h2>This is an H2 in a blockquote</h2>
</blockquote>
### Phrase Emphasis ###
Markdown uses asterisks and underscores to indicate spans of emphasis.
Markdown:
Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
Output:
<p>Some of these words <em>are emphasized</em>.
Some of these words <em>are emphasized also</em>.</p>
<p>Use two asterisks for <strong>strong emphasis</strong>.
Or, if you prefer, <strong>use two underscores instead</strong>.</p>
## Lists ##
Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`,
`+`, and `-`) as list markers. These three markers are
interchangable; this:
* Candy.
* Gum.
* Booze.
this:
+ Candy.
+ Gum.
+ Booze.
and this:
- Candy.
- Gum.
- Booze.
all produce the same output:
<ul>
<li>Candy.</li>
<li>Gum.</li>
<li>Booze.</li>
</ul>
Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:
1. Red
2. Green
3. Blue
Output:
<ol>
<li>Red</li>
<li>Green</li>
<li>Blue</li>
</ol>
If you put blank lines between items, you'll get `<p>` tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:
* A list item.
With multiple paragraphs.
* Another item in the list.
Output:
<ul>
<li><p>A list item.</p>
<p>With multiple paragraphs.</p></li>
<li><p>Another item in the list.</p></li>
</ul>
### Links ###
Markdown supports two styles for creating links: *inline* and
*reference*. With both styles, you use square brackets to delimit the
text you want to turn into a link.
Inline-style links use parentheses immediately after the link text.
For example:
This is an [example link](http://example.com/).
Output:
<p>This is an <a href="http://example.com/">
example link</a>.</p>
Optionally, you may include a title attribute in the parentheses:
This is an [example link](http://example.com/ "With a Title").
Output:
<p>This is an <a href="http://example.com/" title="With a Title">
example link</a>.</p>
Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:
I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
Output:
<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from <a href="http://search.yahoo.com/"
title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
title="MSN Search">MSN</a>.</p>
The title attribute is optional. Link names may contain letters,
numbers and spaces, but are *not* case sensitive:
I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
Output:
<p>I start my morning with a cup of coffee and
<a href="http://www.nytimes.com/">The New York Times</a>.</p>
### Images ###
Image syntax is very much like link syntax.
Inline (titles are optional):
![alt text](/path/to/img.jpg "Title")
Reference-style:
![alt text][id]
[id]: /path/to/img.jpg "Title"
Both of the above examples produce the same output:
<img src="/path/to/img.jpg" alt="alt text" title="Title" />
### Code ###
In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (`&`) and angle brackets (`<` or
`>`) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:
I strongly recommend against using any `<blink>` tags.
I wish SmartyPants used named entities like `&mdash;`
instead of decimal-encoded entites like `&#8212;`.
Output:
<p>I strongly recommend against using any
<code>&lt;blink&gt;</code> tags.</p>
<p>I wish SmartyPants used named entities like
<code>&amp;mdash;</code> instead of decimal-encoded
entites like <code>&amp;#8212;</code>.</p>
To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`,
and `>` characters will be escaped automatically.
Markdown:
If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
<blockquote>
<p>For example.</p>
</blockquote>
Output:
<p>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:</p>
<pre><code>&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>

View File

@ -0,0 +1,942 @@
<h1>Markdown: Syntax</h1>
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
<li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
<ul>
<li><a href="#overview">Overview</a>
<ul>
<li><a href="#philosophy">Philosophy</a></li>
<li><a href="#html">Inline HTML</a></li>
<li><a href="#autoescape">Automatic Escaping for Special Characters</a></li>
</ul></li>
<li><a href="#block">Block Elements</a>
<ul>
<li><a href="#p">Paragraphs and Line Breaks</a></li>
<li><a href="#header">Headers</a></li>
<li><a href="#blockquote">Blockquotes</a></li>
<li><a href="#list">Lists</a></li>
<li><a href="#precode">Code Blocks</a></li>
<li><a href="#hr">Horizontal Rules</a></li>
</ul></li>
<li><a href="#span">Span Elements</a>
<ul>
<li><a href="#link">Links</a></li>
<li><a href="#em">Emphasis</a></li>
<li><a href="#code">Code</a></li>
<li><a href="#img">Images</a></li>
</ul></li>
<li><a href="#misc">Miscellaneous</a>
<ul>
<li><a href="#backslash">Backslash Escapes</a></li>
<li><a href="#autolink">Automatic Links</a></li>
</ul></li>
</ul>
<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.</p>
<hr />
<h2 id="overview">Overview</h2>
<h3 id="philosophy">Philosophy</h3>
<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p>
<p>Readability, however, is emphasized above all else. A Markdown-formatted
document should be publishable as-is, as plain text, without looking
like it's been marked up with tags or formatting instructions. While
Markdown's syntax has been influenced by several existing text-to-HTML
filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>,
<a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of
inspiration for Markdown's syntax is the format of plain text email.</p>
<p>To this end, Markdown's syntax is comprised entirely of punctuation
characters, which punctuation characters have been carefully chosen so
as to look like what they mean. E.g., asterisks around a word actually
look like *emphasis*. Markdown lists look like, well, lists. Even
blockquotes look like quoted passages of text, assuming you've ever
used email.</p>
<h3 id="html">Inline HTML</h3>
<p>Markdown's syntax is intended for one purpose: to be used as a
format for <em>writing</em> for the web.</p>
<p>Markdown is not a replacement for HTML, or even close to it. Its
syntax is very small, corresponding only to a very small subset of
HTML tags. The idea is <em>not</em> to create a syntax that makes it easier
to insert HTML tags. In my opinion, HTML tags are already easy to
insert. The idea for Markdown is to make it easy to read, write, and
edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em>
format. Thus, Markdown's formatting syntax only addresses issues that
can be conveyed in plain text.</p>
<p>For any markup that is not covered by Markdown's syntax, you simply
use HTML itself. There's no need to preface it or delimit it to
indicate that you're switching from Markdown to HTML; you just use
the tags.</p>
<p>The only restrictions are that block-level HTML elements -- e.g. <code>&lt;div&gt;</code>,
<code>&lt;table&gt;</code>, <code>&lt;pre&gt;</code>, <code>&lt;p&gt;</code>, etc. -- must be separated from surrounding
content by blank lines, and the start and end tags of the block should
not be indented with tabs or spaces. Markdown is smart enough not
to add extra (unwanted) <code>&lt;p&gt;</code> tags around HTML block-level tags.</p>
<p>For example, to add an HTML table to a Markdown article:</p>
<pre><code>This is a regular paragraph.
&lt;table&gt;
&lt;tr&gt;
&lt;td&gt;Foo&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;
This is another regular paragraph.
</code></pre>
<p>Note that Markdown formatting syntax is not processed within block-level
HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an
HTML block.</p>
<p>Span-level HTML tags -- e.g. <code>&lt;span&gt;</code>, <code>&lt;cite&gt;</code>, or <code>&lt;del&gt;</code> -- can be
used anywhere in a Markdown paragraph, list item, or header. If you
want, you can even use HTML tags instead of Markdown formatting; e.g. if
you'd prefer to use HTML <code>&lt;a&gt;</code> or <code>&lt;img&gt;</code> tags instead of Markdown's
link or image syntax, go right ahead.</p>
<p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within
span-level tags.</p>
<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
<p>In HTML, there are two characters that demand special treatment: <code>&lt;</code>
and <code>&amp;</code>. Left angle brackets are used to start tags; ampersands are
used to denote HTML entities. If you want to use them as literal
characters, you must escape them as entities, e.g. <code>&amp;lt;</code>, and
<code>&amp;amp;</code>.</p>
<p>Ampersands in particular are bedeviling for web writers. If you want to
write about 'AT&amp;T', you need to write '<code>AT&amp;amp;T</code>'. You even need to
escape ampersands within URLs. Thus, if you want to link to:</p>
<pre><code>http://images.google.com/images?num=30&amp;q=larry+bird
</code></pre>
<p>you need to encode the URL as:</p>
<pre><code>http://images.google.com/images?num=30&amp;amp;q=larry+bird
</code></pre>
<p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to
forget, and is probably the single most common source of HTML validation
errors in otherwise well-marked-up web sites.</p>
<p>Markdown allows you to use these characters naturally, taking care of
all the necessary escaping for you. If you use an ampersand as part of
an HTML entity, it remains unchanged; otherwise it will be translated
into <code>&amp;amp;</code>.</p>
<p>So, if you want to include a copyright symbol in your article, you can write:</p>
<pre><code>&amp;copy;
</code></pre>
<p>and Markdown will leave it alone. But if you write:</p>
<pre><code>AT&amp;T
</code></pre>
<p>Markdown will translate it to:</p>
<pre><code>AT&amp;amp;T
</code></pre>
<p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use
angle brackets as delimiters for HTML tags, Markdown will treat them as
such. But if you write:</p>
<pre><code>4 &lt; 5
</code></pre>
<p>Markdown will translate it to:</p>
<pre><code>4 &amp;lt; 5
</code></pre>
<p>However, inside Markdown code spans and blocks, angle brackets and
ampersands are <em>always</em> encoded automatically. This makes it easy to use
Markdown to write about HTML code. (As opposed to raw HTML, which is a
terrible format for writing about HTML syntax, because every single <code>&lt;</code>
and <code>&amp;</code> in your example code needs to be escaped.)</p>
<hr />
<h2 id="block">Block Elements</h2>
<h3 id="p">Paragraphs and Line Breaks</h3>
<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing but spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>The implication of the "one or more consecutive lines of text" rule is
that Markdown supports "hard-wrapped" text paragraphs. This differs
significantly from most other text-to-HTML formatters (including Movable
Type's "Convert Line Breaks" option) which translate every line break
character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
<p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
end a line with two or more spaces, then type return.</p>
<p>Yes, this takes a tad more effort to create a <code>&lt;br /&gt;</code>, but a simplistic
"every line break is a <code>&lt;br /&gt;</code>" rule wouldn't work for Markdown.
Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a>
work best -- and look better -- when you format them with hard breaks.</p>
<h3 id="header">Headers</h3>
<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.</p>
<p>Setext-style headers are "underlined" using equal signs (for first-level
headers) and dashes (for second-level headers). For example:</p>
<pre><code>This is an H1
=============
This is an H2
-------------
</code></pre>
<p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.</p>
<p>Atx-style headers use 1-6 hash characters at the start of the line,
corresponding to header levels 1-6. For example:</p>
<pre><code># This is an H1
## This is an H2
###### This is an H6
</code></pre>
<p>Optionally, you may "close" atx-style headers. This is purely
cosmetic -- you can use this if you think it looks better. The
closing hashes don't even need to match the number of hashes
used to open the header. (The number of opening hashes
determines the header level.) :</p>
<pre><code># This is an H1 #
## This is an H2 ##
### This is an H3 ######
</code></pre>
<h3 id="blockquote">Blockquotes</h3>
<p>Markdown uses email-style <code>&gt;</code> characters for blockquoting. If you're
familiar with quoting passages of text in an email message, then you
know how to create a blockquote in Markdown. It looks best if you hard
wrap the text and put a <code>&gt;</code> before every line:</p>
<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
&gt; consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
&gt; Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt;
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
&gt; id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
line of a hard-wrapped paragraph:</p>
<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
adding additional levels of <code>&gt;</code>:</p>
<pre><code>&gt; This is the first level of quoting.
&gt;
&gt; &gt; This is nested blockquote.
&gt;
&gt; Back to the first level.
</code></pre>
<p>Blockquotes can contain other Markdown elements, including headers, lists,
and code blocks:</p>
<pre><code>&gt; ## This is a header.
&gt;
&gt; 1. This is the first list item.
&gt; 2. This is the second list item.
&gt;
&gt; Here's some example code:
&gt;
&gt; return shell_exec("echo $input | $markdown_script");
</code></pre>
<p>Any decent text editor should make email-style quoting easy. For
example, with BBEdit, you can make a selection and choose Increase
Quote Level from the Text menu.</p>
<h3 id="list">Lists</h3>
<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p>
<p>Unordered lists use asterisks, pluses, and hyphens -- interchangably
-- as list markers:</p>
<pre><code>* Red
* Green
* Blue
</code></pre>
<p>is equivalent to:</p>
<pre><code>+ Red
+ Green
+ Blue
</code></pre>
<p>and:</p>
<pre><code>- Red
- Green
- Blue
</code></pre>
<p>Ordered lists use numbers followed by periods:</p>
<pre><code>1. Bird
2. McHale
3. Parish
</code></pre>
<p>It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:</p>
<pre><code>&lt;ol&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;McHale&lt;/li&gt;
&lt;li&gt;Parish&lt;/li&gt;
&lt;/ol&gt;
</code></pre>
<p>If you instead wrote the list in Markdown like this:</p>
<pre><code>1. Bird
1. McHale
1. Parish
</code></pre>
<p>or even:</p>
<pre><code>3. Bird
1. McHale
8. Parish
</code></pre>
<p>you'd get the exact same HTML output. The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.</p>
<p>If you do use lazy list numbering, however, you should still start the
list with the number 1. At some point in the future, Markdown may support
starting ordered lists at an arbitrary number.</p>
<p>List markers typically start at the left margin, but may be indented by
up to three spaces. List markers must be followed by one or more spaces
or a tab.</p>
<p>To make lists look nice, you can wrap items with hanging indents:</p>
<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>But if you want to be lazy, you don't have to:</p>
<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>If list items are separated by blank lines, Markdown will wrap the
items in <code>&lt;p&gt;</code> tags in the HTML output. For example, this input:</p>
<pre><code>* Bird
* Magic
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;Magic&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>But this:</p>
<pre><code>* Bird
* Magic
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;Bird&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Magic&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>List items may consist of multiple paragraphs. Each subsequent
paragraph in a list item must be intended by either 4 spaces
or one tab:</p>
<pre><code>1. This is a list item with two paragraphs. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit. Aliquam hendrerit
mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet
vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
sit amet velit.
2. Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>It looks nice if you indent every line of the subsequent
paragraphs, but here again, Markdown will allow you to be
lazy:</p>
<pre><code>* This is a list item with two paragraphs.
This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.
* Another item in the same list.
</code></pre>
<p>To put a blockquote within a list item, the blockquote's <code>&gt;</code>
delimiters need to be indented:</p>
<pre><code>* A list item with a blockquote:
&gt; This is a blockquote
&gt; inside a list item.
</code></pre>
<p>To put a code block within a list item, the code block needs
to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
<pre><code>* A list item with a code block:
&lt;code goes here&gt;
</code></pre>
<p>It's worth noting that it's possible to trigger an ordered list by
accident, by writing something like this:</p>
<pre><code>1986. What a great season.
</code></pre>
<p>In other words, a <em>number-period-space</em> sequence at the beginning of a
line. To avoid this, you can backslash-escape the period:</p>
<pre><code>1986\. What a great season.
</code></pre>
<h3 id="precode">Code Blocks</h3>
<p>Pre-formatted code blocks are used for writing about programming or
markup source code. Rather than forming normal paragraphs, the lines
of a code block are interpreted literally. Markdown wraps a code block
in both <code>&lt;pre&gt;</code> and <code>&lt;code&gt;</code> tags.</p>
<p>To produce a code block in Markdown, simply indent every line of the
block by at least 4 spaces or 1 tab. For example, given this input:</p>
<pre><code>This is a normal paragraph:
This is a code block.
</code></pre>
<p>Markdown will generate:</p>
<pre><code>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;This is a code block.
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>One level of indentation -- 4 spaces or 1 tab -- is removed from each
line of the code block. For example, this:</p>
<pre><code>Here is an example of AppleScript:
tell application "Foo"
beep
end tell
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;tell application "Foo"
beep
end tell
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>A code block continues until it reaches a line that is not indented
(or the end of the article).</p>
<p>Within a code block, ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> and <code>&gt;</code>)
are automatically converted into HTML entities. This makes it very
easy to include example HTML source code using Markdown -- just paste
it and indent it, and Markdown will handle the hassle of encoding the
ampersands and angle brackets. For example, this:</p>
<pre><code> &lt;div class="footer"&gt;
&amp;copy; 2004 Foo Corporation
&lt;/div&gt;
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;pre&gt;&lt;code&gt;&amp;lt;div class="footer"&amp;gt;
&amp;amp;copy; 2004 Foo Corporation
&amp;lt;/div&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>Regular Markdown syntax is not processed within code blocks. E.g.,
asterisks are just literal asterisks within a code block. This means
it's also easy to use Markdown to write about Markdown's own syntax.</p>
<h3 id="hr">Horizontal Rules</h3>
<p>You can produce a horizontal rule tag (<code>&lt;hr /&gt;</code>) by placing three or
more hyphens, asterisks, or underscores on a line by themselves. If you
wish, you may use spaces between the hyphens or asterisks. Each of the
following lines will produce a horizontal rule:</p>
<pre><code>* * *
***
*****
- - -
---------------------------------------
_ _ _
</code></pre>
<hr />
<h2 id="span">Span Elements</h2>
<h3 id="link">Links</h3>
<p>Markdown supports two styles of links: <em>inline</em> and <em>reference</em>.</p>
<p>In both styles, the link text is delimited by [square brackets].</p>
<p>To create an inline link, use a set of regular parentheses immediately
after the link text's closing square bracket. Inside the parentheses,
put the URL where you want the link to point, along with an <em>optional</em>
title for the link, surrounded in quotes. For example:</p>
<pre><code>This is [an example](http://example.com/ "Title") inline link.
[This link](http://example.net/) has no title attribute.
</code></pre>
<p>Will produce:</p>
<pre><code>&lt;p&gt;This is &lt;a href="http://example.com/" title="Title"&gt;
an example&lt;/a&gt; inline link.&lt;/p&gt;
&lt;p&gt;&lt;a href="http://example.net/"&gt;This link&lt;/a&gt; has no
title attribute.&lt;/p&gt;
</code></pre>
<p>If you're referring to a local resource on the same server, you can
use relative paths:</p>
<pre><code>See my [About](/about/) page for details.
</code></pre>
<p>Reference-style links use a second set of square brackets, inside
which you place a label of your choosing to identify the link:</p>
<pre><code>This is [an example][id] reference-style link.
</code></pre>
<p>You can optionally use a space to separate the sets of brackets:</p>
<pre><code>This is [an example] [id] reference-style link.
</code></pre>
<p>Then, anywhere in the document, you define your link label like this,
on a line by itself:</p>
<pre><code>[id]: http://example.com/ "Optional Title Here"
</code></pre>
<p>That is:</p>
<ul>
<li>Square brackets containing the link identifier (optionally
indented from the left margin using up to three spaces);</li>
<li>followed by a colon;</li>
<li>followed by one or more spaces (or tabs);</li>
<li>followed by the URL for the link;</li>
<li>optionally followed by a title attribute for the link, enclosed
in double or single quotes.</li>
</ul>
<p>The link URL may, optionally, be surrounded by angle brackets:</p>
<pre><code>[id]: &lt;http://example.com/&gt; "Optional Title Here"
</code></pre>
<p>You can put the title attribute on the next line and use extra spaces
or tabs for padding, which tends to look better with longer URLs:</p>
<pre><code>[id]: http://example.com/longish/path/to/resource/here
"Optional Title Here"
</code></pre>
<p>Link definitions are only used for creating links during Markdown
processing, and are stripped from your document in the HTML output.</p>
<p>Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p>
<pre><code>[link text][a]
[link text][A]
</code></pre>
<p>are equivalent.</p>
<p>The <em>implicit link name</em> shortcut allows you to omit the name of the
link, in which case the link text itself is used as the name.
Just use an empty set of square brackets -- e.g., to link the word
"Google" to the google.com web site, you could simply write:</p>
<pre><code>[Google][]
</code></pre>
<p>And then define the link:</p>
<pre><code>[Google]: http://google.com/
</code></pre>
<p>Because link names may contain spaces, this shortcut even works for
multiple words in the link text:</p>
<pre><code>Visit [Daring Fireball][] for more information.
</code></pre>
<p>And then define the link:</p>
<pre><code>[Daring Fireball]: http://daringfireball.net/
</code></pre>
<p>Link definitions can be placed anywhere in your Markdown document. I
tend to put them immediately after each paragraph in which they're
used, but if you want, you can put them all at the end of your
document, sort of like footnotes.</p>
<p>Here's an example of reference links in action:</p>
<pre><code>I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Using the implicit link name shortcut, you could instead write:</p>
<pre><code>I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].
[google]: http://google.com/ "Google"
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Both of the above examples will produce the following HTML output:</p>
<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from
&lt;a href="http://search.yahoo.com/" title="Yahoo Search"&gt;Yahoo&lt;/a&gt;
or &lt;a href="http://search.msn.com/" title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>For comparison, here is the same paragraph written using
Markdown's inline link style:</p>
<pre><code>I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").
</code></pre>
<p>The point of reference-style links is not that they're easier to
write. The point is that with reference-style links, your document
source is vastly more readable. Compare the above examples: using
reference-style links, the paragraph itself is only 81 characters
long; with inline-style links, it's 176 characters; and as raw HTML,
it's 234 characters. In the raw HTML, there's more markup than there
is text.</p>
<p>With Markdown's reference-style links, a source document much more
closely resembles the final output, as rendered in a browser. By
allowing you to move the markup-related metadata out of the paragraph,
you can add links without interrupting the narrative flow of your
prose.</p>
<h3 id="em">Emphasis</h3>
<p>Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of
emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an
HTML <code>&lt;em&gt;</code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML
<code>&lt;strong&gt;</code> tag. E.g., this input:</p>
<pre><code>*single asterisks*
_single underscores_
**double asterisks**
__double underscores__
</code></pre>
<p>will produce:</p>
<pre><code>&lt;em&gt;single asterisks&lt;/em&gt;
&lt;em&gt;single underscores&lt;/em&gt;
&lt;strong&gt;double asterisks&lt;/strong&gt;
&lt;strong&gt;double underscores&lt;/strong&gt;
</code></pre>
<p>You can use whichever style you prefer; the lone restriction is that
the same character must be used to open and close an emphasis span.</p>
<p>Emphasis can be used in the middle of a word:</p>
<pre><code>un*fucking*believable
</code></pre>
<p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a
literal asterisk or underscore.</p>
<p>To produce a literal asterisk or underscore at a position where it
would otherwise be used as an emphasis delimiter, you can backslash
escape it:</p>
<pre><code>\*this text is surrounded by literal asterisks\*
</code></pre>
<h3 id="code">Code</h3>
<p>To indicate a span of code, wrap it with backtick quotes (<code>`</code>).
Unlike a pre-formatted code block, a code span indicates code within a
normal paragraph. For example:</p>
<pre><code>Use the `printf()` function.
</code></pre>
<p>will produce:</p>
<pre><code>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;
</code></pre>
<p>To include a literal backtick character within a code span, you can use
multiple backticks as the opening and closing delimiters:</p>
<pre><code>``There is a literal backtick (`) here.``
</code></pre>
<p>which will produce this:</p>
<pre><code>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;
</code></pre>
<p>The backtick delimiters surrounding a code span may include spaces --
one after the opening, one before the closing. This allows you to place
literal backtick characters at the beginning or end of a code span:</p>
<pre><code>A single backtick in a code span: `` ` ``
A backtick-delimited string in a code span: `` `foo` ``
</code></pre>
<p>will produce:</p>
<pre><code>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
&lt;p&gt;A backtick-delimited string in a code span: &lt;code&gt;`foo`&lt;/code&gt;&lt;/p&gt;
</code></pre>
<p>With a code span, ampersands and angle brackets are encoded as HTML
entities automatically, which makes it easy to include example HTML
tags. Markdown will turn this:</p>
<pre><code>Please don't use any `&lt;blink&gt;` tags.
</code></pre>
<p>into:</p>
<pre><code>&lt;p&gt;Please don't use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
</code></pre>
<p>You can write this:</p>
<pre><code>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.
</code></pre>
<p>to produce:</p>
<pre><code>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
equivalent of &lt;code&gt;&amp;amp;mdash;&lt;/code&gt;.&lt;/p&gt;
</code></pre>
<h3 id="img">Images</h3>
<p>Admittedly, it's fairly difficult to devise a "natural" syntax for
placing images into a plain text document format.</p>
<p>Markdown uses an image syntax that is intended to resemble the syntax
for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p>
<p>Inline image syntax looks like this:</p>
<pre><code>![Alt text](/path/to/img.jpg)
![Alt text](/path/to/img.jpg "Optional title")
</code></pre>
<p>That is:</p>
<ul>
<li>An exclamation mark: <code>!</code>;</li>
<li>followed by a set of square brackets, containing the <code>alt</code>
attribute text for the image;</li>
<li>followed by a set of parentheses, containing the URL or path to
the image, and an optional <code>title</code> attribute enclosed in double
or single quotes.</li>
</ul>
<p>Reference-style image syntax looks like this:</p>
<pre><code>![Alt text][id]
</code></pre>
<p>Where "id" is the name of a defined image reference. Image references
are defined using syntax identical to link references:</p>
<pre><code>[id]: url/to/image "Optional title attribute"
</code></pre>
<p>As of this writing, Markdown has no syntax for specifying the
dimensions of an image; if this is important to you, you can simply
use regular HTML <code>&lt;img&gt;</code> tags.</p>
<hr />
<h2 id="misc">Miscellaneous</h2>
<h3 id="autolink">Automatic Links</h3>
<p>Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p>
<pre><code>&lt;http://example.com/&gt;
</code></pre>
<p>Markdown will turn this into:</p>
<pre><code>&lt;a href="http://example.com/"&gt;http://example.com/&lt;/a&gt;
</code></pre>
<p>Automatic links for email addresses work similarly, except that
Markdown will also perform a bit of randomized decimal and hex
entity-encoding to help obscure your address from address-harvesting
spambots. For example, Markdown will turn this:</p>
<pre><code>&lt;address@example.com&gt;
</code></pre>
<p>into something like this:</p>
<pre><code>&lt;a href="&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;
&amp;#109;"&gt;&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;
&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;&amp;#109;&lt;/a&gt;
</code></pre>
<p>which will render in a browser as a clickable link to "address@example.com".</p>
<p>(This sort of entity-encoding trick will indeed fool many, if not
most, address-harvesting bots, but it definitely won't fool all of
them. It's better than nothing, but an address published in this way
will probably eventually start receiving spam.)</p>
<h3 id="backslash">Backslash Escapes</h3>
<p>Markdown allows you to use backslash escapes to generate literal
characters which would otherwise have special meaning in Markdown's
formatting syntax. For example, if you wanted to surround a word with
literal asterisks (instead of an HTML <code>&lt;em&gt;</code> tag), you can add backslashes
before the asterisks, like this:</p>
<pre><code>\*literal asterisks\*
</code></pre>
<p>Markdown provides backslash escapes for the following characters:</p>
<pre><code>\ backslash
` backtick
* asterisk
_ underscore
{} curly braces
[] square brackets
() parentheses
# hash mark
+ plus sign
- minus sign (hyphen)
. dot
! exclamation mark
</code></pre>

View File

@ -0,0 +1,888 @@
Markdown: Syntax
================
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
<li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
* [Overview](#overview)
* [Philosophy](#philosophy)
* [Inline HTML](#html)
* [Automatic Escaping for Special Characters](#autoescape)
* [Block Elements](#block)
* [Paragraphs and Line Breaks](#p)
* [Headers](#header)
* [Blockquotes](#blockquote)
* [Lists](#list)
* [Code Blocks](#precode)
* [Horizontal Rules](#hr)
* [Span Elements](#span)
* [Links](#link)
* [Emphasis](#em)
* [Code](#code)
* [Images](#img)
* [Miscellaneous](#misc)
* [Backslash Escapes](#backslash)
* [Automatic Links](#autolink)
**Note:** This document is itself written using Markdown; you
can [see the source for it by adding '.text' to the URL][src].
[src]: /projects/markdown/syntax.text
* * *
<h2 id="overview">Overview</h2>
<h3 id="philosophy">Philosophy</h3>
Markdown is intended to be as easy-to-read and easy-to-write as is feasible.
Readability, however, is emphasized above all else. A Markdown-formatted
document should be publishable as-is, as plain text, without looking
like it's been marked up with tags or formatting instructions. While
Markdown's syntax has been influenced by several existing text-to-HTML
filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4],
[Grutatext] [5], and [EtText] [6] -- the single biggest source of
inspiration for Markdown's syntax is the format of plain text email.
[1]: http://docutils.sourceforge.net/mirror/setext.html
[2]: http://www.aaronsw.com/2002/atx/
[3]: http://textism.com/tools/textile/
[4]: http://docutils.sourceforge.net/rst.html
[5]: http://www.triptico.com/software/grutatxt.html
[6]: http://ettext.taint.org/doc/
To this end, Markdown's syntax is comprised entirely of punctuation
characters, which punctuation characters have been carefully chosen so
as to look like what they mean. E.g., asterisks around a word actually
look like \*emphasis\*. Markdown lists look like, well, lists. Even
blockquotes look like quoted passages of text, assuming you've ever
used email.
<h3 id="html">Inline HTML</h3>
Markdown's syntax is intended for one purpose: to be used as a
format for *writing* for the web.
Markdown is not a replacement for HTML, or even close to it. Its
syntax is very small, corresponding only to a very small subset of
HTML tags. The idea is *not* to create a syntax that makes it easier
to insert HTML tags. In my opinion, HTML tags are already easy to
insert. The idea for Markdown is to make it easy to read, write, and
edit prose. HTML is a *publishing* format; Markdown is a *writing*
format. Thus, Markdown's formatting syntax only addresses issues that
can be conveyed in plain text.
For any markup that is not covered by Markdown's syntax, you simply
use HTML itself. There's no need to preface it or delimit it to
indicate that you're switching from Markdown to HTML; you just use
the tags.
The only restrictions are that block-level HTML elements -- e.g. `<div>`,
`<table>`, `<pre>`, `<p>`, etc. -- must be separated from surrounding
content by blank lines, and the start and end tags of the block should
not be indented with tabs or spaces. Markdown is smart enough not
to add extra (unwanted) `<p>` tags around HTML block-level tags.
For example, to add an HTML table to a Markdown article:
This is a regular paragraph.
<table>
<tr>
<td>Foo</td>
</tr>
</table>
This is another regular paragraph.
Note that Markdown formatting syntax is not processed within block-level
HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an
HTML block.
Span-level HTML tags -- e.g. `<span>`, `<cite>`, or `<del>` -- can be
used anywhere in a Markdown paragraph, list item, or header. If you
want, you can even use HTML tags instead of Markdown formatting; e.g. if
you'd prefer to use HTML `<a>` or `<img>` tags instead of Markdown's
link or image syntax, go right ahead.
Unlike block-level HTML tags, Markdown syntax *is* processed within
span-level tags.
<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
In HTML, there are two characters that demand special treatment: `<`
and `&`. Left angle brackets are used to start tags; ampersands are
used to denote HTML entities. If you want to use them as literal
characters, you must escape them as entities, e.g. `&lt;`, and
`&amp;`.
Ampersands in particular are bedeviling for web writers. If you want to
write about 'AT&T', you need to write '`AT&amp;T`'. You even need to
escape ampersands within URLs. Thus, if you want to link to:
http://images.google.com/images?num=30&q=larry+bird
you need to encode the URL as:
http://images.google.com/images?num=30&amp;q=larry+bird
in your anchor tag `href` attribute. Needless to say, this is easy to
forget, and is probably the single most common source of HTML validation
errors in otherwise well-marked-up web sites.
Markdown allows you to use these characters naturally, taking care of
all the necessary escaping for you. If you use an ampersand as part of
an HTML entity, it remains unchanged; otherwise it will be translated
into `&amp;`.
So, if you want to include a copyright symbol in your article, you can write:
&copy;
and Markdown will leave it alone. But if you write:
AT&T
Markdown will translate it to:
AT&amp;T
Similarly, because Markdown supports [inline HTML](#html), if you use
angle brackets as delimiters for HTML tags, Markdown will treat them as
such. But if you write:
4 < 5
Markdown will translate it to:
4 &lt; 5
However, inside Markdown code spans and blocks, angle brackets and
ampersands are *always* encoded automatically. This makes it easy to use
Markdown to write about HTML code. (As opposed to raw HTML, which is a
terrible format for writing about HTML syntax, because every single `<`
and `&` in your example code needs to be escaped.)
* * *
<h2 id="block">Block Elements</h2>
<h3 id="p">Paragraphs and Line Breaks</h3>
A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing but spaces or tabs is considered
blank.) Normal paragraphs should not be indented with spaces or tabs.
The implication of the "one or more consecutive lines of text" rule is
that Markdown supports "hard-wrapped" text paragraphs. This differs
significantly from most other text-to-HTML formatters (including Movable
Type's "Convert Line Breaks" option) which translate every line break
character in a paragraph into a `<br />` tag.
When you *do* want to insert a `<br />` break tag using Markdown, you
end a line with two or more spaces, then type return.
Yes, this takes a tad more effort to create a `<br />`, but a simplistic
"every line break is a `<br />`" rule wouldn't work for Markdown.
Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l]
work best -- and look better -- when you format them with hard breaks.
[bq]: #blockquote
[l]: #list
<h3 id="header">Headers</h3>
Markdown supports two styles of headers, [Setext] [1] and [atx] [2].
Setext-style headers are "underlined" using equal signs (for first-level
headers) and dashes (for second-level headers). For example:
This is an H1
=============
This is an H2
-------------
Any number of underlining `=`'s or `-`'s will work.
Atx-style headers use 1-6 hash characters at the start of the line,
corresponding to header levels 1-6. For example:
# This is an H1
## This is an H2
###### This is an H6
Optionally, you may "close" atx-style headers. This is purely
cosmetic -- you can use this if you think it looks better. The
closing hashes don't even need to match the number of hashes
used to open the header. (The number of opening hashes
determines the header level.) :
# This is an H1 #
## This is an H2 ##
### This is an H3 ######
<h3 id="blockquote">Blockquotes</h3>
Markdown uses email-style `>` characters for blockquoting. If you're
familiar with quoting passages of text in an email message, then you
know how to create a blockquote in Markdown. It looks best if you hard
wrap the text and put a `>` before every line:
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
>
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
> id sem consectetuer libero luctus adipiscing.
Markdown allows you to be lazy and only put the `>` before the first
line of a hard-wrapped paragraph:
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.
Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
adding additional levels of `>`:
> This is the first level of quoting.
>
> > This is nested blockquote.
>
> Back to the first level.
Blockquotes can contain other Markdown elements, including headers, lists,
and code blocks:
> ## This is a header.
>
> 1. This is the first list item.
> 2. This is the second list item.
>
> Here's some example code:
>
> return shell_exec("echo $input | $markdown_script");
Any decent text editor should make email-style quoting easy. For
example, with BBEdit, you can make a selection and choose Increase
Quote Level from the Text menu.
<h3 id="list">Lists</h3>
Markdown supports ordered (numbered) and unordered (bulleted) lists.
Unordered lists use asterisks, pluses, and hyphens -- interchangeably
-- as list markers:
* Red
* Green
* Blue
is equivalent to:
+ Red
+ Green
+ Blue
and:
- Red
- Green
- Blue
Ordered lists use numbers followed by periods:
1. Bird
2. McHale
3. Parish
It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:
<ol>
<li>Bird</li>
<li>McHale</li>
<li>Parish</li>
</ol>
If you instead wrote the list in Markdown like this:
1. Bird
1. McHale
1. Parish
or even:
3. Bird
1. McHale
8. Parish
you'd get the exact same HTML output. The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.
If you do use lazy list numbering, however, you should still start the
list with the number 1. At some point in the future, Markdown may support
starting ordered lists at an arbitrary number.
List markers typically start at the left margin, but may be indented by
up to three spaces. List markers must be followed by one or more spaces
or a tab.
To make lists look nice, you can wrap items with hanging indents:
* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
But if you want to be lazy, you don't have to:
* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
If list items are separated by blank lines, Markdown will wrap the
items in `<p>` tags in the HTML output. For example, this input:
* Bird
* Magic
will turn into:
<ul>
<li>Bird</li>
<li>Magic</li>
</ul>
But this:
* Bird
* Magic
will turn into:
<ul>
<li><p>Bird</p></li>
<li><p>Magic</p></li>
</ul>
List items may consist of multiple paragraphs. Each subsequent
paragraph in a list item must be indented by either 4 spaces
or one tab:
1. This is a list item with two paragraphs. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit. Aliquam hendrerit
mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet
vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
sit amet velit.
2. Suspendisse id sem consectetuer libero luctus adipiscing.
It looks nice if you indent every line of the subsequent
paragraphs, but here again, Markdown will allow you to be
lazy:
* This is a list item with two paragraphs.
This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.
* Another item in the same list.
To put a blockquote within a list item, the blockquote's `>`
delimiters need to be indented:
* A list item with a blockquote:
> This is a blockquote
> inside a list item.
To put a code block within a list item, the code block needs
to be indented *twice* -- 8 spaces or two tabs:
* A list item with a code block:
<code goes here>
It's worth noting that it's possible to trigger an ordered list by
accident, by writing something like this:
1986. What a great season.
In other words, a *number-period-space* sequence at the beginning of a
line. To avoid this, you can backslash-escape the period:
1986\. What a great season.
<h3 id="precode">Code Blocks</h3>
Pre-formatted code blocks are used for writing about programming or
markup source code. Rather than forming normal paragraphs, the lines
of a code block are interpreted literally. Markdown wraps a code block
in both `<pre>` and `<code>` tags.
To produce a code block in Markdown, simply indent every line of the
block by at least 4 spaces or 1 tab. For example, given this input:
This is a normal paragraph:
This is a code block.
Markdown will generate:
<p>This is a normal paragraph:</p>
<pre><code>This is a code block.
</code></pre>
One level of indentation -- 4 spaces or 1 tab -- is removed from each
line of the code block. For example, this:
Here is an example of AppleScript:
tell application "Foo"
beep
end tell
will turn into:
<p>Here is an example of AppleScript:</p>
<pre><code>tell application "Foo"
beep
end tell
</code></pre>
A code block continues until it reaches a line that is not indented
(or the end of the article).
Within a code block, ampersands (`&`) and angle brackets (`<` and `>`)
are automatically converted into HTML entities. This makes it very
easy to include example HTML source code using Markdown -- just paste
it and indent it, and Markdown will handle the hassle of encoding the
ampersands and angle brackets. For example, this:
<div class="footer">
&copy; 2004 Foo Corporation
</div>
will turn into:
<pre><code>&lt;div class="footer"&gt;
&amp;copy; 2004 Foo Corporation
&lt;/div&gt;
</code></pre>
Regular Markdown syntax is not processed within code blocks. E.g.,
asterisks are just literal asterisks within a code block. This means
it's also easy to use Markdown to write about Markdown's own syntax.
<h3 id="hr">Horizontal Rules</h3>
You can produce a horizontal rule tag (`<hr />`) by placing three or
more hyphens, asterisks, or underscores on a line by themselves. If you
wish, you may use spaces between the hyphens or asterisks. Each of the
following lines will produce a horizontal rule:
* * *
***
*****
- - -
---------------------------------------
_ _ _
* * *
<h2 id="span">Span Elements</h2>
<h3 id="link">Links</h3>
Markdown supports two styles of links: *inline* and *reference*.
In both styles, the link text is delimited by [square brackets].
To create an inline link, use a set of regular parentheses immediately
after the link text's closing square bracket. Inside the parentheses,
put the URL where you want the link to point, along with an *optional*
title for the link, surrounded in quotes. For example:
This is [an example](http://example.com/ "Title") inline link.
[This link](http://example.net/) has no title attribute.
Will produce:
<p>This is <a href="http://example.com/" title="Title">
an example</a> inline link.</p>
<p><a href="http://example.net/">This link</a> has no
title attribute.</p>
If you're referring to a local resource on the same server, you can
use relative paths:
See my [About](/about/) page for details.
Reference-style links use a second set of square brackets, inside
which you place a label of your choosing to identify the link:
This is [an example][id] reference-style link.
You can optionally use a space to separate the sets of brackets:
This is [an example] [id] reference-style link.
Then, anywhere in the document, you define your link label like this,
on a line by itself:
[id]: http://example.com/ "Optional Title Here"
That is:
* Square brackets containing the link identifier (optionally
indented from the left margin using up to three spaces);
* followed by a colon;
* followed by one or more spaces (or tabs);
* followed by the URL for the link;
* optionally followed by a title attribute for the link, enclosed
in double or single quotes.
The link URL may, optionally, be surrounded by angle brackets:
[id]: <http://example.com/> "Optional Title Here"
You can put the title attribute on the next line and use extra spaces
or tabs for padding, which tends to look better with longer URLs:
[id]: http://example.com/longish/path/to/resource/here
"Optional Title Here"
Link definitions are only used for creating links during Markdown
processing, and are stripped from your document in the HTML output.
Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links:
[link text][a]
[link text][A]
are equivalent.
The *implicit link name* shortcut allows you to omit the name of the
link, in which case the link text itself is used as the name.
Just use an empty set of square brackets -- e.g., to link the word
"Google" to the google.com web site, you could simply write:
[Google][]
And then define the link:
[Google]: http://google.com/
Because link names may contain spaces, this shortcut even works for
multiple words in the link text:
Visit [Daring Fireball][] for more information.
And then define the link:
[Daring Fireball]: http://daringfireball.net/
Link definitions can be placed anywhere in your Markdown document. I
tend to put them immediately after each paragraph in which they're
used, but if you want, you can put them all at the end of your
document, sort of like footnotes.
Here's an example of reference links in action:
I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
Using the implicit link name shortcut, you could instead write:
I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].
[google]: http://google.com/ "Google"
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"
Both of the above examples will produce the following HTML output:
<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from
<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
For comparison, here is the same paragraph written using
Markdown's inline link style:
I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").
The point of reference-style links is not that they're easier to
write. The point is that with reference-style links, your document
source is vastly more readable. Compare the above examples: using
reference-style links, the paragraph itself is only 81 characters
long; with inline-style links, it's 176 characters; and as raw HTML,
it's 234 characters. In the raw HTML, there's more markup than there
is text.
With Markdown's reference-style links, a source document much more
closely resembles the final output, as rendered in a browser. By
allowing you to move the markup-related metadata out of the paragraph,
you can add links without interrupting the narrative flow of your
prose.
<h3 id="em">Emphasis</h3>
Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
emphasis. Text wrapped with one `*` or `_` will be wrapped with an
HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML
`<strong>` tag. E.g., this input:
*single asterisks*
_single underscores_
**double asterisks**
__double underscores__
will produce:
<em>single asterisks</em>
<em>single underscores</em>
<strong>double asterisks</strong>
<strong>double underscores</strong>
You can use whichever style you prefer; the lone restriction is that
the same character must be used to open and close an emphasis span.
Emphasis can be used in the middle of a word:
un*fucking*believable
But if you surround an `*` or `_` with spaces, it'll be treated as a
literal asterisk or underscore.
To produce a literal asterisk or underscore at a position where it
would otherwise be used as an emphasis delimiter, you can backslash
escape it:
\*this text is surrounded by literal asterisks\*
<h3 id="code">Code</h3>
To indicate a span of code, wrap it with backtick quotes (`` ` ``).
Unlike a pre-formatted code block, a code span indicates code within a
normal paragraph. For example:
Use the `printf()` function.
will produce:
<p>Use the <code>printf()</code> function.</p>
To include a literal backtick character within a code span, you can use
multiple backticks as the opening and closing delimiters:
``There is a literal backtick (`) here.``
which will produce this:
<p><code>There is a literal backtick (`) here.</code></p>
The backtick delimiters surrounding a code span may include spaces --
one after the opening, one before the closing. This allows you to place
literal backtick characters at the beginning or end of a code span:
A single backtick in a code span: `` ` ``
A backtick-delimited string in a code span: `` `foo` ``
will produce:
<p>A single backtick in a code span: <code>`</code></p>
<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
With a code span, ampersands and angle brackets are encoded as HTML
entities automatically, which makes it easy to include example HTML
tags. Markdown will turn this:
Please don't use any `<blink>` tags.
into:
<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
You can write this:
`&#8212;` is the decimal-encoded equivalent of `&mdash;`.
to produce:
<p><code>&amp;#8212;</code> is the decimal-encoded
equivalent of <code>&amp;mdash;</code>.</p>
<h3 id="img">Images</h3>
Admittedly, it's fairly difficult to devise a "natural" syntax for
placing images into a plain text document format.
Markdown uses an image syntax that is intended to resemble the syntax
for links, allowing for two styles: *inline* and *reference*.
Inline image syntax looks like this:
![Alt text](/path/to/img.jpg)
![Alt text](/path/to/img.jpg "Optional title")
That is:
* An exclamation mark: `!`;
* followed by a set of square brackets, containing the `alt`
attribute text for the image;
* followed by a set of parentheses, containing the URL or path to
the image, and an optional `title` attribute enclosed in double
or single quotes.
Reference-style image syntax looks like this:
![Alt text][id]
Where "id" is the name of a defined image reference. Image references
are defined using syntax identical to link references:
[id]: url/to/image "Optional title attribute"
As of this writing, Markdown has no syntax for specifying the
dimensions of an image; if this is important to you, you can simply
use regular HTML `<img>` tags.
* * *
<h2 id="misc">Miscellaneous</h2>
<h3 id="autolink">Automatic Links</h3>
Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:
<http://example.com/>
Markdown will turn this into:
<a href="http://example.com/">http://example.com/</a>
Automatic links for email addresses work similarly, except that
Markdown will also perform a bit of randomized decimal and hex
entity-encoding to help obscure your address from address-harvesting
spambots. For example, Markdown will turn this:
<address@example.com>
into something like this:
<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
which will render in a browser as a clickable link to "address@example.com".
(This sort of entity-encoding trick will indeed fool many, if not
most, address-harvesting bots, but it definitely won't fool all of
them. It's better than nothing, but an address published in this way
will probably eventually start receiving spam.)
<h3 id="backslash">Backslash Escapes</h3>
Markdown allows you to use backslash escapes to generate literal
characters which would otherwise have special meaning in Markdown's
formatting syntax. For example, if you wanted to surround a word with
literal asterisks (instead of an HTML `<em>` tag), you can backslashes
before the asterisks, like this:
\*literal asterisks\*
Markdown provides backslash escapes for the following characters:
\ backslash
` backtick
* asterisk
_ underscore
{} curly braces
[] square brackets
() parentheses
# hash mark
+ plus sign
- minus sign (hyphen)
. dot
! exclamation mark

View File

@ -0,0 +1,9 @@
<blockquote>
<p>foo</p>
<blockquote>
<p>bar</p>
</blockquote>
<p>foo</p>
</blockquote>

View File

@ -0,0 +1,5 @@
> foo
>
> > bar
>
> foo

View File

@ -0,0 +1,148 @@
<h2>Unordered</h2>
<p>Asterisks tight:</p>
<ul>
<li>asterisk 1</li>
<li>asterisk 2</li>
<li>asterisk 3</li>
</ul>
<p>Asterisks loose:</p>
<ul>
<li><p>asterisk 1</p></li>
<li><p>asterisk 2</p></li>
<li><p>asterisk 3</p></li>
</ul>
<hr />
<p>Pluses tight:</p>
<ul>
<li>Plus 1</li>
<li>Plus 2</li>
<li>Plus 3</li>
</ul>
<p>Pluses loose:</p>
<ul>
<li><p>Plus 1</p></li>
<li><p>Plus 2</p></li>
<li><p>Plus 3</p></li>
</ul>
<hr />
<p>Minuses tight:</p>
<ul>
<li>Minus 1</li>
<li>Minus 2</li>
<li>Minus 3</li>
</ul>
<p>Minuses loose:</p>
<ul>
<li><p>Minus 1</p></li>
<li><p>Minus 2</p></li>
<li><p>Minus 3</p></li>
</ul>
<h2>Ordered</h2>
<p>Tight:</p>
<ol>
<li>First</li>
<li>Second</li>
<li>Third</li>
</ol>
<p>and:</p>
<ol>
<li>One</li>
<li>Two</li>
<li>Three</li>
</ol>
<p>Loose using tabs:</p>
<ol>
<li><p>First</p></li>
<li><p>Second</p></li>
<li><p>Third</p></li>
</ol>
<p>and using spaces:</p>
<ol>
<li><p>One</p></li>
<li><p>Two</p></li>
<li><p>Three</p></li>
</ol>
<p>Multiple paragraphs:</p>
<ol>
<li><p>Item 1, graf one.</p>
<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
back.</p></li>
<li><p>Item 2.</p></li>
<li><p>Item 3.</p></li>
</ol>
<h2>Nested</h2>
<ul>
<li>Tab
<ul>
<li>Tab
<ul>
<li>Tab</li>
</ul></li>
</ul></li>
</ul>
<p>Here's another:</p>
<ol>
<li>First</li>
<li>Second:
<ul>
<li>Fee</li>
<li>Fie</li>
<li>Foe</li>
</ul></li>
<li>Third</li>
</ol>
<p>Same thing but with paragraphs:</p>
<ol>
<li><p>First</p></li>
<li><p>Second:</p>
<ul>
<li>Fee</li>
<li>Fie</li>
<li>Foe</li>
</ul></li>
<li><p>Third</p></li>
</ol>
<p>This was an error in Markdown 1.0.1:</p>
<ul>
<li><p>this</p>
<ul><li>sub</li></ul>
<p>that</p></li>
</ul>

View File

@ -0,0 +1,131 @@
## Unordered
Asterisks tight:
* asterisk 1
* asterisk 2
* asterisk 3
Asterisks loose:
* asterisk 1
* asterisk 2
* asterisk 3
* * *
Pluses tight:
+ Plus 1
+ Plus 2
+ Plus 3
Pluses loose:
+ Plus 1
+ Plus 2
+ Plus 3
* * *
Minuses tight:
- Minus 1
- Minus 2
- Minus 3
Minuses loose:
- Minus 1
- Minus 2
- Minus 3
## Ordered
Tight:
1. First
2. Second
3. Third
and:
1. One
2. Two
3. Three
Loose using tabs:
1. First
2. Second
3. Third
and using spaces:
1. One
2. Two
3. Three
Multiple paragraphs:
1. Item 1, graf one.
Item 2. graf two. The quick brown fox jumped over the lazy dog's
back.
2. Item 2.
3. Item 3.
## Nested
* Tab
* Tab
* Tab
Here's another:
1. First
2. Second:
* Fee
* Fie
* Foe
3. Third
Same thing but with paragraphs:
1. First
2. Second:
* Fee
* Fie
* Foe
3. Third
This was an error in Markdown 1.0.1:
* this
* sub
that

View File

@ -0,0 +1,7 @@
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>

View File

@ -0,0 +1,7 @@
***This is strong and em.***
So is ***this*** word.
___This is strong and em.___
So is ___this___ word.

View File

@ -0,0 +1,25 @@
<ul>
<li><p>this is a list item
indented with tabs</p></li>
<li><p>this is a list item
indented with spaces</p></li>
</ul>
<p>Code:</p>
<pre><code>this code block is indented by one tab
</code></pre>
<p>And:</p>
<pre><code> this code block is indented by two tabs
</code></pre>
<p>And:</p>
<pre><code>+ this is an example list item
indented with tabs
+ this is an example list item
indented with spaces
</code></pre>

View File

@ -0,0 +1,21 @@
+ this is a list item
indented with tabs
+ this is a list item
indented with spaces
Code:
this code block is indented by one tab
And:
this code block is indented by two tabs
And:
+ this is an example list item
indented with tabs
+ this is an example list item
indented with spaces

View File

@ -0,0 +1,8 @@
<blockquote>
<p>A list within a blockquote:</p>
<ul>
<li>asterisk 1</li>
<li>asterisk 2</li>
<li>asterisk 3</li>
</ul>
</blockquote>

View File

@ -0,0 +1,5 @@
> A list within a blockquote:
>
> * asterisk 1
> * asterisk 2
> * asterisk 3

View File

@ -0,0 +1,225 @@
This is a forked version of peg-markdown with only minor changes:
* Switch to Qt .pro make system for easy x-platform.
* build a library instead of exec.
* Add GLibFacade from multimarkdown to allow Win32 compilations.
* Add ifdefs for C++ linking.
What is this?
=============
This is an implementation of John Gruber's [markdown][] in C. It uses a
[parsing expression grammar (PEG)][] to define the syntax. This should
allow easy modification and extension. It currently supports output in
HTML, LaTeX, ODF, or groff_mm formats, and adding new formats is
relatively easy.
[parsing expression grammar (PEG)]: http://en.wikipedia.org/wiki/Parsing_expression_grammar
[markdown]: http://daringfireball.net/projects/markdown/
It is pretty fast. A 179K text file that takes 5.7 seconds for
Markdown.pl (v. 1.0.1) to parse takes less than 0.2 seconds for this
markdown. It does, however, use a lot of memory (up to 4M of heap space
while parsing the 179K file, and up to 80K for a 4K file). (Note that
the memory leaks in earlier versions of this program have now been
plugged.)
Both a library and a standalone program are provided.
peg-markdown is written and maintained by John MacFarlane (jgm on
github), with significant contributions by Ryan Tomayko (rtomayko).
It is released under both the GPL and the MIT license; see LICENSE for
details.
Installing
==========
On a linux or unix-based system
-------------------------------
This program is written in portable ANSI C. It requires
[glib2](http://www.gtk.org/download/index.php). Most *nix systems will have
this installed already. The build system requires GNU make.
The other required dependency, [Ian Piumarta's peg/leg PEG parser
generator](http://piumarta.com/software/peg/), is included in the source
directory. It will be built automatically. (However, it is not as portable
as peg-markdown itself, and seems to require gcc.)
To make the 'markdown' executable:
make
(Or, on some systems, `gmake`.) Then, for usage instructions:
./markdown --help
To run John Gruber's Markdown 1.0.3 test suite:
make test
The test suite will fail on one of the list tests. Here's why.
Markdown.pl encloses "item one" in the following list in `<p>` tags:
1. item one
* subitem
* subitem
2. item two
3. item three
peg-markdown does not enclose "item one" in `<p>` tags unless it has a
following blank line. This is consistent with the official markdown
syntax description, and lets the author of the document choose whether
`<p>` tags are desired.
Cross-compiling for Windows with MinGW on a linux box
-----------------------------------------------------
Prerequisites:
* Linux system with MinGW cross compiler. For Ubuntu:
sudo apt-get install mingw32
* [Windows glib-2.0 binary & development files](http://www.gtk.org/download-windows.html).
Unzip files into cross-compiler directory tree (e.g., `/usr/i586-mingw32msvc`).
Steps:
1. Create the markdown parser using Linux-compiled `leg` from peg-0.1.4:
./peg-0.1.4/leg markdown_parser.leg >markdown_parser.c
(Note: The same thing could be accomplished by cross-compiling leg,
executing it on Windows, and copying the resulting C file to the Linux
cross-compiler host.)
2. Run the cross compiler with include flag for the Windows glib-2.0 headers:
for example,
/usr/bin/i586-mingw32msvc-cc -c \
-I/usr/i586-mingw32msvc/include/glib-2.0 \
-I/usr/i586-mingw32msvc/lib/glib-2.0/include -Wall -O3 -ansi markdown*.c
3. Link against the Windows glib-2.0 library: for example,
/usr/bin/i586-mingw32msvc-cc markdown*.o \
-Wl,-L/usr/i586-mingw32msvc/lib/glib,--dy,--warn-unresolved-symbols,-lglib-2.0 \
-o markdown.exe
The resulting executable depends on the glib dll file, so be sure to
load the glib binary on the Windows host.
Compiling with MinGW on Windows
-------------------------------
These directions assume that MinGW is installed in `c:\MinGW` and glib-2.0
is installed in the MinGW directory hierarchy (with the mingw bin directory
in the system path).
Unzip peg-markdown in a temp directory. From the directory with the
peg-markdown source, execute:
cd peg-0.1.4
make PKG_CONFIG=c:/path/to/glib/bin/pkg-config.exe
Extensions
==========
peg-markdown supports extensions to standard markdown syntax.
These can be turned on using the command line flag `-x` or
`--extensions`. `-x` by itself turns on all extensions. Extensions
can also be turned on selectively, using individual command-line
options. To see the available extensions:
./markdown --help-extensions
The `--smart` extension provides "smart quotes", dashes, and ellipses.
The `--notes` extension provides a footnote syntax like that of
Pandoc or PHP Markdown Extra.
Using the library
=================
The library exports two functions:
GString * markdown_to_g_string(char *text, int extensions, int output_format);
char * markdown_to_string(char *text, int extensions, int output_format);
The only difference between these is that `markdown_to_g_string` returns a
`GString` (glib's automatically resizable string), while `markdown_to_string`
returns a regular character pointer. The memory allocated for these must be
freed by the calling program, using `g_string_free()` or `free()`.
`text` is the markdown-formatted text to be converted. Note that tabs will
be converted to spaces, using a four-space tab stop. Character encodings are
ignored.
`extensions` is a bit-field specifying which syntax extensions should be used.
If `extensions` is 0, no extensions will be used. If it is `0xFFFFFF`,
all extensions will be used. To set extensions selectively, combine the
following constants with the bitwise `|` operator:
- `EXT_SMART` turns on smart quotes, dashes, and ellipses.
- `EXT_NOTES` turns on footnote syntax. [Pandoc's footnote syntax][] is used here.
- `EXT_FILTER_HTML` filters out raw HTML (except for styles).
- `EXT_FILTER_STYLES` filters out styles in HTML.
[Pandoc's footnote syntax]: http://johnmacfarlane.net/pandoc/README.html#footnotes
`output_format` is either `HTML_FORMAT`, `LATEX_FORMAT`, `ODF_FORMAT`,
or `GROFF_MM_FORMAT`.
To use the library, include `markdown_lib.h`. See `markdown.c` for an example.
Hacking
=======
It should be pretty easy to modify the program to produce other formats,
and to parse syntax extensions. A quick guide:
* `markdown_parser.leg` contains the grammar itself.
* `markdown_output.c` contains functions for printing the `Element`
structure in various output formats.
* To add an output format, add the format to `markdown_formats` in
`markdown_lib.h`. Then modify `print_element` in `markdown_output.c`,
and add functions `print_XXXX_string`, `print_XXXX_element`, and
`print_XXXX_element_list`. Also add an option in the main program
that selects the new format. Don't forget to add it to the list of
formats in the usage message.
* To add syntax extensions, define them in the PEG grammar
(`markdown_parser.leg`), using existing extensions as a guide. New
inline elements will need to be added to `Inline =`; new block
elements will need to be added to `Block =`. (Note: the order
of the alternatives does matter in PEG grammars.)
* If you need to add new types of elements, modify the `keys`
enum in `markdown_peg.h`.
* By using `&{ }` rules one can selectively disable extensions
depending on command-line options. For example,
`&{ extension(EXT_SMART) }` succeeds only if the `EXT_SMART` bit
of the global `syntax_extensions` is set. Add your option to
`markdown_extensions` in `markdown_lib.h`, and add an option in
`markdown.c` to turn on your extension.
* Note: Avoid using `[^abc]` character classes in the grammar, because
they cause problems with non-ascii input. Instead, use: `( !'a' !'b'
!'c' . )`
Acknowledgements
================
Support for ODF output was added by Fletcher T. Penney.

View File

@ -0,0 +1,11 @@
/*
* glib.h
* MultiMarkdown
*
* Created by Daniel Jalkut on 7/26/11.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*
*/
/* Just a dummy file to keep the glib-dependent sources compiling as we would hope */
#include "GLibFacade.h"

View File

@ -0,0 +1,183 @@
/**********************************************************************
markdown.c - markdown in C using a PEG grammar.
(c) 2008 John MacFarlane (jgm at berkeley dot edu).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License or the MIT
license. See LICENSE for details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <glib.h>
#include "markdown_peg.h"
static int extensions;
/**********************************************************************
The main program is just a wrapper around the library functions in
markdown_lib.c. It parses command-line options, reads the text to
be converted from input files or stdin, converts the text, and sends
the output to stdout or a file. Character encodings are ignored.
***********************************************************************/
#define VERSION "0.4.14"
#define COPYRIGHT "Copyright (c) 2008-2009 John MacFarlane. License GPLv2+ or MIT.\n" \
"This is free software: you are free to change and redistribute it.\n" \
"There is NO WARRANTY, to the extent permitted by law."
/* version - write the peg-markdown version line followed by the copyright
 * notice to stdout.  `progname` is currently unused. */
void version(const char *progname)
{
    (void)progname;

    printf("peg-markdown version %s\n", VERSION);
    printf("%s\n", COPYRIGHT);
}
/* main - command-line front end for the markdown library.
 *
 * Parses the options, concatenates the contents of every file named on the
 * command line (or stdin when none is given) into one buffer, converts that
 * buffer with markdown_to_string(), and writes the result followed by a
 * newline to stdout or to the file named with --output.
 *
 * Returns EXIT_SUCCESS on success; exits or returns non-zero when option
 * parsing fails, the output format is unknown, or a file cannot be opened
 * or written. */
int main(int argc, char * argv[]) {

    int numargs;            /* number of filename arguments */
    int i;

    GString *inputbuf;
    char *out;              /* string containing processed output */

    FILE *input;
    FILE *output;
    int curchar;            /* int, not char: must hold every byte value plus EOF */
    char *progname = argv[0];

    int output_format = HTML_FORMAT;

    /* Code for command-line option parsing. */

    static gboolean opt_version = FALSE;
    static gchar *opt_output = 0;
    static gchar *opt_to = 0;
    static gboolean opt_smart = FALSE;
    static gboolean opt_notes = FALSE;
    static gboolean opt_filter_html = FALSE;
    static gboolean opt_filter_styles = FALSE;
    static gboolean opt_allext = FALSE;

    static GOptionEntry entries[] =
    {
      { "version", 'v', 0, G_OPTION_ARG_NONE, &opt_version, "print version and exit", NULL },
      { "output", 'o', 0, G_OPTION_ARG_STRING, &opt_output, "send output to FILE (default is stdout)", "FILE" },
      { "to", 't', 0, G_OPTION_ARG_STRING, &opt_to, "convert to FORMAT (default is html)", "FORMAT" },
      { "extensions", 'x', 0, G_OPTION_ARG_NONE, &opt_allext, "use all syntax extensions", NULL },
      { "filter-html", 0, 0, G_OPTION_ARG_NONE, &opt_filter_html, "filter out raw HTML (except styles)", NULL },
      { "filter-styles", 0, 0, G_OPTION_ARG_NONE, &opt_filter_styles, "filter out HTML styles", NULL },
      { NULL }
    };

    /* Options to activate syntax extensions.  These appear separately in --help. */
    static GOptionEntry ext_entries[] =
    {
      { "smart", 0, 0, G_OPTION_ARG_NONE, &opt_smart, "use smart typography extension", NULL },
      { "notes", 0, 0, G_OPTION_ARG_NONE, &opt_notes, "use notes extension", NULL },
      { NULL }
    };

    GError *error = NULL;
    GOptionContext *context;
    GOptionGroup *ext_group;

    context = g_option_context_new ("[FILE...]");
    g_option_context_add_main_entries (context, entries, NULL);
    ext_group = g_option_group_new ("extensions", "Syntax extensions", "show available syntax extensions", NULL, NULL);
    g_option_group_add_entries (ext_group, ext_entries);
    g_option_context_add_group (context, ext_group);
    g_option_context_set_description (context, "Converts text in specified files (or stdin) from markdown to FORMAT.\n"
                                               "Available FORMATs: html, latex, groff-mm, odf");
    if (!g_option_context_parse (context, &argc, &argv, &error)) {
        g_print ("option parsing failed: %s\n", error->message);
        exit (1);
    }
    g_option_context_free(context);

    /* Process command-line options and arguments. */

    if (opt_version) {
        version(progname);
        return EXIT_SUCCESS;
    }

    extensions = 0;
    if (opt_allext)
        extensions = 0xFFFFFF;  /* turn on all extensions */
    if (opt_smart)
        extensions |= EXT_SMART;
    if (opt_notes)
        extensions |= EXT_NOTES;
    if (opt_filter_html)
        extensions |= EXT_FILTER_HTML;
    if (opt_filter_styles)
        extensions |= EXT_FILTER_STYLES;

    if (opt_to == NULL)
        output_format = HTML_FORMAT;
    else if (strcmp(opt_to, "html") == 0)
        output_format = HTML_FORMAT;
    else if (strcmp(opt_to, "latex") == 0)
        output_format = LATEX_FORMAT;
    else if (strcmp(opt_to, "groff-mm") == 0)
        output_format = GROFF_MM_FORMAT;
    else if (strcmp(opt_to, "odf") == 0)
        output_format = ODF_FORMAT;
    else {
        fprintf(stderr, "%s: Unknown output format '%s'\n", progname, opt_to);
        exit(EXIT_FAILURE);
    }

    /* we allow "-" as a synonym for stdout here */
    if (opt_output == NULL || strcmp(opt_output, "-") == 0)
        output = stdout;
    else if (!(output = fopen(opt_output, "w"))) {
        perror(opt_output);
        return 1;
    }

    inputbuf = g_string_new("");   /* string for concatenated input */

    /* Read input from stdin or input files into inputbuf */

    numargs = argc - 1;
    if (numargs == 0) {        /* use stdin if no files specified */
        while ((curchar = fgetc(stdin)) != EOF)
            g_string_append_c(inputbuf, curchar);
        fclose(stdin);
    }
    else {                  /* open all the files on command line */
        for (i = 0; i < numargs; i++) {
            if ((input = fopen(argv[i+1], "r")) == NULL) {
                perror(argv[i+1]);
                exit(EXIT_FAILURE);
            }
            while ((curchar = fgetc(input)) != EOF)
                g_string_append_c(inputbuf, curchar);
            fclose(input);
        }
    }

    out = markdown_to_string(inputbuf->str, extensions, output_format);
    fprintf(output, "%s\n", out);
    free(out);

    g_string_free(inputbuf, true);

    /* Close a file we opened ourselves so that a failed flush is reported
     * instead of being lost at process exit. */
    if (output != stdout && fclose(output) != 0) {
        perror(opt_output);
        return EXIT_FAILURE;
    }

    return(EXIT_SUCCESS);
}

View File

@ -0,0 +1,181 @@
/**********************************************************************
markdown_lib.c - markdown in C using a PEG grammar.
(c) 2008 John MacFarlane (jgm at berkeley dot edu).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License or the MIT
license. See LICENSE for details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "markdown_peg.h"
#define TABSTOP 4
/* preformat_text - allocate and return a copy of `text` in which every tab
 * is expanded with spaces up to the next TABSTOP-column boundary, with
 * "\n\n" appended after the copied text.  A NULL `text` is treated as the
 * empty string.  The returned GString belongs to the caller. */
static GString *preformat_text(char *text) {
    GString *buf = g_string_new("");
    int charstotab = TABSTOP;   /* columns left before the next tab stop */
    char next_char;

    if (text == NULL)
        text = "";

    while ((next_char = *text++) != '\0') {
        switch (next_char) {
        case '\t':
            /* pad out to the tab stop */
            while (charstotab > 0) {
                g_string_append_c(buf, ' ');
                charstotab--;
            }
            break;
        case '\n':
            /* a new line restarts the column count */
            g_string_append_c(buf, '\n');
            charstotab = TABSTOP;
            break;
        default:
            g_string_append_c(buf, next_char);
            charstotab--;
            break;
        }
        if (charstotab == 0)
            charstotab = TABSTOP;
    }
    g_string_append(buf, "\n\n");
    return buf;
}
/* print_tree - print tree of elements, for debugging only. */
static void print_tree(element * elt, int indent) {
int i;
char * key;
while (elt != NULL) {
for (i = 0; i < indent; i++)
fputc(' ', stderr);
switch (elt->key) {
case LIST: key = "LIST"; break;
case RAW: key = "RAW"; break;
case SPACE: key = "SPACE"; break;
case LINEBREAK: key = "LINEBREAK"; break;
case ELLIPSIS: key = "ELLIPSIS"; break;
case EMDASH: key = "EMDASH"; break;
case ENDASH: key = "ENDASH"; break;
case APOSTROPHE: key = "APOSTROPHE"; break;
case SINGLEQUOTED: key = "SINGLEQUOTED"; break;
case DOUBLEQUOTED: key = "DOUBLEQUOTED"; break;
case STR: key = "STR"; break;
case LINK: key = "LINK"; break;
case IMAGE: key = "IMAGE"; break;
case CODE: key = "CODE"; break;
case HTML: key = "HTML"; break;
case EMPH: key = "EMPH"; break;
case STRONG: key = "STRONG"; break;
case PLAIN: key = "PLAIN"; break;
case PARA: key = "PARA"; break;
case LISTITEM: key = "LISTITEM"; break;
case BULLETLIST: key = "BULLETLIST"; break;
case ORDEREDLIST: key = "ORDEREDLIST"; break;
case H1: key = "H1"; break;
case H2: key = "H2"; break;
case H3: key = "H3"; break;
case H4: key = "H4"; break;
case H5: key = "H5"; break;
case H6: key = "H6"; break;
case BLOCKQUOTE: key = "BLOCKQUOTE"; break;
case VERBATIM: key = "VERBATIM"; break;
case HTMLBLOCK: key = "HTMLBLOCK"; break;
case HRULE: key = "HRULE"; break;
case REFERENCE: key = "REFERENCE"; break;
case NOTE: key = "NOTE"; break;
default: key = "?";
}
if ( elt->key == STR ) {
fprintf(stderr, "0x%p: %s '%s'\n", (void *)elt, key, elt->contents.str);
} else {
fprintf(stderr, "0x%p: %s\n", (void *)elt, key);
}
if (elt->children)
print_tree(elt->children, indent + 4);
elt = elt->next;
}
}
/* next_raw_chunk - return the next non-empty chunk of *cursor delimited by
 * \001 bytes, NUL-terminating it in place and advancing *cursor past it.
 * Returns NULL once no chunk remains.  Leading and repeated delimiters are
 * skipped, the same splitting strtok() performs, but the position is kept
 * in *cursor rather than in strtok()'s hidden static state. */
static char *next_raw_chunk(char **cursor) {
    char *start;
    char *end;

    if (*cursor == NULL)
        return NULL;

    start = *cursor + strspn(*cursor, "\001");
    if (*start == '\0') {
        *cursor = start;
        return NULL;
    }

    end = start + strcspn(start, "\001");
    if (*end != '\0')
        *end++ = '\0';
    *cursor = end;
    return start;
}

/* process_raw_blocks - traverses an element list, replacing any RAW elements with
 * the result of parsing them as markdown text, and recursing into the children
 * of parent elements.  The result should be a tree of elements without any RAWs.
 *
 * Each RAW element becomes a LIST whose children are the concatenated parse
 * results of its chunks; its raw string is freed.  A RAW element with no
 * chunks, or whose chunks all parse to an empty list, becomes a LIST with no
 * children.  Returns `input`. */
static element * process_raw_blocks(element *input, int extensions, element *references, element *notes) {
    element *current;
    element *parsed;
    element *tail;      /* last element appended to current->children so far */
    char *cursor;
    char *chunk;

    for (current = input; current != NULL; current = current->next) {
        if (current->key == RAW) {
            /* \001 is used to indicate boundaries between nested lists when there
             * is no blank line.  We split the string by \001 and parse
             * each chunk separately. */
            cursor = current->contents.str;
            current->key = LIST;
            current->children = NULL;
            tail = NULL;
            while ((chunk = next_raw_chunk(&cursor)) != NULL) {
                parsed = parse_markdown(chunk, extensions, references, notes);
                if (parsed == NULL)
                    continue;       /* nothing to append for this chunk */
                if (tail == NULL)
                    current->children = parsed;
                else
                    tail->next = parsed;
                /* advance to the end of what was just appended */
                for (tail = parsed; tail->next != NULL; tail = tail->next)
                    ;
            }
            free(current->contents.str);
            current->contents.str = NULL;
        }
        if (current->children != NULL)
            current->children = process_raw_blocks(current->children, extensions, references, notes);
    }
    return input;
}
/* markdown_to_g_string - convert markdown `text` to `output_format`, using
 * the syntax extensions selected by the `extensions` bit-field.
 * Returns a GString, which must be freed after use using g_string_free(). */
GString * markdown_to_g_string(char *text, int extensions, int output_format) {
    GString *rendered = g_string_new("");
    GString *expanded = preformat_text(text);   /* tab-expanded working copy */
    element *references = parse_references(expanded->str, extensions);
    element *notes = parse_notes(expanded->str, extensions, references);
    element *tree = parse_markdown(expanded->str, extensions, references, notes);

    tree = process_raw_blocks(tree, extensions, references, notes);

    g_string_free(expanded, TRUE);

    print_element_list(rendered, tree, output_format, extensions);

    free_element_list(tree);
    free_element_list(references);
    return rendered;
}
/* markdown_to_string - convert markdown `text` to `output_format`, using the
 * syntax extensions selected by the `extensions` bit-field.
 * Returns a null-terminated string, which must be freed after use. */
char * markdown_to_string(char *text, int extensions, int output_format) {
    GString *rendered = markdown_to_g_string(text, extensions, output_format);
    char *buffer = rendered->str;

    /* FALSE: release only the GString wrapper; the character data is what
     * gets returned to the caller. */
    g_string_free(rendered, FALSE);
    return buffer;
}
/* vim:set ts=4 sw=4: */

View File

@ -0,0 +1,38 @@
#ifndef MARKDOWN_LIB_H
#define MARKDOWN_LIB_H
#include <stdlib.h>
#include <stdio.h>
#include <glib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Optional syntax extensions.  Each value is a distinct bit, presumably
 * OR-ed together into the `extensions` argument of the conversion calls. */
enum markdown_extensions {
    EXT_SMART         = 1 << 0,  /* smart punctuation: ellipses, dashes, quotes, apostrophes */
    EXT_NOTES         = 1 << 1,  /* footnote syntax: [^label] references and ^[inline] notes */
    EXT_FILTER_HTML   = 1 << 2,  /* raw HTML blocks and inline HTML are replaced by an empty list */
    EXT_FILTER_STYLES = 1 << 3   /* <style> blocks are replaced by an empty list */
};
/* Output formats accepted as the `output_format` argument. */
enum markdown_formats {
    HTML_FORMAT     = 0,
    LATEX_FORMAT    = 1,
    GROFF_MM_FORMAT = 2,
    ODF_FORMAT      = 3
};
/* Convert markdown `text` to the output format given by `output_format`
 * (presumably one of enum markdown_formats), honouring the `extensions`
 * flags.  Returns a GString, which must be freed after use using
 * g_string_free(). */
GString * markdown_to_g_string(char *text, int extensions, int output_format);
/* Same conversion, returning a null-terminated string, which must be freed
 * after use. */
char * markdown_to_string(char *text, int extensions, int output_format);
#ifdef __cplusplus
}
#endif
/* vim: set ts=4 sw=4 : */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,770 @@
%{
/**********************************************************************
markdown_parser.leg - markdown parser in C using a PEG grammar.
(c) 2008 John MacFarlane (jgm at berkeley dot edu).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License or the MIT
license. See LICENSE for details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#include <stdbool.h>
#include <assert.h>
#include "markdown_peg.h"
#include "utility_functions.h"
/**********************************************************************
Definitions for leg parser generator.
YY_INPUT is the function the parser calls to get new input.
We take all new input from (static) charbuf.
***********************************************************************/
/* Semantic values produced by grammar actions are element pointers. */
# define YYSTYPE element *
#ifdef __DEBUG__
# define YY_DEBUG 1
#endif
/* YY_INPUT - supply the parser with one byte per call, taken from charbuf.
 *   buf      - destination for the byte
 *   result   - set to 1 when a byte was stored, 0 at end of input
 *   max_size - unused; at most one byte is ever delivered
 * Input ends at the terminating NUL of charbuf (or immediately when charbuf
 * is NULL).  The byte is read as unsigned char: where plain char is signed,
 * an input byte of 0xFF would otherwise become -1, compare equal to EOF and
 * silently truncate the document. */
#define YY_INPUT(buf, result, max_size) \
{ \
    int yyc; \
    if (charbuf && *charbuf != '\0') { \
        yyc= (unsigned char) *charbuf++; \
    } else { \
        yyc= EOF; \
    } \
    result= (EOF == yyc) ? 0 : (*(buf)= (char) yyc, 1); \
}
/* YY_RULE expands to the bare type T (no storage-class specifier is added). */
#define YY_RULE(T) T
/**********************************************************************
PEG grammar and parser actions for markdown syntax.
***********************************************************************/
%}
# Doc - top-level rule: an optional byte-order mark followed by any number of
# blocks.  The accumulated list is reversed before being stored in
# parse_result.
Doc = BOM? a:StartList ( Block { a = cons($$, a); } )*
{ parse_result = reverse(a); }
# Block - skips leading blank lines, then tries each block-level construct in
# the order listed; the first alternative that matches wins, so Para and
# Plain act as the fallbacks.
Block = BlankLine*
( BlockQuote
| Verbatim
| Note
| Reference
| HorizontalRule
| Heading
| OrderedList
| BulletList
| HtmlBlock
| StyleBlock
| Para
| Plain )
# Para - inline content terminated by at least one blank line.
Para = NonindentSpace a:Inlines BlankLine+
{ $$ = a; $$->key = PARA; }
# Plain - inline content that is not followed by a blank line.
Plain = a:Inlines
{ $$ = a; $$->key = PLAIN; }
# AtxInline - one inline inside an ATX heading; stops at the newline or at an
# optional closing run of '#' characters that ends the line.
AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline
# AtxStart - one to six '#' characters.  The captured length selects the
# heading key: H1 for one '#', H6 for six (relies on H1..H6 being consecutive
# enum values).
AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
{ $$ = mk_element(H1 + (strlen(yytext) - 1)); }
# AtxHeading - s is only needed for its key, so it is freed once the heading
# list has been built.
AtxHeading = s:AtxStart Sp? a:StartList ( AtxInline { a = cons($$, a); } )+ (Sp? '#'* Sp)? Newline
{ $$ = mk_list(s->key, a);
free(s); }
# Setext headings: a line of text underlined with '=' (H1) or '-' (H2).
SetextHeading = SetextHeading1 | SetextHeading2
SetextBottom1 = '='+ Newline
SetextBottom2 = '-'+ Newline
# The leading &(...) lookahead confirms the underline is present before any
# inline parsing of the heading text is attempted.
SetextHeading1 = &(RawLine SetextBottom1)
a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
SetextBottom1 { $$ = mk_list(H1, a); }
SetextHeading2 = &(RawLine SetextBottom2)
a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
SetextBottom2 { $$ = mk_list(H2, a); }
Heading = SetextHeading | AtxHeading
# BlockQuote - wraps the raw quoted text in a BLOCKQUOTE element.
BlockQuote = a:BlockQuoteRaw
{ $$ = mk_element(BLOCKQUOTE);
$$->children = a;
}
# BlockQuoteRaw - gathers the text of a block quote into one RAW string
# element.  Each group is a line starting with '>' (the marker and one
# optional space are not part of the captured line), followed by
# continuation lines that neither start with '>' nor are blank, followed by
# blank lines, each recorded as "\n".
BlockQuoteRaw = a:StartList
(( '>' ' '? Line { a = cons($$, a); } )
( !'>' !BlankLine Line { a = cons($$, a); } )*
( BlankLine { a = cons(mk_str("\n"), a); } )*
)+
{ $$ = mk_str_from_list(a, true);
$$->key = RAW;
}
NonblankIndentedLine = !BlankLine IndentedLine
# VerbatimChunk - optional blank lines (each recorded as "\n") followed by one
# or more indented, non-blank lines.
VerbatimChunk = a:StartList
( BlankLine { a = cons(mk_str("\n"), a); } )*
( NonblankIndentedLine { a = cons($$, a); } )+
{ $$ = mk_str_from_list(a, false); }
# Verbatim - one or more chunks joined into a single VERBATIM string element.
Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a); } )+
{ $$ = mk_str_from_list(a, false);
$$->key = VERBATIM; }
# HorizontalRule - three or more '*', '-' or '_' characters (all the same
# kind), optionally separated by spaces or tabs, alone on a line that is
# followed by at least one blank line.
HorizontalRule = NonindentSpace
( '*' Sp '*' Sp '*' (Sp '*')*
| '-' Sp '-' Sp '-' (Sp '-')*
| '_' Sp '_' Sp '_' (Sp '_')*)
Sp Newline BlankLine+
{ $$ = mk_element(HRULE); }
# Bullet - a '+', '*' or '-' marker followed by at least one space or tab.
# The HorizontalRule check comes first so that a rule such as "* * *" is not
# taken for a list item.
Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+
# BulletList - a tight or loose list that starts with a Bullet; the LIST
# built by ListTight/ListLoose is re-keyed as BULLETLIST.
BulletList = &Bullet (ListTight | ListLoose)
{ $$->key = BULLETLIST; }
# ListTight - one or more tight items; after any trailing blank lines no
# further list marker may follow (otherwise the list is treated as loose).
ListTight = a:StartList
( ListItemTight { a = cons($$, a); } )+
BlankLine* !(Bullet | Enumerator)
{ $$ = mk_list(LIST, a); }
# ListLoose - list items that may be separated by blank lines.  "\n\n" is
# appended to each item's raw text.
ListLoose = a:StartList
            ( b:ListItem BlankLine*
              { element *li;
                char *grown;
                li = b->children;
                /* Grow through a temporary: on failure realloc() leaves the
                 * original buffer valid, so the item keeps its text (without
                 * the trailing blank line) instead of losing the pointer and
                 * passing NULL to strcat(). */
                grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                if (grown != NULL) {
                    li->contents.str = grown;
                    strcat(li->contents.str, "\n\n");  /* In loose list, \n\n added to end of each element */
                }
                a = cons(b, a);
              } )+
            { $$ = mk_list(LIST, a); }
# ListItem - a list marker, the item's first block and any continuation
# blocks.  The text is joined into a single RAW string element and stored as
# the only child of a LISTITEM.
ListItem = ( Bullet | Enumerator )
a:StartList
ListBlock { a = cons($$, a); }
( ListContinuationBlock { a = cons($$, a); } )*
{ element *raw;
raw = mk_str_from_list(a, false);
raw->key = RAW;
$$ = mk_element(LISTITEM);
$$->children = raw;
}
# ListItemTight - like ListItem, but a continuation block may not be preceded
# by a blank line, and no continuation block may follow the item.
ListItemTight =
( Bullet | Enumerator )
a:StartList
ListBlock { a = cons($$, a); }
( !BlankLine
ListContinuationBlock { a = cons($$, a); } )*
!ListContinuationBlock
{ element *raw;
raw = mk_str_from_list(a, false);
raw->key = RAW;
$$ = mk_element(LISTITEM);
$$->children = raw;
}
# ListBlock - the first (non-blank) line of an item plus the lines that
# continue it, joined into one string.
ListBlock = a:StartList
!BlankLine Line { a = cons($$, a); }
( ListBlockLine { a = cons($$, a); } )*
{ $$ = mk_str_from_list(a, false); }
# ListContinuationBlock - further indented material belonging to the same
# item.  Captured blank lines are kept verbatim; when there are none, a \001
# byte is inserted instead to mark the boundary between blocks.
ListContinuationBlock = a:StartList
( < BlankLine* >
{ if (strlen(yytext) == 0)
a = cons(mk_str("\001"), a); /* block separator */
else
a = cons(mk_str(yytext), a); } )
( Indent ListBlock { a = cons($$, a); } )+
{ $$ = mk_str_from_list(a, false); }
# Enumerator - ordered-list marker: digits, a period, then at least one space
# or tab.
Enumerator = NonindentSpace [0-9]+ '.' Spacechar+
# OrderedList - same structure as BulletList, but must start with an
# Enumerator; the resulting LIST is re-keyed as ORDEREDLIST.
OrderedList = &Enumerator (ListTight | ListLoose)
{ $$->key = ORDEREDLIST; }
# ListBlockLine - a line that continues the current list block: not blank,
# not the start of another (optionally indented) list item, and not a
# horizontal rule.
ListBlockLine = !BlankLine
!( Indent? (Bullet | Enumerator) )
!HorizontalRule
OptionallyIndentedLine
# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
#
# Every tag gets three rules: HtmlBlockOpenX (opening tag with optional
# attributes), HtmlBlockCloseX (closing tag) and HtmlBlockX (everything from
# the opening tag through the matching closing tag, allowing nested blocks
# of the same tag).  Tag names are matched either entirely in lower case or
# entirely in upper case.  HtmlBlockScript is the one exception: it does not
# nest.
HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'
HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'
HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'
HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'
HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'
HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'
HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'
HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'
HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'
HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP
HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre
HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript
# HtmlBlockInTags - any one of the paired block-level tags defined above.
HtmlBlockInTags = HtmlBlockAddress
| HtmlBlockBlockquote
| HtmlBlockCenter
| HtmlBlockDir
| HtmlBlockDiv
| HtmlBlockDl
| HtmlBlockFieldset
| HtmlBlockForm
| HtmlBlockH1
| HtmlBlockH2
| HtmlBlockH3
| HtmlBlockH4
| HtmlBlockH5
| HtmlBlockH6
| HtmlBlockMenu
| HtmlBlockNoframes
| HtmlBlockNoscript
| HtmlBlockOl
| HtmlBlockP
| HtmlBlockPre
| HtmlBlockTable
| HtmlBlockUl
| HtmlBlockDd
| HtmlBlockDt
| HtmlBlockFrameset
| HtmlBlockLi
| HtmlBlockTbody
| HtmlBlockTd
| HtmlBlockTfoot
| HtmlBlockTh
| HtmlBlockThead
| HtmlBlockTr
| HtmlBlockScript
# HtmlBlock - a tagged block, an HTML comment or a self-closing block tag,
# followed by at least one blank line.  The captured text becomes an
# HTMLBLOCK string, or an empty LIST when EXT_FILTER_HTML is enabled.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
BlankLine+
{ if (extension(EXT_FILTER_HTML)) {
$$ = mk_list(LIST, NULL);
} else {
$$ = mk_str(yytext);
$$->key = HTMLBLOCK;
}
}
# HtmlBlockSelfClosing - a block-level tag written in self-closing form,
# i.e. with '/' before the closing '>'.
HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'
# HtmlBlockType - the tag names accepted by HtmlBlockSelfClosing, each in
# all-lowercase and all-uppercase form.
HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
"h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
"ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
"ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
"H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
"UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"
# <style> ... </style> handling.  The content between the tags is consumed
# verbatim up to the first closing tag.
StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags = StyleOpen (!StyleClose .)* StyleClose
# StyleBlock - the captured style element becomes an HTMLBLOCK string, or an
# empty LIST when EXT_FILTER_STYLES is enabled.  Unlike HtmlBlock, trailing
# blank lines are optional here.
StyleBlock = < InStyleTags >
BlankLine*
{ if (extension(EXT_FILTER_STYLES)) {
$$ = mk_list(LIST, NULL);
} else {
$$ = mk_str(yytext);
$$->key = HTMLBLOCK;
}
}
# Inlines - a run of inline elements.  An Endline is only added to the list
# when another inline follows it; a final Endline is consumed and dropped.
Inlines = a:StartList ( !Endline Inline { a = cons($$, a); }
| c:Endline &Inline { a = cons(c, a); } )+ Endline?
{ $$ = mk_list(LIST, a); }
# Inline - the alternatives are tried in the order listed; Symbol is the
# last resort.
Inline = Str
| Endline
| UlOrStarLine
| Space
| Strong
| Emph
| Image
| Link
| NoteReference
| InlineNote
| Code
| RawHtml
| Entity
| EscapedChar
| Smart
| Symbol
# Space - any run of spaces and tabs collapses to a single " " element.
Space = Spacechar+
{ $$ = mk_str(" ");
$$->key = SPACE; }
# Str - a run of normal characters followed by any number of StrChunks.  A
# single piece is returned as-is; several pieces are wrapped in a LIST.
Str = a:StartList < NormalChar+ > { a = cons(mk_str(yytext), a); }
( StrChunk { a = cons($$, a); } )*
{ if (a->next == NULL) { $$ = a; } else { $$ = mk_list(LIST, a); } }
# StrChunk - more normal characters, where underscores that are directly
# followed by an alphanumeric stay part of the string; or an apostrophe
# inside a word (see AposChunk).
StrChunk = < (NormalChar | '_'+ &Alphanumeric)+ > { $$ = mk_str(yytext); } |
AposChunk
# AposChunk - with EXT_SMART enabled, a single quote followed by an
# alphanumeric is an APOSTROPHE.
AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric
{ $$ = mk_element(APOSTROPHE); }
# EscapedChar - a backslash followed by one of the listed punctuation
# characters; only the escaped character is kept.
EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
{ $$ = mk_str(yytext); }
# Entity - the entity text (captured by the sub-rule) is passed through as HTML.
Entity = ( HexEntity | DecEntity | CharEntity )
{ $$ = mk_str(yytext); $$->key = HTML; }
# Endline - a line ending inside inline content.
Endline = LineBreak | TerminalEndline | NormalEndline
# NormalEndline - a newline that does not end the paragraph: the next line is
# not blank, does not start a block quote or ATX heading, and is not the text
# line of a setext heading.  Produces a "\n" SPACE element.
NormalEndline = Sp Newline !BlankLine !'>' !AtxStart
!(Line ('='+ | '-'+) Newline)
{ $$ = mk_str("\n");
$$->key = SPACE; }
# TerminalEndline - the final newline of the input; yields no element.
TerminalEndline = Sp Newline Eof
{ $$ = NULL; }
# LineBreak - a hard line break: two spaces before the line ending (any
# further trailing spaces are absorbed by the Sp at the start of
# NormalEndline).  With a one-space literal every line that merely ends in a
# single space would be turned into a LINEBREAK.
LineBreak = "  " NormalEndline
{ $$ = mk_element(LINEBREAK); }
# Symbol - a special character that no other inline rule claimed; emitted as
# literal text.
Symbol = < SpecialChar >
{ $$ = mk_str(yytext); }
# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext); }
# Four or more of the character in a row, or a run of it that has a space or
# tab on both sides.
StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar >
# Emphasis: *text* or _text_.  The opening marker may not be followed by
# whitespace.  Inside, a nested strong span of the same marker kind is
# accepted where a plain Inline would be refused for starting with the
# marker character.
Emph = EmphStar | EmphUl
Whitespace = Spacechar | Newline
EmphStar = '*' !Whitespace
a:StartList
( !'*' b:Inline { a = cons(b, a); }
| b:StrongStar { a = cons(b, a); }
)+
'*'
{ $$ = mk_list(EMPH, a); }
EmphUl = '_' !Whitespace
a:StartList
( !'_' b:Inline { a = cons(b, a); }
| b:StrongUl { a = cons(b, a); }
)+
'_'
{ $$ = mk_list(EMPH, a); }
# Strong emphasis: **text** or __text__.  The opening marker may not be
# followed by whitespace.
Strong = StrongStar | StrongUl
StrongStar = "**" !Whitespace
a:StartList
( !"**" b:Inline { a = cons(b, a); })+
"**"
{ $$ = mk_list(STRONG, a); }
StrongUl = "__" !Whitespace
a:StartList
( !"__" b:Inline { a = cons(b, a); })+
"__"
{ $$ = mk_list(STRONG, a); }
# Image - '!' followed by a link.  When the link rule produced a LINK, it is
# re-keyed as IMAGE.  Otherwise (the link rule fell back to a list of literal
# pieces, as ReferenceLink does for an unknown reference) the "!" is put
# back in front of those pieces.
Image = '!' ( ExplicitLink | ReferenceLink )
{ if ($$->key == LINK) {
$$->key = IMAGE;
} else {
element *result;
result = $$;
$$->children = cons(mk_str("!"), result->children);
} }
Link = ExplicitLink | ReferenceLink | AutoLink
ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle
# ReferenceLinkDouble - [text][label].  If the label is a known reference, a
# link is built from the text's children; the Label wrapper a and the whole
# of b are then released.  Otherwise the original source text is rebuilt as
# a list of literal pieces (yytext holds the whitespace captured between the
# two labels).
ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label
{ link match;
if (find_reference(&match, b->children)) {
$$ = mk_link(a->children, match.url, match.title);
free(a);
free_element_list(b);
} else {
element *result;
result = mk_element(LIST);
result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
cons(mk_str("["), cons(b, mk_str("]")))))));
$$ = result;
}
}
# ReferenceLinkSingle - [label] optionally followed by "[]"; the label text
# doubles as the reference name.  An unknown reference is rebuilt as literal
# pieces, including any captured "[]" suffix.
ReferenceLinkSingle = a:Label < (Spnl "[]")? >
{ link match;
if (find_reference(&match, a->children)) {
$$ = mk_link(a->children, match.url, match.title);
free(a);
}
else {
element *result;
result = mk_element(LIST);
result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
$$ = result;
}
}
# ExplicitLink - [label](source "title").  The temporary source and title
# elements are released after the link has been built; only the Label
# wrapper l itself is freed, since its children are handed to mk_link.
ExplicitLink = l:Label '(' Sp s:Source Spnl t:Title Sp ')'
{ $$ = mk_link(l->children, s->contents.str, t->contents.str);
free_element(s);
free_element(t);
free(l); }
# Source - the link target, optionally wrapped in angle brackets (which are
# not part of the captured text).
Source = ( '<' < SourceContents > '>' | < SourceContents > )
{ $$ = mk_str(yytext); }
# SourceContents - non-space characters other than '(' ')' '>', or a balanced
# parenthesised group; may be empty.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
# Title - an optional quoted title; the empty alternative captures "".
Title = ( TitleSingle | TitleDouble | < "" > )
{ $$ = mk_str(yytext); }
# A quote only ends the title when it is followed (after optional spaces) by
# ')' or a newline, so quotes of the same kind may appear inside the title.
TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''
TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'
# AutoLink - <scheme://...> or <user@host>; the captured text is used as the
# link label.
AutoLink = AutoLinkUrl | AutoLinkEmail
AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
{ $$ = mk_link(mk_str(yytext), yytext, ""); }
# AutoLinkEmail - an optional "mailto:" prefix in the source is skipped (it
# lies outside the capture) and then always prepended to the link URL.
AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>'
{ char *mailto = malloc(strlen(yytext) + 8);
sprintf(mailto, "mailto:%s", yytext);
$$ = mk_link(mk_str(yytext), mailto, "");
free(mailto);
}
# Reference - a link reference definition: [label]: source "title", followed
# by at least one blank line.  Built like a link, then re-keyed as REFERENCE.
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc t:RefTitle BlankLine+
{ $$ = mk_link(l->children, s->contents.str, t->contents.str);
free_element(s);
free_element(t);
free(l);
$$->key = REFERENCE; }
# Label - bracketed inline content.  With EXT_NOTES enabled the label may not
# start with '^' (that syntax belongs to note references).
Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } )
a:StartList
( !']' Inline { a = cons($$, a); } )*
']'
{ $$ = mk_list(LIST, a); }
RefSrc = < Nonspacechar+ >
{ $$ = mk_str(yytext);
$$->key = HTML; }
# RefTitle - an optional title in single quotes, double quotes or
# parentheses; EmptyTitle captures "" when none is present.
RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
{ $$ = mk_str(yytext); }
EmptyTitle = < "" >
RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''
RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"'
RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')'
# References - collects every Reference in the input, skipping all other
# content; the reversed list is stored in references.
References = a:StartList
( b:Reference { a = cons(b, a); } | SkipBlock )*
{ references = reverse(a); }
# TicksN - exactly N backticks (a further backtick may not follow).
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'
# Code - a code span opened by N backticks and closed by the same number.
# The captured content may hold non-backtick text, backtick runs of any
# other length, spaces/tabs, and newlines that are not followed by a blank
# line.  Spaces next to the delimiters lie outside the capture.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
| Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
| Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
| Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
| Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
)
{ $$ = mk_str(yytext); $$->key = CODE; }
# RawHtml - inline HTML: a comment, a script element or a single tag.  The
# captured text becomes an HTML string, or an empty LIST when
# EXT_FILTER_HTML is enabled.
RawHtml = < (HtmlComment | HtmlBlockScript | HtmlTag) >
{ if (extension(EXT_FILTER_HTML)) {
$$ = mk_list(LIST, NULL);
} else {
$$ = mk_str(yytext);
$$->key = HTML;
}
}
# Lexical building blocks shared by the rules above.
# BlankLine - a line holding nothing but spaces and tabs.
BlankLine = Sp Newline
Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
# HtmlAttribute - name, optionally followed by '=' and a quoted or bare value.
HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment = "<!--" (!"-->" .)* "-->"
HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'
# Eof - succeeds only when no input is left.
Eof = !.
Spacechar = ' ' | '\t'
Nonspacechar = !Spacechar !Newline .
# Newline - LF, CR LF, or a lone CR.
Newline = '\n' | '\r' '\n'?
Sp = Spacechar*
# Spnl - optional spaces/tabs containing at most one newline.
Spnl = Sp (Newline Sp)?
SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar
NormalChar = !( SpecialChar | Spacechar | Newline ) .
# Alphanumeric - ASCII letters and digits plus every byte value from \200 to
# \377, so all non-ASCII bytes count as alphanumeric.
Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377'
AlphanumericAscii = [A-Za-z0-9]
Digit = [0-9]
# BOM - the three bytes 0xEF 0xBB 0xBF (UTF-8 byte-order mark).
BOM = "\357\273\277"
# Character entities.  Each rule captures the complete entity text, from '&'
# through ';', for use by Entity.  (DecEntity previously had a stray '>'
# after the digits that closed the capture before the ';'.)
HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity = < '&' '#' [0-9]+ ';' >
CharEntity = < '&' [A-Za-z0-9]+ ';' >
# NonindentSpace - up to three leading spaces, i.e. less than one indent
# level.  Longest alternative first, because PEG choice commits to the first
# match.
NonindentSpace = "   " | "  " | " " | ""
# Indent - one indent level: a tab or four spaces.
Indent = "\t" | "    "
IndentedLine = Indent Line
OptionallyIndentedLine = Indent? Line
# StartList starts a list data structure that can be added to with cons:
# (the &. lookahead means it only succeeds while input remains)
StartList = &.
{ $$ = NULL; }
# Line - one raw line returned as a string element.
Line = RawLine
{ $$ = mk_str(yytext); }
# RawLine - everything up to and including the newline, or, on the last
# line, everything up to the end of input.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )
# SkipBlock - consumes content that is of no interest to the References and
# Notes passes: an HTML block, a run of ordinary lines, blank lines, or a
# single raw line.
SkipBlock = HtmlBlock
| ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine*
| BlankLine+
| RawLine
# Syntax extensions
# ExtendedSpecialChar - characters that only count as special while the
# corresponding extension is enabled.
ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
| &{ extension(EXT_NOTES) } ( '^' )
# Smart - smart punctuation, only attempted when EXT_SMART is enabled.
Smart = &{ extension(EXT_SMART) }
( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )
Apostrophe = '\''
{ $$ = mk_element(APOSTROPHE); }
Ellipsis = ("..." | ". . .")
{ $$ = mk_element(ELLIPSIS); }
Dash = EmDash | EnDash
# EnDash - a single '-' directly before a digit.
EnDash = '-' &Digit
{ $$ = mk_element(ENDASH); }
# EmDash - three or two consecutive '-' characters.
EmDash = ("---" | "--")
{ $$ = mk_element(EMDASH); }
# Single-quoted text: the opening quote may not be followed by whitespace and
# the closing quote may not be followed by an alphanumeric.
SingleQuoteStart = '\'' !(Spacechar | Newline)
SingleQuoteEnd = '\'' !Alphanumeric
SingleQuoted = SingleQuoteStart
a:StartList
( !SingleQuoteEnd b:Inline { a = cons(b, a); } )+
SingleQuoteEnd
{ $$ = mk_list(SINGLEQUOTED, a); }
DoubleQuoteStart = '"'
DoubleQuoteEnd = '"'
DoubleQuoted = DoubleQuoteStart
a:StartList
( !DoubleQuoteEnd b:Inline { a = cons(b, a); } )+
DoubleQuoteEnd
{ $$ = mk_list(DOUBLEQUOTED, a); }
# NoteReference - [^label], only when EXT_NOTES is enabled.  A known label
# yields a NOTE element that points at the children of the matching note; an
# unknown one is re-emitted as the literal text "[^label]".
NoteReference = &{ extension(EXT_NOTES) }
                ref:RawNoteReference
                { element *match;
                  if (find_note(&match, ref->contents.str)) {
                      $$ = mk_element(NOTE);
                      assert(match->children != NULL);
                      $$->children = match->children;
                      $$->contents.str = 0;
                  } else {
                      char *s;
                      /* "[^" + label + "]" + NUL = strlen + 4 */
                      s = malloc(strlen(ref->contents.str) + 4);
                      sprintf(s, "[^%s]", ref->contents.str);
                      $$ = mk_str(s);
                      free(s);
                  }
                  /* The temporary label element is not referenced by the
                   * result in either branch; release it, as ExplicitLink
                   * does for its temporaries, so it is not leaked. */
                  free_element(ref);
                }
# RawNoteReference - "[^label]"; only the label (non-empty, on one line,
# without ']') is captured and returned as a string element.
RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
{ $$ = mk_str(yytext); }
# Note - a note definition, only when EXT_NOTES is enabled: "[^label]:"
# followed by a raw block and any number of further raw blocks that start
# with an indent.  The label is stored in contents.str of the NOTE element.
Note = &{ extension(EXT_NOTES) }
       NonindentSpace ref:RawNoteReference ':' Sp
       a:StartList
       ( RawNoteBlock { a = cons($$, a); } )
       ( &Indent RawNoteBlock { a = cons($$, a); } )*
       { $$ = mk_list(NOTE, a);
         $$->contents.str = strdup(ref->contents.str);
         /* The label has been copied; release the temporary element so it
          * is not leaked. */
         free_element(ref);
       }
# InlineNote - ^[inline content], only when EXT_NOTES is enabled.  The NOTE
# element carries no label.
InlineNote = &{ extension(EXT_NOTES) }
"^["
a:StartList
( !']' Inline { a = cons($$, a); } )+
']'
{ $$ = mk_list(NOTE, a);
$$->contents.str = 0; }
# Notes - collects every Note in the input, skipping all other content; the
# reversed list is stored in notes.
Notes = a:StartList
( b:Note { a = cons(b, a); } | SkipBlock )*
{ notes = reverse(a); }
# RawNoteBlock - one or more non-blank lines plus the blank lines that
# follow them (kept verbatim), joined into a single RAW string element.
RawNoteBlock = a:StartList
( !BlankLine OptionallyIndentedLine { a = cons($$, a); } )+
( < BlankLine* > { a = cons(mk_str(yytext), a); } )
{ $$ = mk_str_from_list(a, true);
$$->key = RAW;
}
%%

View File

@ -0,0 +1,72 @@
/* markdown_peg.h */
#ifndef MARKDOWN_PEG_H
#define MARKDOWN_PEG_H
#include "markdown_lib.h"
#include <glib.h>
/* A link's three parts: the label (an element list), the URL it points to
 * and its title. */
typedef struct Link {
    struct Element *label;
    char *url;
    char *title;
} link;
/* Union for contents of an Element: either a string or a link.  (Child
 * lists are not stored here; struct Element has a separate children
 * pointer.) */
union Contents {
char *str;
struct Link *link;
};
/* Types of semantic values returned by parsers. */
enum keys {
    LIST = 0,       /* A generic list of values.  For ordered and bullet lists, see below. */
    RAW,            /* Raw markdown to be processed further */
    SPACE,
    LINEBREAK,
    ELLIPSIS,
    EMDASH,
    ENDASH,
    APOSTROPHE,
    SINGLEQUOTED,
    DOUBLEQUOTED,
    STR,
    LINK,
    IMAGE,
    CODE,
    HTML,
    EMPH,
    STRONG,
    PLAIN,
    PARA,
    LISTITEM,
    BULLETLIST,
    ORDEREDLIST,
    H1,             /* Code assumes that H1..H6 are consecutive and in order. */
    H2,
    H3,
    H4,
    H5,
    H6,
    BLOCKQUOTE,
    VERBATIM,
    HTMLBLOCK,
    HRULE,
    REFERENCE,
    NOTE
};
/* Semantic value of a parsing action: a node that can be linked both into a
 * sibling list and into a tree. */
typedef struct Element {
    int key;                    /* one of enum keys */
    union Contents contents;    /* string or link payload */
    struct Element *children;   /* first child, or NULL */
    struct Element *next;       /* next sibling, or NULL */
} element;
/* Parsing entry points.  Each takes the markdown text and the extension
 * flags and returns an element list.  parse_notes additionally receives the
 * reference list; parse_markdown receives both the reference and the note
 * list. */
element * parse_references(char *string, int extensions);
element * parse_notes(char *string, int extensions, element *reference_list);
element * parse_markdown(char *string, int extensions, element *reference_list, element *note_list);
/* Release a whole element list, or a single element. */
void free_element_list(element * elt);
void free_element(element *elt);
/* Append the rendering of the element list elt to out; format is presumably
 * one of enum markdown_formats and exts the extension flags. */
void print_element_list(GString *out, element *elt, int format, int exts);
#endif

View File

@ -0,0 +1,181 @@
/**********************************************************************
odf.c - Utility routines to enable ODF support in peg-multimarkdown.
(c) 2011 Fletcher T. Penney (http://fletcherpenney.net/).
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License or the MIT
license. See LICENSE for details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#include "odf.h"
/* print_odf_header - append the fixed opening part of an ODF text document
 * to `out`.
 *
 * Emits, in order: the XML declaration and the <office:document> root with
 * its namespace declarations, the font declarations, the named styles, and
 * the automatic styles followed by the P1 paragraph style and the L1/L2
 * list styles.
 *
 * The text is constant: the literals contain no printf conversions and no
 * extra arguments are passed to g_string_append_printf.
 *
 * <office:body> and <office:text> are NOT opened here even though
 * print_odf_footer() closes them; presumably the caller emits them between
 * the two calls - TODO confirm at the call site. */
void print_odf_header(GString *out){
/* Insert required XML header */
g_string_append_printf(out,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \
"<office:document xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"\n" \
" xmlns:style=\"urn:oasis:names:tc:opendocument:xmlns:style:1.0\"\n" \
" xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\"\n" \
" xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\"\n" \
" xmlns:draw=\"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0\"\n" \
" xmlns:fo=\"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0\"\n" \
" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n" \
" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" \
" xmlns:meta=\"urn:oasis:names:tc:opendocument:xmlns:meta:1.0\"\n" \
" xmlns:number=\"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0\"\n" \
" xmlns:svg=\"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0\"\n" \
" xmlns:chart=\"urn:oasis:names:tc:opendocument:xmlns:chart:1.0\"\n" \
" xmlns:dr3d=\"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0\"\n" \
" xmlns:math=\"http://www.w3.org/1998/Math/MathML\"\n" \
" xmlns:form=\"urn:oasis:names:tc:opendocument:xmlns:form:1.0\"\n" \
" xmlns:script=\"urn:oasis:names:tc:opendocument:xmlns:script:1.0\"\n" \
" xmlns:config=\"urn:oasis:names:tc:opendocument:xmlns:config:1.0\"\n" \
" xmlns:ooo=\"http://openoffice.org/2004/office\"\n" \
" xmlns:ooow=\"http://openoffice.org/2004/writer\"\n" \
" xmlns:oooc=\"http://openoffice.org/2004/calc\"\n" \
" xmlns:dom=\"http://www.w3.org/2001/xml-events\"\n" \
" xmlns:xforms=\"http://www.w3.org/2002/xforms\"\n" \
" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n" \
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" \
" xmlns:rpt=\"http://openoffice.org/2005/report\"\n" \
" xmlns:of=\"urn:oasis:names:tc:opendocument:xmlns:of:1.2\"\n" \
" xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"\n" \
" xmlns:grddl=\"http://www.w3.org/2003/g/data-view#\"\n" \
" xmlns:tableooo=\"http://openoffice.org/2009/table\"\n" \
" xmlns:field=\"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0\"\n" \
" xmlns:formx=\"urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0\"\n" \
" xmlns:css3t=\"http://www.w3.org/TR/css3-text/\"\n" \
" office:version=\"1.2\"\n" \
" grddl:transformation=\"http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl\"\n" \
" office:mimetype=\"application/vnd.oasis.opendocument.text\">\n");
/* Font Declarations */
g_string_append_printf(out, "<office:font-face-decls>\n" \
" <style:font-face style:name=\"Courier New\" svg:font-family=\"'Courier New'\"\n" \
" style:font-adornments=\"Regular\"\n" \
" style:font-family-generic=\"modern\"\n" \
" style:font-pitch=\"fixed\"/>\n" \
"</office:font-face-decls>\n");
/* Append basic style information */
g_string_append_printf(out, "<office:styles>\n" \
"<style:style style:name=\"Standard\" style:family=\"paragraph\" style:class=\"text\">\n" \
" <style:paragraph-properties fo:margin-top=\"0in\" fo:margin-bottom=\"0.15in\"" \
" fo:text-align=\"justify\" style:justify-single-word=\"false\"/>\n" \
" </style:style>\n" \
"<style:style style:name=\"Preformatted_20_Text\" style:display-name=\"Preformatted Text\"\n" \
" style:family=\"paragraph\"\n" \
" style:parent-style-name=\"Standard\"\n" \
" style:class=\"html\">\n" \
" <style:paragraph-properties fo:margin-top=\"0in\" fo:margin-bottom=\"0in\" fo:text-align=\"start\"\n" \
" style:justify-single-word=\"false\"/>\n" \
" <style:text-properties style:font-name=\"Courier New\" fo:font-size=\"11pt\"\n" \
" style:font-name-asian=\"Courier New\"\n" \
" style:font-size-asian=\"11pt\"\n" \
" style:font-name-complex=\"Courier New\"\n" \
" style:font-size-complex=\"11pt\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"Source_20_Text\" style:display-name=\"Source Text\"\n" \
" style:family=\"text\">\n" \
" <style:text-properties style:font-name=\"Courier New\" style:font-name-asian=\"Courier New\"\n" \
" style:font-name-complex=\"Courier New\"\n" \
" fo:font-size=\"11pt\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"List\" style:family=\"paragraph\"\n" \
" style:parent-style-name=\"Standard\"\n" \
" style:class=\"list\">\n" \
" <style:paragraph-properties fo:text-align=\"start\" style:justify-single-word=\"false\"/>\n" \
" <style:text-properties style:font-size-asian=\"12pt\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"Quotations\" style:family=\"paragraph\"\n" \
" style:parent-style-name=\"Standard\"\n" \
" style:class=\"html\">\n" \
" <style:paragraph-properties fo:margin-left=\"0.3937in\" fo:margin-right=\"0.3937in\" fo:margin-top=\"0in\"\n" \
" fo:margin-bottom=\"0.1965in\"\n" \
" fo:text-align=\"justify\"" \
" style:justify-single-word=\"false\"" \
" fo:text-indent=\"0in\"\n" \
" style:auto-text-indent=\"false\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"Table_20_Heading\" style:display-name=\"Table Heading\"\n" \
" style:family=\"paragraph\"\n" \
" style:parent-style-name=\"Table_20_Contents\"\n" \
" style:class=\"extra\">\n" \
" <style:paragraph-properties fo:text-align=\"center\" style:justify-single-word=\"false\"\n" \
" text:number-lines=\"false\"\n" \
" text:line-number=\"0\"/>\n" \
" <style:text-properties fo:font-weight=\"bold\" style:font-weight-asian=\"bold\"\n" \
" style:font-weight-complex=\"bold\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"Horizontal_20_Line\" style:display-name=\"Horizontal Line\"\n" \
" style:family=\"paragraph\"\n" \
" style:parent-style-name=\"Standard\"\n" \
" style:class=\"html\">\n" \
" <style:paragraph-properties fo:margin-top=\"0in\" fo:margin-bottom=\"0.1965in\"\n" \
" style:border-line-width-bottom=\"0.0008in 0.0138in 0.0008in\"\n" \
" fo:padding=\"0in\"\n" \
" fo:border-left=\"none\"\n" \
" fo:border-right=\"none\"\n" \
" fo:border-top=\"none\"\n" \
" fo:border-bottom=\"0.0154in double #808080\"\n" \
" text:number-lines=\"false\"\n" \
" text:line-number=\"0\"\n" \
" style:join-border=\"false\"/>\n" \
" <style:text-properties fo:font-size=\"6pt\" style:font-size-asian=\"6pt\" style:font-size-complex=\"6pt\"/>\n" \
"</style:style>\n" \
"</office:styles>\n");
/* Automatic style information.
 * NOTE(review): the P1 paragraph style and the L1/L2 list styles below are
 * emitted after </office:automatic-styles> has been closed, i.e. directly
 * under <office:document> - confirm this placement is intended. */
g_string_append_printf(out, "<office:automatic-styles>" \
" <style:style style:name=\"MMD-Italic\" style:family=\"text\">\n" \
" <style:text-properties fo:font-style=\"italic\" style:font-style-asian=\"italic\"\n" \
" style:font-style-complex=\"italic\"/>\n" \
" </style:style>\n" \
" <style:style style:name=\"MMD-Bold\" style:family=\"text\">\n" \
" <style:text-properties fo:font-weight=\"bold\" style:font-weight-asian=\"bold\"\n" \
" style:font-weight-complex=\"bold\"/>\n" \
" </style:style>\n" \
"<style:style style:name=\"MMD-Table\" style:family=\"paragraph\" style:parent-style-name=\"Standard\">\n" \
" <style:paragraph-properties fo:margin-top=\"0in\" fo:margin-bottom=\"0.05in\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"MMD-Table-Center\" style:family=\"paragraph\" style:parent-style-name=\"MMD-Table\">\n" \
" <style:paragraph-properties fo:text-align=\"center\" style:justify-single-word=\"false\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"MMD-Table-Right\" style:family=\"paragraph\" style:parent-style-name=\"MMD-Table\">\n" \
" <style:paragraph-properties fo:text-align=\"right\" style:justify-single-word=\"false\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"P2\" style:family=\"paragraph\" style:parent-style-name=\"Standard\"\n" \
" style:list-style-name=\"L2\">\n" \
"<style:paragraph-properties fo:text-align=\"start\" style:justify-single-word=\"false\"/>\n" \
"</style:style>\n" \
"<style:style style:name=\"fr1\" style:family=\"graphic\" style:parent-style-name=\"Frame\">\n" \
" <style:graphic-properties style:print-content=\"false\" style:vertical-pos=\"top\"\n" \
" style:vertical-rel=\"baseline\"\n" \
" fo:padding=\"0in\"\n" \
" fo:border=\"none\"\n" \
" style:shadow=\"none\"/>\n" \
"</style:style>\n" \
"</office:automatic-styles>\n" \
"<style:style style:name=\"P1\" style:family=\"paragraph\" style:parent-style-name=\"Standard\"\n" \
" style:list-style-name=\"L1\"/>\n" \
"<text:list-style style:name=\"L1\">\n" \
" <text:list-level-style-bullet />\n" \
"</text:list-style>\n" \
"<text:list-style style:name=\"L2\">\n" \
" <text:list-level-style-number />\n" \
"</text:list-style>\n");
}
/* print_odf_footer - append the closing tags of the ODF document to `out`:
 * </office:text>, </office:body> and </office:document>.
 * No trailing newline is written after the last tag. */
void print_odf_footer(GString *out) {
g_string_append_printf(out, "</office:text>\n</office:body>\n</office:document>");
}

View File

@ -0,0 +1,11 @@
/* odf.h - declarations of the ODF header/footer emitters defined in odf.c. */
#ifndef ODF_H
#define ODF_H
#include <stdlib.h>
#include <stdio.h>
#include <glib.h>
/* Append the fixed opening part of the ODF document to `out`. */
void print_odf_header(GString *out);
/* Append the closing tags of the ODF document to `out`. */
void print_odf_footer(GString *out);
#endif

View File

@ -0,0 +1,117 @@
/* parsing_functions.c - Functions for parsing markdown and
* freeing element lists. */
/* These yy_* functions come from markdown_parser.c which is
 * generated from markdown_parser.leg
 * */
/* Pointer to a grammar rule function; passed to yyparsefrom() to pick the
 * rule at which parsing starts. */
typedef int (*yyrule)();
extern int yyparse();
extern int yyparsefrom(yyrule);
/* Start rules used by parse_references, parse_notes and parse_markdown. */
extern int yy_References();
extern int yy_Notes();
extern int yy_Doc();
#include "utility_functions.h"
#include "parsing_functions.h"
#include "markdown_peg.h"
static void free_element_contents(element elt);
/* free_element_list - free list of elements recursively */
void free_element_list(element * elt) {
    /*
     * Walk the sibling chain starting at elt.  For every node: release its
     * contents, recursively release its children list, then release the
     * node itself.  A NULL list is a no-op.
     */
    element *current;
    element *following;

    for (current = elt; current != NULL; current = following) {
        /* Remember the successor first: current is freed at the bottom. */
        following = current->next;
        free_element_contents(*current);
        if (current->children != NULL) {
            free_element_list(current->children);
            current->children = NULL;
        }
        free(current);
    }
}
/* free_element_contents - free element contents depending on type.
 *
 * elt is passed BY VALUE: only the heap objects that the copy points at are
 * released; the caller's element is not modified, so its contents pointer
 * is dangling afterwards and must not be used again.
 *
 *  - string-like keys: contents.str is released with free();
 *  - LINK / IMAGE / REFERENCE: url, title, the label element list and the
 *    Link record itself are released.  A NULL link is tolerated;
 *  - every other key owns nothing through contents.
 */
static void free_element_contents(element elt) {
    switch (elt.key) {
    case STR:
    case SPACE:
    case RAW:
    case HTMLBLOCK:
    case HTML:
    case VERBATIM:
    case CODE:
    case NOTE:
        free(elt.contents.str);   /* free(NULL) is a no-op */
        break;
    case LINK:
    case IMAGE:
    case REFERENCE:
        /* Guard against an element whose link record was never attached. */
        if (elt.contents.link != NULL) {
            free(elt.contents.link->url);
            free(elt.contents.link->title);
            free_element_list(elt.contents.link->label);
            free(elt.contents.link);
        }
        break;
    default:
        break;
    }
}
/* free_element - free element and contents.
 *
 * Releases the contents of *elt and then the element itself.  The element's
 * children and next pointers are NOT followed; use free_element_list() to
 * release a whole list.  Passing NULL is a no-op, matching free() and
 * free_element_list(). */
void free_element(element *elt) {
    if (elt == NULL)
        return;
    free_element_contents(*elt);
    free(elt);
}
element * parse_references(char *string, int extensions) {
char *oldcharbuf;
syntax_extensions = extensions;
oldcharbuf = charbuf;
charbuf = string;
yyparsefrom(yy_References); /* first pass, just to collect references */
charbuf = oldcharbuf;
return references;
}
element * parse_notes(char *string, int extensions, element *reference_list) {
    /*
     * Second pass, for notes.  The global `notes` list is reset first; the
     * parser only runs when the EXT_NOTES extension is enabled, otherwise
     * the (empty) notes list is returned straight away.
     */
    char *saved_charbuf;

    notes = NULL;
    syntax_extensions = extensions;

    if (!extension(EXT_NOTES))
        return notes;

    references = reference_list;
    saved_charbuf = charbuf;
    charbuf = string;
    yyparsefrom(yy_Notes);
    charbuf = saved_charbuf;

    return notes;
}
element * parse_markdown(char *string, int extensions, element *reference_list, element *note_list) {
    /*
     * Main pass: install the extension flags and the previously collected
     * reference and note lists in the parser globals, parse `string` from
     * the Doc rule and return the global parse_result.
     */
    char *saved_charbuf;

    syntax_extensions = extensions;
    references = reference_list;
    notes = note_list;

    saved_charbuf = charbuf;
    charbuf = string;
    yyparsefrom(yy_Doc);
    charbuf = saved_charbuf;   /* restore charbuf to original value */

    return parse_result;
}

View File

@ -0,0 +1,17 @@
#ifndef PARSING_FUNCTIONS_H
#define PARSING_FUNCTIONS_H
/* parsing_functions.c - Functions for parsing markdown and
 * freeing element lists. */
#include "markdown_peg.h"
/* free_element_list - free list of elements recursively */
void free_element_list(element * elt);
/* free_element - free element and contents */
void free_element(element *elt);
/* parse_references - first pass over `string`; returns the collected references */
element * parse_references(char *string, int extensions);
/* parse_notes - second pass; returns the notes list, which stays NULL unless
 * the EXT_NOTES extension is enabled */
element * parse_notes(char *string, int extensions, element *reference_list);
/* parse_markdown - parse the document using the given reference and note
 * lists; returns the parse result */
element * parse_markdown(char *string, int extensions, element *reference_list, element *note_list);
#endif

View File

@ -0,0 +1,65 @@
# Build file for the peg and leg parser generators.
# OFLAGS selects optimisation; XFLAGS is left for the caller to supply.
CFLAGS = -g -Wall $(OFLAGS) $(XFLAGS)
OFLAGS = -O3 -DNDEBUG
#OFLAGS = -pg
# Objects shared by both generators.
OBJS = tree.o compile.o
all : peg leg
# Each binary is linked under a temporary "-new" name and then renamed.
peg : peg.o $(OBJS)
$(CC) $(CFLAGS) -o $@-new peg.o $(OBJS)
mv $@-new $@
leg : leg.o $(OBJS)
$(CC) $(CFLAGS) -o $@-new leg.o $(OBJS)
mv $@-new $@
# Installation goes to $(ROOT)$(PREFIX)/bin.
ROOT =
PREFIX = /usr/local
BINDIR = $(ROOT)$(PREFIX)/bin
install : $(BINDIR)/peg $(BINDIR)/leg
$(BINDIR)/% : %
cp -p $< $@
strip $@
uninstall : .FORCE
rm -f $(BINDIR)/peg
rm -f $(BINDIR)/leg
# The generators build their own parsers: peg.peg-c is produced by ./peg
# and leg.c by ./leg.
peg.o : peg.c peg.peg-c
%.peg-c : %.peg compile.c
./peg -o $@ $<
leg.o : leg.c
leg.c : leg.leg compile.c
./leg -o $@ $<
# check: regenerate each parser and diff it against the stored copy.
check : check-peg check-leg
check-peg : peg .FORCE
./peg < peg.peg > peg.out
diff peg.peg-c peg.out
rm peg.out
check-leg : leg .FORCE
./leg < leg.leg > leg.out
diff leg.c leg.out
rm leg.out
test examples : .FORCE
$(SHELL) -ec '(cd examples; $(MAKE))'
clean : .FORCE
rm -f *~ *.o *.peg.[cd] *.leg.[cd]
$(SHELL) -ec '(cd examples; $(MAKE) $@)'
spotless : clean .FORCE
rm -f peg
rm -f leg
$(SHELL) -ec '(cd examples; $(MAKE) $@)'
# Empty target used as a prerequisite to force rules to run every time.
.FORCE :

View File

@ -0,0 +1,717 @@
/* Copyright (c) 2007, 2012 by Ian Piumarta
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the 'Software'),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, provided that the above copyright notice(s) and this
* permission notice appear in all copies of the Software. Acknowledgement
* of the use of this Software in supporting documentation would be
* appreciated but is not required.
*
* THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
*
* Last edited: 2012-04-29 16:09:36 by piumarta on emilia
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "version.h"
#include "tree.h"
/* Hand out label numbers for the generated code: 1 on the first call, then
 * 2, 3, ...  The counter lives in a function-local static. */
static int yyl(void)
{
  static int last_label= 0;

  last_label += 1;
  return last_label;
}
/* Set the bit for character code c in a 256-bit (32-byte) class bitmap:
 * byte c >> 3, bit c & 7.  Codes outside 0..255 are ignored, because an
 * octal escape in a grammar can yield values up to 0777 and indexing the
 * bitmap with them would write outside its 32 bytes. */
static void charClassSet (unsigned char bits[], int c)
{
  if (c < 0 || c > 255) return;
  bits[c >> 3] |= (1 << (c & 7));
}
/* Clear the bit for character code c in a 256-bit (32-byte) class bitmap:
 * byte c >> 3, bit c & 7.  Codes outside 0..255 are ignored, because an
 * octal escape in a grammar can yield values up to 0777 and indexing the
 * bitmap with them would write outside its 32 bytes. */
static void charClassClear(unsigned char bits[], int c)
{
  if (c < 0 || c > 255) return;
  bits[c >> 3] &= ~(1 << (c & 7));
}
typedef void (*setter)(unsigned char bits[], int c);
/* Octal-digit test: 1 when c is one of '0'..'7', otherwise 0. */
static inline int oigit(int c)
{
  return !(c < '0' || '7' < c);
}
/* Read the next (possibly escaped) character of a character-class
 * specification.
 *
 * *ccp points into a NUL-terminated specification.  Returns the decoded
 * character and advances *ccp past what was consumed; at the terminating
 * NUL it returns 0 and leaves *ccp unchanged.
 *
 * A backslash followed by another character is an escape: the letters
 * a b e f n r t v map to the usual control characters, one to three octal
 * digits form a numeric code, and any other character stands for itself.
 * A backslash at the very end of the string is returned literally.
 *
 * Note that three octal digits can encode values up to 0777 (511), so the
 * result is not guaranteed to fit in a byte. */
static int cnext(unsigned char **ccp)
{
  unsigned char *cclass= *ccp;
  int c= *cclass++;
  if (c)
    {
      if ('\\' == c && *cclass)
	{
	  switch (c= *cclass++)
	    {
	    case 'a':  c= '\a';   break;	/* bel */
	    case 'b':  c= '\b';   break;	/* bs  */
	    case 'e':  c= '\033'; break;	/* esc; '\e' is a non-standard GNU escape */
	    case 'f':  c= '\f';   break;	/* ff  */
	    case 'n':  c= '\n';   break;	/* nl  */
	    case 'r':  c= '\r';   break;	/* cr  */
	    case 't':  c= '\t';   break;	/* ht  */
	    case 'v':  c= '\v';   break;	/* vt  */
	    default:
	      if (oigit(c))
		{
		  /* up to two further octal digits extend the code */
		  c -= '0';
		  if (oigit(*cclass)) c= (c << 3) + *cclass++ - '0';
		  if (oigit(*cclass)) c= (c << 3) + *cclass++ - '0';
		}
	      break;
	    }
	}
      *ccp= cclass;
    }
  return c;
}
/* Translate a character-class specification into the text of a C string
 * literal body describing a 256-bit membership bitmap.
 *
 * A leading '^' negates the class: the bitmap starts all-ones and listed
 * characters are cleared; otherwise it starts empty and listed characters
 * are set.  "a-b" denotes an inclusive range when a previous character is
 * available and something follows the '-'.
 *
 * The result is 32 octal escapes ("\ooo", 128 characters) held in a static
 * buffer, so every call overwrites the previous result. */
static char *makeCharClass(unsigned char *cclass)
{
  static char	 string[256];
  unsigned char	 bits[32];
  setter	 set= charClassSet;
  int		 fill= 0;
  int		 prev= -1;
  int		 c;
  char		*ptr= string;

  if ('^' == *cclass)
    {
      fill= 255;
      set= charClassClear;
      ++cclass;
    }
  memset(bits, fill, 32);

  while (*cclass)
    {
      if ('-' == *cclass && cclass[1] && prev >= 0)
	{
	  /* range: apply the setter to everything from prev up to the end point */
	  ++cclass;
	  c= cnext(&cclass);
	  while (prev <= c)
	    {
	      set(bits, prev);
	      ++prev;
	    }
	  prev= -1;
	}
      else
	{
	  c= cnext(&cclass);
	  prev= c;
	  set(bits, c);
	}
    }

  for (c= 0;  c < 32;  ++c)
    ptr += sprintf(ptr, "\\%03o", bits[c]);

  return string;
}
/* Small emitters for fragments of the generated C code; each one writes to
 * the `output` stream. */
/* begin/end: open and close a brace block. */
static void begin(void) { fprintf(output, "\n {"); }
static void end(void) { fprintf(output, "\n }"); }
/* label/jump: define label l<n> (as an empty statement) and emit a goto to it. */
static void label(int n) { fprintf(output, "\n l%d:;\t", n); }
static void jump(int n) { fprintf(output, " goto l%d;", n); }
/* save: declare yypos<n>/yythunkpos<n> initialised from ctx->pos and
 * ctx->thunkpos; restore: assign those two variables back into ctx. */
static void save(int n) { fprintf(output, " int yypos%d= ctx->pos, yythunkpos%d= ctx->thunkpos;", n, n); }
static void restore(int n) { fprintf(output, " ctx->pos= yypos%d; ctx->thunkpos= yythunkpos%d;", n, n); }
/* Emit the C code that matches `node`.
 *
 * node - expression node to compile (must not be NULL; a Rule node here is
 *        an internal error).
 * ko   - number of the label the generated code jumps to when the match
 *        fails.
 *
 * Code is written to `output`.  Fresh label numbers come from yyl().  An
 * illegal node type prints a message to stderr and exits with status 1. */
static void Node_compile_c_ko(Node *node, int ko)
{
assert(node);
switch (node->type)
{
case Rule:
fprintf(stderr, "\ninternal error #1 (%s)\n", node->rule.name);
exit(1);
break;
case Dot:
fprintf(output, " if (!yymatchDot(ctx)) goto l%d;", ko);
break;
case Name:
/* call the named rule; when a variable is bound to it, queue a yySet thunk */
fprintf(output, " if (!yy_%s(ctx)) goto l%d;", node->name.rule->rule.name, ko);
if (node->name.variable)
fprintf(output, " yyDo(ctx, yySet, %d, 0);", node->name.variable->variable.offset);
break;
case Character:
case String:
{
/* one character (or a two-character backslash escape) is matched with
 * yymatchChar, anything else with yymatchString; a lone single quote is
 * escaped so the emitted character literal stays valid */
int len= strlen(node->string.value);
if (1 == len)
{
if ('\'' == node->string.value[0])
fprintf(output, " if (!yymatchChar(ctx, '\\'')) goto l%d;", ko);
else
fprintf(output, " if (!yymatchChar(ctx, '%s')) goto l%d;", node->string.value, ko);
}
else
if (2 == len && '\\' == node->string.value[0])
fprintf(output, " if (!yymatchChar(ctx, '%s')) goto l%d;", node->string.value, ko);
else
fprintf(output, " if (!yymatchString(ctx, \"%s\")) goto l%d;", node->string.value, ko);
}
break;
case Class:
fprintf(output, " if (!yymatchClass(ctx, (unsigned char *)\"%s\")) goto l%d;", makeCharClass(node->cclass.value), ko);
break;
case Action:
fprintf(output, " yyDo(ctx, yy%s, ctx->begin, ctx->end);", node->action.name);
break;
case Predicate:
fprintf(output, " yyText(ctx, ctx->begin, ctx->end); if (!(%s)) goto l%d;", node->action.text, ko);
break;
case Alternate:
{
/* every alternative but the last fails to its own label, where the saved
 * position is restored before the next one is tried; the last alternative
 * fails to ko.  A successful alternative jumps to ok. */
int ok= yyl();
begin();
save(ok);
for (node= node->alternate.first; node; node= node->alternate.next)
if (node->alternate.next)
{
int next= yyl();
Node_compile_c_ko(node, next);
jump(ok);
label(next);
restore(ok);
}
else
Node_compile_c_ko(node, ko);
end();
label(ok);
}
break;
case Sequence:
for (node= node->sequence.first; node; node= node->sequence.next)
Node_compile_c_ko(node, ko);
break;
case PeekFor:
{
/* positive lookahead: match, then restore the saved position */
int ok= yyl();
begin();
save(ok);
Node_compile_c_ko(node->peekFor.element, ko);
restore(ok);
end();
}
break;
case PeekNot:
{
/* negative lookahead: if the element matches, fail to ko; if it fails,
 * continue at ok with the saved position restored.
 * NOTE(review): the element is read through the peekFor member -
 * presumably peekFor and peekNot share a layout in Node; confirm in tree.h */
int ok= yyl();
begin();
save(ok);
Node_compile_c_ko(node->peekFor.element, ok);
jump(ko);
label(ok);
restore(ok);
end();
}
break;
case Query:
{
/* optional element: a failure restores the position and carries on */
int qko= yyl(), qok= yyl();
begin();
save(qko);
Node_compile_c_ko(node->query.element, qko);
jump(qok);
label(qko);
restore(qko);
end();
label(qok);
}
break;
case Star:
{
/* zero or more: loop back to `again` until the element fails, then
 * restore the position saved at the start of the failed iteration */
int again= yyl(), out= yyl();
label(again);
begin();
save(out);
Node_compile_c_ko(node->star.element, out);
jump(again);
label(out);
restore(out);
end();
}
break;
case Plus:
{
/* one or more: one mandatory match (failing to ko), then the Star loop */
int again= yyl(), out= yyl();
Node_compile_c_ko(node->plus.element, ko);
label(again);
begin();
save(out);
Node_compile_c_ko(node->plus.element, out);
jump(again);
label(out);
restore(out);
end();
}
break;
default:
fprintf(stderr, "\nNode_compile_c_ko: illegal node type %d\n", node->type);
exit(1);
}
}
/* Return the number of nodes in a variable list (linked through
 * variable.next); 0 for an empty list. */
static int countVariables(Node *node)
{
  int total;

  for (total= 0;  node;  node= node->variable.next)
    ++total;
  return total;
}
/* Emit the #defines that make a rule's variables and the yy, yypos and
 * yythunkpos shorthands available inside an action body.
 *
 * Each variable in the list is mapped to ctx->val[-1], ctx->val[-2], ...
 * in list order, and the chosen index is also recorded in the node's
 * variable.offset. */
static void defineVariables(Node *node)
{
  int offset= 0;

  for (;  node;  node= node->variable.next)
    {
      --offset;
      fprintf(output, "#define %s ctx->val[%d]\n", node->variable.name, offset);
      node->variable.offset= offset;
    }
  fputs("#define yy ctx->yy\n", output);
  fputs("#define yypos ctx->pos\n", output);
  fputs("#define yythunkpos ctx->thunkpos\n", output);
}
/* Emit the #undefs that remove the names introduced by defineVariables():
 * first yythunkpos, yypos and yy, then one #undef per variable in the list. */
static void undefineVariables(Node *node)
{
  fputs("#undef yythunkpos\n", output);
  fputs("#undef yypos\n", output);
  fputs("#undef yy\n", output);
  for (;  node;  node= node->variable.next)
    fprintf(output, "#undef %s\n", node->variable.name);
}
/* Emit one C function yy_<name>(yycontext *ctx) for every rule in the list
 * that starts at `node` (linked through rule.next).
 *
 * Diagnostics on stderr: a rule without an expression is reported as
 * "used but not defined" and produces no function; a rule that is neither
 * marked RuleUsed nor the start rule is reported as "defined but not used".
 *
 * A rule whose top-level expression is a Query or a Star is treated as
 * "safe": no position is saved and no failure exit is generated for it. */
static void Rule_compile_c2(Node *node)
{
  Node *cur;

  assert(node);
  for (cur= node;  cur;  cur= cur->rule.next)
    {
      int ko, safe;

      assert(Rule == cur->type);

      if (!cur->rule.expression)
	{
	  fprintf(stderr, "rule '%s' used but not defined\n", cur->rule.name);
	  continue;
	}

      ko= yyl();

      if ((!(RuleUsed & cur->rule.flags)) && (cur != start))
	fprintf(stderr, "rule '%s' defined but not used\n", cur->rule.name);

      safe= ((Query == cur->rule.expression->type) || (Star == cur->rule.expression->type));

      /* function header and entry bookkeeping */
      fprintf(output, "\nYY_RULE(int) yy_%s(yycontext *ctx)\n{", cur->rule.name);
      if (!safe) save(0);
      if (cur->rule.variables)
	fprintf(output, " yyDo(ctx, yyPush, %d, 0);", countVariables(cur->rule.variables));
      fprintf(output, "\n yyprintf((stderr, \"%%s\\n\", \"%s\"));", cur->rule.name);

      /* the rule body; failures branch to label ko */
      Node_compile_c_ko(cur->rule.expression, ko);

      /* success exit */
      fprintf(output, "\n yyprintf((stderr, \" ok %%s @ %%s\\n\", \"%s\", ctx->buf+ctx->pos));", cur->rule.name);
      if (cur->rule.variables)
	fprintf(output, " yyDo(ctx, yyPop, %d, 0);", countVariables(cur->rule.variables));
      fputs("\n return 1;", output);

      /* failure exit: restore the position saved on entry */
      if (!safe)
	{
	  label(ko);
	  restore(0);
	  fprintf(output, "\n yyprintf((stderr, \" fail %%s @ %%s\\n\", \"%s\", ctx->buf+ctx->pos));", cur->rule.name);
	  fputs("\n return 0;", output);
	}
      fputs("\n}", output);
    }
}
static char *header= "\
#include <stdio.h>\n\
#include <stdlib.h>\n\
#include <string.h>\n\
";
static char *preamble= "\
#ifndef YY_LOCAL\n\
#define YY_LOCAL(T) static T\n\
#endif\n\
#ifndef YY_ACTION\n\
#define YY_ACTION(T) static T\n\
#endif\n\
#ifndef YY_RULE\n\
#define YY_RULE(T) static T\n\
#endif\n\
#ifndef YY_PARSE\n\
#define YY_PARSE(T) T\n\
#endif\n\
#ifndef YYPARSE\n\
#define YYPARSE yyparse\n\
#endif\n\
#ifndef YYPARSEFROM\n\
#define YYPARSEFROM yyparsefrom\n\
#endif\n\
#ifndef YY_INPUT\n\
#define YY_INPUT(buf, result, max_size) \\\n\
{ \\\n\
int yyc= getchar(); \\\n\
result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\\n\
yyprintf((stderr, \"<%c>\", yyc)); \\\n\
}\n\
#endif\n\
#ifndef YY_BEGIN\n\
#define YY_BEGIN ( ctx->begin= ctx->pos, 1)\n\
#endif\n\
#ifndef YY_END\n\
#define YY_END ( ctx->end= ctx->pos, 1)\n\
#endif\n\
#ifdef YY_DEBUG\n\
# define yyprintf(args) fprintf args\n\
#else\n\
# define yyprintf(args)\n\
#endif\n\
#ifndef YYSTYPE\n\
#define YYSTYPE int\n\
#endif\n\
\n\
#ifndef YY_PART\n\
\n\
typedef struct _yycontext yycontext;\n\
typedef void (*yyaction)(yycontext *ctx, char *yytext, int yyleng);\n\
typedef struct _yythunk { int begin, end; yyaction action; struct _yythunk *next; } yythunk;\n\
\n\
struct _yycontext {\n\
char *buf;\n\
int buflen;\n\
int pos;\n\
int limit;\n\
char *text;\n\
int textlen;\n\
int begin;\n\
int end;\n\
int textmax;\n\
yythunk *thunks;\n\
int thunkslen;\n\
int thunkpos;\n\
YYSTYPE yy;\n\
YYSTYPE *val;\n\
YYSTYPE *vals;\n\
int valslen;\n\
#ifdef YY_CTX_MEMBERS\n\
YY_CTX_MEMBERS\n\
#endif\n\
};\n\
\n\
#ifdef YY_CTX_LOCAL\n\
#define YY_CTX_PARAM_ yycontext *yyctx,\n\
#define YY_CTX_PARAM yycontext *yyctx\n\
#define YY_CTX_ARG_ yyctx,\n\
#define YY_CTX_ARG yyctx\n\
#else\n\
#define YY_CTX_PARAM_\n\
#define YY_CTX_PARAM\n\
#define YY_CTX_ARG_\n\
#define YY_CTX_ARG\n\
yycontext yyctx0;\n\
yycontext *yyctx= &yyctx0;\n\
#endif\n\
\n\
YY_LOCAL(int) yyrefill(yycontext *ctx)\n\
{\n\
int yyn;\n\
while (ctx->buflen - ctx->pos < 512)\n\
{\n\
ctx->buflen *= 2;\n\
ctx->buf= (char *)realloc(ctx->buf, ctx->buflen);\n\
}\n\
YY_INPUT((ctx->buf + ctx->pos), yyn, (ctx->buflen - ctx->pos));\n\
if (!yyn) return 0;\n\
ctx->limit += yyn;\n\
return 1;\n\
}\n\
\n\
YY_LOCAL(int) yymatchDot(yycontext *ctx)\n\
{\n\
if (ctx->pos >= ctx->limit && !yyrefill(ctx)) return 0;\n\
++ctx->pos;\n\
return 1;\n\
}\n\
\n\
YY_LOCAL(int) yymatchChar(yycontext *ctx, int c)\n\
{\n\
if (ctx->pos >= ctx->limit && !yyrefill(ctx)) return 0;\n\
if ((unsigned char)ctx->buf[ctx->pos] == c)\n\
{\n\
++ctx->pos;\n\
yyprintf((stderr, \" ok yymatchChar(ctx, %c) @ %s\\n\", c, ctx->buf+ctx->pos));\n\
return 1;\n\
}\n\
yyprintf((stderr, \" fail yymatchChar(ctx, %c) @ %s\\n\", c, ctx->buf+ctx->pos));\n\
return 0;\n\
}\n\
\n\
YY_LOCAL(int) yymatchString(yycontext *ctx, char *s)\n\
{\n\
int yysav= ctx->pos;\n\
while (*s)\n\
{\n\
if (ctx->pos >= ctx->limit && !yyrefill(ctx)) return 0;\n\
if (ctx->buf[ctx->pos] != *s)\n\
{\n\
ctx->pos= yysav;\n\
return 0;\n\
}\n\
++s;\n\
++ctx->pos;\n\
}\n\
return 1;\n\
}\n\
\n\
YY_LOCAL(int) yymatchClass(yycontext *ctx, unsigned char *bits)\n\
{\n\
int c;\n\
if (ctx->pos >= ctx->limit && !yyrefill(ctx)) return 0;\n\
c= (unsigned char)ctx->buf[ctx->pos];\n\
if (bits[c >> 3] & (1 << (c & 7)))\n\
{\n\
++ctx->pos;\n\
yyprintf((stderr, \" ok yymatchClass @ %s\\n\", ctx->buf+ctx->pos));\n\
return 1;\n\
}\n\
yyprintf((stderr, \" fail yymatchClass @ %s\\n\", ctx->buf+ctx->pos));\n\
return 0;\n\
}\n\
\n\
YY_LOCAL(void) yyDo(yycontext *ctx, yyaction action, int begin, int end)\n\
{\n\
while (ctx->thunkpos >= ctx->thunkslen)\n\
{\n\
ctx->thunkslen *= 2;\n\
ctx->thunks= (yythunk *)realloc(ctx->thunks, sizeof(yythunk) * ctx->thunkslen);\n\
}\n\
ctx->thunks[ctx->thunkpos].begin= begin;\n\
ctx->thunks[ctx->thunkpos].end= end;\n\
ctx->thunks[ctx->thunkpos].action= action;\n\
++ctx->thunkpos;\n\
}\n\
\n\
YY_LOCAL(int) yyText(yycontext *ctx, int begin, int end)\n\
{\n\
int yyleng= end - begin;\n\
if (yyleng <= 0)\n\
yyleng= 0;\n\
else\n\
{\n\
while (ctx->textlen < (yyleng + 1))\n\
{\n\
ctx->textlen *= 2;\n\
ctx->text= (char *)realloc(ctx->text, ctx->textlen);\n\
}\n\
memcpy(ctx->text, ctx->buf + begin, yyleng);\n\
}\n\
ctx->text[yyleng]= '\\0';\n\
return yyleng;\n\
}\n\
\n\
YY_LOCAL(void) yyDone(yycontext *ctx)\n\
{\n\
int pos;\n\
for (pos= 0; pos < ctx->thunkpos; ++pos)\n\
{\n\
yythunk *thunk= &ctx->thunks[pos];\n\
int yyleng= thunk->end ? yyText(ctx, thunk->begin, thunk->end) : thunk->begin;\n\
yyprintf((stderr, \"DO [%d] %p %s\\n\", pos, thunk->action, ctx->text));\n\
thunk->action(ctx, ctx->text, yyleng);\n\
}\n\
ctx->thunkpos= 0;\n\
}\n\
\n\
YY_LOCAL(void) yyCommit(yycontext *ctx)\n\
{\n\
if ((ctx->limit -= ctx->pos))\n\
{\n\
memmove(ctx->buf, ctx->buf + ctx->pos, ctx->limit);\n\
}\n\
ctx->begin -= ctx->pos;\n\
ctx->end -= ctx->pos;\n\
ctx->pos= ctx->thunkpos= 0;\n\
}\n\
\n\
YY_LOCAL(int) yyAccept(yycontext *ctx, int tp0)\n\
{\n\
if (tp0)\n\
{\n\
fprintf(stderr, \"accept denied at %d\\n\", tp0);\n\
return 0;\n\
}\n\
else\n\
{\n\
yyDone(ctx);\n\
yyCommit(ctx);\n\
}\n\
return 1;\n\
}\n\
\n\
YY_LOCAL(void) yyPush(yycontext *ctx, char *text, int count) { ctx->val += count; }\n\
YY_LOCAL(void) yyPop(yycontext *ctx, char *text, int count) { ctx->val -= count; }\n\
YY_LOCAL(void) yySet(yycontext *ctx, char *text, int count) { ctx->val[count]= ctx->yy; }\n\
\n\
#endif /* YY_PART */\n\
\n\
#define YYACCEPT yyAccept(ctx, yythunkpos0)\n\
\n\
";
static char *footer= "\n\
\n\
#ifndef YY_PART\n\
\n\
typedef int (*yyrule)(yycontext *ctx);\n\
\n\
YY_PARSE(int) YYPARSEFROM(YY_CTX_PARAM_ yyrule yystart)\n\
{\n\
int yyok;\n\
if (!yyctx->buflen)\n\
{\n\
yyctx->buflen= 1024;\n\
yyctx->buf= (char *)malloc(yyctx->buflen);\n\
yyctx->textlen= 1024;\n\
yyctx->text= (char *)malloc(yyctx->textlen);\n\
yyctx->thunkslen= 32;\n\
yyctx->thunks= (yythunk *)malloc(sizeof(yythunk) * yyctx->thunkslen);\n\
yyctx->valslen= 32;\n\
yyctx->vals= (YYSTYPE *)malloc(sizeof(YYSTYPE) * yyctx->valslen);\n\
yyctx->begin= yyctx->end= yyctx->pos= yyctx->limit= yyctx->thunkpos= 0;\n\
}\n\
yyctx->begin= yyctx->end= yyctx->pos;\n\
yyctx->thunkpos= 0;\n\
yyctx->val= yyctx->vals;\n\
yyok= yystart(yyctx);\n\
if (yyok) yyDone(yyctx);\n\
yyCommit(yyctx);\n\
return yyok;\n\
}\n\
\n\
YY_PARSE(int) YYPARSE(YY_CTX_PARAM)\n\
{\n\
return YYPARSEFROM(YY_CTX_ARG_ yy_%s);\n\
}\n\
\n\
#endif\n\
";
/* Write the top of the generated parser to `output`: a banner comment with
 * the peg version, a blank line, the standard #include block held in
 * `header`, and the YYRULECOUNT definition. */
void Rule_compile_c_header(void)
{
  fprintf(output, "/* A recursive-descent parser generated by peg %d.%d.%d */\n", PEG_MAJOR, PEG_MINOR, PEG_LEVEL);
  fputc('\n', output);
  fputs(header, output);
  fprintf(output, "#define YYRULECOUNT %d\n", ruleCount);
}
/* Report whether matching `node` necessarily consumes input.
 *
 * Returns 1 when every successful match of the node consumes at least one
 * character according to the rules below, otherwise 0 (also for NULL).
 *
 * While a rule is being examined it carries the RuleReached flag; reaching
 * the same rule again before any input-consuming element prints
 * "possible infinite left recursion" on stderr.  An illegal node type
 * prints a message and exits with status 1. */
int consumesInput(Node *node)
{
if (!node) return 0;
switch (node->type)
{
case Rule:
{
int result= 0;
if (RuleReached & node->rule.flags)
fprintf(stderr, "possible infinite left recursion in rule '%s'\n", node->rule.name);
else
{
/* mark the rule as in progress for the duration of the recursive walk */
node->rule.flags |= RuleReached;
result= consumesInput(node->rule.expression);
node->rule.flags &= ~RuleReached;
}
return result;
}
break;
case Dot: return 1;
case Name: return consumesInput(node->name.rule);
case Character:
case String: return strlen(node->string.value) > 0;
case Class: return 1;
case Action: return 0;
case Predicate: return 0;
case Alternate:
{
/* an alternation consumes only if every alternative does */
Node *n;
for (n= node->alternate.first; n; n= n->alternate.next)
if (!consumesInput(n))
return 0;
}
return 1;
case Sequence:
{
/* a sequence consumes as soon as one element does; the scan stops there.
 * NOTE(review): the list is walked through the alternate member -
 * presumably alternate and sequence share a layout in Node; confirm in tree.h */
Node *n;
for (n= node->alternate.first; n; n= n->alternate.next)
if (consumesInput(n))
return 1;
}
return 0;
case PeekFor: return 0;
case PeekNot: return 0;
case Query: return 0;
case Star: return 0;
case Plus: return consumesInput(node->plus.element);
default:
fprintf(stderr, "\nconsumesInput: illegal node type %d\n", node->type);
exit(1);
}
return 0;
}
/* Write the body of the generated parser to `output`.
 *
 * node - first rule of the list to compile (linked through rule.next).
 *
 * Steps: run consumesInput() over every rule in `rules` (its result is
 * discarded; the call is made for its left-recursion diagnostic), emit the
 * preamble, a forward declaration for each rule, one function per action
 * in `actions`, the rule functions themselves, and finally the footer. */
void Rule_compile_c(Node *node)
{
Node *n;
for (n= rules; n; n= n->rule.next)
consumesInput(n);
fprintf(output, "%s", preamble);
for (n= node; n; n= n->rule.next)
fprintf(output, "YY_RULE(int) yy_%s(yycontext *ctx); /* %d */\n", n->rule.name, n->rule.id);
fprintf(output, "\n");
for (n= actions; n; n= n->action.list)
{
/* each action body is wrapped between the #defines and #undefs of its
 * rule's variables */
fprintf(output, "YY_ACTION(void) yy%s(yycontext *ctx, char *yytext, int yyleng)\n{\n", n->action.name);
defineVariables(n->action.rule->rule.variables);
fprintf(output, " yyprintf((stderr, \"do yy%s\\n\"));\n", n->action.name);
fprintf(output, " %s;\n", n->action.text);
undefineVariables(n->action.rule->rule.variables);
fprintf(output, "}\n");
}
Rule_compile_c2(node);
/* footer is used as the format string: its single %s receives the start
 * rule's name */
fprintf(output, footer, start->rule.name);
}

View File

@ -0,0 +1,88 @@
# Builds and runs the example parsers.  Each example target regenerates its
# parser with ../peg or ../leg, compiles the program, feeds it fixed input
# and compares the captured output with the stored <name>.ref file.
EXAMPLES = test rule accept wc dc dcv calc basic localctx
CFLAGS = -g -O3
DIFF = diff
# TEE captures the program output into the file named after it.
TEE = cat >
all : $(EXAMPLES)
test : .FORCE
../peg -o test.peg.c test.peg
$(CC) $(CFLAGS) -o test test.c
echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
rule : .FORCE
../peg -o rule.peg.c rule.peg
$(CC) $(CFLAGS) -o rule rule.c
echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
accept : .FORCE
../peg -o accept.peg.c accept.peg
$(CC) $(CFLAGS) -o accept accept.c
echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
# The leg examples compile the generated <name>.leg.c file directly.
wc : .FORCE
../leg -o wc.leg.c wc.leg
$(CC) $(CFLAGS) -o wc wc.leg.c
cat wc.leg | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
dc : .FORCE
../peg -o dc.peg.c dc.peg
$(CC) $(CFLAGS) -o dc dc.c
echo ' 2 *3 *(3+ 4) ' | ./dc | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
dcv : .FORCE
../peg -o dcv.peg.c dcv.peg
$(CC) $(CFLAGS) -o dcv dcv.c
echo 'a = 6; b = 7; a * b' | ./dcv | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
calc : .FORCE
../leg -o calc.leg.c calc.leg
$(CC) $(CFLAGS) -o calc calc.leg.c
echo 'a = 6; b = 7; a * b' | ./calc | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
basic : .FORCE
../leg -o basic.leg.c basic.leg
$(CC) $(CFLAGS) -o basic basic.leg.c
( echo 'load "test"'; echo "run" ) | ./basic | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
# localctx regenerates the parser from test.peg, as the test target does.
localctx : .FORCE
../peg -o test.peg.c test.peg
$(CC) $(CFLAGS) -o localctx localctx.c
echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
clean : .FORCE
rm -f *~ *.o *.[pl]eg.[cd] $(EXAMPLES)
rm -rf *.dSYM
spotless : clean
# Empty target used as a prerequisite to force rules to run every time.
.FORCE :

View File

@ -0,0 +1,11 @@
#include <stdio.h>
#include <stdlib.h>
#include "accept.peg.c"
/* Run the generated parser repeatedly until yyparse() returns 0, then
 * exit with status 0. */
int main()
{
  for (;;)
    {
      if (!yyparse())
	break;
    }
  return 0;
}

View File

@ -0,0 +1,8 @@
# Example grammar for YYACCEPT: both alternatives of abcd end with the
# predicate &{YYACCEPT}; every action prints a tag together with yypos.
start <- abcd+
abcd <- 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT}
/ 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT}
bc <- 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }
cd <- 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }

View File

@ -0,0 +1,32 @@
A 3
B 3
C 3
ABC 3
B 3
C 3
D 3
BCD 3
A 3
B 3
C 3
ABC 3
B 3
C 3
D 3
BCD 3
A 3
B 3
C 3
ABC 3
B 3
C 3
D 3
BCD 3
A 3
B 3
C 3
ABC 3
B 3
C 3
D 3
BCD 3

View File

@ -0,0 +1,361 @@
# A 'syntax-directed interpreter' (all execution is a side-effect of parsing).
# Inspired by Dennis Allison's original Tiny BASIC grammar, circa 1975.
#
# Copyright (c) 2007 by Ian Piumarta
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, provided that the above copyright notice(s) and this
# permission notice appear in all copies of the Software. Acknowledgement
# of the use of this Software in supporting documentation would be
# appreciated but is not required.
#
# THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
#
# Last edited: 2012-04-29 15:14:06 by piumarta on emilia
%{
# include <stdio.h>
/* One stored program line: its BASIC line number, the byte length of its
 * text, and the text itself (heap copy made by accept()). */
typedef struct line line;
struct line
{
int number;
int length;
char *text;
};
/* Stored program: findLine() keeps this array ordered by line number. */
line *lines= 0;
int numLines= 0;
/* pc: index into lines[] of the next stored line to feed to the parser, or
 * -1 when input comes from nextline() instead.
 * epc: copy of pc taken before reporting an error; error() prints
 * lines[epc-1] when epc > 0. */
int pc= -1, epc= -1;
/* Set by main() when program files are given on the command line; makes
 * 'end' and nextline() terminate the process. */
int batch= 0;
int nextline(char *buf, int max);
# define min(x, y) ((x) < (y) ? (x) : (y))
/* Parser input hook: while a program is running (0 <= pc < numLines) hand the
 * parser the text of the next stored line, truncated to max_size, and advance
 * pc; otherwise ask nextline() for fresh input. */
# define YY_INPUT(buf, result, max_size) \
{ \
if ((pc >= 0) && (pc < numLines)) \
{ \
line *linep= lines+pc++; \
result= min(max_size, linep->length); \
memcpy(buf, linep->text, result); \
} \
else \
result= nextline(buf, max_size); \
}
/* Semantic value attached to grammar rules: an integer, a string, or a
 * pointer to one of the comparison functions below. */
union value {
int number;
char *string;
int (*binop)(int lhs, int rhs);
};
# define YYSTYPE union value
/* One integer slot per variable 'a'..'z' (indexed by letter - 'a'). */
int variables[26];
void accept(int number, char *line);
void save(char *name);
void load(char *name);
void type(char *name);
/* Comparison operators selected by the relop rule. */
int lessThan(int lhs, int rhs) { return lhs < rhs; }
int lessEqual(int lhs, int rhs) { return lhs <= rhs; }
int notEqual(int lhs, int rhs) { return lhs != rhs; }
int equalTo(int lhs, int rhs) { return lhs == rhs; }
int greaterEqual(int lhs, int rhs) { return lhs >= rhs; }
int greaterThan(int lhs, int rhs) { return lhs > rhs; }
int input(void);
/* Return-address stack for gosub/return; sp is the number of entries used. */
int stack[1024], sp= 0;
char *help;
void error(char *fmt, ...);
int findLine(int n, int create);
%}
# line: one unit of input. In order of preference: an immediate statement;
# a number followed by text, which is handed to accept() to store (or delete)
# that program line; an empty line; anything else, reported as a syntax error;
# or end of input, which exits.
line = - s:statement CR
| - n:number < ( !CR . )* CR > { accept(n.number, yytext); }
| - CR
| - < ( !CR . )* CR > { epc= pc; error("syntax error"); }
| - !. { exit(0); }
# statement: the command set. Actions that may fail record epc= pc first so
# that error() can show the offending program line.
# NOTE(review): the 'if' action sets yythunkpos= 0 when the comparison is
# false; presumably this drops the actions queued for the 'then' statement --
# confirm against the generated parser.
statement = 'print'- expr-list
| 'if'- e1:expression r:relop e2:expression { if (!r.binop(e1.number, e2.number)) yythunkpos= 0; }
'then'- statement
| 'goto'- e:expression { epc= pc; if ((pc= findLine(e.number, 0)) < 0) error("no such line"); }
| 'input'- var-list
| 'let'- v:var EQUAL e:expression { variables[v.number]= e.number; }
| 'gosub'- e:expression { epc= pc; if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs");
if (pc < 0) error("no such line"); }
| 'return'- { epc= pc; if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); }
| 'clear'- { while (numLines) accept(lines->number, "\n"); }
| 'list'- { int i; for (i= 0; i < numLines; ++i) printf("%5d %s", lines[i].number, lines[i].text); }
| 'run'- s:string { load(s.string); pc= 0; }
| 'run'- { pc= 0; }
| 'end'- { pc= -1; if (batch) exit(0); }
| 'rem'- ( !CR . )*
| ('bye'|'quit'|'exit')- { exit(0); }
| 'save'- s:string { save(s.string); }
| 'load'- s:string { load(s.string); }
| 'type'- s:string { type(s.string); }
| 'dir'- { system("ls *.bas"); }
| 'help'- { fprintf(stderr, "%s", help); }
expr-list = ( e:string { printf("%s", e.string); }
| e:expression { printf("%d", e.number); }
)? ( COMMA ( e:string { printf("%s", e.string); }
| e:expression { printf("%d", e.number); }
)
)* ( COMMA
| !COMMA { printf("\n"); }
)
var-list = v:var { variables[v.number]= input(); }
( COMMA v:var { variables[v.number]= input(); }
)*
expression = ( PLUS? l:term
| MINUS l:term { l.number = -l.number }
) ( PLUS r:term { l.number += r.number }
| MINUS r:term { l.number -= r.number }
)* { $$.number = l.number }
# term: product/quotient of factors, evaluated left to right.
# A zero divisor is reported through error() (which also stops a running
# program) instead of letting the integer division trap; the dividend is left
# unchanged in that case.
term = l:factor ( STAR r:factor { l.number *= r.number }
| SLASH r:factor { if (r.number) l.number /= r.number; else { epc= pc; error("division by zero"); } }
)* { $$.number = l.number }
factor = v:var { $$.number = variables[v.number] }
| n:number
| OPEN expression CLOSE
var = < [a-z] > - { $$.number = yytext[0] - 'a' }
number = < digit+ > - { $$.number = atoi(yytext); }
digit = [0-9]
string = '"' < [^\"]* > '"' - { $$.string = yytext; }
relop = '<=' - { $$.binop= lessEqual; }
| '<>' - { $$.binop= notEqual; }
| '<' - { $$.binop= lessThan; }
| '>=' - { $$.binop= greaterEqual; }
| '>' - { $$.binop= greaterThan; }
| '=' - { $$.binop= equalTo; }
EQUAL = '=' - CLOSE = ')' - OPEN = '(' -
SLASH = '/' - STAR = '*' - MINUS = '-' -
PLUS = '+' - COMMA = ',' -
- = [ \t]*
# Line terminator. The two-character form must come first: choice is ordered,
# so with '\r' ahead of '\r\n' only the CR would be consumed and the LF would
# be left behind to be parsed as a separate empty line.
CR = '\r\n' | '\n' | '\r'
%%
#include <unistd.h>
#include <stdarg.h>
char *help=
"print <num>|<string> [, <num>|<string> ...] [,]\n"
"if <expr> <|<=|<>|=|>=|> <expr> then <stmt>\n"
"input <var> [, <var> ...] let <var> = <expr>\n"
"goto <expr> gosub <expr>\n"
"end return\n"
"list clear\n"
"run [\"filename\"] rem <comment...>\n"
"dir type \"filename\"\n"
"save \"filename\" load \"filename\"\n"
"bye|quit|exit help\n"
;
void error(char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
if (epc > 0)
fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text);
else
fprintf(stderr, "\n");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
epc= pc= -1;
}
#ifdef USE_READLINE
# include <readline/readline.h>
# include <readline/history.h>
#endif
/* Fetch fresh input for the parser when no stored program line is pending
 * (YY_INPUT falls back to this function).
 * Resets pc to -1, exits the process in batch mode, prompts with '>' when
 * stdin is a terminal, and returns the number of characters placed in buf
 * (0 at end of input). */
int nextline(char *buf, int max)
{
pc= -1;
if (batch) exit(0);
if (isatty(fileno(stdin)))
{
# ifdef USE_READLINE
/* With readline both branches below return, so the plain prompt after
 * #endif is only reached when USE_READLINE is not defined. */
char *line= readline(">");
if (line)
{
int len= strlen(line);
/* Leave room for the '\n' appended below. */
if (len >= max) len= max - 1;
/* Copies exactly len characters; buf is not NUL-terminated on this path. */
strncpy(buf, line, len);
(buf)[len]= '\n';
add_history(line);
free(line);
return len + 1;
}
else
{
printf("\n");
return 0;
}
# endif
putchar('>');
fflush(stdout);
}
return fgets(buf, max, stdin) ? strlen(buf) : 0;
}
int maxLines= 0;
int findLine(int n, int create)
{
int lo= 0, hi= numLines - 1;
while (lo <= hi)
{
int mid= (lo + hi) / 2, lno= lines[mid].number;
if (lno > n)
hi= mid - 1;
else if (lno < n)
lo= mid + 1;
else
return mid;
}
if (create)
{
if (numLines == maxLines)
{
maxLines *= 2;
lines= realloc(lines, sizeof(line) * maxLines);
}
if (lo < numLines)
memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo));
++numLines;
lines[lo].number= n;
lines[lo].text= 0;
return lo;
}
return -1;
}
/* Store, replace or delete program line n.
 *
 * s is the text that followed the line number. When it starts with a control
 * character (in practice the line terminator, i.e. the line was empty) the
 * stored line n is deleted, if present. Otherwise a private copy of s becomes
 * the text of line n, replacing any previous text.
 * Exits the process if the copy cannot be allocated.
 */
void accept(int n, char *s)
{
  /* Compare as unsigned so bytes >= 0x80 are not mistaken for control
   * characters on platforms where char is signed. */
  if ((unsigned char)s[0] < 32) /* delete */
    {
      int lno= findLine(n, 0);
      if (lno >= 0)
        {
          /* Release the text owned by the slot before it is overwritten. */
          free(lines[lno].text);
          if (lno < numLines - 1)
            memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1));
          --numLines;
        }
    }
  else /* insert */
    {
      int lno;
      char *copy= strdup(s);
      if (!copy)
        {
          perror("strdup");
          exit(1);
        }
      lno= findLine(n, 1);
      free(lines[lno].text); /* null for a freshly created slot */
      lines[lno].length= strlen(s);
      lines[lno].text= copy;
    }
}
/* Return name with ".bas" appended unless it already ends in ".bas"
 * (case-insensitive) and is longer than the suffix itself.
 *
 * The result lives in a static buffer that is overwritten by the next call;
 * names that do not fit are truncated rather than overflowing the buffer.
 */
char *extend(char *name)
{
  static char path[1024];
  size_t len= strlen(name);
  const char *suffix= ((len > 4) && !strcasecmp(".bas", name + len - 4)) ? "" : ".bas";

  snprintf(path, sizeof(path), "%s%s", name, suffix);
  return path;
}
/* Write the stored program to the file named by extend(name), one
 * "<number> <text>" record per program line. A failure to open the file is
 * reported with perror() and nothing is written. */
void save(char *name)
{
  FILE *out;
  int idx= 0;

  name= extend(name);
  out= fopen(name, "w");
  if (out == NULL)
    {
      perror(name);
      return;
    }

  while (idx < numLines)
    {
      fprintf(out, "%d %s", lines[idx].number, lines[idx].text);
      ++idx;
    }
  fclose(out);
}
/* Read program lines from the file named by extend(name) and pass each one
 * to accept(). Reading stops at the first record that does not begin with a
 * number, or at end of file. A failure to open the file is reported with
 * perror(). */
void load(char *name)
{
  char text[1024];
  int number;
  FILE *in;

  name= extend(name);
  if (!(in= fopen(name, "r")))
    {
      perror(name);
      return;
    }

  for (;;)
    {
      if (fscanf(in, " %d ", &number) != 1)
        break;
      if (!fgets(text, sizeof(text), in))
        break;
      accept(number, text);
    }
  fclose(in);
}
/* Copy the file named by extend(name) to stdout, adding a final newline when
 * the file does not end with '\n' or '\r'. An empty file produces no output.
 * A failure to open the file is reported with perror(). */
void type(char *name)
{
  FILE *f= fopen(name= extend(name), "r");
  if (!f)
    perror(name);
  else
    {
      /* Start as if a newline had just been seen, so an empty file does not
       * read an uninitialised value below. */
      int c, last= '\n';
      while ((c= getc(f)) != EOF)
        putchar(last= c);
      fclose(f);
      if ('\n' != last && '\r' != last) putchar('\n');
    }
}
/* Read one line from stdin and return its leading integer (atoi semantics).
 * Returns 0 when no line can be read (end of input or read error). */
int input(void)
{
  char buf[32];

  if (!fgets(buf, sizeof(buf), stdin))
    return 0;
  return atoi(buf);
}
/* Entry point. Allocates the initial program store, loads every file named
 * on the command line (which also selects batch mode and starts execution at
 * the first stored line), then runs the parser until stdin reaches end of
 * file. Returns 1 if the program store cannot be allocated. */
int main(int argc, char **argv)
{
  maxLines= 32;
  lines= malloc(sizeof(line) * maxLines);
  if (!lines)
    {
      perror("malloc");
      return 1;
    }
  numLines= 0;

  if (argc > 1)
    {
      batch= 1;
      while (argc-- > 1)
        load(*++argv);
      pc= 0;
    }

  while (!feof(stdin))
    yyparse();
  return 0;
}

View File

@ -0,0 +1,10 @@
1
2 4
3 6 9
4 8 12 16
5 10 15 20 25
6 12 18 24 30 36
7 14 21 28 35 42 49
8 16 24 32 40 48 56 64
9 18 27 36 45 54 63 72 81
10 20 30 40 50 60 70 80 90 100

View File

@ -0,0 +1,8 @@
100 let n=100000
120 let m=0
110 let s=0
130 let m=m+1
140 let s=s+m
150 if m<n then goto 130
160 print "interpreted ", n*3, " lines of code; answer is ", s
170 end

View File

@ -0,0 +1,46 @@
%{
#include <stdio.h>
int vars[26];
%}
Stmt = - e:Expr EOL { printf("%d\n", e); }
| ( !EOL . )* EOL { printf("error\n"); }
Expr = i:ID ASSIGN s:Sum { $$= vars[i]= s; }
| s:Sum { $$= s; }
Sum = l:Product
( PLUS r:Product { l += r; }
| MINUS r:Product { l -= r; }
)* { $$= l; }
Product = l:Value
( TIMES r:Value { l *= r; }
| DIVIDE r:Value { l /= r; }
)* { $$= l; }
Value = i:NUMBER { $$= atoi(yytext); }
| i:ID !ASSIGN { $$= vars[i]; }
| OPEN i:Expr CLOSE { $$= i; }
NUMBER = < [0-9]+ > - { $$= atoi(yytext); }
ID = < [a-z] > - { $$= yytext[0] - 'a'; }
ASSIGN = '=' -
PLUS = '+' -
MINUS = '-' -
TIMES = '*' -
DIVIDE = '/' -
OPEN = '(' -
CLOSE = ')' -
- = [ \t]*
EOL = '\n' | '\r\n' | '\r' | ';'
%%
/* Evaluate statements one after another until yyparse() reports that no
 * further input could be parsed. */
int main()
{
  int parsed;

  do
    parsed= yyparse();
  while (parsed);
  return 0;
}

View File

@ -0,0 +1,3 @@
6
7
42

View File

@ -0,0 +1,17 @@
#include <stdio.h>
#include <stdlib.h>
/* Operand stack used by the grammar's actions; stackp indexes the top
 * element and is -1 when the stack is empty. */
int stack[1024];
int stackp= -1;

/* Push n and return it. Terminates the program with a diagnostic instead of
 * writing past the end of the stack. */
int push(int n)
{
  if (stackp + 1 >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
      fprintf(stderr, "stack overflow\n");
      exit(1);
    }
  return stack[++stackp]= n;
}

/* Remove and return the top element. Terminates the program with a
 * diagnostic instead of reading below the bottom of the stack. */
int pop(void)
{
  if (stackp < 0)
    {
      fprintf(stderr, "stack underflow\n");
      exit(1);
    }
  return stack[stackp--];
}
#include "dc.peg.c"
/* Parse and evaluate expressions until yyparse() returns zero. */
int main()
{
  while (yyparse() != 0)
    continue;
  return 0;
}

View File

@ -0,0 +1,27 @@
# Grammar
Expr <- SPACE Sum EOL { printf("%d\n", pop()); }
/ (!EOL .)* EOL { printf("error\n"); }
Sum <- Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
/ MINUS Product { int r= pop(), l= pop(); push(l - r); }
)*
Product <- Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
/ DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
)*
Value <- NUMBER { push(atoi(yytext)); }
/ OPEN Sum CLOSE
# Lexemes
NUMBER <- < [0-9]+ > SPACE
PLUS <- '+' SPACE
MINUS <- '-' SPACE
TIMES <- '*' SPACE
DIVIDE <- '/' SPACE
OPEN <- '(' SPACE
CLOSE <- ')' SPACE
SPACE <- [ \t]*
EOL <- '\n' / '\r\n' / '\r'

View File

@ -0,0 +1 @@
42

View File

@ -0,0 +1,20 @@
#include <stdio.h>
#include <stdlib.h>
/* Operand stack used by the grammar's actions; stackp indexes the top
 * element and is -1 when the stack is empty. */
int stack[1024];
int stackp= -1;
/* Letter of the variable being assigned, and one value slot per letter. */
int var= 0;
int vars[26];

/* Report a stack misuse and terminate instead of indexing out of bounds. */
static void stackFault(const char *what)
{
  fprintf(stderr, "stack %s\n", what);
  exit(1);
}

/* Push n and return it. */
int push(int n)
{
  if (stackp + 1 >= (int)(sizeof(stack) / sizeof(stack[0])))
    stackFault("overflow");
  return stack[++stackp]= n;
}

/* Remove and return the top element. */
int pop(void)
{
  if (stackp < 0)
    stackFault("underflow");
  return stack[stackp--];
}

/* Return the top element without removing it. */
int top(void)
{
  if (stackp < 0)
    stackFault("underflow");
  return stack[stackp];
}
#include "dcv.peg.c"
/* Keep parsing statements for as long as yyparse() succeeds. */
int main()
{
  int ok= 1;

  while (ok)
    ok= yyparse();
  return 0;
}

View File

@ -0,0 +1,34 @@
# Grammar
Stmt <- SPACE Expr EOL { printf("%d\n", pop()); }
/ (!EOL .)* EOL { printf("error\n"); }
Expr <- ID { var= yytext[0] } ASSIGN Sum { vars[var - 'a']= top(); }
/ Sum
Sum <- Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
/ MINUS Product { int r= pop(), l= pop(); push(l - r); }
)*
Product <- Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
/ DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
)*
Value <- NUMBER { push(atoi(yytext)); }
/ < ID > !ASSIGN { push(vars[yytext[0] - 'a']); }
/ OPEN Expr CLOSE
# Lexemes
NUMBER <- < [0-9]+ > SPACE
ID <- < [a-z] > SPACE
ASSIGN <- '=' SPACE
PLUS <- '+' SPACE
MINUS <- '-' SPACE
TIMES <- '*' SPACE
DIVIDE <- '/' SPACE
OPEN <- '(' SPACE
CLOSE <- ')' SPACE
SPACE <- [ \t]*
EOL <- '\n' / '\r\n' / '\r' / ';'

View File

@ -0,0 +1,3 @@
6
7
42

View File

@ -0,0 +1,17 @@
100 let n=32
110 gosub 200
120 print "fibonacci(",n,") = ", m
130 end
200 let c=n
210 let b=1
220 if c<2 then goto 400
230 let c=c-1
240 let a=1
300 let c=c-1
310 let d=a+b
320 let a=b
330 let b=d+1
340 if c<>0 then goto 300
400 let m=b
410 return

View File

@ -0,0 +1,17 @@
#include <stdio.h>
/* Parser input hook: read a single character from stdin per request, store
 * it in buf and report 1 (or 0 at end of input). Every character read is
 * also echoed as "<c>" so the amount of input consumed by the parser is
 * visible in the output. */
#define YY_INPUT(buf, result, max) \
{ \
int c= getchar(); \
result= (EOF == c) ? 0 : (*(buf)= c, 1); \
if (EOF != c) printf("<%c>\n", c); \
}
#include "left.peg.c"
/* Run the parser once and report whether the input was accepted. */
int main()
{
  const char *verdict;

  if (yyparse())
    verdict= "success\n";
  else
    verdict= "failure\n";
  fputs(verdict, stdout);
  return 0;
}

View File

@ -0,0 +1,3 @@
# Grammar
S <- (S 'a' / 'a') !'a'

View File

@ -0,0 +1,13 @@
#include <stdio.h>
#define YY_CTX_LOCAL
#include "test.peg.c"
/* Same driver as the global-context test, but with YY_CTX_LOCAL the parser
 * state lives in a caller-owned yycontext that must start out zeroed and is
 * passed to every yyparse() call. */
int main()
{
  yycontext ctx;

  memset(&ctx, 0, sizeof ctx);
  for (;;)
    {
      if (!yyparse(&ctx))
        break;
    }
  return 0;
}

View File

@ -0,0 +1,10 @@
a1 ab1 .
a2 ac2 .
a3 ad3 .
a3 ae3 .
a4 af4 afg4 .
a4 af5 afh5 .
a4 af4 afg4 .
a4 af5 afh5 .
af6 afi6 a6 .
af6 af7 afj7 a6 .

View File

@ -0,0 +1,11 @@
#include <stdio.h>
#include <stdlib.h>
#include "rule.peg.c"
/* Call the generated parser until it fails to match. */
int main()
{
  while (0 != yyparse())
    {
      /* nothing to do between parses */
    }
  return 0;
}

View File

@ -0,0 +1,8 @@
start <- abcd+
abcd <- 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); }
/ 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); }
bc <- 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }
cd <- 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }

View File

@ -0,0 +1,32 @@
A 24
B 24
C 24
ABC 24
B 24
C 24
D 24
BCD 24
A 24
B 24
C 24
ABC 24
B 24
C 24
D 24
BCD 24
A 24
B 24
C 24
ABC 24
B 24
C 24
D 24
BCD 24
A 24
B 24
C 24
ABC 24
B 24
C 24
D 24
BCD 24

View File

@ -0,0 +1,12 @@
10 let i=1
20 gosub 100
30 let i=i+1
40 if i<=10 then goto 20
50 end
100 let j=1
110 print " ", i*j,
120 let j=j+1
130 if j<=i then goto 110
140 print
150 return

View File

@ -0,0 +1,8 @@
#include <stdio.h>
#include "test.peg.c"
/* Parse one sentence per iteration; a zero result from yyparse() ends the
 * loop. */
int main()
{
  int matched= yyparse();

  while (matched)
    matched= yyparse();
  return 0;
}

View File

@ -0,0 +1,13 @@
start <- body '.' { printf(".\n"); }
body <- 'a' { printf("a1 "); } 'b' { printf("ab1 "); }
/ 'a' { printf("a2 "); } 'c' { printf("ac2 "); }
/ 'a' { printf("a3 "); } ( 'd' { printf("ad3 "); } / 'e' { printf("ae3 "); } )
/ 'a' { printf("a4 "); } ( 'f' { printf("af4 "); } 'g' { printf("afg4 "); }
/ 'f' { printf("af5 "); } 'h' { printf("afh5 "); } )
/ 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ") } 'i' &{ printf("afi6 ") }
/ 'f' &{ printf("af7 ") } 'j' &{ printf("afj7 ") } )

View File

@ -0,0 +1,10 @@
a1 ab1 .
a2 ac2 .
a3 ad3 .
a3 ae3 .
a4 af4 afg4 .
a4 af5 afh5 .
a4 af4 afg4 .
a4 af5 afh5 .
af6 afi6 a6 .
af6 af7 afj7 a6 .

View File

@ -0,0 +1,14 @@
%{
#include <unistd.h>
%}
start = "username" { printf("%s", getlogin()); }
| < . > { putchar(yytext[0]); }
%%
/* Filter stdin to stdout through the grammar above, one match per call,
 * until yyparse() returns zero. */
int main()
{
  for (;;)
    if (!yyparse())
      break;
  return 0;
}

View File

@ -0,0 +1,22 @@
%{
#include <stdio.h>
int lines= 0, words= 0, chars= 0;
%}
start = (line | word | char)
line = < (( '\n' '\r'* ) | ( '\r' '\n'* )) > { lines++; chars += yyleng; }
word = < [a-zA-Z]+ > { words++; chars += yyleng; printf("<%s>\n", yytext); }
char = . { chars++; }
%%
int main()
{
while (yyparse())
;
printf("%d lines\n", lines);
printf("%d chars\n", chars);
printf("%d words\n", words);
return 0;
}

View File

@ -0,0 +1,55 @@
<include>
<stdio>
<h>
<int>
<lines>
<words>
<chars>
<start>
<line>
<word>
<char>
<line>
<n>
<r>
<r>
<n>
<lines>
<chars>
<yyleng>
<word>
<a>
<zA>
<Z>
<words>
<chars>
<yyleng>
<printf>
<s>
<n>
<yytext>
<char>
<chars>
<int>
<main>
<while>
<yyparse>
<printf>
<d>
<lines>
<n>
<lines>
<printf>
<d>
<chars>
<n>
<chars>
<printf>
<d>
<words>
<n>
<words>
<return>
22 lines
425 chars
52 words

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,292 @@
# LE Grammar for LE Grammars
#
# Copyright (c) 2007 by Ian Piumarta
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, provided that the above copyright notice(s) and this
# permission notice appear in all copies of the Software. Acknowledgement
# of the use of this Software in supporting documentation would be
# appreciated but is not required.
#
# THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
#
# Last edited: 2012-04-29 15:51:15 by piumarta on emilia
%{
# include "tree.h"
# include "version.h"
# include <stdio.h>
# include <stdlib.h>
# include <unistd.h>
# include <string.h>
# include <libgen.h>
# include <assert.h>
/* One '%{ ... %}' declaration section copied from the grammar; sections are
 * chained through next. */
typedef struct Header Header;
struct Header {
char *text;
Header *next;
};
/* Grammar source currently being read. */
FILE *input= 0;
int verboseFlag= 0;
/* Position information used by yyerror(). */
static int lineNumber= 0;
static char *fileName= 0;
/* Text following '%%', and the list of declaration sections seen so far. */
static char *trailer= 0;
static Header *headers= 0;
void makeHeader(char *text);
void makeTrailer(char *text);
void yyerror(char *message);
/* Parser input hook: deliver one character at a time from input, counting
 * lines as they go by.
 * NOTE(review): both '\n' and '\r' increment lineNumber, so a "\r\n" pair is
 * counted as two lines. */
# define YY_INPUT(buf, result, max) \
{ \
int c= getc(input); \
if ('\n' == c || '\r' == c) ++lineNumber; \
result= (EOF == c) ? 0 : (*(buf)= c, 1); \
}
/* Give the generated parser's functions internal linkage. */
# define YY_LOCAL(T) static T
# define YY_RULE(T) static T
%}
# Hierarchical syntax
grammar= - ( declaration | definition )+ trailer? end-of-file
declaration= '%{' < ( !'%}' . )* > RPERCENT { makeHeader(yytext); } #{YYACCEPT}
trailer= '%%' < .* > { makeTrailer(yytext); } #{YYACCEPT}
definition= identifier { if (push(beginRule(findRule(yytext)))->rule.expression)
fprintf(stderr, "rule '%s' redefined\n", yytext); }
EQUAL expression { Node *e= pop(); Rule_setExpression(pop(), e); }
SEMICOLON? #{YYACCEPT}
expression= sequence (BAR sequence { Node *f= pop(); push(Alternate_append(pop(), f)); }
)*
sequence= prefix (prefix { Node *f= pop(); push(Sequence_append(pop(), f)); }
)*
prefix= AND action { push(makePredicate(yytext)); }
| AND suffix { push(makePeekFor(pop())); }
| NOT suffix { push(makePeekNot(pop())); }
| suffix
suffix= primary (QUESTION { push(makeQuery(pop())); }
| STAR { push(makeStar (pop())); }
| PLUS { push(makePlus (pop())); }
)?
primary= identifier { push(makeVariable(yytext)); }
COLON identifier !EQUAL { Node *name= makeName(findRule(yytext)); name->name.variable= pop(); push(name); }
| identifier !EQUAL { push(makeName(findRule(yytext))); }
| OPEN expression CLOSE
| literal { push(makeString(yytext)); }
| class { push(makeClass(yytext)); }
| DOT { push(makeDot()); }
| action { push(makeAction(yytext)); }
| BEGIN { push(makePredicate("YY_BEGIN")); }
| END { push(makePredicate("YY_END")); }
# Lexical syntax
identifier= < [-a-zA-Z_][-a-zA-Z_0-9]* > -
literal= ['] < ( !['] char )* > ['] -
| ["] < ( !["] char )* > ["] -
class= '[' < ( !']' range )* > ']' -
range= char '-' char | char
char= '\\' [-abefnrtv'"\[\]\\]
| '\\' [0-3][0-7][0-7]
| '\\' [0-7][0-7]?
| !'\\' .
action= '{' < braces* > '}' -
braces= '{' braces* '}'
| !'}' .
EQUAL= '=' -
COLON= ':' -
SEMICOLON= ';' -
BAR= '|' -
AND= '&' -
NOT= '!' -
QUESTION= '?' -
STAR= '*' -
PLUS= '+' -
OPEN= '(' -
CLOSE= ')' -
DOT= '.' -
BEGIN= '<' -
END= '>' -
RPERCENT= '%}' -
-= (space | comment)*
space= ' ' | '\t' | end-of-line
comment= '#' (!end-of-line .)* end-of-line
end-of-line= '\r\n' | '\n' | '\r'
end-of-file= !.
%%
/* Report a fatal parse error as "<file>:<line>: <message>" on stderr,
 * followed by the current token text (when non-empty) and the remainder of
 * the current input line, then exit with status 1. */
void yyerror(char *message)
{
fprintf(stderr, "%s:%d: %s", fileName, lineNumber, message);
if (yyctx->text[0]) fprintf(stderr, " near token '%s'", yyctx->text);
if (yyctx->pos < yyctx->limit || !feof(input))
{
/* NOTE(review): assumes buf has room for one byte at index limit -- confirm
 * against the generated parser's buffer management. */
yyctx->buf[yyctx->limit]= '\0';
fprintf(stderr, " before text \"");
/* First the unread part of the parser's buffer, up to the end of the line... */
while (yyctx->pos < yyctx->limit)
{
if ('\n' == yyctx->buf[yyctx->pos] || '\r' == yyctx->buf[yyctx->pos]) break;
fputc(yyctx->buf[yyctx->pos++], stderr);
}
/* ...then, if the buffer ran out first, the rest of the line from the file. */
if (yyctx->pos == yyctx->limit)
{
int c;
while (EOF != (c= fgetc(input)) && '\n' != c && '\r' != c)
fputc(c, stderr);
}
fputc('\"', stderr);
}
fprintf(stderr, "\n");
exit(1);
}
/* Record a private copy of one '%{ ... %}' declaration section.
 * The new entry is linked at the front of the headers list, so the list holds
 * the sections in reverse order of appearance. Exits the process if memory
 * cannot be allocated. */
void makeHeader(char *text)
{
  Header *header= malloc(sizeof *header);
  char *copy= strdup(text);

  if (!header || !copy)
    {
      perror("makeHeader");
      exit(1);
    }
  header->text= copy;
  header->next= headers;
  headers= header;
}
/* Remember a private copy of the text that followed '%%'.
 * A trailer recorded earlier (for example from a previous input file) is
 * released and replaced. Exits the process if the copy cannot be allocated. */
void makeTrailer(char *text)
{
  char *copy= strdup(text);

  if (!copy)
    {
      perror("makeTrailer");
      exit(1);
    }
  free(trailer);
  trailer= copy;
}
/* Print "<name> version <major>.<minor>.<level>" on standard output. */
static void version(char *name)
{
  fprintf(stdout, "%s version %d.%d.%d\n",
          name, PEG_MAJOR, PEG_MINOR, PEG_LEVEL);
}
/* Print the version banner followed by the command-line synopsis and option
 * summary on stderr, then exit with status 1. */
static void usage(char *name)
{
  static const char *const details[]= {
    "where <option> can be\n",
    " -h print this help information\n",
    " -o <ofile> write output to <ofile>\n",
    " -v be verbose\n",
    " -V print version number and exit\n",
    "if no <file> is given, input is read from stdin\n",
    "if no <ofile> is given, output is written to stdout\n",
  };
  size_t i;

  version(name);
  fprintf(stderr, "usage: %s [<option>...] [<file>...]\n", name);
  for (i= 0; i < sizeof(details) / sizeof(details[0]); ++i)
    fputs(details[i], stderr);
  exit(1);
}
/* Entry point of leg: parse the command-line options, read one or more
 * grammar files (or stdin), then write the generated parser to output. */
int main(int argc, char **argv)
{
Node *n;
int c;
/* Defaults: read the grammar from stdin, write the parser to stdout. */
output= stdout;
input= stdin;
lineNumber= 1;
fileName= "<stdin>";
while (-1 != (c= getopt(argc, argv, "Vho:v")))
{
switch (c)
{
case 'V':
version(basename(argv[0]));
exit(0);
case 'h':
/* usage() exits, so the break below is never reached. */
usage(basename(argv[0]));
break;
case 'o':
if (!(output= fopen(optarg, "w")))
{
perror(optarg);
exit(1);
}
break;
case 'v':
verboseFlag= 1;
break;
default:
fprintf(stderr, "for usage try: %s -h\n", argv[0]);
exit(1);
}
}
/* Skip past the options; what remains are grammar file names. */
argc -= optind;
argv += optind;
if (argc)
{
for (; argc; --argc, ++argv)
{
/* "-" selects standard input explicitly. */
if (!strcmp(*argv, "-"))
{
input= stdin;
fileName= "<stdin>";
}
else
{
if (!(input= fopen(*argv, "r")))
{
perror(*argv);
exit(1);
}
fileName= *argv;
}
lineNumber= 1;
/* yyerror() does not return: it prints the diagnostic and exits. */
if (!yyparse())
yyerror("syntax error");
if (input != stdin)
fclose(input);
}
}
else
if (!yyparse())
yyerror("syntax error");
if (verboseFlag)
for (n= rules; n; n= n->any.next)
Rule_print(n);
/* Emit order: generated preamble, declaration sections, parser, trailer. */
Rule_compile_c_header();
for (; headers; headers= headers->next)
fprintf(output, "%s\n", headers->text);
if (rules)
Rule_compile_c(rules);
if (trailer)
fprintf(output, "%s\n", trailer);
return 0;
}

View File

@ -0,0 +1,933 @@
.\" Copyright (c) 2007 by Ian Piumarta
.\" All rights reserved.
.\"
.\" Permission is hereby granted, free of charge, to any person obtaining a
.\" copy of this software and associated documentation files (the 'Software'),
.\" to deal in the Software without restriction, including without limitation
.\" the rights to use, copy, modify, merge, publish, distribute, and/or sell
.\" copies of the Software, and to permit persons to whom the Software is
.\" furnished to do so, provided that the above copyright notice(s) and this
.\" permission notice appear in all copies of the Software. Acknowledgement
.\" of the use of this Software in supporting documentation would be
.\" appreciated but is not required.
.\"
.\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
.\"
.\" Last edited: 2012-04-29 16:58:44 by piumarta on emilia
.\"
.TH PEG 1 "April 2012" "Version 0.1"
.SH NAME
peg, leg \- parser generators
.SH SYNOPSIS
.B peg
.B [\-hvV \-ooutput]
.I [filename ...]
.sp 0
.B leg
.B [\-hvV \-ooutput]
.I [filename ...]
.SH DESCRIPTION
.I peg
and
.I leg
are tools for generating recursive-descent parsers: programs that
perform pattern matching on text. They process a Parsing Expression
Grammar (PEG) [Ford 2004] to produce a program that recognises legal
sentences of that grammar.
.I peg
processes PEGs written using the original syntax described by Ford;
.I leg
processes PEGs written using slightly different syntax and conventions
that are intended to make it an attractive replacement for parsers
built with
.IR lex (1)
and
.IR yacc (1).
Unlike
.I lex
and
.IR yacc ,
.I peg
and
.I leg
support unlimited backtracking, provide ordered choice as a means for
disambiguation, and can combine scanning (lexical analysis) and
parsing (syntactic analysis) into a single activity.
.PP
.I peg
reads the specified
.IR filename s,
or standard input if no
.IR filename s
are given, for a grammar describing the parser to generate.
.I peg
then generates a C source file that defines a function
.IR yyparse ().
This C source file can be included in, or compiled and then linked
with, a client program. Each time the client program calls
.IR yyparse ()
the parser consumes input text according to the parsing rules,
starting from the first rule in the grammar.
.IR yyparse ()
returns non-zero if the input could be parsed according to the
grammar; it returns zero if the input could not be parsed.
.PP
The prefix 'yy' or 'YY' is prepended to all externally-visible symbols
in the generated parser. This is intended to reduce the risk of
namespace pollution in client programs. (The choice of 'yy' is
historical; see
.IR lex (1)
and
.IR yacc (1),
for example.)
.SH OPTIONS
.I peg
and
.I leg
provide the following options:
.TP
.B \-h
prints a summary of available options and then exits.
.TP
.B \-ooutput
writes the generated parser to the file
.B output
instead of the standard output.
.TP
.B \-v
writes verbose information to standard error while working.
.TP
.B \-V
writes version information to standard output then exits.
.SH A SIMPLE EXAMPLE
The following
.I peg
input specifies a grammar with a single rule (called 'start') that is
satisfied when the input contains the string "username".
.nf
start <- "username"
.fi
(The quotation marks are
.I not
part of the matched text; they serve to indicate a literal string to
be matched.) In other words,
.IR yyparse ()
in the generated C source will return non-zero only if the next eight
characters read from the input spell the word "username". If the
input contains anything else,
.IR yyparse ()
returns zero and no input will have been consumed. (Subsequent calls
to
.IR yyparse ()
will also return zero, since the parser is effectively blocked looking
for the string "username".) To ensure progress we can add an
alternative clause to the 'start' rule that will match any single
character if "username" is not found.
.nf
start <- "username"
/ .
.fi
.IR yyparse ()
now always returns non-zero (except at the very end of the input). To
do something useful we can add actions to the rules. These actions
are performed after a complete match is found (starting from the first
rule) and are chosen according to the 'path' taken through the grammar
to match the input. (Linguists would call this path a 'phrase
marker'.)
.nf
start <- "username" { printf("%s\\n", getlogin()); }
/ < . > { putchar(yytext[0]); }
.fi
The first line instructs the parser to print the user's login name
whenever it sees "username" in the input. If that match fails, the
second line tells the parser to echo the next character on the input
to the standard output. Our parser is now performing useful work: it
will copy the input to the output, replacing all occurrences of
"username" with the user's account name.
.PP
Note the angle brackets ('<' and '>') that were added to the second
alternative. These have no effect on the meaning of the rule, but
serve to delimit the text made available to the following action in
the variable
.IR yytext .
.PP
If the above grammar is placed in the file
.BR username.peg ,
running the command
.nf
peg -o username.c username.peg
.fi
will save the corresponding parser in the file
.BR username.c .
To create a complete program this parser could be included by a C
program as follows.
.nf
#include <stdio.h> /* printf(), putchar() */
#include <unistd.h> /* getlogin() */
#include "username.c" /* yyparse() */
int main()
{
while (yyparse()) /* repeat until EOF */
;
return 0;
}
.fi
.SH PEG GRAMMARS
A grammar consists of a set of named rules.
.nf
name <- pattern
.fi
The
.B pattern
contains one or more of the following elements.
.TP
.B name
The element stands for the entire pattern in the rule with the given
.BR name .
.TP
.BR \(dq characters \(dq
A character or string enclosed in double quotes is matched literally.
The ANSI C escape sequences are recognised within the
.IR characters .
.TP
.BR ' characters '
A character or string enclosed in single quotes is matched literally, as above.
.TP
.BR [ characters ]
A set of characters enclosed in square brackets matches any single
character from the set, with escape characters recognised as above.
If the set begins with an uparrow (^) then the set is negated (the
element matches any character
.I not
in the set). Any pair of characters separated with a dash (-)
represents the range of characters from the first to the second,
inclusive. A single alphabetic character or underscore is matched by
the following set.
.nf
[a-zA-Z_]
.fi
Similarly, the following matches any single non-digit character.
.nf
[^0-9]
.fi
.TP
.B .
A dot matches any character. Note that the only time this fails is at
the end of file, where there is no character to match.
.TP
.BR ( \ pattern\ )
Parentheses are used for grouping (modifying the precedence of the
operators described below).
.TP
.BR { \ action\ }
Curly braces surround actions. The action is arbitrary C source code
to be executed at the end of matching. Any braces within the action
must be properly nested. Any input text that was matched before the
action and delimited by angle brackets (see below) is made available
within the action as the contents of the character array
.IR yytext .
The length of (number of characters in)
.I yytext
is available in the variable
.IR yyleng .
(These variable names are historical; see
.IR lex (1).)
.TP
.B <
An opening angle bracket always matches (consuming no input) and
causes the parser to begin accumulating matched text. This text will
be made available to actions in the variable
.IR yytext .
.TP
.B >
A closing angle bracket always matches (consuming no input) and causes
the parser to stop accumulating text for
.IR yytext .
.PP
The above
.IR element s
can be made optional and/or repeatable with the following suffixes:
.TP
.RB element\ ?
The element is optional. If present on the input, it is consumed and
the match succeeds. If not present on the input, no text is consumed
and the match succeeds anyway.
.TP
.RB element\ +
The element is repeatable. If present on the input, one or more
occurrences of
.I element
are consumed and the match succeeds. If no occurrences of
.I element
are present on the input, the match fails.
.TP
.RB element\ *
The element is optional and repeatable. If present on the input, one or more
occurrences of
.I element
are consumed and the match succeeds. If no occurrences of
.I element
are present on the input, the match succeeds anyway.
.PP
The above elements and suffixes can be converted into predicates (that
match arbitrary input text and subsequently succeed or fail
.I without
consuming that input) with the following prefixes:
.TP
.BR & \ element
The predicate succeeds only if
.I element
can be matched. Input text scanned while matching
.I element
is not consumed from the input and remains available for subsequent
matching.
.TP
.BR ! \ element
The predicate succeeds only if
.I element
cannot be matched. Input text scanned while matching
.I element
is not consumed from the input and remains available for subsequent
matching. A popular idiom is
.nf
!.
.fi
which matches the end of file, after the last character of the input
has already been consumed.
.PP
A special form of the '&' predicate is provided:
.TP
.BR & {\ expression\ }
In this predicate the simple C
.I expression
.RB ( not
statement) is evaluated immediately when the parser reaches the
predicate. If the
.I expression
yields non-zero (true) the 'match' succeeds and the parser continues
with the next element in the pattern. If the
.I expression
yields zero (false) the 'match' fails and the parser backs up to look
for an alternative parse of the input.
.PP
Several elements (with or without prefixes and suffixes) can be
combined into a
.I sequence
by writing them one after the other. The entire sequence matches only
if each individual element within it matches, from left to right.
.PP
Sequences can be separated into disjoint alternatives by the
alternation operator '/'.
.TP
.RB sequence-1\ / \ sequence-2\ / \ ...\ / \ sequence-N
Each sequence is tried in turn until one of them matches, at which
time matching for the overall pattern succeeds. If none of the
sequences matches then the match of the overall pattern fails.
.PP
Finally, the pound sign (#) introduces a comment (discarded) that
continues until the end of the line.
.PP
To summarise the above, the parser tries to match the input text
against a pattern containing literals, names (representing other
rules), and various operators (written as prefixes, suffixes,
juxtaposition for sequencing and an infix alternation operator) that
modify how the elements within the pattern are matched. Matches are
made from left to right, 'descending' into named sub-rules as they are
encountered. If the matching process fails, the parser 'back tracks'
('rewinding' the input appropriately in the process) to find the
nearest alternative 'path' through the grammar. In other words the
parser performs a depth-first, left-to-right search for the first
successfully-matching path through the rules. If found, the actions
along the successful path are executed (in the order they were
encountered).
.PP
Note that predicates are evaluated
.I immediately
during the search for a successful match, since they contribute to the
success or failure of the search. Actions, however, are evaluated
only after a successful match has been found.
.SH PEG GRAMMAR FOR PEG GRAMMARS
The grammar for
.I peg
grammars is shown below. This will both illustrate and formalise
the above description.
.nf
Grammar <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence ( SLASH Sequence )*
Sequence <- Prefix*
Prefix <- AND Action
/ ( AND / NOT )? Suffix
Suffix <- Primary ( QUERY / STAR / PLUS )?
Primary <- Identifier !LEFTARROW
/ OPEN Expression CLOSE
/ Literal
/ Class
/ DOT
/ Action
/ BEGIN
/ END
Identifier <- < IdentStart IdentCont* > Spacing
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
Literal <- ['] < ( !['] Char )* > ['] Spacing
/ ["] < ( !["] Char )* > ["] Spacing
Class <- '[' < ( !']' Range )* > ']' Spacing
Range <- Char '-' Char / Char
Char <- '\\\\' [abefnrtv'"\\[\\]\\\\]
/ '\\\\' [0-3][0-7][0-7]
/ '\\\\' [0-7][0-7]?
/ '\\\\' '-'
/ !'\\\\' .
LEFTARROW <- '<-' Spacing
SLASH <- '/' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUERY <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
OPEN <- '(' Spacing
CLOSE <- ')' Spacing
DOT <- '.' Spacing
Spacing <- ( Space / Comment )*
Comment <- '#' ( !EndOfLine . )* EndOfLine
Space <- ' ' / '\\t' / EndOfLine
EndOfLine <- '\\r\\n' / '\\n' / '\\r'
EndOfFile <- !.
Action <- '{' < [^}]* > '}' Spacing
BEGIN <- '<' Spacing
END <- '>' Spacing
.fi
.SH LEG GRAMMARS
.I leg
is a variant of
.I peg
that adds some features of
.IR lex (1)
and
.IR yacc (1).
It differs from
.I peg
in the following ways.
.TP
.BI %{\ text... \ %}
A declaration section can appear anywhere that a rule definition is
expected. The
.I text
between the delimiters '%{' and '%}' is copied verbatim to the
generated C parser code
.I before
the code that implements the parser itself.
.TP
.IB name\ = \ pattern
The 'assignment' operator replaces the left arrow operator '<-'.
.TP
.B rule-name
Hyphens can appear as letters in the names of rules. Each hyphen is
converted into an underscore in the generated C source code. A single
hyphen '-' is a legal rule name.
.nf
- = [ \\t\\n\\r]*
number = [0-9]+ -
name = [a-zA-Z_][a-zA-Z_0-9]* -
l-paren = '(' -
r-paren = ')' -
.fi
This example shows how ignored whitespace can be obvious when reading
the grammar and yet unobtrusive when placed liberally at the end of
every rule associated with a lexical element.
.TP
.IB seq-1\ | \ seq-2
The alternation operator is vertical bar '|' rather than forward
slash '/'. The
.I peg
rule
.nf
name <- sequence-1
/ sequence-2
/ sequence-3
.fi
is therefore written
.nf
name = sequence-1
| sequence-2
| sequence-3
;
.fi
in
.I leg
(with the final semicolon being optional, as described next).
.TP
.IB pattern\ ;
A semicolon punctuator can optionally terminate a
.IR pattern .
.TP
.BI %% \ text...
A double percent '%%' terminates the rules (and declarations) section of
the grammar. All
.I text
following '%%' is copied verbatim to the generated C parser code
.I after
the parser implementation code.
.TP
.BI $$\ = \ value
A sub-rule can return a semantic
.I value
from an action by assigning it to the pseudo-variable '$$'. All
semantic values must have the same type (which defaults to 'int').
This type can be changed by defining YYSTYPE in a declaration section.
.TP
.IB identifier : name
The semantic value returned (by assigning to '$$') from the sub-rule
.I name
is associated with the
.I identifier
and can be referred to in subsequent actions.
.PP
The desk calculator example below illustrates the use of '$$' and ':'.
.SH LEG EXAMPLE: A DESK CALCULATOR
The extensions in
.I leg
described above allow useful parsers and evaluators (including
declarations, grammar rules, and supporting C functions such
as 'main') to be kept within a single source file. To illustrate this
we show a simple desk calculator supporting the four common arithmetic
operators and named variables. The intermediate results of arithmetic
evaluation will be accumulated on an implicit stack by returning them
as semantic values from sub-rules.
.nf
%{
#include <stdio.h> /* printf() */
#include <stdlib.h> /* atoi() */
int vars[26];
%}
Stmt = - e:Expr EOL { printf("%d\\n", e); }
| ( !EOL . )* EOL { printf("error\\n"); }
Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
| s:Sum { $$ = s; }
Sum = l:Product
( PLUS r:Product { l += r; }
| MINUS r:Product { l -= r; }
)* { $$ = l; }
Product = l:Value
( TIMES r:Value { l *= r; }
| DIVIDE r:Value { l /= r; }
)* { $$ = l; }
Value = i:NUMBER { $$ = atoi(yytext); }
| i:ID !ASSIGN { $$ = vars[i]; }
| OPEN i:Expr CLOSE { $$ = i; }
NUMBER = < [0-9]+ > - { $$ = atoi(yytext); }
ID = < [a-z] > - { $$ = yytext[0] - 'a'; }
ASSIGN = '=' -
PLUS = '+' -
MINUS = '-' -
TIMES = '*' -
DIVIDE = '/' -
OPEN = '(' -
CLOSE = ')' -
- = [ \\t]*
EOL = '\\n' | '\\r\\n' | '\\r' | ';'
%%
int main()
{
while (yyparse())
;
return 0;
}
.fi
.SH LEG GRAMMAR FOR LEG GRAMMARS
The grammar for
.I leg
grammars is shown below. This will both illustrate and formalise the
above description.
.nf
grammar = -
( declaration | definition )+
trailer? end-of-file
declaration = '%{' < ( !'%}' . )* > RPERCENT
trailer = '%%' < .* >
definition = identifier EQUAL expression SEMICOLON?
expression = sequence ( BAR sequence )*
sequence = prefix+
prefix = AND action
| ( AND | NOT )? suffix
suffix = primary ( QUERY | STAR | PLUS )?
primary = identifier COLON identifier !EQUAL
| identifier !EQUAL
| OPEN expression CLOSE
| literal
| class
| DOT
| action
| BEGIN
| END
identifier = < [-a-zA-Z_][-a-zA-Z_0-9]* > -
literal = ['] < ( !['] char )* > ['] -
| ["] < ( !["] char )* > ["] -
class = '[' < ( !']' range )* > ']' -
range = char '-' char | char
char = '\\\\' [abefnrtv'"\\[\\]\\\\]
| '\\\\' [0-3][0-7][0-7]
| '\\\\' [0-7][0-7]?
| !'\\\\' .
action = '{' < [^}]* > '}' -
EQUAL = '=' -
COLON = ':' -
SEMICOLON = ';' -
BAR = '|' -
AND = '&' -
NOT = '!' -
QUERY = '?' -
STAR = '*' -
PLUS = '+' -
OPEN = '(' -
CLOSE = ')' -
DOT = '.' -
BEGIN = '<' -
END = '>' -
RPERCENT = '%}' -
- = ( space | comment )*
space = ' ' | '\\t' | end-of-line
comment = '#' ( !end-of-line . )* end-of-line
end-of-line = '\\r\\n' | '\\n' | '\\r'
end-of-file = !.
.fi
.SH CUSTOMISING THE PARSER
The following symbols can be redefined in declaration sections to
modify the generated parser code.
.TP
.B YYSTYPE
The semantic value type. The pseudo-variable '$$' and the
identifiers 'bound' to rule results with the colon operator ':' should
all be considered as being declared to have this type. The default
value is 'int'.
.TP
.B YYPARSE
The name of the main entry point to the parser. The default value
is 'yyparse'.
.TP
.B YYPARSEFROM
The name of an alternative entry point to the parser. This function
expects one argument: the function corresponding to the rule from
which the search for a match should begin. The default
is 'yyparsefrom'. Note that yyparse() is defined as
.nf
int yyparse() { return yyparsefrom(yy_foo); }
.fi
where 'foo' is the name of the first rule in the grammar.
.TP
.BI YY_INPUT( buf , \ result , \ max_size )
This macro is invoked by the parser to obtain more input text.
.I buf
points to an area of memory that can hold at most
.I max_size
characters. The macro should copy input text to
.I buf
and then assign the integer variable
.I result
to indicate the number of characters copied. If no more input is available,
the macro should assign 0 to
.IR result .
By default, the YY_INPUT macro is defined as follows.
.nf
#define YY_INPUT(buf, result, max_size) \\
{ \\
int yyc= getchar(); \\
result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\
}
.fi
.TP
.B YY_DEBUG
If this symbol is defined then additional code will be included in
the parser that prints vast quantities of arcane information to the
standard error while the parser is running.
.TP
.B YY_BEGIN
This macro is invoked to mark the start of input text that will be
made available in actions as 'yytext'. This corresponds to
occurrences of '<' in the grammar. These are converted into
predicates that are expected to succeed. The default definition
.nf
#define YY_BEGIN (yybegin= yypos, 1)
.fi
therefore saves the current input position and returns 1 ('true') as
the result of the predicate.
.TP
.B YY_END
This macro corresponds to '>' in the grammar. Again, it is a
predicate so the default definition saves the input position
before 'succeeding'.
.nf
#define YY_END (yyend= yypos, 1)
.fi
.TP
.BI YY_PARSE( T )
This macro declares the parser entry points (yyparse and yyparsefrom)
to be of type
.IR T .
The default definition
.nf
#define YY_PARSE(T) T
.fi
leaves yyparse() and yyparsefrom() with global visibility. If they
should not be externally visible in other source files, this macro can
be redefined to declare them 'static'.
.nf
#define YY_PARSE(T) static T
.fi
.TP
.BI YY_CTX_LOCAL
If this symbol is defined during compilation of a generated parser
then global parser state will be kept in a structure of
type 'yycontext' which can be declared as a local variable. This
allows multiple instances of parsers to coexist and to be thread-safe.
The parsing function
.IR yyparse ()
will be declared to expect a first argument of type 'yycontext *', an
instance of the structure holding the global state for the parser.
This instance must be allocated and initialised to zero by the client.
A trivial but complete example is as follows.
.nf
#include <stdio.h>
#include <string.h>
#define YY_CTX_LOCAL
#include "the-generated-parser.peg.c"
int main()
{
yycontext ctx;
memset(&ctx, 0, sizeof(yycontext));
while (yyparse(&ctx));
return 0;
}
.fi
Note that if this symbol is undefined then the compiled parser will
statically allocate its global state and will be neither reentrant nor
thread-safe.
.TP
.BI YY_CTX_MEMBERS
If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS
can be defined to expand to any additional member field declarations
that the client would like included in the declaration of
the 'yycontext' structure type. These additional members are
otherwise ignored by the generated parser. The instance
of 'yycontext' associated with the currently-active parser is
available in actions through the pointer variable
.IR yyctx .
.PP
The following variables can be referred to within actions.
.TP
.B char *yybuf
This variable points to the parser's input buffer used to store input
text that has not yet been matched.
.TP
.B int yypos
This is the offset (in yybuf) of the next character to be matched and
consumed.
.TP
.B char *yytext
The most recent matched text delimited by '<' and '>' is stored in this variable.
.TP
.B int yyleng
This variable indicates the number of characters in 'yytext'.
.TP
.B yycontext *yyctx
This variable points to the instance of 'yycontext' associated with
the currently-active parser.
.SH DIAGNOSTICS
.I peg
and
.I leg
warn about the following conditions while converting a grammar into a parser.
.TP
.B syntax error
The input grammar was malformed in some way. The error message will
include the text about to be matched (often backed up a huge amount
from the actual location of the error) and the line number of the most
recently considered character (which is often the real location of the
problem).
.TP
.B rule 'foo' used but not defined
The grammar referred to a rule named 'foo' but no definition for it
was given. Attempting to use the generated parser will likely result
in errors from the linker due to undefined symbols associated with the
missing rule.
.TP
.B rule 'foo' defined but not used
The grammar defined a rule named 'foo' and then ignored it. The code
associated with the rule is included in the generated parser which
will in all other respects be healthy.
.TP
.B possible infinite left recursion in rule 'foo'
There exists at least one path through the grammar that leads from the
rule 'foo' back to (a recursive invocation of) the same rule without
consuming any input.
.PP
Left recursion, especially that found in standards documents, is
often 'direct' and implies trivial repetition.
.nf
# (6.7.6)
direct-abstract-declarator =
LPAREN abstract-declarator RPAREN
| direct-abstract-declarator? LBRACKET assign-expr? RBRACKET
| direct-abstract-declarator? LBRACKET STAR RBRACKET
| direct-abstract-declarator? LPAREN param-type-list? RPAREN
.fi
The recursion can easily be eliminated by converting the parts of the
pattern following the recursion into a repeatable suffix.
.nf
# (6.7.6)
direct-abstract-declarator =
direct-abstract-declarator-head?
direct-abstract-declarator-tail*
direct-abstract-declarator-head =
LPAREN abstract-declarator RPAREN
direct-abstract-declarator-tail =
LBRACKET assign-expr? RBRACKET
| LBRACKET STAR RBRACKET
| LPAREN param-type-list? RPAREN
.fi
.SH BUGS
The 'yy' and 'YY' prefixes cannot be changed.
.PP
Left recursion is detected in the input grammar but is not handled
correctly in the generated parser.
.PP
Diagnostics for errors in the input grammar are obscure and not
particularly helpful.
.PP
Several commonly-used
.IR lex (1)
features (yywrap(), yyin, etc.) are completely absent.
.PP
The generated parser does not contain '#line' directives to direct C
compiler errors back to the grammar description when appropriate.
.SH SEE ALSO
D. Val Schorre,
.I META II, a syntax-oriented compiler writing language,
19th ACM National Conference, 1964, pp.\ 41.301--41.311. Describes a
self-implementing parser generator for analytic grammars with no
backtracking.
.PP
Alexander Birman,
.I The TMG Recognition Schema,
Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the
power and complexity of recursive-descent parsing with backtracking.
.PP
Bryan Ford,
.I Parsing Expression Grammars: A Recognition-Based Syntactic Foundation,
ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.
Defines PEGs and analyses them in relation to context-free and regular
grammars. Introduces the syntax adopted in
.IR peg .
.PP
The standard Unix utilities
.IR lex (1)
and
.IR yacc (1)
which influenced the syntax and features of
.IR leg .
.PP
The source code for
.I peg
and
.I leg
whose grammar parsers are written using themselves.
.PP
The latest version of this software and documentation:
.nf
http://piumarta.com/software/peg
.fi
.SH AUTHOR
.IR peg ,
.I leg
and this manual page were written by Ian Piumarta (first-name at
last-name dot com) while investigating the viability of regular- and
parsing-expression grammars for efficiently extracting type and
signature information from C header files.
.PP
Please send bug reports and suggestions for improvements to the author
at the above address.

View File

@ -0,0 +1,173 @@
/* Copyright (c) 2007 by Ian Piumarta
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the 'Software'),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, provided that the above copyright notice(s) and this
* permission notice appear in all copies of the Software. Acknowledgement
* of the use of this Software in supporting documentation would be
* appreciated but is not required.
*
* THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
*
* Last edited: 2012-04-29 15:49:09 by piumarta on emilia
*/
#include "tree.h"
#include "version.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <libgen.h>
#include <assert.h>
/* Stream the grammar text is read from; main() points it at stdin or at
 * each named file in turn. */
FILE *input = NULL;

/* Non-zero once -v has been seen on the command line. */
int verboseFlag = 0;

/* Position information reported by yyerror(): the current line within the
 * input and the name of that input. */
static int lineNumber = 0;
static char *fileName = NULL;

/* Defined after the parser is included; declared here so code that
 * precedes the definition can call it. */
void yyerror(char *message);
/* Most recent value returned by getc() inside YY_INPUT (EOF once the input
 * is exhausted).  Remembered so that a "\r\n" pair is counted as a single
 * line ending rather than two. */
static int lastInputChar= 0;

/* Input hook for the parser: reads one character from 'input', stores it
 * through 'buf' and sets 'result' to 1, or sets 'result' to 0 at end of
 * file.  'max' is ignored because at most one character is delivered per
 * invocation.
 *
 * lineNumber is advanced once per line ending: at every '\r', and at every
 * '\n' that does not immediately follow a '\r'.  This keeps the count right
 * for LF, CR and CRLF terminated input alike.
 *
 * The expansion is deliberately left as a bare brace block so that it
 * remains a drop-in replacement wherever the macro is already expanded. */
#define YY_INPUT(buf, result, max) \
{ \
  int c= getc(input); \
  if ('\r' == c || ('\n' == c && '\r' != lastInputChar)) ++lineNumber; \
  lastInputChar= c; \
  result= (EOF == c) ? 0 : (*(buf)= c, 1); \
}
/* Storage-class hooks: each one expands to 'static T'.
 * NOTE(review): presumably the included parser wraps the declarations of its
 * helper and rule functions in these, giving them internal linkage -- confirm
 * against peg.peg-c. */
#define YY_LOCAL(T) static T
#define YY_RULE(T) static T
/* The parser source is included textually, after the YY_* definitions above
 * so that it is compiled with them in effect. */
#include "peg.peg-c"
/* Report a fatal error on stderr and terminate with exit status 1.
 *
 * The report has the form "<file>:<line>: <message>", optionally followed by
 * the current token text and by the rest of the input line at which parsing
 * stopped.  That remainder is taken first from the parser's buffer and then,
 * if the buffer runs out before a line ending, directly from 'input'.
 *
 * message: text describing the problem.
 * Does not return. */
void yyerror(char *message)
{
  fprintf(stderr, "%s:%d: %s", fileName, lineNumber, message);

  if ('\0' != yyctx->text[0])
    fprintf(stderr, " near token '%s'", yyctx->text);

  if (yyctx->pos < yyctx->limit || !feof(input))
    {
      yyctx->buf[yyctx->limit]= '\0';
      fputs(" before text \"", stderr);

      /* Echo whatever is still buffered, stopping at a line ending. */
      for (; yyctx->pos < yyctx->limit; ++yyctx->pos)
        {
          int ch= yyctx->buf[yyctx->pos];
          if ('\n' == ch || '\r' == ch)
            break;
          fputc(ch, stderr);
        }

      /* Buffer exhausted without reaching a line ending: continue with
       * characters read straight from the stream. */
      if (yyctx->pos == yyctx->limit)
        {
          for (;;)
            {
              int ch= fgetc(input);
              if (EOF == ch || '\n' == ch || '\r' == ch)
                break;
              fputc(ch, stderr);
            }
        }

      fputc('\"', stderr);
    }

  fputs("\n", stderr);
  exit(1);
}
/* Write "<name> version <major>.<minor>.<level>" and a newline to standard
 * output, using the PEG_MAJOR, PEG_MINOR and PEG_LEVEL constants.
 *
 * name: program name to show at the start of the line. */
static void version(char *name)
{
  fprintf(stdout, "%s version %d.%d.%d\n",
          name, PEG_MAJOR, PEG_MINOR, PEG_LEVEL);
}
/* Print the version line followed by the command-line synopsis and option
 * summary (the latter on stderr), then terminate with exit status 1.
 *
 * name: program name to show in the version and usage lines.
 * Does not return. */
static void usage(char *name)
{
  /* Fixed help text that follows the synopsis; none of these lines
   * contains a conversion specification, so they are written verbatim. */
  static const char *const helpLines[]= {
    "where <option> can be\n",
    " -h print this help information\n",
    " -o <ofile> write output to <ofile>\n",
    " -v be verbose\n",
    " -V print version number and exit\n",
    "if no <file> is given, input is read from stdin\n",
    "if no <ofile> is given, output is written to stdout\n",
  };
  size_t i;

  version(name);
  fprintf(stderr, "usage: %s [<option>...] [<file>...]\n", name);
  for (i= 0; i < sizeof helpLines / sizeof helpLines[0]; ++i)
    fputs(helpLines[i], stderr);
  exit(1);
}
/* Program entry point.
 *
 * Processes the options -V, -h, -o <ofile> and -v, then parses every file
 * named on the command line ("-" selects stdin), or stdin alone when no
 * file is named.  A failed parse is reported through yyerror(), which
 * exits.  With -v each collected rule is printed; finally the C header is
 * emitted, followed by the compiled rules if there are any.
 *
 * Returns 0 on success; failures exit with status 1 before returning. */
int main(int argc, char **argv)
{
  Node *n;
  int opt;

  /* Defaults, used unless options or file arguments override them. */
  output= stdout;
  input= stdin;
  lineNumber= 1;
  fileName= "<stdin>";

  while ((opt= getopt(argc, argv, "Vho:v")) != -1)
    {
      if ('V' == opt)
        {
          version(basename(argv[0]));
          exit(0);
        }
      else if ('h' == opt)
        {
          usage(basename(argv[0]));
        }
      else if ('o' == opt)
        {
          output= fopen(optarg, "w");
          if (NULL == output)
            {
              perror(optarg);
              exit(1);
            }
        }
      else if ('v' == opt)
        {
          verboseFlag= 1;
        }
      else
        {
          fprintf(stderr, "for usage try: %s -h\n", argv[0]);
          exit(1);
        }
    }

  /* Step past the options so that argv lists only the input files. */
  argc -= optind;
  argv += optind;

  if (0 == argc)
    {
      if (!yyparse())
        yyerror("syntax error");
    }
  else
    {
      while (0 != argc)
        {
          if (0 == strcmp(*argv, "-"))
            {
              input= stdin;
              fileName= "<stdin>";
            }
          else
            {
              input= fopen(*argv, "r");
              if (NULL == input)
                {
                  perror(*argv);
                  exit(1);
                }
              fileName= *argv;
            }

          /* Line numbers restart for every input. */
          lineNumber= 1;
          if (!yyparse())
            yyerror("syntax error");

          if (stdin != input)
            fclose(input);

          --argc;
          ++argv;
        }
    }

  if (verboseFlag)
    {
      n= rules;
      while (n)
        {
          Rule_print(n);
          n= n->any.next;
        }
    }

  Rule_compile_c_header();
  if (rules)
    Rule_compile_c(rules);

  return 0;
}

View File

@ -0,0 +1,77 @@
# PE Grammar for PE Grammars
#
# Adapted from [1] by Ian Piumarta <first-name at last-name point com>.
#
# Local modifications (marked '#ikp') to support:
# C text in '{ ... }' copied verbatim to output as 'semantic action'
# input consumed between '<' and '>' is 'char yytext[]' in semantic actions
#
# Best viewed using 140 columns monospaced with tabs every 8.
#
# [1] Bryan Ford. "Parsing Expression Grammars: A Recognition-Based Syntactic
# Foundation." Symposium on Principles of Programming Languages,
# January 14--16, 2004, Venice, Italy.
#
# Last edited: 2007-05-15 10:32:44 by piumarta on emilia
# Hierarchical syntax
# A grammar is optional leading spacing, one or more definitions, then end of input.
Grammar <- Spacing Definition+ EndOfFile
# A definition is an identifier, a left arrow and an expression.  The first action looks up
# the named rule, pushes it, and prints a diagnostic on stderr if it already has an
# expression; the second action pops the parsed expression and attaches it to that rule.
Definition <- Identifier { if (push(beginRule(findRule(yytext)))->rule.expression) fprintf(stderr, "rule '%s' redefined\n", yytext); }
LEFTARROW Expression { Node *e= pop(); Rule_setExpression(pop(), e); } &{ YYACCEPT }
# Alternatives separated by '/'; each later alternative is appended to the node beneath it on the stack.
Expression <- Sequence (SLASH Sequence { Node *f= pop(); push(Alternate_append(pop(), f)); }
)*
# One or more prefixed items matched in order; the empty alternative pushes the predicate "1".
Sequence <- Prefix (Prefix { Node *f= pop(); push(Sequence_append(pop(), f)); } #ikp expanded from 'Seq <- Prefix*'
)*
/ { push(makePredicate("1")); } #ikp added
# Lookahead forms: '&' before an action makes a predicate from the action text, '&' or '!'
# before a suffix wraps it in a peek-for or peek-not node; otherwise a plain suffix.
Prefix <- AND Action { push(makePredicate(yytext)); } #ikp added
/ AND Suffix { push(makePeekFor(pop())); } #ikp expanded from 'Prefix <- (AND/NOT)? Suffix'
/ NOT Suffix { push(makePeekNot(pop())); }
/ Suffix
# A primary optionally followed by one of the operators '?', '*' or '+'.
Suffix <- Primary (QUESTION { push(makeQuery(pop())); }
/ STAR { push(makeStar (pop())); }
/ PLUS { push(makePlus (pop())); }
)?
# Atoms: a rule reference (an identifier not followed by '<-'), a parenthesised expression,
# a literal, a character class, '.', an action, or the text-capture markers '<' and '>'.
Primary <- Identifier !LEFTARROW { push(makeName(findRule(yytext))); }
/ OPEN Expression CLOSE
/ Literal { push(makeString(yytext)); }
/ Class { push(makeClass(yytext)); }
/ DOT { push(makeDot()); }
/ Action { push(makeAction(yytext)); } #ikp added
/ BEGIN { push(makePredicate("YY_BEGIN")); } #ikp added
/ END { push(makePredicate("YY_END")); } #ikp added
# Lexical syntax
# Identifiers start with a letter or underscore and continue with letters, underscores or
# digits; the text between '<' and '>' is what the actions above see as yytext.
Identifier <- < IdentStart IdentCont* > Spacing #ikp inserted < ... >
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
# Literals are delimited by single or by double quotes; the capture excludes the quotes.
Literal <- ['] < (!['] Char )* > ['] Spacing #ikp inserted < ... >
/ ["] < (!["] Char )* > ["] Spacing #ikp inserted < ... >
# Character classes are bracketed lists of single characters and 'a-z' style ranges.
Class <- '[' < (!']' Range)* > ']' Spacing #ikp inserted < ... >
Range <- Char '-' Char / Char
# A character is a backslash escape (named, octal, or an escaped '-') or any character
# other than a backslash.
Char <- '\\' [abefnrtv'"\[\]\\] #ikp added missing ANSI escapes: abefv
/ '\\' [0-3][0-7][0-7]
/ '\\' [0-7][0-7]?
/ '\\' '-' #ikp added
/ !'\\' .
# Punctuation tokens; each one also consumes any spacing that follows it.
LEFTARROW <- '<-' Spacing
SLASH <- '/' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUESTION <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
OPEN <- '(' Spacing
CLOSE <- ')' Spacing
DOT <- '.' Spacing
# Spacing is any run of blanks, tabs, line endings and '#' comments; a comment runs up to
# and including the line ending.
Spacing <- (Space / Comment)*
Comment <- '#' (!EndOfLine .)* EndOfLine
Space <- ' ' / '\t' / EndOfLine
EndOfLine <- '\r\n' / '\n' / '\r'
EndOfFile <- !.
# An action is the text between '{' and the next '}'; it therefore cannot contain '}'.
Action <- '{' < [^}]* > '}' Spacing #ikp added
BEGIN <- '<' Spacing #ikp added
END <- '>' Spacing #ikp added

Some files were not shown because too many files have changed in this diff Show More