mirror of
https://github.com/richgel999/ufo_data.git
synced 2024-10-01 01:45:37 -04:00
447 lines
12 KiB
C
447 lines
12 KiB
C
|
/* mkd2latex.c - LaTeX-formatted output from markdown text */
|
||
|
|
||
|
/*
|
||
|
* Copyright (c) 2009, Baptiste Daroussin, Natacha Porté, and Michael Huang
|
||
|
*
|
||
|
* Permission to use, copy, modify, and distribute this software for any
|
||
|
* purpose with or without fee is hereby granted, provided that the above
|
||
|
* copyright notice and this permission notice appear in all copies.
|
||
|
*
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Links require the hyperref package, and images require the graphicx
|
||
|
* package.
|
||
|
*/
|
||
|
|
||
|
#include "markdown.h"
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <errno.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
/* buffer sizing constants used by main():
 *	READ_UNIT	size passed to bufnew()/bufgrow() for the input buffer
 *	OUTPUT_UNIT	size passed to bufnew() for the output buffer */
enum {
	READ_UNIT = 1024,
	OUTPUT_UNIT = 64
};
|
||
|
|
||
|
/*********************
|
||
|
* ENTITY CONVERSION *
|
||
|
*********************/
|
||
|
|
||
|
/* str_pair • association between an HTML entity and its LaTeX rendering */
struct str_pair {
	const char *entity;	/* entity text, delimiters included ("&amp;") */
	const char *latex;	/* LaTeX text emitted in place of the entity */
};

/* entity_latex • entity to LaTeX translation table
 *	Entries MUST stay sorted by `entity` in byte order (uppercase before
 *	lowercase), because entity2latex() looks them up with bsearch().
 *	Entities whose LaTeX command is not available in the OT1 encoding, or
 *	needs the textcomp package, are kept commented out at their sorted
 *	position. */
static const struct str_pair entity_latex[] = {
	{ "&AElig;",	"\\AE{}" },
	{ "&Aacute;",	"\\'A" },
	{ "&Acirc;",	"\\^A" },
	{ "&Agrave;",	"\\`A" },
	{ "&Aring;",	"\\AA{}" },
	{ "&Atilde;",	"\\~A" },
	{ "&Auml;",	"\\\"A" },
	{ "&Ccedil;",	"\\c{C}" },
//	{ "&ETH;",	"\\DH{}" }, // not available in OT1
	{ "&Eacute;",	"\\'E" },
	{ "&Ecirc;",	"\\^E" },
	{ "&Egrave;",	"\\`E" },
	{ "&Euml;",	"\\\"E" },
	{ "&Iacute;",	"\\'I" },
	{ "&Icirc;",	"\\^I" },
	{ "&Igrave;",	"\\`I" },
	{ "&Iuml;",	"\\\"I" },
	{ "&Ntilde;",	"\\~N" },
	{ "&Oacute;",	"\\'O" },
	{ "&Ocirc;",	"\\^O" },
	{ "&Ograve;",	"\\`O" },
	{ "&Oslash;",	"\\O{}" },
	{ "&Otilde;",	"\\~O" },
	{ "&Ouml;",	"\\\"O" },
//	{ "&THORN;",	"\\TH{}" }, // not available in OT1
	{ "&Uacute;",	"\\'U" },
	{ "&Ucirc;",	"\\^U" },
	{ "&Ugrave;",	"\\`U" },
	{ "&Uuml;",	"\\\"U" },
	{ "&Yacute;",	"\\'Y" },
	{ "&aacute;",	"\\'a" },
	{ "&acirc;",	"\\^a" },
//	{ "&acute;",	"\\textasciiacute{}" }, // requires textcomp
	{ "&aelig;",	"\\ae{}" },
	{ "&agrave;",	"\\`a" },
	{ "&amp;",	"\\&" },
	{ "&apos;",	"'" },
	{ "&aring;",	"\\aa{}" },
	{ "&atilde;",	"\\~a" },
	{ "&auml;",	"\\\"a" },
//	{ "&brvbar;",	"\\textbrokenbar{}" }, // requires textcomp
	{ "&ccedil;",	"\\c{c}" },
	{ "&cedil;",	"\\c{}" },
//	{ "&cent;",	"\\textcent{}" }, // requires textcomp
	{ "&copy;",	"\\copyright{}" },
//	{ "&curren;",	"\\textcurrency{}" }, // requires textcomp
//	{ "&deg;",	"\\textdegree{}" }, // requires textcomp
//	{ "&divide;",	"\\textdiv{}" }, // requires textcomp
	{ "&eacute;",	"\\'e" },
	{ "&ecirc;",	"\\^e" },
	{ "&egrave;",	"\\`e" },
//	{ "&eth;",	"\\dh{}" }, // not available in OT1
	{ "&euml;",	"\\\"e" },
//	{ "&frac12;",	"\\textonehalf{}" }, // requires textcomp
//	{ "&frac14;",	"\\textonequarter{}" }, // requires textcomp
//	{ "&frac34;",	"\\textthreequarters{}" }, // requires textcomp
	{ "&gt;",	"$>$" },
	{ "&iacute;",	"\\'\\i{}" },
	{ "&icirc;",	"\\^\\i{}" },
	{ "&iexcl;",	"\\textexclamdown{}" },
	{ "&igrave;",	"\\`\\i{}" },
	{ "&iquest;",	"\\textquestiondown{}" },
	{ "&iuml;",	"\\\"\\i{}" },
//	{ "&laquo;",	"\\guillemotleft{}" }, // not available in OT1
	{ "&lt;",	"$<$" },
//	{ "&macr;",	"\\textasciimacron{}" }, // requires textcomp
//	{ "&micro;",	"\\textmu{}" }, // requires textcomp
	{ "&middot;",	"\\textperiodcentered{}" },
	{ "&nbsp;",	"~" },
//	{ "&not;",	"\\textlnot{}" }, // requires textcomp
	{ "&ntilde;",	"\\~n" },
	{ "&oacute;",	"\\'o" },
	{ "&ocirc;",	"\\^o" },
	{ "&ograve;",	"\\`o" },
	{ "&ordf;",	"\\textordfeminine{}" },
	{ "&ordm;",	"\\textordmasculine{}" },
	{ "&oslash;",	"\\o{}" },
	{ "&otilde;",	"\\~o" },
	{ "&ouml;",	"\\\"o" },
	{ "&para;",	"\\P{}" },
//	{ "&plusmn;",	"\\textpm{}" }, // requires textcomp
	{ "&pound;",	"\\textsterling{}" },
	{ "&quot;",	"\"" },
//	{ "&raquo;",	"\\guillemotright{}" }, // not available in OT1
	{ "&reg;",	"\\textregistered{}" },
	{ "&sect;",	"\\S{}" },
	{ "&shy;",	"\\-" },
//	{ "&sup1;",	"\\textonesuperior{}" }, // requires textcomp
//	{ "&sup2;",	"\\texttwosuperior{}" }, // requires textcomp
//	{ "&sup3;",	"\\textthreesuperior{}" }, // requires textcomp
	{ "&szlig;",	"\\ss{}" },
//	{ "&thorn;",	"\\th{}" }, // not available in OT1
//	{ "&times;",	"\\texttimes{}" }, // requires textcomp
	{ "&uacute;",	"\\'u" },
	{ "&ucirc;",	"\\^u" },
	{ "&ugrave;",	"\\`u" },
//	{ "&uml;",	"\\textasciidieresis{}" }, // requires textcomp
	{ "&uuml;",	"\\\"u" },
	{ "&yacute;",	"\\'y" },
//	{ "&yen;",	"\\textyen{}" }, // requires textcomp
	{ "&yuml;",	"\\\"y" },
};
|
||
|
|
||
|
static int cmp_entity(const void *key, const void *element) {
|
||
|
const struct str_pair *pair = element;
|
||
|
const struct buf *entity = key;
|
||
|
return bufcmps(entity, pair->entity); }
|
||
|
|
||
|
static const char *entity2latex(const struct buf *entity) {
|
||
|
const struct str_pair *pair;
|
||
|
pair = bsearch(entity, entity_latex,
|
||
|
sizeof entity_latex / sizeof *entity_latex,
|
||
|
sizeof *entity_latex,
|
||
|
&cmp_entity);
|
||
|
return pair ? pair->latex : 0; }
|
||
|
|
||
|
|
||
|
|
||
|
/******************************
|
||
|
* MARKDOWN TO LATEX RENDERER *
|
||
|
******************************/
|
||
|
|
||
|
/* latex_escape_seq • LaTeX replacement for a single special character
 *	Returns the NUL-terminated escape sequence for c, or NULL when c can
 *	be copied to the output unchanged. '<' and '>' are rendered in math
 *	mode. */
static const char *
latex_escape_seq(char c)
{
	switch (c) {
	case '&':	return "\\&";
	case '%':	return "\\%";
	case '$':	return "\\$";
	case '#':	return "\\#";
	case '_':	return "\\_";
	case '{':	return "\\{";
	case '}':	return "\\}";
	case '<':	return "$<$";
	case '>':	return "$>$";
	case '~':	return "\\textasciitilde{}";
	case '^':	return "\\textasciicircum{}";
	case '\\':	return "\\textbackslash{}";
	default:	return NULL;
	}
}

/* latex_text_escape • appends text to ob with LaTeX special characters escaped
 *	ob	output buffer
 *	src	text to escape; read by length, no NUL terminator is needed
 *	size	number of bytes of src to process
 *	Runs of ordinary characters are appended with a single bufput() call,
 *	each special character is replaced by its escape sequence. */
static void
latex_text_escape(struct buf *ob, char *src, size_t size)
{
	size_t i = 0;

	while (i < size) {
		const char *escape = NULL;
		size_t run_start = i;

		/* copying directly unescaped characters */
		while (i < size && (escape = latex_escape_seq(src[i])) == NULL)
			i += 1;
		if (i > run_start)
			bufput(ob, src + run_start, i - run_start);

		/* escaping: here src[i] is special and `escape` is its sequence */
		if (i >= size)
			break;
		bufputs(ob, escape);
		i += 1;
	}
}
|
||
|
|
||
|
/* latex_prolog • document-level callback emitting the LaTeX preamble
 *	Writes the article document class, the hyperref and graphicx package
 *	imports (used by the link and image callbacks) and \begin{document}.
 *	opaque is not used. */
static void
latex_prolog(struct buf *ob, void *opaque)
{
	(void)opaque;

	BUFPUTSL(ob, "\\documentclass{article}\n");
	BUFPUTSL(ob, "\\usepackage{hyperref}\n");
	BUFPUTSL(ob, "\\usepackage{graphicx}\n");
	BUFPUTSL(ob, "\\begin{document}\n");
}
|
||
|
|
||
|
/* latex_epilog • document-level callback closing the LaTeX document
 *	Writes a newline followed by \end{document}. opaque is not used. */
static void
latex_epilog(struct buf *ob, void *opaque)
{
	(void)opaque;

	BUFPUTSL(ob, "\n\\end{document}\n");
}
|
||
|
|
||
|
static int
|
||
|
latex_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type,
|
||
|
void *opaque) {
|
||
|
if (!link || !link->size) return 0;
|
||
|
BUFPUTSL(ob, "\\href{");
|
||
|
if (type == MKDA_IMPLICIT_EMAIL) BUFPUTSL(ob, "mailto:");
|
||
|
bufput(ob, link->data, link->size);
|
||
|
BUFPUTSL(ob, "}{");
|
||
|
if (type == MKDA_EXPLICIT_EMAIL && link->size > 7)
|
||
|
latex_text_escape(ob, link->data + 7, link->size - 7);
|
||
|
else latex_text_escape(ob, link->data, link->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
static int
|
||
|
latex_link(struct buf *ob, struct buf *link, struct buf *title,
|
||
|
struct buf *content, void *opaque) {
|
||
|
BUFPUTSL(ob, "\\href{");
|
||
|
if (link && link->size) bufput(ob, link->data, link->size);
|
||
|
BUFPUTSL(ob, "}{");
|
||
|
if (content && content->size)
|
||
|
bufput(ob, content->data, content->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
static int
|
||
|
latex_image(struct buf *ob, struct buf *link, struct buf *title,
|
||
|
struct buf *alt, void *opaque) {
|
||
|
if (!link || !link->size) return 0;
|
||
|
BUFPUTSL(ob, "\\includegraphics{");
|
||
|
bufput(ob, link->data, link->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
static void
|
||
|
latex_blockcode(struct buf *ob, struct buf *text, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
BUFPUTSL(ob, "\\begin{verbatim}\n");
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "\\end{verbatim}\n"); }
|
||
|
|
||
|
static void
|
||
|
latex_blockquote(struct buf *ob, struct buf *text, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
BUFPUTSL(ob, "\\begin{quote}\n");
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "\\end{quote}\n"); }
|
||
|
|
||
|
static int
|
||
|
latex_codespan(struct buf *ob, struct buf *text, void *opaque) {
|
||
|
BUFPUTSL(ob, "\\texttt{");
|
||
|
if (text) latex_text_escape(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
static void
|
||
|
latex_header(struct buf *ob, struct buf *text, int level, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
switch(level) {
|
||
|
case 1:
|
||
|
BUFPUTSL(ob,"\\section{");
|
||
|
break;
|
||
|
case 2:
|
||
|
BUFPUTSL(ob, "\\subsection{");
|
||
|
break;
|
||
|
case 3:
|
||
|
BUFPUTSL(ob, "\\subsubsection{");
|
||
|
break;
|
||
|
default:
|
||
|
fprintf(stderr, "Warning: ignoring header level %d\n",
|
||
|
level);
|
||
|
}
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
if (level >= 1 && level <= 3) BUFPUTSL(ob, "}\n");
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
latex_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
|
||
|
if (!text || !text->size) return 0;
|
||
|
BUFPUTSL(ob, "\\textbf{");
|
||
|
bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
static int
|
||
|
latex_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
|
||
|
if (!text || !text->size) return 0;
|
||
|
BUFPUTSL(ob, "\\emph{");
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "}");
|
||
|
return 1; }
|
||
|
|
||
|
/* latex_linebreak • span-level callback rendering a forced line break
 *	Emits the two-character LaTeX sequence \\ . opaque is not used.
 *	Always returns 1. */
static int
latex_linebreak(struct buf *ob, void *opaque)
{
	(void)opaque;

	BUFPUTSL(ob, "\\\\");

	return 1;
}
|
||
|
|
||
|
static void
|
||
|
latex_paragraph(struct buf *ob, struct buf *text, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "\n"); }
|
||
|
|
||
|
static void
|
||
|
latex_list(struct buf *ob, struct buf *text, int flags, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
if (flags & MKD_LIST_ORDERED)
|
||
|
BUFPUTSL(ob, "\\begin{enumerate}\n");
|
||
|
else
|
||
|
BUFPUTSL(ob, "\\begin{itemize}\n");
|
||
|
if (text) bufput(ob, text->data, text->size);
|
||
|
if (flags & MKD_LIST_ORDERED)
|
||
|
BUFPUTSL(ob, "\\end{enumerate}\n");
|
||
|
else
|
||
|
BUFPUTSL(ob, "\\end{itemize}\n"); }
|
||
|
|
||
|
static void
|
||
|
latex_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) {
|
||
|
BUFPUTSL(ob, "\\item ");
|
||
|
if (text) {
|
||
|
while (text->size && text->data[text->size - 1] == '\n')
|
||
|
text->size -= 1;
|
||
|
bufput(ob, text->data, text->size); }
|
||
|
BUFPUTSL(ob, "\n"); }
|
||
|
|
||
|
static void
|
||
|
latex_hrule(struct buf *ob, void *opaque) {
|
||
|
if (ob->size) bufputc(ob, '\n');
|
||
|
BUFPUTSL(ob, "\\hrule"); }
|
||
|
|
||
|
static int
|
||
|
latex_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
|
||
|
if (!text || !text->size) return 0;
|
||
|
BUFPUTSL(ob, "\\textbf{\\emph{");
|
||
|
bufput(ob, text->data, text->size);
|
||
|
BUFPUTSL(ob, "}}");
|
||
|
return 1; }
|
||
|
|
||
|
static void
|
||
|
latex_entity(struct buf *ob, struct buf *entity, void *opaque) {
|
||
|
const char *rendered = entity2latex(entity);
|
||
|
if (rendered)
|
||
|
bufputs(ob, rendered);
|
||
|
else {
|
||
|
BUFPUTSL(ob, "\\texttt{");
|
||
|
bufput(ob, entity->data, entity->size);
|
||
|
BUFPUTSL(ob, "}"); } }
|
||
|
|
||
|
static void
|
||
|
latex_normal_text(struct buf *ob, struct buf *text, void *opaque) {
|
||
|
if (text) latex_text_escape(ob, text->data, text->size); }
|
||
|
|
||
|
|
||
|
/* renderer structure */
|
||
|
static struct mkd_renderer to_latex = {
|
||
|
/* document-level callbacks */
|
||
|
latex_prolog,
|
||
|
latex_epilog,
|
||
|
|
||
|
/* block-level callbacks */
|
||
|
latex_blockcode,
|
||
|
latex_blockquote,
|
||
|
latex_blockcode,
|
||
|
latex_header,
|
||
|
latex_hrule,
|
||
|
latex_list,
|
||
|
latex_listitem,
|
||
|
latex_paragraph,
|
||
|
NULL,
|
||
|
NULL,
|
||
|
NULL,
|
||
|
|
||
|
/* span-level callbacks */
|
||
|
latex_autolink,
|
||
|
latex_codespan,
|
||
|
latex_double_emphasis,
|
||
|
latex_emphasis,
|
||
|
latex_image,
|
||
|
latex_linebreak,
|
||
|
latex_link,
|
||
|
latex_codespan,
|
||
|
latex_triple_emphasis,
|
||
|
|
||
|
/* low-level callbacks */
|
||
|
latex_entity,
|
||
|
latex_normal_text,
|
||
|
|
||
|
/* renderer data */
|
||
|
64,
|
||
|
"*_",
|
||
|
NULL };
|
||
|
|
||
|
|
||
|
|
||
|
/*****************
|
||
|
* MAIN FUNCTION *
|
||
|
*****************/
|
||
|
|
||
|
/* main • main function, interfacing STDIO with the parser */
|
||
|
int
|
||
|
main(int argc, char **argv) {
|
||
|
struct buf *ib, *ob;
|
||
|
size_t ret;
|
||
|
FILE *in = stdin;
|
||
|
|
||
|
/* opening the file if given from the command line */
|
||
|
if (argc > 1) {
|
||
|
in = fopen(argv[1], "r");
|
||
|
if (!in) {
|
||
|
fprintf(stderr,"Unable to open input file \"%s\": %s\n",
|
||
|
argv[1], strerror(errno));
|
||
|
return 1; } }
|
||
|
|
||
|
/* reading everything */
|
||
|
ib = bufnew(READ_UNIT);
|
||
|
bufgrow(ib, READ_UNIT);
|
||
|
while ((ret = fread(ib->data + ib->size, 1,
|
||
|
ib->asize - ib->size, in)) > 0) {
|
||
|
ib->size += ret;
|
||
|
bufgrow(ib, ib->size + READ_UNIT); }
|
||
|
if (in != stdin) fclose(in);
|
||
|
|
||
|
/* performing markdown to LaTeX */
|
||
|
ob = bufnew(OUTPUT_UNIT);
|
||
|
markdown(ob, ib, &to_latex);
|
||
|
|
||
|
/* writing the result to stdout */
|
||
|
ret = fwrite(ob->data, 1, ob->size, stdout);
|
||
|
if (ret < ob->size)
|
||
|
fprintf(stderr, "Warning: only %zu output byte written, "
|
||
|
"out of %zu\n",
|
||
|
ret,
|
||
|
ob->size);
|
||
|
|
||
|
/* cleanup */
|
||
|
bufrelease(ib);
|
||
|
bufrelease(ob);
|
||
|
return 0; }
|
||
|
|
||
|
/* vim: set filetype=c: */
|