python and typst script

This commit is contained in:
anarsec 2023-07-09 20:53:03 +00:00
parent da5f497ec1
commit be05046783
No known key found for this signature in database
19 changed files with 2223 additions and 0 deletions

1
.gitignore vendored

@@ -1,6 +1,7 @@
public/
*.pdf
*.pyc
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

111
layout/anarsec_article.typ Normal file

@@ -0,0 +1,111 @@
#let anarsec_article(
title: none,
frontimage: none,
backimage: none,
lastediteddate: none,
description: none,
content
) = {
// format links
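// external https:// links become footnotes showing the bare URL;
// links into /glossary get a superscript dagger marking that a glossary entry exists;
// other site-internal links become footnotes prefixed with anarsec.guide;
// links to a label are rendered inline as the referenced element's body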
show link: it => {
it.body
if type(it.dest) == "string" {
if it.dest.starts-with("https://") {
footnote[#it.dest.trim("https://", at: start)]
}
else if it.dest.starts-with("/glossary#") or it.dest.starts-with("/glossary/#") {
locate(location => {
let elements = query(label(it.dest.trim("/glossary#", at: start).trim("/glossary/#", at: start)), location)
text[#super[†]]
})
}
else if it.dest.starts-with("/") {
footnote({text[anarsec.guide] + it.dest})
}
}
else if type(it.dest) == "label" {
locate(location => {
let elements = query(it.dest, location)
text[ (#emph(elements.first().body))]
})
}
}
// format lists
set list(marker: ([•], [--]))
// front cover
page()[
#set align(center + horizon)
#image(frontimage)
#text(25pt, title)
]
// inside cover
page()[
#set align(center + bottom)
#text()[This version of the zine was last edited on #lastediteddate. Visit anarsec.guide to see whether it has been updated since.]
#text()[This dagger symbol #super[†] on a word means that there is a glossary entry for it. Ai ferri corti.]
]
// table of contents
page()[
#outline(indent: 20pt, depth: 3)
]
// content
set page(numbering: "1")
set align(left)
pagebreak(weak: true)
show heading.where(level: 1): it => {
pagebreak(weak: true)
block(width: 100%)[
#set align(center)
#set text(26pt)
#smallcaps(it.body)
#v(10pt)
]
}
show heading.where(level: 2): it => block(width: 100%)[
#set text(19pt)
#text(it.body)
#v(10pt)
]
show heading.where(level: 3): it => block(width: 100%)[
#set text(14pt, weight: "bold")
#text(it.body)
#v(10pt)
]
content
set page(numbering: none)
// back cover
page()[
#text()[
#set align(center + horizon)
#block(width: 100%, align(left, par(justify: true, description)))
#image(height: 250pt, backimage)
]
]
}
// blockquote function ; TODO: remove when typst has a native blockquote function (see https://github.com/typst/typst/issues/105)
#let blockquote(
content
) = align(center)[
#block(width: 92%, fill: rgb(230, 230, 230), radius: 4pt, inset: 8pt)[
#align(left)[
#text(content)
]
]
]
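// Example usage (illustrative): #blockquote[Quoted passage from the article.]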


@@ -0,0 +1,221 @@
import argparse
import contextlib
import os
import pathlib
import re
import shutil
import slugify
import subprocess
import tempfile
import pdfimposer
import PyPDF2
import toml
class Converter:
"""Converts an Anarsec article to PDF booklets."""
def __init__(self, pandoc_binary: pathlib.Path, typst_binary: pathlib.Path, anarsec_root: pathlib.Path, post_id: str, *, force: bool = False, verbose: bool = False):
"""Initialize the converter."""
# Set attributes
self.pandoc_binary = pandoc_binary
self.typst_binary = typst_binary
self.anarsec_root = anarsec_root
self.post_id = post_id
self.force = force
self.verbose = verbose
# Set post directory
self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id
# Check validity of some attributes
if not self.pandoc_binary.exists() or not self.pandoc_binary.is_file():
raise RuntimeError(f"Pandoc binary '{self.pandoc_binary}' doesn't exist or isn't a file.")
if not self.typst_binary.exists() or not self.typst_binary.is_file():
raise RuntimeError(f"Typst binary '{self.typst_binary}' doesn't exist or isn't a file.")
if not self.anarsec_root.exists() or not self.anarsec_root.is_dir():
raise RuntimeError(f"Anarsec root '{self.anarsec_root}' doesn't exist or isn't a directory.")
if not self.post_directory.exists() or not self.post_directory.is_dir():
raise RuntimeError(f"Post directory '{self.post_directory}' doesn't exist or isn't a directory.")
def convert(self):
"""Convert the input file to the output file. This method should only be run once."""
# Set glossary file
glossary_file = self.anarsec_root / "content" / "glossary" / "_index.md"
if not glossary_file.exists() or not glossary_file.is_file():
raise RuntimeError(f"Glossary file '{glossary_file}' doesn't exist or isn't a file.")
# Set recommendations file
recommendations_file = self.anarsec_root / "content" / "recommendations" / "_index.md"
if not recommendations_file.exists() or not recommendations_file.is_file():
raise RuntimeError(f"Recommendations file '{recommendations_file}' doesn't exist or isn't a file.")
# Set input path
input_path = self.post_directory / "index.md"
if not input_path.exists() or not input_path.is_file():
raise RuntimeError(f"Post Markdown file '{input_path}' doesn't exist or isn't a file.")
# Load the glossary
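# (each "### Term" heading in the glossary Markdown becomes an entry keyed by its slug, mapped to the pair (term, definition))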
glossary = dict()
for match in re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_file.open().read(), re.DOTALL | re.MULTILINE):
glossary[slugify.slugify(match[0])] = (match[0], match[1])
# For each paper size
for paper_size in ["a4", "letter"]:
# Set the output path
output_path = self.post_directory / f"{self.post_id}-{paper_size}.pdf"
if not self.force and output_path.exists():
raise RuntimeError(f"Output file '{output_path}' already exists.")
# Work in a temporary directory
with tempfile.TemporaryDirectory() as workingDirectory:
# Copy the required resources to the working directory
shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", workingDirectory)
for filename in input_path.parent.iterdir():
if filename.suffix.lower() == ".webp":
subprocess.check_call(["convert", filename, pathlib.Path(workingDirectory) / f"{filename.name}.png"])
elif filename.suffix.lower() in [".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"]:
shutil.copy(filename, workingDirectory)
# Separate the input file into a TOML front matter and Markdown content
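# (posts open with a TOML front matter delimited by "+++" lines, followed by the Markdown body)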
with input_path.open("r") as input_file:
match = re.fullmatch(r'\+{3}\n(.*)\+{3}(.*)', input_file.read(), re.DOTALL | re.MULTILINE)
if match is None:
raise RuntimeError(f"Couldn't separate input file '{self.input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
toml_front_matter = toml.loads(match.group(1))
markdown_content = match.group(2)
# Grab the description
description = re.search(r'^(.*?)\<\!\-\- more \-\-\>', markdown_content, re.DOTALL | re.MULTILINE).group(1).strip("\n ")
# Parse the description
description_md_path = pathlib.Path(workingDirectory) / "description.md"
description_txt_path = pathlib.Path(workingDirectory) / "description.txt"
description_md_path.open("w").write(description)
subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
description = description_txt_path.open().read()
# Copy the front image
front_image = pathlib.Path(workingDirectory) / ("front_image" + pathlib.Path(toml_front_matter['extra']['blogimage']).suffix)
shutil.copy(self.anarsec_root / "static" / toml_front_matter['extra']['blogimage'].removeprefix("/"), front_image)
# Copy the back image
back_image = pathlib.Path(workingDirectory) / "back_image.png"
shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)
# Add recommendations to the Markdown content
recommendations = re.search(r'\+{3}.*?\+{3}(.*)', recommendations_file.open().read(), re.MULTILINE | re.DOTALL).group(1)
markdown_content += f"\n\n# Recommendations\n\n{recommendations}\n\n"
# Replace all .webp image references with .png references in the Markdown content
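# (e.g. "(screenshot.webp)" becomes "(screenshot.webp.png)", matching the filenames produced by the conversion above)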
markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)
# List glossary entries that appear in the Markdown content
glossary_entries = set()
for match in re.findall(r'\[.*?\]\(/glossary\/?#(.*?)\)', markdown_content):
glossary_entries.add(slugify.slugify(match))
# Recursively add glossary entries that are referenced from other glossary entries
added_entry = True
while added_entry:
added_entry = False
for entry in list(glossary_entries):
for match in re.findall(r'\[.*?\]\((?:/glossary|)\/?#(.*?)\)', glossary[entry][1]):
new_entry = slugify.slugify(match)
if new_entry not in glossary_entries:
glossary_entries.add(new_entry)
added_entry = True
# Add glossary entries to the Markdown content
if glossary_entries:
markdown_content += "\n\n# Glossary\n\n"
for entry, entry_content in glossary.items():
if entry in glossary_entries:
markdown_content += f"## {entry_content[0]}\n\n{entry_content[1]}\n\n"
# Write the Markdown content to a file
input_markdown_path = pathlib.Path(workingDirectory) / f"{self.post_id}-markdown.md"
input_markdown_path.open("w").write(markdown_content)
# Convert the Markdown content to typst
typst_path = pathlib.Path(workingDirectory) / f"{self.post_id}.typ"
subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])
# Build the full typst file
full_typst_path = pathlib.Path(workingDirectory) / f"{self.post_id}-full.typ"
full_typst = f"""
#import "anarsec_article.typ": anarsec_article, blockquote
#set page({'"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'})
#show: content => anarsec_article(
title: [
{toml_front_matter["title"]}
],
frontimage: "{front_image.name}",
backimage: "{back_image.name}",
lastediteddate: "{toml_front_matter["extra"]["dateedit"]}",
description: "{description}",
content
)
{typst_path.open().read()}
"""
full_typst_path.open("w").write(full_typst)
# Convert the full typst file to PDF
pdf_path = pathlib.Path(workingDirectory) / f"{self.post_id}.pdf"
subprocess.check_call(
[str(self.typst_binary), "--root", workingDirectory, "compile", full_typst_path, pdf_path],
stderr = subprocess.STDOUT
)
# Insert blank pages before the back cover if needed
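# (a folded booklet is printed on sheets of 4 pages, so the page count must be a multiple of 4)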
pdf_reader = PyPDF2.PdfFileReader(pdf_path.open("rb"))
if len(pdf_reader.pages) % 4 != 0:
pdf_writer = PyPDF2.PdfFileWriter()
for page in pdf_reader.pages[:-1]:
pdf_writer.addPage(page)
for i in range(4 - len(pdf_reader.pages) % 4):
pdf_writer.addBlankPage()
pdf_writer.addPage(pdf_reader.pages[-1])
pdf_with_blank_pages_path = pathlib.Path(workingDirectory) / f"{self.post_id}-with-blank-pages.pdf"
pdf_writer.write(pdf_with_blank_pages_path.open("wb"))
shutil.copy(pdf_with_blank_pages_path, pdf_path)
# Bookletize
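# (pdfimposer lays the half-size pages out two per sheet, reordered so the printed and folded stack reads in sequence)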
with open(os.devnull, "w") as devnull:
with contextlib.redirect_stdout(devnull):
pdfimposer.bookletize_on_file(
pdf_path,
output_path,
layout = "2x1",
format = "A4" if paper_size == "a4" else "Letter"
)
# Print a message
if self.verbose:
print(f"PDF file '{output_path}' created successfully!")
if __name__ == "__main__":
# Parse arguments
parser = argparse.ArgumentParser(description = "Converts an Anarsec article to PDF booklets.")
parser.add_argument("--pandoc-binary", type = pathlib.Path, required = True, help = "Path to the Pandoc binary. Minimum required version is 3.1.5.")
parser.add_argument("--typst-binary", type = pathlib.Path, required = True, help = "Path to the typst binary. Minimum required version is 0.6.0.")
parser.add_argument("--anarsec-root", type = pathlib.Path, required = True, help = "Root of the Anarsec repository.")
parser.add_argument("--post-id", type = str, required = True, help = "ID of the Anarsec post to convert, i.e. the name of the post folder in '/content/posts'.")
parser.add_argument("-f", "--force", dest = "force", default = False, action = "store_true", help = "Replace the output files if they already exist.")
parser.add_argument("-v", "--verbose", dest = "verbose", default = False, action = "store_true", help = "Print messages when the output files are created.")
arguments = parser.parse_args()
# Create the converter
converter = Converter(
arguments.pandoc_binary,
arguments.typst_binary,
arguments.anarsec_root,
arguments.post_id,
force = arguments.force,
verbose = arguments.verbose
)
# Convert
converter.convert()


@@ -0,0 +1,7 @@
from .special import *
from .slugify import *
__author__ = 'Val Neekman @ Neekware Inc. [@vneekman]'
__description__ = 'A Python slugify application that also handles Unicode'
__version__ = '4.0.1'


@@ -0,0 +1,93 @@
from __future__ import print_function, absolute_import
import argparse
import sys
from .slugify import slugify, DEFAULT_SEPARATOR
def parse_args(argv):
parser = argparse.ArgumentParser(description="Sluggify string")
input_group = parser.add_argument_group(description="Input")
input_group.add_argument("input_string", nargs='*',
help='Text to slugify')
input_group.add_argument("--stdin", action='store_true',
help="Take the text from STDIN")
parser.add_argument("--no-entities", action='store_false', dest='entities', default=True,
help="Do not convert HTML entities to unicode")
parser.add_argument("--no-decimal", action='store_false', dest='decimal', default=True,
help="Do not convert HTML decimal to unicode")
parser.add_argument("--no-hexadecimal", action='store_false', dest='hexadecimal', default=True,
help="Do not convert HTML hexadecimal to unicode")
parser.add_argument("--max-length", type=int, default=0,
help="Output string length, 0 for no limit")
parser.add_argument("--word-boundary", action='store_true', default=False,
help="Truncate to complete word even if length ends up shorter than --max_length")
parser.add_argument("--save-order", action='store_true', default=False,
help="When set and --max_length > 0 return whole words in the initial order")
parser.add_argument("--separator", type=str, default=DEFAULT_SEPARATOR,
help="Separator between words. By default " + DEFAULT_SEPARATOR)
parser.add_argument("--stopwords", nargs='+',
help="Words to discount")
parser.add_argument("--regex-pattern",
help="Python regex pattern for allowed characters")
parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True,
help="Activate case sensitivity")
parser.add_argument("--replacements", nargs='+',
help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
args = parser.parse_args(argv[1:])
if args.input_string and args.stdin:
parser.error("Input strings and --stdin cannot work together")
if args.replacements:
def split_check(repl):
SEP = '->'
if SEP not in repl:
parser.error("Replacements must be of the form: ORIGINAL{SEP}REPLACED".format(SEP=SEP))
return repl.split(SEP, 1)
args.replacements = [split_check(repl) for repl in args.replacements]
if args.input_string:
args.input_string = " ".join(args.input_string)
elif args.stdin:
args.input_string = sys.stdin.read()
if not args.input_string:
args.input_string = ''
return args
def slugify_params(args):
return dict(
text=args.input_string,
entities=args.entities,
decimal=args.decimal,
hexadecimal=args.hexadecimal,
max_length=args.max_length,
word_boundary=args.word_boundary,
save_order=args.save_order,
separator=args.separator,
stopwords=args.stopwords,
lowercase=args.lowercase,
replacements=args.replacements
)
def main(argv=None): # pragma: no cover
""" Run this program """
if argv is None:
argv = sys.argv
args = parse_args(argv)
params = slugify_params(args)
try:
print(slugify(**params))
except KeyboardInterrupt:
sys.exit(-1)
if __name__ == '__main__': # pragma: no cover
main()


@@ -0,0 +1,180 @@
import re
import unicodedata
import types
import sys
try:
from htmlentitydefs import name2codepoint
_unicode = unicode
_unicode_type = types.UnicodeType
except ImportError:
from html.entities import name2codepoint
_unicode = str
_unicode_type = str
unichr = chr
try:
import text_unidecode as unidecode
except ImportError:
import unidecode
__all__ = ['slugify', 'smart_truncate']
CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
DECIMAL_PATTERN = re.compile(r'&#(\d+);')
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
QUOTE_PATTERN = re.compile(r'[\']+')
ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
ALLOWED_CHARS_PATTERN_WITH_UPPERCASE = re.compile(r'[^-a-zA-Z0-9]+')
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
DEFAULT_SEPARATOR = '-'
def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False):
"""
Truncate a string.
:param string (str): string for modification
:param max_length (int): output string length
:param word_boundary (bool):
:param save_order (bool): if True then word order of output string is like input string
:param separator (str): separator between words
:return:
"""
string = string.strip(separator)
if not max_length:
return string
if len(string) < max_length:
return string
if not word_boundary:
return string[:max_length].strip(separator)
if separator not in string:
return string[:max_length]
truncated = ''
for word in string.split(separator):
if word:
next_len = len(truncated) + len(word)
if next_len < max_length:
truncated += '{}{}'.format(word, separator)
elif next_len == max_length:
truncated += '{}'.format(word)
break
else:
if save_order:
break
if not truncated: # pragma: no cover
truncated = string[:max_length]
return truncated.strip(separator)
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
replacements=()):
"""
Make a slug from the given text.
:param text (str): initial text
:param entities (bool): converts html entities to unicode
:param decimal (bool): converts html decimal to unicode
:param hexadecimal (bool): converts html hexadecimal to unicode
:param max_length (int): output string length
:param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
:param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
:param separator (str): separator between words
:param stopwords (iterable): words to discount
:param regex_pattern (str): regex pattern for allowed characters
:param lowercase (bool): activate case sensitivity by setting it to False
:param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
:return (str):
"""
# user-specific replacements
if replacements:
for old, new in replacements:
text = text.replace(old, new)
# ensure text is unicode
if not isinstance(text, _unicode_type):
text = _unicode(text, 'utf-8', 'ignore')
# replace quotes with dashes - pre-process
text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
# decode unicode
text = unidecode.unidecode(text)
# ensure text is still in unicode
if not isinstance(text, _unicode_type):
text = _unicode(text, 'utf-8', 'ignore')
# character entity reference
if entities:
text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)
# decimal character reference
if decimal:
try:
text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text)
except Exception:
pass
# hexadecimal character reference
if hexadecimal:
try:
text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text)
except Exception:
pass
# translate
text = unicodedata.normalize('NFKD', text)
if sys.version_info < (3,):
text = text.encode('ascii', 'ignore')
# make the text lowercase (optional)
if lowercase:
text = text.lower()
# remove generated quotes -- post-process
text = QUOTE_PATTERN.sub('', text)
# cleanup numbers
text = NUMBERS_PATTERN.sub('', text)
# replace all other unwanted characters
if lowercase:
pattern = regex_pattern or ALLOWED_CHARS_PATTERN
else:
pattern = regex_pattern or ALLOWED_CHARS_PATTERN_WITH_UPPERCASE
text = re.sub(pattern, DEFAULT_SEPARATOR, text)
# remove redundant
text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
# remove stopwords
if stopwords:
if lowercase:
stopwords_lower = [s.lower() for s in stopwords]
words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
else:
words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
text = DEFAULT_SEPARATOR.join(words)
# finalize user-specific replacements
if replacements:
for old, new in replacements:
text = text.replace(old, new)
# smart truncate if requested
if max_length > 0:
text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)
if separator != DEFAULT_SEPARATOR:
text = text.replace(DEFAULT_SEPARATOR, separator)
return text


@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
def add_uppercase_char(char_list):
""" Given a replacement char list, this adds uppercase chars to the list """
for item in char_list:
char, xlate = item
upper_dict = char.upper(), xlate.capitalize()
if upper_dict not in char_list and char != upper_dict[0]:
char_list.insert(0, upper_dict)
return char_list
# Language specific pre translations
# Source awesome-slugify
_CYRILLIC = [ # package defaults:
(u'ё', u'e'), # io / yo
(u'я', u'ya'), # ia
(u'х', u'h'), # kh
(u'у', u'y'), # u
(u'щ', u'sch'), # shch
(u'ю', u'u'), # iu / yu
]
CYRILLIC = add_uppercase_char(_CYRILLIC)
_GERMAN = [ # package defaults:
(u'ä', u'ae'), # a
(u'ö', u'oe'), # o
(u'ü', u'ue'), # u
]
GERMAN = add_uppercase_char(_GERMAN)
_GREEK = [ # package defaults:
(u'χ', u'ch'), # kh
(u'Ξ', u'X'), # Ks
(u'ϒ', u'Y'), # U
(u'υ', u'y'), # u
(u'ύ', u'y'),
(u'ϋ', u'y'),
(u'ΰ', u'y'),
]
GREEK = add_uppercase_char(_GREEK)
# Pre translations
PRE_TRANSLATIONS = CYRILLIC + GERMAN + GREEK


@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import os
import pkgutil
_replaces = pkgutil.get_data(__name__, 'data.bin').decode('utf8').split('\x00')
def unidecode(txt):
chars = []
for ch in txt:
codepoint = ord(ch)
if not codepoint:
chars.append('\x00')
continue
try:
chars.append(_replaces[codepoint-1])
except IndexError:
pass
return "".join(chars)

Binary file not shown.


@@ -0,0 +1,25 @@
"""Python module which parses and emits TOML.
Released under the MIT license.
"""
from toml import encoder
from toml import decoder
__version__ = "0.10.2"
_spec_ = "0.5.0"
load = decoder.load
loads = decoder.loads
TomlDecoder = decoder.TomlDecoder
TomlDecodeError = decoder.TomlDecodeError
TomlPreserveCommentDecoder = decoder.TomlPreserveCommentDecoder
dump = encoder.dump
dumps = encoder.dumps
TomlEncoder = encoder.TomlEncoder
TomlArraySeparatorEncoder = encoder.TomlArraySeparatorEncoder
TomlPreserveInlineDictEncoder = encoder.TomlPreserveInlineDictEncoder
TomlNumpyEncoder = encoder.TomlNumpyEncoder
TomlPreserveCommentEncoder = encoder.TomlPreserveCommentEncoder
TomlPathlibEncoder = encoder.TomlPathlibEncoder


@@ -0,0 +1,15 @@
from toml import decoder as decoder, encoder as encoder
load = decoder.load
loads = decoder.loads
TomlDecoder = decoder.TomlDecoder
TomlDecodeError = decoder.TomlDecodeError
TomlPreserveCommentDecoder = decoder.TomlPreserveCommentDecoder
dump = encoder.dump
dumps = encoder.dumps
TomlEncoder = encoder.TomlEncoder
TomlArraySeparatorEncoder = encoder.TomlArraySeparatorEncoder
TomlPreserveInlineDictEncoder = encoder.TomlPreserveInlineDictEncoder
TomlNumpyEncoder = encoder.TomlNumpyEncoder
TomlPreserveCommentEncoder = encoder.TomlPreserveCommentEncoder
TomlPathlibEncoder = encoder.TomlPathlibEncoder

File diff suppressed because it is too large


@@ -0,0 +1,52 @@
from toml.tz import TomlTz as TomlTz
from typing import Any, Optional
unicode = str
basestring = str
unichr = chr
FNFError = FileNotFoundError
FNFError = IOError
TIME_RE: Any
class TomlDecodeError(ValueError):
msg: Any = ...
doc: Any = ...
pos: Any = ...
lineno: Any = ...
colno: Any = ...
def __init__(self, msg: Any, doc: Any, pos: Any) -> None: ...
class CommentValue:
val: Any = ...
comment: Any = ...
def __init__(self, val: Any, comment: Any, beginline: Any, _dict: Any) -> None: ...
def __getitem__(self, key: Any): ...
def __setitem__(self, key: Any, value: Any) -> None: ...
def dump(self, dump_value_func: Any): ...
def load(f: Union[str, list, IO[str]],
_dict: Type[MutableMapping[str, Any]] = ...,
decoder: TomlDecoder = ...) \
-> MutableMapping[str, Any]: ...
def loads(s: str, _dict: Type[MutableMapping[str, Any]] = ..., decoder: TomlDecoder = ...) \
-> MutableMapping[str, Any]: ...
class InlineTableDict: ...
class TomlDecoder:
def __init__(self, _dict: Any = ...) -> None: ...
def get_empty_table(self): ...
def get_empty_inline_table(self): ...
def load_inline_object(self, line: Any, currentlevel: Any, multikey: bool = ..., multibackslash: bool = ...) -> None: ...
def load_line(self, line: Any, currentlevel: Any, multikey: Any, multibackslash: Any): ...
def load_value(self, v: Any, strictly_valid: bool = ...): ...
def bounded_string(self, s: Any): ...
def load_array(self, a: Any): ...
def preserve_comment(self, line_no: Any, key: Any, comment: Any, beginline: Any) -> None: ...
def embed_comments(self, idx: Any, currentlevel: Any) -> None: ...
class TomlPreserveCommentDecoder(TomlDecoder):
saved_comments: Any = ...
def __init__(self, _dict: Any = ...) -> None: ...
def preserve_comment(self, line_no: Any, key: Any, comment: Any, beginline: Any) -> None: ...
def embed_comments(self, idx: Any, currentlevel: Any) -> None: ...


@@ -0,0 +1,304 @@
import datetime
import re
import sys
from decimal import Decimal
from toml.decoder import InlineTableDict
if sys.version_info >= (3,):
unicode = str
def dump(o, f, encoder=None):
"""Writes out dict as toml to a file
Args:
o: Object to dump into toml
f: File descriptor where the toml should be stored
encoder: The ``TomlEncoder`` to use for constructing the output string
Returns:
String containing the toml corresponding to dictionary
Raises:
TypeError: When anything other than file descriptor is passed
"""
if not f.write:
raise TypeError("You can only dump an object to a file descriptor")
d = dumps(o, encoder=encoder)
f.write(d)
return d
def dumps(o, encoder=None):
"""Stringifies input dict as toml
Args:
o: Object to dump into toml
encoder: The ``TomlEncoder`` to use for constructing the output string
Returns:
String containing the toml corresponding to dict
Examples:
```python
>>> import toml
>>> output = {
... 'a': "I'm a string",
... 'b': ["I'm", "a", "list"],
... 'c': 2400
... }
>>> toml.dumps(output)
'a = "I\'m a string"\nb = [ "I\'m", "a", "list",]\nc = 2400\n'
```
"""
retval = ""
if encoder is None:
encoder = TomlEncoder(o.__class__)
addtoretval, sections = encoder.dump_sections(o, "")
retval += addtoretval
outer_objs = [id(o)]
while sections:
section_ids = [id(section) for section in sections.values()]
for outer_obj in outer_objs:
if outer_obj in section_ids:
raise ValueError("Circular reference detected")
outer_objs += section_ids
newsections = encoder.get_empty_table()
for section in sections:
addtoretval, addtosections = encoder.dump_sections(
sections[section], section)
if addtoretval or (not addtoretval and not addtosections):
if retval and retval[-2:] != "\n\n":
retval += "\n"
retval += "[" + section + "]\n"
if addtoretval:
retval += addtoretval
for s in addtosections:
newsections[section + "." + s] = addtosections[s]
sections = newsections
return retval
def _dump_str(v):
if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str):
v = v.decode('utf-8')
v = "%r" % v
if v[0] == 'u':
v = v[1:]
singlequote = v.startswith("'")
if singlequote or v.startswith('"'):
v = v[1:-1]
if singlequote:
v = v.replace("\\'", "'")
v = v.replace('"', '\\"')
v = v.split("\\x")
while len(v) > 1:
i = -1
if not v[0]:
v = v[1:]
v[0] = v[0].replace("\\\\", "\\")
# No, I don't know why != works and == breaks
joinx = v[0][i] != "\\"
while v[0][:i] and v[0][i] == "\\":
joinx = not joinx
i -= 1
if joinx:
joiner = "x"
else:
joiner = "u00"
v = [v[0] + joiner + v[1]] + v[2:]
return unicode('"' + v[0] + '"')
def _dump_float(v):
return "{}".format(v).replace("e+0", "e+").replace("e-0", "e-")
def _dump_time(v):
utcoffset = v.utcoffset()
if utcoffset is None:
return v.isoformat()
# The TOML norm specifies that it's local time thus we drop the offset
return v.isoformat()[:-6]
class TomlEncoder(object):
def __init__(self, _dict=dict, preserve=False):
self._dict = _dict
self.preserve = preserve
self.dump_funcs = {
str: _dump_str,
unicode: _dump_str,
list: self.dump_list,
bool: lambda v: unicode(v).lower(),
int: lambda v: v,
float: _dump_float,
Decimal: _dump_float,
datetime.datetime: lambda v: v.isoformat().replace('+00:00', 'Z'),
datetime.time: _dump_time,
datetime.date: lambda v: v.isoformat()
}
def get_empty_table(self):
return self._dict()
def dump_list(self, v):
retval = "["
for u in v:
retval += " " + unicode(self.dump_value(u)) + ","
retval += "]"
return retval
def dump_inline_table(self, section):
"""Preserve inline table in its compact syntax instead of expanding
into subsection.
https://github.com/toml-lang/toml#user-content-inline-table
"""
retval = ""
if isinstance(section, dict):
val_list = []
for k, v in section.items():
val = self.dump_inline_table(v)
val_list.append(k + " = " + val)
retval += "{ " + ", ".join(val_list) + " }\n"
return retval
else:
return unicode(self.dump_value(section))
def dump_value(self, v):
# Lookup function corresponding to v's type
dump_fn = self.dump_funcs.get(type(v))
if dump_fn is None and hasattr(v, '__iter__'):
dump_fn = self.dump_funcs[list]
# Evaluate function (if it exists) else return v
return dump_fn(v) if dump_fn is not None else self.dump_funcs[str](v)
def dump_sections(self, o, sup):
retstr = ""
if sup != "" and sup[-1] != ".":
sup += '.'
retdict = self._dict()
arraystr = ""
for section in o:
section = unicode(section)
qsection = section
if not re.match(r'^[A-Za-z0-9_-]+$', section):
qsection = _dump_str(section)
if not isinstance(o[section], dict):
arrayoftables = False
if isinstance(o[section], list):
for a in o[section]:
if isinstance(a, dict):
arrayoftables = True
if arrayoftables:
for a in o[section]:
arraytabstr = "\n"
arraystr += "[[" + sup + qsection + "]]\n"
s, d = self.dump_sections(a, sup + qsection)
if s:
if s[0] == "[":
arraytabstr += s
else:
arraystr += s
while d:
newd = self._dict()
for dsec in d:
s1, d1 = self.dump_sections(d[dsec], sup +
qsection + "." +
dsec)
if s1:
arraytabstr += ("[" + sup + qsection +
"." + dsec + "]\n")
arraytabstr += s1
for s1 in d1:
newd[dsec + "." + s1] = d1[s1]
d = newd
arraystr += arraytabstr
else:
if o[section] is not None:
retstr += (qsection + " = " +
unicode(self.dump_value(o[section])) + '\n')
elif self.preserve and isinstance(o[section], InlineTableDict):
retstr += (qsection + " = " +
self.dump_inline_table(o[section]))
else:
retdict[qsection] = o[section]
retstr += arraystr
return (retstr, retdict)
class TomlPreserveInlineDictEncoder(TomlEncoder):
def __init__(self, _dict=dict):
super(TomlPreserveInlineDictEncoder, self).__init__(_dict, True)
class TomlArraySeparatorEncoder(TomlEncoder):
def __init__(self, _dict=dict, preserve=False, separator=","):
super(TomlArraySeparatorEncoder, self).__init__(_dict, preserve)
if separator.strip() == "":
separator = "," + separator
elif separator.strip(' \t\n\r,'):
raise ValueError("Invalid separator for arrays")
self.separator = separator
def dump_list(self, v):
t = []
retval = "["
for u in v:
t.append(self.dump_value(u))
while t != []:
s = []
for u in t:
if isinstance(u, list):
for r in u:
s.append(r)
else:
retval += " " + unicode(u) + self.separator
t = s
retval += "]"
return retval
class TomlNumpyEncoder(TomlEncoder):
def __init__(self, _dict=dict, preserve=False):
import numpy as np
super(TomlNumpyEncoder, self).__init__(_dict, preserve)
self.dump_funcs[np.float16] = _dump_float
self.dump_funcs[np.float32] = _dump_float
self.dump_funcs[np.float64] = _dump_float
self.dump_funcs[np.int16] = self._dump_int
self.dump_funcs[np.int32] = self._dump_int
self.dump_funcs[np.int64] = self._dump_int
def _dump_int(self, v):
return "{}".format(int(v))
class TomlPreserveCommentEncoder(TomlEncoder):
def __init__(self, _dict=dict, preserve=False):
from toml.decoder import CommentValue
super(TomlPreserveCommentEncoder, self).__init__(_dict, preserve)
self.dump_funcs[CommentValue] = lambda v: v.dump(self.dump_value)
class TomlPathlibEncoder(TomlEncoder):
def _dump_pathlib_path(self, v):
return _dump_str(str(v))
def dump_value(self, v):
if (3, 4) <= sys.version_info:
import pathlib
if isinstance(v, pathlib.PurePath):
v = str(v)
return super(TomlPathlibEncoder, self).dump_value(v)


@@ -0,0 +1,34 @@
from toml.decoder import InlineTableDict as InlineTableDict
from typing import Any, Optional
unicode = str
def dump(o: Mapping[str, Any], f: IO[str], encoder: TomlEncoder = ...) -> str: ...
def dumps(o: Mapping[str, Any], encoder: TomlEncoder = ...) -> str: ...
class TomlEncoder:
preserve: Any = ...
dump_funcs: Any = ...
def __init__(self, _dict: Any = ..., preserve: bool = ...): ...
def get_empty_table(self): ...
def dump_list(self, v: Any): ...
def dump_inline_table(self, section: Any): ...
def dump_value(self, v: Any): ...
def dump_sections(self, o: Any, sup: Any): ...
class TomlPreserveInlineDictEncoder(TomlEncoder):
def __init__(self, _dict: Any = ...) -> None: ...
class TomlArraySeparatorEncoder(TomlEncoder):
separator: Any = ...
def __init__(self, _dict: Any = ..., preserve: bool = ..., separator: str = ...) -> None: ...
def dump_list(self, v: Any): ...
class TomlNumpyEncoder(TomlEncoder):
def __init__(self, _dict: Any = ..., preserve: bool = ...) -> None: ...
class TomlPreserveCommentEncoder(TomlEncoder):
def __init__(self, _dict: Any = ..., preserve: bool = ...): ...
class TomlPathlibEncoder(TomlEncoder):
def dump_value(self, v: Any): ...


@@ -0,0 +1,15 @@
from collections import OrderedDict
from toml import TomlEncoder
from toml import TomlDecoder
class TomlOrderedDecoder(TomlDecoder):
def __init__(self):
super(self.__class__, self).__init__(_dict=OrderedDict)
class TomlOrderedEncoder(TomlEncoder):
def __init__(self):
super(self.__class__, self).__init__(_dict=OrderedDict)


@@ -0,0 +1,7 @@
from toml import TomlDecoder as TomlDecoder, TomlEncoder as TomlEncoder
class TomlOrderedDecoder(TomlDecoder):
def __init__(self) -> None: ...
class TomlOrderedEncoder(TomlEncoder):
def __init__(self) -> None: ...

24
layout/python/toml/tz.py Normal file

@@ -0,0 +1,24 @@
from datetime import tzinfo, timedelta
class TomlTz(tzinfo):
def __init__(self, toml_offset):
if toml_offset == "Z":
self._raw_offset = "+00:00"
else:
self._raw_offset = toml_offset
self._sign = -1 if self._raw_offset[0] == '-' else 1
self._hours = int(self._raw_offset[1:3])
self._minutes = int(self._raw_offset[4:6])
def __deepcopy__(self, memo):
return self.__class__(self._raw_offset)
def tzname(self, dt):
return "UTC" + self._raw_offset
def utcoffset(self, dt):
return self._sign * timedelta(hours=self._hours, minutes=self._minutes)
def dst(self, dt):
return timedelta(0)
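# Example (illustrative): TomlTz("+02:00").utcoffset(None) == timedelta(hours=2)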


@@ -0,0 +1,9 @@
from datetime import tzinfo
from typing import Any
class TomlTz(tzinfo):
def __init__(self, toml_offset: Any) -> None: ...
def __deepcopy__(self, memo: Any): ...
def tzname(self, dt: Any): ...
def utcoffset(self, dt: Any): ...
def dst(self, dt: Any): ...