python and typst script

This commit is contained in:
anarsec 2023-07-09 20:53:03 +00:00
parent da5f497ec1
commit be05046783
No known key found for this signature in database
19 changed files with 2223 additions and 0 deletions

View file

@ -0,0 +1,221 @@
import argparse
import contextlib
import os
import pathlib
import re
import shutil
import slugify
import subprocess
import tempfile
import pdfimposer
import PyPDF2
import toml
class Converter:
    """Converts an Anarsec article to PDF booklets.

    For each supported paper size, the post's Markdown is merged with the
    recommendations page and the glossary entries it references, converted to
    Typst with Pandoc, compiled to PDF with Typst, padded to a multiple of four
    pages and finally imposed as a booklet with pdfimposer.
    """

    def __init__(
        self,
        pandoc_binary: pathlib.Path,
        typst_binary: pathlib.Path,
        anarsec_root: pathlib.Path,
        post_id: str,
        *,
        force: bool = False,
        verbose: bool = False,
    ) -> None:
        """Initialize the converter.

        Args:
            pandoc_binary: Path to the Pandoc executable.
            typst_binary: Path to the Typst executable.
            anarsec_root: Root of the Anarsec repository.
            post_id: Name of the post folder below ``content/posts``.
            force: Overwrite output files that already exist.
            verbose: Print a message for every PDF that gets created.

        Raises:
            RuntimeError: If a binary is not a file, or if the repository root
                or the post directory is not a directory.
        """
        # Set attributes
        self.pandoc_binary = pandoc_binary
        self.typst_binary = typst_binary
        self.anarsec_root = anarsec_root
        self.post_id = post_id
        self.force = force
        self.verbose = verbose
        # Set post directory
        self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id
        # Check validity of some attributes
        self._require_file(self.pandoc_binary, "Pandoc binary")
        self._require_file(self.typst_binary, "Typst binary")
        self._require_directory(self.anarsec_root, "Anarsec root")
        self._require_directory(self.post_directory, "Post directory")

    def convert(self) -> None:
        """Convert the post to one booklet PDF per paper size.

        The output files are written next to the post as
        ``<post_id>-a4.pdf`` and ``<post_id>-letter.pdf``.
        This method should only be run once.

        Raises:
            RuntimeError: If a required input file is missing, the article or
                the recommendations page cannot be parsed, a referenced
                glossary entry does not exist, or an output file already
                exists and ``force`` is not set.
            subprocess.CalledProcessError: If Pandoc, Typst or the image
                conversion command fails.
        """
        content_root = self.anarsec_root / "content"
        # Set glossary file
        glossary_file = content_root / "glossary" / "_index.md"
        self._require_file(glossary_file, "Glossary file")
        # Set recommendations file
        recommendations_file = content_root / "recommendations" / "_index.md"
        self._require_file(recommendations_file, "Recommendations file")
        # Set input path
        input_path = self.post_directory / "index.md"
        self._require_file(input_path, "Post Markdown file")

        # Load the glossary
        glossary = self._load_glossary(glossary_file)

        # Everything derived purely from the article text is independent of
        # the paper size, so it is computed once instead of once per booklet.
        front_matter_text, article_markdown = self._split_front_matter(
            input_path.read_text(encoding="utf-8"), input_path
        )
        toml_front_matter = toml.loads(front_matter_text)
        description_markdown = self._extract_description(article_markdown, input_path)
        markdown_content = self._assemble_markdown(article_markdown, recommendations_file, glossary)

        # For each paper size
        for paper_size in ["a4", "letter"]:
            # Set the output path
            output_path = self.post_directory / f"{self.post_id}-{paper_size}.pdf"
            if not self.force and output_path.exists():
                raise RuntimeError(f"Output file '{output_path}' already exists.")
            # Work in a temporary directory
            with tempfile.TemporaryDirectory() as working_directory_name:
                self._build_booklet(
                    pathlib.Path(working_directory_name),
                    paper_size,
                    output_path,
                    toml_front_matter,
                    description_markdown,
                    markdown_content,
                )
            # Print a message
            if self.verbose:
                print(f"PDF file '{output_path}' created successfully!")

    @staticmethod
    def _require_file(path: pathlib.Path, label: str) -> None:
        """Raise RuntimeError unless *path* is an existing regular file."""
        # is_file() is already False for a missing path.
        if not path.is_file():
            raise RuntimeError(f"{label} '{path}' doesn't exist or isn't a file.")

    @staticmethod
    def _require_directory(path: pathlib.Path, label: str) -> None:
        """Raise RuntimeError unless *path* is an existing directory."""
        if not path.is_dir():
            raise RuntimeError(f"{label} '{path}' doesn't exist or isn't a directory.")

    @staticmethod
    def _load_glossary(glossary_file: pathlib.Path) -> dict[str, tuple[str, str]]:
        """Map the slug of every ``###`` glossary heading to (title, body)."""
        glossary_text = glossary_file.read_text(encoding="utf-8")
        sections = re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_text, re.DOTALL | re.MULTILINE)
        return {slugify.slugify(title): (title, body) for title, body in sections}

    @staticmethod
    def _split_front_matter(text: str, input_path: pathlib.Path) -> tuple[str, str]:
        """Split an article into its TOML front matter and Markdown content.

        Raises:
            RuntimeError: If *text* does not start with a ``+++`` delimited
                front matter block.
        """
        # The front matter group is non-greedy so that it ends at the first
        # closing '+++'; a later '+++' inside the article body stays in the body.
        match = re.fullmatch(r'\+{3}\n(.*?)\+{3}(.*)', text, re.DOTALL | re.MULTILINE)
        if match is None:
            raise RuntimeError(f"Couldn't separate input file '{input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
        return match.group(1), match.group(2)

    @staticmethod
    def _extract_description(article_markdown: str, input_path: pathlib.Path) -> str:
        """Return the Markdown that precedes the ``<!-- more -->`` marker.

        Raises:
            RuntimeError: If the marker is missing from the article.
        """
        match = re.search(r'^(.*?)\<\!\-\- more \-\-\>', article_markdown, re.DOTALL | re.MULTILINE)
        if match is None:
            raise RuntimeError(f"Couldn't find the '<!-- more -->' marker that ends the description in '{input_path}'.")
        return match.group(1).strip("\n ")

    @staticmethod
    def _collect_glossary_entries(markdown_content: str, glossary: dict[str, tuple[str, str]]) -> set[str]:
        """Return the slugs of all glossary entries reachable from the content.

        Entries linked from the content are collected first, then entries
        linked from those entries, and so on until nothing new is found.

        Raises:
            RuntimeError: If a link points to a glossary entry that does not exist.
        """
        entries = {
            slugify.slugify(anchor)
            for anchor in re.findall(r'\[.*?\]\(/glossary\/?#(.*?)\)', markdown_content)
        }
        pending = list(entries)
        while pending:
            entry = pending.pop()
            if entry not in glossary:
                raise RuntimeError(f"Glossary entry '{entry}' is referenced but doesn't exist in the glossary.")
            # Inside the glossary, links to other entries may omit '/glossary'.
            for anchor in re.findall(r'\[.*?\]\((?:/glossary|)\/?#(.*?)\)', glossary[entry][1]):
                linked_entry = slugify.slugify(anchor)
                if linked_entry not in entries:
                    entries.add(linked_entry)
                    pending.append(linked_entry)
        return entries

    def _assemble_markdown(
        self,
        article_markdown: str,
        recommendations_file: pathlib.Path,
        glossary: dict[str, tuple[str, str]],
    ) -> str:
        """Append the recommendations and the referenced glossary entries to the article.

        Raises:
            RuntimeError: If the recommendations file has no front matter, or
                a referenced glossary entry does not exist.
        """
        # Add recommendations to the Markdown content (front matter dropped)
        recommendations_match = re.search(
            r'\+{3}.*?\+{3}(.*)',
            recommendations_file.read_text(encoding="utf-8"),
            re.MULTILINE | re.DOTALL,
        )
        if recommendations_match is None:
            raise RuntimeError(f"Couldn't find a TOML front matter in recommendations file '{recommendations_file}'.")
        markdown_content = article_markdown + f"\n\n# Recommendations\n\n{recommendations_match.group(1)}\n\n"
        # Replace all .webp images to .png images in the Markdown content; the
        # converted copies are stored as '<name>.webp.png' in the working directory.
        markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)
        # Add glossary entries to the Markdown content, in glossary order
        glossary_entries = self._collect_glossary_entries(markdown_content, glossary)
        if glossary_entries:
            parts = [markdown_content, "\n\n# Glossary\n\n"]
            parts.extend(
                f"## {title}\n\n{body}\n\n"
                for entry, (title, body) in glossary.items()
                if entry in glossary_entries
            )
            markdown_content = "".join(parts)
        return markdown_content

    @staticmethod
    def _escape_typst_string(text: str) -> str:
        """Escape *text* so it can be placed inside a double-quoted Typst string."""
        # Backslashes first, otherwise the ones added for quotes get doubled.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    def _pad_to_multiple_of_four(self, pdf_path: pathlib.Path, working_directory: pathlib.Path) -> None:
        """Insert blank pages before the back cover until the page count is a multiple of 4."""
        # NOTE(review): PdfFileReader/PdfFileWriter/addPage/addBlankPage are the
        # legacy camelCase PyPDF2 names; newer releases rename them. Confirm the
        # pinned PyPDF2 version before upgrading.
        padded_path = working_directory / f"{self.post_id}-with-blank-pages.pdf"
        # The source file must stay open until the writer has been flushed,
        # because the reader is still backed by this file handle.
        with pdf_path.open("rb") as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            extra_pages = len(pdf_reader.pages) % 4
            if extra_pages == 0:
                return
            pdf_writer = PyPDF2.PdfFileWriter()
            for page in pdf_reader.pages[:-1]:
                pdf_writer.addPage(page)
            for _ in range(4 - extra_pages):
                pdf_writer.addBlankPage()
            pdf_writer.addPage(pdf_reader.pages[-1])
            with padded_path.open("wb") as padded_file:
                pdf_writer.write(padded_file)
        # Both handles are closed here, so the copy sees the complete file.
        shutil.copy(padded_path, pdf_path)

    def _build_booklet(
        self,
        working_directory: pathlib.Path,
        paper_size: str,
        output_path: pathlib.Path,
        toml_front_matter: dict,
        description_markdown: str,
        markdown_content: str,
    ) -> None:
        """Render one booklet for *paper_size* inside *working_directory*."""
        # Copy the required resources to the working directory
        shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", working_directory)
        for resource in self.post_directory.iterdir():
            suffix = resource.suffix.lower()
            if suffix == ".webp":
                # WebP images are converted to PNG with the external 'convert' command.
                subprocess.check_call(["convert", resource, working_directory / f"{resource.name}.png"])
            elif suffix in (".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"):
                shutil.copy(resource, working_directory)

        # Parse the description: Markdown -> plain text through Pandoc
        description_md_path = working_directory / "description.md"
        description_txt_path = working_directory / "description.txt"
        description_md_path.write_text(description_markdown, encoding="utf-8")
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
        description = description_txt_path.read_text(encoding="utf-8")

        # Copy the front image
        blog_image = toml_front_matter['extra']['blogimage']
        front_image = working_directory / ("front_image" + pathlib.Path(blog_image).suffix)
        shutil.copy(self.anarsec_root / "static" / blog_image.removeprefix("/"), front_image)
        # Copy the back image
        back_image = working_directory / "back_image.png"
        shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)

        # Write the Markdown content to a file
        input_markdown_path = working_directory / f"{self.post_id}-markdown.md"
        input_markdown_path.write_text(markdown_content, encoding="utf-8")
        # Convert the Markdown content to typst
        typst_path = working_directory / f"{self.post_id}.typ"
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])

        # Build the full typst file. The booklet pages are half of the sheet:
        # A5 for A4 paper, 5.5in x 8.5in for Letter paper.
        page_setup = '"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'
        title = toml_front_matter["title"]
        last_edited_date = toml_front_matter["extra"]["dateedit"]
        # NOTE(review): the title is inserted as Typst markup and is not escaped.
        full_typst = "\n".join([
            "",
            '#import "anarsec_article.typ": anarsec_article, blockquote',
            f"#set page({page_setup})",
            "#show: content => anarsec_article(",
            "title: [",
            f"{title}",
            "],",
            f'frontimage: "{front_image.name}",',
            f'backimage: "{back_image.name}",',
            f'lastediteddate: "{last_edited_date}",',
            f'description: "{self._escape_typst_string(description)}",',
            "content",
            ")",
            typst_path.read_text(encoding="utf-8"),
            "",
        ])
        full_typst_path = working_directory / f"{self.post_id}-full.typ"
        full_typst_path.write_text(full_typst, encoding="utf-8")

        # Convert the full typst file to PDF
        pdf_path = working_directory / f"{self.post_id}.pdf"
        subprocess.check_call(
            [str(self.typst_binary), "--root", str(working_directory), "compile", full_typst_path, pdf_path],
            stderr=subprocess.STDOUT,
        )

        # Insert blank pages before the back cover if needed
        self._pad_to_multiple_of_four(pdf_path, working_directory)

        # Bookletize, discarding whatever pdfimposer prints to stdout
        with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
            pdfimposer.bookletize_on_file(
                pdf_path,
                output_path,
                layout="2x1",
                format="A4" if paper_size == "a4" else "Letter",
            )
if __name__ == "__main__":
    # Command-line entry point: describe the options, then run one conversion.
    cli = argparse.ArgumentParser(description="Converts an Anarsec article to PDF booklets.")

    # Mandatory options as (flag, value type, help text), registered in this order.
    required_options = (
        ("--pandoc-binary", pathlib.Path, "Path to the Pandoc binary. Minimum required version is 3.1.5."),
        ("--typst-binary", pathlib.Path, "Path to the typst binary. Minimum required version is 0.6.0."),
        ("--anarsec-root", pathlib.Path, "Root of the Anarsec repository."),
        ("--post-id", str, "ID of the Anarsec post to convert, i.e. the name of the post folder in '/content/posts'."),
    )
    for flag, value_type, help_text in required_options:
        cli.add_argument(flag, type=value_type, required=True, help=help_text)

    # Boolean switches; 'store_true' already defaults to False and derives the
    # destination name from the long flag.
    switches = (
        ("-f", "--force", "Replace the output files if they already exist."),
        ("-v", "--verbose", "Print messages when the output files are created."),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    options = cli.parse_args()

    # Build the converter from the parsed options and run it.
    Converter(
        options.pandoc_binary,
        options.typst_binary,
        options.anarsec_root,
        options.post_id,
        force=options.force,
        verbose=options.verbose,
    ).convert()