mirror of
https://0xacab.org/anarsec/anarsec.guide.git
synced 2025-06-08 06:42:56 -04:00
python and typst script
This commit is contained in:
parent
da5f497ec1
commit
be05046783
19 changed files with 2223 additions and 0 deletions
221
layout/python/anarsec_article_to_pdf.py
Normal file
221
layout/python/anarsec_article_to_pdf.py
Normal file
|
@ -0,0 +1,221 @@
|
|||
import argparse
|
||||
import contextlib
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import slugify
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import pdfimposer
|
||||
import PyPDF2
|
||||
import toml
|
||||
|
||||
class Converter:
    """Converts an Anarsec article to PDF booklets.

    The article is read from ``<anarsec_root>/content/posts/<post_id>/index.md``.
    One imposed booklet per paper size is written next to it as
    ``<post_id>-a4.pdf`` and ``<post_id>-letter.pdf``.
    """

    def __init__(self, pandoc_binary: pathlib.Path, typst_binary: pathlib.Path, anarsec_root: pathlib.Path, post_id: str, *, force: bool = False, verbose: bool = False):
        """Initialize the converter.

        Args:
            pandoc_binary: Path to the Pandoc executable.
            typst_binary: Path to the typst executable.
            anarsec_root: Root of the Anarsec repository.
            post_id: Name of the post folder inside ``content/posts``.
            force: Overwrite output files that already exist.
            verbose: Print a message for every PDF that is created.

        Raises:
            RuntimeError: If a binary is not a file, or the repository root or
                the post directory is not a directory.
        """

        # Set attributes
        self.pandoc_binary = pandoc_binary
        self.typst_binary = typst_binary
        self.anarsec_root = anarsec_root
        self.post_id = post_id
        self.force = force
        self.verbose = verbose

        # Set post directory
        self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id

        # Check validity of some attributes (is_file()/is_dir() are already False for missing paths)
        if not self.pandoc_binary.is_file():
            raise RuntimeError(f"Pandoc binary '{self.pandoc_binary}' doesn't exist or isn't a file.")
        if not self.typst_binary.is_file():
            raise RuntimeError(f"Typst binary '{self.typst_binary}' doesn't exist or isn't a file.")
        if not self.anarsec_root.is_dir():
            raise RuntimeError(f"Anarsec root '{self.anarsec_root}' doesn't exist or isn't a directory.")
        if not self.post_directory.is_dir():
            raise RuntimeError(f"Post directory '{self.post_directory}' doesn't exist or isn't a directory.")

    def convert(self):
        """Convert the input file to the output files. This method should only be run once.

        Raises:
            RuntimeError: If a required input file is missing, the article or the
                recommendations page cannot be parsed, a glossary link points to
                an unknown entry, or an output file exists and ``force`` is off.
            subprocess.CalledProcessError: If an external tool exits with an error.
        """

        # Locate and check the input files
        glossary_file = self.anarsec_root / "content" / "glossary" / "_index.md"
        if not glossary_file.is_file():
            raise RuntimeError(f"Glossary file '{glossary_file}' doesn't exist or isn't a file.")

        recommendations_file = self.anarsec_root / "content" / "recommendations" / "_index.md"
        if not recommendations_file.is_file():
            raise RuntimeError(f"Recommendations file '{recommendations_file}' doesn't exist or isn't a file.")

        input_path = self.post_directory / "index.md"
        if not input_path.is_file():
            raise RuntimeError(f"Post Markdown file '{input_path}' doesn't exist or isn't a file.")

        # Everything below is independent of the paper size, so it is done once,
        # and parsing errors are reported before any temporary directory is created.
        glossary = self._load_glossary(glossary_file)
        toml_front_matter, markdown_content = self._split_front_matter(input_path)
        raw_description = self._extract_raw_description(markdown_content, input_path)
        markdown_content = self._assemble_markdown(markdown_content, recommendations_file, glossary)

        # For each paper size
        for paper_size in ["a4", "letter"]:
            # Set the output path
            output_path = self.post_directory / f"{self.post_id}-{paper_size}.pdf"
            if not self.force and output_path.exists():
                raise RuntimeError(f"Output file '{output_path}' already exists.")

            # Work in a temporary directory
            with tempfile.TemporaryDirectory() as temporary_directory:
                self._build_booklet(
                    pathlib.Path(temporary_directory),
                    paper_size,
                    output_path,
                    toml_front_matter,
                    raw_description,
                    markdown_content,
                )

            # Print a message
            if self.verbose:
                print(f"PDF file '{output_path}' created successfully!")

    @staticmethod
    def _load_glossary(glossary_file: pathlib.Path) -> dict:
        """Parse the glossary page into ``{slug: (title, body)}``, one item per ``###`` heading."""
        glossary_text = glossary_file.read_text(encoding = "utf-8")
        return {
            slugify.slugify(title): (title, body)
            for title, body in re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_text, re.DOTALL | re.MULTILINE)
        }

    @staticmethod
    def _split_front_matter(input_path: pathlib.Path) -> tuple:
        """Separate the article into its parsed TOML front matter and its Markdown content."""
        match = re.fullmatch(r'\+{3}\n(.*)\+{3}(.*)', input_path.read_text(encoding = "utf-8"), re.DOTALL | re.MULTILINE)
        if match is None:
            raise RuntimeError(f"Couldn't separate input file '{input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
        return toml.loads(match.group(1)), match.group(2)

    @staticmethod
    def _extract_raw_description(markdown_content: str, input_path: pathlib.Path) -> str:
        """Return the Markdown that precedes the ``<!-- more -->`` marker, trimmed of newlines and spaces."""
        match = re.search(r'^(.*?)\<\!\-\- more \-\-\>', markdown_content, re.DOTALL | re.MULTILINE)
        if match is None:
            raise RuntimeError(f"Couldn't find the '<!-- more -->' marker that ends the description in '{input_path}'.")
        return match.group(1).strip("\n ")

    @staticmethod
    def _assemble_markdown(markdown_content: str, recommendations_file: pathlib.Path, glossary: dict) -> str:
        """Append the recommendations and the referenced glossary entries to the article Markdown."""

        # Add recommendations to the Markdown content
        match = re.search(r'\+{3}.*?\+{3}(.*)', recommendations_file.read_text(encoding = "utf-8"), re.MULTILINE | re.DOTALL)
        if match is None:
            raise RuntimeError(f"Couldn't find the front matter of recommendations file '{recommendations_file}'.")
        markdown_content += f"\n\n# Recommendations\n\n{match.group(1)}\n\n"

        # Replace all .webp images to .png images in the Markdown content
        # (the converted copies are named '<name>.webp.png' in the working directory)
        markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)

        # List glossary entries that appear in the Markdown content
        glossary_entries = {
            slugify.slugify(anchor)
            for anchor in re.findall(r'\[.*?\]\(/glossary\/?#(.*?)\)', markdown_content)
        }

        # Add the glossary entries that appear in glossary entries, recursively
        pending = list(glossary_entries)
        while pending:
            entry = pending.pop()
            if entry not in glossary:
                raise RuntimeError(f"Glossary entry '{entry}' is referenced but isn't defined in the glossary.")
            for anchor in re.findall(r'\[.*?\]\((?:/glossary|)\/?#(.*?)\)', glossary[entry][1]):
                new_entry = slugify.slugify(anchor)
                if new_entry not in glossary_entries:
                    glossary_entries.add(new_entry)
                    pending.append(new_entry)

        # Add glossary entries to the Markdown content, in glossary order
        if glossary_entries:
            markdown_content += "\n\n# Glossary\n\n"
            for entry, (title, body) in glossary.items():
                if entry in glossary_entries:
                    markdown_content += f"## {title}\n\n{body}\n\n"

        return markdown_content

    @staticmethod
    def _escape_typst_string(text: str) -> str:
        """Escape backslashes and double quotes so the text can sit inside a Typst string literal."""
        return text.replace("\\", "\\\\").replace('"', '\\"')

    def _build_booklet(self, working_directory: pathlib.Path, paper_size: str, output_path: pathlib.Path, toml_front_matter: dict, raw_description: str, markdown_content: str):
        """Render the article for one paper size inside the working directory and impose it to the output path."""

        # Copy the required resources to the working directory
        shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", working_directory)
        for filename in self.post_directory.iterdir():
            suffix = filename.suffix.lower()
            if suffix == ".webp":
                # 'convert' is looked up on PATH (ImageMagick's command, presumably)
                subprocess.check_call(["convert", filename, working_directory / f"{filename.name}.png"])
            elif suffix in [".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"]:
                shutil.copy(filename, working_directory)

        # Parse the description: Pandoc turns the Markdown into plain text
        description_md_path = working_directory / "description.md"
        description_txt_path = working_directory / "description.txt"
        description_md_path.write_text(raw_description, encoding = "utf-8")
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
        description = description_txt_path.read_text(encoding = "utf-8")

        # Copy the front image
        blog_image = toml_front_matter['extra']['blogimage']
        front_image = working_directory / ("front_image" + pathlib.Path(blog_image).suffix)
        shutil.copy(self.anarsec_root / "static" / blog_image.removeprefix("/"), front_image)

        # Copy the back image
        back_image = working_directory / "back_image.png"
        shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)

        # Write the Markdown content to a file
        input_markdown_path = working_directory / f"{self.post_id}-markdown.md"
        input_markdown_path.write_text(markdown_content, encoding = "utf-8")

        # Convert the Markdown content to typst
        typst_path = working_directory / f"{self.post_id}.typ"
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])

        # Build the full typst file; the page is set to half of the target sheet
        page_setup = '"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'
        title = toml_front_matter["title"]
        last_edited_date = toml_front_matter["extra"]["dateedit"]
        escaped_description = self._escape_typst_string(description)
        typst_body = typst_path.read_text(encoding = "utf-8")
        full_typst = f"""
#import "anarsec_article.typ": anarsec_article, blockquote
#set page({page_setup})
#show: content => anarsec_article(
title: [
{title}
],
frontimage: "{front_image.name}",
backimage: "{back_image.name}",
lastediteddate: "{last_edited_date}",
description: "{escaped_description}",
content
)
{typst_body}
"""
        full_typst_path = working_directory / f"{self.post_id}-full.typ"
        full_typst_path.write_text(full_typst, encoding = "utf-8")

        # Convert the full typst file to PDF
        pdf_path = working_directory / f"{self.post_id}.pdf"
        subprocess.check_call(
            [str(self.typst_binary), "--root", str(working_directory), "compile", full_typst_path, pdf_path],
            stderr = subprocess.STDOUT
        )

        # Insert blank pages before the back cover if needed
        self._pad_to_multiple_of_four(pdf_path, working_directory)

        # Bookletize, silencing whatever pdfimposer prints on stdout
        with open(os.devnull, "w") as devnull:
            with contextlib.redirect_stdout(devnull):
                pdfimposer.bookletize_on_file(
                    pdf_path,
                    output_path,
                    layout = "2x1",
                    format = "A4" if paper_size == "a4" else "Letter"
                )

    def _pad_to_multiple_of_four(self, pdf_path: pathlib.Path, working_directory: pathlib.Path):
        """Insert blank pages before the last page until the page count is a multiple of four."""
        padded_path = working_directory / f"{self.post_id}-with-blank-pages.pdf"

        # The reader needs its file to stay open until the writer has written the pages
        with pdf_path.open("rb") as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            page_count = len(pdf_reader.pages)
            if page_count % 4 == 0:
                return

            pdf_writer = PyPDF2.PdfFileWriter()
            for page in pdf_reader.pages[:-1]:
                pdf_writer.addPage(page)
            for _ in range(4 - page_count % 4):
                pdf_writer.addBlankPage()
            pdf_writer.addPage(pdf_reader.pages[-1])

            with padded_path.open("wb") as padded_file:
                pdf_writer.write(padded_file)

        shutil.copy(padded_path, pdf_path)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: describe the options, then run a single conversion.
    cli = argparse.ArgumentParser(description = "Converts an Anarsec article to PDF booklets.")

    # Mandatory options, as (flag, value type, help text)
    required_options = (
        ("--pandoc-binary", pathlib.Path, "Path to the Pandoc binary. Minimum required version is 3.1.5."),
        ("--typst-binary", pathlib.Path, "Path to the typst binary. Minimum required version is 0.6.0."),
        ("--anarsec-root", pathlib.Path, "Root of the Anarsec repository."),
        ("--post-id", str, "ID of the Anarsec post to convert, i.e. the name of the post folder in '/content/posts'."),
    )
    for flag, value_type, help_text in required_options:
        cli.add_argument(flag, type = value_type, required = True, help = help_text)

    # Boolean switches, as (short flag, long flag, destination, help text)
    switches = (
        ("-f", "--force", "force", "Replace the output files if they already exist."),
        ("-v", "--verbose", "verbose", "Print messages when the output files are created."),
    )
    for short_flag, long_flag, destination, help_text in switches:
        cli.add_argument(short_flag, long_flag, dest = destination, default = False, action = "store_true", help = help_text)

    options = cli.parse_args()

    # Build the converter from the parsed options and run it
    Converter(
        options.pandoc_binary,
        options.typst_binary,
        options.anarsec_root,
        options.post_id,
        force = options.force,
        verbose = options.verbose
    ).convert()
|
Loading…
Add table
Add a link
Reference in a new issue