import argparse
import contextlib
import os
import pathlib
import re
import shutil
import subprocess
import tempfile

import pdfimposer
import PyPDF2
import slugify
import toml


class Converter:
    """Converts an Anarsec article to PDF booklets."""

    def __init__(self, pandoc_binary: pathlib.Path, typst_binary: pathlib.Path, anarsec_root: pathlib.Path, post_id: str, *, force: bool = False, verbose: bool = False):
        """Initialize the converter.

        Args:
            pandoc_binary: Path to the Pandoc executable.
            typst_binary: Path to the Typst executable.
            anarsec_root: Root directory of the Anarsec repository.
            post_id: Name of the post folder inside 'content/posts'.
            force: Overwrite output files that already exist.
            verbose: Print a message for every PDF that is created.

        Raises:
            RuntimeError: If a binary, the repository root or the post
                directory is missing or has the wrong type.
        """

        # Set attributes
        self.pandoc_binary = pandoc_binary
        self.typst_binary = typst_binary
        self.anarsec_root = anarsec_root
        self.post_id = post_id
        self.force = force
        self.verbose = verbose

        # Set post directory
        self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id

        # Check validity of some attributes (is_file/is_dir are already False for missing paths)
        if not self.pandoc_binary.is_file():
            raise RuntimeError(f"Pandoc binary '{self.pandoc_binary}' doesn't exist or isn't a file.")
        if not self.typst_binary.is_file():
            raise RuntimeError(f"Typst binary '{self.typst_binary}' doesn't exist or isn't a file.")
        if not self.anarsec_root.is_dir():
            raise RuntimeError(f"Anarsec root '{self.anarsec_root}' doesn't exist or isn't a directory.")
        if not self.post_directory.is_dir():
            raise RuntimeError(f"Post directory '{self.post_directory}' doesn't exist or isn't a directory.")

    def convert(self):
        """Convert the input file to the output file.

        One booklet is written per paper size ("a4" and "letter") into the
        post directory, named '<post_id>-<paper_size>.pdf'.

        This method should only be run once.

        Raises:
            RuntimeError: If a required input file is missing, an output file
                already exists and `force` is off, or the article can't be parsed.
            subprocess.CalledProcessError: If an external tool exits with an error.
        """

        content_root = self.anarsec_root / "content"

        # Set glossary file
        glossary_file = content_root / "glossary" / "_index.md"
        self._require_file(glossary_file, "Glossary file")

        # Set recommendations file
        recommendations_file = content_root / "recommendations" / "_index.md"
        self._require_file(recommendations_file, "Recommendations file")

        # Set input path
        input_path = self.post_directory / "index.md"
        self._require_file(input_path, "Post Markdown file")

        # Load the glossary
        glossary = self._load_glossary(glossary_file)

        # For each paper size
        for paper_size in ("a4", "letter"):
            # Set the output path
            output_path = self.post_directory / f"{self.post_id}-{paper_size}.pdf"
            if not self.force and output_path.exists():
                raise RuntimeError(f"Output file '{output_path}' already exists.")

            # Work in a temporary directory
            with tempfile.TemporaryDirectory() as working_directory:
                self._build_booklet(
                    pathlib.Path(working_directory),
                    paper_size,
                    output_path,
                    input_path,
                    recommendations_file,
                    glossary,
                )

                # Print a message
                if self.verbose:
                    print(f"PDF file '{output_path}' created successfully!")

    @staticmethod
    def _require_file(path: pathlib.Path, label: str) -> None:
        """Raise RuntimeError unless `path` is an existing regular file."""
        if not path.is_file():
            raise RuntimeError(f"{label} '{path}' doesn't exist or isn't a file.")

    @staticmethod
    def _typst_string(value) -> str:
        """Return `value` as text that is safe inside a double-quoted Typst string."""
        # Backslashes first, so the backslashes added for quotes aren't doubled.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _load_glossary(glossary_file: pathlib.Path) -> dict[str, tuple[str, str]]:
        """Map each slugified '###' heading of the glossary to (heading, body)."""
        glossary_text = glossary_file.read_text(encoding="utf-8")
        sections = re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_text, re.DOTALL | re.MULTILINE)
        return {slugify.slugify(heading): (heading, body) for heading, body in sections}

    @staticmethod
    def _collect_glossary_entries(markdown_content: str, glossary: dict[str, tuple[str, str]]) -> set[str]:
        """Return the slugs of all glossary entries the article needs.

        This covers the entries linked from the Markdown content and, recursively,
        the entries linked from the bodies of those entries.
        """
        glossary_entries = {
            slugify.slugify(anchor)
            for anchor in re.findall(r'\[.*?\]\(/glossary\/?#(.*?)\)', markdown_content)
        }

        # Follow the links between glossary entries until no new entry shows up
        pending = list(glossary_entries)
        while pending:
            entry = pending.pop()
            if entry not in glossary:
                raise RuntimeError(f"Glossary entry '{entry}' is referenced but isn't defined in the glossary.")
            for anchor in re.findall(r'\[.*?\]\((?:/glossary|)\/?#(.*?)\)', glossary[entry][1]):
                linked_entry = slugify.slugify(anchor)
                if linked_entry not in glossary_entries:
                    glossary_entries.add(linked_entry)
                    pending.append(linked_entry)

        return glossary_entries

    @staticmethod
    def _pad_to_multiple_of_four(pdf_path: pathlib.Path, padded_path: pathlib.Path) -> None:
        """Insert blank pages before the back cover so the page count is a multiple of 4.

        `padded_path` is a scratch file; the padded result replaces `pdf_path`.
        """
        # NOTE(review): PdfFileReader/PdfFileWriter/addPage/addBlankPage are the
        # legacy PyPDF2 names; confirm the installed PyPDF2 version provides them.
        with pdf_path.open("rb") as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            missing_pages = -len(pdf_reader.pages) % 4
            if missing_pages == 0:
                return

            pdf_writer = PyPDF2.PdfFileWriter()
            for page in pdf_reader.pages[:-1]:
                pdf_writer.addPage(page)
            for _ in range(missing_pages):
                pdf_writer.addBlankPage()
            pdf_writer.addPage(pdf_reader.pages[-1])

            # The source file stays open until the writer has written every page
            with padded_path.open("wb") as padded_file:
                pdf_writer.write(padded_file)

        # Both files are closed (and flushed) at this point
        shutil.copy(padded_path, pdf_path)

    def _build_booklet(self, working_directory: pathlib.Path, paper_size: str, output_path: pathlib.Path,
                       input_path: pathlib.Path, recommendations_file: pathlib.Path,
                       glossary: dict[str, tuple[str, str]]) -> None:
        """Build the booklet for one paper size inside `working_directory`."""

        # Copy the required resources to the working directory
        shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", working_directory)
        for filename in input_path.parent.iterdir():
            suffix = filename.suffix.lower()
            if suffix == ".webp":
                # External 'convert' command (presumably ImageMagick) turns .webp into .png;
                # the new name keeps the original one as a prefix ("x.webp" -> "x.webp.png")
                subprocess.check_call(["convert", filename, working_directory / f"{filename.name}.png"])
            elif suffix in {".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"}:
                shutil.copy(filename, working_directory)

        # Separate the input file into a TOML front matter and Markdown content
        match = re.fullmatch(r'\+{3}\n(.*)\+{3}(.*)', input_path.read_text(encoding="utf-8"), re.DOTALL | re.MULTILINE)
        if match is None:
            raise RuntimeError(f"Couldn't separate input file '{input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
        toml_front_matter = toml.loads(match.group(1))
        markdown_content = match.group(2)

        # Grab the description (everything before the "more" marker)
        description_match = re.search(r'^(.*?)\<\!\-\- more \-\-\>', markdown_content, re.DOTALL | re.MULTILINE)
        if description_match is None:
            raise RuntimeError(f"Couldn't find the '<!-- more -->' marker in input file '{input_path}'.")
        description = description_match.group(1).strip("\n ")

        # Add the intro heading
        markdown_content = f"# Introduction\n\n{markdown_content}"

        # Parse the description (Pandoc turns the Markdown into plain text)
        description_md_path = working_directory / "description.md"
        description_txt_path = working_directory / "description.txt"
        description_md_path.write_text(description, encoding="utf-8")
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
        description = description_txt_path.read_text(encoding="utf-8")

        # Copy the front image
        blog_image = toml_front_matter['extra']['blogimage']
        front_image = working_directory / ("front_image" + pathlib.Path(blog_image).suffix)
        shutil.copy(self.anarsec_root / "static" / blog_image.removeprefix("/"), front_image)

        # Copy the back image
        back_image = working_directory / "back_image.png"
        shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)

        # Add recommendations to the Markdown content (without their front matter)
        recommendations_match = re.search(r'\+{3}.*?\+{3}(.*)', recommendations_file.read_text(encoding="utf-8"), re.MULTILINE | re.DOTALL)
        if recommendations_match is None:
            raise RuntimeError(f"Couldn't find the front matter of recommendations file '{recommendations_file}'.")
        markdown_content += f"\n\n# Recommendations\n\n{recommendations_match.group(1)}\n\n"

        # Replace all .webp images to .png images in the Markdown content
        markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)

        # Add glossary entries to the Markdown content, in glossary order
        glossary_entries = self._collect_glossary_entries(markdown_content, glossary)
        if glossary_entries:
            markdown_content += "\n\n# Glossary\n\n"
            markdown_content += "".join(
                f"## {heading}\n\n{body}\n\n"
                for entry, (heading, body) in glossary.items()
                if entry in glossary_entries
            )

        # Write the Markdown content to a file
        input_markdown_path = working_directory / f"{self.post_id}-markdown.md"
        input_markdown_path.write_text(markdown_content, encoding="utf-8")

        # Convert the Markdown content to typst
        typst_path = working_directory / f"{self.post_id}.typ"
        subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])

        # Build the full typst file
        # NOTE(review): the line layout of this template is reconstructed; verify it
        # against the version under source control.
        full_typst_path = working_directory / f"{self.post_id}-full.typ"
        page_setup = '"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'
        full_typst = "\n".join([
            "",
            '#import "anarsec_article.typ": anarsec_article, blockquote',
            f"#set page({page_setup})",
            "#show: content => anarsec_article(",
            "  title: [",
            f"    {toml_front_matter['title']}",
            "  ],",
            f'  frontimage: "{front_image.name}",',
            f'  backimage: "{back_image.name}",',
            f'  lastediteddate: "{self._typst_string(toml_front_matter["extra"]["dateedit"])}",',
            f'  description: "{self._typst_string(description)}",',
            # A missing description is deliberately passed on as the text "None", as before
            f'  subtitle: "{self._typst_string(toml_front_matter.get("description"))}",',
            "  content",
            ")",
            typst_path.read_text(encoding="utf-8"),
            "",
        ])
        full_typst_path.write_text(full_typst, encoding="utf-8")

        # Convert the full typst file to PDF
        pdf_path = working_directory / f"{self.post_id}.pdf"
        subprocess.check_call(
            [str(self.typst_binary), "--root", str(working_directory), "compile", full_typst_path, pdf_path],
            stderr=subprocess.STDOUT
        )

        # Insert blank pages before the back cover if needed
        self._pad_to_multiple_of_four(pdf_path, working_directory / f"{self.post_id}-with-blank-pages.pdf")

        # Bookletize (whatever pdfimposer prints to stdout is discarded)
        with open(os.devnull, "w") as devnull:
            with contextlib.redirect_stdout(devnull):
                pdfimposer.bookletize_on_file(
                    pdf_path,
                    output_path,
                    layout="2x1",
                    format="A4" if paper_size == "a4" else "Letter"
                )


def main():
    """Parse the command line and convert the requested post."""

    # Parse arguments
    parser = argparse.ArgumentParser(description="Converts an Anarsec article to PDF booklets.")
    parser.add_argument("--pandoc-binary", type=pathlib.Path, required=True, help="Path to the Pandoc binary. Minimum required version is 3.1.5.")
    parser.add_argument("--typst-binary", type=pathlib.Path, required=True, help="Path to the typst binary. Minimum required version is 0.6.0.")
    parser.add_argument("--anarsec-root", type=pathlib.Path, required=True, help="Root of the Anarsec repository.")
    parser.add_argument("--post-id", type=str, required=True, help="ID of the Anarsec post to convert, i.e. the name of the post folder in '/content/posts'.")
    parser.add_argument("-f", "--force", dest="force", default=False, action="store_true", help="Replace the output files if they already exist.")
    parser.add_argument("-v", "--verbose", dest="verbose", default=False, action="store_true", help="Print messages when the output files are created.")
    arguments = parser.parse_args()

    # Create the converter
    converter = Converter(
        arguments.pandoc_binary,
        arguments.typst_binary,
        arguments.anarsec_root,
        arguments.post_id,
        force=arguments.force,
        verbose=arguments.verbose
    )

    # Convert
    converter.convert()


if __name__ == "__main__":
    main()