"""Convert an Anarsec article (Zola Markdown post) into imposed PDF booklets.

Pipeline per paper size (a4, letter):
  Markdown -> (pandoc) -> typst -> (typst compile) -> PDF -> pad to a
  multiple of 4 pages -> (pdfimposer) -> 2x1 booklet PDF in the site's
  static directory.
"""

import argparse
import contextlib
import os
import pathlib
import re
import shutil
import subprocess
import tempfile

import pdfimposer
import PyPDF2
import slugify
import toml


class Converter:
    """Converts an Anarsec article to PDF booklets."""

    # Appendix headings keyed by post language. A language missing from a
    # table gets no corresponding appendix heading (same as the original
    # per-language if-chains, which only covered these five languages).
    _RECOMMENDATIONS_HEADINGS = {
        'en': "Appendix: Recommendations",
        'fr': "Annexe: Recommendations",
        'el': "Appendix: Recommendations",
        'pt': "Appendix: Recommendations",
        'ru': "Appendix: Recommendations",
    }
    _GLOSSARY_HEADINGS = {
        'en': "Appendix: Glossary",
        'fr': "Annexe: Glossaire",
        'el': "Appendix: Glossary",
        'pt': "Appendix: Glossary",
        'ru': "Appendix: Glossary",
    }

    def __init__(self, pandoc_binary: pathlib.Path, typst_binary: pathlib.Path, anarsec_root: pathlib.Path, post_id: str, *, force: bool = False, verbose: bool = False):
        """Initialize the converter.

        Args:
            pandoc_binary: Path to the Pandoc executable (>= 3.1.5).
            typst_binary: Path to the typst executable (>= 0.6.0).
            anarsec_root: Root of the Anarsec repository checkout.
            post_id: Post folder name plus language, e.g. "nophones.en".
            force: Overwrite existing output PDFs instead of raising.
            verbose: Print a message for each PDF created.

        Raises:
            RuntimeError: If any of the paths is missing or of the wrong
                kind, or if `post_id` lacks the ".<lang>" suffix.
        """
        # Fail with a clear message instead of an IndexError when the
        # language suffix is missing.
        if '.' not in post_id:
            raise RuntimeError(f"Post ID '{post_id}' must be of the form '<post>.<lang>', e.g. 'nophones.en'.")

        self.pandoc_binary = pandoc_binary
        self.typst_binary = typst_binary
        self.anarsec_root = anarsec_root
        self.post_id, self.post_lang = post_id.split('.', 1)
        self.force = force
        self.verbose = verbose

        # Directory holding the post's Markdown and images
        self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id

        # Check validity of some attributes
        if not self.pandoc_binary.exists() or not self.pandoc_binary.is_file():
            raise RuntimeError(f"Pandoc binary '{self.pandoc_binary}' doesn't exist or isn't a file.")
        if not self.typst_binary.exists() or not self.typst_binary.is_file():
            raise RuntimeError(f"Typst binary '{self.typst_binary}' doesn't exist or isn't a file.")
        if not self.anarsec_root.exists() or not self.anarsec_root.is_dir():
            raise RuntimeError(f"Anarsec root '{self.anarsec_root}' doesn't exist or isn't a directory.")
        if not self.post_directory.exists() or not self.post_directory.is_dir():
            raise RuntimeError(f"Post directory '{self.post_directory}' doesn't exist or isn't a directory.")

    def _localized_index(self, section: str) -> pathlib.Path:
        """Return the language-specific `_index.md` path for a content section."""
        name = "_index.md" if self.post_lang == 'en' else f"_index.{self.post_lang}.md"
        return self.anarsec_root / "content" / section / name

    @staticmethod
    def _check_file(path: pathlib.Path, label: str) -> pathlib.Path:
        """Return `path` if it is an existing regular file, else raise RuntimeError."""
        if not path.exists() or not path.is_file():
            raise RuntimeError(f"{label} '{path}' doesn't exist or isn't a file.")
        return path

    @staticmethod
    def _strip_front_matter(path: pathlib.Path) -> str:
        """Return the Markdown body that follows the `+++` TOML front matter."""
        return re.search(r'\+{3}.*?\+{3}(.*)', path.read_text(), re.MULTILINE | re.DOTALL).group(1)

    def convert(self):
        """Convert the input file to the output files (one PDF per paper size).

        This method should only be run once."""
        # Locate and validate all language-specific source files
        glossary_file = self._check_file(self._localized_index("glossary"), "Glossary file")
        recommendations_file = self._check_file(self._localized_index("recommendations"), "Recommendations file")
        series_file = self._check_file(self._localized_index("series"), "Series file")
        index_name = "index.md" if self.post_lang == 'en' else f"index.{self.post_lang}.md"
        input_path = self._check_file(self.post_directory / index_name, "Post Markdown file")

        # Load the glossary: slug -> (term, definition)
        glossary = {}
        for term, definition in re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_file.read_text(), re.DOTALL | re.MULTILINE):
            glossary[slugify.slugify(term)] = (term, definition)

        # Load the series markdown (body after the front matter)
        series_markdown = self._strip_front_matter(series_file)

        # For each paper size
        for paper_size in ["a4", "letter"]:
            # Set the output path
            if self.post_lang == 'en':
                output_path = self.anarsec_root / "static" / "posts" / self.post_id / f"{self.post_id}-{paper_size}-{self.post_lang}.pdf"
            else:
                output_path = self.anarsec_root / "static" / self.post_lang / "posts" / self.post_id / f"{self.post_id}-{paper_size}-{self.post_lang}.pdf"
            if not self.force and output_path.exists():
                raise RuntimeError(f"Output file '{output_path}' already exists.")

            # Work in a temporary directory
            with tempfile.TemporaryDirectory() as tmp:
                working_dir = pathlib.Path(tmp)

                # Copy the required resources to the working directory
                shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", working_dir)
                for filename in input_path.parent.iterdir():
                    if filename.suffix.lower() == ".webp":
                        # typst can't embed webp: convert via ImageMagick
                        subprocess.check_call(["convert", filename, working_dir / f"{filename.name}.png"])
                    elif filename.suffix.lower() in [".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"]:
                        shutil.copy(filename, working_dir)

                # Separate the input file into a TOML front matter and Markdown content
                match = re.fullmatch(r'\+{3}\n(.*)\+{3}(.*)', input_path.read_text(), re.DOTALL | re.MULTILINE)
                if match is None:
                    # NOTE: was `self.input_path`, which raised AttributeError here
                    raise RuntimeError(f"Couldn't separate input file '{input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
                toml_front_matter = toml.loads(match.group(1))
                markdown_content = match.group(2)

                # Grab the description (everything before the <!-- more --> marker)
                description = re.search(r'^(.*?)\<\!\-\- more \-\-\>', markdown_content, re.DOTALL | re.MULTILINE).group(1).strip("\n ")

                # Parse the description to plain text via pandoc
                description_md_path = working_dir / "description.md"
                description_txt_path = working_dir / "description.txt"
                description_md_path.write_text(description)
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
                description = description_txt_path.read_text()

                # Copy the front image (prefer the PNG variant when declared)
                blogimageid = "blogimagepng" if "blogimagepng" in toml_front_matter["extra"] else "blogimage"
                front_image = working_dir / ("front_image" + pathlib.Path(toml_front_matter['extra'][blogimageid]).suffix)
                shutil.copy(self.anarsec_root / "static" / toml_front_matter['extra'][blogimageid].removeprefix("/"), front_image)

                # Copy the back image
                back_image = working_dir / "back_image.png"
                shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)

                # Copy the header fonts
                shutil.copy(self.anarsec_root / "static" / "fonts" / "Jost-Medium.ttf", working_dir / "Jost-Medium.ttf")
                shutil.copy(self.anarsec_root / "static" / "fonts" / "Jost-MediumItalic.ttf", working_dir / "Jost-MediumItalic.ttf")

                # Add recommendations to the Markdown content
                recommendations = self._strip_front_matter(recommendations_file)
                recommendations_heading = self._RECOMMENDATIONS_HEADINGS.get(self.post_lang)
                if recommendations_heading is not None:
                    markdown_content += f"\n\n# {recommendations_heading}\n\n{recommendations}\n\n"

                # Make all images paths relative in the Markdown content
                for extension in ["jpg", "png", "webp", "jpeg", "gif"]:
                    markdown_content = re.sub(f'\\(\\/posts/{input_path.parent.name}/(.*?\\.{extension})\\)', lambda match: f'({match.group(1)})', markdown_content)

                # Replace all .webp images to .png images in the Markdown content
                # (matches the ImageMagick conversion above)
                markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)

                # List glossary entries that appear in the Markdown content
                glossary_entries = set()
                for match in re.findall(r'\[.*?\]\((?:|/fr)(?:|/pt)(?:|/ru)(?:|/el)/glossary\/?#(.*?)\)', markdown_content):
                    glossary_entries.add(slugify.slugify(match))

                # Add glossary entries to the Markdown content
                if glossary_entries:
                    glossary_heading = self._GLOSSARY_HEADINGS.get(self.post_lang)
                    if glossary_heading is not None:
                        markdown_content += f"\n\n# {glossary_heading}\n\n"
                    for entry, (term, definition) in glossary.items():
                        if entry in glossary_entries:
                            # Raw typst heading so the glossary anchor targets resolve
                            markdown_content += f"""
```{{=typst}}
== {term} <{slugify.slugify(term)}>
```
"""
                            markdown_content += f"\n\n{definition}\n\n"

                # Write the Markdown content to a file
                input_markdown_path = working_dir / f"{self.post_id}-markdown.md"
                input_markdown_path.write_text(markdown_content)

                # Convert the Markdown content to typst
                typst_path = working_dir / f"{self.post_id}.typ"
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])

                # Write the series markdown to a file and convert it to typst
                series_markdown_path = working_dir / "series-markdown.md"
                series_markdown_path.write_text(series_markdown)
                series_typst_path = working_dir / "series.typ"
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown+raw_attribute", "-t", "typst", "--columns", "999999", "-o", series_typst_path, series_markdown_path])

                # Multilingual categories
                category = toml_front_matter["taxonomies"]["categories"][0]
                if self.post_lang == 'fr':
                    if category == 'Defensive':
                        category = 'Défensif'
                    if category == 'Offensive':
                        category = 'Offensif'

                # Build the full typst file.
                # NOTE(review): title/description/category are interpolated
                # unescaped — a quote in those fields would break the typst
                # source. Preserved as-is; confirm upstream content is safe.
                full_typst_path = working_dir / f"{self.post_id}-full.typ"
                full_typst = f"""
#import "anarsec_article.typ": anarsec_article, blockquote
#set page({'"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'})
#show: content => anarsec_article(
  title: [
    {toml_front_matter["title"]}
  ],
  frontimage: "{front_image.name}",
  backimage: "{back_image.name}",
  lastediteddate: "{toml_front_matter["extra"]["dateedit"]}",
  description: "{description}",
  subtitle: "{toml_front_matter.get("description")}",
  category: "{category}",
  backcoverinsidecontent: [{series_typst_path.read_text()}],
  lang: "{self.post_lang}",
  content
)
{typst_path.read_text()}
"""
                full_typst_path.write_text(full_typst)

                # Convert the full typst file to PDF
                pdf_path = working_dir / f"{self.post_id}.pdf"
                os.environ["TYPST_FONT_PATHS"] = str(working_dir)
                subprocess.check_call(
                    [str(self.typst_binary), "compile", full_typst_path, pdf_path, "--root", working_dir],
                    stderr=subprocess.STDOUT
                )

                # Insert blank pages before the back cover and back cover
                # inside if needed, so the page count is a multiple of 4
                # (required for 2x1 booklet imposition).
                padded_pdf_path = None
                with pdf_path.open("rb") as pdf_file:
                    pdf_reader = PyPDF2.PdfFileReader(pdf_file)
                    if len(pdf_reader.pages) % 4 != 0:
                        pdf_writer = PyPDF2.PdfFileWriter()
                        for page in pdf_reader.pages[:-2]:
                            pdf_writer.addPage(page)
                        for _ in range(4 - len(pdf_reader.pages) % 4):
                            pdf_writer.addBlankPage()
                        pdf_writer.addPage(pdf_reader.pages[-2])
                        pdf_writer.addPage(pdf_reader.pages[-1])
                        padded_pdf_path = working_dir / f"{self.post_id}-with-blank-pages.pdf"
                        # Write while the source stream is still open: the
                        # writer's pages reference the reader's stream.
                        with padded_pdf_path.open("wb") as padded_file:
                            pdf_writer.write(padded_file)
                # Overwrite only after the reader's handle is closed
                if padded_pdf_path is not None:
                    shutil.copy(padded_pdf_path, pdf_path)

                # Bookletize, silencing pdfimposer's stdout chatter
                with open(os.devnull, "w") as devnull:
                    with contextlib.redirect_stdout(devnull):
                        pdfimposer.bookletize_on_file(
                            pdf_path,
                            output_path,
                            layout="2x1",
                            format="A4" if paper_size == "a4" else "Letter"
                        )

                # Print a message
                if self.verbose:
                    print(f"PDF file '{output_path}' created successfully!")


if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser(description="Converts an Anarsec article to PDF booklets.")
    parser.add_argument("--pandoc-binary", type=pathlib.Path, required=True, help="Path to the Pandoc binary. Minimum required version is 3.1.5.")
    parser.add_argument("--typst-binary", type=pathlib.Path, required=True, help="Path to the typst binary. Minimum required version is 0.6.0.")
    parser.add_argument("--anarsec-root", type=pathlib.Path, required=True, help="Root of the Anarsec repository.")
    parser.add_argument("--post-id", type=str, required=True, help="ID of the Anarsec post to convert with language added after a period, i.e. 'nophones.en' and 'nophones.fr', where 'nophones' is the name of the post folder in '/content/posts'.")
    parser.add_argument("-f", "--force", dest="force", default=False, action="store_true", help="Replace the output files if they already exist.")
    parser.add_argument("-v", "--verbose", dest="verbose", default=False, action="store_true", help="Print messages when the output files are created.")
    arguments = parser.parse_args()

    # Create the converter
    converter = Converter(
        arguments.pandoc_binary,
        arguments.typst_binary,
        arguments.anarsec_root,
        arguments.post_id,
        force=arguments.force,
        verbose=arguments.verbose
    )

    # Convert
    converter.convert()