mirror of https://0xacab.org/anarsec/anarsec.guide.git
synced 2025-06-07 22:32:55 -04:00
276 lines · 15 KiB · Python
import argparse
|
|
import contextlib
|
|
import os
|
|
import pathlib
|
|
import re
|
|
import shutil
|
|
import slugify
|
|
import subprocess
|
|
import tempfile
|
|
|
|
import pdfimposer
|
|
import PyPDF2
|
|
import toml
|
|
|
|
class Converter:
    """Converts an Anarsec article to PDF booklets (one per paper size)."""

    def __init__(self, pandoc_binary: pathlib.Path, typst_binary: pathlib.Path, anarsec_root: pathlib.Path, post_id: str, *, force: bool = False, verbose: bool = False):
        """Initialize the converter.

        :param pandoc_binary: path to the Pandoc executable (>= 3.1.5).
        :param typst_binary: path to the typst executable (>= 0.6.0).
        :param anarsec_root: root of the Anarsec repository checkout.
        :param post_id: post folder name plus language, e.g. "nophones.en".
        :param force: overwrite existing output PDFs instead of raising.
        :param verbose: print a message for each PDF created.
        :raises RuntimeError: if post_id is malformed or any required path is missing.
        """

        # Split "<folder>.<language>" into its two components. Raise a clear
        # error instead of the bare IndexError a missing dot used to cause.
        if '.' not in post_id:
            raise RuntimeError(f"Post ID '{post_id}' must contain a language suffix after a period, e.g. 'nophones.en'.")
        self.post_id, self.post_lang = post_id.split('.', 1)

        # Set attributes
        self.pandoc_binary = pandoc_binary
        self.typst_binary = typst_binary
        self.anarsec_root = anarsec_root
        self.force = force
        self.verbose = verbose

        # Directory holding the post's Markdown source and images
        self.post_directory = self.anarsec_root / "content" / "posts" / self.post_id

        # Check validity of some attributes
        if not self.pandoc_binary.exists() or not self.pandoc_binary.is_file():
            raise RuntimeError(f"Pandoc binary '{self.pandoc_binary}' doesn't exist or isn't a file.")
        if not self.typst_binary.exists() or not self.typst_binary.is_file():
            raise RuntimeError(f"Typst binary '{self.typst_binary}' doesn't exist or isn't a file.")
        if not self.anarsec_root.exists() or not self.anarsec_root.is_dir():
            raise RuntimeError(f"Anarsec root '{self.anarsec_root}' doesn't exist or isn't a directory.")
        if not self.post_directory.exists() or not self.post_directory.is_dir():
            raise RuntimeError(f"Post directory '{self.post_directory}' doesn't exist or isn't a directory.")

    def _localized_file(self, directory: pathlib.Path, stem: str, description: str) -> pathlib.Path:
        """Return the language-specific Markdown file under *directory*.

        English posts use "<stem>.md"; other languages use "<stem>.<lang>.md".
        Raises RuntimeError (with *description* in the message) if it is missing.
        """
        if self.post_lang == 'en':
            path = directory / f"{stem}.md"
        else:
            path = directory / f"{stem}.{self.post_lang}.md"
        if not path.exists() or not path.is_file():
            raise RuntimeError(f"{description} '{path}' doesn't exist or isn't a file.")
        return path

    def convert(self):
        """Convert the input file to the output file. This method should only be run once.

        For each paper size (a4, letter) this renders the article through
        pandoc and typst, pads the PDF to a multiple of four pages, and
        imposes it into a booklet under the site's static directory.
        """

        # Locate the language-specific support files and the post itself
        glossary_file = self._localized_file(self.anarsec_root / "content" / "glossary", "_index", "Glossary file")
        recommendations_file = self._localized_file(self.anarsec_root / "content" / "recommendations", "_index", "Recommendations file")
        series_file = self._localized_file(self.anarsec_root / "content" / "series", "_index", "Series file")
        input_path = self._localized_file(self.post_directory, "index", "Post Markdown file")

        # Load the glossary: "### Title\nBody" sections keyed by slug
        glossary = {}
        for title, body in re.findall(r'### (.*?)\n+(.*?)\n*(?=###|\Z)', glossary_file.read_text(), re.DOTALL | re.MULTILINE):
            glossary[slugify.slugify(title)] = (title, body)

        # Load the series markdown (everything after the TOML front matter)
        series_markdown = re.search(r'\+{3}.*?\+{3}(.*)', series_file.read_text(), re.MULTILINE | re.DOTALL).group(1)

        # For each paper size
        for paper_size in ["a4", "letter"]:
            # Output lands under static/posts (English) or static/<lang>/posts
            if self.post_lang == 'en':
                output_path = self.anarsec_root / "static" / "posts" / self.post_id / f"{self.post_id}-{paper_size}-{self.post_lang}.pdf"
            else:
                output_path = self.anarsec_root / "static" / self.post_lang / "posts" / self.post_id / f"{self.post_id}-{paper_size}-{self.post_lang}.pdf"
            if not self.force and output_path.exists():
                raise RuntimeError(f"Output file '{output_path}' already exists.")

            # Work in a temporary directory
            with tempfile.TemporaryDirectory() as working_directory:
                work_dir = pathlib.Path(working_directory)

                # Copy the typst template and the post's images to the working
                # directory; .webp images are converted to .png (via ImageMagick's
                # `convert`) because the typst template can't embed webp.
                shutil.copy(pathlib.Path(__file__).parent.parent / "anarsec_article.typ", working_directory)
                for filename in input_path.parent.iterdir():
                    if filename.suffix.lower() == ".webp":
                        subprocess.check_call(["convert", filename, work_dir / f"{filename.name}.png"])
                    elif filename.suffix.lower() in [".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif"]:
                        shutil.copy(filename, working_directory)

                # Separate the input file into a TOML front matter and Markdown content
                match = re.fullmatch(r'\+{3}\n(.*)\+{3}(.*)', input_path.read_text(), re.DOTALL | re.MULTILINE)
                if match is None:
                    # NOTE: the original referenced the nonexistent self.input_path
                    # here, which would have raised AttributeError instead.
                    raise RuntimeError(f"Couldn't separate input file '{input_path}' into a TOML front matter and Markdown content. Is it a valid Anarsec article?")
                toml_front_matter = toml.loads(match.group(1))
                markdown_content = match.group(2)

                # Grab the description: everything before the "<!-- more -->" marker
                description = re.search(r'^(.*?)\<\!\-\- more \-\-\>', markdown_content, re.DOTALL | re.MULTILINE).group(1).strip("\n ")

                # Render the description to plain text through pandoc
                description_md_path = work_dir / "description.md"
                description_txt_path = work_dir / "description.txt"
                description_md_path.write_text(description)
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "plain", "--columns", "999999", "-o", description_txt_path, description_md_path])
                description = description_txt_path.read_text()

                # Copy the front image ("blogimagepng" takes precedence if present)
                blogimageid = "blogimagepng" if "blogimagepng" in toml_front_matter["extra"] else "blogimage"
                front_image = work_dir / ("front_image" + pathlib.Path(toml_front_matter['extra'][blogimageid]).suffix)
                shutil.copy(self.anarsec_root / "static" / toml_front_matter['extra'][blogimageid].removeprefix("/"), front_image)

                # Copy the back image
                back_image = work_dir / "back_image.png"
                shutil.copy(self.anarsec_root / "static" / "images" / "gay.png", back_image)

                # Copy the header fonts so typst finds them via TYPST_FONT_PATHS
                header_font = work_dir / "Jost-Medium.ttf"
                shutil.copy(self.anarsec_root / "static" / "fonts" / "Jost-Medium.ttf", header_font)
                header_font_italic = work_dir / "Jost-MediumItalic.ttf"
                shutil.copy(self.anarsec_root / "static" / "fonts" / "Jost-MediumItalic.ttf", header_font_italic)

                # Add recommendations to the Markdown content as an appendix
                recommendations = re.search(r'\+{3}.*?\+{3}(.*)', recommendations_file.read_text(), re.MULTILINE | re.DOTALL).group(1)
                if self.post_lang == 'en':
                    markdown_content += f"\n\n# Appendix: Recommendations\n\n{recommendations}\n\n"
                if self.post_lang == 'fr':
                    markdown_content += f"\n\n# Annexe: Recommendations\n\n{recommendations}\n\n"

                # Make all image paths relative in the Markdown content.
                # (The "\\." here fixes an invalid "\." escape in a non-raw f-string;
                # the compiled regex is unchanged.)
                for extension in ["jpg", "png", "webp", "jpeg", "gif"]:
                    markdown_content = re.sub(f'\\(\\/posts/{input_path.parent.name}/(.*?\\.{extension})\\)', lambda match: f'({match.group(1)})', markdown_content)

                # Point .webp references at the .png conversions made above
                markdown_content = re.sub(r'\((.*?\.webp)\)', lambda match: f'({match.group(1)}.png)', markdown_content)

                # List glossary entries that appear in the Markdown content
                glossary_entries = set()
                for anchor in re.findall(r'\[.*?\]\(/glossary\/?#(.*?)\)', markdown_content):
                    glossary_entries.add(slugify.slugify(anchor))

                # Add the referenced glossary entries as an appendix
                if glossary_entries:
                    if self.post_lang == 'en':
                        markdown_content += "\n\n# Appendix: Glossary\n\n"
                    if self.post_lang == 'fr':
                        markdown_content += "\n\n# Annexe: Glossaire\n\n"
                    for entry, entry_content in glossary.items():
                        if entry in glossary_entries:
                            markdown_content += f"## {entry_content[0]}\n\n{entry_content[1]}\n\n"

                # Write the Markdown content to a file
                input_markdown_path = work_dir / f"{self.post_id}-markdown.md"
                input_markdown_path.write_text(markdown_content)

                # Convert the Markdown content to typst
                typst_path = work_dir / f"{self.post_id}.typ"
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", typst_path, input_markdown_path])

                # Convert the series markdown to typst (inside back cover content)
                series_markdown_path = work_dir / "series-markdown.md"
                series_markdown_path.write_text(series_markdown)
                series_typst_path = work_dir / "series.typ"
                subprocess.check_call([str(self.pandoc_binary), "-f", "markdown", "-t", "typst", "--columns", "999999", "-o", series_typst_path, series_markdown_path])

                # Multilingual categories
                category = toml_front_matter["taxonomies"]["categories"][0]
                if self.post_lang == 'fr':
                    if category == 'Defensive':
                        category = 'Défensif'
                    if category == 'Offensive':
                        category = 'Offensif'

                # Build the full typst file: template import, page setup, article
                # metadata, then the pandoc-generated article body.
                full_typst_path = work_dir / f"{self.post_id}-full.typ"
                full_typst = f"""
#import "anarsec_article.typ": anarsec_article, blockquote
#set page({'"a5"' if paper_size == "a4" else 'width: 5.5in, height: 8.5in'})
#show: content => anarsec_article(
  title: [
    {toml_front_matter["title"]}
  ],
  frontimage: "{front_image.name}",
  backimage: "{back_image.name}",
  lastediteddate: "{toml_front_matter["extra"]["dateedit"]}",
  description: "{description}",
  subtitle: "{toml_front_matter.get("description")}",
  category: "{category}",
  backcoverinsidecontent: [{series_typst_path.read_text()}],
  lang: "{self.post_lang}",
  content
)
{typst_path.read_text()}
"""
                full_typst_path.write_text(full_typst)

                # Convert the full typst file to PDF
                pdf_path = work_dir / f"{self.post_id}.pdf"
                os.environ["TYPST_FONT_PATHS"] = str(working_directory)
                subprocess.check_call(
                    [str(self.typst_binary), "compile", full_typst_path, pdf_path, "--root", working_directory],
                    stderr = subprocess.STDOUT
                )

                # Insert blank pages before the two back-cover pages so the page
                # count is a multiple of 4 (required for booklet imposition).
                # The PDF is opened in a context manager instead of leaking the
                # file handle passed to PdfFileReader.
                padded_pdf_path = None
                with pdf_path.open("rb") as pdf_file:
                    pdf_reader = PyPDF2.PdfFileReader(pdf_file)
                    if len(pdf_reader.pages) % 4 != 0:
                        pdf_writer = PyPDF2.PdfFileWriter()
                        for page in pdf_reader.pages[:-2]:
                            pdf_writer.addPage(page)
                        for _ in range(4 - len(pdf_reader.pages) % 4):
                            pdf_writer.addBlankPage()
                        pdf_writer.addPage(pdf_reader.pages[-2])
                        pdf_writer.addPage(pdf_reader.pages[-1])
                        padded_pdf_path = work_dir / f"{self.post_id}-with-blank-pages.pdf"
                        with padded_pdf_path.open("wb") as padded_file:
                            pdf_writer.write(padded_file)
                if padded_pdf_path is not None:
                    shutil.copy(padded_pdf_path, pdf_path)

                # Bookletize, silencing pdfimposer's progress output
                with open(os.devnull, "w") as devnull:
                    with contextlib.redirect_stdout(devnull):
                        pdfimposer.bookletize_on_file(
                            pdf_path,
                            output_path,
                            layout = "2x1",
                            format = "A4" if paper_size == "a4" else "Letter"
                        )

                # Print a message
                if self.verbose:
                    print(f"PDF file '{output_path}' created successfully!")
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run one conversion.
    argument_parser = argparse.ArgumentParser(description="Converts an Anarsec article to PDF booklets.")
    argument_parser.add_argument("--pandoc-binary", type=pathlib.Path, required=True, help="Path to the Pandoc binary. Minimum required version is 3.1.5.")
    argument_parser.add_argument("--typst-binary", type=pathlib.Path, required=True, help="Path to the typst binary. Minimum required version is 0.6.0.")
    argument_parser.add_argument("--anarsec-root", type=pathlib.Path, required=True, help="Root of the Anarsec repository.")
    argument_parser.add_argument("--post-id", type=str, required=True, help="ID of the Anarsec post to convert with language added after a period, i.e. 'nophones.en' and 'nophones.fr', where 'nophones' is the name of the post folder in '/content/posts'.")
    argument_parser.add_argument("-f", "--force", dest="force", default=False, action="store_true", help="Replace the output files if they already exist.")
    argument_parser.add_argument("-v", "--verbose", dest="verbose", default=False, action="store_true", help="Print messages when the output files are created.")
    args = argument_parser.parse_args()

    # Build the converter from the parsed options and produce the booklets
    Converter(
        args.pandoc_binary,
        args.typst_binary,
        args.anarsec_root,
        args.post_id,
        force=args.force,
        verbose=args.verbose,
    ).convert()