diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..3682ba5 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,14 @@ +name: Build +on: push + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build + run: | + go build -C src -o ../compiler + ./compiler -path references.bib > /dev/null diff --git a/.github/workflows/deploy-website.yaml b/.github/workflows/deploy-website.yaml new file mode 100644 index 0000000..919411a --- /dev/null +++ b/.github/workflows/deploy-website.yaml @@ -0,0 +1,28 @@ +name: Deploy website +on: + push: + branches: + main + +permissions: + contents: write + +jobs: + deploy-website: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install and Build + run: | + go build -C src -o ../compiler + mkdir build + mv assets build + ./compiler -path references.bib > build/index.html + + - name: Deploy + uses: JamesIves/github-pages-deploy-action@v4 + with: + # Must be identical to where we wrote the HTML to. 
+ folder: build diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..86a7c8e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +compiler diff --git a/Makefile b/Makefile deleted file mode 100644 index 5166372..0000000 --- a/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -define LATEX_CODE -\\documentclass{article} -\\usepackage[top=2cm,bottom=2.5cm,left=2cm,right=2cm]{geometry} -\\usepackage[backend=biber]{biblatex} -\\addbibresource{references.bib} -\\begin{document} -\\nocite{*} -\\printbibliography -\\end{document} -endef - -export LATEX_CODE - -test: - TMP_FILE=$$(mktemp "censorbib-tmp-XXXXXXX.tex") ;\ - echo "$$LATEX_CODE" > "$$TMP_FILE" ;\ - pdflatex --interaction=batchmode "$${TMP_FILE%.tex}" ;\ - biber "$${TMP_FILE%.tex}" ;\ - rm "$${TMP_FILE%.tex}"* ; diff --git a/README.md b/README.md index 50c8ba7..9ff1fa7 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,19 @@ -Overview --------- +# CensorBib -This repository contains the BibTeX file and HTML templates that form the +This repository contains the +[BibTeX file](references.bib) +and +[corresponding tooling](src/) +that powers the [Internet Censorship Bibliography](https://censorbib.nymity.ch). +CensorBib is also available via +[GitHub pages](https://NullHypothesis.github.io/censorbib/) +in case the primary domain is inaccessible to you. -Build it --------- +## Contribute -You first need [`bibliogra.py`](https://github.com/NullHypothesis/bibliograpy) -to turn the BibTeX file into an HTML bibliography. +To contribute, please create a pull request that adds a new paper or +improves an existing one. -Then, run the following commands to write the bibliography to `OUTPUT_DIR`. - - $ ./fetch_pdfs.py references.bib OUTPUT_DIR - $ bibliogra.py -H header.tpl -F footer.tpl -f references.bib OUTPUT_DIR - -Acknowledgements ----------------- - -CensorBib uses [Font Awesome](https://fontawesome.com/license/free) icons -without modification. 
- -Feedback --------- - -Contact: Philipp Winter +> [!TIP] +> Try to mimic the style of existing BibTeX entries. The parser is strict! \ No newline at end of file diff --git a/img/bibtex-icon.svg b/assets/bibtex-icon.svg similarity index 100% rename from img/bibtex-icon.svg rename to assets/bibtex-icon.svg diff --git a/img/cache-icon.svg b/assets/cache-icon.svg similarity index 100% rename from img/cache-icon.svg rename to assets/cache-icon.svg diff --git a/img/code-icon.svg b/assets/code-icon.svg similarity index 100% rename from img/code-icon.svg rename to assets/code-icon.svg diff --git a/img/link-icon.svg b/assets/link-icon.svg similarity index 100% rename from img/link-icon.svg rename to assets/link-icon.svg diff --git a/img/pdf-icon.svg b/assets/pdf-icon.svg similarity index 100% rename from img/pdf-icon.svg rename to assets/pdf-icon.svg diff --git a/img/research-power-tools-cover.jpg b/assets/research-power-tools-cover.jpg similarity index 100% rename from img/research-power-tools-cover.jpg rename to assets/research-power-tools-cover.jpg diff --git a/img/update-icon.svg b/assets/update-icon.svg similarity index 100% rename from img/update-icon.svg rename to assets/update-icon.svg diff --git a/favicon.svg b/favicon.svg deleted file mode 100644 index 1e3ba38..0000000 --- a/favicon.svg +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - - - CB - - diff --git a/fetch_pdfs.py b/fetch_pdfs.py deleted file mode 100755 index 49967ae..0000000 --- a/fetch_pdfs.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2015 Philipp Winter -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -""" -Fetch pdf and ps files in BibTeX file. -""" - -import os -import sys -import errno -import urllib.request - -import pybtex.database.input.bibtex as bibtex - - -def download_pdf(url, file_name): - """ - Download file and write it to given file name. - """ - - print("Now fetching %s" % url) - - try: - req = urllib.request.Request(url, headers={'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36"}) - fetched_file = urllib.request.urlopen(req) - except Exception as err: - print(url, err, file=sys.stderr) - return - - with open(file_name, "wb") as fd: - fd.write(fetched_file.read()) - - -def main(file_name, output_dir): - """ - Extract BibTeX key and URL, and then trigger file download. - """ - - parser = bibtex.Parser() - bibdata = parser.parse_file(file_name) - - # Create download directories. - - try: - os.makedirs(os.path.join(output_dir, "pdf")) - os.makedirs(os.path.join(output_dir, "ps")) - except OSError as exc: - if exc.errno == errno.EEXIST: - pass - else: - raise - - # Iterate over all BibTeX entries and trigger download if necessary. - - for bibkey in bibdata.entries: - - entry = bibdata.entries[bibkey] - url = entry.fields.get("url") - if url is None: - continue - - # Extract file name extension and see what we are dealing with. 
- - _, ext = os.path.splitext(url) - if ext: - ext = ext[1:] - - if ext not in ["pdf", "ps"]: - continue - - file_name = os.path.join(output_dir, ext, bibkey + ".%s" % ext) - if os.path.exists(file_name): - continue - - download_pdf(url, file_name) - - return 0 - - -if __name__ == "__main__": - - if len(sys.argv) != 3: - print("\nUsage: %s FILE_NAME OUTPUT_DIR\n" % sys.argv[0], - file=sys.stderr) - sys.exit(1) - - sys.exit(main(sys.argv[1], sys.argv[2])) diff --git a/footer.tpl b/footer.tpl deleted file mode 100644 index 10d642a..0000000 --- a/footer.tpl +++ /dev/null @@ -1,7 +0,0 @@ - - - - diff --git a/img/author-icon.svg b/img/author-icon.svg deleted file mode 100644 index d3b0a1f..0000000 --- a/img/author-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/author-reverse-icon.svg b/img/author-reverse-icon.svg deleted file mode 100644 index 8321cc9..0000000 --- a/img/author-reverse-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/donate-icon.svg b/img/donate-icon.svg deleted file mode 100644 index e13015b..0000000 --- a/img/donate-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/file-icon.svg b/img/file-icon.svg deleted file mode 100644 index c581e8f..0000000 --- a/img/file-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/lock-icon.svg b/img/lock-icon.svg deleted file mode 100644 index 19dfa22..0000000 --- a/img/lock-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/year-icon.svg b/img/year-icon.svg deleted file mode 100644 index 565af50..0000000 --- a/img/year-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/img/year-reverse-icon.svg b/img/year-reverse-icon.svg deleted file mode 100644 index 71dbcdd..0000000 --- a/img/year-reverse-icon.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/open-access.svg b/open-access.svg deleted file mode 
100644 index 209cac0..0000000 --- a/open-access.svg +++ /dev/null @@ -1,99 +0,0 @@ - - - -image/svg+xml \ No newline at end of file diff --git a/src/decode.go b/src/decode.go new file mode 100644 index 0000000..53cebe4 --- /dev/null +++ b/src/decode.go @@ -0,0 +1,55 @@ +package main + +import ( + "log" + "strings" +) + +type conversion struct { + from string + to string +} + +func decodeTitle(title string) string { + for _, convert := range []conversion{ + {`\#`, "#"}, + {`--`, `–`}, + {"``", "“"}, + {"''", "”"}, + {"'", "’"}, // U+2019 + {`$\cdot$`, `·`}, // U+00B7. + } { + title = strings.Replace(title, convert.from, convert.to, -1) + } + + // Get rid of all curly brackets. We're displaying titles without changing + // their casing. + title = strings.ReplaceAll(title, "{", "") + title = strings.ReplaceAll(title, "}", "") + + return title +} + +func decodeAuthors(authors string) string { + for _, convert := range []conversion{ + {"'", "’"}, + } { + authors = strings.Replace(authors, convert.from, convert.to, -1) + } + // For simplicity, we expect authors to be formatted as "John Doe" instead + // of "Doe, John". 
+ if strings.Contains(authors, ",") { + log.Fatalf("author %q contains a comma", authors) + } + authorSlice := strings.Split(authors, " and ") + return strings.Join(authorSlice, ", ") +} + +func decodeProceedings(proceedings string) string { + for _, convert := range []conversion{ + {`\&`, "&"}, + } { + proceedings = strings.Replace(proceedings, convert.from, convert.to, -1) + } + return proceedings +} diff --git a/src/decode_test.go b/src/decode_test.go new file mode 100644 index 0000000..8276555 --- /dev/null +++ b/src/decode_test.go @@ -0,0 +1,81 @@ +package main + +import ( + "testing" +) + +func TestToString(t *testing.T) { + testCases := []conversion{ + { + from: "Title", + to: "Title", + }, + { + from: "This is a {Title}", + to: "This is a Title", + }, + { + from: "This is a {Title}", + to: "This is a Title", + }, + { + from: `{\#h00t}: Censorship Resistant Microblogging`, + to: `#h00t: Censorship Resistant Microblogging`, + }, + { + from: "``Good'' Worms and Human Rights", + to: "“Good” Worms and Human Rights", + }, + { + from: "An Analysis of {China}'s ``{Great Cannon}''", + to: "An Analysis of China’s “Great Cannon”", + }, + { + from: `lib$\cdot$erate, (n):`, + to: `lib·erate, (n):`, + }, + { + from: "Well -- Exploring the {Great} {Firewall}'s Poisoned {DNS}", + to: "Well – Exploring the Great Firewall’s Poisoned DNS", + }, + } + + for _, test := range testCases { + to := decodeTitle(test.from) + if to != test.to { + t.Errorf("Expected\n%s\ngot\n%s", test.to, to) + } + } +} + +func TestDecodeAuthors(t *testing.T) { + testCases := []conversion{ + { // Multiple authors should be separated by commas. + from: "John Doe and Jane Doe", + to: "John Doe, Jane Doe", + }, + { // Single authors should remain as-is. + from: "John Doe", + to: "John Doe", + }, + { // Single-name authors should remain as-is. + from: "John and Jane", + to: "John, Jane", + }, + { // Non-ASCII characters should be unaffected. 
+ from: "Jóhn Doe", + to: "Jóhn Doe", + }, + { // Apostrophes should be replaced with the right single quote. + from: "John O'Brian", + to: "John O’Brian", + }, + } + + for _, test := range testCases { + to := decodeAuthors(test.from) + if to != test.to { + t.Errorf("Expected\n%s\ngot\n%s", test.to, to) + } + } +} diff --git a/src/footer.go b/src/footer.go new file mode 100644 index 0000000..796c9e1 --- /dev/null +++ b/src/footer.go @@ -0,0 +1,11 @@ +package main + +func footer() string { + return ` + + +` +} diff --git a/src/go.mod b/src/go.mod new file mode 100644 index 0000000..3f4fb15 --- /dev/null +++ b/src/go.mod @@ -0,0 +1,5 @@ +module censorbib-go + +go 1.21.3 + +require github.com/nickng/bibtex v1.3.0 diff --git a/src/go.sum b/src/go.sum new file mode 100644 index 0000000..44be713 --- /dev/null +++ b/src/go.sum @@ -0,0 +1,2 @@ +github.com/nickng/bibtex v1.3.0 h1:iv0408z8Xe+FEVquJUo8eraXnhrAF0e+2/WayPcism8= +github.com/nickng/bibtex v1.3.0/go.mod h1:4BJ3ka/ZjGVXcHOlkzlRonex6U17L3kW6ICEsygP2bg= diff --git a/header.tpl b/src/header.go similarity index 73% rename from header.tpl rename to src/header.go index 3514449..38a79f6 100644 --- a/header.tpl +++ b/src/header.go @@ -1,13 +1,21 @@ - +package main - +import ( + "bytes" + "log" + "text/template" + "time" +) + +const headerTemplate = ` + + - + The Internet censorship bibliography -