From 800f46e278e57f30d410064d41b63b22428a4456 Mon Sep 17 00:00:00 2001 From: Alec Muffett Date: Sat, 16 Nov 2019 14:24:14 +0000 Subject: [PATCH] commit: first test run of new code --- .gitignore | 3 + Makefile | 24 ++--- Makefile,old | 15 +++ master.csv | 107 +++++++++++++++++++++ rwos-db.py | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++ wrapper.sh | 32 +++++++ 6 files changed, 430 insertions(+), 11 deletions(-) create mode 100644 Makefile,old create mode 100644 master.csv create mode 100755 rwos-db.py create mode 100755 wrapper.sh diff --git a/.gitignore b/.gitignore index 93bfd12..5c39821 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ *~ .DS_Store +*.sqlite3 +*.sqlite3-* +log*.txt diff --git a/Makefile b/Makefile index 5cf2d59..6b98935 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,17 @@ +DB=fetch.sqlite3 + all: - git pull - ./checker.sh - git pull - ( cat 01-preamble.md ; perl walk.pl ; cat 02-footnotes.md ) > README.md - ./get-ct-logs.sh - git add . - git commit -m "auto-update on `date`" - git push + -echo "make what?" + +run: + ./wrapper.sh clean: - rm *~ + -rm *~ + -rm log*.txt -wat: - git diff HEAD^ +db: + sqlite3 $(DB) + +db-nuke: clean + -rm $(DB) $(DB)-* diff --git a/Makefile,old b/Makefile,old new file mode 100644 index 0000000..5cf2d59 --- /dev/null +++ b/Makefile,old @@ -0,0 +1,15 @@ +all: + git pull + ./checker.sh + git pull + ( cat 01-preamble.md ; perl walk.pl ; cat 02-footnotes.md ) > README.md + ./get-ct-logs.sh + git add . 
+ git commit -m "auto-update on `date`" + git push + +clean: + rm *~ + +wat: + git diff HEAD^ diff --git a/master.csv b/master.csv new file mode 100644 index 0000000..360e285 --- /dev/null +++ b/master.csv @@ -0,0 +1,107 @@ +category,site_name,flaky,onion_url,comment,proof_url +civil society and community,Privacy International,,https://privacyintyqcroe.onion/,,- +civil society and community,Riseup,,http://5jp7xtmox6jyoqd5.onion/,riseup etherpad,- +civil society and community,Riseup,,http://6zc6sejeho3fwrd4.onion/,riseup file share & pastebin,- +civil society and community,Riseup,,http://j6uhdvbhz74oefxf.onion/,riseup user admin,- +civil society and community,Riseup,,http://nzh3fv6jc6jskki3.onion/,riseup main site,- +civil society and community,Riseup,,http://nzh3fv6jc6jskki3.onion/en/security/network-security/tor#riseups-tor-hidden-services,riseup index of onion sites,- +civil society and community,Riseup,,http://xpgylzydxykgdqyg.onion/,riseup lists,- +civil society and community,Riseup,,http://zsolxunfmbfuq7wf.onion/rc/,riseup mail,- +companies and services,decoded:Legal,,http://decodedsbwzj4nhq.onion/,english law firm,- +companies and services,decoded:Legal,,http://dlegal66uj5u2dvcbrev7vv6fjtwnd4moqu7j6jnd42rmbypv3coigyd.onion/,v3 address,- +globaleaks,Afrileaks,,http://wcnueib4qrsm544n.onion/,,https://www.afrileaks.org/ +globaleaks,ALAT / Allerta AntiCorruzione,,http://fkut2p37apcg6l7f.onion/,italian whistleblowing,https://allertaanticorruzione.transparency.it/servizio-alac/ +globaleaks,Atlatszo MagyarLeaks,,http://ak2uqfavwgmjrvtu.onion/,hungarian leaks,https://atlatszo.hu/magyarleaks/ +globaleaks,Bezkorupce.cz,,http://iopx5pchfdldldwp.onion/,czech anticorruption reporting site,https://secure.bezkorupce.cz/ +globaleaks,IRPILeaks,,http://5r4bjnjug3apqdii.onion/,italian investigative reporting project,https://irpi.eu/en/leaks/how-irpileaks-works/ +globaleaks,Mexico Leaks,,http://kjpkmlafh2ra57wz.onion/,,https://mexicoleaks.mx/ +globaleaks,Pistaljka.rs 
Whistleblowing,,http://acabtd4btrxjjrvr.onion/#/,,https://pistaljka.rs/ +globaleaks,Wildleaks,,http://ppdz5djzpo3w5k2z.onion/,elephant action league,https://www.wildleaks.org/the-technology/ +globaleaks,XNet Activism,,http://ztjn5gcdsqeqzmw4.onion/,anticorruption whistleblowing,https://xnet-x.net/en/xnetleaks/ +government,US Central Intelligence Agency,,http://ciadotgov4sjwlzihbbgxnqg3xiyrg7so2r2o3lt5wz5ypk4sxyjstad.onion/index.html,,https://www.cia.gov/news-information/featured-story-archive/2019-featured-story-archive/latest-layer-an-onion-site.html +news and media,BBC News,,https://www.bbcnewsv2vjtpsuy.onion/,,https://www.bbc.co.uk/news/technology-50150981 +news and media,BuzzFeed News,,https://bfnews3u2ox4m4ty.onion/,,ssl +news and media,ProPublica,,https://p53lf57qovyuvwsc6xnrppyply3vtqm7l6pcobkmyqsiofyeznfu5uqd.onion/,,ssl +news and media,ProPublica,,https://www.propub3r6espa33w.onion/,,- +news and media,The New York Times,,https://mobile.nytimes3xbfgragh.onion/,mobile site,- +news and media,The New York Times,,https://www.nytimes3xbfgragh.onion/,,- +securedrop for individuals,Barton Gellman,,http://mqddpn6yt4f5uqei.onion/,,https://github.com/b4rton/securedrop +securedrop for individuals,Jean-Marc Manach,,http://32qfx2skzcifeyg7.onion/,,https://jean-marc.manach.net/securedrop.htm +securedrop for organisations,Adresseavisen,,http://xpx3m5hcnrkds5wg.onion/,,https://securedrop.adressa.no/ +securedrop for organisations,Aftenposten,,http://bocl4xqbak4xvlh4.onion/,,https://www.aftenposten.no/securedrop/ +securedrop for organisations,Aftonbladet,,https://y27vf7g2ce5g3fnl.onion/,,ssl +securedrop for organisations,Apache,,http://zdf4nikyuswdzbt6.onion/,,https://www.apache.be/securedrop +securedrop for organisations,Associated Press,,http://3expgpdnrrzezf7r.onion/,,https://www.ap.org/tips/ +securedrop for organisations,Bergens Tidende,,http://mxrrw2l3g5dyhgzn.onion/,,https://www.bt.no/securedrop/ +securedrop for organisations,Bloomberg 
News,,http://m4hynbhhctdk27jr.onion/,,https://www.bloomberg.com/tips +securedrop for organisations,Business Insider,,http://doaxi7t7lkctvq5i.onion/,,https://www.businessinsider.com/how-to-tip-business-insider-securely-guide-signal-securedrop-2017-6 +securedrop for organisations,CBC / Canadian Broadcasting Corporation,,http://ad2ztmbv5vmbj7ic.onion/,,https://securedrop.cbc.ca/ +securedrop for organisations,Coworker.org,,http://no4gurk7efg4abwv.onion/,,https://home.coworker.org/contact/ +securedrop for organisations,Dagbladet,,http://mz33367mcdrcdi7s.onion/,,https://securedrop.dagbladet.no/ +securedrop for organisations,Fairfax Media Group (SMH et al.),,http://ipfhnseo4hgfw5mg.onion/,,https://securedrop.fairfax.com.au/ +securedrop for organisations,Field of Vision,,http://fovisionunz7mtxw.onion/,,https://fieldofvision.org/securedrop +securedrop for organisations,Financial Times,,http://xdm7flvwt3uvsrrd.onion/,,https://www.ft.com/news-tips/ +securedrop for organisations,Forbes,,http://t5pv5o4t6jyjilp6.onion/,,https://www.forbes.com/fdc/securedrop.html +securedrop for organisations,Forbidden Stories,,http://w7t5f3u4mej6dvpt.onion/,,https://forbiddenstories.org/protect-your-stories/ +securedrop for organisations,Globe and Mail (Toronto),,http://sml5wmpuq7ifq2mh.onion/,,https://sec.theglobeandmail.com/securedrop/ +securedrop for organisations,Greenpeace New Zealand,,http://ll6edwtpfl3zdwoi.onion/,,https://www.safesource.org.nz +securedrop for organisations,Guardian,,http://33y6fjyhs3phzfjj.onion/,,https://www.theguardian.com/securedrop +securedrop for organisations,Heise Investigativ,,http://sq4lecqyx4izcpkp.onion/,,https://www.heise.de/investigativ/briefkasten/ +securedrop for organisations,Houston Chronicle,,http://ibnfpppyydd6mg46.onion/,,https://newstips.houstonchronicle.com/ +securedrop for organisations,HuffPost,,http://rbugf2rz5lmjbfun.onion/,,https://img.huffingtonpost.com/securedrop +securedrop for organisations,ICIJ / International Consortium of Investigative 
Journalists,,http://lzpczap7l3zxu7zv.onion/,,https://www.icij.org/securedrop +securedrop for organisations,Intercept,,http://intrcept32ncblef.onion/,,https://theintercept.com/securedrop/ +securedrop for organisations,KUOW Public Radio,,http://hcxmf67v3ltykmww.onion/,,https://medium.com/@kuow/how-whistleblowers-can-contact-kuow-3ed089e21d30 +securedrop for organisations,Lucy Parsons Labs (Chicago),,http://qn4qfeeslglmwxgb.onion/,,https://lucyparsonslabs.com/securedrop +securedrop for organisations,McClatchy DC,,http://zafull3et6muayeh.onion/,,https://www.mcclatchydc.com/customer-service/contact-us/ +securedrop for organisations,Meduza,,http://xwt2mqq64h63ydp5.onion/,,https://meduza.io/cards/u-menya-est-vazhnaya-informatsiya-dlya-meduzy-no-ya-boyus-ee-peredavat-kak-sdelat-eto-po-nastoyaschemu-anonimno +securedrop for organisations,Morgenbladet,,http://g4wmrmqxpj5bnvml.onion/,,https://morgenbladet.no/varsle +securedrop for organisations,MormonLeaks,,http://efeip5ekoqi4upkz.onion/,,https://mormonleaks.io/ +securedrop for organisations,New York Times,,https://nyttips4bmquxfzw.onion/,,https://www.nytimes.com/newsgraphics/2016/news-tips/#securedrop +securedrop for organisations,New Yorker,,http://icpozbs6r6yrwt67.onion/,,https://projects.newyorker.com/securedrop/ +securedrop for organisations,NPR,,http://5ha7oig7du2jeyer.onion/,,https://help.npr.org/customer/en/portal/articles/2860413-got-a-confidential-news-tip +securedrop for organisations,NRK,,http://nrkvarslekidu2uz.onion/,,https://www.nrk.no/varsle/ +securedrop for organisations,Politico,,http://mq2du34rci6arhbd.onion/,,https://www.politico.com/news-tips/ +securedrop for organisations,Public Intelligence,,http://arujlhu2zjjhc3bw.onion/,,https://publicintelligence.net/contribute/ +securedrop for organisations,Radio-Canada,,http://w5jfqhep2jbypkek.onion/,,https://sourceanonyme.radio-canada.ca +securedrop for organisations,Reuters,,http://smb7p276iht3i2fj.onion/,,https://www.reuters.com/investigates/special-report/tips/ 
+securedrop for organisations,RISE Moldova,,http://6lhmirnluwmvjw4z.onion/,,https://www.rise.md/leaks/ +securedrop for organisations,San Francisco Chronicle,,http://nrwvazcz6figxpg5.onion/,,https://newstips.sfchronicle.com/ +securedrop for organisations,Svenska Dagbladet,,http://cnhuql7wj2ga5iv7.onion/,,https://www.svd.se/securedrop/ +securedrop for organisations,The Atlantic,,http://s6xle2dgrsqcxiwb.onion/,,https://www.theatlantic.com/tips/ +securedrop for organisations,The Daily Beast,,http://bcwyjiwj25t44it6.onion/,,https://www.thedailybeast.com/tips +securedrop for organisations,The Telegraph,,http://ldbtuktejbkg227d.onion/,,https://www.telegraph.co.uk/news/investigations/contact-us/ +securedrop for organisations,The Verge; Racked; Eater,,http://2xat73hlwcpwo2zy.onion/,,https://apps.voxmedia.com/verge-tips/ +securedrop for organisations,USA Today,,https://usatodayw7vu5egc.onion/,,https://newstips.usatoday.com/securedrop.html +securedrop for organisations,VG / Verdens Gang,,http://vgnettwin5lyl4yr.onion/,,https://securedrop.vg.no/ +securedrop for organisations,VICE Media,,http://e3v3x57ykz25uvij.onion/,,https://news.vice.com/securedrop/ +securedrop for organisations,Wall Street Journal,,http://z5duvjw7ztnuc6fg.onion/,,https://www.wsj.com/tips +securedrop for organisations,Washington Post,,https://jcw5q6uyjioupxcc.onion/,,https://www.washingtonpost.com/securedrop/ +securedrop for organisations,Whistleblower Aid,,http://uwd57qermcote3au.onion/,,https://whistlebloweraid.org/contact/instructions/ +securedrop for organisations,Wired,,http://k5ri3fdr232d36nb.onion/,,https://www.wired.com/securedrop/ +tech and software,Ablative Hosting,,https://hzwjmjimhr7bdmfv2doll4upibt5ojjmpo3pbp5ctwcg37n3hyk7qzid.onion/,,ssl +tech and software,Debian,,http://5nca3wxl33tzlzj5.onion/,index of onion sites,https://onion.debian.org/ +tech and software,Debian,,http://sejnfjrq6szgca7v.onion/,main site,- +tech and software,ExpressVPN,,http://expressobutiolem.onion/,,- +tech and 
software,Hardened BSD,,http://3jkjhrvkdbdkqisnwhdpe4afh2j2g3suhsfcewiemsyk5ecd6gadmxyd.onion/,,https://hardenedbsd.org/article/shawn-webb/2017-03-11/hardenedbsd-through-tor-hidden-service +tech and software,Hardened BSD,,http://dxsj6ifxytlgq33k.onion/,,https://hardenedbsd.org/article/shawn-webb/2017-03-11/hardenedbsd-through-tor-hidden-service +tech and software,keybase.io,,http://fncuwbiisyh6ak3i.onion/,,- +tech and software,keybase.io,,http://keybase5wmilwokqirssclfnsqrjdsi7jdir5wy7y7iu3tanwmtp6oid.onion/,,https://keybase.io/docs/command_line/tor +tech and software,Mailpile,,http://clgs64523yi2bkhz.onion/,,- +tech and software,OnionShare,,http://lldan5gahapx5k7iafb3s4ikijc4ni7gx5iywdflkba5y2ezyg6sjgyd.onion/,,https://onionshare.org/ +tech and software,Qubes OS,,http://qubesos4rrrrz6n4.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and software,Qubes OS,,http://sik5nlgfc5qylnnsr57qrbm64zbdx6t4lreyhpon3ychmxmiem7tioad.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and software,Tor Project,,http://expyuzz4wqqyqhjn.onion/,main site,- +tech and software,Tor Project,,http://yz7lpwfhhzcdyc5y.onion/,index of onion sites,https://onion.torproject.org/ +tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/,main page,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/wiki/Forcing_.onion_on_whonix.org,index of onion sites,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/wiki/Main_page,wiki,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and 
software,Whonix,,http://forums.dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/,forums,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +tech and software,Whonix,,http://kkkkkkkkkk63ava6.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/ +web and internet,Archive Today (archive.is),,http://archivecaslytosk.onion/,,https://archive.is/ +web and internet,Cloudflare Public DNS 1.1.1.1,,https://dns4torpnlfs2ifuz2s2yf3fc7rdmsbhm6rw75euj35pac6ap25zgqad.onion/,dns resolver by cloudflare,- +web and internet,DuckDuckGo,,https://3g2upl4pq6kufc4m.onion/,search engine,- +web and internet,Facebook,,https://m.facebookcorewwwi.onion/,mobile site,- +web and internet,Facebook,,https://www.facebookcorewwwi.onion/,desktop site,- +web and internet,Mail2Tor,,http://mail2tor2zyjdctd.onion/,mail gateway,- +web and internet,Protonmail,,https://protonirockerxow.onion/,,- \ No newline at end of file diff --git a/rwos-db.py b/rwos-db.py new file mode 100755 index 0000000..53d3687 --- /dev/null +++ b/rwos-db.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +from datetime import datetime, timezone +from multiprocessing import Pool, Lock +import csv +import datetime as dt +import sqlite3 +import subprocess +import sys +import time + +GLOBAL_DB = None # has to be a global because pickling :-( + +MASTER_CSV = 'master.csv' +DB_FILENAME = 'fetch.sqlite3' +SOCKS_PROXY = 'socks5h://127.0.0.1:9150/' +USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0" +BADNESS = 900 +CURL_TIMEOUT = 120 +RETRY_SLEEP = 60 +PLACEHOLDER = '-' +POOL_WORKERS = 8 +YES = 'y' + +EMOJI_UNSET = ':question:' +EMOJI_2xx = ':white_check_mark:' +EMOJI_3xx = ':arrow_right:' +EMOJI_4xx = ':negative_squared_cross_mark:' +EMOJI_5xx = ':red_circle:' +EMOJI_DEAD = ':sos:' +EMOJI_NO_DATA = ':interrobang:' + +H1 = '#' +H2 = '##' +H3 = '###' +H4 = '####' +B = '*' +BB = ' *' +BBB = ' *' +LINE = '----' + +SCHEMA_SQL = ''' +PRAGMA 
# rwos-db.py: probe the onion sites listed in master.csv with curl through a
# SOCKS proxy, log every attempt to SQLite, and print a markdown status report.
#
# NOTE(review): the reviewed text had its whitespace collapsed.  The module
# constants are restated here so the definitions below are self-contained;
# the literal spacing inside BB / BBB could not be recovered -- confirm the
# intended bullet indentation against the original file.

GLOBAL_DB = None  # has to be a global because pickling :-(

MASTER_CSV = 'master.csv'
DB_FILENAME = 'fetch.sqlite3'
SOCKS_PROXY = 'socks5h://127.0.0.1:9150/'
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
BADNESS = 900        # http_code values >= BADNESS are synthetic failure codes
CURL_TIMEOUT = 120   # seconds allowed per curl invocation
RETRY_SLEEP = 60     # seconds to wait between attempts on the same URL
PLACEHOLDER = '-'
POOL_WORKERS = 8
YES = 'y'

EMOJI_UNSET = ':question:'
EMOJI_2xx = ':white_check_mark:'
EMOJI_3xx = ':arrow_right:'
EMOJI_4xx = ':negative_squared_cross_mark:'
EMOJI_5xx = ':red_circle:'
EMOJI_DEAD = ':sos:'
EMOJI_NO_DATA = ':interrobang:'

H1 = '#'
H2 = '##'
H3 = '###'
H4 = '####'
B = '*'
BB = ' *'
BBB = ' *'
LINE = '----'

SCHEMA_SQL = '''
PRAGMA journal_mode = wal;
PRAGMA foreign_keys = ON;
PRAGMA encoding = "UTF-8";
BEGIN TRANSACTION;
CREATE TABLE IF NOT EXISTS fetches (
    id INTEGER PRIMARY KEY NOT NULL,
    ctime INTEGER DEFAULT (CAST(strftime('%s','now') AS INTEGER)) NOT NULL,
    run TEXT NOT NULL,
    url TEXT NOT NULL,
    attempt INTEGER NOT NULL,
    http_code INTEGER NOT NULL,
    curl_exit INTEGER NOT NULL,
    out TEXT NOT NULL,
    err TEXT NOT NULL
    );
PRAGMA user_version = 1;
COMMIT;
'''

INSERT_SQL = '''
INSERT INTO
fetches (run, url, attempt, out, err, http_code, curl_exit)
VALUES (:run, :url, :attempt, :out, :err, :http_code, :curl_exit)
'''

SUMMARY_SQL = '''
SELECT ctime, attempt, http_code
FROM fetches
WHERE url=:url
ORDER BY ctime DESC
LIMIT :limit
'''


def extract_hcode(s):  # static
    """Return the HTTP status code found on the first line of *s*.

    *s* is the captured stdout of ``curl --head`` (or None).  When no code
    can be parsed a synthetic value ``BADNESS + n`` is returned instead:
    1 = s is None, 2 = no lines, 3 = first line has fewer than two fields,
    4 = second field is not an integer.
    """
    if s is None:
        return BADNESS + 1
    lines = s.splitlines()
    if not lines:
        return BADNESS + 2
    fields = lines[0].split()
    if len(fields) < 2:
        return BADNESS + 3
    try:
        return int(fields[1])
    except ValueError:
        return BADNESS + 4


def lenient_text(raw):
    """Decode bytes handed over by sqlite3 as UTF-8, dropping invalid bytes."""
    return raw.decode('utf-8', 'ignore')


class Database:
    """Thin wrapper around the SQLite file that records fetch attempts."""

    def __init__(self, filename):
        """Open *filename*, create the schema if needed, and stamp this run."""
        self.connection = sqlite3.connect(filename)
        # The previous lambda called the Python 2 names `unicode` / `UTF8`,
        # which do not exist here and raised NameError on the first TEXT read.
        self.connection.text_factory = lenient_text
        self.cursor = self.connection.cursor()
        self.cursor.executescript(SCHEMA_SQL)
        # Identifier stored in the `run` column of every row inserted by
        # this instance (UTC timestamp, second resolution).
        self.now = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        self.lock = Lock()

    def commit(self):
        """Commit the current transaction."""
        self.connection.commit()

    def close(self):
        """Commit pending work and close the connection."""
        self.commit()
        self.connection.close()

    def summary(self, url, limit=10):
        """Return up to *limit* ``(ctime, attempt, http_code)`` rows for *url*,
        newest first."""
        params = {'url': url, 'limit': limit}
        return self.cursor.execute(SUMMARY_SQL, params).fetchall()

    def insert(self, rowhash):
        """Insert one fetch record; *rowhash* supplies every INSERT_SQL
        parameter except ``run``, which is filled in from this instance."""
        params = dict(rowhash, run=self.now)  # copy: leave the caller's dict alone
        # `with` releases the lock even when the INSERT raises; a bare
        # acquire()/release() pair would leave it held forever.
        with self.lock:
            self.cursor.execute(INSERT_SQL, params)
            self.commit()


class URL:
    """One URL to probe, together with its attempt counter and last result."""

    def __init__(self, url):
        self.url = url
        self.attempt = 0
        self.last_code = None

    def fetch1(self):
        """Run a single ``curl --head`` through the SOCKS proxy and record
        the outcome via ``GLOBAL_DB.insert``."""
        args = [
            'curl',
            '--head',
            '--user-agent', USER_AGENT,
            '--proxy', SOCKS_PROXY,
            # --url keeps a spreadsheet-supplied value that starts with '-'
            # from being parsed as a curl option.
            '--url', self.url,
        ]
        try:
            # subprocess.run() kills and reaps curl when the timeout expires;
            # Popen.communicate(timeout=...) leaves the child running.
            proc = subprocess.run(
                args,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                errors='replace',  # never die on non-UTF-8 header bytes
                timeout=CURL_TIMEOUT,
            )
            out, err = proc.stdout, proc.stderr
            hcode = extract_hcode(out)
            if hcode == 200:
                err = PLACEHOLDER  # stderr is not kept for a 200 response
            ecode = proc.returncode
        except subprocess.TimeoutExpired as e:
            out, err = PLACEHOLDER, str(e)
            hcode = ecode = BADNESS + 10
        self.last_code = hcode
        self.attempt += 1
        GLOBAL_DB.insert(dict(
            url=self.url,
            attempt=self.attempt,
            out=out,
            err=err,
            http_code=hcode,
            curl_exit=ecode,
        ))

    def fetchwrap(self):
        """Try the URL up to three times, stopping at the first attempt whose
        code is below BADNESS."""
        max_tries = 3
        for i in range(1, max_tries + 1):
            self.fetch1()
            print('try{0}: {1} {2}'.format(i, self.url, self.last_code))
            if self.last_code < BADNESS:
                return
            if i < max_tries:  # no point sleeping after the final attempt
                time.sleep(RETRY_SLEEP)


def placeholder(s):
    """Return PLACEHOLDER for an empty string or None, otherwise *s*."""
    if s is None or s == '':
        return PLACEHOLDER
    return s


def caps(s):
    """Capitalise every whitespace-separated word of *s*."""
    return ' '.join(w.capitalize() for w in s.lower().split())


def get_categories(chunk):
    """Return the sorted, de-duplicated 'category' values of the rows."""
    return sorted({x['category'] for x in chunk})


def get_placeholder(row, k):
    """Return ``row[k]``, or PLACEHOLDER when it is missing or empty."""
    return placeholder(row.get(k, ''))


def sort_using(chunk, k):
    """Return the rows sorted by column *k*."""
    return sorted(chunk, key=lambda x: x[k])


def grep_using(chunk, k, v, invert=False):
    """Return rows whose column *k* equals *v* (or differs, if *invert*)."""
    if invert:
        return [x for x in chunk if x.get(k, '') != v]
    return [x for x in chunk if x.get(k, '') == v]


def get_proof(row):
    """Return the markdown text describing the row's proof_url column."""
    url = get_placeholder(row, 'proof_url')
    if url == PLACEHOLDER:
        return 'proof to be done'
    if url == 'ssl':
        return 'check tls/ssl certificate'
    return '[proof link]({})'.format(url)


def _emoji_for_code(code):
    """Map a stored http_code to its status emoji."""
    if 200 <= code < 300:
        return EMOJI_2xx
    if 300 <= code < 400:
        return EMOJI_3xx
    if 400 <= code < 500:
        return EMOJI_4xx
    if 500 <= code < 600:
        return EMOJI_5xx
    if code >= BADNESS:
        return EMOJI_DEAD
    return EMOJI_UNSET


def get_summary(url, db=None):
    """Return a list of one-line status strings for *url*, newest first.

    Rows come from ``db.summary(url)``; *db* defaults to GLOBAL_DB.  When
    there is no history the result is ``[EMOJI_NO_DATA]``.  It used to be the
    bare string, which callers iterated character by character.
    """
    source = GLOBAL_DB if db is None else db
    rows = source.summary(url)
    if not rows:
        return [EMOJI_NO_DATA]
    result = []
    for when, attempt, code in rows:
        t = datetime.fromtimestamp(when, timezone.utc)
        result.append('{0} attempt={1} code={2} time={3}'.format(
            _emoji_for_code(code), attempt, code, t))
    return result


def print_chunk(chunk, title, print_bar=True):
    """Print one markdown section titled *title* with an entry per row.

    With *print_bar* true, each entry is followed by its fetch history.
    """
    print(LINE)
    print(H2, caps(title))
    print()
    for row in sort_using(chunk, 'site_name'):
        print(H3, '[{0}]({1})'.format(row['site_name'], row['onion_url']))
        comment = get_placeholder(row, 'comment')
        if comment != PLACEHOLDER:
            print(B, '*{}*'.format(comment))
        # print proof unconditionally, as encouragement to fix it
        print(B, '*{}*'.format(get_proof(row)))
        if print_bar:
            for line in get_summary(row['onion_url']):
                print(BB, line)
        print()


def poolhook(x):
    """Pool entry point: run the retry loop for one URL object."""
    x.fetchwrap()


def do_fetch(master):
    """Fetch every non-flaky onion_url in *master* using a worker pool."""
    chunk = grep_using(master, 'flaky', YES, invert=True)
    work = [URL(x['onion_url']) for x in chunk]
    # NOTE(review): workers reach the database through the module global
    # GLOBAL_DB, which is only assigned under the __main__ guard.  That is
    # inherited with the "fork" start method; under "spawn" it would still be
    # None in the workers -- confirm the target platform.  Sharing one SQLite
    # connection across forked processes also deserves a second look.
    with Pool(POOL_WORKERS) as p:
        p.map(poolhook, work)


def print_index(cats):
    """Print the markdown index linking to each category section."""
    print(LINE)
    print(H1, 'Index')
    print()
    for cat in cats:
        print(B, '[{0}](#{1})'.format(caps(cat), cat.lower().replace(' ', '-')))
    print()


def do_print(master):
    """Print the full report: index, one section per category, then the
    flaky sites (without fetch history)."""
    cats = get_categories(master)
    print_index(cats)
    for cat in cats:
        chunk = grep_using(master, 'category', cat)
        chunk = grep_using(chunk, 'flaky', YES, invert=True)
        print_chunk(chunk, cat)
    flaky = grep_using(master, 'flaky', YES)
    print_chunk(flaky, 'Flaky Sites', print_bar=False)


if __name__ == '__main__':
    # csv: category, site_name, flaky, onion_url, comment, proof_url
    # newline='' is what the csv module asks for when reading files.
    with open(MASTER_CSV, 'r', newline='', encoding='utf-8') as fh:
        master = list(csv.DictReader(fh))

    GLOBAL_DB = Database(DB_FILENAME)
    try:
        for arg in sys.argv[1:]:
            if arg == 'fetch':
                do_fetch(master)
            elif arg == 'print':
                do_print(master)
            else:
                # stderr only: stdout is the generated report
                print('ignoring unknown action: {}'.format(arg), file=sys.stderr)
    finally:
        GLOBAL_DB.close()  # commit and close even if an action failed
# wrapper.sh (body): refresh master.csv from the published spreadsheet, run
# the fetcher, then regenerate README.md from preamble + report + footnotes.

url="https://docs.google.com/spreadsheets/d/e/2PACX-1vRjEEqZ2bGYQcvTvWqJfNvw_NCTrcIM9C2GzriqGyEfz_8C9ZAj2c9gaR6ew6u4X-qRsYxgeD_zZMxD/pub?gid=0&single=true&output=csv"
now=`date "+%Y%m%d%H%M%S"`
out="log-$now.out.txt"
err="log-$now.err.txt"
tmp="/tmp/onion-tmp-$$.csv"
csv="master.csv"
exe="./rwos-db.py"
readme="README.md"
readme_tmp="$readme.$$.tmp"

# Redirect this script's stdout/stderr to the per-run log files.  Without the
# '>' the shell would try to execute "$out" as a program instead.
exec >"$out" 2>"$err"

# Remove scratch files however the script ends.
trap 'rm -f "$tmp" "$readme_tmp"' EXIT

set -x

# --fail: an HTTP error page must not be mistaken for a fresh spreadsheet.
curl --fail "$url" > "$tmp" || exit 1

# Only replace master.csv when the download is non-empty and differs.
if [ -s "$tmp" ] ; then
    cmp -s "$tmp" "$csv" || cp "$tmp" "$csv"
fi

"$exe" fetch || exit 1

# Build the README in a scratch file: `exit 1` inside ( ... ) only leaves the
# subshell, so its status is checked here and README.md is replaced only
# after the whole report was generated successfully.
(
    cat 01-preamble.md
    echo ""
    "$exe" print || exit 1
    echo ""
    cat 02-footnotes.md
    echo ""
) > "$readme_tmp" || exit 1

mv "$readme_tmp" "$readme" || exit 1

exit 0