mirror of
https://github.com/alecmuffett/real-world-onion-sites.git
synced 2024-10-01 01:06:18 -04:00
commit: first test run of new code
This commit is contained in:
parent
f9895057a7
commit
800f46e278
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1,5 @@
|
|||||||
*~
|
*~
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
*.sqlite3
|
||||||
|
*.sqlite3-*
|
||||||
|
log*.txt
|
||||||
|
24
Makefile
24
Makefile
@ -1,15 +1,17 @@
|
|||||||
|
DB=fetch.sqlite3
|
||||||
|
|
||||||
all:
|
all:
|
||||||
git pull
|
-echo "make what?"
|
||||||
./checker.sh
|
|
||||||
git pull
|
run:
|
||||||
( cat 01-preamble.md ; perl walk.pl ; cat 02-footnotes.md ) > README.md
|
./wrapper.sh
|
||||||
./get-ct-logs.sh
|
|
||||||
git add .
|
|
||||||
git commit -m "auto-update on `date`"
|
|
||||||
git push
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm *~
|
-rm *~
|
||||||
|
-rm log*.txt
|
||||||
|
|
||||||
wat:
|
db:
|
||||||
git diff HEAD^
|
sqlite3 $(DB)
|
||||||
|
|
||||||
|
db-nuke: clean
|
||||||
|
-rm $(DB) $(DB)-*
|
||||||
|
15
Makefile,old
Normal file
15
Makefile,old
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Archived copy of the previous Makefile (kept as "Makefile,old").

# Default target: pull, run the checker, regenerate README.md from the
# preamble, the output of walk.pl and the footnotes, refresh the CT logs,
# then commit and push everything.
all:
	git pull
	./checker.sh
	git pull
	( cat 01-preamble.md ; perl walk.pl ; cat 02-footnotes.md ) > README.md
	./get-ct-logs.sh
	git add .
	git commit -m "auto-update on `date`"
	git push

# Remove editor backup files.
clean:
	rm *~

# Show what the most recent commit changed.
wat:
	git diff HEAD^
|
107
master.csv
Normal file
107
master.csv
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
category,site_name,flaky,onion_url,comment,proof_url
|
||||||
|
civil society and community,Privacy International,,https://privacyintyqcroe.onion/,,-
|
||||||
|
civil society and community,Riseup,,http://5jp7xtmox6jyoqd5.onion/,riseup etherpad,-
|
||||||
|
civil society and community,Riseup,,http://6zc6sejeho3fwrd4.onion/,riseup file share & pastebin,-
|
||||||
|
civil society and community,Riseup,,http://j6uhdvbhz74oefxf.onion/,riseup user admin,-
|
||||||
|
civil society and community,Riseup,,http://nzh3fv6jc6jskki3.onion/,riseup main site,-
|
||||||
|
civil society and community,Riseup,,http://nzh3fv6jc6jskki3.onion/en/security/network-security/tor#riseups-tor-hidden-services,riseup index of onion sites,-
|
||||||
|
civil society and community,Riseup,,http://xpgylzydxykgdqyg.onion/,riseup lists,-
|
||||||
|
civil society and community,Riseup,,http://zsolxunfmbfuq7wf.onion/rc/,riseup mail,-
|
||||||
|
companies and services,decoded:Legal,,http://decodedsbwzj4nhq.onion/,english law firm,-
|
||||||
|
companies and services,decoded:Legal,,http://dlegal66uj5u2dvcbrev7vv6fjtwnd4moqu7j6jnd42rmbypv3coigyd.onion/,v3 address,-
|
||||||
|
globaleaks,Afrileaks,,http://wcnueib4qrsm544n.onion/,,https://www.afrileaks.org/
|
||||||
|
globaleaks,ALAT / Allerta AntiCorruzione,,http://fkut2p37apcg6l7f.onion/,italian whistleblowing,https://allertaanticorruzione.transparency.it/servizio-alac/
|
||||||
|
globaleaks,Atlatszo MagyarLeaks,,http://ak2uqfavwgmjrvtu.onion/,hungarian leaks,https://atlatszo.hu/magyarleaks/
|
||||||
|
globaleaks,Bezkorupce.cz,,http://iopx5pchfdldldwp.onion/,czech anticorruption reporting site,https://secure.bezkorupce.cz/
|
||||||
|
globaleaks,IRPILeaks,,http://5r4bjnjug3apqdii.onion/,italian investigative reporting project,https://irpi.eu/en/leaks/how-irpileaks-works/
|
||||||
|
globaleaks,Mexico Leaks,,http://kjpkmlafh2ra57wz.onion/,,https://mexicoleaks.mx/
|
||||||
|
globaleaks,Pistaljka.rs Whistleblowing,,http://acabtd4btrxjjrvr.onion/#/,,https://pistaljka.rs/
|
||||||
|
globaleaks,Wildleaks,,http://ppdz5djzpo3w5k2z.onion/,elephant action league,https://www.wildleaks.org/the-technology/
|
||||||
|
globaleaks,XNet Activism,,http://ztjn5gcdsqeqzmw4.onion/,anticorruption whistleblowing,https://xnet-x.net/en/xnetleaks/
|
||||||
|
government,US Central Intelligence Agency,,http://ciadotgov4sjwlzihbbgxnqg3xiyrg7so2r2o3lt5wz5ypk4sxyjstad.onion/index.html,,https://www.cia.gov/news-information/featured-story-archive/2019-featured-story-archive/latest-layer-an-onion-site.html
|
||||||
|
news and media,BBC News,,https://www.bbcnewsv2vjtpsuy.onion/,,https://www.bbc.co.uk/news/technology-50150981
|
||||||
|
news and media,BuzzFeed News,,https://bfnews3u2ox4m4ty.onion/,,ssl
|
||||||
|
news and media,ProPublica,,https://p53lf57qovyuvwsc6xnrppyply3vtqm7l6pcobkmyqsiofyeznfu5uqd.onion/,,ssl
|
||||||
|
news and media,ProPublica,,https://www.propub3r6espa33w.onion/,,-
|
||||||
|
news and media,The New York Times,,https://mobile.nytimes3xbfgragh.onion/,mobile site,-
|
||||||
|
news and media,The New York Times,,https://www.nytimes3xbfgragh.onion/,,-
|
||||||
|
securedrop for individuals,Barton Gellman,,http://mqddpn6yt4f5uqei.onion/,,https://github.com/b4rton/securedrop
|
||||||
|
securedrop for individuals,Jean-Marc Manach,,http://32qfx2skzcifeyg7.onion/,,https://jean-marc.manach.net/securedrop.htm
|
||||||
|
securedrop for organisations,Adresseavisen,,http://xpx3m5hcnrkds5wg.onion/,,https://securedrop.adressa.no/
|
||||||
|
securedrop for organisations,Aftenposten,,http://bocl4xqbak4xvlh4.onion/,,https://www.aftenposten.no/securedrop/
|
||||||
|
securedrop for organisations,Aftonbladet,,https://y27vf7g2ce5g3fnl.onion/,,ssl
|
||||||
|
securedrop for organisations,Apache,,http://zdf4nikyuswdzbt6.onion/,,https://www.apache.be/securedrop
|
||||||
|
securedrop for organisations,Associated Press,,http://3expgpdnrrzezf7r.onion/,,https://www.ap.org/tips/
|
||||||
|
securedrop for organisations,Bergens Tidende,,http://mxrrw2l3g5dyhgzn.onion/,,https://www.bt.no/securedrop/
|
||||||
|
securedrop for organisations,Bloomberg News,,http://m4hynbhhctdk27jr.onion/,,https://www.bloomberg.com/tips
|
||||||
|
securedrop for organisations,Business Insider,,http://doaxi7t7lkctvq5i.onion/,,https://www.businessinsider.com/how-to-tip-business-insider-securely-guide-signal-securedrop-2017-6
|
||||||
|
securedrop for organisations,CBC / Canadian Broadcasting Corporation,,http://ad2ztmbv5vmbj7ic.onion/,,https://securedrop.cbc.ca/
|
||||||
|
securedrop for organisations,Coworker.org,,http://no4gurk7efg4abwv.onion/,,https://home.coworker.org/contact/
|
||||||
|
securedrop for organisations,Dagbladet,,http://mz33367mcdrcdi7s.onion/,,https://securedrop.dagbladet.no/
|
||||||
|
securedrop for organisations,Fairfax Media Group (SMH et al.),,http://ipfhnseo4hgfw5mg.onion/,,https://securedrop.fairfax.com.au/
|
||||||
|
securedrop for organisations,Field of Vision,,http://fovisionunz7mtxw.onion/,,https://fieldofvision.org/securedrop
|
||||||
|
securedrop for organisations,Financial Times,,http://xdm7flvwt3uvsrrd.onion/,,https://www.ft.com/news-tips/
|
||||||
|
securedrop for organisations,Forbes,,http://t5pv5o4t6jyjilp6.onion/,,https://www.forbes.com/fdc/securedrop.html
|
||||||
|
securedrop for organisations,Forbidden Stories,,http://w7t5f3u4mej6dvpt.onion/,,https://forbiddenstories.org/protect-your-stories/
|
||||||
|
securedrop for organisations,Globe and Mail (Toronto),,http://sml5wmpuq7ifq2mh.onion/,,https://sec.theglobeandmail.com/securedrop/
|
||||||
|
securedrop for organisations,Greenpeace New Zealand,,http://ll6edwtpfl3zdwoi.onion/,,https://www.safesource.org.nz
|
||||||
|
securedrop for organisations,Guardian,,http://33y6fjyhs3phzfjj.onion/,,https://www.theguardian.com/securedrop
|
||||||
|
securedrop for organisations,Heise Investigativ,,http://sq4lecqyx4izcpkp.onion/,,https://www.heise.de/investigativ/briefkasten/
|
||||||
|
securedrop for organisations,Houston Chronicle,,http://ibnfpppyydd6mg46.onion/,,https://newstips.houstonchronicle.com/
|
||||||
|
securedrop for organisations,HuffPost,,http://rbugf2rz5lmjbfun.onion/,,https://img.huffingtonpost.com/securedrop
|
||||||
|
securedrop for organisations,ICIJ / International Consortium of Investigative Journalists,,http://lzpczap7l3zxu7zv.onion/,,https://www.icij.org/securedrop
|
||||||
|
securedrop for organisations,Intercept,,http://intrcept32ncblef.onion/,,https://theintercept.com/securedrop/
|
||||||
|
securedrop for organisations,KUOW Public Radio,,http://hcxmf67v3ltykmww.onion/,,https://medium.com/@kuow/how-whistleblowers-can-contact-kuow-3ed089e21d30
|
||||||
|
securedrop for organisations,Lucy Parsons Labs (Chicago),,http://qn4qfeeslglmwxgb.onion/,,https://lucyparsonslabs.com/securedrop
|
||||||
|
securedrop for organisations,McClatchy DC,,http://zafull3et6muayeh.onion/,,https://www.mcclatchydc.com/customer-service/contact-us/
|
||||||
|
securedrop for organisations,Meduza,,http://xwt2mqq64h63ydp5.onion/,,https://meduza.io/cards/u-menya-est-vazhnaya-informatsiya-dlya-meduzy-no-ya-boyus-ee-peredavat-kak-sdelat-eto-po-nastoyaschemu-anonimno
|
||||||
|
securedrop for organisations,Morgenbladet,,http://g4wmrmqxpj5bnvml.onion/,,https://morgenbladet.no/varsle
|
||||||
|
securedrop for organisations,MormonLeaks,,http://efeip5ekoqi4upkz.onion/,,https://mormonleaks.io/
|
||||||
|
securedrop for organisations,New York Times,,https://nyttips4bmquxfzw.onion/,,https://www.nytimes.com/newsgraphics/2016/news-tips/#securedrop
|
||||||
|
securedrop for organisations,New Yorker,,http://icpozbs6r6yrwt67.onion/,,https://projects.newyorker.com/securedrop/
|
||||||
|
securedrop for organisations,NPR,,http://5ha7oig7du2jeyer.onion/,,https://help.npr.org/customer/en/portal/articles/2860413-got-a-confidential-news-tip
|
||||||
|
securedrop for organisations,NRK,,http://nrkvarslekidu2uz.onion/,,https://www.nrk.no/varsle/
|
||||||
|
securedrop for organisations,Politico,,http://mq2du34rci6arhbd.onion/,,https://www.politico.com/news-tips/
|
||||||
|
securedrop for organisations,Public Intelligence,,http://arujlhu2zjjhc3bw.onion/,,https://publicintelligence.net/contribute/
|
||||||
|
securedrop for organisations,Radio-Canada,,http://w5jfqhep2jbypkek.onion/,,https://sourceanonyme.radio-canada.ca
|
||||||
|
securedrop for organisations,Reuters,,http://smb7p276iht3i2fj.onion/,,https://www.reuters.com/investigates/special-report/tips/
|
||||||
|
securedrop for organisations,RISE Moldova,,http://6lhmirnluwmvjw4z.onion/,,https://www.rise.md/leaks/
|
||||||
|
securedrop for organisations,San Francisco Chronicle,,http://nrwvazcz6figxpg5.onion/,,https://newstips.sfchronicle.com/
|
||||||
|
securedrop for organisations,Svenska Dagbladet,,http://cnhuql7wj2ga5iv7.onion/,,https://www.svd.se/securedrop/
|
||||||
|
securedrop for organisations,The Atlantic,,http://s6xle2dgrsqcxiwb.onion/,,https://www.theatlantic.com/tips/
|
||||||
|
securedrop for organisations,The Daily Beast,,http://bcwyjiwj25t44it6.onion/,,https://www.thedailybeast.com/tips
|
||||||
|
securedrop for organisations,The Telegraph,,http://ldbtuktejbkg227d.onion/,,https://www.telegraph.co.uk/news/investigations/contact-us/
|
||||||
|
securedrop for organisations,The Verge; Racked; Eater,,http://2xat73hlwcpwo2zy.onion/,,https://apps.voxmedia.com/verge-tips/
|
||||||
|
securedrop for organisations,USA Today,,https://usatodayw7vu5egc.onion/,,https://newstips.usatoday.com/securedrop.html
|
||||||
|
securedrop for organisations,VG / Verdens Gang,,http://vgnettwin5lyl4yr.onion/,,https://securedrop.vg.no/
|
||||||
|
securedrop for organisations,VICE Media,,http://e3v3x57ykz25uvij.onion/,,https://news.vice.com/securedrop/
|
||||||
|
securedrop for organisations,Wall Street Journal,,http://z5duvjw7ztnuc6fg.onion/,,https://www.wsj.com/tips
|
||||||
|
securedrop for organisations,Washington Post,,https://jcw5q6uyjioupxcc.onion/,,https://www.washingtonpost.com/securedrop/
|
||||||
|
securedrop for organisations,Whistleblower Aid,,http://uwd57qermcote3au.onion/,,https://whistlebloweraid.org/contact/instructions/
|
||||||
|
securedrop for organisations,Wired,,http://k5ri3fdr232d36nb.onion/,,https://www.wired.com/securedrop/
|
||||||
|
tech and software,Ablative Hosting,,https://hzwjmjimhr7bdmfv2doll4upibt5ojjmpo3pbp5ctwcg37n3hyk7qzid.onion/,,ssl
|
||||||
|
tech and software,Debian,,http://5nca3wxl33tzlzj5.onion/,index of onion sites,https://onion.debian.org/
|
||||||
|
tech and software,Debian,,http://sejnfjrq6szgca7v.onion/,main site,-
|
||||||
|
tech and software,ExpressVPN,,http://expressobutiolem.onion/,,-
|
||||||
|
tech and software,Hardened BSD,,http://3jkjhrvkdbdkqisnwhdpe4afh2j2g3suhsfcewiemsyk5ecd6gadmxyd.onion/,,https://hardenedbsd.org/article/shawn-webb/2017-03-11/hardenedbsd-through-tor-hidden-service
|
||||||
|
tech and software,Hardened BSD,,http://dxsj6ifxytlgq33k.onion/,,https://hardenedbsd.org/article/shawn-webb/2017-03-11/hardenedbsd-through-tor-hidden-service
|
||||||
|
tech and software,keybase.io,,http://fncuwbiisyh6ak3i.onion/,,-
|
||||||
|
tech and software,keybase.io,,http://keybase5wmilwokqirssclfnsqrjdsi7jdir5wy7y7iu3tanwmtp6oid.onion/,,https://keybase.io/docs/command_line/tor
|
||||||
|
tech and software,Mailpile,,http://clgs64523yi2bkhz.onion/,,-
|
||||||
|
tech and software,OnionShare,,http://lldan5gahapx5k7iafb3s4ikijc4ni7gx5iywdflkba5y2ezyg6sjgyd.onion/,,https://onionshare.org/
|
||||||
|
tech and software,Qubes OS,,http://qubesos4rrrrz6n4.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Qubes OS,,http://sik5nlgfc5qylnnsr57qrbm64zbdx6t4lreyhpon3ychmxmiem7tioad.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Tor Project,,http://expyuzz4wqqyqhjn.onion/,main site,-
|
||||||
|
tech and software,Tor Project,,http://yz7lpwfhhzcdyc5y.onion/,index of onion sites,https://onion.torproject.org/
|
||||||
|
tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/,main page,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/wiki/Forcing_.onion_on_whonix.org,index of onion sites,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Whonix,,http://dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/wiki/Main_page,wiki,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Whonix,,http://forums.dds6qkxpwdeubwucdiaord2xgbbeyds25rbsgr73tbfpqpt4a6vjwsyd.onion/,forums,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
tech and software,Whonix,,http://kkkkkkkkkk63ava6.onion/,,https://www.qubes-os.org/news/2018/01/23/qubes-whonix-next-gen-tor-onion-services/
|
||||||
|
web and internet,Archive Today (archive.is),,http://archivecaslytosk.onion/,,https://archive.is/
|
||||||
|
web and internet,Cloudflare Public DNS 1.1.1.1,,https://dns4torpnlfs2ifuz2s2yf3fc7rdmsbhm6rw75euj35pac6ap25zgqad.onion/,dns resolver by cloudflare,-
|
||||||
|
web and internet,DuckDuckGo,,https://3g2upl4pq6kufc4m.onion/,search engine,-
|
||||||
|
web and internet,Facebook,,https://m.facebookcorewwwi.onion/,mobile site,-
|
||||||
|
web and internet,Facebook,,https://www.facebookcorewwwi.onion/,desktop site,-
|
||||||
|
web and internet,Mail2Tor,,http://mail2tor2zyjdctd.onion/,mail gateway,-
|
||||||
|
web and internet,Protonmail,,https://protonirockerxow.onion/,,-
|
|
260
rwos-db.py
Executable file
260
rwos-db.py
Executable file
@ -0,0 +1,260 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from multiprocessing import Pool, Lock
|
||||||
|
import csv
|
||||||
|
import datetime as dt
|
||||||
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Database handle shared by URL.fetch1() and get_summary(); assigned in the
# __main__ block.
GLOBAL_DB = None # has to be a global because pickling :-(

# Input CSV read by the __main__ block.
MASTER_CSV = 'master.csv'
# SQLite file handed to Database().
DB_FILENAME = 'fetch.sqlite3'
# Value of curl's --proxy option in URL.fetch1().
SOCKS_PROXY = 'socks5h://127.0.0.1:9150/'
# Value of curl's --user-agent option in URL.fetch1().
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
# Base for synthetic status codes recorded when no real HTTP status was
# obtained; URL.fetchwrap() retries while the last code is >= BADNESS.
BADNESS = 900
# Timeout passed to Popen.communicate() for each curl run.
CURL_TIMEOUT = 120
# Argument to time.sleep() between attempts in URL.fetchwrap().
RETRY_SLEEP = 60
# Stand-in for empty/missing text values (see placeholder()).
PLACEHOLDER = '-'
# Number of worker processes created by do_fetch().
POOL_WORKERS = 8
# Value of the CSV 'flaky' column that marks a site as flaky.
YES = 'y'

# Emoji shortcodes chosen by get_summary() according to the status code.
EMOJI_UNSET = ':question:'
EMOJI_2xx = ':white_check_mark:'
EMOJI_3xx = ':arrow_right:'
EMOJI_4xx = ':negative_squared_cross_mark:'
EMOJI_5xx = ':red_circle:'
EMOJI_DEAD = ':sos:'
EMOJI_NO_DATA = ':interrobang:'

# Markdown fragments used by the print_* functions.
# NOTE(review): BB and BBB read identically in this view; leading whitespace
# may have been collapsed by the page this was copied from - confirm against
# the repository.
H1 = '#'
H2 = '##'
H3 = '###'
H4 = '####'
B = '*'
BB = ' *'
BBB = ' *'
LINE = '----'

# Schema, run through executescript() by Database.__init__().
SCHEMA_SQL = '''
PRAGMA journal_mode = wal;
PRAGMA foreign_keys = ON;
PRAGMA encoding = "UTF-8";
BEGIN TRANSACTION;
CREATE TABLE IF NOT EXISTS fetches (
id INTEGER PRIMARY KEY NOT NULL,
ctime INTEGER DEFAULT (CAST(strftime('%s','now') AS INTEGER)) NOT NULL,
run TEXT NOT NULL,
url TEXT NOT NULL,
attempt INTEGER NOT NULL,
http_code INTEGER NOT NULL,
curl_exit INTEGER NOT NULL,
out TEXT NOT NULL,
err TEXT NOT NULL
);
PRAGMA user_version = 1;
COMMIT;
'''

# Statement used by Database.insert(); the named parameters are the keys of
# the dict passed to it.
INSERT_SQL = '''
INSERT INTO
fetches (run, url, attempt, out, err, http_code, curl_exit)
VALUES (:run, :url, :attempt, :out, :err, :http_code, :curl_exit)
'''

# Query used by Database.summary(): newest rows first for one URL.
SUMMARY_SQL = '''
SELECT ctime, attempt, http_code
FROM fetches
WHERE url=:url
ORDER BY ctime DESC
LIMIT :limit
'''
|
||||||
|
|
||||||
|
def extract_hcode(s):  # static
    """Return the HTTP status code from the first line of header text.

    The first line is split on whitespace and its second field is parsed as
    an integer (e.g. ``HTTP/1.1 200 OK`` gives 200).  When that is not
    possible a synthetic code is returned instead:

    * ``BADNESS + 1`` - *s* is None
    * ``BADNESS + 2`` - *s* contains no lines
    * ``BADNESS + 3`` - the first line has fewer than two fields
    * ``BADNESS + 4`` - the second field is not an integer
    """
    if s is None:
        return BADNESS + 1
    lines = s.splitlines()
    if not lines:
        return BADNESS + 2
    fields = lines[0].split()
    if len(fields) < 2:
        return BADNESS + 3
    try:
        return int(fields[1])
    except ValueError:
        # only a malformed number is expected here; anything else should surface
        return BADNESS + 4
|
||||||
|
|
||||||
|
class Database:
    """Wrapper around the SQLite database that stores fetch results.

    Opening the database runs SCHEMA_SQL and fixes ``self.now``, a UTC
    timestamp string stored as the ``run`` value of every row inserted
    through this object.

    NOTE(review): the __main__ block creates one instance before do_fetch()
    starts a multiprocessing Pool, so with the fork start method the workers
    inherit this open connection; with the spawn start method GLOBAL_DB would
    still be None in the workers.  Confirm which platforms this must run on.
    """

    def __init__(self, filename):
        """Open (creating if needed) the database at *filename*."""
        self.connection = sqlite3.connect(filename)
        # ignore undecodable bytes in TEXT columns rather than raising
        self.connection.text_factory = self._decode_text
        self.cursor = self.connection.cursor()
        self.cursor.executescript(SCHEMA_SQL)
        self.now = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        self.lock = Lock()

    @staticmethod
    def _decode_text(raw):
        """Decode the bytes of a TEXT value as UTF-8, dropping invalid sequences."""
        return raw.decode('utf-8', 'ignore')

    def commit(self):
        """Commit the current transaction."""
        self.connection.commit()

    def close(self):
        """Commit any pending work and close the connection."""
        self.commit()
        self.connection.close()

    def summary(self, url, limit=10):
        """Return up to *limit* ``(ctime, attempt, http_code)`` rows for *url*, newest first."""
        params = {'url': url, 'limit': limit}
        rows = self.cursor.execute(SUMMARY_SQL, params)
        return rows.fetchall()

    def insert(self, rowhash):
        """Insert one fetch result and commit it.

        *rowhash* supplies the named parameters of INSERT_SQL except ``run``,
        which is filled in (in place) from ``self.now``.
        """
        rowhash['run'] = self.now
        # the context manager releases the lock even if execute/commit raises
        with self.lock:
            self.cursor.execute(INSERT_SQL, rowhash)
            self.commit()
|
||||||
|
|
||||||
|
class URL:
    """One URL to probe, together with the outcome of its latest probe.

    Attributes:
        url: the address handed to curl.
        attempt: number of fetches performed so far.
        last_code: status code of the latest fetch (None before the first).
    """

    def __init__(self, url):
        self.url = url
        self.attempt = 0
        self.last_code = None

    def fetch1(self):
        """Run one ``curl --head`` through the SOCKS proxy and record the result.

        The status code comes from extract_hcode(); if curl does not finish
        within CURL_TIMEOUT both the HTTP code and the exit code are recorded
        as ``BADNESS + 10``.  The result is stored in GLOBAL_DB and
        ``last_code``/``attempt`` are updated.
        """
        args = ['curl', '--head', '--user-agent', USER_AGENT, '--proxy', SOCKS_PROXY, self.url]
        p = subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        try:
            (out, err) = p.communicate(timeout=CURL_TIMEOUT)
            hcode = extract_hcode(out)
            if hcode == 200:
                err = PLACEHOLDER
            ecode = p.returncode
        except subprocess.TimeoutExpired as e:
            # communicate() does not stop the child on timeout: kill it and
            # reap it so that no curl process or pipe is left behind
            p.kill()
            p.communicate()
            (out, err) = (PLACEHOLDER, str(e))
            hcode = BADNESS + 10
            ecode = BADNESS + 10
        self.last_code = hcode
        self.attempt += 1
        GLOBAL_DB.insert(dict(
            url=self.url,
            attempt=self.attempt,
            out=out,
            err=err,
            http_code=hcode,
            curl_exit=ecode,
        ))

    def fetchwrap(self):
        """Fetch up to three times, stopping at the first code below BADNESS.

        RETRY_SLEEP seconds are slept between attempts, but not after the
        last one, where there is nothing left to wait for.
        """
        attempts = 3
        for i in range(1, attempts + 1):
            self.fetch1()
            print('try{0}: {1} {2}'.format(i, self.url, self.last_code))
            if self.last_code < BADNESS:
                return
            if i < attempts:
                time.sleep(RETRY_SLEEP)
|
||||||
|
|
||||||
|
def placeholder(s):
    """Return PLACEHOLDER when *s* is None or the empty string, else *s* unchanged."""
    if s is None or s == '':
        return PLACEHOLDER
    return s
|
||||||
|
|
||||||
|
def caps(s):
    """Return *s* with every whitespace-separated word capitalised.

    Runs of whitespace collapse to single spaces and the remainder of each
    word is lower-cased.
    """
    words = s.lower().split()
    capitalised = map(str.capitalize, words)
    return ' '.join(capitalised)
|
||||||
|
|
||||||
|
def get_categories(chunk):
    """Return the distinct 'category' values of the rows in *chunk*, sorted."""
    seen = {row['category'] for row in chunk}
    return sorted(seen)
|
||||||
|
|
||||||
|
def get_placeholder(row, k):
    """Look up *k* in *row* and pass the value (or '' if absent) through placeholder()."""
    value = row.get(k, '')
    return placeholder(value)
|
||||||
|
|
||||||
|
def sort_using(chunk, k):
    """Return a new list with the rows of *chunk* ordered by their value for key *k*."""
    ordered = list(chunk)
    ordered.sort(key=lambda item: item[k])
    return ordered
|
||||||
|
|
||||||
|
def grep_using(chunk, k, v, invert=False):
    """Return the rows of *chunk* whose value for *k* equals *v*.

    A row without key *k* is treated as having the value ''.  With *invert*
    set, the rows that do NOT match are returned instead.
    """
    want_match = not invert
    return [row for row in chunk if (row.get(k, '') == v) == want_match]
|
||||||
|
|
||||||
|
def get_proof(row):
    """Return the markdown text describing how the site in *row* is proven.

    A 'proof_url' of '-' (or an empty/missing one) and the special value
    'ssl' map to fixed phrases; anything else is rendered as a link.
    """
    proof = get_placeholder(row, 'proof_url')
    if proof == '-':
        text = 'proof to be done'
    elif proof == 'ssl':
        text = 'check tls/ssl certificate'
    else:
        text = '[proof link]({})'.format(proof)
    return text
|
||||||
|
|
||||||
|
def get_summary(url):
    """Return a list of status lines for the most recent fetches of *url*.

    Each line has the form ``<emoji> attempt=<n> code=<code> time=<utc time>``
    and is built from one row of GLOBAL_DB.summary(url).  When nothing has
    been recorded the result is a one-element list holding EMOJI_NO_DATA, so
    that callers which iterate over the result print a single line.
    """
    rows = GLOBAL_DB.summary(url)
    if not rows:
        return [EMOJI_NO_DATA]
    result = []
    for when, attempt, code in rows:
        if 200 <= code < 300:
            emoji = EMOJI_2xx
        elif 300 <= code < 400:
            emoji = EMOJI_3xx
        elif 400 <= code < 500:
            emoji = EMOJI_4xx
        elif 500 <= code < 600:
            emoji = EMOJI_5xx
        elif code >= BADNESS:
            emoji = EMOJI_DEAD
        else:
            # codes outside every range above
            emoji = EMOJI_UNSET
        t = datetime.fromtimestamp(when, timezone.utc)
        result.append('{0} attempt={1} code={2} time={3}'.format(emoji, attempt, code, t))
    return result
|
||||||
|
|
||||||
|
def print_chunk(chunk, title, print_bar=True):
    """Print one markdown section: a heading for *title*, then every site in *chunk*.

    Sites are listed in 'site_name' order.  Each gets a linked sub-heading,
    its comment (when it has one), its proof text and, if *print_bar* is
    true, the lines returned by get_summary() for its onion URL.
    """
    print(LINE)
    print(H2, caps(title))
    print()
    for site in sort_using(chunk, 'site_name'):
        heading = '[{site_name}]({onion_url})'.format(**site)
        print(H3, heading)
        note = get_placeholder(site, 'comment')
        if note != '-':
            print(B, '*{}*'.format(note))
        # the proof line is always printed, as encouragement to fix it
        print(B, '*{}*'.format(get_proof(site)))
        if print_bar:
            for status_line in get_summary(site['onion_url']):
                print(BB, status_line)
        print()
|
||||||
|
|
||||||
|
def poolhook(x):
    """Run the fetch-with-retries cycle of *x*; this is the function do_fetch() maps over its work list."""
    run = x.fetchwrap
    run()
|
||||||
|
|
||||||
|
def do_fetch(master):
    """Probe every site in *master* that is not marked flaky, using a pool of POOL_WORKERS processes."""
    stable = grep_using(master, 'flaky', YES, invert=True)
    jobs = []
    for row in stable:
        jobs.append(URL(row['onion_url']))
    with Pool(POOL_WORKERS) as pool:
        pool.map(poolhook, jobs)
|
||||||
|
|
||||||
|
def print_index(cats):
    """Print the markdown index: one bullet per category, linked to its section anchor."""
    print(LINE)
    print(H1, 'Index')
    print()
    for name in cats:
        # anchor = lower-cased category with spaces turned into hyphens
        anchor = name.lower().replace(' ', '-')
        print(B, '[{0}](#{1})'.format(caps(name), anchor))
    print()
|
||||||
|
|
||||||
|
def do_print(master):
    """Print the whole listing: the index, one section per category, then the flaky sites.

    Category sections contain only sites not marked flaky; the flaky sites
    are gathered in a final section printed without fetch summaries.
    """
    cats = get_categories(master)
    print_index(cats)
    stable = grep_using(master, 'flaky', YES, invert=True)
    for cat in cats:
        print_chunk(grep_using(stable, 'category', cat), cat)
    print_chunk(grep_using(master, 'flaky', YES), 'Flaky Sites', print_bar=False)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # csv: category, site_name, flaky, onion_url, comment, proof_url
    # newline='' is what the csv module asks for; the explicit encoding keeps
    # the result independent of the locale the script is started under
    with open(MASTER_CSV, 'r', newline='', encoding='utf-8') as fh:
        master = list(csv.DictReader(fh))

    GLOBAL_DB = Database(DB_FILENAME)
    try:
        # each command-line word is an action, run in the order given;
        # words other than 'fetch' and 'print' are ignored
        for arg in sys.argv[1:]:
            if arg == 'fetch':
                do_fetch(master)
            if arg == 'print':
                do_print(master)
    finally:
        # commit and close even when an action raises
        GLOBAL_DB.close()
|
32
wrapper.sh
Executable file
32
wrapper.sh
Executable file
@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/sh
# Refresh master.csv from the published spreadsheet, probe the sites with
# rwos-db.py and rebuild README.md.  Everything the run prints goes to a
# pair of timestamped log files.

url="https://docs.google.com/spreadsheets/d/e/2PACX-1vRjEEqZ2bGYQcvTvWqJfNvw_NCTrcIM9C2GzriqGyEfz_8C9ZAj2c9gaR6ew6u4X-qRsYxgeD_zZMxD/pub?gid=0&single=true&output=csv"
now=$(date "+%Y%m%d%H%M%S")
out="log-$now.out.txt"
err="log-$now.err.txt"
csv="master.csv"
exe="./rwos-db.py"
readme="README.md"
readme_new="$readme.$$.new"

exec </dev/null >"$out" 2>"$err"

set -x

# mktemp creates the file itself under an unpredictable name, unlike a
# name built from $$; the trap removes it (and a half-written README)
# however the script ends
tmp=$(mktemp "${TMPDIR:-/tmp}/onion-tmp-XXXXXX") || exit 1
trap 'rm -f "$tmp" "$readme_new"' EXIT
trap 'exit 1' HUP INT TERM

# --fail: an HTTP error must not be saved as if it were the spreadsheet
curl --fail "$url" > "$tmp" || exit 1

# only replace the master list when something non-empty and different arrived
if [ -s "$tmp" ] ; then
    cmp "$tmp" "$csv" || cp "$tmp" "$csv"
fi

"$exe" fetch || exit 1

# build the new README beside the old one and swap it in only when the
# print step succeeded; "exit 1" inside ( ) leaves just the subshell, so
# its status is checked again outside
(
    cat 01-preamble.md
    echo ""
    "$exe" print || exit 1
    echo ""
    cat 02-footnotes.md
    echo ""
) > "$readme_new" || exit 1

mv "$readme_new" "$readme" || exit 1

exit 0
|
Loading…
Reference in New Issue
Block a user