From 83503400586a6b145f063da1529016a882f0d1be Mon Sep 17 00:00:00 2001 From: Alec Muffett Date: Sat, 9 Apr 2022 22:09:26 +0000 Subject: [PATCH] auto-update on Sat Apr 9 22:09:26 UTC 2022 --- .02-footnotes.md.swp | Bin 12288 -> 0 bytes 01-preamble.md | 2 ++ 02-footnotes.md | 7 ++++--- README.md | 17 ++++++++++------- rwos-db.py | 31 ++++++++++++++++++++++--------- 5 files changed, 38 insertions(+), 19 deletions(-) delete mode 100644 .02-footnotes.md.swp diff --git a/.02-footnotes.md.swp b/.02-footnotes.md.swp deleted file mode 100644 index fa987b85d89e1b1d8b8ca0c626ca14c641e64c7d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeHN&u=706|Rl=9Rdh08~{CRlGwvckL@HP@d}Z|b|7L`+1QgrXbI!$?l;pFcX#zx zReRPF2^WrBxBzF)NDw%K#E-v$D;#n_NC5;~fD7NNo>>o3*p0Xa^+=y)rl;!Fd*A!s ztFm0_&Wq1pr^l0Bh1bWF`s809Oy8Wmp??13`;;y@tK|7TUcRkIsdnxj!_;fh;5b+} zs%&lWK6b3j(ZaeiAK{6uN8SYX}d`YVOUWWVMdj@(2?#@6PW~0X+c;S(~ z-5nXYx$#;0#K&K_`<(vQo`If$o`If$o`If$o`If$o`If$|0@PUb47g%5#6_p>S+0X z{D1B}a325v zU;X|6ua7GAXW&o3kAMQ$2LARDrG5r{ANU^dD)3EU3Va%P;{l~!2WG$~@Xrq`^$*}R z;8h?4Qs6VdU+%{mKn^?!d0z2f#6KKk%FPDfMmO5V!&;;1}Y-mrW?zlysKawhluI z?t&`zUe8TEr_(3nRiJh@;}A~C)-+j|5ZEQkcnSs6Oncn@38d?sDvJ+{u~{(ZObdM> z&iFR13}Kj4o`eZcbprc;B+8M$h@>ts;cR?~ljx=vR{2YP{ycf4hg& znb<}v+T5gPnDZ3Tp}Do8bxbhQ=wMP)n$Sf@GwY)JoLPT?2r9BOgeNzfGTrF3WJ;Y~ zcy>NfUztp9oQ&)_yU}wpWEi8>;Gt@Q(1a*qFheK-R$E_Q(881@Wk^9_(rPdW<<;B` z1~ekEwq}fcVcRk%+@YdWe6797*>O(VljlqV&|blW=9+t?b0(hJJ)y$*idikqEN zaarWWF&sh+Oqgf<*~MUxs2HE$7*g7bpU7a!g+4d7b)COU9|i*-kWcOM!C*+4uDj>T zR87ehmZXgodH@}v(Df+inQlv1bnQ|I(QV=S+~^oD33;ky$e^?}$7kz|CPas}O+yLg zk+;<)`<-KhCD{W|t+?cKq4ljqA`7BOuQ44%xb%|LrB{A^&B?)yp(?biW^F0STUwWo z78$LKXL?0~5nq=&pe;`32};Y+)15N5#hD~YMEbID%^BR6u(2#=X{0chRMb!gOfNX4 z6|tTcG3%8+6V)g+qD8@yI2qZnizSnZL3xlNCa1e1GnunZA)e)YZZLwjUgGq4a)8RF z!Y*Wdj_Z10E)-P44Us5UDd#!lW9p@-ptOKo4#~Q1b?8OHh=gGven&)7u7p%qrUvIw zSvOitJN69*ME|TkGfYrJUShR4N}>LxM`$D+re`e_CF0Q~$|E;u6JRFJmP8Io=-pQk zeJr<)tC6o&5kkhfh3bXt!JtBJ@Fhrjb;IGzEi{qf5uZAZ7EXs!ju5%1cOfYiBA4YA zXRTKuenYWr+^IQr*v1LM$eQcaI2&wc%R3QB0#FSL=9=Qdr{ZMSi6+-h=tN2wqEL!Z z{HKrYo#Jf+&q|uZF*sL%F%+ePe9$(f6Hu;$rkS%9s@+SNRX6(!my^-2%e;O#tn##h zB=T(o25v^WW%7ZnB9zIK+-9xtj}=fF47s_rek%?og}!||@jNa~D&NQ1>eGmwvuK4y zsLJiDShB*!P^k{#J1g6q_d5rl*xeBq*R^9RwryDcyz{91EcuaA2(8D^oYDIo@)H+X z4ctZkPM_G>>)sxbX_YR~)hYxK4T%1bPF~H3%-&S(r1^ zaBg&B-Fz(H#@CDwQ#+fAp=lTKG%f-bk6(K3WMq&KSEE}0lT0Ox#-`GuaGi9NWs~xt z0gQou)>%Pv4>wWQE%tyY=QzoT%;O%HbI_)1gfW)5HhQ`UL)5a&0t&lR>_H^pAv%)Z zPGW`_o6py5EBgStx5tLgU8A^(9RzoSW4mh`NS6Kox_^tHAl-2#rwKPcK6>i#`VlS^ zZzs|j_iJA4$4Fy2^8yV-OEVkZsg(;Bh1IA->&`kM(N={62#Q)u(PBrhx{fHU3iXJ? zVOLtysx;7jZ~byD$2dLImjx+ZO&rMW0!ILRUb2X8jeR)7K#i89v45k=qN!k;sWDh#@etc-T2rS%~Gm4!tCDNVD2v$JWX-R0y?8IiJ15p>7_X0Q{0^VNYZ zb0P7?_)E9G`1K?nG~m?)4t6P}5?T=z#{(3doLr-$7wLdqFf)^hbTBYx>)h}{sF8ej z2bKhC)N=GdVvfVNJ75FnGY{sGjob{}fRIDx86o&LH>53YhPh~IE z@TQIjWk}l8Ip2;4g@TR4^sKa=p)ee`;8z}H61Um7mE|^O0Ou=Qx)0a|#3VkHx}LYl zg{LtBe@f0*h#e;xY;m#Gf^TSMu506?f8<)!=-%hMAVtsUXCCv>;=njPv$|K`o!9>j DxH@ob diff --git a/01-preamble.md b/01-preamble.md index bb4f492..b4d034a 100644 --- a/01-preamble.md +++ b/01-preamble.md @@ -9,6 +9,8 @@ This is a list of substantial, commercial-or-social-good mainstream websites whi - licensed: cc-by-sa - author/editor: alec muffett +## Legend/Key for Symbols + You can find techical details and the legend/key for symbols in the [footnotes section](#footnotes), below. ## Regarding Updates and Suggestions diff --git a/02-footnotes.md b/02-footnotes.md index 509abbf..e08615e 100644 --- a/02-footnotes.md +++ b/02-footnotes.md @@ -14,8 +14,7 @@ - :eight_spoked_asterisk: site up, and redirected to another page - :no_entry_sign: site up, but could not access the page - :stop_sign: site up, but reported a system error -- :sos: site returned no data, or is down, or curl experienced a transient - network error (may be a problem with the RWOS server connection) +- :sos: site returned no data, or is down, or curl experienced a transient or permanent network error; may also reflect a problem with the RWOS server connection - :new: site is newly added, no data yet You can also see the [history of updates](https://github.com/alecmuffett/real-world-onion-sites/commits/master/README.md). @@ -26,7 +25,9 @@ Mouse-over the icons for details of HTTP codes, curl exit statuses, and the number of attempts made on each site. - codes [are from HTTP and are documented elsewhere](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes); RWOS-internal ones include: - - `901`, `902`, `903` - malformed HTTP response + - `901` - malformed HTTP response + - `902` - malformed HTTP response + - `903` - malformed HTTP response, commonly including (e.g.) invalid HTTPS certificate - `904` - HTTP status code parse error - `910` - connection timeout - exits [are from Curl and are documented elsewhere](https://curl.haxx.se/libcurl/c/libcurl-errors.html); common ones include: diff --git a/README.md b/README.md index a77d819..84aa653 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ This is a list of substantial, commercial-or-social-good mainstream websites whi - licensed: cc-by-sa - author/editor: alec muffett +## Legend/Key for Symbols + You can find techical details and the legend/key for symbols in the [footnotes section](#footnotes), below. ## Regarding Updates and Suggestions @@ -46,7 +48,7 @@ You can find techical details and the legend/key for symbols in the [footnotes s * link: [https://lxwu7pwyszfevhglxfgaukjqjdk2belosfvsl2ekzx3vrboacvewc7qd.onion/](https://lxwu7pwyszfevhglxfgaukjqjdk2belosfvsl2ekzx3vrboacvewc7qd.onion/) * plain: `https://lxwu7pwyszfevhglxfgaukjqjdk2belosfvsl2ekzx3vrboacvewc7qd.onion/` * proof: [link](https://ahf.me) -* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::sos::sos::sos::sos::sos: +* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::old_key::old_key::old_key::old_key::old_key: ### [Ctrl blog](http://v65ngaoj2nyaiq2ltf4uzota254gnasarrkuj4aqndi2bb5lw6frt3ad.onion/) * transport: :small_red_triangle: **HTTP** @@ -67,7 +69,7 @@ You can find techical details and the legend/key for symbols in the [footnotes s * link: [https://kushal76uaid62oup5774umh654scnu5dwzh4u2534qxhcbi4wbab3ad.onion/](https://kushal76uaid62oup5774umh654scnu5dwzh4u2534qxhcbi4wbab3ad.onion/) * plain: `https://kushal76uaid62oup5774umh654scnu5dwzh4u2534qxhcbi4wbab3ad.onion/` * proof: [link](https://kushaldas.in/) -* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::sos::sos::sos::sos::sos: +* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::old_key::old_key::old_key::old_key::old_key: ### [Michael Altfield](http://michaelahgu3sqef5yz3u242nok2uczduq5oxqfkwq646tvjhdnl35id.onion/) * transport: :small_red_triangle: **HTTP** @@ -88,7 +90,7 @@ You can find techical details and the legend/key for symbols in the [footnotes s * link: [https://nickf43ab43xxf3yqgzy5uedsjij6h473rmbyzq6inohcnr3lohlu3yd.onion/](https://nickf43ab43xxf3yqgzy5uedsjij6h473rmbyzq6inohcnr3lohlu3yd.onion/) * plain: `https://nickf43ab43xxf3yqgzy5uedsjij6h473rmbyzq6inohcnr3lohlu3yd.onion/` * proof: [link](https://frichetten.com) -* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::sos::sos::sos::sos: +* check: :white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::white_check_mark::old_key::old_key::old_key::old_key: ### [Shen's Essays](https://shen.hongio267dx4o2ofkn4ddsztu4ok2vq4euc7sxumbi7kcfd64ije62ad.onion/) * transport: :closed_lock_with_key: **HTTPS** @@ -903,7 +905,7 @@ You can find techical details and the legend/key for symbols in the [footnotes s * link: [https://hzwjmjimhr7bdmfv2doll4upibt5ojjmpo3pbp5ctwcg37n3hyk7qzid.onion/](https://hzwjmjimhr7bdmfv2doll4upibt5ojjmpo3pbp5ctwcg37n3hyk7qzid.onion/) * plain: `https://hzwjmjimhr7bdmfv2doll4upibt5ojjmpo3pbp5ctwcg37n3hyk7qzid.onion/` * proof: :lock: see tls/ssl certificate -* check: :sos::sos::sos::sos::sos::sos::sos::sos::sos::sos::sos::sos::white_check_mark::white_check_mark: +* check: :old_key::old_key::old_key::old_key::old_key::old_key::old_key::old_key::old_key::old_key::old_key::old_key::white_check_mark::white_check_mark: ### [DEF CON Groups](http://jrw32khnmfehvdsvwdf34mywoqj5emvxh4mzbkls6jk2cb3thcgz6nid.onion/) * transport: :small_red_triangle: **HTTP** @@ -1411,8 +1413,7 @@ These sites have apparently stopped responding. - :eight_spoked_asterisk: site up, and redirected to another page - :no_entry_sign: site up, but could not access the page - :stop_sign: site up, but reported a system error -- :sos: site returned no data, or is down, or curl experienced a transient - network error (may be a problem with the RWOS server connection) +- :sos: site returned no data, or is down, or curl experienced a transient or permanent network error; may also reflect a problem with the RWOS server connection - :new: site is newly added, no data yet You can also see the [history of updates](https://github.com/alecmuffett/real-world-onion-sites/commits/master/README.md). @@ -1423,7 +1424,9 @@ Mouse-over the icons for details of HTTP codes, curl exit statuses, and the number of attempts made on each site. - codes [are from HTTP and are documented elsewhere](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes); RWOS-internal ones include: - - `901`, `902`, `903` - malformed HTTP response + - `901` - malformed HTTP response + - `902` - malformed HTTP response + - `903` - malformed HTTP response, commonly including (e.g.) invalid HTTPS certificate - `904` - HTTP status code parse error - `910` - connection timeout - exits [are from Curl and are documented elsewhere](https://curl.haxx.se/libcurl/c/libcurl-errors.html); common ones include: diff --git a/rwos-db.py b/rwos-db.py index f51c24c..ebf64bd 100755 --- a/rwos-db.py +++ b/rwos-db.py @@ -39,6 +39,7 @@ EMOJI_4xx = ':no_entry_sign:' EMOJI_5xx = ':stop_sign:' EMOJI_DEAD = ':sos:' EMOJI_NO_DATA = ':new:' +EMOJI_BAD_CERT = ':old_key:' H1 = '#' H2 = '##' @@ -77,7 +78,7 @@ VALUES (:run, :url, :attempt, :out, :err, :http_code, :curl_exit) ''' SUMMARY_SQL = ''' -SELECT foo.ctime, foo.attempt, foo.http_code, foo.curl_exit +SELECT foo.ctime, foo.attempt, foo.http_code, foo.curl_exit, foo.err FROM fetches foo INNER JOIN ( SELECT url, run, MAX(attempt) AS pivot @@ -113,10 +114,23 @@ def extract_hcode(s): # static code = BADNESS + 4 return code +def placeholder(s): + if s == '': return PLACEHOLDER + if s == None: return PLACEHOLDER + return s + +def unicode_cleanup(x): + x = placeholder(x) # canonicalise blanks and None + if isinstance(x, str): # native python3 utf-8 string + result = x + else: # is byte array + result = x.decode('utf-8', 'ignore') + return result + class Database: def __init__(self, filename): self.connection = sqlite3.connect(filename) - self.connection.text_factory = lambda x: unicode(x, UTF8, 'ignore') # ignore bad unicode shit + self.connection.text_factory = lambda x: unicode_cleanup(x) self.cursor = self.connection.cursor() self.cursor.executescript(SCHEMA_SQL) self.now = time.strftime('%Y%m%d%H%M%S', time.gmtime()) @@ -187,11 +201,6 @@ class URL: if self.last_code < BADNESS: return time.sleep(RETRY_SLEEP) -def placeholder(s): - if s == '': return PLACEHOLDER - if s == None: return PLACEHOLDER - return s - def caps(s): return ' '.join([w.capitalize() for w in s.lower().split()]) @@ -229,7 +238,8 @@ def get_summary(url): if len(rows) == 0: return ( EMOJI_NO_DATA, ) result = [] - for when, attempt, hcode, ecode in rows: + for when, attempt, hcode, ecode, errstr in rows: + errstr = unicode_cleanup(errstr) # THIS SHOULD NOT BE NEEDED, WHY? PERHAPS BECAUSE MULTI-LINE? emoji = EMOJI_UNSET if hcode >= 200 and hcode < 300: emoji = EMOJI_2xx @@ -240,7 +250,10 @@ def get_summary(url): elif hcode >= 500 and hcode < 600: emoji = EMOJI_5xx elif hcode >= BADNESS: - emoji = EMOJI_DEAD + if 'SSL certificate' in errstr: + emoji = EMOJI_BAD_CERT + else: + emoji = EMOJI_DEAD t = datetime.fromtimestamp(when, timezone.utc) result.append('{0}'.format(emoji, attempt, hcode, ecode, t)) return result