mirror of
https://github.com/alecmuffett/real-world-onion-sites.git
synced 2024-12-29 00:56:12 -05:00
58 lines
1.7 KiB
Python
Executable File
58 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import csv
|
|
import pprint
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Search crt.sh for non-expired certificates whose dNSName matches "%.onion"
# ("%25" is the URL-encoded "%" wildcard; match=ILIKE is passed through as-is).
url = 'https://crt.sh/?dNSName=%25.onion&exclude=expired&match=ILIKE'

# (connect, read) timeouts in seconds. requests applies no timeout unless one
# is given, so without this a stalled server would hang the script forever.
# The read timeout is deliberately generous because the query can be slow.
request_timeout = (30, 300)

# The response body is downloaded eagerly (stream is not enabled), so it
# remains usable after the session and its connections have been closed.
with requests.Session() as session:
    response = session.get(url, timeout=request_timeout)

# One dict per certificate row; filled in by the table-parsing code below.
results = []

# Matches a run of 16 characters from [2-7a-z], optionally followed by 40
# more (56 in total), then ".onion" at end of string or before whitespace.
# Presumably the 16/56 lengths correspond to v2/v3 onion addresses.
onion_re = re.compile(r'[2-7a-z]{16}([2-7a-z]{40})?\.onion(\s|$)')

# Anything other than a plain 200 is treated as fatal.
status = response.status_code
if status != 200:
    raise RuntimeError('http status: {}'.format(status))

html_doc = response.text
soup = BeautifulSoup(html_doc, 'html.parser')
# The certificate listing is taken to be the third <table> in the document.
# Fail with a descriptive error rather than a bare IndexError when the page
# does not have that shape (e.g. a layout change or an HTML error page).
tables = soup.find_all('table')
if len(tables) < 3:
    raise RuntimeError(
        'unexpected page layout: found {} table(s), need at least 3'.format(len(tables)))
table = tables[2]
table_rows = table.find_all('tr')

# Keys assigned to the first seven cells of each data row, in column order.
# The reporting code reads 'at'/'nb'/'na' as date/not_before/not_after,
# 'cn' and 'san' as whitespace-separated names, and 'in' as the issuer string.
field_names = ('id', 'at', 'nb', 'na', 'cn', 'san', 'in')

for tr in table_rows:
    # Replace <br> tags with spaces so multi-valued cells become
    # whitespace-separated text instead of being run together.
    for br in tr.find_all("br"):
        br.replace_with(" ")

    fields = [cell.text for cell in tr.find_all('td')]
    if not fields:
        # Rows without any <td> cells (such as a header row) carry no data.
        continue
    if len(fields) < len(field_names):
        raise RuntimeError(
            'unexpected row layout: found {} cell(s), need at least {}: {!r}'.format(
                len(fields), len(field_names), fields))

    # zip() stops after the seven named columns; extra cells are ignored.
    results.append(dict(zip(field_names, fields)))
# Print a Markdown bullet list: one top-level bullet per distinct .onion
# name (first occurrence wins), followed by bullets with the certificate
# dates and the issuer's CN.
seen = set()
for row in results:
    dates = 'date={0} not_before={1} not_after={2}'.format(row['at'], row['nb'], row['na'])

    # Candidate names come from both the CN cell and the SAN cell.
    names = row['cn'].split() + row['san'].split()

    # The issuer string is lower-cased and split on commas; the first
    # component starting with "cn=" supplies the CA name. The appended
    # sentinel guarantees there is always one to find.
    issuer_parts = [part.strip() for part in row['in'].lower().split(',')]
    issuer_parts.append('cn=BAD OR MISSING CN FIELD IN CT LOG')
    issuer_cn = next(part for part in issuer_parts if part.startswith('cn='))[3:]

    for name in names:
        if name in seen or not onion_re.search(name):
            continue
        seen.add(name)

        if name.startswith('*'):
            # Names beginning with "*" are printed as code only, without a link.
            headline = '* `{}`'.format(name)
        else:
            headline = '* [`{san}`](https://{san})'.format(san=name)

        print(headline)
        print(' * {0}'.format(dates))
        print(' * **{0}**'.format(issuer_cn))