update peer crawling to be recursive

lza_menace 2023-04-15 12:25:48 -07:00
parent 574761ec8e
commit a85198c9ed
4 changed files with 42 additions and 34 deletions

View file

@@ -1,3 +1,3 @@
 */3 * * * * sh -c "cd ~/git/monero.fail; ./bin/cmd validate"
 30 */4 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd check"
-0 */2 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers"
+0 */16 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers"

View file

@@ -1,5 +1,6 @@
 import logging
 from datetime import datetime, timedelta
+from time import sleep
 import geoip2.database
 import arrow
@@ -76,41 +77,27 @@ def check():
 @bp.cli.command("get_peers")
 def get_peers():
+    """
+    This command requests peers from the configured upstream node and fans out
+    to recursively scrape all other peers on the network. This will take
+    several hours to run.
+    """
+    # keep track of all peers
     all_peers = []
     print("[+] Preparing to crawl Monero p2p network")
     print(f"[.] Retrieving initial peers from {config.NODE_HOST}:{config.NODE_PORT}")
-    initial_peers = retrieve_peers(config.NODE_HOST, config.NODE_PORT)
-    with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader:
-        for peer in initial_peers:
-            if peer not in all_peers:
-                all_peers.append(peer)
-            _url = urlparse(peer)
-            url = f"{_url.scheme}://{_url.netloc}".lower()
-            if not Peer.select().where(Peer.url == peer).exists():
-                response = reader.city(_url.hostname)
-                p = Peer(
-                    url=peer,
-                    country=response.country.name,
-                    city=response.city.name,
-                    postal=response.postal.code,
-                    lat=response.location.latitude,
-                    lon=response.location.longitude,
-                )
-                p.save()
-                print(f"{peer} - saving new peer")
-            else:
-                p = Peer.select().where(Peer.url == peer).first()
-                p.datetime = datetime.now()
-                p.save()
-            try:
-                print(f"[.] Retrieving crawled peers from {_url.netloc}")
-                new_peers = retrieve_peers(_url.hostname, _url.port)
-                for peer in new_peers:
-                    if peer not in all_peers:
-                        all_peers.append(peer)
+
+    # start initial list of peers to scan
+    peers_to_scan = retrieve_peers(config.NODE_HOST, config.NODE_PORT)
+    print(f"[+] Found {len(peers_to_scan)} initial peers to begin scraping.")
+    sleep(3)
+
+    # helper function to add a new peer to the db or update an existing one
+    def save_peer(peer):
+        with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader:
             _url = urlparse(peer)
             url = f"{_url.scheme}://{_url.netloc}".lower()
+            # add new peer if not in db
             if not Peer.select().where(Peer.url == peer).exists():
                 response = reader.city(_url.hostname)
                 p = Peer(
@@ -123,19 +110,38 @@ def get_peers():
                 )
                 p.save()
                 print(f"{peer} - saving new peer")
+            # or update if it does
             else:
                 p = Peer.select().where(Peer.url == peer).first()
                 p.datetime = datetime.now()
                 p.save()
+            return _url
+
+    # iterate over the whole list until all peers have been scanned
+    # add new peers to the list
+    # skip the peer if we've seen it already
+    try:
+        while peers_to_scan:
+            _peer = peers_to_scan[0]
+            peers_to_scan.pop(0)
+            if _peer in all_peers:
+                print(f'already found {_peer}')
+                continue
+            all_peers.append(_peer)
+            try:
+                peer = save_peer(_peer)
+                peers_to_scan += retrieve_peers(peer.hostname, peer.port)
             except:
                 pass
+    except KeyboardInterrupt:
+        print('Stopped.')
+
     print(
         f"[+] Found {len(all_peers)} peers from {config.NODE_HOST}:{config.NODE_PORT}"
     )
     print("[+] Deleting old Monero p2p peers")
     for p in Peer.select():
-        if p.hours_elapsed() > 24:
+        if p.hours_elapsed() > config.PEER_LIFETIME:
             print(f"[.] Deleting {p.url}")
             p.delete_instance()
     rw_cache("map_peers", list(Peer.select().execute()))
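
For reference, the change above boils down to a queue-based crawl: a list of peers still to scan, a list of peers already seen, and each scanned peer's own peer list appended back onto the queue. A minimal standalone sketch of that pattern, assuming only a retrieve_peers(host, port) helper like the one used above (database and GeoIP bookkeeping omitted):

    from urllib.parse import urlparse

    def crawl(seed_host, seed_port, retrieve_peers):
        seen = set()                                  # peers already visited
        queue = retrieve_peers(seed_host, seed_port)  # peers still to scan
        while queue:
            peer = queue.pop(0)
            if peer in seen:
                continue
            seen.add(peer)
            parsed = urlparse(peer)
            try:
                # each reachable peer may report more peers; append them to the queue
                queue += retrieve_peers(parsed.hostname, parsed.port)
            except Exception:
                pass
        return seen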

View file

@@ -13,3 +13,4 @@ TOR_PORT = environ.get("TOR_PORT", 9050)
 NODE_HOST = environ.get("NODE_HOST", "singapore.node.xmr.pm")
 NODE_PORT = environ.get("NODE_PORT", 18080)
 HEALTHY_BLOCK_DIFF = int(environ.get("HEALTHY_BLOCK_DIFF", 500))
+PEER_LIFETIME = int(environ.get("PEER_LIFETIME", 96))

View file

@@ -60,9 +60,10 @@
 <p>Source Node: {{ source_node }}</p>
 <p>
 This is not a full representation of the entire Monero network,
-just a look into the peers being crawled from the source node ({{ source_node }}).
-New peers are searched for on a recurring interval throughout the day.
-Older peers are shown as more transparent and will be removed if not seen again after 24 hours.
+just a look into the peers being recursively crawled from the source node ({{ source_node }}).
+New peers are searched for once per day.
+Older peers are shown as more transparent and will be removed
+if not seen again after {{ config.PEER_LIFETIME }} hours.
 </p>
 <br>
 <a href="/">Go home</a>
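
The template wording above mirrors the pruning step at the end of get_peers(): any peer whose Peer.hours_elapsed() exceeds config.PEER_LIFETIME (96 hours by default) is deleted. A rough sketch of that age check, assuming Peer.datetime records when the peer was last seen (the hours_elapsed() implementation itself is not part of this diff):

    from datetime import datetime

    def hours_elapsed(last_seen: datetime) -> float:
        # hours since this peer was last reported by another node (hypothetical helper)
        return (datetime.now() - last_seen).total_seconds() / 3600

    # pruning, as in get_peers():
    # if hours_elapsed(peer.datetime) > config.PEER_LIFETIME:
    #     peer.delete_instance()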