Merge pull request #186 from galgeek/simpler_choose_warcprox

Simpler choose warcprox
This commit is contained in:
jkafader 2020-03-11 14:16:57 -07:00 committed by GitHub
commit 1d9a95dfc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -28,6 +28,7 @@ import json
import PIL.Image import PIL.Image
import io import io
import socket import socket
import random
import requests import requests
import doublethink import doublethink
import tempfile import tempfile
@@ -80,19 +81,12 @@ class BrozzlerWorker:
warcproxes = self._service_registry.available_services('warcprox') warcproxes = self._service_registry.available_services('warcprox')
if not warcproxes: if not warcproxes:
return None return None
reql = self._frontier.rr.table('sites').between( warcproxes.sort(key=lambda warcprox: (warcprox['load']))
['ACTIVE', r.minval], ['ACTIVE', r.maxval], num_choices = 5
index='sites_last_disclaimed') if len(warcproxes) < num_choices:
active_sites = list(reql.run()) num_choices = len(warcproxes)
for warcprox in warcproxes:
address = '%s:%s' % (warcprox['host'], warcprox['port'])
warcprox['assigned_sites'] = len([
site for site in active_sites
if 'proxy' in site and site['proxy'] == address])
warcproxes.sort(key=lambda warcprox: (
warcprox['assigned_sites'], warcprox['load']))
# XXX make this heuristic more advanced? # XXX make this heuristic more advanced?
return warcproxes[0] return random.choice(warcproxes[0:num_choices])
def _proxy_for(self, site): def _proxy_for(self, site):
if self._proxy: if self._proxy: