mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-21 16:16:28 -04:00
Merge branch 'choose-warcprox' into qa
* choose-warcprox: --warcprox-auto distribute assigned sites evenly
This commit is contained in:
commit
9157cf3a0a
0
brozzler/cli.py
Executable file → Normal file
0
brozzler/cli.py
Executable file → Normal file
@@ -129,13 +129,30 @@ class BrozzlerWorker:
|
||||
self._start_stop_lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
|
||||
def _choose_warcprox(self):
    """
    Pick the warcprox instance that should be assigned the next site.

    Candidates are the 'warcprox' services the service registry reports
    as available. Each candidate dict is annotated in place with an
    'assigned_sites' count: the number of ACTIVE sites whose 'proxy'
    field equals that instance's 'host:port' address. Candidates are
    then sorted in place by (assigned_sites, load) and the first one is
    returned.

    Returns:
        The chosen service entry, or None when the registry reports no
        available warcprox instances.
    """
    warcproxes = self._service_registry.available_services('warcprox')
    if not warcproxes:
        return None
    active_sites = self._frontier.rr.table('sites').between(
            ['ACTIVE', r.minval], ['ACTIVE', r.maxval],
            index='sites_last_disclaimed').run()
    # Tally sites per proxy address in a single pass over the query
    # result. The result may be a one-shot cursor (NOTE(review): confirm
    # against the rethinkdb driver in use); iterating it again for each
    # warcprox would leave every instance after the first with a count
    # of zero, and would cost O(sites * proxies) even if it were a list.
    sites_by_proxy = {}
    for site in active_sites:
        if 'proxy' in site:
            proxy = site['proxy']
            sites_by_proxy[proxy] = sites_by_proxy.get(proxy, 0) + 1
    for warcprox in warcproxes:
        address = '%s:%s' % (warcprox['host'], warcprox['port'])
        warcprox['assigned_sites'] = sites_by_proxy.get(address, 0)
    # Fewest assigned sites wins; reported load breaks ties.
    warcproxes.sort(key=lambda warcprox: (
        warcprox['assigned_sites'], warcprox['load']))
    # XXX make this heuristic more advanced?
    return warcproxes[0]
|
||||
|
||||
def _proxy_for(self, site):
|
||||
if self._proxy:
|
||||
return self._proxy
|
||||
elif site.proxy:
|
||||
return site.proxy
|
||||
elif self._warcprox_auto:
|
||||
svc = self._service_registry.available_service('warcprox')
|
||||
svc = self._choose_warcprox()
|
||||
if svc is None:
|
||||
raise brozzler.ProxyError(
|
||||
'no available instances of warcprox in the service '
|
||||
|
Loading…
x
Reference in New Issue
Block a user