mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Merge branch 'simpler_choose_warcprox' into qa
This commit is contained in:
commit
379be437a2
@ -28,6 +28,7 @@ import json
|
||||
import PIL.Image
|
||||
import io
|
||||
import socket
|
||||
import random
|
||||
import requests
|
||||
import doublethink
|
||||
import tempfile
|
||||
@ -80,19 +81,9 @@ class BrozzlerWorker:
|
||||
warcproxes = self._service_registry.available_services('warcprox')
|
||||
if not warcproxes:
|
||||
return None
|
||||
reql = self._frontier.rr.table('sites').between(
|
||||
['ACTIVE', r.minval], ['ACTIVE', r.maxval],
|
||||
index='sites_last_disclaimed')
|
||||
active_sites = list(reql.run())
|
||||
for warcprox in warcproxes:
|
||||
address = '%s:%s' % (warcprox['host'], warcprox['port'])
|
||||
warcprox['assigned_sites'] = len([
|
||||
site for site in active_sites
|
||||
if 'proxy' in site and site['proxy'] == address])
|
||||
warcproxes.sort(key=lambda warcprox: (
|
||||
warcprox['assigned_sites'], warcprox['load']))
|
||||
warcproxes.sort(key=lambda warcprox: (warcprox['load']))
|
||||
# XXX make this heuristic more advanced?
|
||||
return warcproxes[0]
|
||||
return random.choice(warcproxes[0:5])
|
||||
|
||||
def _proxy_for(self, site):
|
||||
if self._proxy:
|
||||
|
Loading…
x
Reference in New Issue
Block a user