mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge pull request #186 from galgeek/simpler_choose_warcprox
Simpler choose warcprox
This commit is contained in:
commit
1d9a95dfc2
@ -28,6 +28,7 @@ import json
|
|||||||
import PIL.Image
|
import PIL.Image
|
||||||
import io
|
import io
|
||||||
import socket
|
import socket
|
||||||
|
import random
|
||||||
import requests
|
import requests
|
||||||
import doublethink
|
import doublethink
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -80,19 +81,12 @@ class BrozzlerWorker:
|
|||||||
warcproxes = self._service_registry.available_services('warcprox')
|
warcproxes = self._service_registry.available_services('warcprox')
|
||||||
if not warcproxes:
|
if not warcproxes:
|
||||||
return None
|
return None
|
||||||
reql = self._frontier.rr.table('sites').between(
|
warcproxes.sort(key=lambda warcprox: (warcprox['load']))
|
||||||
['ACTIVE', r.minval], ['ACTIVE', r.maxval],
|
num_choices = 5
|
||||||
index='sites_last_disclaimed')
|
if len(warcproxes) < num_choices:
|
||||||
active_sites = list(reql.run())
|
num_choices = len(warcproxes)
|
||||||
for warcprox in warcproxes:
|
|
||||||
address = '%s:%s' % (warcprox['host'], warcprox['port'])
|
|
||||||
warcprox['assigned_sites'] = len([
|
|
||||||
site for site in active_sites
|
|
||||||
if 'proxy' in site and site['proxy'] == address])
|
|
||||||
warcproxes.sort(key=lambda warcprox: (
|
|
||||||
warcprox['assigned_sites'], warcprox['load']))
|
|
||||||
# XXX make this heuristic more advanced?
|
# XXX make this heuristic more advanced?
|
||||||
return warcproxes[0]
|
return random.choice(warcproxes[0:num_choices])
|
||||||
|
|
||||||
def _proxy_for(self, site):
|
def _proxy_for(self, site):
|
||||||
if self._proxy:
|
if self._proxy:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user