Merge branch 'master' into qa

* master:
  bump version after merge
  change time limit enforcement
This commit is contained in:
Noah Levitt 2018-11-29 14:52:32 -08:00
commit b447063099
4 changed files with 12 additions and 26 deletions

View File

@@ -152,17 +152,15 @@ class RethinkDbFrontier:
else:
raise brozzler.NothingToClaim
def enforce_time_limit(self, site, session_time=0):
def enforce_time_limit(self, site):
'''
Raises `brozzler.ReachedTimeLimit` if appropriate.
'''
if (site.time_limit
and site.time_limit > 0
and (site.active_brozzling_time or 0) + session_time > site.time_limit):
if (site.time_limit and site.time_limit > 0
and site.elapsed() > site.time_limit):
self.logger.debug(
"site FINISHED_TIME_LIMIT! time_limit=%s "
"active_brozzling_time=%s %s", site.time_limit,
site.active_brozzling_time, site)
"elapsed=%s %s", site.time_limit, site.elapsed(), site)
raise brozzler.ReachedTimeLimit
def claim_page(self, site, worker_id):

View File

@@ -360,7 +360,7 @@ class BrozzlerWorker:
self._proxy_for(site), site)
while time.time() - start < self.SITE_SESSION_MINUTES * 60:
site.refresh()
self._frontier.enforce_time_limit(site, time.time() - start)
self._frontier.enforce_time_limit(site)
self._frontier.honor_stop_request(site)
page = self._frontier.claim_page(site, "%s:%s" % (
socket.gethostname(), browser.chrome.port))

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.5.dev314',
version='1.5.dev315',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',

View File

@@ -21,6 +21,7 @@ limitations under the License.
import argparse
import datetime
import logging
import time
import doublethink
import pytest
@@ -375,15 +376,10 @@ def test_time_limit():
assert site.starts_and_stops[1]['start']
assert site.starts_and_stops[1]['stop'] is None
# time limit not reached yet
# no time limit set
frontier.enforce_time_limit(site)
assert site.status == 'ACTIVE'
assert len(site.starts_and_stops) == 2
assert site.starts_and_stops[1]['start']
assert site.starts_and_stops[1]['stop'] is None
site.time_limit = 0.1
site.time_limit = 10
site.claimed = True
site.save()
@@ -394,19 +390,11 @@ def test_time_limit():
assert site.starts_and_stops[1]['start']
assert site.starts_and_stops[1]['stop'] is None
site.active_brozzling_time = 0.2 # this is why the time limit will be hit
site.time_limit = 0.1
time.sleep(0.1)
try:
with pytest.raises(brozzler.ReachedTimeLimit):
frontier.enforce_time_limit(site)
except brozzler.ReachedTimeLimit:
frontier.finished(site, 'FINISHED_TIME_LIMIT')
assert site.status == 'FINISHED_TIME_LIMIT'
assert not site.claimed
assert len(site.starts_and_stops) == 2
assert site.starts_and_stops[1]['start']
assert site.starts_and_stops[1]['stop']
assert site.starts_and_stops[1]['stop'] > site.starts_and_stops[0]['start']
def test_field_defaults():
rr = doublethink.Rethinker('localhost', db='ignoreme')