Merge pull request #83 from nlevitt/fifteen-minutes

lengthen site session brozzling time to 15 minutes
This commit is contained in:
jkafader 2018-01-29 15:59:16 -08:00 committed by GitHub
commit a125434563
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -103,6 +103,7 @@ class BrozzlerWorker:
logger = logging.getLogger(__module__ + "." + __qualname__)
HEARTBEAT_INTERVAL = 20.0
SITE_SESSION_MINUTES = 15
def __init__(
self, frontier, service_registry=None, max_browsers=1,
@@ -191,7 +192,7 @@ class BrozzlerWorker:
# in case youtube-dl takes a long time, heartbeat site.last_claimed
# to prevent another brozzler-worker from claiming the site
try:
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=7):
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=self.SITE_SESSION_MINUTES):
self.logger.debug(
'heartbeating site.last_claimed to prevent another '
'brozzler-worker claiming this site id=%r', site.id)
@@ -506,7 +507,7 @@ class BrozzlerWorker:
self.logger.info(
"brozzling site (proxy=%r) %r",
self._proxy_for(site), site)
while time.time() - start < 7 * 60:
while time.time() - start < self.SITE_SESSION_MINUTES * 60:
site.refresh()
self._frontier.honor_stop_request(site)
page = self._frontier.claim_page(site, "%s:%s" % (
@@ -519,7 +520,8 @@ class BrozzlerWorker:
page.blocked_by_robots = True
self._frontier.completed_page(site, page)
else:
outlinks = self.brozzle_page(browser, site, page,
outlinks = self.brozzle_page(
browser, site, page,
enable_youtube_dl=not self._skip_youtube_dl)
self._frontier.completed_page(site, page)
self._frontier.scope_and_schedule_outlinks(