lengthen site session brozzling time from 7 to 15 minutes

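A worker now keeps brozzling a claimed site for up to 15 minutes instead
of 7, so sites change hands less often. This should reduce contention over
the "sites" table, which should help keep more available browsers busy
across the cluster.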
This commit is contained in:
Noah Levitt 2018-01-29 15:34:54 -08:00
parent 4d37f88bcb
commit 64211475c0

View File

@ -103,6 +103,7 @@ class BrozzlerWorker:
logger = logging.getLogger(__module__ + "." + __qualname__)
HEARTBEAT_INTERVAL = 20.0
SITE_SESSION_MINUTES = 15
def __init__(
self, frontier, service_registry=None, max_browsers=1,
@ -191,7 +192,7 @@ class BrozzlerWorker:
# in case youtube-dl takes a long time, heartbeat site.last_claimed
# to prevent another brozzler-worker from claiming the site
try:
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=7):
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=self.SITE_SESSION_MINUTES):
self.logger.debug(
'heartbeating site.last_claimed to prevent another '
'brozzler-worker claiming this site id=%r', site.id)
@ -506,7 +507,7 @@ class BrozzlerWorker:
self.logger.info(
"brozzling site (proxy=%r) %r",
self._proxy_for(site), site)
while time.time() - start < 7 * 60:
while time.time() - start < self.SITE_SESSION_MINUTES * 60:
site.refresh()
self._frontier.honor_stop_request(site)
page = self._frontier.claim_page(site, "%s:%s" % (
@ -519,8 +520,9 @@ class BrozzlerWorker:
page.blocked_by_robots = True
self._frontier.completed_page(site, page)
else:
outlinks = self.brozzle_page(browser, site, page,
enable_youtube_dl=not self._skip_youtube_dl)
outlinks = self.brozzle_page(
browser, site, page,
enable_youtube_dl=not self._skip_youtube_dl)
self._frontier.completed_page(site, page)
self._frontier.scope_and_schedule_outlinks(
site, page, outlinks)