mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge pull request #83 from nlevitt/fifteen-minutes
lengthen site session brozzling time to 15 minutes
This commit is contained in:
commit
a125434563
@@ -103,6 +103,7 @@ class BrozzlerWorker:
|
|||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
HEARTBEAT_INTERVAL = 20.0
|
HEARTBEAT_INTERVAL = 20.0
|
||||||
|
SITE_SESSION_MINUTES = 15
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, frontier, service_registry=None, max_browsers=1,
|
self, frontier, service_registry=None, max_browsers=1,
|
||||||
@@ -191,7 +192,7 @@ class BrozzlerWorker:
|
|||||||
# in case youtube-dl takes a long time, heartbeat site.last_claimed
|
# in case youtube-dl takes a long time, heartbeat site.last_claimed
|
||||||
# to prevent another brozzler-worker from claiming the site
|
# to prevent another brozzler-worker from claiming the site
|
||||||
try:
|
try:
|
||||||
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=7):
|
if site.rr and doublethink.utcnow() - site.last_claimed > datetime.timedelta(minutes=self.SITE_SESSION_MINUTES):
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
'heartbeating site.last_claimed to prevent another '
|
'heartbeating site.last_claimed to prevent another '
|
||||||
'brozzler-worker claiming this site id=%r', site.id)
|
'brozzler-worker claiming this site id=%r', site.id)
|
||||||
@@ -506,7 +507,7 @@ class BrozzlerWorker:
|
|||||||
self.logger.info(
|
self.logger.info(
|
||||||
"brozzling site (proxy=%r) %r",
|
"brozzling site (proxy=%r) %r",
|
||||||
self._proxy_for(site), site)
|
self._proxy_for(site), site)
|
||||||
while time.time() - start < 7 * 60:
|
while time.time() - start < self.SITE_SESSION_MINUTES * 60:
|
||||||
site.refresh()
|
site.refresh()
|
||||||
self._frontier.honor_stop_request(site)
|
self._frontier.honor_stop_request(site)
|
||||||
page = self._frontier.claim_page(site, "%s:%s" % (
|
page = self._frontier.claim_page(site, "%s:%s" % (
|
||||||
@@ -519,8 +520,9 @@ class BrozzlerWorker:
|
|||||||
page.blocked_by_robots = True
|
page.blocked_by_robots = True
|
||||||
self._frontier.completed_page(site, page)
|
self._frontier.completed_page(site, page)
|
||||||
else:
|
else:
|
||||||
outlinks = self.brozzle_page(browser, site, page,
|
outlinks = self.brozzle_page(
|
||||||
enable_youtube_dl=not self._skip_youtube_dl)
|
browser, site, page,
|
||||||
|
enable_youtube_dl=not self._skip_youtube_dl)
|
||||||
self._frontier.completed_page(site, page)
|
self._frontier.completed_page(site, page)
|
||||||
self._frontier.scope_and_schedule_outlinks(
|
self._frontier.scope_and_schedule_outlinks(
|
||||||
site, page, outlinks)
|
site, page, outlinks)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user