Don't try to read the browser's cookie database if the browser hasn't been started (which can happen when the page is simply fetched rather than browsed because it isn't HTML).

This commit is contained in:
Noah Levitt 2016-10-03 15:03:08 -07:00
parent 87a4d93a1c
commit 0e096dd4e4
3 changed files with 4 additions and 3 deletions

View File

@@ -185,7 +185,7 @@ class Browser:
cookie_location = os.path.join(
self._work_dir.name, "chrome-user-data", "Default", "Cookies")
self.logger.debug(
"marking cookies persistent then reading file into memory: %s ",
"marking cookies persistent then reading file into memory: %s",
cookie_location)
try:
with sqlite3.connect(cookie_location) as conn:

View File

@@ -312,7 +312,8 @@ class BrozzlerWorker:
page = self._frontier.claim_page(site, "%s:%s" % (
socket.gethostname(), browser.chrome_port))
outlinks = self.brozzle_page(browser, site, page)
site.cookie_db=browser.persist_and_read_cookie_db()
if browser.is_running():
site.cookie_db = browser.persist_and_read_cookie_db()
self._frontier.completed_page(site, page)
self._frontier.scope_and_schedule_outlinks(
site, page, outlinks)

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b6.dev87',
version='1.1b6.dev88',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',