mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Read/Write Cookie DB file when creating and stopping browser instance.
This commit is contained in:
parent
127002b77d
commit
1cb6653fab
@ -128,11 +128,19 @@ class Browser:
|
||||
def __exit__(self, *args):
    """Leave the ``with`` block by stopping the browser.

    The exception details passed in ``args`` are ignored; the method
    returns None, so any in-flight exception keeps propagating.
    """
    self.stop()
|
||||
|
||||
def start(self, proxy=None):
|
||||
def start(self, proxy=None, cookieDb=None):
|
||||
if not self._chrome_instance:
|
||||
# these can raise exceptions
|
||||
self.chrome_port = self._find_available_port()
|
||||
self._work_dir = tempfile.TemporaryDirectory()
|
||||
if cookieDb is not None:
|
||||
cookieLocation = os.sep.join([self._work_dir.name, "chrome-user-data","Default","Cookies"])
|
||||
try:
|
||||
with open(cookieLocation,'w') as cookieFile:
|
||||
cookieFile.write(cookieDb)
|
||||
except EnvironmentError:
|
||||
self.logger.error("exception writing cookie file at: %s", cookieLocation, exc_info=True)
|
||||
|
||||
self._chrome_instance = Chrome(
|
||||
port=self.chrome_port, executable=self.chrome_exe,
|
||||
user_home_dir=self._work_dir.name,
|
||||
@ -161,6 +169,24 @@ class Browser:
|
||||
except:
|
||||
self.logger.error("problem stopping", exc_info=True)
|
||||
|
||||
def read_cookie_db(self):
    """Return the raw bytes of the browser profile's cookie database.

    The database is expected at
    ``<work dir>/chrome-user-data/Default/Cookies``. Before the file is
    read, every row of its ``cookies`` table is updated to
    ``persistent = 1`` (presumably so that session cookies survive being
    loaded into a later browser instance -- confirm against Chrome's
    cookie schema).

    Failures are logged rather than raised. If the update fails, the file
    is still read and returned as-is.

    Returns:
        A ``bytearray`` holding the file contents, or ``None`` if the
        file does not exist or cannot be read.
    """
    cookie_location = os.path.join(
        self._work_dir.name, "chrome-user-data", "Default", "Cookies")

    # sqlite3.connect() silently creates an empty database file when the
    # path is missing; a read operation must not leave that behind or
    # hand back a bogus empty database.
    if not os.path.isfile(cookie_location):
        self.logger.error(
            "cookie DB file does not exist at: %s", cookie_location)
        return None

    try:
        conn = sqlite3.connect(cookie_location)
        try:
            # "with conn" commits on success and rolls back on error, but
            # it does not close the connection, hence the finally below.
            with conn:
                conn.execute("UPDATE cookies SET persistent = 1")
        finally:
            conn.close()
    except sqlite3.Error:
        self.logger.error("exception updating cookie DB", exc_info=True)

    try:
        with open(cookie_location, "rb") as cookie_file:
            return bytearray(cookie_file.read())
    except EnvironmentError:
        self.logger.error(
            "exception reading from cookie DB file at: %s",
            cookie_location, exc_info=True)
        return None
|
||||
|
||||
def _find_available_port(self):
|
||||
port_available = False
|
||||
port = self.chrome_port
|
||||
|
@ -91,7 +91,7 @@ class Site(brozzler.BaseDictable):
|
||||
enable_warcprox_features=False, reached_limit=None,
|
||||
status="ACTIVE", claimed=False, start_time=None,
|
||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None):
|
||||
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None, cookieDb=None):
|
||||
|
||||
self.seed = seed
|
||||
self.id = id
|
||||
@ -110,6 +110,7 @@ class Site(brozzler.BaseDictable):
|
||||
self.last_claimed = last_claimed
|
||||
self.metadata = metadata
|
||||
self.remember_outlinks = remember_outlinks
|
||||
self.cookieDb = bytearray(cookieDb)
|
||||
|
||||
self.scope = scope or {}
|
||||
if not "surt" in self.scope:
|
||||
|
@ -257,7 +257,7 @@ class BrozzlerWorker:
|
||||
if self._needs_browsing(page, ydl_spy):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
if not browser.is_running():
|
||||
browser.start(proxy=self._proxy(site))
|
||||
browser.start(proxy=self._proxy(site), cookieDb=site.cookieDb)
|
||||
outlinks = browser.browse_page(
|
||||
page.url, extra_headers=site.extra_headers(),
|
||||
on_screenshot=_on_screenshot,
|
||||
@ -312,6 +312,7 @@ class BrozzlerWorker:
|
||||
page = self._frontier.claim_page(site, "%s:%s" % (
|
||||
socket.gethostname(), browser.chrome_port))
|
||||
outlinks = self.brozzle_page(browser, site, page)
|
||||
site.cookieDb=browser.read_cookie_db()
|
||||
self._frontier.completed_page(site, page)
|
||||
self._frontier.scope_and_schedule_outlinks(
|
||||
site, page, outlinks)
|
||||
|
Loading…
x
Reference in New Issue
Block a user