diff --git a/brozzler/browser.py b/brozzler/browser.py index 30a780d..a7cd6d0 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -77,7 +77,7 @@ class BrowserPool: try: browser = self._available.pop() except KeyError: - raise NoBrowsersAvailable() + raise NoBrowsersAvailable self._in_use.add(browser) return browser @@ -87,8 +87,12 @@ class BrowserPool: self._in_use.remove(browser) def shutdown_now(self): - self.logger.info('shutting down browser pool') + self.logger.info( + 'shutting down browser pool (%s browsers in use)', + len(self._in_use)) with self._lock: + for browser in self._available: + browser.stop() for browser in self._in_use: browser.stop() @@ -114,6 +118,7 @@ class Browser: self.chrome = Chrome(**kwargs) self.websocket_url = None self.is_browsing = False + self._browser_controller = None def __enter__(self): self.start() @@ -139,10 +144,10 @@ class Browser: Stops chrome if it's running. ''' try: - if self.is_running(): + if self._browser_controller: self._browser_controller.stop() - self.websocket_url = None - self.chrome.stop() + self.websocket_url = None + self.chrome.stop() except: self.logger.error('problem stopping', exc_info=True) @@ -215,19 +220,9 @@ class Browser: ## outlinks += retrieve_outlinks (60 sec) final_page_url = self._browser_controller.url() return final_page_url, outlinks - except brozzler.ShutdownRequested: - self.logger.info('shutdown requested') - raise except websocket.WebSocketConnectionClosedException as e: - # import pdb; pdb.set_trace() + self.logger.error('websocket closed, did chrome die?') raise BrowsingException(e) - # if not self.is_running(): - # logging.info('appears shutdown was requested') - # return None, None - # else: - # raise BrowsingException( - # "websocket closed, did chrome die? %s" % ( - # self.websocket_url)) finally: self.is_browsing = False @@ -287,8 +282,12 @@ class BrowserController: ''' Raises BrowsingException in the thread that called start() ''' - self.logger.error( - 'exception from websocket receiver thread', exc_info=1) + if isinstance(e, websocket.WebSocketConnectionClosedException): + self.logger.error('websocket closed, did chrome die?') + else: + self.logger.error( + 'exception from websocket receiver thread', + exc_info=1) brozzler.thread_raise(calling_thread, BrowsingException) # open websocket, start thread that receives messages diff --git a/brozzler/chrome.py b/brozzler/chrome.py index b5e5f08..935a9ca 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -50,6 +50,7 @@ class Chrome: self.chrome_exe = chrome_exe self.ignore_cert_errors = ignore_cert_errors self._shutdown = threading.Event() + self.chrome_process = None def __enter__(self): ''' @@ -188,6 +189,8 @@ class Chrome: 'got chrome window websocket debug url %s from %s', url, json_url) return url + except brozzler.ShutdownRequested: + raise except BaseException as e: if int(time.time() - self._start) % 10 == 5: self.logger.warn( @@ -253,18 +256,18 @@ class Chrome: def stop(self): if not self.chrome_process or self._shutdown.is_set(): return + self._shutdown.set() timeout_sec = 300 - self._shutdown.set() - self.logger.info('terminating chrome pgid %s' % self.chrome_process.pid) + if self.chrome_process.poll() is None: + self.logger.info( + 'terminating chrome pgid %s', self.chrome_process.pid) - os.killpg(self.chrome_process.pid, signal.SIGTERM) - first_sigterm = time.time() + os.killpg(self.chrome_process.pid, signal.SIGTERM) + t0 = time.time() try: - while time.time() - first_sigterm < timeout_sec: - time.sleep(0.5) - + while time.time() - t0 < timeout_sec: status = self.chrome_process.poll() if status is not None: if status == 0: @@ -281,11 +284,12 @@ class Chrome: # around, but there's a chance I suppose that some other # process could have started with the same pgid return + time.sleep(0.5) self.logger.warn( 'chrome pid %s still alive %.1f seconds after sending ' 'SIGTERM, sending SIGKILL', self.chrome_process.pid, - time.time() - first_sigterm) + time.time() - t0) os.killpg(self.chrome_process.pid, signal.SIGKILL) status = self.chrome_process.wait() self.logger.warn( @@ -301,3 +305,4 @@ class Chrome: finally: self._out_reader_thread.join() self.chrome_process = None + diff --git a/brozzler/worker.py b/brozzler/worker.py index 0d366dc..49626ad 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -322,9 +322,9 @@ class BrozzlerWorker: return False def _brozzle_site(self, browser, site): - start = time.time() page = None try: + start = time.time() while time.time() - start < 7 * 60: self._frontier.honor_stop_request(site.job_id) page = self._frontier.claim_page(site, "%s:%s" % (