diff --git a/brozzler/browser.py b/brozzler/browser.py index ed5744f..f1d355d 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -233,8 +233,7 @@ class WebsockReceiverThread(threading.Thread): brozzler.thread_raise(self.calling_thread, BrowsingException) elif message['method'] == 'Console.messageAdded': self.logger.debug( - '%s console.%s %s', self.websock.url, - message['params']['message']['level'], + 'console.%s %s', message['params']['message']['level'], message['params']['message']['text']) elif message['method'] == 'Page.javascriptDialogOpening': self._javascript_dialog_opening(message) @@ -310,10 +309,8 @@ class Browser: if not self.is_running(): self.websock_url = self.chrome.start(**kwargs) self.websock = websocket.WebSocketApp(self.websock_url) - thread_name = 'WebsockThread:{}-{:%Y%m%d%H%M%S}'.format( - surt.handyurl.parse(self.websock_url).port, - datetime.datetime.utcnow()) - self.websock_thread = WebsockReceiverThread(self.websock) + self.websock_thread = WebsockReceiverThread( + self.websock, name='WebsockThread:%s' % self.chrome.port) self.websock_thread.start() self._wait_for(lambda: self.websock_thread.is_open, timeout=10) diff --git a/brozzler/chrome.py b/brozzler/chrome.py index e5ef311..8924950 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -165,8 +165,7 @@ class Chrome: stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=0) self._out_reader_thread = threading.Thread( target=self._read_stderr_stdout, - name='ChromeOutReaderThread(pid=%s)' % self.chrome_process.pid, - daemon=True) + name='ChromeOutReaderThread:%s' % self.port, daemon=True) self._out_reader_thread.start() self.logger.info('chrome running, pid %s' % self.chrome_process.pid) diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 0ae91eb..fbf9714 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -125,7 +125,7 @@ class RethinkDbFrontier: self._vet_result(result, replaced=[0,1], unchanged=[0,1]) def new_page(self, page): - self.logger.debug("inserting into 'pages' table %s", page) + self.logger.trace("inserting into 'pages' table %s", page) result = self.r.table("pages").insert(page.to_dict()).run() self._vet_result(result, inserted=1) diff --git a/brozzler/worker.py b/brozzler/worker.py index 7eb7d22..565ca94 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -418,7 +418,7 @@ class BrozzlerWorker: repr(self._proxy(site)), site) th = threading.Thread( target=self._brozzle_site, args=(browser, site), - name="BrozzlingThread:%s" % site.seed, + name="BrozzlingThread:%s" % browser.chrome.port, daemon=True) with self._browsing_threads_lock: self._browsing_threads.add(th) @@ -432,9 +432,7 @@ class BrozzlerWorker: "all %s browsers are busy", self._max_browsers) latest_state = "browsers-busy" except brozzler.NothingToClaim: - if latest_state != "no-unclaimed-sites": - self.logger.info("no unclaimed sites to browse") - latest_state = "no-unclaimed-sites" + pass time.sleep(0.5) except brozzler.ShutdownRequested: self.logger.info("shutdown requested") diff --git a/setup.py b/setup.py index 5198956..c7b47d1 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b9.dev185', + version='1.1b9.dev186', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',