mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
logging tweaks
This commit is contained in:
parent
09fa41f959
commit
e58f4b7c44
@ -233,8 +233,7 @@ class WebsockReceiverThread(threading.Thread):
|
|||||||
brozzler.thread_raise(self.calling_thread, BrowsingException)
|
brozzler.thread_raise(self.calling_thread, BrowsingException)
|
||||||
elif message['method'] == 'Console.messageAdded':
|
elif message['method'] == 'Console.messageAdded':
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
'%s console.%s %s', self.websock.url,
|
'console.%s %s', message['params']['message']['level'],
|
||||||
message['params']['message']['level'],
|
|
||||||
message['params']['message']['text'])
|
message['params']['message']['text'])
|
||||||
elif message['method'] == 'Page.javascriptDialogOpening':
|
elif message['method'] == 'Page.javascriptDialogOpening':
|
||||||
self._javascript_dialog_opening(message)
|
self._javascript_dialog_opening(message)
|
||||||
@ -310,10 +309,8 @@ class Browser:
|
|||||||
if not self.is_running():
|
if not self.is_running():
|
||||||
self.websock_url = self.chrome.start(**kwargs)
|
self.websock_url = self.chrome.start(**kwargs)
|
||||||
self.websock = websocket.WebSocketApp(self.websock_url)
|
self.websock = websocket.WebSocketApp(self.websock_url)
|
||||||
thread_name = 'WebsockThread:{}-{:%Y%m%d%H%M%S}'.format(
|
self.websock_thread = WebsockReceiverThread(
|
||||||
surt.handyurl.parse(self.websock_url).port,
|
self.websock, name='WebsockThread:%s' % self.chrome.port)
|
||||||
datetime.datetime.utcnow())
|
|
||||||
self.websock_thread = WebsockReceiverThread(self.websock)
|
|
||||||
self.websock_thread.start()
|
self.websock_thread.start()
|
||||||
|
|
||||||
self._wait_for(lambda: self.websock_thread.is_open, timeout=10)
|
self._wait_for(lambda: self.websock_thread.is_open, timeout=10)
|
||||||
|
@ -165,8 +165,7 @@ class Chrome:
|
|||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=0)
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=0)
|
||||||
self._out_reader_thread = threading.Thread(
|
self._out_reader_thread = threading.Thread(
|
||||||
target=self._read_stderr_stdout,
|
target=self._read_stderr_stdout,
|
||||||
name='ChromeOutReaderThread(pid=%s)' % self.chrome_process.pid,
|
name='ChromeOutReaderThread:%s' % self.port, daemon=True)
|
||||||
daemon=True)
|
|
||||||
self._out_reader_thread.start()
|
self._out_reader_thread.start()
|
||||||
self.logger.info('chrome running, pid %s' % self.chrome_process.pid)
|
self.logger.info('chrome running, pid %s' % self.chrome_process.pid)
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ class RethinkDbFrontier:
|
|||||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||||
|
|
||||||
def new_page(self, page):
|
def new_page(self, page):
|
||||||
self.logger.debug("inserting into 'pages' table %s", page)
|
self.logger.trace("inserting into 'pages' table %s", page)
|
||||||
result = self.r.table("pages").insert(page.to_dict()).run()
|
result = self.r.table("pages").insert(page.to_dict()).run()
|
||||||
self._vet_result(result, inserted=1)
|
self._vet_result(result, inserted=1)
|
||||||
|
|
||||||
|
@ -418,7 +418,7 @@ class BrozzlerWorker:
|
|||||||
repr(self._proxy(site)), site)
|
repr(self._proxy(site)), site)
|
||||||
th = threading.Thread(
|
th = threading.Thread(
|
||||||
target=self._brozzle_site, args=(browser, site),
|
target=self._brozzle_site, args=(browser, site),
|
||||||
name="BrozzlingThread:%s" % site.seed,
|
name="BrozzlingThread:%s" % browser.chrome.port,
|
||||||
daemon=True)
|
daemon=True)
|
||||||
with self._browsing_threads_lock:
|
with self._browsing_threads_lock:
|
||||||
self._browsing_threads.add(th)
|
self._browsing_threads.add(th)
|
||||||
@ -432,9 +432,7 @@ class BrozzlerWorker:
|
|||||||
"all %s browsers are busy", self._max_browsers)
|
"all %s browsers are busy", self._max_browsers)
|
||||||
latest_state = "browsers-busy"
|
latest_state = "browsers-busy"
|
||||||
except brozzler.NothingToClaim:
|
except brozzler.NothingToClaim:
|
||||||
if latest_state != "no-unclaimed-sites":
|
pass
|
||||||
self.logger.info("no unclaimed sites to browse")
|
|
||||||
latest_state = "no-unclaimed-sites"
|
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
except brozzler.ShutdownRequested:
|
except brozzler.ShutdownRequested:
|
||||||
self.logger.info("shutdown requested")
|
self.logger.info("shutdown requested")
|
||||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev185',
|
version='1.1b9.dev186',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user