add docstring to _chain_chrome_messages, remove debug logging, tweak name of websock thread

This commit is contained in:
Noah Levitt 2016-07-28 20:29:11 -05:00
parent 2046ee36e0
commit cfc18e6845
2 changed files with 21 additions and 7 deletions

View File

@ -36,8 +36,8 @@ import re
import base64
import psutil
import signal
import string
import sqlite3
import datetime
__all__ = ["BrowserPool", "Browser"]
@ -261,8 +261,8 @@ class Browser:
self._websocket_url, on_open=self._visit_page,
on_message=self._wrap_handle_message)
threadName = "WebsockThread:%s-%s" % (self.chrome_port, ''.join(
random.choice(string.ascii_letters) for _ in range(6)))
threadName = "WebsockThread:{}-{:%Y%m%d%H%M%S}".format(
self.chrome_port, datetime.datetime.utcnow())
websock_thread = threading.Thread(
target=self._websock.run_forever, name=threadName,
kwargs={'ping_timeout':0.5})
@ -315,12 +315,26 @@ compileOutlinks(window).join(' ');
"""
def _chain_chrome_messages(self, chain):
"""
Sends a series of messages to chrome/chromium on the debugging protocol
websocket. Waits for a reply from each one before sending the next.
Enforces a timeout waiting for each reply. If the timeout is hit, sets
self._result_message_timeout to a ResultMessageTimeout (an exception
class). Takes a list of dicts, each of which should look like this:
{
"info": "human readable description",
"chrome_msg": { ... }, # message to send to chrome, as a dict
"timeout": 30, # timeout in seconds
"callback": my_callback, # takes one arg, the result message
}
The code is rather convoluted because of the asynchronous nature of the
whole thing. See how it's used in _start_postbehavior_chain.
"""
timer = None
def callback(message):
self.logger.info(
"timer=%s chain[0]['callback']=%s len(chain[1:])=%s",
timer, chain[0]["callback"], len(chain[1:]))
if timer:
timer.cancel()
if message["id"] in self._waiting_on_result_messages:

View File

@ -21,7 +21,7 @@ import setuptools
setuptools.setup(
name='brozzler',
version='1.1b4.dev62',
version='1.1b4.dev63',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',