mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
add docstring to _chain_chrome_messages, remove debug logging, tweak name of websock thread
This commit is contained in:
parent
2046ee36e0
commit
cfc18e6845
2 changed files with 21 additions and 7 deletions
|
@ -36,8 +36,8 @@ import re
|
||||||
import base64
|
import base64
|
||||||
import psutil
|
import psutil
|
||||||
import signal
|
import signal
|
||||||
import string
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import datetime
|
||||||
|
|
||||||
__all__ = ["BrowserPool", "Browser"]
|
__all__ = ["BrowserPool", "Browser"]
|
||||||
|
|
||||||
|
@ -261,8 +261,8 @@ class Browser:
|
||||||
self._websocket_url, on_open=self._visit_page,
|
self._websocket_url, on_open=self._visit_page,
|
||||||
on_message=self._wrap_handle_message)
|
on_message=self._wrap_handle_message)
|
||||||
|
|
||||||
threadName = "WebsockThread:%s-%s" % (self.chrome_port, ''.join(
|
threadName = "WebsockThread:{}-{:%Y%m%d%H%M%S}".format(
|
||||||
random.choice(string.ascii_letters) for _ in range(6)))
|
self.chrome_port, datetime.datetime.utcnow())
|
||||||
websock_thread = threading.Thread(
|
websock_thread = threading.Thread(
|
||||||
target=self._websock.run_forever, name=threadName,
|
target=self._websock.run_forever, name=threadName,
|
||||||
kwargs={'ping_timeout':0.5})
|
kwargs={'ping_timeout':0.5})
|
||||||
|
@ -315,12 +315,26 @@ compileOutlinks(window).join(' ');
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _chain_chrome_messages(self, chain):
|
def _chain_chrome_messages(self, chain):
|
||||||
|
"""
|
||||||
|
Sends a series of messages to chrome/chromium on the debugging protocol
|
||||||
|
websocket. Waits for a reply from each one before sending the next.
|
||||||
|
Enforces a timeout waiting for each reply. If the timeout is hit, sets
|
||||||
|
self._result_message_timeout with a ResultMessageTimeout (an exception
|
||||||
|
class). Takes an array of dicts, each of which should look like this:
|
||||||
|
|
||||||
|
{
|
||||||
|
"info": "human readable description",
|
||||||
|
"chrome_msg": { ... }, # message to send to chrome, as a dict
|
||||||
|
"timeout": 30, # timeout in seconds
|
||||||
|
"callback": my_callback, # takes one arg, the result message
|
||||||
|
}
|
||||||
|
|
||||||
|
The code is rather convoluted because of the asynchronous nature of the
|
||||||
|
whole thing. See how it's used in _start_postbehavior_chain.
|
||||||
|
"""
|
||||||
timer = None
|
timer = None
|
||||||
|
|
||||||
def callback(message):
|
def callback(message):
|
||||||
self.logger.info(
|
|
||||||
"timer=%s chain[0]['callback']=%s len(chain[1:])=%s",
|
|
||||||
timer, chain[0]["callback"], len(chain[1:]))
|
|
||||||
if timer:
|
if timer:
|
||||||
timer.cancel()
|
timer.cancel()
|
||||||
if message["id"] in self._waiting_on_result_messages:
|
if message["id"] in self._waiting_on_result_messages:
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -21,7 +21,7 @@ import setuptools
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b4.dev62',
|
version='1.1b4.dev63',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue