tweak thread names

This commit is contained in:
Noah Levitt 2016-07-19 14:33:57 -05:00
parent ac3a71742d
commit c902a70450
4 changed files with 31 additions and 28 deletions

View File

@@ -1,21 +1,21 @@
#
# brozzler/__init__.py - __init__.py for brozzler package, contains some common
# code
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
brozzler/__init__.py - __init__.py for brozzler package, contains some common
code
Copyright (C) 2014-2016 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import json as _json
import logging as _logging

View File

@@ -36,6 +36,7 @@ import re
import base64
import psutil
import signal
import string
__all__ = ["BrowserPool", "Browser"]
@@ -217,12 +218,15 @@ class Browser:
self._abort_browse_page = False
self._has_screenshot = False
self._websock = websocket.WebSocketApp(self._websocket_url,
on_open=self._visit_page, on_message=self._wrap_handle_message)
self._websock = websocket.WebSocketApp(
self._websocket_url, on_open=self._visit_page,
on_message=self._wrap_handle_message)
threadName = "WebsockThread{}-{}".format(self.chrome_port,
''.join((random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for _ in range(6))))
websock_thread = threading.Thread(target=self._websock.run_forever, name=threadName, kwargs={'ping_timeout':0.5})
threadName = "WebsockThread:%s-%s" % (self.chrome_port, ''.join(
random.choice(string.ascii_letters) for _ in range(6)))
websock_thread = threading.Thread(
target=self._websock.run_forever, name=threadName,
kwargs={'ping_timeout':0.5})
websock_thread.start()
self._start = time.time()
aborted = False
@@ -464,7 +468,7 @@ compileOutlinks(window).join(' ');
self.on_screenshot(base64.b64decode(message["result"]["data"]))
self._waiting_on_screenshot_msg_id = None
self._has_screenshot = True
self.logger.info("got screenshot, moving on to getting outlinks url={}".format(self.url))
self.logger.info("got screenshot, moving on to getting outlinks")
elif message["id"] == self._waiting_on_scroll_to_top_msg_id:
self._waiting_on_scroll_to_top_msg_id = None
self._waiting_on_scroll_to_top_start = None
@@ -673,4 +677,3 @@ class Chrome:
finally:
self._out_reader_thread.join()
self.chrome_process = None

View File

@@ -376,8 +376,7 @@ class BrozzlerWorker:
th = threading.Thread(
target=lambda: self._brozzle_site(
browser, site),
name="BrowsingThread:{}-{}".format(
browser.chrome_port, site.seed))
name="BrozzlingThread:%s" % site.seed)
th.start()
self._browsing_threads.add(th)
except:

View File

@@ -21,7 +21,7 @@ import setuptools
setuptools.setup(
name='brozzler',
version='1.1b3.dev54',
version='1.1b3.dev55',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',
@@ -66,3 +66,4 @@ setuptools.setup(
'Topic :: Internet :: WWW/HTTP',
'Topic :: System :: Archiving',
])