mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
tweak thread names
This commit is contained in:
parent
ac3a71742d
commit
c902a70450
@ -1,21 +1,21 @@
|
|||||||
#
|
"""
|
||||||
# brozzler/__init__.py - __init__.py for brozzler package, contains some common
|
brozzler/__init__.py - __init__.py for brozzler package, contains some common
|
||||||
# code
|
code
|
||||||
#
|
|
||||||
# Copyright (C) 2014-2016 Internet Archive
|
Copyright (C) 2014-2016 Internet Archive
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
# You may obtain a copy of the License at
|
You may obtain a copy of the License at
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
Unless required by applicable law or agreed to in writing, software
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
limitations under the License.
|
||||||
#
|
"""
|
||||||
|
|
||||||
import json as _json
|
import json as _json
|
||||||
import logging as _logging
|
import logging as _logging
|
||||||
|
@ -36,6 +36,7 @@ import re
|
|||||||
import base64
|
import base64
|
||||||
import psutil
|
import psutil
|
||||||
import signal
|
import signal
|
||||||
|
import string
|
||||||
|
|
||||||
__all__ = ["BrowserPool", "Browser"]
|
__all__ = ["BrowserPool", "Browser"]
|
||||||
|
|
||||||
@ -217,12 +218,15 @@ class Browser:
|
|||||||
self._abort_browse_page = False
|
self._abort_browse_page = False
|
||||||
self._has_screenshot = False
|
self._has_screenshot = False
|
||||||
|
|
||||||
self._websock = websocket.WebSocketApp(self._websocket_url,
|
self._websock = websocket.WebSocketApp(
|
||||||
on_open=self._visit_page, on_message=self._wrap_handle_message)
|
self._websocket_url, on_open=self._visit_page,
|
||||||
|
on_message=self._wrap_handle_message)
|
||||||
|
|
||||||
threadName = "WebsockThread{}-{}".format(self.chrome_port,
|
threadName = "WebsockThread:%s-%s" % (self.chrome_port, ''.join(
|
||||||
''.join((random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for _ in range(6))))
|
random.choice(string.ascii_letters) for _ in range(6)))
|
||||||
websock_thread = threading.Thread(target=self._websock.run_forever, name=threadName, kwargs={'ping_timeout':0.5})
|
websock_thread = threading.Thread(
|
||||||
|
target=self._websock.run_forever, name=threadName,
|
||||||
|
kwargs={'ping_timeout':0.5})
|
||||||
websock_thread.start()
|
websock_thread.start()
|
||||||
self._start = time.time()
|
self._start = time.time()
|
||||||
aborted = False
|
aborted = False
|
||||||
@ -464,7 +468,7 @@ compileOutlinks(window).join(' ');
|
|||||||
self.on_screenshot(base64.b64decode(message["result"]["data"]))
|
self.on_screenshot(base64.b64decode(message["result"]["data"]))
|
||||||
self._waiting_on_screenshot_msg_id = None
|
self._waiting_on_screenshot_msg_id = None
|
||||||
self._has_screenshot = True
|
self._has_screenshot = True
|
||||||
self.logger.info("got screenshot, moving on to getting outlinks url={}".format(self.url))
|
self.logger.info("got screenshot, moving on to getting outlinks")
|
||||||
elif message["id"] == self._waiting_on_scroll_to_top_msg_id:
|
elif message["id"] == self._waiting_on_scroll_to_top_msg_id:
|
||||||
self._waiting_on_scroll_to_top_msg_id = None
|
self._waiting_on_scroll_to_top_msg_id = None
|
||||||
self._waiting_on_scroll_to_top_start = None
|
self._waiting_on_scroll_to_top_start = None
|
||||||
@ -673,4 +677,3 @@ class Chrome:
|
|||||||
finally:
|
finally:
|
||||||
self._out_reader_thread.join()
|
self._out_reader_thread.join()
|
||||||
self.chrome_process = None
|
self.chrome_process = None
|
||||||
|
|
||||||
|
@ -376,8 +376,7 @@ class BrozzlerWorker:
|
|||||||
th = threading.Thread(
|
th = threading.Thread(
|
||||||
target=lambda: self._brozzle_site(
|
target=lambda: self._brozzle_site(
|
||||||
browser, site),
|
browser, site),
|
||||||
name="BrowsingThread:{}-{}".format(
|
name="BrozzlingThread:%s" % site.seed)
|
||||||
browser.chrome_port, site.seed))
|
|
||||||
th.start()
|
th.start()
|
||||||
self._browsing_threads.add(th)
|
self._browsing_threads.add(th)
|
||||||
except:
|
except:
|
||||||
|
3
setup.py
3
setup.py
@ -21,7 +21,7 @@ import setuptools
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b3.dev54',
|
version='1.1b3.dev55',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
@ -66,3 +66,4 @@ setuptools.setup(
|
|||||||
'Topic :: Internet :: WWW/HTTP',
|
'Topic :: Internet :: WWW/HTTP',
|
||||||
'Topic :: System :: Archiving',
|
'Topic :: System :: Archiving',
|
||||||
])
|
])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user