mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Move _find_available_port to chrome.py and change how it works, so that a browser configured for port 9200 doesn't get stuck at 9201 forever (which in turn pushes 9201 to 9202, and so on); add a unit test.
This commit is contained in:
parent
74009852d6
commit
ce03381b92
@ -30,7 +30,6 @@ from brozzler.chrome import Chrome
|
||||
from brozzler.behaviors import Behavior
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
import base64
|
||||
import psutil
|
||||
import sqlite3
|
||||
import datetime
|
||||
|
||||
@ -103,7 +102,9 @@ class Browser:
|
||||
|
||||
HARD_TIMEOUT_SECONDS = 20 * 60
|
||||
|
||||
def __init__(self, chrome_port=9222, chrome_exe='chromium-browser', proxy=None, ignore_cert_errors=False):
|
||||
def __init__(
|
||||
self, chrome_port=9222, chrome_exe='chromium-browser', proxy=None,
|
||||
ignore_cert_errors=False):
|
||||
self.command_id = itertools.count(1)
|
||||
self.chrome_port = chrome_port
|
||||
self.chrome_exe = chrome_exe
|
||||
@ -130,7 +131,6 @@ class Browser:
|
||||
def start(self, proxy=None, cookie_db=None):
|
||||
if not self._chrome_instance:
|
||||
# these can raise exceptions
|
||||
self.chrome_port = self._find_available_port()
|
||||
self._work_dir = tempfile.TemporaryDirectory()
|
||||
if cookie_db is not None:
|
||||
cookie_dir = os.path.join(
|
||||
@ -199,23 +199,6 @@ class Browser:
|
||||
cookie_location, exc_info=True)
|
||||
return cookie_db
|
||||
|
||||
def _find_available_port(self):
    '''
    Returns a tcp port that appears to be free, starting the search at
    self.chrome_port.

    Scans upward from self.chrome_port and returns the first port that no
    local tcp connection reported by psutil is using. Returns
    self.chrome_port unchanged if psutil is denied access to the
    connection list, or if every port in the scanned range is taken.
    '''
    port = self.chrome_port

    try:
        conns = psutil.net_connections(kind="tcp")
    except psutil.AccessDenied:
        # can't inspect open ports, so just use the configured one
        return port

    # collect the local ports once instead of rescanning every connection
    # for each candidate port
    ports_in_use = {conn.laddr[1] for conn in conns}

    for p in range(port, 65535):
        if p not in ports_in_use:
            return p
        self.logger.warning(
                "port %s already open, will try %s", p, p+1)

    return port
|
||||
|
||||
def is_running(self):
    '''
    Returns True if self._websocket_url is set to a truthy value, False
    otherwise.
    '''
    if self._websocket_url:
        return True
    return False
|
||||
|
||||
|
@ -29,6 +29,7 @@ import signal
|
||||
import sqlite3
|
||||
import datetime
|
||||
import json
|
||||
import psutil
|
||||
|
||||
class Chrome:
|
||||
logger = logging.getLogger(__module__ + '.' + __qualname__)
|
||||
@ -53,6 +54,24 @@ class Chrome:
|
||||
def __exit__(self, *args):
    '''
    Context manager exit hook: calls self.stop().

    The exception details passed in `args` are ignored and nothing is
    returned, so an exception raised inside the `with` block is not
    suppressed.
    '''
    self.stop()
|
||||
|
||||
def _find_available_port(self, default_port=9200):
    '''
    Returns a tcp port that appears to be free, preferring `default_port`.

    Returns `default_port` if no local tcp connection reported by psutil
    is using it. Otherwise searches downward from 9999 through 9000 and
    returns the first port that is not in use, logging a warning about
    the substitution. Also returns `default_port` if psutil is denied
    access to the connection list, or if every port from 9000 through
    9999 is in use.
    '''
    try:
        conns = psutil.net_connections(kind='tcp')
    except psutil.AccessDenied:
        # can't inspect open ports, so just use the requested one
        return default_port

    # collect the local ports once instead of rescanning every connection
    # for each candidate port
    ports_in_use = {conn.laddr[1] for conn in conns}

    # keep the requested port whenever it is free; only fall back to the
    # 9999..9000 search when something else already holds it
    if default_port not in ports_in_use:
        return default_port

    for p in range(9999, 8999, -1):
        if p not in ports_in_use:
            self.logger.warning(
                    'port %s already in use, using %s instead',
                    default_port, p)
            return p

    return default_port
|
||||
|
||||
def start(self):
|
||||
'''
|
||||
Returns websocket url to chrome window with about:blank loaded.
|
||||
@ -60,6 +79,7 @@ class Chrome:
|
||||
timeout_sec = 600
|
||||
new_env = os.environ.copy()
|
||||
new_env['HOME'] = self.user_home_dir
|
||||
self.port = self._find_available_port(self.port)
|
||||
chrome_args = [
|
||||
self.executable, '--use-mock-keychain', # mac thing
|
||||
'--user-data-dir=%s' % self.user_data_dir,
|
||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b8.dev131',
|
||||
version='1.1b8.dev132',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
@ -22,6 +22,9 @@ import http.server
|
||||
import threading
|
||||
import os
|
||||
import brozzler
|
||||
import brozzler.chrome
|
||||
import socket
|
||||
import logging
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def httpd(request):
|
||||
@ -52,3 +55,18 @@ def test_robots(httpd):
|
||||
site = brozzler.Site(seed=url, user_agent='im/a bAdBOt/uh huh')
|
||||
assert not brozzler.is_permitted_by_robots(site, url)
|
||||
|
||||
def test_find_available_port():
    '''
    Checks that Chrome._find_available_port() keeps the requested port
    while it is free, and falls back to 9999 while something else is
    listening on it.

    Assumes ports 9800 and 9999 are otherwise unused on the test machine.
    '''
    # imported here because psutil is not among the module-level imports
    # visible in this test module
    import psutil

    try:
        psutil.net_connections(kind='tcp')
    except psutil.AccessDenied:
        # report the test as skipped rather than silently passing
        pytest.skip(
                'skipping _find_available_port() test because '
                'psutil.net_connections(kind="tcp") raised AccessDenied')

    # bare instance: bypasses Chrome.__init__ but still provides the
    # class-level logger that _find_available_port() uses on the fallback
    # path (passing None as self would fail there)
    chrome = brozzler.chrome.Chrome.__new__(brozzler.chrome.Chrome)

    assert chrome._find_available_port(9800) == 9800

    sock = socket.socket()
    try:
        sock.bind(('localhost', 9800))
        # listen so the port shows up in the tcp connection table; a
        # socket that is only bound may not be listed
        sock.listen(0)
        assert chrome._find_available_port(9800) == 9999
    finally:
        # always release the port, even if the assertion above fails
        sock.close()

    assert chrome._find_available_port(9800) == 9800
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user