Move _find_available_port to chrome.py and change the way it works, so that browser:9200 doesn't get stuck at 9201 forever (which pushes 9201 to 9202, and so on); also add a unit test

This commit is contained in:
Noah Levitt 2016-12-06 17:12:20 -08:00
parent 74009852d6
commit ce03381b92
4 changed files with 42 additions and 21 deletions

View File

@ -30,7 +30,6 @@ from brozzler.chrome import Chrome
from brozzler.behaviors import Behavior
from requests.structures import CaseInsensitiveDict
import base64
import psutil
import sqlite3
import datetime
@ -103,7 +102,9 @@ class Browser:
HARD_TIMEOUT_SECONDS = 20 * 60
def __init__(self, chrome_port=9222, chrome_exe='chromium-browser', proxy=None, ignore_cert_errors=False):
def __init__(
self, chrome_port=9222, chrome_exe='chromium-browser', proxy=None,
ignore_cert_errors=False):
self.command_id = itertools.count(1)
self.chrome_port = chrome_port
self.chrome_exe = chrome_exe
@ -130,7 +131,6 @@ class Browser:
def start(self, proxy=None, cookie_db=None):
if not self._chrome_instance:
# these can raise exceptions
self.chrome_port = self._find_available_port()
self._work_dir = tempfile.TemporaryDirectory()
if cookie_db is not None:
cookie_dir = os.path.join(
@ -199,23 +199,6 @@ class Browser:
cookie_location, exc_info=True)
return cookie_db
def _find_available_port(self):
    '''
    Returns the first tcp port at or above `self.chrome_port` that no
    local tcp socket is using.

    Falls back to `self.chrome_port` unchanged if psutil is not allowed
    to list connections, or if every candidate port is taken.
    '''
    first_choice = self.chrome_port
    try:
        conns = psutil.net_connections(kind="tcp")
    except psutil.AccessDenied:
        # can't inspect the connection table, so just hope for the best
        return first_choice

    # collect the local ports once instead of rescanning the connection
    # list for every candidate port
    ports_in_use = {conn.laddr[1] for conn in conns if conn.laddr}

    for p in range(first_choice, 65535):
        if p not in ports_in_use:
            return p
        self.logger.warning(
                "port %s already open, will try %s", p, p+1)

    return first_choice
def is_running(self):
    '''
    Returns True if `self._websocket_url` is set to a truthy value,
    False otherwise.
    '''
    return True if self._websocket_url else False

View File

@ -29,6 +29,7 @@ import signal
import sqlite3
import datetime
import json
import psutil
class Chrome:
logger = logging.getLogger(__module__ + '.' + __qualname__)
@ -53,6 +54,24 @@ class Chrome:
def __exit__(self, *exc_details):
    '''
    Context manager exit: calls `self.stop()`.

    The exception details passed in by the `with` statement are ignored,
    and nothing is returned, so an exception raised inside the `with`
    block is not suppressed.
    '''
    self.stop()
def _find_available_port(self, default_port=9200):
    '''
    Returns a tcp port that no local tcp socket is currently using.

    `default_port` is returned as-is when it is free. Otherwise ports
    9999 down to 9000 are tried in descending order and the first free
    one is returned, with a warning logged. Searching from a separate
    range keeps one instance from taking the default port of the next.

    `default_port` is also returned, unverified, when psutil is not
    allowed to list connections or when nothing in 9000-9999 is free.
    '''
    try:
        conns = psutil.net_connections(kind='tcp')
    except psutil.AccessDenied:
        # can't inspect the connection table, so just hope for the best
        return default_port

    # collect the local ports once instead of rescanning the connection
    # list for every candidate port
    ports_in_use = {conn.laddr[1] for conn in conns if conn.laddr}

    # the requested port wins whenever nothing else is bound to it
    if default_port not in ports_in_use:
        return default_port

    for p in range(9999, 8999, -1):
        if p not in ports_in_use:
            self.logger.warning(
                    'port %s already in use, using %s instead',
                    default_port, p)
            return p

    return default_port
def start(self):
'''
Returns websocket url to chrome window with about:blank loaded.
@ -60,6 +79,7 @@ class Chrome:
timeout_sec = 600
new_env = os.environ.copy()
new_env['HOME'] = self.user_home_dir
self.port = self._find_available_port(self.port)
chrome_args = [
self.executable, '--use-mock-keychain', # mac thing
'--user-data-dir=%s' % self.user_data_dir,

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b8.dev131',
version='1.1b8.dev132',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',

View File

@ -22,6 +22,9 @@ import http.server
import threading
import os
import brozzler
import brozzler.chrome
import socket
import logging
@pytest.fixture(scope='module')
def httpd(request):
@ -52,3 +55,18 @@ def test_robots(httpd):
site = brozzler.Site(seed=url, user_agent='im/a bAdBOt/uh huh')
assert not brozzler.is_permitted_by_robots(site, url)
def test_find_available_port():
    '''
    Checks that Chrome._find_available_port() returns the requested port
    while it is free, switches to 9999 while the requested port is taken,
    and goes back to the requested port once it is released.

    Returns early, after logging a warning, when psutil is not permitted
    to list tcp connections.
    '''
    # imported here so the test does not rely on a module-level import
    import psutil

    try:
        psutil.net_connections(kind='tcp')
    except psutil.AccessDenied:
        logging.warning(
                'skipping _find_available_port() test because '
                'psutil.net_connections(kind="tcp") raised AccessDenied')
        return

    # A bare instance skips Chrome.__init__ but still exposes the
    # class-level `logger`, which the fallback path uses; passing None as
    # `self` would raise AttributeError on that path.
    chrome = brozzler.chrome.Chrome.__new__(brozzler.chrome.Chrome)

    assert chrome._find_available_port(9800) == 9800

    # the `with` block closes the socket even if the assertion fails
    with socket.socket() as sock:
        sock.bind(('localhost', 9800))
        # NOTE(review): a socket that is only bound may not be reported by
        # psutil on every platform; listening should make it visible --
        # confirm on the platforms the tests run on
        sock.listen(0)
        # assumes nothing else on this machine is using port 9999
        assert chrome._find_available_port(9800) == 9999

    assert chrome._find_available_port(9800) == 9800