diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 068b904..f305015 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -18,9 +18,10 @@ limitations under the License. """ import logging -import structlog from importlib.metadata import version as _version +import structlog + __version__ = _version("brozzler") @@ -106,7 +107,10 @@ def behaviors(behaviors_dir=None): :param behaviors_dir: Directory containing `behaviors.yaml` and `js-templates/`. Defaults to brozzler dir. """ - import os, yaml, string + import os + import string + + import yaml global _behaviors if _behaviors is None: @@ -121,7 +125,8 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None): """ Returns the javascript behavior string populated with template_parameters. """ - import re, json + import json + import re logger = structlog.get_logger(logger_name=__name__) @@ -245,7 +250,11 @@ def thread_raise(thread, exctype): TypeError if `exctype` is not a class ValueError, SystemError in case of unexpected problems """ - import ctypes, inspect, threading, structlog + import ctypes + import inspect + import threading + + import structlog logger = structlog.get_logger(exctype=exctype, thread=thread) @@ -297,7 +306,10 @@ _jinja2_env = None def jinja2_environment(behaviors_dir=None): global _jinja2_env if not _jinja2_env: - import os, jinja2, json + import json + import os + + import jinja2 if behaviors_dir: _loader = jinja2.FileSystemLoader( @@ -365,7 +377,8 @@ def _suggest_default_chrome_exe_mac(): def suggest_default_chrome_exe(): - import shutil, sys + import shutil + import sys # First ask mdfind, which lets us find it in non-default paths if sys.platform == "darwin": @@ -395,8 +408,8 @@ import datetime EPOCH_UTC = datetime.datetime.fromtimestamp(0.0, tz=datetime.timezone.utc) -from brozzler.robots import is_permitted_by_robots from brozzler.browser import Browser, BrowserPool, BrowsingException +from brozzler.robots import is_permitted_by_robots __all__ = [ "is_permitted_by_robots", @@ -412,20 +425,21 @@ __all__ = [ try: import doublethink - # All of these imports use doublethink for real and are unsafe - # to do if doublethink is unavailable. - from brozzler.worker import BrozzlerWorker from brozzler.frontier import RethinkDbFrontier from brozzler.model import ( - new_job, - new_job_file, - new_site, + InvalidJobConf, Job, Page, Site, - InvalidJobConf, + new_job, + new_job_file, + new_site, ) + # All of these imports use doublethink for real and are unsafe + # to do if doublethink is unavailable. + from brozzler.worker import BrozzlerWorker + __all__.extend( [ "Page", diff --git a/brozzler/browser.py b/brozzler/browser.py index a5fff4e..6b872e4 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -16,23 +16,23 @@ See the License for the specific language governing permissions and limitations under the License. """ -import logging -import time -import brozzler +import base64 +import datetime import itertools import json -import websocket -import time -import threading -import brozzler -from requests.structures import CaseInsensitiveDict -import datetime -import base64 -from ipaddress import AddressValueError -from brozzler.chrome import Chrome +import logging import socket +import threading +import time +from ipaddress import AddressValueError + import structlog import urlcanon +import websocket +from requests.structures import CaseInsensitiveDict + +import brozzler +from brozzler.chrome import Chrome class BrowsingException(Exception): diff --git a/brozzler/chrome.py b/brozzler/chrome.py index 1b630cc..7dd6066 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -16,20 +16,22 @@ See the License for the specific language governing permissions and limitations under the License. """ -import urllib.request -import time -import threading -import subprocess +import json import os -import brozzler -import select import re +import select import signal import sqlite3 -import structlog -import json -import tempfile +import subprocess import sys +import tempfile +import threading +import time +import urllib.request + +import structlog + +import brozzler def check_version(chrome_exe): diff --git a/brozzler/cli.py b/brozzler/cli.py index 27e7e81..1fe5318 100755 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -18,27 +18,28 @@ limitations under the License. """ import argparse -import brozzler -import brozzler.worker +import base64 import datetime import json import logging import os import re -import requests -import doublethink import signal import string -import structlog import sys import threading import time import traceback import warnings -import yaml -import base64 -import rethinkdb as rdb +import doublethink +import requests +import rethinkdb as rdb +import structlog +import yaml + +import brozzler +import brozzler.worker from brozzler import suggest_default_chrome_exe r = rdb.RethinkDB() diff --git a/brozzler/dashboard/__init__.py b/brozzler/dashboard/__init__.py index e623a01..15a836c 100644 --- a/brozzler/dashboard/__init__.py +++ b/brozzler/dashboard/__init__.py @@ -17,9 +17,10 @@ See the License for the specific language governing permissions and limitations under the License. """ -import structlog import sys +import structlog + logger = structlog.get_logger(logger_name=__name__) try: @@ -32,13 +33,14 @@ except ImportError as e: e, ) sys.exit(1) -import doublethink +import base64 +import importlib import json import os -import importlib + +import doublethink import rethinkdb as rdb import yaml -import base64 r = rdb.RethinkDB() @@ -284,8 +286,8 @@ def root(path): try: import gunicorn.app.base - from gunicorn.six import iteritems import gunicorn.glogging + from gunicorn.six import iteritems class BypassGunicornLogging(gunicorn.glogging.Logger): def setup(self, cfg): @@ -327,6 +329,7 @@ except ImportError: def main(argv=None): import argparse + import brozzler.cli argv = argv or sys.argv diff --git a/brozzler/easy.py b/brozzler/easy.py index 05f01a0..fc213f8 100644 --- a/brozzler/easy.py +++ b/brozzler/easy.py @@ -18,19 +18,22 @@ See the License for the specific language governing permissions and limitations under the License. """ -import structlog import sys +import structlog + logger = structlog.get_logger(logger_name=__name__) try: + import wsgiref.handlers + import wsgiref.simple_server + + import pywb import warcprox import warcprox.main - import pywb - import brozzler.pywb - import wsgiref.simple_server - import wsgiref.handlers + import brozzler.dashboard + import brozzler.pywb except ImportError as e: logger.critical( '%s: %s\n\nYou might need to run "pip install ' @@ -40,16 +43,18 @@ except ImportError as e: ) sys.exit(1) import argparse -import brozzler -import brozzler.cli import os -import socket import signal +import socket +import socketserver import threading import time -import doublethink import traceback -import socketserver + +import doublethink + +import brozzler +import brozzler.cli def _build_arg_parser(argv=None): diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 0ef3924..b37d5d1 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -16,15 +16,17 @@ See the License for the specific language governing permissions and limitations under the License. """ -import brozzler +import datetime import random import time -import datetime + +import doublethink import rethinkdb as rdb import structlog -import doublethink import urlcanon +import brozzler + r = rdb.RethinkDB() diff --git a/brozzler/model.py b/brozzler/model.py index 1099c3b..49a9905 100644 --- a/brozzler/model.py +++ b/brozzler/model.py @@ -17,25 +17,27 @@ See the License for the specific language governing permissions and limitations under the License. """ -import brozzler import base64 -import cerberus import copy import datetime -import doublethink import hashlib import json import os import re -import structlog import time -import urlcanon import urllib import uuid -import yaml import zlib from typing import Optional +import cerberus +import doublethink +import structlog +import urlcanon +import yaml + +import brozzler + logger = structlog.get_logger(logger_name=__name__) diff --git a/brozzler/pywb.py b/brozzler/pywb.py index 5d714d0..b4b6280 100644 --- a/brozzler/pywb.py +++ b/brozzler/pywb.py @@ -19,6 +19,7 @@ limitations under the License. """ import sys + import structlog logger = structlog.get_logger(logger_name=__name__) @@ -28,9 +29,9 @@ try: import pywb.cdx.cdxdomainspecific import pywb.cdx.cdxobject import pywb.cdx.cdxserver - import pywb.webapp.query_handler import pywb.framework.basehandlers import pywb.rewrite.wburl + import pywb.webapp.query_handler except ImportError as e: logger.critical( '%s: %s\n\nYou might need to run "pip install ' @@ -39,12 +40,14 @@ except ImportError as e: e, ) sys.exit(1) +import argparse +import json + import doublethink import rethinkdb as rdb import urlcanon -import json + import brozzler -import argparse r = rdb.RethinkDB() @@ -219,13 +222,17 @@ def support_in_progress_warcs(): class SomeWbUrl(pywb.rewrite.wburl.WbUrl): def __init__(self, orig_url): import re + import six - - from six.moves.urllib.parse import urlsplit, urlunsplit - from six.moves.urllib.parse import quote_plus, quote, unquote_plus - - from pywb.utils.loaders import to_native_str from pywb.rewrite.wburl import WbUrl + from pywb.utils.loaders import to_native_str + from six.moves.urllib.parse import ( + quote, + quote_plus, + unquote_plus, + urlsplit, + urlunsplit, + ) pywb.rewrite.wburl.BaseWbUrl.__init__(self) @@ -372,8 +379,8 @@ def monkey_patch_fuzzy_query(): # as such def _calc_search_range(url, match_type, surt_ordered=True, url_canon=None): # imports added here for brozzler - from pywb.utils.canonicalize import UrlCanonicalizer, UrlCanonicalizeException import six.moves.urllib.parse as urlparse + from pywb.utils.canonicalize import UrlCanonicalizeException, UrlCanonicalizer def inc_last_char(x): return x[0:-1] + chr(ord(x[-1]) + 1) diff --git a/brozzler/robots.py b/brozzler/robots.py index 2d3751e..7f249dd 100644 --- a/brozzler/robots.py +++ b/brozzler/robots.py @@ -23,13 +23,15 @@ limitations under the License. """ import json -import brozzler + import reppy import reppy.cache import reppy.parser import requests import structlog +import brozzler + __all__ = ["is_permitted_by_robots"] # monkey-patch reppy to do substring user-agent matching, see top of file diff --git a/brozzler/worker.py b/brozzler/worker.py index 868ded4..3a58bfa 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -18,26 +18,29 @@ See the License for the specific language governing permissions and limitations under the License. """ -import brozzler -import brozzler.browser import datetime +import io +import json +import random +import socket +import tempfile import threading import time import urllib.request -import json -import PIL.Image -import io -import socket -import random -import requests -import structlog -import urllib3 -from urllib3.exceptions import TimeoutError, ProxyError + import doublethink -import tempfile -import urlcanon -from requests.structures import CaseInsensitiveDict +import PIL.Image +import requests import rethinkdb as rdb +import structlog +import urlcanon +import urllib3 +from requests.structures import CaseInsensitiveDict +from urllib3.exceptions import ProxyError, TimeoutError + +import brozzler +import brozzler.browser + from . import metrics r = rdb.RethinkDB() diff --git a/brozzler/ydl.py b/brozzler/ydl.py index ae756a0..f81d551 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -16,21 +16,24 @@ See the License for the specific language governing permissions and limitations under the License. """ -import yt_dlp -from yt_dlp.utils import match_filter_func, ExtractorError -import brozzler -import urllib.request -import tempfile -import urlcanon -import os -import json -import doublethink import datetime -from . import metrics +import json +import os import random -import structlog +import tempfile import threading import time +import urllib.request + +import doublethink +import structlog +import urlcanon +import yt_dlp +from yt_dlp.utils import ExtractorError, match_filter_func + +import brozzler + +from . import metrics thread_local = threading.local() diff --git a/setup.py b/setup.py index 8751e08..46bd957 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,10 @@ See the License for the specific language governing permissions and limitations under the License. """ -import setuptools import os +import setuptools + def find_package_data(package): pkg_data = [] diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py index c187c42..03124a4 100755 --- a/tests/test_brozzling.py +++ b/tests/test_brozzling.py @@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations under the License. """ -import pytest -import brozzler +import argparse +import http.server +import json import logging import os -import http.server -import threading -import argparse -import urllib -import json -import threading import socket +import threading +import urllib + +import pytest + +import brozzler arg_parser = argparse.ArgumentParser() brozzler.cli.add_common_options(arg_parser) diff --git a/tests/test_cli.py b/tests/test_cli.py index 076a466..3c8f0c5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -17,11 +17,13 @@ See the License for the specific language governing permissions and limitations under the License. """ -import brozzler.cli import importlib.metadata -import pytest import subprocess + import doublethink +import pytest + +import brozzler.cli def console_scripts(): diff --git a/tests/test_cluster.py b/tests/test_cluster.py index d3ece4b..5949fd3 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -18,23 +18,23 @@ See the License for the specific language governing permissions and limitations under the License. """ -import pytest +import datetime import http.server -import threading -import urllib.request import os import socket -import doublethink -import time -import brozzler -import datetime -import requests import subprocess -import http.server -import structlog import sys +import threading +import time +import urllib.request + +import doublethink +import pytest +import requests +import structlog import warcprox +import brozzler logger = structlog.get_logger(logger_name=__name__) diff --git a/tests/test_units.py b/tests/test_units.py index 096f632..33979bc 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -17,23 +17,24 @@ See the License for the specific language governing permissions and limitations under the License. """ -import pytest +import datetime import http.server -import threading import os +import socket +import sys +import tempfile +import threading +import time +import uuid +from unittest import mock + +import pytest +import requests +import yaml + import brozzler import brozzler.chrome import brozzler.ydl -import yaml -import datetime -import requests -import tempfile -import uuid -import socket -import time -import sys -import threading -from unittest import mock @pytest.fixture(scope="module") diff --git a/vagrant/vagrant-brozzler-new-job.py b/vagrant/vagrant-brozzler-new-job.py index b653b2b..44854d3 100755 --- a/vagrant/vagrant-brozzler-new-job.py +++ b/vagrant/vagrant-brozzler-new-job.py @@ -22,10 +22,10 @@ See the License for the specific language governing permissions and limitations under the License. """ -import sys -import os import argparse +import os import subprocess +import sys def main(argv=[]): diff --git a/vagrant/vagrant-brozzler-new-site.py b/vagrant/vagrant-brozzler-new-site.py index 5e7503a..b76ff84 100755 --- a/vagrant/vagrant-brozzler-new-site.py +++ b/vagrant/vagrant-brozzler-new-site.py @@ -25,10 +25,10 @@ See the License for the specific language governing permissions and limitations under the License. """ -import sys -import os import argparse +import os import subprocess +import sys try: from shlex import quote