mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-18 23:06:01 -04:00
ruff import sorting pass + adding uv.lock (#342)
* ruff import sorting pass * add uv.lock * move comment back to its proper place
This commit is contained in:
parent
21102ca95c
commit
6f011cc6c8
@ -18,9 +18,10 @@ limitations under the License.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import structlog
|
||||
from importlib.metadata import version as _version
|
||||
|
||||
import structlog
|
||||
|
||||
__version__ = _version("brozzler")
|
||||
|
||||
|
||||
@ -106,7 +107,10 @@ def behaviors(behaviors_dir=None):
|
||||
:param behaviors_dir: Directory containing `behaviors.yaml` and
|
||||
`js-templates/`. Defaults to brozzler dir.
|
||||
"""
|
||||
import os, yaml, string
|
||||
import os
|
||||
import string
|
||||
|
||||
import yaml
|
||||
|
||||
global _behaviors
|
||||
if _behaviors is None:
|
||||
@ -121,7 +125,8 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||
"""
|
||||
Returns the javascript behavior string populated with template_parameters.
|
||||
"""
|
||||
import re, json
|
||||
import json
|
||||
import re
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
||||
@ -245,7 +250,11 @@ def thread_raise(thread, exctype):
|
||||
TypeError if `exctype` is not a class
|
||||
ValueError, SystemError in case of unexpected problems
|
||||
"""
|
||||
import ctypes, inspect, threading, structlog
|
||||
import ctypes
|
||||
import inspect
|
||||
import threading
|
||||
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(exctype=exctype, thread=thread)
|
||||
|
||||
@ -297,7 +306,10 @@ _jinja2_env = None
|
||||
def jinja2_environment(behaviors_dir=None):
|
||||
global _jinja2_env
|
||||
if not _jinja2_env:
|
||||
import os, jinja2, json
|
||||
import json
|
||||
import os
|
||||
|
||||
import jinja2
|
||||
|
||||
if behaviors_dir:
|
||||
_loader = jinja2.FileSystemLoader(
|
||||
@ -365,7 +377,8 @@ def _suggest_default_chrome_exe_mac():
|
||||
|
||||
|
||||
def suggest_default_chrome_exe():
|
||||
import shutil, sys
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
# First ask mdfind, which lets us find it in non-default paths
|
||||
if sys.platform == "darwin":
|
||||
@ -395,8 +408,8 @@ import datetime
|
||||
EPOCH_UTC = datetime.datetime.fromtimestamp(0.0, tz=datetime.timezone.utc)
|
||||
|
||||
|
||||
from brozzler.robots import is_permitted_by_robots
|
||||
from brozzler.browser import Browser, BrowserPool, BrowsingException
|
||||
from brozzler.robots import is_permitted_by_robots
|
||||
|
||||
__all__ = [
|
||||
"is_permitted_by_robots",
|
||||
@ -414,17 +427,17 @@ try:
|
||||
|
||||
# All of these imports use doublethink for real and are unsafe
|
||||
# to do if doublethink is unavailable.
|
||||
from brozzler.worker import BrozzlerWorker
|
||||
from brozzler.frontier import RethinkDbFrontier
|
||||
from brozzler.model import (
|
||||
new_job,
|
||||
new_job_file,
|
||||
new_site,
|
||||
InvalidJobConf,
|
||||
Job,
|
||||
Page,
|
||||
Site,
|
||||
InvalidJobConf,
|
||||
new_job,
|
||||
new_job_file,
|
||||
new_site,
|
||||
)
|
||||
from brozzler.worker import BrozzlerWorker
|
||||
|
||||
__all__.extend(
|
||||
[
|
||||
|
@ -16,23 +16,23 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import brozzler
|
||||
import base64
|
||||
import datetime
|
||||
import itertools
|
||||
import json
|
||||
import websocket
|
||||
import time
|
||||
import threading
|
||||
import brozzler
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
import datetime
|
||||
import base64
|
||||
from ipaddress import AddressValueError
|
||||
from brozzler.chrome import Chrome
|
||||
import logging
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
from ipaddress import AddressValueError
|
||||
|
||||
import structlog
|
||||
import urlcanon
|
||||
import websocket
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
import brozzler
|
||||
from brozzler.chrome import Chrome
|
||||
|
||||
|
||||
class BrowsingException(Exception):
|
||||
|
@ -16,20 +16,22 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import urllib.request
|
||||
import time
|
||||
import threading
|
||||
import subprocess
|
||||
import json
|
||||
import os
|
||||
import brozzler
|
||||
import select
|
||||
import re
|
||||
import select
|
||||
import signal
|
||||
import sqlite3
|
||||
import structlog
|
||||
import json
|
||||
import tempfile
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
import structlog
|
||||
|
||||
import brozzler
|
||||
|
||||
|
||||
def check_version(chrome_exe):
|
||||
|
@ -18,27 +18,28 @@ limitations under the License.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import brozzler
|
||||
import brozzler.worker
|
||||
import base64
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import doublethink
|
||||
import signal
|
||||
import string
|
||||
import structlog
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import warnings
|
||||
import yaml
|
||||
import base64
|
||||
import rethinkdb as rdb
|
||||
|
||||
import doublethink
|
||||
import requests
|
||||
import rethinkdb as rdb
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
import brozzler
|
||||
import brozzler.worker
|
||||
from brozzler import suggest_default_chrome_exe
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
|
@ -17,9 +17,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import structlog
|
||||
import sys
|
||||
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
||||
try:
|
||||
@ -32,13 +33,14 @@ except ImportError as e:
|
||||
e,
|
||||
)
|
||||
sys.exit(1)
|
||||
import doublethink
|
||||
import base64
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import importlib
|
||||
|
||||
import doublethink
|
||||
import rethinkdb as rdb
|
||||
import yaml
|
||||
import base64
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
|
||||
@ -284,8 +286,8 @@ def root(path):
|
||||
|
||||
try:
|
||||
import gunicorn.app.base
|
||||
from gunicorn.six import iteritems
|
||||
import gunicorn.glogging
|
||||
from gunicorn.six import iteritems
|
||||
|
||||
class BypassGunicornLogging(gunicorn.glogging.Logger):
|
||||
def setup(self, cfg):
|
||||
@ -327,6 +329,7 @@ except ImportError:
|
||||
|
||||
def main(argv=None):
|
||||
import argparse
|
||||
|
||||
import brozzler.cli
|
||||
|
||||
argv = argv or sys.argv
|
||||
|
@ -18,19 +18,22 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import structlog
|
||||
import sys
|
||||
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
||||
try:
|
||||
import wsgiref.handlers
|
||||
import wsgiref.simple_server
|
||||
|
||||
import pywb
|
||||
import warcprox
|
||||
import warcprox.main
|
||||
import pywb
|
||||
import brozzler.pywb
|
||||
import wsgiref.simple_server
|
||||
import wsgiref.handlers
|
||||
|
||||
import brozzler.dashboard
|
||||
import brozzler.pywb
|
||||
except ImportError as e:
|
||||
logger.critical(
|
||||
'%s: %s\n\nYou might need to run "pip install '
|
||||
@ -40,16 +43,18 @@ except ImportError as e:
|
||||
)
|
||||
sys.exit(1)
|
||||
import argparse
|
||||
import brozzler
|
||||
import brozzler.cli
|
||||
import os
|
||||
import socket
|
||||
import signal
|
||||
import socket
|
||||
import socketserver
|
||||
import threading
|
||||
import time
|
||||
import doublethink
|
||||
import traceback
|
||||
import socketserver
|
||||
|
||||
import doublethink
|
||||
|
||||
import brozzler
|
||||
import brozzler.cli
|
||||
|
||||
|
||||
def _build_arg_parser(argv=None):
|
||||
|
@ -16,15 +16,17 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import brozzler
|
||||
import datetime
|
||||
import random
|
||||
import time
|
||||
import datetime
|
||||
|
||||
import doublethink
|
||||
import rethinkdb as rdb
|
||||
import structlog
|
||||
import doublethink
|
||||
import urlcanon
|
||||
|
||||
import brozzler
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
|
||||
|
||||
|
@ -17,25 +17,27 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import brozzler
|
||||
import base64
|
||||
import cerberus
|
||||
import copy
|
||||
import datetime
|
||||
import doublethink
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import structlog
|
||||
import time
|
||||
import urlcanon
|
||||
import urllib
|
||||
import uuid
|
||||
import yaml
|
||||
import zlib
|
||||
from typing import Optional
|
||||
|
||||
import cerberus
|
||||
import doublethink
|
||||
import structlog
|
||||
import urlcanon
|
||||
import yaml
|
||||
|
||||
import brozzler
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
||||
|
||||
|
@ -19,6 +19,7 @@ limitations under the License.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
@ -28,9 +29,9 @@ try:
|
||||
import pywb.cdx.cdxdomainspecific
|
||||
import pywb.cdx.cdxobject
|
||||
import pywb.cdx.cdxserver
|
||||
import pywb.webapp.query_handler
|
||||
import pywb.framework.basehandlers
|
||||
import pywb.rewrite.wburl
|
||||
import pywb.webapp.query_handler
|
||||
except ImportError as e:
|
||||
logger.critical(
|
||||
'%s: %s\n\nYou might need to run "pip install '
|
||||
@ -39,12 +40,14 @@ except ImportError as e:
|
||||
e,
|
||||
)
|
||||
sys.exit(1)
|
||||
import argparse
|
||||
import json
|
||||
|
||||
import doublethink
|
||||
import rethinkdb as rdb
|
||||
import urlcanon
|
||||
import json
|
||||
|
||||
import brozzler
|
||||
import argparse
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
|
||||
@ -219,13 +222,17 @@ def support_in_progress_warcs():
|
||||
class SomeWbUrl(pywb.rewrite.wburl.WbUrl):
|
||||
def __init__(self, orig_url):
|
||||
import re
|
||||
|
||||
import six
|
||||
|
||||
from six.moves.urllib.parse import urlsplit, urlunsplit
|
||||
from six.moves.urllib.parse import quote_plus, quote, unquote_plus
|
||||
|
||||
from pywb.utils.loaders import to_native_str
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.utils.loaders import to_native_str
|
||||
from six.moves.urllib.parse import (
|
||||
quote,
|
||||
quote_plus,
|
||||
unquote_plus,
|
||||
urlsplit,
|
||||
urlunsplit,
|
||||
)
|
||||
|
||||
pywb.rewrite.wburl.BaseWbUrl.__init__(self)
|
||||
|
||||
@ -372,8 +379,8 @@ def monkey_patch_fuzzy_query():
|
||||
# as such
|
||||
def _calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
|
||||
# imports added here for brozzler
|
||||
from pywb.utils.canonicalize import UrlCanonicalizer, UrlCanonicalizeException
|
||||
import six.moves.urllib.parse as urlparse
|
||||
from pywb.utils.canonicalize import UrlCanonicalizeException, UrlCanonicalizer
|
||||
|
||||
def inc_last_char(x):
|
||||
return x[0:-1] + chr(ord(x[-1]) + 1)
|
||||
|
@ -23,13 +23,15 @@ limitations under the License.
|
||||
"""
|
||||
|
||||
import json
|
||||
import brozzler
|
||||
|
||||
import reppy
|
||||
import reppy.cache
|
||||
import reppy.parser
|
||||
import requests
|
||||
import structlog
|
||||
|
||||
import brozzler
|
||||
|
||||
__all__ = ["is_permitted_by_robots"]
|
||||
|
||||
# monkey-patch reppy to do substring user-agent matching, see top of file
|
||||
|
@ -18,26 +18,29 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import brozzler
|
||||
import brozzler.browser
|
||||
import datetime
|
||||
import io
|
||||
import json
|
||||
import random
|
||||
import socket
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
import json
|
||||
import PIL.Image
|
||||
import io
|
||||
import socket
|
||||
import random
|
||||
import requests
|
||||
import structlog
|
||||
import urllib3
|
||||
from urllib3.exceptions import TimeoutError, ProxyError
|
||||
|
||||
import doublethink
|
||||
import tempfile
|
||||
import urlcanon
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
import PIL.Image
|
||||
import requests
|
||||
import rethinkdb as rdb
|
||||
import structlog
|
||||
import urlcanon
|
||||
import urllib3
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from urllib3.exceptions import ProxyError, TimeoutError
|
||||
|
||||
import brozzler
|
||||
import brozzler.browser
|
||||
|
||||
from . import metrics
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
|
@ -16,21 +16,24 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import yt_dlp
|
||||
from yt_dlp.utils import match_filter_func, ExtractorError
|
||||
import brozzler
|
||||
import urllib.request
|
||||
import tempfile
|
||||
import urlcanon
|
||||
import os
|
||||
import json
|
||||
import doublethink
|
||||
import datetime
|
||||
from . import metrics
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import structlog
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
import doublethink
|
||||
import structlog
|
||||
import urlcanon
|
||||
import yt_dlp
|
||||
from yt_dlp.utils import ExtractorError, match_filter_func
|
||||
|
||||
import brozzler
|
||||
|
||||
from . import metrics
|
||||
|
||||
thread_local = threading.local()
|
||||
|
||||
|
3
setup.py
3
setup.py
@ -17,9 +17,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import setuptools
|
||||
import os
|
||||
|
||||
import setuptools
|
||||
|
||||
|
||||
def find_package_data(package):
|
||||
pkg_data = []
|
||||
|
@ -17,17 +17,18 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import brozzler
|
||||
import argparse
|
||||
import http.server
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import http.server
|
||||
import threading
|
||||
import argparse
|
||||
import urllib
|
||||
import json
|
||||
import threading
|
||||
import socket
|
||||
import threading
|
||||
import urllib
|
||||
|
||||
import pytest
|
||||
|
||||
import brozzler
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
brozzler.cli.add_common_options(arg_parser)
|
||||
|
@ -17,11 +17,13 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import brozzler.cli
|
||||
import importlib.metadata
|
||||
import pytest
|
||||
import subprocess
|
||||
|
||||
import doublethink
|
||||
import pytest
|
||||
|
||||
import brozzler.cli
|
||||
|
||||
|
||||
def console_scripts():
|
||||
|
@ -18,23 +18,23 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import datetime
|
||||
import http.server
|
||||
import threading
|
||||
import urllib.request
|
||||
import os
|
||||
import socket
|
||||
import doublethink
|
||||
import time
|
||||
import brozzler
|
||||
import datetime
|
||||
import requests
|
||||
import subprocess
|
||||
import http.server
|
||||
import structlog
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
import doublethink
|
||||
import pytest
|
||||
import requests
|
||||
import structlog
|
||||
import warcprox
|
||||
|
||||
import brozzler
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
||||
|
@ -17,23 +17,24 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import datetime
|
||||
import http.server
|
||||
import threading
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
import brozzler
|
||||
import brozzler.chrome
|
||||
import brozzler.ydl
|
||||
import yaml
|
||||
import datetime
|
||||
import requests
|
||||
import tempfile
|
||||
import uuid
|
||||
import socket
|
||||
import time
|
||||
import sys
|
||||
import threading
|
||||
from unittest import mock
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
@ -22,10 +22,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def main(argv=[]):
|
||||
|
@ -25,10 +25,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
try:
|
||||
from shlex import quote
|
||||
|
Loading…
x
Reference in New Issue
Block a user