diff --git a/.github/workflows/python-formatting.yml b/.github/workflows/python-formatting.yml index afbdff5..1a94232 100644 --- a/.github/workflows/python-formatting.yml +++ b/.github/workflows/python-formatting.yml @@ -22,10 +22,10 @@ jobs: - name: Create virtual environment run: python -m venv venv - - name: Install black + - name: Install ruff run: | ./venv/bin/pip install --upgrade pip - ./venv/bin/pip install black + ./venv/bin/pip install ruff - name: Run formatting check run: make ck-format diff --git a/Makefile b/Makefile index f99dcc9..ae16af0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: format format: - venv/bin/black -t py35 -t py36 -t py37 -t py38 -t py39 -t py310 -t py311 -t py312 . + venv/bin/ruff format --target-version py37 . .PHONY: ck-format ck-format: - venv/bin/black --check . + venv/bin/ruff format --check --target-version py37 . diff --git a/brozzler/chrome.py b/brozzler/chrome.py index 140ffd7..1b630cc 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -367,7 +367,7 @@ class Chrome: os.killpg(self.chrome_process.pid, signal.SIGKILL) status = self.chrome_process.wait() pid_logger.warning( - "chrome reaped after killing with " "SIGKILL", + "chrome reaped after killing with SIGKILL", status=status, ) diff --git a/brozzler/cli.py b/brozzler/cli.py index e49c5aa..73c002d 100755 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -953,7 +953,7 @@ def brozzler_list_pages(argv=None): "--claimed", dest="claimed", action="store_true", - help=("limit to pages that are currently claimed by a brozzler " "worker"), + help=("limit to pages that are currently claimed by a brozzler worker"), ) add_rethinkdb_options(arg_parser) add_common_options(arg_parser, argv) @@ -1024,22 +1024,21 @@ def brozzler_purge(argv=None): dest="job", metavar="JOB_ID", help=( - "purge crawl state from rethinkdb for a job, including all " - "sites and pages" + "purge crawl state from rethinkdb for a job, including all sites and pages" ), ) group.add_argument( "--site", dest="site", metavar="SITE_ID", - help=("purge crawl state from rethinkdb for a site, including all " "pages"), + help=("purge crawl state from rethinkdb for a site, including all pages"), ) group.add_argument( "--finished-before", dest="finished_before", metavar="YYYY-MM-DD", help=( - "purge crawl state from rethinkdb for a jobs that ended " "before this date" + "purge crawl state from rethinkdb for a jobs that ended before this date" ), ) arg_parser.add_argument( diff --git a/brozzler/dashboard/__init__.py b/brozzler/dashboard/__init__.py index fcac6c5..e623a01 100644 --- a/brozzler/dashboard/__init__.py +++ b/brozzler/dashboard/__init__.py @@ -334,7 +334,7 @@ def main(argv=None): prog=os.path.basename(argv[0]), formatter_class=argparse.RawDescriptionHelpFormatter, description=( - "brozzler-dashboard - web application for viewing brozzler " "crawl status" + "brozzler-dashboard - web application for viewing brozzler crawl status" ), epilog=( "brozzler-dashboard has no command line options, but can be " diff --git a/brozzler/easy.py b/brozzler/easy.py index 28a6c14..05f01a0 100644 --- a/brozzler/easy.py +++ b/brozzler/easy.py @@ -81,8 +81,7 @@ def _build_arg_parser(argv=None): dest="cacert", default="./%s-warcprox-ca.pem" % socket.gethostname(), help=( - "warcprox CA certificate file; if file does not exist, it " - "will be created" + "warcprox CA certificate file; if file does not exist, it will be created" ), ) arg_parser.add_argument( @@ -95,7 +94,7 @@ def _build_arg_parser(argv=None): "--onion-tor-socks-proxy", dest="onion_tor_socks_proxy", default=None, - help=("host:port of tor socks proxy, used only to connect to " ".onion sites"), + help=("host:port of tor socks proxy, used only to connect to .onion sites"), ) # brozzler-worker args @@ -112,7 +111,7 @@ def _build_arg_parser(argv=None): dest="max_browsers", type=int, default=1, - help=("max number of chrome instances simultaneously " "browsing pages"), + help=("max number of chrome instances simultaneously browsing pages"), ) # pywb args diff --git a/brozzler/pywb.py b/brozzler/pywb.py index de0fa44..5d714d0 100644 --- a/brozzler/pywb.py +++ b/brozzler/pywb.py @@ -447,8 +447,6 @@ def main(argv=sys.argv): wayback_cli = BrozzlerWaybackCli( args=argv[1:], default_port=8880, - desc=( - "brozzler-wayback - pywb wayback (monkey-patched for use " "with brozzler)" - ), + desc=("brozzler-wayback - pywb wayback (monkey-patched for use with brozzler)"), ) wayback_cli.run() diff --git a/brozzler/robots.py b/brozzler/robots.py index 0c9d699..2d3751e 100644 --- a/brozzler/robots.py +++ b/brozzler/robots.py @@ -120,7 +120,7 @@ def is_permitted_by_robots(site, url, proxy=None): raise brozzler.ProxyError(e) else: structlog.get_logger(logger_name=__name__).warning( - "returning true (permitted) after problem fetching " "robots.txt", + "returning true (permitted) after problem fetching robots.txt", url=url, raised_exception=e, ) diff --git a/brozzler/worker.py b/brozzler/worker.py index 7a20658..b8befef 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -169,7 +169,7 @@ class BrozzlerWorker: svc = self._choose_warcprox() if svc is None: raise brozzler.ProxyError( - "no available instances of warcprox in the service " "registry" + "no available instances of warcprox in the service registry" ) site.proxy = "%s:%s" % (svc["host"], svc["port"]) site.save() @@ -735,7 +735,7 @@ class BrozzlerWorker: with self._start_stop_lock: if self._thread: self.logger.warning( - "ignoring start request because self._thread is " "not None" + "ignoring start request because self._thread is not None" ) return self._thread = threading.Thread(target=self.run, name="BrozzlerWorker") diff --git a/brozzler/ydl.py b/brozzler/ydl.py index de88dda..7fab1f7 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -434,7 +434,7 @@ def _try_youtube_dl(worker, ydl, site, page): if worker._using_warcprox(site): info_json = json.dumps(ie_result, sort_keys=True, indent=4) logger.info( - "sending WARCPROX_WRITE_RECORD request to warcprox " "with yt-dlp json", + "sending WARCPROX_WRITE_RECORD request to warcprox with yt-dlp json", url=ydl.url, ) worker._warcprox_write_record( diff --git a/vagrant/vagrant-brozzler-new-site.py b/vagrant/vagrant-brozzler-new-site.py index 244bf6b..5e7503a 100755 --- a/vagrant/vagrant-brozzler-new-site.py +++ b/vagrant/vagrant-brozzler-new-site.py @@ -82,7 +82,7 @@ def main(argv=[]): os.chdir(os.path.dirname(__file__)) cmd = ( - "/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site " "%s %s" + "/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site %s %s" ) % (" ".join(options), args.seed) subprocess.call(["vagrant", "ssh", "--", cmd])