chore: use ruff for formatting

There are a few minor changes here compared to black; it flagged
unnecessary string concatenations, and has slightly different
opinions on line length.
This commit is contained in:
Misty De Méo 2025-02-25 14:18:54 -08:00
parent ab8970ff3e
commit a23dd6a923
11 changed files with 19 additions and 23 deletions

View file

@@ -22,10 +22,10 @@ jobs:
- name: Create virtual environment - name: Create virtual environment
run: python -m venv venv run: python -m venv venv
- name: Install black - name: Install ruff
run: | run: |
./venv/bin/pip install --upgrade pip ./venv/bin/pip install --upgrade pip
./venv/bin/pip install black ./venv/bin/pip install ruff
- name: Run formatting check - name: Run formatting check
run: make ck-format run: make ck-format

View file

@@ -1,7 +1,7 @@
.PHONY: format .PHONY: format
format: format:
venv/bin/black -t py35 -t py36 -t py37 -t py38 -t py39 -t py310 -t py311 -t py312 . venv/bin/ruff format --target-version py37 .
.PHONY: ck-format .PHONY: ck-format
ck-format: ck-format:
venv/bin/black --check . venv/bin/ruff format --check --target-version py37 .

View file

@@ -367,7 +367,7 @@ class Chrome:
os.killpg(self.chrome_process.pid, signal.SIGKILL) os.killpg(self.chrome_process.pid, signal.SIGKILL)
status = self.chrome_process.wait() status = self.chrome_process.wait()
pid_logger.warning( pid_logger.warning(
"chrome reaped after killing with " "SIGKILL", "chrome reaped after killing with SIGKILL",
status=status, status=status,
) )

View file

@@ -937,7 +937,7 @@ def brozzler_list_pages(argv=None):
"--claimed", "--claimed",
dest="claimed", dest="claimed",
action="store_true", action="store_true",
help=("limit to pages that are currently claimed by a brozzler " "worker"), help=("limit to pages that are currently claimed by a brozzler worker"),
) )
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser, argv) add_common_options(arg_parser, argv)
@@ -1008,22 +1008,21 @@ def brozzler_purge(argv=None):
dest="job", dest="job",
metavar="JOB_ID", metavar="JOB_ID",
help=( help=(
"purge crawl state from rethinkdb for a job, including all " "purge crawl state from rethinkdb for a job, including all sites and pages"
"sites and pages"
), ),
) )
group.add_argument( group.add_argument(
"--site", "--site",
dest="site", dest="site",
metavar="SITE_ID", metavar="SITE_ID",
help=("purge crawl state from rethinkdb for a site, including all " "pages"), help=("purge crawl state from rethinkdb for a site, including all pages"),
) )
group.add_argument( group.add_argument(
"--finished-before", "--finished-before",
dest="finished_before", dest="finished_before",
metavar="YYYY-MM-DD", metavar="YYYY-MM-DD",
help=( help=(
"purge crawl state from rethinkdb for a jobs that ended " "before this date" "purge crawl state from rethinkdb for a jobs that ended before this date"
), ),
) )
arg_parser.add_argument( arg_parser.add_argument(

View file

@@ -334,7 +334,7 @@ def main(argv=None):
prog=os.path.basename(argv[0]), prog=os.path.basename(argv[0]),
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
description=( description=(
"brozzler-dashboard - web application for viewing brozzler " "crawl status" "brozzler-dashboard - web application for viewing brozzler crawl status"
), ),
epilog=( epilog=(
"brozzler-dashboard has no command line options, but can be " "brozzler-dashboard has no command line options, but can be "

View file

@@ -81,8 +81,7 @@ def _build_arg_parser(argv=None):
dest="cacert", dest="cacert",
default="./%s-warcprox-ca.pem" % socket.gethostname(), default="./%s-warcprox-ca.pem" % socket.gethostname(),
help=( help=(
"warcprox CA certificate file; if file does not exist, it " "warcprox CA certificate file; if file does not exist, it will be created"
"will be created"
), ),
) )
arg_parser.add_argument( arg_parser.add_argument(
@@ -95,7 +94,7 @@ def _build_arg_parser(argv=None):
"--onion-tor-socks-proxy", "--onion-tor-socks-proxy",
dest="onion_tor_socks_proxy", dest="onion_tor_socks_proxy",
default=None, default=None,
help=("host:port of tor socks proxy, used only to connect to " ".onion sites"), help=("host:port of tor socks proxy, used only to connect to .onion sites"),
) )
# brozzler-worker args # brozzler-worker args
@@ -112,7 +111,7 @@ def _build_arg_parser(argv=None):
dest="max_browsers", dest="max_browsers",
type=int, type=int,
default=1, default=1,
help=("max number of chrome instances simultaneously " "browsing pages"), help=("max number of chrome instances simultaneously browsing pages"),
) )
# pywb args # pywb args

View file

@@ -447,8 +447,6 @@ def main(argv=sys.argv):
wayback_cli = BrozzlerWaybackCli( wayback_cli = BrozzlerWaybackCli(
args=argv[1:], args=argv[1:],
default_port=8880, default_port=8880,
desc=( desc=("brozzler-wayback - pywb wayback (monkey-patched for use with brozzler)"),
"brozzler-wayback - pywb wayback (monkey-patched for use " "with brozzler)"
),
) )
wayback_cli.run() wayback_cli.run()

View file

@@ -120,7 +120,7 @@ def is_permitted_by_robots(site, url, proxy=None):
raise brozzler.ProxyError(e) raise brozzler.ProxyError(e)
else: else:
structlog.get_logger(logger_name=__name__).warning( structlog.get_logger(logger_name=__name__).warning(
"returning true (permitted) after problem fetching " "robots.txt", "returning true (permitted) after problem fetching robots.txt",
url=url, url=url,
raised_exception=e, raised_exception=e,
) )

View file

@@ -168,7 +168,7 @@ class BrozzlerWorker:
svc = self._choose_warcprox() svc = self._choose_warcprox()
if svc is None: if svc is None:
raise brozzler.ProxyError( raise brozzler.ProxyError(
"no available instances of warcprox in the service " "registry" "no available instances of warcprox in the service registry"
) )
site.proxy = "%s:%s" % (svc["host"], svc["port"]) site.proxy = "%s:%s" % (svc["host"], svc["port"])
site.save() site.save()
@@ -759,7 +759,7 @@ class BrozzlerWorker:
with self._start_stop_lock: with self._start_stop_lock:
if self._thread: if self._thread:
self.logger.warning( self.logger.warning(
"ignoring start request because self._thread is " "not None" "ignoring start request because self._thread is not None"
) )
return return
self._thread = threading.Thread(target=self.run, name="BrozzlerWorker") self._thread = threading.Thread(target=self.run, name="BrozzlerWorker")

View file

@@ -426,7 +426,7 @@ def _try_youtube_dl(worker, ydl, site, page):
if worker._using_warcprox(site): if worker._using_warcprox(site):
info_json = json.dumps(ie_result, sort_keys=True, indent=4) info_json = json.dumps(ie_result, sort_keys=True, indent=4)
logger.info( logger.info(
"sending WARCPROX_WRITE_RECORD request to warcprox " "with yt-dlp json", "sending WARCPROX_WRITE_RECORD request to warcprox with yt-dlp json",
url=ydl.url, url=ydl.url,
) )
worker._warcprox_write_record( worker._warcprox_write_record(

View file

@@ -82,7 +82,7 @@ def main(argv=[]):
os.chdir(os.path.dirname(__file__)) os.chdir(os.path.dirname(__file__))
cmd = ( cmd = (
"/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site " "%s %s" "/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site %s %s"
) % (" ".join(options), args.seed) ) % (" ".join(options), args.seed)
subprocess.call(["vagrant", "ssh", "--", cmd]) subprocess.call(["vagrant", "ssh", "--", cmd])