mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-22 14:30:53 -04:00
chore: use ruff for formatting
There are a few minor changes here compared to black: ruff flagged unnecessary implicit string concatenations, and it has slightly different opinions on line length.
This commit is contained in:
parent
ab8970ff3e
commit
a23dd6a923
11 changed files with 19 additions and 23 deletions
4
.github/workflows/python-formatting.yml
vendored
4
.github/workflows/python-formatting.yml
vendored
|
@ -22,10 +22,10 @@ jobs:
|
||||||
- name: Create virtual environment
|
- name: Create virtual environment
|
||||||
run: python -m venv venv
|
run: python -m venv venv
|
||||||
|
|
||||||
- name: Install black
|
- name: Install ruff
|
||||||
run: |
|
run: |
|
||||||
./venv/bin/pip install --upgrade pip
|
./venv/bin/pip install --upgrade pip
|
||||||
./venv/bin/pip install black
|
./venv/bin/pip install ruff
|
||||||
|
|
||||||
- name: Run formatting check
|
- name: Run formatting check
|
||||||
run: make ck-format
|
run: make ck-format
|
||||||
|
|
4
Makefile
4
Makefile
|
@ -1,7 +1,7 @@
|
||||||
.PHONY: format
|
.PHONY: format
|
||||||
format:
|
format:
|
||||||
venv/bin/black -t py35 -t py36 -t py37 -t py38 -t py39 -t py310 -t py311 -t py312 .
|
venv/bin/ruff format --target-version py37 .
|
||||||
|
|
||||||
.PHONY: ck-format
|
.PHONY: ck-format
|
||||||
ck-format:
|
ck-format:
|
||||||
venv/bin/black --check .
|
venv/bin/ruff format --check --target-version py37 .
|
||||||
|
|
|
@ -367,7 +367,7 @@ class Chrome:
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
||||||
status = self.chrome_process.wait()
|
status = self.chrome_process.wait()
|
||||||
pid_logger.warning(
|
pid_logger.warning(
|
||||||
"chrome reaped after killing with " "SIGKILL",
|
"chrome reaped after killing with SIGKILL",
|
||||||
status=status,
|
status=status,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -937,7 +937,7 @@ def brozzler_list_pages(argv=None):
|
||||||
"--claimed",
|
"--claimed",
|
||||||
dest="claimed",
|
dest="claimed",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help=("limit to pages that are currently claimed by a brozzler " "worker"),
|
help=("limit to pages that are currently claimed by a brozzler worker"),
|
||||||
)
|
)
|
||||||
add_rethinkdb_options(arg_parser)
|
add_rethinkdb_options(arg_parser)
|
||||||
add_common_options(arg_parser, argv)
|
add_common_options(arg_parser, argv)
|
||||||
|
@ -1008,22 +1008,21 @@ def brozzler_purge(argv=None):
|
||||||
dest="job",
|
dest="job",
|
||||||
metavar="JOB_ID",
|
metavar="JOB_ID",
|
||||||
help=(
|
help=(
|
||||||
"purge crawl state from rethinkdb for a job, including all "
|
"purge crawl state from rethinkdb for a job, including all sites and pages"
|
||||||
"sites and pages"
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--site",
|
"--site",
|
||||||
dest="site",
|
dest="site",
|
||||||
metavar="SITE_ID",
|
metavar="SITE_ID",
|
||||||
help=("purge crawl state from rethinkdb for a site, including all " "pages"),
|
help=("purge crawl state from rethinkdb for a site, including all pages"),
|
||||||
)
|
)
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--finished-before",
|
"--finished-before",
|
||||||
dest="finished_before",
|
dest="finished_before",
|
||||||
metavar="YYYY-MM-DD",
|
metavar="YYYY-MM-DD",
|
||||||
help=(
|
help=(
|
||||||
"purge crawl state from rethinkdb for a jobs that ended " "before this date"
|
"purge crawl state from rethinkdb for a jobs that ended before this date"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
|
|
|
@ -334,7 +334,7 @@ def main(argv=None):
|
||||||
prog=os.path.basename(argv[0]),
|
prog=os.path.basename(argv[0]),
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
description=(
|
description=(
|
||||||
"brozzler-dashboard - web application for viewing brozzler " "crawl status"
|
"brozzler-dashboard - web application for viewing brozzler crawl status"
|
||||||
),
|
),
|
||||||
epilog=(
|
epilog=(
|
||||||
"brozzler-dashboard has no command line options, but can be "
|
"brozzler-dashboard has no command line options, but can be "
|
||||||
|
|
|
@ -81,8 +81,7 @@ def _build_arg_parser(argv=None):
|
||||||
dest="cacert",
|
dest="cacert",
|
||||||
default="./%s-warcprox-ca.pem" % socket.gethostname(),
|
default="./%s-warcprox-ca.pem" % socket.gethostname(),
|
||||||
help=(
|
help=(
|
||||||
"warcprox CA certificate file; if file does not exist, it "
|
"warcprox CA certificate file; if file does not exist, it will be created"
|
||||||
"will be created"
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
|
@ -95,7 +94,7 @@ def _build_arg_parser(argv=None):
|
||||||
"--onion-tor-socks-proxy",
|
"--onion-tor-socks-proxy",
|
||||||
dest="onion_tor_socks_proxy",
|
dest="onion_tor_socks_proxy",
|
||||||
default=None,
|
default=None,
|
||||||
help=("host:port of tor socks proxy, used only to connect to " ".onion sites"),
|
help=("host:port of tor socks proxy, used only to connect to .onion sites"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# brozzler-worker args
|
# brozzler-worker args
|
||||||
|
@ -112,7 +111,7 @@ def _build_arg_parser(argv=None):
|
||||||
dest="max_browsers",
|
dest="max_browsers",
|
||||||
type=int,
|
type=int,
|
||||||
default=1,
|
default=1,
|
||||||
help=("max number of chrome instances simultaneously " "browsing pages"),
|
help=("max number of chrome instances simultaneously browsing pages"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# pywb args
|
# pywb args
|
||||||
|
|
|
@ -447,8 +447,6 @@ def main(argv=sys.argv):
|
||||||
wayback_cli = BrozzlerWaybackCli(
|
wayback_cli = BrozzlerWaybackCli(
|
||||||
args=argv[1:],
|
args=argv[1:],
|
||||||
default_port=8880,
|
default_port=8880,
|
||||||
desc=(
|
desc=("brozzler-wayback - pywb wayback (monkey-patched for use with brozzler)"),
|
||||||
"brozzler-wayback - pywb wayback (monkey-patched for use " "with brozzler)"
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
wayback_cli.run()
|
wayback_cli.run()
|
||||||
|
|
|
@ -120,7 +120,7 @@ def is_permitted_by_robots(site, url, proxy=None):
|
||||||
raise brozzler.ProxyError(e)
|
raise brozzler.ProxyError(e)
|
||||||
else:
|
else:
|
||||||
structlog.get_logger(logger_name=__name__).warning(
|
structlog.get_logger(logger_name=__name__).warning(
|
||||||
"returning true (permitted) after problem fetching " "robots.txt",
|
"returning true (permitted) after problem fetching robots.txt",
|
||||||
url=url,
|
url=url,
|
||||||
raised_exception=e,
|
raised_exception=e,
|
||||||
)
|
)
|
||||||
|
|
|
@ -168,7 +168,7 @@ class BrozzlerWorker:
|
||||||
svc = self._choose_warcprox()
|
svc = self._choose_warcprox()
|
||||||
if svc is None:
|
if svc is None:
|
||||||
raise brozzler.ProxyError(
|
raise brozzler.ProxyError(
|
||||||
"no available instances of warcprox in the service " "registry"
|
"no available instances of warcprox in the service registry"
|
||||||
)
|
)
|
||||||
site.proxy = "%s:%s" % (svc["host"], svc["port"])
|
site.proxy = "%s:%s" % (svc["host"], svc["port"])
|
||||||
site.save()
|
site.save()
|
||||||
|
@ -759,7 +759,7 @@ class BrozzlerWorker:
|
||||||
with self._start_stop_lock:
|
with self._start_stop_lock:
|
||||||
if self._thread:
|
if self._thread:
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
"ignoring start request because self._thread is " "not None"
|
"ignoring start request because self._thread is not None"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
self._thread = threading.Thread(target=self.run, name="BrozzlerWorker")
|
self._thread = threading.Thread(target=self.run, name="BrozzlerWorker")
|
||||||
|
|
|
@ -426,7 +426,7 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||||
if worker._using_warcprox(site):
|
if worker._using_warcprox(site):
|
||||||
info_json = json.dumps(ie_result, sort_keys=True, indent=4)
|
info_json = json.dumps(ie_result, sort_keys=True, indent=4)
|
||||||
logger.info(
|
logger.info(
|
||||||
"sending WARCPROX_WRITE_RECORD request to warcprox " "with yt-dlp json",
|
"sending WARCPROX_WRITE_RECORD request to warcprox with yt-dlp json",
|
||||||
url=ydl.url,
|
url=ydl.url,
|
||||||
)
|
)
|
||||||
worker._warcprox_write_record(
|
worker._warcprox_write_record(
|
||||||
|
|
|
@ -82,7 +82,7 @@ def main(argv=[]):
|
||||||
os.chdir(os.path.dirname(__file__))
|
os.chdir(os.path.dirname(__file__))
|
||||||
|
|
||||||
cmd = (
|
cmd = (
|
||||||
"/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site " "%s %s"
|
"/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site %s %s"
|
||||||
) % (" ".join(options), args.seed)
|
) % (" ".join(options), args.seed)
|
||||||
subprocess.call(["vagrant", "ssh", "--", cmd])
|
subprocess.call(["vagrant", "ssh", "--", cmd])
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue