mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
logging.warn is deprecated; logging.warning is its replacement.
This commit replaces every call to warn() with warning() throughout the code base.
This commit is contained in:
parent
ee8ef23f0c
commit
a2ac3a0374
@ -159,7 +159,7 @@ class ThreadExceptionGate:
|
|||||||
def queue_exception(self, e):
|
def queue_exception(self, e):
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if self.pending_exception:
|
if self.pending_exception:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'%r already pending for thread %r, discarding %r',
|
'%r already pending for thread %r, discarding %r',
|
||||||
self.pending_exception, self.thread, e)
|
self.pending_exception, self.thread, e)
|
||||||
else:
|
else:
|
||||||
|
@ -223,7 +223,7 @@ class Chrome:
|
|||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if time.time() - self._last_warning > 30:
|
if time.time() - self._last_warning > 30:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'problem with %s (will keep trying until timeout '
|
'problem with %s (will keep trying until timeout '
|
||||||
'of %d seconds): %s', json_url, timeout_sec, e)
|
'of %d seconds): %s', json_url, timeout_sec, e)
|
||||||
self._last_warning = time.time()
|
self._last_warning = time.time()
|
||||||
@ -294,7 +294,7 @@ class Chrome:
|
|||||||
'chrome pid %s exited normally',
|
'chrome pid %s exited normally',
|
||||||
self.chrome_process.pid)
|
self.chrome_process.pid)
|
||||||
else:
|
else:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'chrome pid %s exited with nonzero status %s',
|
'chrome pid %s exited with nonzero status %s',
|
||||||
self.chrome_process.pid, status)
|
self.chrome_process.pid, status)
|
||||||
|
|
||||||
@ -305,13 +305,13 @@ class Chrome:
|
|||||||
return
|
return
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'chrome pid %s still alive %.1f seconds after sending '
|
'chrome pid %s still alive %.1f seconds after sending '
|
||||||
'SIGTERM, sending SIGKILL', self.chrome_process.pid,
|
'SIGTERM, sending SIGKILL', self.chrome_process.pid,
|
||||||
time.time() - t0)
|
time.time() - t0)
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
||||||
status = self.chrome_process.wait()
|
status = self.chrome_process.wait()
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'chrome pid %s reaped (status=%s) after killing with '
|
'chrome pid %s reaped (status=%s) after killing with '
|
||||||
'SIGKILL', self.chrome_process.pid, status)
|
'SIGKILL', self.chrome_process.pid, status)
|
||||||
|
|
||||||
|
@ -627,7 +627,7 @@ def brozzler_purge(argv=None):
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if job.status == 'ACTIVE':
|
if job.status == 'ACTIVE':
|
||||||
if args.force:
|
if args.force:
|
||||||
logging.warn(
|
logging.warning(
|
||||||
'job %s has status ACTIVE, purging anyway because '
|
'job %s has status ACTIVE, purging anyway because '
|
||||||
'--force was supplied', job_id)
|
'--force was supplied', job_id)
|
||||||
else:
|
else:
|
||||||
@ -644,7 +644,7 @@ def brozzler_purge(argv=None):
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if site.status == 'ACTIVE':
|
if site.status == 'ACTIVE':
|
||||||
if args.force:
|
if args.force:
|
||||||
logging.warn(
|
logging.warning(
|
||||||
'site %s has status ACTIVE, purging anyway because '
|
'site %s has status ACTIVE, purging anyway because '
|
||||||
'--force was supplied', site_id)
|
'--force was supplied', site_id)
|
||||||
else:
|
else:
|
||||||
@ -712,7 +712,7 @@ def brozzler_list_captures(argv=None):
|
|||||||
|
|
||||||
if args.url_or_sha1[:5] == 'sha1:':
|
if args.url_or_sha1[:5] == 'sha1:':
|
||||||
if args.prefix:
|
if args.prefix:
|
||||||
logging.warn(
|
logging.warning(
|
||||||
'ignoring supplied --prefix option which does not apply '
|
'ignoring supplied --prefix option which does not apply '
|
||||||
'to lookup by sha1')
|
'to lookup by sha1')
|
||||||
# assumes it's already base32 (XXX could detect if hex and convert)
|
# assumes it's already base32 (XXX could detect if hex and convert)
|
||||||
|
@ -260,7 +260,7 @@ class BrozzlerEasyController:
|
|||||||
state_strs.append(str(th))
|
state_strs.append(str(th))
|
||||||
stack = traceback.format_stack(sys._current_frames()[th.ident])
|
stack = traceback.format_stack(sys._current_frames()[th.ident])
|
||||||
state_strs.append(''.join(stack))
|
state_strs.append(''.join(stack))
|
||||||
logging.warn('dumping state (caught signal {})\n{}'.format(
|
logging.warning('dumping state (caught signal {})\n{}'.format(
|
||||||
signum, '\n'.join(state_strs)))
|
signum, '\n'.join(state_strs)))
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
|
@ -138,7 +138,7 @@ class RethinkDbFrontier:
|
|||||||
sites = []
|
sites = []
|
||||||
for i in range(result["replaced"]):
|
for i in range(result["replaced"]):
|
||||||
if result["changes"][i]["old_val"]["claimed"]:
|
if result["changes"][i]["old_val"]["claimed"]:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
"re-claimed site that was still marked 'claimed' "
|
"re-claimed site that was still marked 'claimed' "
|
||||||
"because it was last claimed a long time ago "
|
"because it was last claimed a long time ago "
|
||||||
"at %s, and presumably some error stopped it from "
|
"at %s, and presumably some error stopped it from "
|
||||||
@ -225,7 +225,7 @@ class RethinkDbFrontier:
|
|||||||
if not job:
|
if not job:
|
||||||
return False
|
return False
|
||||||
if job.status.startswith("FINISH"):
|
if job.status.startswith("FINISH"):
|
||||||
self.logger.warn("%s is already %s", job, job.status)
|
self.logger.warning("%s is already %s", job, job.status)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
results = self.rr.table("sites").get_all(job_id, index="job_id").run()
|
results = self.rr.table("sites").get_all(job_id, index="job_id").run()
|
||||||
@ -415,7 +415,7 @@ class RethinkDbFrontier:
|
|||||||
assert isinstance(e, brozzler.ReachedLimit)
|
assert isinstance(e, brozzler.ReachedLimit)
|
||||||
if (site.reached_limit
|
if (site.reached_limit
|
||||||
and site.reached_limit != e.warcprox_meta["reached-limit"]):
|
and site.reached_limit != e.warcprox_meta["reached-limit"]):
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
"reached limit %s but site had already reached limit %s",
|
"reached limit %s but site had already reached limit %s",
|
||||||
e.warcprox_meta["reached-limit"], self.reached_limit)
|
e.warcprox_meta["reached-limit"], self.reached_limit)
|
||||||
else:
|
else:
|
||||||
@ -434,7 +434,7 @@ class RethinkDbFrontier:
|
|||||||
index="priority_by_site").filter({"hops_from_seed":0}).run()
|
index="priority_by_site").filter({"hops_from_seed":0}).run()
|
||||||
pages = list(results)
|
pages = list(results)
|
||||||
if len(pages) > 1:
|
if len(pages) > 1:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
"more than one seed page for site_id %s ?", site_id)
|
"more than one seed page for site_id %s ?", site_id)
|
||||||
if len(pages) < 1:
|
if len(pages) < 1:
|
||||||
return None
|
return None
|
||||||
|
@ -106,7 +106,7 @@ def is_permitted_by_robots(site, url, proxy=None):
|
|||||||
# reppy has wrapped an exception that we want to bubble up
|
# reppy has wrapped an exception that we want to bubble up
|
||||||
raise brozzler.ProxyError(e)
|
raise brozzler.ProxyError(e)
|
||||||
else:
|
else:
|
||||||
logging.warn(
|
logging.warning(
|
||||||
"returning true (permitted) after problem fetching "
|
"returning true (permitted) after problem fetching "
|
||||||
"robots.txt for %r: %r", url, e)
|
"robots.txt for %r: %r", url, e)
|
||||||
return True
|
return True
|
||||||
|
@ -147,13 +147,13 @@ class BrozzlerWorker:
|
|||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(request, timeout=600) as response:
|
with urllib.request.urlopen(request, timeout=600) as response:
|
||||||
if response.getcode() != 204:
|
if response.getcode() != 204:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'got "%s %s" response on warcprox '
|
'got "%s %s" response on warcprox '
|
||||||
'WARCPROX_WRITE_RECORD request (expected 204)',
|
'WARCPROX_WRITE_RECORD request (expected 204)',
|
||||||
response.getcode(), response.reason)
|
response.getcode(), response.reason)
|
||||||
return request, response
|
return request, response
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'got "%s %s" response on warcprox '
|
'got "%s %s" response on warcprox '
|
||||||
'WARCPROX_WRITE_RECORD request (expected 204)',
|
'WARCPROX_WRITE_RECORD request (expected 204)',
|
||||||
e.getcode(), e.info())
|
e.getcode(), e.info())
|
||||||
@ -370,7 +370,7 @@ class BrozzlerWorker:
|
|||||||
if (page.needs_robots_check and
|
if (page.needs_robots_check and
|
||||||
not brozzler.is_permitted_by_robots(
|
not brozzler.is_permitted_by_robots(
|
||||||
site, page.url, self._proxy_for(site))):
|
site, page.url, self._proxy_for(site))):
|
||||||
logging.warn("page %s is blocked by robots.txt", page.url)
|
logging.warning("page %s is blocked by robots.txt", page.url)
|
||||||
page.blocked_by_robots = True
|
page.blocked_by_robots = True
|
||||||
self._frontier.completed_page(site, page)
|
self._frontier.completed_page(site, page)
|
||||||
else:
|
else:
|
||||||
@ -544,7 +544,7 @@ class BrozzlerWorker:
|
|||||||
def start(self):
|
def start(self):
|
||||||
with self._start_stop_lock:
|
with self._start_stop_lock:
|
||||||
if self._thread:
|
if self._thread:
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'ignoring start request because self._thread is '
|
'ignoring start request because self._thread is '
|
||||||
'not None')
|
'not None')
|
||||||
return
|
return
|
||||||
|
@ -48,7 +48,7 @@ _orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_rea
|
|||||||
def _webpage_read_content(self, *args, **kwargs):
|
def _webpage_read_content(self, *args, **kwargs):
|
||||||
content = _orig_webpage_read_content(self, *args, **kwargs)
|
content = _orig_webpage_read_content(self, *args, **kwargs)
|
||||||
if len(content) > 20000000:
|
if len(content) > 20000000:
|
||||||
logging.warn(
|
logging.warning(
|
||||||
'bypassing youtube-dl extraction because content is '
|
'bypassing youtube-dl extraction because content is '
|
||||||
'too large (%s characters)', len(content))
|
'too large (%s characters)', len(content))
|
||||||
return ''
|
return ''
|
||||||
@ -185,7 +185,7 @@ def _build_youtube_dl(worker, destdir, site):
|
|||||||
mimetype = magic.from_file(ctx['filename'], mime=True)
|
mimetype = magic.from_file(ctx['filename'], mime=True)
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
mimetype = 'video/%s' % info_dict['ext']
|
mimetype = 'video/%s' % info_dict['ext']
|
||||||
self.logger.warn(
|
self.logger.warning(
|
||||||
'guessing mimetype %s because %r', mimetype, e)
|
'guessing mimetype %s because %r', mimetype, e)
|
||||||
|
|
||||||
url = 'youtube-dl:%05d:%s' % (
|
url = 'youtube-dl:%05d:%s' % (
|
||||||
|
Loading…
x
Reference in New Issue
Block a user