diff --git a/brozzler/__init__.py b/brozzler/__init__.py
index ccfaacd..c97835f 100644
--- a/brozzler/__init__.py
+++ b/brozzler/__init__.py
@@ -159,7 +159,7 @@ class ThreadExceptionGate:
     def queue_exception(self, e):
         with self.lock:
             if self.pending_exception:
-                self.logger.warn(
+                self.logger.warning(
                         '%r already pending for thread %r, discarding %r',
                         self.pending_exception, self.thread, e)
             else:
diff --git a/brozzler/chrome.py b/brozzler/chrome.py
index 5928586..c70296f 100644
--- a/brozzler/chrome.py
+++ b/brozzler/chrome.py
@@ -223,7 +223,7 @@ class Chrome:
                 raise
             except Exception as e:
                 if time.time() - self._last_warning > 30:
-                    self.logger.warn(
+                    self.logger.warning(
                             'problem with %s (will keep trying until timeout '
                             'of %d seconds): %s', json_url, timeout_sec, e)
                     self._last_warning = time.time()
@@ -294,7 +294,7 @@ class Chrome:
                             'chrome pid %s exited normally',
                             self.chrome_process.pid)
                 else:
-                    self.logger.warn(
+                    self.logger.warning(
                             'chrome pid %s exited with nonzero status %s',
                             self.chrome_process.pid, status)
 
@@ -305,13 +305,13 @@ class Chrome:
                 return
             time.sleep(0.5)
 
-        self.logger.warn(
+        self.logger.warning(
                 'chrome pid %s still alive %.1f seconds after sending '
                 'SIGTERM, sending SIGKILL', self.chrome_process.pid,
                 time.time() - t0)
         os.killpg(self.chrome_process.pid, signal.SIGKILL)
         status = self.chrome_process.wait()
-        self.logger.warn(
+        self.logger.warning(
                 'chrome pid %s reaped (status=%s) after killing with '
                 'SIGKILL', self.chrome_process.pid, status)
 
diff --git a/brozzler/cli.py b/brozzler/cli.py
index 188d591..4b0bd76 100644
--- a/brozzler/cli.py
+++ b/brozzler/cli.py
@@ -627,7 +627,7 @@ def brozzler_purge(argv=None):
             sys.exit(1)
         if job.status == 'ACTIVE':
             if args.force:
-                logging.warn(
+                logging.warning(
                         'job %s has status ACTIVE, purging anyway because '
                         '--force was supplied', job_id)
             else:
@@ -644,7 +644,7 @@ def brozzler_purge(argv=None):
             sys.exit(1)
         if site.status == 'ACTIVE':
             if args.force:
-                logging.warn(
+                logging.warning(
                         'site %s has status ACTIVE, purging anyway because '
                         '--force was supplied', site_id)
             else:
@@ -712,7 +712,7 @@ def brozzler_list_captures(argv=None):
 
     if args.url_or_sha1[:5] == 'sha1:':
         if args.prefix:
-            logging.warn(
+            logging.warning(
                     'ignoring supplied --prefix option which does not apply '
                     'to lookup by sha1')
         # assumes it's already base32 (XXX could detect if hex and convert)
diff --git a/brozzler/easy.py b/brozzler/easy.py
index 83cf1ba..dd98884 100644
--- a/brozzler/easy.py
+++ b/brozzler/easy.py
@@ -260,7 +260,7 @@ class BrozzlerEasyController:
             state_strs.append(str(th))
             stack = traceback.format_stack(sys._current_frames()[th.ident])
             state_strs.append(''.join(stack))
-        logging.warn('dumping state (caught signal {})\n{}'.format(
+        logging.warning('dumping state (caught signal {})\n{}'.format(
             signum, '\n'.join(state_strs)))
 
 def main(argv=None):
diff --git a/brozzler/frontier.py b/brozzler/frontier.py
index 3826abf..0e3b777 100644
--- a/brozzler/frontier.py
+++ b/brozzler/frontier.py
@@ -138,7 +138,7 @@ class RethinkDbFrontier:
         sites = []
         for i in range(result["replaced"]):
             if result["changes"][i]["old_val"]["claimed"]:
-                self.logger.warn(
+                self.logger.warning(
                         "re-claimed site that was still marked 'claimed' "
                         "because it was last claimed a long time ago "
                         "at %s, and presumably some error stopped it from "
@@ -225,7 +225,7 @@ class RethinkDbFrontier:
         if not job:
            return False
         if job.status.startswith("FINISH"):
-            self.logger.warn("%s is already %s", job, job.status)
+            self.logger.warning("%s is already %s", job, job.status)
             return True
 
         results = self.rr.table("sites").get_all(job_id, index="job_id").run()
@@ -415,7 +415,7 @@ class RethinkDbFrontier:
         assert isinstance(e, brozzler.ReachedLimit)
         if (site.reached_limit
                 and site.reached_limit != e.warcprox_meta["reached-limit"]):
-            self.logger.warn(
+            self.logger.warning(
                     "reached limit %s but site had already reached limit %s",
                     e.warcprox_meta["reached-limit"], self.reached_limit)
         else:
@@ -434,7 +434,7 @@ class RethinkDbFrontier:
                 index="priority_by_site").filter({"hops_from_seed":0}).run()
         pages = list(results)
         if len(pages) > 1:
-            self.logger.warn(
+            self.logger.warning(
                     "more than one seed page for site_id %s ?", site_id)
         if len(pages) < 1:
             return None
diff --git a/brozzler/robots.py b/brozzler/robots.py
index 5b96423..4122093 100644
--- a/brozzler/robots.py
+++ b/brozzler/robots.py
@@ -106,7 +106,7 @@ def is_permitted_by_robots(site, url, proxy=None):
             # reppy has wrapped an exception that we want to bubble up
             raise brozzler.ProxyError(e)
         else:
-            logging.warn(
+            logging.warning(
                     "returning true (permitted) after problem fetching "
                     "robots.txt for %r: %r", url, e)
             return True
diff --git a/brozzler/worker.py b/brozzler/worker.py
index fba83aa..5ce5499 100644
--- a/brozzler/worker.py
+++ b/brozzler/worker.py
@@ -147,13 +147,13 @@ class BrozzlerWorker:
         try:
             with urllib.request.urlopen(request, timeout=600) as response:
                 if response.getcode() != 204:
-                    self.logger.warn(
+                    self.logger.warning(
                             'got "%s %s" response on warcprox '
                             'WARCPROX_WRITE_RECORD request (expected 204)',
                             response.getcode(), response.reason)
                 return request, response
         except urllib.error.HTTPError as e:
-            self.logger.warn(
+            self.logger.warning(
                     'got "%s %s" response on warcprox '
                     'WARCPROX_WRITE_RECORD request (expected 204)',
                     e.getcode(), e.info())
@@ -370,7 +370,7 @@ class BrozzlerWorker:
            if (page.needs_robots_check and
                    not brozzler.is_permitted_by_robots(
                        site, page.url, self._proxy_for(site))):
-                logging.warn("page %s is blocked by robots.txt", page.url)
+                logging.warning("page %s is blocked by robots.txt", page.url)
                page.blocked_by_robots = True
                self._frontier.completed_page(site, page)
            else:
@@ -544,7 +544,7 @@ class BrozzlerWorker:
     def start(self):
         with self._start_stop_lock:
             if self._thread:
-                self.logger.warn(
+                self.logger.warning(
                         'ignoring start request because self._thread is '
                         'not None')
                 return
diff --git a/brozzler/ydl.py b/brozzler/ydl.py
index 57550e5..2388df9 100644
--- a/brozzler/ydl.py
+++ b/brozzler/ydl.py
@@ -48,7 +48,7 @@ _orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_rea
 def _webpage_read_content(self, *args, **kwargs):
     content = _orig_webpage_read_content(self, *args, **kwargs)
     if len(content) > 20000000:
-        logging.warn(
+        logging.warning(
                 'bypassing youtube-dl extraction because content is '
                 'too large (%s characters)', len(content))
         return ''
@@ -185,7 +185,7 @@ def _build_youtube_dl(worker, destdir, site):
                    mimetype = magic.from_file(ctx['filename'], mime=True)
                except ImportError as e:
                    mimetype = 'video/%s' % info_dict['ext']
-                    self.logger.warn(
+                    self.logger.warning(
                            'guessing mimetype %s because %r', mimetype, e)
                url = 'youtube-dl:%05d:%s' % (
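
Background on the rename above: `Logger.warn` is an undocumented alias for `Logger.warning` that has emitted a `DeprecationWarning` since Python 3.3, and the alias was removed outright in Python 3.13; the module-level `logging.warn()` used in cli.py, robots.py, worker.py, and ydl.py follows the same pattern. So the patch is a behavior-preserving cleanup. A minimal sketch of the difference, not part of the patch (the 'brozzler.demo' logger name is made up for illustration):

    import logging
    import warnings

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger('brozzler.demo')  # hypothetical logger name

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        if hasattr(logger, 'warn'):  # the alias is gone in Python 3.13+
            # deprecated spelling: logs the same record but also emits
            # a DeprecationWarning
            logger.warn('old spelling')
        logger.warning('new spelling')  # the supported API, same output

    print([str(w.message) for w in caught])
    # on Python 3.3-3.12:
    # ["The 'warn' method is deprecated, use 'warning' instead"]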