use %r instead of calling repr()

This commit is contained in:
Noah Levitt 2017-06-07 13:07:42 -07:00
parent 65adc11d95
commit 4d7f4518b5
6 changed files with 32 additions and 31 deletions

View file

@@ -47,9 +47,9 @@ class ReachedLimit(Exception):
self.http_payload = http_payload self.http_payload = http_payload
def __repr__(self): def __repr__(self):
return "ReachedLimit(warcprox_meta=%s,http_payload=%s)" % ( return "ReachedLimit(warcprox_meta=%r,http_payload=%r)" % (
repr(self.warcprox_meta) if hasattr(self, 'warcprox_meta') else None, self.warcprox_meta if hasattr(self, 'warcprox_meta') else None,
repr(self.http_payload) if hasattr(self, 'http_payload') else None) self.http_payload if hasattr(self, 'http_payload') else None)
def __str__(self): def __str__(self):
return self.__repr__() return self.__repr__()
@@ -94,8 +94,8 @@ def behavior_script(url, template_parameters=None):
behavior['behavior_js_template']) behavior['behavior_js_template'])
script = template.render(parameters) script = template.render(parameters)
logging.info( logging.info(
'using template=%s populated with parameters=%s for %s', 'using template=%r populated with parameters=%r for %r',
repr(behavior['behavior_js_template']), parameters, url) behavior['behavior_js_template'], parameters, url)
return script return script
return None return None
@@ -129,8 +129,8 @@ class ThreadExceptionGate:
with self.lock: with self.lock:
if self.pending_exception: if self.pending_exception:
self.logger.warn( self.logger.warn(
'%s already pending for thread %s, discarding %s', '%r already pending for thread %r, discarding %r',
repr(self.pending_exception), self.thread, repr(e)) self.pending_exception, self.thread, e)
else: else:
self.pending_exception = e self.pending_exception = e

View file

@@ -137,8 +137,7 @@ class Chrome:
if proxy: if proxy:
chrome_args.append('--proxy-server=%s' % proxy) chrome_args.append('--proxy-server=%s' % proxy)
chrome_args.append('about:blank') chrome_args.append('about:blank')
self.logger.info( self.logger.info('running: %r', subprocess.list2cmdline(chrome_args))
'running: %s', repr(subprocess.list2cmdline(chrome_args)))
# start_new_session - new process group so we can kill the whole group # start_new_session - new process group so we can kill the whole group
self.chrome_process = subprocess.Popen( self.chrome_process = subprocess.Popen(
chrome_args, env=new_env, start_new_session=True, chrome_args, env=new_env, start_new_session=True,
@@ -173,7 +172,7 @@ class Chrome:
return url return url
except brozzler.ShutdownRequested: except brozzler.ShutdownRequested:
raise raise
except BaseException as e: except Exception as e:
if time.time() - self._last_warning > 30: if time.time() - self._last_warning > 30:
self.logger.warn( self.logger.warn(
'problem with %s (will keep trying until timeout ' 'problem with %s (will keep trying until timeout '

View file

@@ -413,7 +413,7 @@ def brozzler_list_jobs(argv=None):
if result: if result:
results = [reql.run()] results = [reql.run()]
else: else:
logging.error('no such job with id %s', repr(job_id)) logging.error('no such job with id %r', job_id)
sys.exit(1) sys.exit(1)
else: else:
reql = rr.table('jobs').order_by('id') reql = rr.table('jobs').order_by('id')
@@ -657,7 +657,7 @@ def brozzler_stop_crawl(argv=None):
job_id = args.job_id job_id = args.job_id
job = brozzler.Job.load(rr, job_id) job = brozzler.Job.load(rr, job_id)
if not job: if not job:
logging.fatal('job not found with id=%s', repr(job_id)) logging.fatal('job not found with id=%r', job_id)
sys.exit(1) sys.exit(1)
job.stop_requested = doublethink.utcnow() job.stop_requested = doublethink.utcnow()
job.save() job.save()
@@ -668,7 +668,7 @@ def brozzler_stop_crawl(argv=None):
site_id = args.site_id site_id = args.site_id
site = brozzler.Site.load(rr, site_id) site = brozzler.Site.load(rr, site_id)
if not site: if not site:
logging.fatal('site not found with id=%s', repr(site_id)) logging.fatal('site not found with id=%r', site_id)
sys.exit(1) sys.exit(1)
site.stop_requested = doublethink.utcnow() site.stop_requested = doublethink.utcnow()
site.save() site.save()

View file

@@ -40,14 +40,13 @@ class RethinkDbFrontier:
def _ensure_db(self): def _ensure_db(self):
dbs = self.rr.db_list().run() dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs: if not self.rr.dbname in dbs:
self.logger.info( self.logger.info("creating rethinkdb database %r", self.rr.dbname)
"creating rethinkdb database %s", repr(self.rr.dbname))
self.rr.db_create(self.rr.dbname).run() self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run() tables = self.rr.table_list().run()
if not "sites" in tables: if not "sites" in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table 'sites' in database %s", "creating rethinkdb table 'sites' in database %r",
repr(self.rr.dbname)) self.rr.dbname)
self.rr.table_create( self.rr.table_create(
"sites", shards=self.shards, replicas=self.replicas).run() "sites", shards=self.shards, replicas=self.replicas).run()
self.rr.table("sites").index_create("sites_last_disclaimed", [ self.rr.table("sites").index_create("sites_last_disclaimed", [
@@ -55,8 +54,8 @@ class RethinkDbFrontier:
self.rr.table("sites").index_create("job_id").run() self.rr.table("sites").index_create("job_id").run()
if not "pages" in tables: if not "pages" in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table 'pages' in database %s", "creating rethinkdb table 'pages' in database %r",
repr(self.rr.dbname)) self.rr.dbname)
self.rr.table_create( self.rr.table_create(
"pages", shards=self.shards, replicas=self.replicas).run() "pages", shards=self.shards, replicas=self.replicas).run()
self.rr.table("pages").index_create("priority_by_site", [ self.rr.table("pages").index_create("priority_by_site", [
@@ -69,8 +68,8 @@ class RethinkDbFrontier:
r.row["hops_from_seed"]]).run() r.row["hops_from_seed"]]).run()
if not "jobs" in tables: if not "jobs" in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table 'jobs' in database %s", "creating rethinkdb table 'jobs' in database %r",
repr(self.rr.dbname)) self.rr.dbname)
self.rr.table_create( self.rr.table_create(
"jobs", shards=self.shards, replicas=self.replicas).run() "jobs", shards=self.shards, replicas=self.replicas).run()
@@ -86,10 +85,13 @@ class RethinkDbFrontier:
expected = 0 expected = 0
if isinstance(expected, list): if isinstance(expected, list):
if result.get(k) not in kwargs[k]: if result.get(k) not in kwargs[k]:
raise UnexpectedDbResult("expected {} to be one of {} in {}".format(repr(k), expected, result)) raise UnexpectedDbResult(
"expected %r to be one of %r in %r" % (
k, expected, result))
else: else:
if result.get(k) != expected: if result.get(k) != expected:
raise UnexpectedDbResult("expected {} to be {} in {}".format(repr(k), expected, result)) raise UnexpectedDbResult("expected %r to be %r in %r" % (
k, expected, result))
def claim_site(self, worker_id): def claim_site(self, worker_id):
# XXX keep track of aggregate priority and prioritize sites accordingly? # XXX keep track of aggregate priority and prioritize sites accordingly?

View file

@@ -112,12 +112,12 @@ def is_permitted_by_robots(site, url, proxy=None):
else: else:
if tries_left > 0: if tries_left > 0:
logging.warn( logging.warn(
"caught exception fetching robots.txt (%s tries " "caught exception fetching robots.txt (%r tries "
"left) for %s: %s", tries_left, url, repr(e)) "left) for %r: %r", tries_left, url, e)
tries_left -= 1 tries_left -= 1
else: else:
logging.error( logging.error(
"caught exception fetching robots.txt (0 tries " "caught exception fetching robots.txt (0 tries "
"left) for %s: %s", url, repr(e), exc_info=True) "left) for %r: %r", url, e, exc_info=True)
return False return False

View file

@@ -135,8 +135,8 @@ class BrozzlerWorker:
site.proxy = '%s:%s' % (svc['host'], svc['port']) site.proxy = '%s:%s' % (svc['host'], svc['port'])
site.save() site.save()
self.logger.info( self.logger.info(
'chose warcprox instance %s from service registry for %s', 'chose warcprox instance %r from service registry for %r',
repr(site.proxy), site) site.proxy, site)
return site.proxy return site.proxy
return None return None
@@ -428,8 +428,8 @@ class BrozzlerWorker:
page = None page = None
self._frontier.honor_stop_request(site) self._frontier.honor_stop_request(site)
self.logger.info( self.logger.info(
"brozzling site (proxy=%s) %s", "brozzling site (proxy=%r) %r",
repr(self._proxy_for(site)), site) self._proxy_for(site), site)
start = time.time() start = time.time()
while time.time() - start < 7 * 60: while time.time() - start < 7 * 60:
site.refresh() site.refresh()
@@ -473,7 +473,7 @@ class BrozzlerWorker:
# using brozzler-worker --proxy, nothing to do but try the # using brozzler-worker --proxy, nothing to do but try the
# same proxy again next time # same proxy again next time
logging.error( logging.error(
'proxy error (site.proxy=%s): %s', repr(site.proxy), e) 'proxy error (site.proxy=%r): %r', site.proxy, e)
except: except:
self.logger.critical("unexpected exception", exc_info=True) self.logger.critical("unexpected exception", exc_info=True)
finally: finally: