mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-11 07:50:24 -04:00
Merge branch 'master' into qa
* master:
  ugh, avoid infinite recursion
  fix frontier tests now that enable_warcprox_features is simply omitted by default
  i dub thee 1.1b9
  github didn't like that, how about a width in pixels
  maybe pypi supports RST image "scale"
This commit is contained in:
commit
06ef045e63
4 changed files with 4 additions and 9 deletions
|
@ -2,7 +2,7 @@
|
||||||
:target: https://travis-ci.org/internetarchive/brozzler
|
:target: https://travis-ci.org/internetarchive/brozzler
|
||||||
|
|
||||||
.. |logo| image:: https://cdn.rawgit.com/internetarchive/brozzler/1.1b5/brozzler/webconsole/static/brozzler.svg
|
.. |logo| image:: https://cdn.rawgit.com/internetarchive/brozzler/1.1b5/brozzler/webconsole/static/brozzler.svg
|
||||||
:width: 7%
|
:width: 60px
|
||||||
|
|
||||||
|logo| brozzler
|
|logo| brozzler
|
||||||
===============
|
===============
|
||||||
|
|
|
@ -138,10 +138,7 @@ class BrozzlerWorker:
|
||||||
return site.proxy
|
return site.proxy
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _enable_warcprox_features(self, site):
|
def _enable_warcprox_features(self, site):
|
||||||
if not self._proxy(site):
|
|
||||||
return False
|
|
||||||
if site.enable_warcprox_features is not None:
|
if site.enable_warcprox_features is not None:
|
||||||
return site.enable_warcprox_features
|
return site.enable_warcprox_features
|
||||||
else:
|
else:
|
||||||
|
@ -227,7 +224,7 @@ class BrozzlerWorker:
|
||||||
info = ydl.extract_info(page.url)
|
info = ydl.extract_info(page.url)
|
||||||
self._remember_videos(page, ydl.brozzler_spy)
|
self._remember_videos(page, ydl.brozzler_spy)
|
||||||
# logging.info('XXX %s', json.dumps(info))
|
# logging.info('XXX %s', json.dumps(info))
|
||||||
if self._enable_warcprox_features(site):
|
if self._proxy(site) and self._enable_warcprox_features(site):
|
||||||
info_json = json.dumps(info, sort_keys=True, indent=4)
|
info_json = json.dumps(info, sort_keys=True, indent=4)
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"sending WARCPROX_WRITE_RECORD request to warcprox "
|
"sending WARCPROX_WRITE_RECORD request to warcprox "
|
||||||
|
@ -306,7 +303,7 @@ class BrozzlerWorker:
|
||||||
def _on_screenshot(screenshot_png):
|
def _on_screenshot(screenshot_png):
|
||||||
if on_screenshot:
|
if on_screenshot:
|
||||||
on_screenshot(screenshot_png)
|
on_screenshot(screenshot_png)
|
||||||
if self._enable_warcprox_features(site):
|
if self._proxy(site) and self._enable_warcprox_features(site):
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"sending WARCPROX_WRITE_RECORD request to %s with "
|
"sending WARCPROX_WRITE_RECORD request to %s with "
|
||||||
"screenshot for %s", self._proxy(site), page)
|
"screenshot for %s", self._proxy(site), page)
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev215',
|
version='1.1b10.dev217',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
|
@ -67,7 +67,6 @@ def test_basics():
|
||||||
assert sites[1].starts_and_stops[0]['start']
|
assert sites[1].starts_and_stops[0]['start']
|
||||||
assert sites[0] == {
|
assert sites[0] == {
|
||||||
'claimed': False,
|
'claimed': False,
|
||||||
'enable_warcprox_features': False,
|
|
||||||
'id': sites[0].id,
|
'id': sites[0].id,
|
||||||
'job_id': job.id,
|
'job_id': job.id,
|
||||||
'last_claimed': brozzler.EPOCH_UTC,
|
'last_claimed': brozzler.EPOCH_UTC,
|
||||||
|
@ -86,7 +85,6 @@ def test_basics():
|
||||||
}
|
}
|
||||||
assert sites[1] == {
|
assert sites[1] == {
|
||||||
'claimed': False,
|
'claimed': False,
|
||||||
'enable_warcprox_features': False,
|
|
||||||
'id': sites[1].id,
|
'id': sites[1].id,
|
||||||
'job_id': job.id,
|
'job_id': job.id,
|
||||||
'last_claimed': brozzler.EPOCH_UTC,
|
'last_claimed': brozzler.EPOCH_UTC,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue