Fix brozzler-easy so that warcprox features are enabled automatically (the feature already existed but was broken)

This commit is contained in:
Noah Levitt 2017-03-22 15:15:07 -07:00
parent 603956ec41
commit aae810cc6e
4 changed files with 8 additions and 17 deletions

View file

@ -86,7 +86,7 @@ def _add_proxy_options(arg_parser):
'--proxy', dest='proxy', default=None, help='http proxy') '--proxy', dest='proxy', default=None, help='http proxy')
arg_parser.add_argument( arg_parser.add_argument(
'--enable-warcprox-features', dest='enable_warcprox_features', '--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help=( action='store_true', default=None, help=(
'enable special features that assume the configured proxy is ' 'enable special features that assume the configured proxy is '
'warcprox')) 'warcprox'))
@ -159,14 +159,7 @@ def brozzle_page():
arg_parser.add_argument( arg_parser.add_argument(
'--password', dest='password', default=None, '--password', dest='password', default=None,
help='use this password to try to log in if a login form is found') help='use this password to try to log in if a login form is found')
arg_parser.add_argument( _add_proxy_options(arg_parser)
'--proxy', dest='proxy', default=None,
help='http proxy')
arg_parser.add_argument(
'--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help=(
'enable special features that assume the configured proxy '
'is warcprox'))
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])

View file

@ -33,8 +33,6 @@ class Site(doublethink.Document):
def populate_defaults(self): def populate_defaults(self):
if not "status" in self: if not "status" in self:
self.status = "ACTIVE" self.status = "ACTIVE"
if not "enable_warcprox_features" in self:
self.enable_warcprox_features = False
if not "claimed" in self: if not "claimed" in self:
self.claimed = False self.claimed = False
if not "last_disclaimed" in self: if not "last_disclaimed" in self:

View file

@ -124,9 +124,7 @@ class BrozzlerWorker:
return site.proxy return site.proxy
elif self._default_proxy: elif self._default_proxy:
return self._default_proxy return self._default_proxy
elif self._service_registry and ( elif self._service_registry and self._enable_warcprox_features(site):
site.enable_warcprox_features
or self._default_enable_warcprox_features):
svc = self._service_registry.available_service('warcprox') svc = self._service_registry.available_service('warcprox')
if svc is None: if svc is None:
raise Exception( raise Exception(
@ -142,6 +140,8 @@ class BrozzlerWorker:
def _enable_warcprox_features(self, site): def _enable_warcprox_features(self, site):
if not self._proxy(site):
return False
if site.enable_warcprox_features is not None: if site.enable_warcprox_features is not None:
return site.enable_warcprox_features return site.enable_warcprox_features
else: else:
@ -227,7 +227,7 @@ class BrozzlerWorker:
info = ydl.extract_info(page.url) info = ydl.extract_info(page.url)
self._remember_videos(page, ydl.brozzler_spy) self._remember_videos(page, ydl.brozzler_spy)
# logging.info('XXX %s', json.dumps(info)) # logging.info('XXX %s', json.dumps(info))
if self._proxy(site) and self._enable_warcprox_features(site): if self._enable_warcprox_features(site):
info_json = json.dumps(info, sort_keys=True, indent=4) info_json = json.dumps(info, sort_keys=True, indent=4)
self.logger.info( self.logger.info(
"sending WARCPROX_WRITE_RECORD request to warcprox " "sending WARCPROX_WRITE_RECORD request to warcprox "
@ -306,7 +306,7 @@ class BrozzlerWorker:
def _on_screenshot(screenshot_png): def _on_screenshot(screenshot_png):
if on_screenshot: if on_screenshot:
on_screenshot(screenshot_png) on_screenshot(screenshot_png)
if self._proxy(site) and self._enable_warcprox_features(site): if self._enable_warcprox_features(site):
self.logger.info( self.logger.info(
"sending WARCPROX_WRITE_RECORD request to %s with " "sending WARCPROX_WRITE_RECORD request to %s with "
"screenshot for %s", self._proxy(site), page) "screenshot for %s", self._proxy(site), page)

View file

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b9.dev214', version='1.1b9.dev215',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',