mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
initialize page.videos correctly in all cases
This commit is contained in:
parent
eeee523b18
commit
95ba334b89
@ -200,7 +200,8 @@ class BrozzlerWorker:
|
|||||||
e.getcode(), e.info())
|
e.getcode(), e.info())
|
||||||
|
|
||||||
def _remember_videos(self, page, ydl_spy):
|
def _remember_videos(self, page, ydl_spy):
|
||||||
videos = []
|
if not 'videos' in page:
|
||||||
|
page.videos = []
|
||||||
for txn in ydl_spy.transactions:
|
for txn in ydl_spy.transactions:
|
||||||
if (txn['response_headers'].get_content_type().startswith('video/')
|
if (txn['response_headers'].get_content_type().startswith('video/')
|
||||||
and txn['method'] == 'GET'
|
and txn['method'] == 'GET'
|
||||||
@ -218,14 +219,12 @@ class BrozzlerWorker:
|
|||||||
video['content-range'] = txn[
|
video['content-range'] = txn[
|
||||||
'response_headers']['content-range']
|
'response_headers']['content-range']
|
||||||
logging.debug('embedded video %s', video)
|
logging.debug('embedded video %s', video)
|
||||||
videos.append(video)
|
page.videos.append(video)
|
||||||
page.videos = videos
|
|
||||||
|
|
||||||
def _try_youtube_dl(self, ydl, site, page):
|
def _try_youtube_dl(self, ydl, site, page):
|
||||||
try:
|
try:
|
||||||
self.logger.info("trying youtube-dl on {}".format(page))
|
self.logger.info("trying youtube-dl on {}".format(page))
|
||||||
info = ydl.extract_info(page.url)
|
info = ydl.extract_info(page.url)
|
||||||
self._remember_videos(page, ydl.brozzler_spy)
|
|
||||||
# logging.info('XXX %s', json.dumps(info))
|
# logging.info('XXX %s', json.dumps(info))
|
||||||
if self._proxy(site) and self._enable_warcprox_features(site):
|
if self._proxy(site) and self._enable_warcprox_features(site):
|
||||||
info_json = json.dumps(info, sort_keys=True, indent=4)
|
info_json = json.dumps(info, sort_keys=True, indent=4)
|
||||||
@ -344,6 +343,8 @@ class BrozzlerWorker:
|
|||||||
if 'content-range' in response_headers:
|
if 'content-range' in response_headers:
|
||||||
video['content-range'] = response_headers['content-range']
|
video['content-range'] = response_headers['content-range']
|
||||||
logging.debug('embedded video %s', video)
|
logging.debug('embedded video %s', video)
|
||||||
|
if not 'videos' in page:
|
||||||
|
page.videos = []
|
||||||
page.videos.append(video)
|
page.videos.append(video)
|
||||||
|
|
||||||
if not browser.is_running():
|
if not browser.is_running():
|
||||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev212',
|
version='1.1b9.dev213',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user