Mirror of https://github.com/internetarchive/brozzler.git (synced 2025-08-07 05:52:27 -04:00)
initialize page.videos correctly in all cases
This commit is contained in:
parent: eeee523b18
commit: 95ba334b89
2 changed files with 6 additions and 5 deletions
|
@ -200,7 +200,8 @@ class BrozzlerWorker:
|
|||
e.getcode(), e.info())
|
||||
|
||||
def _remember_videos(self, page, ydl_spy):
|
||||
videos = []
|
||||
if not 'videos' in page:
|
||||
page.videos = []
|
||||
for txn in ydl_spy.transactions:
|
||||
if (txn['response_headers'].get_content_type().startswith('video/')
|
||||
and txn['method'] == 'GET'
|
||||
|
@ -218,14 +219,12 @@ class BrozzlerWorker:
|
|||
video['content-range'] = txn[
|
||||
'response_headers']['content-range']
|
||||
logging.debug('embedded video %s', video)
|
||||
videos.append(video)
|
||||
page.videos = videos
|
||||
page.videos.append(video)
|
||||
|
||||
def _try_youtube_dl(self, ydl, site, page):
|
||||
try:
|
||||
self.logger.info("trying youtube-dl on {}".format(page))
|
||||
info = ydl.extract_info(page.url)
|
||||
self._remember_videos(page, ydl.brozzler_spy)
|
||||
# logging.info('XXX %s', json.dumps(info))
|
||||
if self._proxy(site) and self._enable_warcprox_features(site):
|
||||
info_json = json.dumps(info, sort_keys=True, indent=4)
|
||||
|
@ -344,6 +343,8 @@ class BrozzlerWorker:
|
|||
if 'content-range' in response_headers:
|
||||
video['content-range'] = response_headers['content-range']
|
||||
logging.debug('embedded video %s', video)
|
||||
if not 'videos' in page:
|
||||
page.videos = []
|
||||
page.videos.append(video)
|
||||
|
||||
if not browser.is_running():
|
||||
|
|
setup.py (2 lines changed)
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b9.dev212',
|
||||
version='1.1b9.dev213',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue