mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-09-26 19:20:55 -04:00
Merge pull request #51 from vbanos/youtube-dl-option
Make youtube-dl optional in BrozzlerWorker.brozzle_page
This commit is contained in:
commit
895bfbf913
1 changed file with 38 additions and 32 deletions
|
@@ -316,8 +316,10 @@ class BrozzlerWorker:
|
|||
|
||||
return full_jpeg, thumb_jpeg
|
||||
|
||||
def brozzle_page(self, browser, site, page, on_screenshot=None):
|
||||
def brozzle_page(self, browser, site, page, on_screenshot=None,
|
||||
enable_youtube_dl=True):
|
||||
self.logger.info("brozzling {}".format(page))
|
||||
if enable_youtube_dl:
|
||||
try:
|
||||
with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
|
||||
ydl = self._youtube_dl(tempdir, site)
|
||||
|
@@ -340,6 +342,8 @@ class BrozzlerWorker:
|
|||
self.logger.error(
|
||||
'youtube_dl raised exception on %s', page,
|
||||
exc_info=True)
|
||||
else:
|
||||
ydl_spy = False
|
||||
|
||||
if self._needs_browsing(page, ydl_spy):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
|
@@ -435,6 +439,7 @@ class BrozzlerWorker:
|
|||
'proxy error fetching %s' % page.url) from e
|
||||
|
||||
def _needs_browsing(self, page, brozzler_spy):
|
||||
if brozzler_spy:
|
||||
final_bounces = brozzler_spy.final_bounces(page.url)
|
||||
if not final_bounces:
|
||||
return True
|
||||
|
@@ -445,6 +450,7 @@ class BrozzlerWorker:
|
|||
return False
|
||||
|
||||
def _already_fetched(self, page, brozzler_spy):
|
||||
if brozzler_spy:
|
||||
for txn in brozzler_spy.final_bounces(page.url):
|
||||
if (txn['method'] == 'GET' and txn['status_code'] == 200):
|
||||
return True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue