mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-01 18:16:43 -04:00
bugfix
This commit is contained in:
parent
9c81a7bbda
commit
6259d03be1
1 changed files with 13 additions and 9 deletions
|
@ -342,6 +342,8 @@ class BrozzlerWorker:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
'youtube_dl raised exception on %s', page,
|
'youtube_dl raised exception on %s', page,
|
||||||
exc_info=True)
|
exc_info=True)
|
||||||
|
else:
|
||||||
|
ydl_spy = False
|
||||||
|
|
||||||
if self._needs_browsing(page, ydl_spy):
|
if self._needs_browsing(page, ydl_spy):
|
||||||
self.logger.info('needs browsing: %s', page)
|
self.logger.info('needs browsing: %s', page)
|
||||||
|
@ -437,19 +439,21 @@ class BrozzlerWorker:
|
||||||
'proxy error fetching %s' % page.url) from e
|
'proxy error fetching %s' % page.url) from e
|
||||||
|
|
||||||
def _needs_browsing(self, page, brozzler_spy):
|
def _needs_browsing(self, page, brozzler_spy):
|
||||||
final_bounces = brozzler_spy.final_bounces(page.url)
|
if brozzler_spy:
|
||||||
if not final_bounces:
|
final_bounces = brozzler_spy.final_bounces(page.url)
|
||||||
return True
|
if not final_bounces:
|
||||||
for txn in final_bounces:
|
|
||||||
if txn['response_headers'].get_content_type() in [
|
|
||||||
'text/html', 'application/xhtml+xml']:
|
|
||||||
return True
|
return True
|
||||||
|
for txn in final_bounces:
|
||||||
|
if txn['response_headers'].get_content_type() in [
|
||||||
|
'text/html', 'application/xhtml+xml']:
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _already_fetched(self, page, brozzler_spy):
|
def _already_fetched(self, page, brozzler_spy):
|
||||||
for txn in brozzler_spy.final_bounces(page.url):
|
if brozzler_spy:
|
||||||
if (txn['method'] == 'GET' and txn['status_code'] == 200):
|
for txn in brozzler_spy.final_bounces(page.url):
|
||||||
return True
|
if (txn['method'] == 'GET' and txn['status_code'] == 200):
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def brozzle_site(self, browser, site):
|
def brozzle_site(self, browser, site):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue