mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 14:32:23 -04:00
restore handling of 420 Reached limit, with a rudimentary test
This commit is contained in:
parent
e5fb6cb4b9
commit
70b67942a5
3 changed files with 62 additions and 12 deletions
|
@ -119,6 +119,7 @@ class WebsockReceiverThread(threading.Thread):
|
|||
|
||||
self.is_open = False
|
||||
self.got_page_load_event = None
|
||||
self.reached_limit = None
|
||||
|
||||
self.on_request = None
|
||||
self.on_response = None
|
||||
|
@ -182,15 +183,22 @@ class WebsockReceiverThread(threading.Thread):
|
|||
self.websock.send(json.dumps(dict(id=0, method='Debugger.resume')))
|
||||
|
||||
def _network_response_received(self, message):
|
||||
# if (not self._reached_limit
|
||||
# and message['params']['response']['status'] == 420
|
||||
# and 'Warcprox-Meta' in CaseInsensitiveDict(
|
||||
# message['params']['response']['headers'])):
|
||||
# warcprox_meta = json.loads(CaseInsensitiveDict(
|
||||
# message['params']['response']['headers'])['Warcprox-Meta'])
|
||||
# self._reached_limit = brozzler.ReachedLimit(
|
||||
# warcprox_meta=warcprox_meta)
|
||||
# self.logger.info('reached limit %s', self._reached_limit)
|
||||
if (message['params']['response']['status'] == 420
|
||||
and 'Warcprox-Meta' in CaseInsensitiveDict(
|
||||
message['params']['response']['headers'])):
|
||||
if not self.reached_limit:
|
||||
warcprox_meta = json.loads(CaseInsensitiveDict(
|
||||
message['params']['response']['headers'])['Warcprox-Meta'])
|
||||
self.reached_limit = brozzler.ReachedLimit(
|
||||
warcprox_meta=warcprox_meta)
|
||||
self.logger.info('reached limit %s', self.reached_limit)
|
||||
brozzler.thread_raise(
|
||||
self.calling_thread, brozzler.ReachedLimit)
|
||||
else:
|
||||
self.logger.info(
|
||||
'reached limit but self.reached_limit is already set, '
|
||||
'assuming the calling thread is already handling this',
|
||||
self.reached_limit)
|
||||
if self.on_response:
|
||||
self.on_response(message)
|
||||
|
||||
|
@ -422,6 +430,10 @@ class Browser:
|
|||
## outlinks += retrieve_outlinks (60 sec)
|
||||
final_page_url = self.url()
|
||||
return final_page_url, outlinks
|
||||
except brozzler.ReachedLimit:
|
||||
# websock_thread has stashed the ReachedLimit exception with
|
||||
# more information, raise that one
|
||||
raise self.websock_thread.reached_limit
|
||||
except websocket.WebSocketConnectionClosedException as e:
|
||||
self.logger.error('websocket closed, did chrome die?')
|
||||
raise BrowsingException(e)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue