mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Run JS behaviors only on HTML
Add a method `Browser.page_is_html` that returns a boolean. The method uses `document.contentType` to get the current page's content type. If the page is not HTML, JS behaviors are skipped. Capture performance increases greatly for non-HTML pages.
This commit is contained in:
parent
ba85917f70
commit
d36431b878
@ -655,7 +655,24 @@ class Browser:
|
||||
message = self.websock_thread.pop_result(msg_id)
|
||||
return message['result']['result']['value']
|
||||
|
||||
def page_is_html(self, timeout=10):
    '''
    Check if current page is HTML.

    Evaluates `document.contentType` in the browser via
    `Runtime.evaluate` and looks for the substring 'html' in the
    answer, so both 'text/html' and 'application/xhtml+xml' count as
    HTML.

    Args:
        timeout: value handed to `self._wait_for` while waiting for
            the reply to the evaluate command (default 10)

    Returns:
        bool: True if the reported content type contains 'html';
        False otherwise, including when the reply carries no string
        value (e.g. an error reply, or a result without 'value')
    '''
    # Announce the id of the command we are about to send before sending
    # it. NOTE(review): assumes `_command_id.peek()` yields the id that
    # `send_to_chrome` will use next -- confirm against the class.
    self.websock_thread.expect_result(self._command_id.peek())
    msg_id = self.send_to_chrome(
            method='Runtime.evaluate', suppress_logging=True,
            params={'expression': 'document.contentType'})
    self._wait_for(
            lambda: self.websock_thread.received_result(msg_id),
            timeout=timeout)
    message = self.websock_thread.pop_result(msg_id)

    # The reply is not guaranteed to have result.result.value: protocol
    # error replies have no 'result', and the evaluated object's 'value'
    # is optional. Without a content type we cannot say the page is
    # HTML, so answer False rather than raising.
    try:
        content_type = message['result']['result']['value']
    except (KeyError, TypeError):
        return False
    return isinstance(content_type, str) and 'html' in content_type
|
||||
|
||||
def run_behavior(self, behavior_script, timeout=900):
|
||||
# Skip running JS behavior if page is not HTML.
|
||||
if not self.page_is_html():
|
||||
return
|
||||
|
||||
self.send_to_chrome(
|
||||
method='Runtime.evaluate', suppress_logging=True,
|
||||
params={'expression': behavior_script})
|
||||
|
Loading…
x
Reference in New Issue
Block a user