mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-20 04:44:12 -04:00
fix problem where each hashtag visited causes a page load if page url redirects
This commit is contained in:
parent
384c877e9a
commit
ec847e48bc
2 changed files with 3 additions and 4 deletions
|
@ -454,7 +454,7 @@ class Browser:
|
||||||
else:
|
else:
|
||||||
outlinks = self.extract_outlinks()
|
outlinks = self.extract_outlinks()
|
||||||
if not skip_visit_hashtags:
|
if not skip_visit_hashtags:
|
||||||
self.visit_hashtags(page_url, hashtags, outlinks)
|
self.visit_hashtags(self.url(), hashtags, outlinks)
|
||||||
final_page_url = self.url()
|
final_page_url = self.url()
|
||||||
return final_page_url, outlinks
|
return final_page_url, outlinks
|
||||||
except brozzler.ReachedLimit:
|
except brozzler.ReachedLimit:
|
||||||
|
@ -599,8 +599,7 @@ class Browser:
|
||||||
|
|
||||||
def try_login(self, username, password, timeout=300):
|
def try_login(self, username, password, timeout=300):
|
||||||
try_login_js = brozzler.jinja2_environment().get_template(
|
try_login_js = brozzler.jinja2_environment().get_template(
|
||||||
'try-login.js.j2').render(
|
'try-login.js.j2').render(username=username, password=password)
|
||||||
username=username, password=password)
|
|
||||||
|
|
||||||
self.websock_thread.got_page_load_event = None
|
self.websock_thread.got_page_load_event = None
|
||||||
self.send_to_chrome(
|
self.send_to_chrome(
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b12.dev267',
|
version='1.1b12.dev268',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue