From 041feaf4262827f6aeaa46a9a513f26976bb2bcd Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Tue, 14 Apr 2020 09:39:48 +0000 Subject: [PATCH 1/4] Add missing super().do_POST() --- tests/test_brozzling.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py index 0e83089..1fcaab4 100755 --- a/tests/test_brozzling.py +++ b/tests/test_brozzling.py @@ -80,6 +80,8 @@ def httpd(request): self.send_header('Content-Length', len(payload)) self.end_headers() self.wfile.write(payload) + else: + super().do_POST() # SimpleHTTPRequestHandler always uses CWD so we have to chdir From 973af2c16e9a3336eb1e77f7cf819907f5d2fe14 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 14 Apr 2020 09:44:20 -0700 Subject: [PATCH 2/4] bump version after merge, update copyright --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index cad2897..f5d260a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ ''' setup.py - brozzler setup script -Copyright (C) 2014-2019 Internet Archive +Copyright (C) 2014-2020 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.5.19', + version='1.5.20', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', From 78365c9f3576a215ece3792a6a790df8ab1adcfb Mon Sep 17 00:00:00 2001 From: Jake L Date: Tue, 14 Apr 2020 17:19:53 -0400 Subject: [PATCH 3/4] Expanding Brozzler's logging in capabilities Some sites don't allow you to login without clicking on a button to open a retracted modal. This update to the login code allows Brozzler to click on all elements that we think are related to opening a login modal. Then, if there isn't a regular form, we will attempt to fill out abnormal form schemes. The test_try_login test has been expanded for the new type of login form we are supporting. --- brozzler/js-templates/try-login.js.j2 | 38 +++++++++++++++++++++++++++ tests/htdocs/site11/form2.html | 25 ++++++++++++++++++ tests/test_brozzling.py | 14 ++++++++++ 3 files changed, 77 insertions(+) create mode 100644 tests/htdocs/site11/form2.html diff --git a/brozzler/js-templates/try-login.js.j2 b/brozzler/js-templates/try-login.js.j2 index c07a296..fb4ee59 100644 --- a/brozzler/js-templates/try-login.js.j2 +++ b/brozzler/js-templates/try-login.js.j2 @@ -1,6 +1,11 @@ var __brzl_tryLoginState = 'trying'; var __brzl_tryLogin = function() { + for(var x = 0; x < document.querySelectorAll("[class^='login-open']").length; x++){ + button = document.querySelectorAll("[class^='login-open']")[x]; + button.click(); + } + for (var i = 0; i < document.forms.length; i++) { var form = document.forms[i]; if (form.method != 'post') { @@ -48,6 +53,39 @@ var __brzl_tryLogin = function() { return; } } + + + if(__brzl_tryLoginState === 'trying'){ + var submit = undefined; + var usernameInput = undefined; + var passwordField = undefined; + + for (var z = 0; z < document.querySelectorAll("[class^='login']").length; z++){ + var input = document.querySelectorAll("[class^='login']")[z]; + + if (input.type === "text" || input.type == "email") { + usernameField = input; + } else if (input.type === "password") { + passwordField = input; + } else if(input.onclick != null) { + submit = input; + } else if (input.type == "textarea") { + usernameField = undefined; + passwordField = undefined; + break; + } + } + + + if (submit && usernameField && passwordField){ + usernameField.value = {{username|json}}; + passwordField.value = {{password|json}}; + submit.click(); + __brzl_tryLoginState = 'maybe-submitted-form'; + return; + } + } + __brzl_tryLoginState = 'login-form-not-found'; }; diff --git a/tests/htdocs/site11/form2.html b/tests/htdocs/site11/form2.html new file mode 100644 index 0000000..9f63d65 --- /dev/null +++ b/tests/htdocs/site11/form2.html @@ -0,0 +1,25 @@ + + + brozzler login form test 2 + + +
+