fix needs_browsing check

correctly handle a relative URL in the "location" response header
This commit is contained in:
Noah Levitt 2018-01-26 11:00:46 -08:00
parent bf5401283e
commit ba8d5a3740
2 changed files with 5 additions and 2 deletions

View File

@ -37,6 +37,7 @@ import urlcanon
from requests.structures import CaseInsensitiveDict
import rethinkdb as r
import datetime
import urllib.parse
class ExtraHeaderAdder(urllib.request.BaseHandler):
def __init__(self, extra_headers):
@ -87,7 +88,9 @@ class YoutubeDLSpy(urllib.request.BaseHandler):
final_url = url
while final_url in redirects:
final_url = redirects.pop(final_url)['response_headers']['location']
txn = redirects.pop(final_url)
final_url = urllib.parse.urljoin(
txn['url'], txn['response_headers']['location'])
final_bounces = []
for txn in self.transactions:

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b12.dev280',
version='1.1b12.dev281',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',