fix bug in final_bounces (not sure what I was thinking)

This commit is contained in:
Noah Levitt 2016-11-09 13:12:14 -08:00
parent 8889e4ab20
commit 953e50d9a6
2 changed files with 7 additions and 6 deletions

View File

@ -48,6 +48,7 @@ class ExtraHeaderAdder(urllib.request.BaseHandler):
class YoutubeDLSpy(urllib.request.BaseHandler):
Transaction = collections.namedtuple('Transaction', ['request', 'response'])
logger = logging.getLogger(__module__ + "." + __qualname__)
def __init__(self):
self.reset()
@ -62,17 +63,17 @@ class YoutubeDLSpy(urllib.request.BaseHandler):
self.transactions = []
def final_bounces(self, url):
"""Resolves redirect chains in self.transactions, returns a list of
"""
Resolves redirect chains in self.transactions, returns a list of
Transaction representing the final redirect destinations of the given
url. There could be more than one if for example youtube-dl hit the
same url with HEAD and then GET requests."""
same url with HEAD and then GET requests.
"""
redirects = {}
for txn in self.transactions:
# XXX check http status 301,302,303,307? check for "uri" header
# as well as "location"? see urllib.request.HTTPRedirectHandler
if ((txn.request.full_url == url
or txn.request.full_url in redirects)
and 'location' in txn.response.headers):
if 'location' in txn.response.headers:
redirects[txn.request.full_url] = txn
final_url = url

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b7.dev118',
version='1.1b7.dev119',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',