mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-21 05:14:22 -04:00
fix bug in final_bounces (not sure what I was thinking)
This commit is contained in:
parent
8889e4ab20
commit
953e50d9a6
2 changed files with 7 additions and 6 deletions
|
@@ -48,6 +48,7 @@ class ExtraHeaderAdder(urllib.request.BaseHandler):
|
||||||
|
|
||||||
class YoutubeDLSpy(urllib.request.BaseHandler):
|
class YoutubeDLSpy(urllib.request.BaseHandler):
|
||||||
Transaction = collections.namedtuple('Transaction', ['request', 'response'])
|
Transaction = collections.namedtuple('Transaction', ['request', 'response'])
|
||||||
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.reset()
|
self.reset()
|
||||||
|
@@ -62,17 +63,17 @@ class YoutubeDLSpy(urllib.request.BaseHandler):
|
||||||
self.transactions = []
|
self.transactions = []
|
||||||
|
|
||||||
def final_bounces(self, url):
|
def final_bounces(self, url):
|
||||||
"""Resolves redirect chains in self.transactions, returns a list of
|
"""
|
||||||
|
Resolves redirect chains in self.transactions, returns a list of
|
||||||
Transaction representing the final redirect destinations of the given
|
Transaction representing the final redirect destinations of the given
|
||||||
url. There could be more than one if for example youtube-dl hit the
|
url. There could be more than one if for example youtube-dl hit the
|
||||||
same url with HEAD and then GET requests."""
|
same url with HEAD and then GET requests.
|
||||||
|
"""
|
||||||
redirects = {}
|
redirects = {}
|
||||||
for txn in self.transactions:
|
for txn in self.transactions:
|
||||||
# XXX check http status 301,302,303,307? check for "uri" header
|
# XXX check http status 301,302,303,307? check for "uri" header
|
||||||
# as well as "location"? see urllib.request.HTTPRedirectHandler
|
# as well as "location"? see urllib.request.HTTPRedirectHandler
|
||||||
if ((txn.request.full_url == url
|
if 'location' in txn.response.headers:
|
||||||
or txn.request.full_url in redirects)
|
|
||||||
and 'location' in txn.response.headers):
|
|
||||||
redirects[txn.request.full_url] = txn
|
redirects[txn.request.full_url] = txn
|
||||||
|
|
||||||
final_url = url
|
final_url = url
|
||||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b7.dev118',
|
version='1.1b7.dev119',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue