Update for reppy API change and pin reppy to its current version (0.4.1)

This commit is contained in:
Noah Levitt 2016-11-08 13:39:32 -08:00
parent cba5fa4a0b
commit f10b4c71e6
2 changed files with 21 additions and 20 deletions

View file

@ -1,20 +1,20 @@
# '''
# brozzler/robots.py - robots.txt support brozzler/robots.py - robots.txt support
#
# Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
# You may obtain a copy of the License at You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and See the License for the specific language governing permissions and
# limitations under the License. limitations under the License.
# '''
import json import json
import logging import logging
@ -44,7 +44,8 @@ def _robots_cache(site):
req_sesh.headers.update(site.extra_headers()) req_sesh.headers.update(site.extra_headers())
if site.user_agent: if site.user_agent:
req_sesh.headers['User-Agent'] = site.user_agent req_sesh.headers['User-Agent'] = site.user_agent
_robots_caches[site.id] = reppy.cache.RobotsCache(session=req_sesh) _robots_caches[site.id] = reppy.cache.RobotsCache(
session=req_sesh, capacity=5)
return _robots_caches[site.id] return _robots_caches[site.id]

View file

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b7.dev114', version='1.1b7.dev115',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',
@ -59,7 +59,7 @@ setuptools.setup(
install_requires=[ install_requires=[
'PyYAML', 'PyYAML',
'youtube-dl', 'youtube-dl',
'reppy', 'reppy==0.4.1',
'requests', 'requests',
'websocket-client', 'websocket-client',
'pillow==3.3.0', 'pillow==3.3.0',