fix buglet in creation of new least_hops index on pages table

This commit is contained in:
Noah Levitt 2016-06-28 23:14:23 +00:00
parent 77c800f6a2
commit e9c398caea
2 changed files with 20 additions and 20 deletions

View file

@ -1,20 +1,20 @@
# '''
# brozzler/frontier.py - RethinkDbFrontier manages crawl jobs, sites and pages brozzler/frontier.py - RethinkDbFrontier manages crawl jobs, sites and pages
#
# Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
# You may obtain a copy of the License at You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and See the License for the specific language governing permissions and
# limitations under the License. limitations under the License.
# '''
import logging import logging
import brozzler import brozzler
@ -68,8 +68,8 @@ class RethinkDbFrontier:
# console # console
self.r.table("pages").index_create( self.r.table("pages").index_create(
"least_hops", [ "least_hops", [
r.row["site_id"], r.row["brozzle_count"], self.r.row["site_id"], self.r.row["brozzle_count"],
r.row["hops_from_seed"]]) self.r.row["hops_from_seed"]])
if not "jobs" in tables: if not "jobs" in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table 'jobs' in database %s", "creating rethinkdb table 'jobs' in database %s",

View file

@ -21,7 +21,7 @@ import setuptools
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1.dev28', version='1.1.dev29',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',