Fix and test both job stop requests and site stop requests

This commit is contained in:
Daniel Bicho 2017-10-16 11:46:35 +01:00
parent 8aa10962bc
commit bb98a43c8c
2 changed files with 84 additions and 21 deletions

View File

@ -264,6 +264,7 @@ class RethinkDbFrontier:
# can't call resume_job since that would resume job's other sites # can't call resume_job since that would resume job's other sites
job = brozzler.Job.load(self.rr, site.job_id) job = brozzler.Job.load(self.rr, site.job_id)
job.status = "ACTIVE" job.status = "ACTIVE"
site.stop_requested = None
job.starts_and_stops.append( job.starts_and_stops.append(
{"start":doublethink.utcnow(), "stop":None}) {"start":doublethink.utcnow(), "stop":None})
job.save() job.save()

View File

@ -249,42 +249,104 @@ def test_resume_job():
assert site.starts_and_stops[3]['start'] assert site.starts_and_stops[3]['start']
assert site.starts_and_stops[3]['stop'] is None assert site.starts_and_stops[3]['stop'] is None
# simulate a crawl stopped by a stop request # simulate a job stop request
job_conf = {'seeds': [{'url': 'http://example.com/'}, {'url': 'http://example_2.com/'}]}
job = brozzler.new_job(frontier, job_conf)
assert len(list(frontier.job_sites(job.id))) == 2
site1 = list(frontier.job_sites(job.id))[0]
site2 = list(frontier.job_sites(job.id))[1]
job.stop_requested = datetime.datetime.utcnow().replace(tzinfo=doublethink.UTC) job.stop_requested = datetime.datetime.utcnow().replace(tzinfo=doublethink.UTC)
job.save() job.save()
# should raise a CrawlStopped
with pytest.raises(brozzler.CrawlStopped): with pytest.raises(brozzler.CrawlStopped):
frontier.honor_stop_request(site) frontier.honor_stop_request(site1)
frontier.finished(site, 'FINISHED_STOP_REQUESTED') frontier.finished(site1, 'FINISHED_STOP_REQUESTED')
frontier.finished(site2, 'FINISHED_STOP_REQUESTED')
job.refresh() job.refresh()
assert job.status == 'FINISHED' assert job.status == 'FINISHED'
assert job.stop_requested assert job.stop_requested
assert len(job.starts_and_stops) == 4 assert len(job.starts_and_stops) == 1
assert job.starts_and_stops[3]['start'] assert job.starts_and_stops[0]['start']
assert job.starts_and_stops[3]['stop'] assert job.starts_and_stops[0]['stop']
assert job.starts_and_stops[3]['stop'] > job.starts_and_stops[3]['start'] assert job.starts_and_stops[0]['stop'] > job.starts_and_stops[0]['start']
assert site.status == 'FINISHED_STOP_REQUESTED' assert site1.status == 'FINISHED_STOP_REQUESTED'
assert len(site.starts_and_stops) == 4 assert site2.status == 'FINISHED_STOP_REQUESTED'
assert site.starts_and_stops[3]['start'] assert len(site1.starts_and_stops) == 1
assert site.starts_and_stops[3]['stop'] assert len(site2.starts_and_stops) == 1
assert site.starts_and_stops[3]['stop'] > site.starts_and_stops[3]['start'] assert site1.starts_and_stops[0]['start']
assert site1.starts_and_stops[0]['stop']
assert site1.starts_and_stops[0]['stop'] > site.starts_and_stops[0]['start']
assert site2.starts_and_stops[0]['start']
assert site2.starts_and_stops[0]['stop']
assert site2.starts_and_stops[0]['stop'] > site.starts_and_stops[0]['start']
# test resume job after a stop request # simulate job resume after a stop request
frontier.resume_job(job) frontier.resume_job(job)
site = list(frontier.job_sites(job.id))[0] site1 = list(frontier.job_sites(job.id))[0]
site2 = list(frontier.job_sites(job.id))[0]
assert job.status == 'ACTIVE' assert job.status == 'ACTIVE'
assert job.stop_requested is None assert job.stop_requested is None
assert len(job.starts_and_stops) == 5 assert len(job.starts_and_stops) == 2
assert job.starts_and_stops[4]['start'] assert job.starts_and_stops[1]['start']
assert job.starts_and_stops[4]['stop'] is None assert job.starts_and_stops[1]['stop'] is None
assert site.status == 'ACTIVE' assert site1.status == 'ACTIVE'
assert len(site.starts_and_stops) == 5 assert len(site1.starts_and_stops) == 2
assert site.starts_and_stops[4]['start'] assert site1.starts_and_stops[1]['start']
assert site.starts_and_stops[4]['stop'] is None assert site1.starts_and_stops[1]['stop'] is None
assert site2.status == 'ACTIVE'
assert len(site2.starts_and_stops) == 2
assert site2.starts_and_stops[1]['start']
assert site2.starts_and_stops[1]['stop'] is None
# simulate a site stop request
site1.stop_requested = datetime.datetime.utcnow().replace(tzinfo=doublethink.UTC)
site1.save()
# should not raise a CrawlStopped
frontier.honor_stop_request(site)
frontier.finished(site1, 'FINISHED_STOP_REQUESTED')
job.refresh()
assert job.status == 'ACTIVE'
assert job.stop_requested is None
assert len(job.starts_and_stops) == 2
assert job.starts_and_stops[1]['start']
assert job.starts_and_stops[1]['stop'] is None
assert site1.status == 'FINISHED_STOP_REQUESTED'
assert len(site1.starts_and_stops) == 2
assert site1.starts_and_stops[1]['start']
assert site1.starts_and_stops[1]['stop']
assert site1.starts_and_stops[1]['stop'] > site.starts_and_stops[1]['start']
assert site2.status == 'ACTIVE'
assert len(site2.starts_and_stops) == 2
assert site2.starts_and_stops[1]['start']
assert site2.starts_and_stops[1]['stop'] is None
# simulate site resume after a stop request
frontier.resume_site(site1)
site1 = list(frontier.job_sites(job.id))[0]
site2 = list(frontier.job_sites(job.id))[1]
assert job.status == 'ACTIVE'
assert job.stop_requested is None
assert len(job.starts_and_stops) == 2
assert job.starts_and_stops[1]['start']
assert job.starts_and_stops[1]['stop'] is None
assert site1.status == 'ACTIVE'
assert site1.stop_requested is None
assert len(site1.starts_and_stops) == 3
assert site1.starts_and_stops[2]['start']
assert site1.starts_and_stops[2]['stop'] is None
assert site2.status == 'ACTIVE'
assert len(site2.starts_and_stops) == 2
assert site2.starts_and_stops[1]['start']
assert site2.starts_and_stops[1]['stop'] is None
def test_time_limit(): def test_time_limit():
# XXX test not thoroughly adapted to change in time accounting, since # XXX test not thoroughly adapted to change in time accounting, since