mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-02 06:36:20 -04:00
fix and test both job stop request and site stop requests
This commit is contained in:
parent
8aa10962bc
commit
bb98a43c8c
2 changed files with 84 additions and 21 deletions
|
@ -249,42 +249,104 @@ def test_resume_job():
|
|||
assert site.starts_and_stops[3]['start']
|
||||
assert site.starts_and_stops[3]['stop'] is None
|
||||
|
||||
# simulate a crawl stopped by a stop request
|
||||
# simulate a job stop request
|
||||
job_conf = {'seeds': [{'url': 'http://example.com/'}, {'url': 'http://example_2.com/'}]}
|
||||
job = brozzler.new_job(frontier, job_conf)
|
||||
assert len(list(frontier.job_sites(job.id))) == 2
|
||||
site1 = list(frontier.job_sites(job.id))[0]
|
||||
site2 = list(frontier.job_sites(job.id))[1]
|
||||
|
||||
job.stop_requested = datetime.datetime.utcnow().replace(tzinfo=doublethink.UTC)
|
||||
job.save()
|
||||
|
||||
# should raise a CrawlStopped
|
||||
with pytest.raises(brozzler.CrawlStopped):
|
||||
frontier.honor_stop_request(site)
|
||||
frontier.honor_stop_request(site1)
|
||||
|
||||
frontier.finished(site, 'FINISHED_STOP_REQUESTED')
|
||||
frontier.finished(site1, 'FINISHED_STOP_REQUESTED')
|
||||
frontier.finished(site2, 'FINISHED_STOP_REQUESTED')
|
||||
job.refresh()
|
||||
|
||||
assert job.status == 'FINISHED'
|
||||
assert job.stop_requested
|
||||
assert len(job.starts_and_stops) == 4
|
||||
assert job.starts_and_stops[3]['start']
|
||||
assert job.starts_and_stops[3]['stop']
|
||||
assert job.starts_and_stops[3]['stop'] > job.starts_and_stops[3]['start']
|
||||
assert site.status == 'FINISHED_STOP_REQUESTED'
|
||||
assert len(site.starts_and_stops) == 4
|
||||
assert site.starts_and_stops[3]['start']
|
||||
assert site.starts_and_stops[3]['stop']
|
||||
assert site.starts_and_stops[3]['stop'] > site.starts_and_stops[3]['start']
|
||||
assert len(job.starts_and_stops) == 1
|
||||
assert job.starts_and_stops[0]['start']
|
||||
assert job.starts_and_stops[0]['stop']
|
||||
assert job.starts_and_stops[0]['stop'] > job.starts_and_stops[0]['start']
|
||||
assert site1.status == 'FINISHED_STOP_REQUESTED'
|
||||
assert site2.status == 'FINISHED_STOP_REQUESTED'
|
||||
assert len(site1.starts_and_stops) == 1
|
||||
assert len(site2.starts_and_stops) == 1
|
||||
assert site1.starts_and_stops[0]['start']
|
||||
assert site1.starts_and_stops[0]['stop']
|
||||
assert site1.starts_and_stops[0]['stop'] > site.starts_and_stops[0]['start']
|
||||
assert site2.starts_and_stops[0]['start']
|
||||
assert site2.starts_and_stops[0]['stop']
|
||||
assert site2.starts_and_stops[0]['stop'] > site.starts_and_stops[0]['start']
|
||||
|
||||
# test resume job after a stop request
|
||||
# simulate job resume after a stop request
|
||||
frontier.resume_job(job)
|
||||
site = list(frontier.job_sites(job.id))[0]
|
||||
site1 = list(frontier.job_sites(job.id))[0]
|
||||
site2 = list(frontier.job_sites(job.id))[0]
|
||||
|
||||
assert job.status == 'ACTIVE'
|
||||
assert job.stop_requested is None
|
||||
assert len(job.starts_and_stops) == 5
|
||||
assert job.starts_and_stops[4]['start']
|
||||
assert job.starts_and_stops[4]['stop'] is None
|
||||
assert site.status == 'ACTIVE'
|
||||
assert len(site.starts_and_stops) == 5
|
||||
assert site.starts_and_stops[4]['start']
|
||||
assert site.starts_and_stops[4]['stop'] is None
|
||||
assert len(job.starts_and_stops) == 2
|
||||
assert job.starts_and_stops[1]['start']
|
||||
assert job.starts_and_stops[1]['stop'] is None
|
||||
assert site1.status == 'ACTIVE'
|
||||
assert len(site1.starts_and_stops) == 2
|
||||
assert site1.starts_and_stops[1]['start']
|
||||
assert site1.starts_and_stops[1]['stop'] is None
|
||||
assert site2.status == 'ACTIVE'
|
||||
assert len(site2.starts_and_stops) == 2
|
||||
assert site2.starts_and_stops[1]['start']
|
||||
assert site2.starts_and_stops[1]['stop'] is None
|
||||
|
||||
# simulate a site stop request
|
||||
site1.stop_requested = datetime.datetime.utcnow().replace(tzinfo=doublethink.UTC)
|
||||
site1.save()
|
||||
|
||||
# should not raise a CrawlStopped
|
||||
frontier.honor_stop_request(site)
|
||||
|
||||
frontier.finished(site1, 'FINISHED_STOP_REQUESTED')
|
||||
job.refresh()
|
||||
|
||||
assert job.status == 'ACTIVE'
|
||||
assert job.stop_requested is None
|
||||
assert len(job.starts_and_stops) == 2
|
||||
assert job.starts_and_stops[1]['start']
|
||||
assert job.starts_and_stops[1]['stop'] is None
|
||||
assert site1.status == 'FINISHED_STOP_REQUESTED'
|
||||
assert len(site1.starts_and_stops) == 2
|
||||
assert site1.starts_and_stops[1]['start']
|
||||
assert site1.starts_and_stops[1]['stop']
|
||||
assert site1.starts_and_stops[1]['stop'] > site.starts_and_stops[1]['start']
|
||||
assert site2.status == 'ACTIVE'
|
||||
assert len(site2.starts_and_stops) == 2
|
||||
assert site2.starts_and_stops[1]['start']
|
||||
assert site2.starts_and_stops[1]['stop'] is None
|
||||
|
||||
# simulate site resume after a stop request
|
||||
frontier.resume_site(site1)
|
||||
site1 = list(frontier.job_sites(job.id))[0]
|
||||
site2 = list(frontier.job_sites(job.id))[1]
|
||||
|
||||
assert job.status == 'ACTIVE'
|
||||
assert job.stop_requested is None
|
||||
assert len(job.starts_and_stops) == 2
|
||||
assert job.starts_and_stops[1]['start']
|
||||
assert job.starts_and_stops[1]['stop'] is None
|
||||
assert site1.status == 'ACTIVE'
|
||||
assert site1.stop_requested is None
|
||||
assert len(site1.starts_and_stops) == 3
|
||||
assert site1.starts_and_stops[2]['start']
|
||||
assert site1.starts_and_stops[2]['stop'] is None
|
||||
assert site2.status == 'ACTIVE'
|
||||
assert len(site2.starts_and_stops) == 2
|
||||
assert site2.starts_and_stops[1]['start']
|
||||
assert site2.starts_and_stops[1]['stop'] is None
|
||||
|
||||
def test_time_limit():
|
||||
# XXX test not thoroughly adapted to change in time accounting, since
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue