Merge pull request #115 from nlevitt/ydl-stitched

Ydl stitched
This commit is contained in:
jkafader 2018-09-06 16:15:52 -07:00 committed by GitHub
commit 8368cd2bcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 441 additions and 216 deletions

View file

@ -0,0 +1,34 @@
<html>
<head>
<title>segmented (hls) video test</title>
</head>
<body>
<!--
Test page that embeds a segmented (HLS) video via a plain video tag.
The HLS segments and manifest were generated like so:
ffmpeg -i ../site6/small.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 1 small.m3u8
-->
<!--
HLS does not play in Chrome with a plain video tag and no JavaScript, but
that does not matter here because this page exists to test youtube-dl
functionality, not in-browser playback.
-->
<video id="video" controls muted>
<source src="small.m3u8" type="application/x-mpegURL">
</video>
<!-- To make playback work in Chrome, enable the hls.js snippet below. -->
<!--
<script src="hls.js"></script>
<script>
if(Hls.isSupported()) {
var video = document.getElementById('video');
var hls = new Hls();
hls.loadSource('small.m3u8');
hls.attachMedia(video);
hls.on(Hls.Events.MANIFEST_PARSED,function() {
video.play();
});
}
</script>
-->
</body>
</html>

View file

@ -0,0 +1,15 @@
#EXTM3U
#EXT-X-VERSION:3
#EXT-X-TARGETDURATION:1
#EXT-X-MEDIA-SEQUENCE:1
#EXTINF:1.000000,
small1.ts
#EXTINF:1.000000,
small2.ts
#EXTINF:1.000000,
small3.ts
#EXTINF:1.000000,
small4.ts
#EXTINF:0.533333,
small5.ts
#EXT-X-ENDLIST

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -3,7 +3,7 @@
test_cluster.py - integration tests for a brozzler cluster, expects brozzler,
warcprox, pywb, rethinkdb and other dependencies to be running already
Copyright (C) 2016-2017 Internet Archive
Copyright (C) 2016-2018 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -796,3 +796,31 @@ def test_time_limit(httpd):
job.refresh()
assert job.status == 'FINISHED'
def test_ydl_stitching(httpd):
    """Brozzle the ``/site10/`` seed and verify the videos recorded for it.

    Creates a new site whose seed is ``/site10/`` on the local test httpd,
    waits (up to 300 seconds) for the site to reach status ``FINISHED``,
    then checks that exactly one page was crawled, that the page lists six
    videos, and that one of them is the entry blamed on youtube-dl with the
    ``youtube-dl:00001:`` url prefix.

    NOTE(review): presumably that entry is the single video youtube-dl
    stitched together from the HLS segments -- confirm against brozzler.ydl.

    Args:
        httpd: test fixture exposing ``server_port``, the port of the local
            http server that serves the test sites.
    """
    rr = doublethink.Rethinker('localhost', db='brozzler')
    frontier = brozzler.RethinkDbFrontier(rr)
    seed_url = 'http://localhost:%s/site10/' % httpd.server_port
    site = brozzler.Site(rr, {'seed': seed_url})
    brozzler.new_site(frontier, site)

    # the site should be brozzled fairly quickly; use a monotonic clock so
    # that wall-clock adjustments cannot shorten or extend the timeout
    start = time.monotonic()
    while site.status != 'FINISHED' and time.monotonic() - start < 300:
        time.sleep(0.5)
        site.refresh()
    assert site.status == 'FINISHED'

    # check page.videos
    pages = list(frontier.site_pages(site.id))
    assert len(pages) == 1
    page = pages[0]
    assert len(page.videos) == 6
    assert {
        'blame': 'youtube-dl',
        'content-length': 267900,
        'content-type': 'video/mp4',
        'response_code': 204,
        'url': 'youtube-dl:00001:http://localhost:%s/site10/' % httpd.server_port,
    } in page.videos

View file

@ -23,6 +23,7 @@ import threading
import os
import brozzler
import brozzler.chrome
import brozzler.ydl
import logging
import yaml
import datetime
@ -227,9 +228,8 @@ def test_proxy_down():
# youtube-dl fetch
with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
ydl = worker._youtube_dl(tempdir, site)
with pytest.raises(brozzler.ProxyError):
worker._try_youtube_dl(ydl, site, page)
brozzler.ydl.do_youtube_dl(worker, site, page)
# raw fetch
with pytest.raises(brozzler.ProxyError):
@ -404,18 +404,19 @@ def test_needs_browsing():
page = brozzler.Page(None, {
'url':'http://example.com/a'})
spy = brozzler.worker.YoutubeDLSpy()
spy.transactions.append({
spy = brozzler.ydl.YoutubeDLSpy()
spy.fetches.append({
'url': 'http://example.com/a',
'method': 'HEAD',
'status_code': 301,
'response_code': 301,
'response_headers': ConvenientHeaders({'Location': '/b'})})
spy.transactions.append({
spy.fetches.append({
'url': 'http://example.com/b',
'method': 'GET',
'status_code': 200,
'response_code': 200,
'response_headers': ConvenientHeaders({
'Content-Type': 'application/pdf'})})
assert not brozzler.worker.BrozzlerWorker._needs_browsing(None, page, spy)
assert not brozzler.worker.BrozzlerWorker._needs_browsing(
None, page, spy.fetches)