mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Merge pull request #263 from galgeek/yt-dlp-vimeo
yt-dlp: capture postprocessor "Merger" videos
This commit is contained in:
commit
5b5d4cb062
@ -1,7 +1,7 @@
|
|||||||
#
|
#
|
||||||
# brozzler/behaviors.yaml - behavior configuration
|
# brozzler/behaviors.yaml - behavior configuration
|
||||||
#
|
#
|
||||||
# Copyright (C) 2014-2020 Internet Archive
|
# Copyright (C) 2014-2023 Internet Archive
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
@ -63,10 +63,6 @@
|
|||||||
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
||||||
behavior_js_template: marquette_edu.js
|
behavior_js_template: marquette_edu.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
-
|
|
||||||
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
|
||||||
behavior_js_template: vimeo.js
|
|
||||||
request_idle_timeout_sec: 10
|
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
|
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
|
||||||
behavior_js_template: psu24.js
|
behavior_js_template: psu24.js
|
||||||
|
@ -1,41 +0,0 @@
|
|||||||
/*
|
|
||||||
* brozzler/behaviors.d/vimeo.js - behavior for vimeo.com, clicks to play/crawl
|
|
||||||
* videos
|
|
||||||
*
|
|
||||||
* Copyright (C) 2014-2016 Internet Archive
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
var umbraState = {'idleSince':null};
|
|
||||||
var umbraVideoElements = document.getElementsByTagName('video');
|
|
||||||
for (var i = 0; i < umbraVideoElements.length; i++) {
|
|
||||||
umbraVideoElements[i].play();
|
|
||||||
}
|
|
||||||
umbraState.idleSince = Date.now();
|
|
||||||
|
|
||||||
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
|
|
||||||
// time, then we consider ourselves finished with the page.
|
|
||||||
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
|
|
||||||
|
|
||||||
// Called from outside of this script.
|
|
||||||
var umbraBehaviorFinished = function() {
|
|
||||||
if (umbraState.idleSince != null) {
|
|
||||||
var idleTimeMs = Date.now() - umbraState.idleSince;
|
|
||||||
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
'''
|
'''
|
||||||
brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
|
brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
|
||||||
it runs youtube-dl on them, browses them and runs behaviors if appropriate,
|
it runs yt-dlp on them, browses them and runs behaviors if appropriate,
|
||||||
scopes and adds outlinks to the frontier
|
scopes and adds outlinks to the frontier
|
||||||
|
|
||||||
Copyright (C) 2014-2023 Internet Archive
|
Copyright (C) 2014-2023 Internet Archive
|
||||||
|
@ -171,15 +171,15 @@ def _build_youtube_dl(worker, destdir, site, page):
|
|||||||
|
|
||||||
# youtube watch page postprocessor is MoveFiles
|
# youtube watch page postprocessor is MoveFiles
|
||||||
|
|
||||||
if postprocessor == 'FixupM3u8':
|
if postprocessor == 'FixupM3u8' or postprocessor == 'Merger':
|
||||||
url = 'youtube-dl:%05d:%s' % (
|
url = 'youtube-dl:%05d:%s' % (
|
||||||
info_dict.get('playlist_index') or 1,
|
info_dict.get('playlist_index') or 1,
|
||||||
info_dict['webpage_url'])
|
info_dict['webpage_url'])
|
||||||
else:
|
else:
|
||||||
url = info_dict.get('url')
|
url = info_dict.get('url', '')
|
||||||
|
|
||||||
# skip urls ending .m3u8, to avoid duplicates handled by FixupM3u*
|
# skip urls ending .m3u8, to avoid duplicates handled by FixupM3u8
|
||||||
if url.endswith('.m3u8'):
|
if url.endswith('.m3u8') or url == '':
|
||||||
return
|
return
|
||||||
|
|
||||||
size = os.path.getsize(info_dict['filepath'])
|
size = os.path.getsize(info_dict['filepath'])
|
||||||
@ -347,7 +347,7 @@ def _try_youtube_dl(worker, ydl, site, page):
|
|||||||
except brozzler.ShutdownRequested as e:
|
except brozzler.ShutdownRequested as e:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if hasattr(e, "exc_info") and e.exc_info[0] == youtube_dl.utils.UnsupportedError:
|
if hasattr(e, "exc_info") and e.exc_info[0] == yt_dlp.utils.UnsupportedError:
|
||||||
return None
|
return None
|
||||||
elif (hasattr(e, "exc_info")
|
elif (hasattr(e, "exc_info")
|
||||||
and e.exc_info[0] == urllib.error.HTTPError
|
and e.exc_info[0] == urllib.error.HTTPError
|
||||||
|
Loading…
x
Reference in New Issue
Block a user