mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 08:09:48 -05:00
Merge pull request #263 from galgeek/yt-dlp-vimeo
yt-dlp: capture postprocessor "Merger" videos
This commit is contained in:
commit
5b5d4cb062
@ -1,7 +1,7 @@
|
||||
#
|
||||
# brozzler/behaviors.yaml - behavior configuration
|
||||
#
|
||||
# Copyright (C) 2014-2020 Internet Archive
|
||||
# Copyright (C) 2014-2023 Internet Archive
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -63,10 +63,6 @@
|
||||
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
||||
behavior_js_template: marquette_edu.js
|
||||
request_idle_timeout_sec: 10
|
||||
-
|
||||
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
||||
behavior_js_template: vimeo.js
|
||||
request_idle_timeout_sec: 10
|
||||
-
|
||||
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
|
||||
behavior_js_template: psu24.js
|
||||
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* brozzler/behaviors.d/vimeo.js - behavior for vimeo.com, clicks to play/crawl
|
||||
* videos
|
||||
*
|
||||
* Copyright (C) 2014-2016 Internet Archive
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
var umbraState = {'idleSince':null};
|
||||
var umbraVideoElements = document.getElementsByTagName('video');
|
||||
for (var i = 0; i < umbraVideoElements.length; i++) {
|
||||
umbraVideoElements[i].play();
|
||||
}
|
||||
umbraState.idleSince = Date.now();
|
||||
|
||||
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
|
||||
// time, then we consider ourselves finished with the page.
|
||||
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
|
||||
|
||||
// Called from outside of this script.
|
||||
var umbraBehaviorFinished = function() {
|
||||
if (umbraState.idleSince != null) {
|
||||
var idleTimeMs = Date.now() - umbraState.idleSince;
|
||||
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
'''
|
||||
brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
|
||||
it runs youtube-dl on them, browses them and runs behaviors if appropriate,
|
||||
it runs yt-dlp on them, browses them and runs behaviors if appropriate,
|
||||
scopes and adds outlinks to the frontier
|
||||
|
||||
Copyright (C) 2014-2023 Internet Archive
|
||||
|
@ -171,15 +171,15 @@ def _build_youtube_dl(worker, destdir, site, page):
|
||||
|
||||
# youtube watch page postprocessor is MoveFiles
|
||||
|
||||
if postprocessor == 'FixupM3u8':
|
||||
if postprocessor == 'FixupM3u8' or postprocessor == 'Merger':
|
||||
url = 'youtube-dl:%05d:%s' % (
|
||||
info_dict.get('playlist_index') or 1,
|
||||
info_dict['webpage_url'])
|
||||
else:
|
||||
url = info_dict.get('url')
|
||||
url = info_dict.get('url', '')
|
||||
|
||||
# skip urls ending .m3u8, to avoid duplicates handled by FixupM3u*
|
||||
if url.endswith('.m3u8'):
|
||||
# skip urls ending .m3u8, to avoid duplicates handled by FixupM3u8
|
||||
if url.endswith('.m3u8') or url == '':
|
||||
return
|
||||
|
||||
size = os.path.getsize(info_dict['filepath'])
|
||||
@ -347,7 +347,7 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||
except brozzler.ShutdownRequested as e:
|
||||
raise
|
||||
except Exception as e:
|
||||
if hasattr(e, "exc_info") and e.exc_info[0] == youtube_dl.utils.UnsupportedError:
|
||||
if hasattr(e, "exc_info") and e.exc_info[0] == yt_dlp.utils.UnsupportedError:
|
||||
return None
|
||||
elif (hasattr(e, "exc_info")
|
||||
and e.exc_info[0] == urllib.error.HTTPError
|
||||
|
Loading…
x
Reference in New Issue
Block a user