mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Merge pull request #178 from galgeek/ARI-5995-tidied
ARI-5995 instagram capture updates
This commit is contained in:
commit
802fbff986
@ -35,12 +35,16 @@
|
||||
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
|
||||
behavior_js_template: umbraBehavior.js.j2
|
||||
default_parameters:
|
||||
interval: 500
|
||||
actions:
|
||||
- selector: button.coreSpriteDismissLarge
|
||||
- selector: .glyphsSpriteGrey_Close
|
||||
rmSelector: '.RnEpo'
|
||||
- selector: 'a>.eLAPa>.KL4Bh'
|
||||
firstMatchOnly: true
|
||||
limit: 1
|
||||
rmSelector: '.RnEpo'
|
||||
- selector: a.coreSpriteRightPaginationArrow
|
||||
repeatSameElement: true
|
||||
rmSelector: '.RnEpo'
|
||||
-
|
||||
url_regex: '^https?://americaspresidents\.si\.edu/gallery.*$'
|
||||
behavior_js_template: umbraBehavior.js.j2
|
||||
@ -48,7 +52,7 @@
|
||||
interval: 2500
|
||||
actions:
|
||||
- selector: div.see-more
|
||||
firstMatchOnly: true
|
||||
limit: 1
|
||||
- selector: li.next
|
||||
repeatSameElement: true
|
||||
-
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class
|
||||
*
|
||||
* Copyright (C) 2017-2018 Internet Archive
|
||||
* Copyright (C) 2017-2019 Internet Archive
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -35,8 +35,9 @@ class UmbraBehavior {
|
||||
var selector = this.actions[k].selector;
|
||||
var childSelector = this.actions[k].childSelector;
|
||||
var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false;
|
||||
var firstMatchOnly = this.actions[k].firstMatchOnly ? this.actions[k].firstMatchOnly : false;
|
||||
var limit = this.actions[k].limit ? this.actions[k].limit : false;
|
||||
var action = this.actions[k].do ? this.actions[k].do : 'click';
|
||||
var rmSelector = this.actions[k].rmSelector ? this.actions[k].rmSelector : null;
|
||||
var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null;
|
||||
var didSomething = false;
|
||||
var somethingLeftAbove = false;
|
||||
@ -59,6 +60,16 @@ class UmbraBehavior {
|
||||
|
||||
var documentsLength = documents.length;
|
||||
for (var j = 0; j < documentsLength; j++) {
|
||||
if (rmSelector) {
|
||||
var rmTargets = documents[j].querySelectorAll(rmSelector);
|
||||
for (var i = 0; i < rmTargets.length; i++) {
|
||||
if (this.isVisible(rmTargets[i])) {
|
||||
rmTargets[i].remove();
|
||||
didSomething = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (closeSelector) {
|
||||
var closeTargets = documents[j].querySelectorAll(closeSelector);
|
||||
for (var i = 0; i < closeTargets.length; i++) {
|
||||
@ -70,18 +81,18 @@ class UmbraBehavior {
|
||||
}
|
||||
}
|
||||
|
||||
if (firstMatchOnly) {
|
||||
var doTargets = [ documents[j].querySelector(selector) ];
|
||||
} else {
|
||||
var doTargets = documents[j].querySelectorAll(selector);
|
||||
var doTargets = documents[j].querySelectorAll(selector);
|
||||
|
||||
var repeats = doTargets.length;
|
||||
if (limit && limit < repeats) {
|
||||
repeats = limit;
|
||||
}
|
||||
|
||||
var doTargetsLength = doTargets.length;
|
||||
if (!(doTargetsLength > 0)) {
|
||||
if (!(repeats > 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for ( var i = 0; i < doTargetsLength; i++) {
|
||||
for ( var i = 0; i < repeats; i++) {
|
||||
if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) {
|
||||
continue;
|
||||
}
|
||||
@ -110,6 +121,11 @@ class UmbraBehavior {
|
||||
somethingLeftAbove = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (limit && limit == i) {
|
||||
this.nextAction();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!didSomething) {
|
||||
@ -129,10 +145,7 @@ class UmbraBehavior {
|
||||
} else {
|
||||
var idleTimeMs = Date.now() - this.idleSince;
|
||||
if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) {
|
||||
console.log("ready for next action");
|
||||
this.index += 1;
|
||||
this.idleSince = null;
|
||||
window.scroll(0,0);
|
||||
this.nextAction();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -170,6 +183,13 @@ class UmbraBehavior {
|
||||
this.idleSince = null;
|
||||
}
|
||||
|
||||
nextAction() {
|
||||
console.log("ready for next action");
|
||||
this.index += 1;
|
||||
this.idleSince = null;
|
||||
window.scroll(0,0);
|
||||
}
|
||||
|
||||
start() {
|
||||
var that = this;
|
||||
this.intervalId = setInterval(function() {
|
||||
|
@ -154,7 +154,7 @@ def _build_youtube_dl(worker, destdir, site):
|
||||
if ie_result.get('_type') == 'playlist':
|
||||
self.logger.info(
|
||||
'extractor %r found playlist in %s', ie.IE_NAME, url)
|
||||
if ie.IE_NAME == 'youtube:playlist':
|
||||
if ie.IE_NAME in {'youtube:playlist', 'soundcloud:user', 'instagram:user'}:
|
||||
# At this point ie_result['entries'] is an iterator that
|
||||
# will fetch more metadata from youtube to list all the
|
||||
# videos. We unroll that iterator here partly because
|
||||
@ -163,17 +163,9 @@ def _build_youtube_dl(worker, destdir, site):
|
||||
ie_result['entries_no_dl'] = list(ie_result['entries'])
|
||||
ie_result['entries'] = []
|
||||
self.logger.info(
|
||||
'not downloading %s videos from this youtube '
|
||||
'not downloading %s media files from this '
|
||||
'playlist because we expect to capture them from '
|
||||
'individual watch pages',
|
||||
len(ie_result['entries_no_dl']))
|
||||
elif ie.IE_NAME == 'soundcloud:user':
|
||||
ie_result['entries_no_dl'] = list(ie_result['entries'])
|
||||
ie_result['entries'] = []
|
||||
self.logger.info(
|
||||
'not downloading %s tracks from this soundcloud '
|
||||
'user page because we expect to capture them from '
|
||||
'individual track pages',
|
||||
'individual watch/track/detail pages',
|
||||
len(ie_result['entries_no_dl']))
|
||||
else:
|
||||
self.logger.info(
|
||||
|
Loading…
x
Reference in New Issue
Block a user