Merge branch 'ARI-5995' into qa

This commit is contained in:
Barbara Miller 2019-10-15 11:59:28 -07:00
commit a17c34236c
3 changed files with 25 additions and 16 deletions

View file

@ -42,7 +42,7 @@
actions: actions:
- selector: .glyphsSpriteGrey_Close - selector: .glyphsSpriteGrey_Close
- selector: 'a>.eLAPa>.KL4Bh' - selector: 'a>.eLAPa>.KL4Bh'
firstMatchOnly: true limit: 1
- selector: a.coreSpriteRightPaginationArrow - selector: a.coreSpriteRightPaginationArrow
repeatSameElement: true repeatSameElement: true
- -
@ -52,7 +52,7 @@
interval: 2500 interval: 2500
actions: actions:
- selector: div.see-more - selector: div.see-more
firstMatchOnly: true limit: 1
- selector: li.next - selector: li.next
repeatSameElement: true repeatSameElement: true
- -

View file

@ -1,7 +1,7 @@
/* /*
* brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class * brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class
* *
* Copyright (C) 2017-2018 Internet Archive * Copyright (C) 2017-2019 Internet Archive
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -34,7 +34,7 @@ class UmbraBehavior {
var k = this.index; var k = this.index;
var selector = this.actions[k].selector; var selector = this.actions[k].selector;
var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false; var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false;
var firstMatchOnly = this.actions[k].firstMatchOnly ? this.actions[k].firstMatchOnly : false; var limit = this.actions[k].limit ? this.actions[k].limit : false;
var action = this.actions[k].do ? this.actions[k].do : 'click'; var action = this.actions[k].do ? this.actions[k].do : 'click';
var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null; var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null;
var didSomething = false; var didSomething = false;
@ -69,18 +69,18 @@ class UmbraBehavior {
} }
} }
if (firstMatchOnly) {
var doTargets = [ documents[j].querySelector(selector) ];
} else {
var doTargets = documents[j].querySelectorAll(selector); var doTargets = documents[j].querySelectorAll(selector);
var repeats = doTargets.length;
if (limit && limit < repeats) {
repeats = limit;
} }
var doTargetsLength = doTargets.length; if (!(repeats > 0)) {
if (!(doTargetsLength > 0)) {
continue; continue;
} }
for ( var i = 0; i < doTargetsLength; i++) { for ( var i = 0; i < repeats; i++) {
if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) {
continue; continue;
} }
@ -98,6 +98,11 @@ class UmbraBehavior {
somethingLeftAbove = true; somethingLeftAbove = true;
} }
} }
if (didSomething && limit && limit === i+1) {
nextAction();
break;
}
} }
if (!didSomething) { if (!didSomething) {
@ -117,10 +122,7 @@ class UmbraBehavior {
} else { } else {
var idleTimeMs = Date.now() - this.idleSince; var idleTimeMs = Date.now() - this.idleSince;
if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) { if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) {
console.log("ready for next action"); this.nextAction();
this.index += 1;
this.idleSince = null;
window.scroll(0,0);
} }
} }
} }
@ -158,6 +160,13 @@ class UmbraBehavior {
this.idleSince = null; this.idleSince = null;
} }
nextAction() {
console.log("ready for next action");
this.index += 1;
this.idleSince = null;
window.scroll(0,0);
}
start() { start() {
var that = this; var that = this;
this.intervalId = setInterval(function() { this.intervalId = setInterval(function() {

View file

@ -154,7 +154,7 @@ def _build_youtube_dl(worker, destdir, site):
if ie_result.get('_type') == 'playlist': if ie_result.get('_type') == 'playlist':
self.logger.info( self.logger.info(
'extractor %r found playlist in %s', ie.IE_NAME, url) 'extractor %r found playlist in %s', ie.IE_NAME, url)
if ie.IE_NAME in {'youtube:playlist', 'soundcloud:user', 'instagram:user'}: if ie.IE_NAME in {'youtube:playlist', 'soundcloud:user'}:
# At this point ie_result['entries'] is an iterator that # At this point ie_result['entries'] is an iterator that
# will fetch more metadata from youtube to list all the # will fetch more metadata from youtube to list all the
# videos. We unroll that iterator here partly because # videos. We unroll that iterator here partly because