mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-21 16:16:28 -04:00
Merge branch 'ARI-5460' into qa
This commit is contained in:
commit
44e2b8579a
@ -26,14 +26,23 @@ var umbraInstagramBehavior = {
|
||||
previousBigImage: null,
|
||||
|
||||
intervalFunc: function() {
|
||||
|
||||
if (this.state === "loading-thumbs") {
|
||||
var signUpButton = document.querySelectorAll("span._lilm5");
|
||||
if (signUpButton.length > 0) {
|
||||
console.log("clicking sign up button");
|
||||
signUpButton[0].click();
|
||||
this.idleSince = null;
|
||||
return;
|
||||
}
|
||||
|
||||
if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
|
||||
window.scrollBy(0, 200);
|
||||
this.idleSince = null;
|
||||
return;
|
||||
}
|
||||
|
||||
var moreButtons = document.querySelectorAll("a._oidfu");
|
||||
var moreButtons = document.querySelectorAll("a._1cr2e._epyes");
|
||||
if (moreButtons.length > 0) {
|
||||
console.log("clicking load more button");
|
||||
moreButtons[0].click();
|
||||
@ -56,7 +65,7 @@ var umbraInstagramBehavior = {
|
||||
}
|
||||
|
||||
if (this.state === "clicking-first-thumb") {
|
||||
var images = document.querySelectorAll("div._ovg3g");
|
||||
var images = document.querySelectorAll("div._si7dy");
|
||||
if (images && images !== "undefined") {
|
||||
this.imageCount = images.length;
|
||||
if (images.length > 0) {
|
||||
|
@ -317,7 +317,7 @@ class BrozzlerWorker:
|
||||
return full_jpeg, thumb_jpeg
|
||||
|
||||
def brozzle_page(self, browser, site, page, on_screenshot=None,
|
||||
enable_youtube_dl=True):
|
||||
on_request=None, enable_youtube_dl=True):
|
||||
self.logger.info("brozzling {}".format(page))
|
||||
if enable_youtube_dl:
|
||||
try:
|
||||
@ -347,7 +347,8 @@ class BrozzlerWorker:
|
||||
|
||||
if self._needs_browsing(page, ydl_spy):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
outlinks = self._browse_page(browser, site, page, on_screenshot)
|
||||
outlinks = self._browse_page(browser, site, page, on_screenshot,
|
||||
on_request)
|
||||
return outlinks
|
||||
else:
|
||||
if not self._already_fetched(page, ydl_spy):
|
||||
@ -357,7 +358,7 @@ class BrozzlerWorker:
|
||||
self.logger.info('already fetched: %s', page)
|
||||
return []
|
||||
|
||||
def _browse_page(self, browser, site, page, on_screenshot=None):
|
||||
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
||||
def _on_screenshot(screenshot_png):
|
||||
if on_screenshot:
|
||||
on_screenshot(screenshot_png)
|
||||
@ -413,7 +414,7 @@ class BrozzlerWorker:
|
||||
username=site.get('username'), password=site.get('password'),
|
||||
user_agent=site.get('user_agent'),
|
||||
on_screenshot=_on_screenshot, on_response=_on_response,
|
||||
hashtags=page.hashtags,
|
||||
on_request=on_request, hashtags=page.hashtags,
|
||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||
skip_visit_hashtags=self._skip_visit_hashtags)
|
||||
if final_page_url != page.url:
|
||||
|
Loading…
x
Reference in New Issue
Block a user