add license headers

This commit is contained in:
Noah Levitt 2016-04-25 20:02:11 +00:00
parent e210d417fb
commit df61e55b6b
24 changed files with 497 additions and 78 deletions

View File

@ -1,5 +1,22 @@
#!/usr/bin/env python
# vim: set sw=4 et:
#
# brozzle-page - command line utility for brozzling a single page, i.e. opening
# it in a browser, running some javascript behaviors, and printing outlinks
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os

View File

@ -1,4 +1,23 @@
#!/usr/bin/env python
#
# brozzler-new-job - takes a yaml brozzler job configuration file, creates
# job, sites, and pages objects in rethinkdb, which brozzler-workers will look
# at and start crawling
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os

View File

@ -1,5 +1,22 @@
#!/usr/bin/env python
# vim: set sw=4 et:
#
# brozzler-new-site - takes a seed url and creates a site and page object in
# rethinkdb, which brozzler-workers will look at and start crawling
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os

View File

@ -1,5 +1,23 @@
#!/usr/bin/env python
# vim: set sw=4 et:
#
# brozzler-worker - main entrypoint for brozzler, gets sites and pages to
# brozzle from rethinkdb, brozzles them
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os

View File

@ -1,3 +1,22 @@
#
# brozzler/__init__.py - __init__.py for brozzler package, contains some common
# code
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json as _json
import logging as _logging
from pkg_resources import get_distribution as _get_distribution

View File

@ -1,8 +1,21 @@
// vim:set sw=8 et:
//
// Scrolls to the bottom of the page, and clicks on embedded soundcloud
// elements.
//
/*
* brozzler/behaviors.d/default.js - default behavior, scrolls to the bottom of
* the page and clicks on embedded soundcloud elements
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top;
@ -26,21 +39,21 @@ var umbraIntervalFunc = function() {
var umbraSoundCloudEmbeddedElements = [];
getUmbraSoundCloudEmbeddedElements(umbraSoundCloudEmbeddedElements);
var clickedSomething = false;
var somethingLeftBelow = false;
var somethingLeftAbove = false;
var missedAbove = 0;
for (var i = 0; i < umbraSoundCloudEmbeddedElements.length; i++) {
var targetId = umbraSoundCloudEmbeddedElements[i].id;
var target = umbraSoundCloudEmbeddedElements[i].target;
if (!(targetId in umbraAlreadyClicked)) {
var where = umbraAboveBelowOrOnScreen(target);
if (where == 0) { // on screen
// var pos = target.getBoundingClientRect().top;
// window.scrollTo(0, target.getBoundingClientRect().top - 100);
@ -52,14 +65,14 @@ var umbraIntervalFunc = function() {
clickedSomething = true;
umbraState.idleSince = null;
break;
} else if (where > 0) {
} else if (where > 0) {
somethingLeftBelow = true;
} else if (where < 0) {
somethingLeftAbove = true;
}
}
}
if (!clickedSomething) {
if (somethingLeftAbove) {
console.log("scrolling UP because everything on this screen has been clicked but we missed something above");
@ -77,7 +90,7 @@ var umbraIntervalFunc = function() {
umbraState.idleSince = Date.now();
}
}
if (umbraState.idleSince == null) {
umbraState.idleSince = Date.now();
}
@ -86,31 +99,31 @@ var umbraIntervalFunc = function() {
//try to detect sound cloud "Play" buttons and return them as targets for clicking
var getUmbraSoundCloudEmbeddedElements = function(soundCloudEmbeddedElements, currentIframeDepth, currentDocument,
iframeElement) {
//set default values for parameters
currentIframeDepth = currentIframeDepth || 0;
currentDocument = currentDocument || document;
if (currentIframeDepth > MAX_IFRAME_RECURSE_DEPTH) {
return;
}
//collect all buttons on current document first
var button = [];
button = currentDocument.querySelectorAll(UMBRA_THINGS_TO_CLICK_SOUNDCLOUD_EMBEDDED_SELECTOR);
var cssPathIframe = iframeElement ? getElementCssPath(iframeElement) : "";
for (var i = 0; i < button.length; i++) {
soundCloudEmbeddedElements.push({"id" : cssPathIframe + getElementCssPath(button.item(i)), "target" : button.item(i)});
}
//now get all buttons in embedded iframes
var iframe = [];
iframe = currentDocument.querySelectorAll(UMBRA_IFRAME_SOUNDCLOUD_EMBEDDED_SELECTOR);
for (var i = 0; i < iframe.length; i++) {
getUmbraSoundCloudEmbeddedElements(soundCloudEmbeddedElements, currentIframeDepth + 1, iframe[i].contentWindow.document.body, iframe[i]);
}
@ -135,7 +148,7 @@ var umbraBehaviorFinished = function() {
var getElementCssPath = function(element) {
var names = [];
while (element.parentNode){
if (element.id){
names.unshift('#' + element.id);
@ -146,14 +159,14 @@ var getElementCssPath = function(element) {
}
else {
for (var c = 1, e = element; e.previousElementSibling; e = e.previousElementSibling, c++);
names.unshift(element.tagName + ":nth-child(" + c + ")");
}
element = element.parentNode;
}
}
return names.join(" > ");
}

View File

@ -1,4 +1,21 @@
// vim:set sw=8 et:
/*
* brozzler/behaviors.d/facebook.js - facebook behavior, scrolls to the bottom
* of the page, clicks to expand images, a few other things
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top;
@ -28,26 +45,26 @@ var umbraScrolledThingFailedScrollAttempts = {};
var umbraState = {'idleSince':null,'expectingSomething':null,'bottomReachedScrollY':0};
var umbraIntervalFunc = function() {
var thingsToScroll = document.querySelectorAll(UMBRA_THINGS_TO_SCROLL_SELECTOR);
var everythingScrolled = true;
for (var i = 0; i < thingsToScroll.length; i++) {
var target = thingsToScroll[i];
if (!(target in umbraAlreadyScrolledThing)) {
everythingScrolled = false;
console.log("scrolling to " + target.scrollHeight + " on element with nodeName " + target.nodeName + " with id of " + target.id);
var lastScrollTop = target.scrollTop;
target.scrollTop = target.scrollHeight;
umbraState.idleSince = null;
if (target.scrollTop >= target.scrollHeight) {
umbraAlreadyScrolledThing[target] = true;
}
}
else if (target.scrollTop == lastScrollTop) {
if (umbraScrolledThingFailedScrollAttempts[target]) {
umbraScrolledThingFailedScrollAttempts[target]++;
@ -55,7 +72,7 @@ var umbraIntervalFunc = function() {
else {
umbraScrolledThingFailedScrollAttempts[target] = 1;
}
if (umbraScrolledThingFailedScrollAttempts[target] >= NUMBER_FAILED_SCROLL_ATTEMPTS_ON_THING_TO_SCROLL_BEFORE_STOP_SCROLLING) {
umbraAlreadyScrolledThing[target] = true;
}
@ -67,24 +84,24 @@ var umbraIntervalFunc = function() {
}
else {
console.log("done scrolling for element with nodeName " + target.nodeName + " with id of " + target.id)
}
}
umbraState.expectingSomething = null;
}
if (thingsToScroll && thingsToScroll.length > 0 && everythingScrolled) {
if (umbraState.idleSince == null) {
umbraState.idleSince = Date.now();
}
return;
}
var closeButtons = document.querySelectorAll('a[title="Close"], a.closeTheater, a[aria-label="Press Esc to close"]');
for (var i = 0; i < closeButtons.length; i++) {
// XXX closeTheater buttons stick around in the dom after closing, clientWidth>0 is one way to check if they're visible
if (closeButtons[i].clientWidth > 0) {
if (umbraState.expectingSomething == 'closeButton') {
if (closeButtons[i].clientWidth > 0) {
if (umbraState.expectingSomething == 'closeButton') {
console.log("found expected close button, clicking on it " + closeButtons[i].outerHTML);
umbraState.expectingSomething = null;
} else {
@ -106,7 +123,7 @@ var umbraIntervalFunc = function() {
var missedAbove = 0;
for (var i = 0; i < thingsToClick.length; i++) {
var target = thingsToClick[i];
var target = thingsToClick[i];
if (!(target in umbraAlreadyClicked)) {
var where = umbraAboveBelowOrOnScreen(target);
if (where == 0) { // on screen
@ -122,14 +139,14 @@ var umbraIntervalFunc = function() {
clickedSomething = true;
umbraState.idleSince = null;
break;
} else if (where > 0) {
} else if (where > 0) {
somethingLeftBelow = true;
} else if (where < 0) {
somethingLeftAbove = true;
}
}
}
if (window.scrollY > umbraState.bottomReachedScrollY) {
umbraState.bottomReachedScrollY = window.scrollY;
}
@ -159,7 +176,7 @@ var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
// Called from outside of this script.
var umbraBehaviorFinished = function() {
if (umbraState.idleSince != null) {
var idleTimeMs = Date.now() - umbraState.idleSince;
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {

View File

@ -1,17 +1,33 @@
// vim:set sw=8 et:
/*
* brozzler/behaviors.d/flickr.js - behavior for flickr.com
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
setInterval(function() { window.scrollBy(0,50); }, 100);
setTimeout(function() {
a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
f = a.iterateNext();
setTimeout(function() {
a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
f = a.iterateNext();
f.click();
}, 5000);
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
f = a.iterateNext();
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
f = a.iterateNext();
f.click();
}, 5000);
}, 5000);

View File

@ -1,5 +1,20 @@
// vim:set sw=8 et:
//
/*
* brozzler/behaviors.d/instagram.js - behavior for instagram
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraInstagramBehavior = {
IDLE_TIMEOUT_SEC: 20,
@ -12,11 +27,11 @@ var umbraInstagramBehavior = {
intervalFunc: function() {
if (this.state === "loading-thumbs") {
if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
window.scrollBy(0, 200);
this.idleSince = null;
return;
}
}
var moreButtons = document.querySelectorAll(".PhotoGridMoreButton:not(.pgmbDisabled)");
if (moreButtons.length > 0) {
@ -24,8 +39,8 @@ var umbraInstagramBehavior = {
moreButtons[0].click();
this.idleSince = null;
return;
}
}
if (this.idleSince == null) {
console.log("nothing to do at the moment, might be waiting for something to load, setting this.idleSince=Date.now()");
this.idleSince = Date.now();
@ -37,12 +52,12 @@ var umbraInstagramBehavior = {
this.state = "clicking-first-thumb";
this.idleSince = null;
return;
} else {
} else {
// console.log("still might be waiting for something to load...");
return;
}
}
}
}
}
if (this.state === "clicking-first-thumb") {
var images = document.querySelectorAll("a.pgmiImageLink");

View File

@ -1,4 +1,21 @@
// vim:set sw=8 et:
/*
* brozzler/behaviors.d/marquette_edu.js - behavior for marquette.edu, clicks to
* play/crawl embedded videos
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraState = {'idleSince':null};
var umbraIntervalID = setInterval(umbraScrollInterval,50);
@ -10,7 +27,7 @@ function umbraScrollInterval() {
umbraScroll();
umbraState.idleSince=null;
}
else {
else {
var videoBoxes = document.querySelectorAll("div#vid_box a");
var clickedVideo = false;
@ -50,4 +67,4 @@ var umbraBehaviorFinished = function() {
}
return false;
}

View File

@ -1,3 +1,21 @@
/*
* brozzler/behaviors.d/flickr.js - behavior for marquette.edu, clicks to
* play/crawl embedded videos
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraBehavior = {
IDLE_TIMEOUT_SEC : 10,
@ -12,7 +30,7 @@ var umbraBehavior = {
var iframes = document.querySelectorAll("iframe");
var documents = Array(iframes.length + 1);
documents[0] = document;
for (var i = 0; i < iframes.length; i++) {
documents[i+1] = iframes[i].contentWindow.document;
}

View File

@ -1,3 +1,22 @@
/*
* brozzler/behaviors.d/simpleclicks.js.in - simpleclicks behavior template,
* clicks on elements matching templatized css selector
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraBehavior = {
IDLE_TIMEOUT_SEC : 10,
idleSince : null,
@ -12,11 +31,11 @@ var umbraBehavior = {
//handle Python to JavaScript boolean conversion
clickUntilTimeout == "True" ? clickUntilTimeout = true : clickUntilTimeout = false;
var iframes = document.querySelectorAll("iframe");
var documents = Array(iframes.length + 1);
documents[0] = document;
for (var i = 0; i < iframes.length; i++) {
documents[i+1] = iframes[i].contentWindow.document;
}

View File

@ -1,4 +1,21 @@
// vim:set sw=8 et:
/*
* brozzler/behaviors.d/vimeo.js - behavior for vimeo.com, clicks to play/crawl
* videos
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var umbraState = {'idleSince':null};
var umbraVideoElements = document.getElementsByTagName('video');

View File

@ -1,4 +1,21 @@
# vim: set sw=4 et:
#
# brozzler/behaviors.py - manages behaviors, which are javascript scripts that
# run in brozzled web pages
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import itertools

View File

@ -1,3 +1,21 @@
#
# brozzler/behaviors.yaml - behavior configuration
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# first matched behavior is used, so order matters here
behaviors:
-

View File

@ -1,5 +1,21 @@
#!/usr/bin/env python
# vim: set sw=4 et:
#
# brozzler/browser.py - classes responsible for running web browsers
# (chrome/chromium) and browsing web pages in them
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import json

View File

@ -1,3 +1,21 @@
#
# brozzler/frontier.py - RethinkDbFrontier manages crawl jobs, sites and pages
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import brozzler
import random

View File

@ -1,3 +1,22 @@
#
# brozzler/job.py - Job class representing a brozzler crawl job, and functions
# for setting up a job with supplied configuration
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import brozzler
import yaml

View File

@ -1,4 +1,20 @@
# vim: set sw=4 et:
#
# brozzler/robots.py - robots.txt support
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import logging

View File

@ -1,3 +1,21 @@
#
# brozzler/site.py - classes representing sites and pages
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import surt
import json
import logging

View File

@ -1,3 +1,23 @@
#
# brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
# it runs youtube-dl on them, browses them and runs behaviors if appropriate,
# scopes and adds outlinks to the frontier
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import logging
import brozzler

View File

@ -1,3 +1,21 @@
#
# setup.py - brozzler setup script
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import setuptools
import glob

View File

@ -1,3 +1,22 @@
#
# brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
# api endpoints etc
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import flask
import rethinkstuff
import json

View File

@ -1,3 +1,21 @@
/*
* brozzler-webconsole/static/js/app.js - brozzler console angularjs code
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use strict";
var brozzlerConsoleApp = angular.module("brozzlerConsoleApp", [