mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-02 22:55:04 -04:00
remove "dev" from version number and switch README to rst
This commit is contained in:
parent
245078284d
commit
9699a40645
3 changed files with 65 additions and 54 deletions
49
README.md
49
README.md
|
@ -1,49 +0,0 @@
|
|||
brozzler
|
||||
========
|
||||
"browser" | "crawler" = "brozzler"
|
||||
|
||||
Brozzler is a distributed web crawler (爬虫) that uses a real browser (chrome
|
||||
or chromium) to fetch pages and embedded urls and to extract links. It also
|
||||
uses [youtube-dl](https://github.com/rg3/youtube-dl) to enhance media capture
|
||||
capabilities.
|
||||
|
||||
It is forked from https://github.com/internetarchive/umbra.
|
||||
|
||||
Brozzler is designed to work in conjunction with
|
||||
[warcprox](https://github.com/internetarchive/warcprox) for web archiving.
|
||||
|
||||
Installation
|
||||
------------
|
||||
```
|
||||
git clone https://github.com/nlevitt/brozzler.git
|
||||
cd brozzler
|
||||
# set up virtualenv if desired
|
||||
pip install -r requirements.txt .
|
||||
```
|
||||
Brozzler also requires a rethinkdb deployment.
|
||||
|
||||
Fonts for good screenshots
|
||||
--------------------------
|
||||
On ubuntu 14.04 trusty I installed these packages:
|
||||
|
||||
xfonts-base ttf-mscorefonts-installer fonts-arphic-bkai00mp fonts-arphic-bsmi00lp fonts-arphic-gbsn00lp fonts-arphic-gkai00mp fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala
|
||||
|
||||
Haven't looked much at the resulting screenshots yet though.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Copyright 2015 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this software except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
62
README.rst
Normal file
62
README.rst
Normal file
|
@ -0,0 +1,62 @@
|
|||
brozzler
|
||||
========
|
||||
|
||||
"browser" \| "crawler" = "brozzler"
|
||||
|
||||
Brozzler is a distributed web crawler (爬虫) that uses a real browser
|
||||
(Chrome or Chromium) to fetch pages and embedded URLs and to extract
|
||||
links. It also uses `youtube-dl <https://github.com/rg3/youtube-dl>`__
|
||||
to enhance media capture capabilities.
|
||||
|
||||
It is forked from https://github.com/internetarchive/umbra.
|
||||
|
||||
Brozzler is designed to work in conjunction with
|
||||
`warcprox <https://github.com/internetarchive/warcprox>`__ for web
|
||||
archiving.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
XXX These instructions don't work at the moment. Brozzler requires some
|
||||
customized packages not easily installable in the outside world. I intend to
|
||||
remedy the situation soon.
|
||||
|
||||
::
|
||||
|
||||
# set up virtualenv if desired
|
||||
pip install git+https://github.com/nlevitt/brozzler.git
|
||||
|
||||
Brozzler also requires a RethinkDB deployment.
|
||||
|
||||
Fonts for good screenshots
|
||||
--------------------------
|
||||
|
||||
On Ubuntu 14.04 (Trusty), I installed these packages:
|
||||
|
||||
xfonts-base ttf-mscorefonts-installer fonts-arphic-bkai00mp
|
||||
fonts-arphic-bsmi00lp fonts-arphic-gbsn00lp fonts-arphic-gkai00mp
|
||||
fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica
|
||||
fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core
|
||||
ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala
|
||||
|
||||
I haven't looked closely at the resulting screenshots yet, though.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Copyright 2015 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
not use this software except in compliance with the License. You may
|
||||
obtain a copy of the License at
|
||||
|
||||
::
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
6
setup.py
6
setup.py
|
@ -1,5 +1,3 @@
|
|||
# vim: set sw=4 et:
|
||||
|
||||
import setuptools
|
||||
import glob
|
||||
|
||||
|
@ -9,7 +7,7 @@ def full_version_bytes():
|
|||
import subprocess, time
|
||||
try:
|
||||
commit_num_bytes = subprocess.check_output(['git', 'rev-list', '--count', 'HEAD'])
|
||||
return VERSION_BYTES + b'.dev' + commit_num_bytes.strip()
|
||||
return VERSION_BYTES + b'.' + commit_num_bytes.strip()
|
||||
except subprocess.CalledProcessError:
|
||||
return VERSION_BYTES
|
||||
|
||||
|
@ -24,7 +22,7 @@ setuptools.setup(name='brozzler',
|
|||
url='https://github.com/nlevitt/brozzler',
|
||||
author='Noah Levitt',
|
||||
author_email='nlevitt@archive.org',
|
||||
long_description=open('README.md').read(),
|
||||
long_description=open('README.rst').read(),
|
||||
license='Apache License 2.0',
|
||||
packages=['brozzler'],
|
||||
package_data={'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml', 'version.txt']},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue