mirror of https://github.com/sys-nyx/red-arch.git
synced 2025-05-06 00:35:25 -04:00

commit d9d000055b: initial

32 changed files with 1530 additions and 0 deletions
3 .gitignore vendored Normal file
@@ -0,0 +1,3 @@
/data
/r
!/r/static
72 README.md Normal file
@@ -0,0 +1,72 @@
## reddit html archiver

pulls reddit data from the [pushshift](https://github.com/pushshift/api) api and renders offline-compatible html pages

### install

requires python 3

    sudo apt-get install python3-pip
    pip install psaw
    git clone https://github.com/chid/snudown
    cd snudown
    sudo python setup.py install
    cd ..
    git clone [this repo]
    cd reddit-html-archiver
    chmod u+x *.py

### fetch reddit data from pushshift

data is fetched by subreddit and date range.

    ./fetch_links.py politics 2017-1-1 2017-2-1
    # or add some link/post request parameters
    ./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
    ./fetch_links.py -h

you may need to decrease your date range or adjust `pushshift_rate_limit_per_minute` in `fetch_links.py` if you are getting connection errors.
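
for reference, fetched data is written as csv files under `data/`, one directory per subreddit and per day. the layout below is inferred from `fetch_links.py`; the date and link id shown are made up:

    data/politics/2017/01/15/links.csv    # one row per submission
    data/politics/2017/01/15/7r9xkz.csv   # comments for the submission with id 7r9xkz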

### write web pages

write html files for all subreddits.

    ./write_html.py
    # or add some output filtering
    ./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
    ./write_html.py -h

if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages (see the example below).
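
one way to do the refresh on a unix-like system (illustrative commands, adjust to your layout):

    find r -mindepth 1 -maxdepth 1 ! -name static -exec rm -rf {} +
    ./write_html.py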

### hosting the archived pages

copy the contents of the `r` directory to a web root or an appropriately served git repo, or serve it directly (see below).
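
for a quick local preview, any static file server works; a minimal sketch assuming python 3 is installed:

    cd r
    python3 -m http.server 8000

the generated pages use only relative links, so opening `r/index.html` directly from disk also works.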

### potential improvements

* fetch_links
  * num_comments filtering
  * thumbnails or thumbnail urls
  * media posts
  * update scores from the reddit api with [praw](https://github.com/praw-dev/praw)
* real templating
* filter output per sub, individual min score and comments filters
* js markdown url previews
* js powered search page, show no links by default
* user pages
  * add pagination, posts sorted by score, comments, date, sub
* too many files in one directory

### see also

* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/)
* [psaw](https://github.com/dmarx/psaw)
* [snudown](https://github.com/reddit/snudown)
* [redditsearch.io](https://redditsearch.io/)
* [reddit post archiver](https://github.com/sJohnsonStoever/redditPostArchiver)

### screenshots

![](screenshots/sub.jpg)
![](screenshots/post.jpg)
217 fetch_links.py Executable file
@@ -0,0 +1,217 @@
|||
#! /usr/bin/env python
|
||||
import time
|
||||
from time import mktime
|
||||
from datetime import datetime, timedelta
|
||||
import argparse
|
||||
from pprint import pprint
|
||||
import json
|
||||
import csv
|
||||
import os
|
||||
from psaw import PushshiftAPI
|
||||
|
||||
pushshift_rate_limit_per_minute = 20
|
||||
max_comments_per_query = 150
|
||||
write_every = 10
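# pushshift_rate_limit_per_minute throttles PushshiftAPI requests,
# max_comments_per_query caps comment ids per search_comments call, and
# write_every flushes collected links to csv after that many submissions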
|
||||
|
||||
link_fields = ['author', 'created_utc', 'domain', 'id', 'is_self',
|
||||
'num_comments', 'over_18', 'permalink', 'retrieved_on', 'score',
|
||||
'selftext', 'stickied', 'subreddit_id', 'title', 'url']
|
||||
comment_fields = ['author', 'body', 'created_utc', 'id', 'link_id',
|
||||
'parent_id', 'score', 'stickied', 'subreddit_id']
|
||||
|
||||
def fetch_links(subreddit=None, date_start=None, date_stop=None, limit=None, score=None, self_only=False):
|
||||
if subreddit is None or date_start is None or date_stop is None:
|
||||
print('ERROR: missing required arguments')
|
||||
exit()
|
||||
|
||||
api = PushshiftAPI(rate_limit_per_minute=pushshift_rate_limit_per_minute, detect_local_tz=False)
|
||||
|
||||
# get links
|
||||
links = []
|
||||
print('fetching submissions %s to %s...' % (time.strftime('%Y-%m-%d', date_start), time.strftime('%Y-%m-%d', date_stop)))
|
||||
params = {
|
||||
'after': int(mktime(date_start)) - 86400, # make date inclusive, adjust for UTC
|
||||
'before': int(mktime(date_stop)) + 86400,
|
||||
'subreddit': subreddit,
|
||||
'filter': link_fields,
|
||||
'sort': 'asc',
|
||||
'sort_type': 'created_utc',
|
||||
}
|
||||
if limit:
|
||||
params['limit'] = int(limit)
|
||||
if score:
|
||||
params['score'] = score
|
||||
if self_only:
|
||||
params['is_self'] = True
|
||||
link_results = list(api.search_submissions(**params))
|
||||
print('processing %s links' % len(link_results))
|
||||
for s in link_results:
|
||||
# print('%s %s' % (datetime.utcfromtimestamp(int(s.d_['created_utc'])), s.d_['title']))
|
||||
# pprint(s)
|
||||
|
||||
# get comment ids
|
||||
comments = []
|
||||
if s.d_['num_comments'] > 0 and not comment_data_exists(subreddit, s.d_['created_utc'], s.d_['id']):
|
||||
comment_ids = list(api._get_submission_comment_ids(s.d_['id']))
|
||||
# print('%s comment_ids: %s' % (data['id'], comment_ids))
|
||||
|
||||
# get comments
|
||||
if (len(comment_ids) > 0):
|
||||
mychunks = []
|
||||
if len(comment_ids) > max_comments_per_query:
|
||||
mychunks = chunks(comment_ids, max_comments_per_query)
|
||||
else:
|
||||
mychunks = [comment_ids]
|
||||
for chunk in mychunks:
|
||||
comment_params = {
|
||||
'filter': comment_fields,
|
||||
'ids': ','.join(chunk),
|
||||
'limit': max_comments_per_query,
|
||||
}
|
||||
comments_results = list(api.search_comments(**comment_params))
|
||||
print('%s fetch link %s comments %s/%s' % (datetime.utcfromtimestamp(int(s.d_['created_utc'])), s.d_['id'], len(comments_results), len(comment_ids)))
|
||||
for c in comments_results:
|
||||
comments.append(c.d_)
|
||||
|
||||
s.d_['comments'] = comments
|
||||
links.append(s.d_)
|
||||
|
||||
# write results
|
||||
if len(links) >= write_every:
|
||||
success = write_links(subreddit, links)
|
||||
if success:
|
||||
links = []
|
||||
|
||||
# write remaining results
|
||||
if len(links):
|
||||
write_links(subreddit, links)
|
||||
|
||||
# csvs are not guaranteed to be sorted by date but you can resume broken runs
|
||||
# and change sort criteria later to add more posts without getting duplicates.
|
||||
# delete csvs and re-run to update existing posts
|
||||
def write_links(subreddit, links):
|
||||
if links and len(links) > 0:
|
||||
writing_day = None
|
||||
file = None
|
||||
writer = None
|
||||
existing_link_ids = []
|
||||
wrote_links = 0
|
||||
wrote_comments = 0
|
||||
|
||||
for r in links:
|
||||
# print('%s link %s' % (r['id'], r['title']))
|
||||
|
||||
# grab link comments
|
||||
existing_comment_ids = []
|
||||
comments = r['comments']
|
||||
# print('%s comments %s' % (r['id'], comments))
|
||||
|
||||
created_ts = int(r['created_utc'])
|
||||
created = datetime.utcfromtimestamp(created_ts).strftime('%Y-%m-%d')
|
||||
created_path = datetime.utcfromtimestamp(created_ts).strftime('%Y/%m/%d')
|
||||
if created != writing_day:
|
||||
if file:
|
||||
file.close()
|
||||
writing_day = created
|
||||
path = 'data/' + subreddit + '/' + created_path
|
||||
os.makedirs(path, exist_ok=True)
|
||||
|
||||
# create and parse existing links
|
||||
filename = 'links.csv'
|
||||
filepath = path + '/' + filename
|
||||
if not os.path.isfile(filepath):
|
||||
file = open(filepath, 'a')
|
||||
writer = csv.DictWriter(file, fieldnames=link_fields)
|
||||
writer.writeheader()
|
||||
# print('created %s' % filepath)
|
||||
else:
|
||||
with open(filepath, 'r') as file:
|
||||
reader = csv.DictReader(file)
|
||||
for row in reader:
|
||||
existing_link_ids.append(row['id'])
|
||||
|
||||
file = open(filepath, 'a')
|
||||
writer = csv.DictWriter(file, fieldnames=link_fields)
|
||||
|
||||
# create and parse existing comments
|
||||
# writing empty comments csvs enables resuming and comment_data_exists()
|
||||
filename = r['id'] + '.csv'
|
||||
filepath = path + '/' + filename
|
||||
if not os.path.isfile(filepath):
|
||||
comments_file = open(filepath, 'a')
|
||||
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
|
||||
comments_writer.writeheader()
|
||||
# print('created %s' % filepath)
|
||||
else:
|
||||
with open(filepath, 'r') as comments_file:
|
||||
reader = csv.DictReader(comments_file)
|
||||
for row in reader:
|
||||
existing_comment_ids.append(row['id'])
|
||||
|
||||
comments_file = open(filepath, 'a')
|
||||
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
|
||||
|
||||
# write link row
|
||||
if r['id'] not in existing_link_ids:
|
||||
for field in list(r):
|
||||
if field not in link_fields:
|
||||
del r[field]
|
||||
|
||||
writer.writerow(r)
|
||||
wrote_links += 1
|
||||
|
||||
# write comments
|
||||
for c in comments:
|
||||
if c['id'] not in existing_comment_ids:
|
||||
for field in list(c):
|
||||
if field not in comment_fields:
|
||||
del c[field]
|
||||
comments_writer.writerow(c)
|
||||
wrote_comments += 1
|
||||
comments_file.close()
|
||||
|
||||
|
||||
print('got %s links, wrote %s and %s comments' % (len(links), wrote_links, wrote_comments))
|
||||
return True
|
||||
|
||||
def link_data_exists(subreddit, date):
|
||||
created_path = time.strftime('%Y/%m/%d', date)
|
||||
path = 'data/' + subreddit + '/' + created_path + '/links.csv'
|
||||
if not os.path.isfile(path):
|
||||
return False
|
||||
return True
|
||||
|
||||
def comment_data_exists(subreddit, link_created_utc, link_id):
|
||||
created_ts = int(link_created_utc)
|
||||
created_path = datetime.utcfromtimestamp(created_ts).strftime('%Y/%m/%d')
|
||||
path = 'data/' + subreddit + '/' + created_path + '/' + link_id + '.csv'
|
||||
if os.path.isfile(path):
|
||||
return True
|
||||
return False
|
||||
|
||||
def chunks(l, n):
|
||||
"""Yield successive n-sized chunks from l."""
|
||||
for i in range(0, len(l), n):
|
||||
yield l[i:i + n]
|
||||
|
||||
def mkdate(datestr):
|
||||
try:
|
||||
return time.strptime(datestr, '%Y-%m-%d')
|
||||
except ValueError:
|
||||
raise argparse.ArgumentTypeError(datestr + ' is not a proper date string')
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser=argparse.ArgumentParser()
|
||||
parser.add_argument('subreddit', help='subreddit to archive')
|
||||
parser.add_argument('date_start', type=mkdate, help='start archiving at date, e.g. 2005-1-1')
|
||||
parser.add_argument('date_stop', type=mkdate, help='stop archiving at this date, inclusive; cannot be the same as date_start')
|
||||
parser.add_argument('--limit', default=None, help='pushshift api limit param, default None')
|
||||
parser.add_argument('--score', default=None, help='pushshift api score param, e.g. "> 10", default None')
|
||||
parser.add_argument('--self_only', action="store_true", help='only fetch selftext submissions, default False')
|
||||
args=parser.parse_args()
|
||||
|
||||
self_only = False
|
||||
if args.self_only:
|
||||
self_only = True
|
||||
|
||||
fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
|
103 r/static/css/archive.css Normal file
@@ -0,0 +1,103 @@
|
|||
footer {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* markdown */
|
||||
.md {
|
||||
word-wrap: break-word;
|
||||
overflow-wrap: break-word;
|
||||
}
|
||||
.md p, .md ol, .md ul, .md blockquote {
|
||||
margin: 3px 0;
|
||||
}
|
||||
.md blockquote {
|
||||
border-left: 2px solid rgba(255, 255, 255, 0.4); /* text-muted */
|
||||
padding-left: 0.5rem;
|
||||
}
|
||||
.md blockquote, .md del {
|
||||
color: rgba(255, 255, 255, 0.4); /* text-muted */
|
||||
}
|
||||
.md code, .md pre {
|
||||
border: 1px solid #4E5D6C; /* alert-secondary */
|
||||
background: #4E5D6C; /* disabled form input color */
|
||||
}
|
||||
.md h1 {
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
.md h2 {
|
||||
font-size: 1.4rem;
|
||||
}
|
||||
.md h3 {
|
||||
font-size: 1.3rem;
|
||||
}
|
||||
.md h4 {
|
||||
font-size: 1.2rem;
|
||||
}
|
||||
.md h5 {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
.md h6 {
|
||||
font-size: 1rem;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* subreddit links */
|
||||
.links .title {
|
||||
line-height: 1.25;
|
||||
}
|
||||
.links .title a, .submission .title a {
|
||||
color: inherit;
|
||||
}
|
||||
.search .title {
|
||||
color: inherit;
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* link/post page */
|
||||
.op .author, .submission .author {
|
||||
color: #5bc0de; /* match bootstrap link color / badge-primary */
|
||||
}
|
||||
.submission .card-body {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
/* comments */
|
||||
.comment {
|
||||
margin-bottom: 5px; /*1rem;*/
|
||||
}
|
||||
.comment .byline, .comment .byline a {
|
||||
line-height: 1;
|
||||
}
|
||||
.collapsed .md {
|
||||
display: none;
|
||||
}
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
.to-top {
|
||||
display: block;
|
||||
}
|
||||
.comments .ml-1 {
|
||||
margin-left: 1rem !important;
|
||||
}
|
||||
.comments .ml-2 {
|
||||
margin-left: 2rem !important;
|
||||
}
|
||||
.comments .ml-3 {
|
||||
margin-left: 3rem !important;
|
||||
}
|
||||
.comments .ml-4 {
|
||||
margin-left: 4rem !important;
|
||||
}
|
||||
.comments .ml-5 {
|
||||
margin-left: 5rem !important;
|
||||
}
|
||||
.comments .ml-6 {
|
||||
margin-left: 6rem !important;
|
||||
}
|
||||
.comments .ml-7 {
|
||||
margin-left: 7rem !important;
|
||||
}
|
||||
.comments .ml-8 {
|
||||
margin-left: 8rem !important;
|
||||
}
|
13 r/static/css/bootstrap-superhero.min.css vendored Normal file
File diff suppressed because one or more lines are too long
48 r/static/css/lato.css Normal file
@@ -0,0 +1,48 @@
|
|||
/* latin-ext */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 300;
|
||||
src: local('Lato Light'), local('Lato-Light'), url(../fonts/S6u9w4BMUTPHh7USSwaPGR_p.woff2) format('woff2');
|
||||
unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;
|
||||
}
|
||||
/* latin */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 300;
|
||||
src: local('Lato Light'), local('Lato-Light'), url(../fonts/S6u9w4BMUTPHh7USSwiPGQ.woff2) format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
/* latin-ext */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Lato Regular'), local('Lato-Regular'), url(../fonts/S6uyw4BMUTPHjxAwXjeu.woff2) format('woff2');
|
||||
unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;
|
||||
}
|
||||
/* latin */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Lato Regular'), local('Lato-Regular'), url(../fonts/S6uyw4BMUTPHjx4wXg.woff2) format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
/* latin-ext */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 700;
|
||||
src: local('Lato Bold'), local('Lato-Bold'), url(../fonts/S6u9w4BMUTPHh6UVSwaPGR_p.woff2) format('woff2');
|
||||
unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;
|
||||
}
|
||||
/* latin */
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
font-style: normal;
|
||||
font-weight: 700;
|
||||
src: local('Lato Bold'), local('Lato-Bold'), url(../fonts/S6u9w4BMUTPHh6UVSwiPGQ.woff2) format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
BIN r/static/fonts/S6u9w4BMUTPHh6UVSwaPGR_p.woff2 Normal file
Binary file not shown.
BIN r/static/fonts/S6u9w4BMUTPHh6UVSwiPGQ.woff2 Normal file
Binary file not shown.
BIN r/static/fonts/S6u9w4BMUTPHh7USSwiPGQ.woff2 Normal file
Binary file not shown.
BIN r/static/fonts/S6uyw4BMUTPHjx4wXg.woff2 Normal file
Binary file not shown.
BIN r/static/fonts/S6uyw4BMUTPHjxAwXjeu.woff2 Normal file
Binary file not shown.
37 r/static/js/archive-comments.js Normal file
@@ -0,0 +1,37 @@
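// clicking a comment's score toggles its .collapsed class and then walks the
// following .comment elements, hiding or showing them until a comment at the
// same or shallower depth is reached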
|
|||
$(document).ready(function() {
|
||||
$('a.score').click(function(){
|
||||
var $comment = $(this).closest('.comment');
|
||||
var depth = parseInt($comment.data('depth'));
|
||||
if ($comment.hasClass('collapsed')) {
|
||||
$comment.removeClass('collapsed');
|
||||
$check_comment = $comment.next('.comment');
|
||||
depth_sibling = false;
|
||||
while (!depth_sibling) {
|
||||
if ($check_comment.length == 0) {
|
||||
depth_sibling = true;
|
||||
}
|
||||
if ($check_comment.data('depth') == '' || parseInt($check_comment.data('depth')) <= depth) {
|
||||
depth_sibling = true;
|
||||
} else {
|
||||
$check_comment.removeClass('hidden');
|
||||
}
|
||||
$check_comment = $check_comment.next('.comment');
|
||||
}
|
||||
} else {
|
||||
$comment.addClass('collapsed');
|
||||
$check_comment = $comment.next('.comment');
|
||||
depth_sibling = false;
|
||||
while (!depth_sibling) {
|
||||
if ($check_comment.length == 0) {
|
||||
depth_sibling = true;
|
||||
}
|
||||
if ($check_comment.data('depth') == '' || parseInt($check_comment.data('depth')) <= depth) {
|
||||
depth_sibling = true;
|
||||
} else {
|
||||
$check_comment.addClass('hidden');
|
||||
}
|
||||
$check_comment = $check_comment.next('.comment');
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
7 r/static/js/bootstrap.min.js vendored Normal file
File diff suppressed because one or more lines are too long
2 r/static/js/jquery-3.3.1.slim.min.js vendored Normal file
File diff suppressed because one or more lines are too long
BIN screenshots/post.jpg Normal file
Binary file not shown.
Size: 107 KiB
BIN screenshots/sub.jpg Normal file
Binary file not shown.
Size: 163 KiB
42 templates/index.html Normal file
@@ -0,0 +1,42 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
|
||||
<title>###TITLE###</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
|
||||
<span class="navbar-brand">###TITLE###</span>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="###URL_SUBS###">All</a>
|
||||
###HTML_SUBS_MENU###
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<main role="main" class="container-fluid">
|
||||
<div class="links mt-3">
|
||||
###HTML_LINKS###
|
||||
</div>
|
||||
</main>
|
||||
<footer class="container-fluid">
|
||||
<p class="small mb-0">archive has ###ARCH_NUM_POSTS### posts. <a href="###URL_PROJECT###">source code</a>.</p>
|
||||
</footer>
|
||||
<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
|
||||
</body>
|
||||
</html>
|
62 templates/link.html Normal file
@@ -0,0 +1,62 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
|
||||
<title>r/###SUB###: ###TITLE###</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
|
||||
<a class="navbar-brand" href="###URL_SUB###">r/###SUB###</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="###URL_SUB###">score</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="###URL_SUB_CMNT###">comments</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="###URL_SUB_DATE###">date</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_SEARCH_CSS###">
|
||||
<a class="nav-link" href="###URL_SEARCH###">search</a>
|
||||
</li>
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="###URL_SUBS###">All</a>
|
||||
###HTML_SUBS_MENU###
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<main role="main" class="container-fluid">
|
||||
<div class="submission pt-3" data-id="###ID###">
|
||||
<h3 class="title">###HTML_TITLE###</h3>
|
||||
<p><span class="badge badge-primary">###SCORE###</span> ###DATE### by ###HTML_AUTHOR_URL###</p>
|
||||
###HTML_SELFTEXT###
|
||||
</div>
|
||||
<div class="comments">
|
||||
<h5>###NUM_COMMENTS### comments</h5>
|
||||
###HTML_COMMENTS###
|
||||
</div>
|
||||
</main>
|
||||
<footer class="container-fluid">
|
||||
<a class="to-top mt-1 mb-1 btn btn-lg btn-primary" href="#top">top of page</a>
|
||||
<p class="small mb-0">data archived ###ARCHIVE_DATE###. <a href="###URL_PROJECT###">source code</a>.</p>
|
||||
</footer>
|
||||
<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/archive-comments.js"></script>
|
||||
</body>
|
||||
</html>
|
4 templates/partial_comment.html Normal file
@@ -0,0 +1,4 @@
|
|||
<div class="comment mb-3 ###CSS_CLASSES###" data-depth="###DEPTH###" data-id="###ID###">
|
||||
<p class="byline text-muted mb-0"><a href="javascript:;" class="score"><span class="badge ###CLASS_SCORE###">###SCORE###</span></a> ###HTML_AUTHOR_URL### ###DATE###</p>
|
||||
<div class="md">###BODY###</div>
|
||||
</div>
|
1 templates/partial_index_subreddit.html Normal file
@@ -0,0 +1 @@
|
|||
<h5><a class="subreddit" href="#URL_SUB#">#SUB#</a> <span class="badge badge-secondary">#NUM_LINKS#</span></h5>
|
4 templates/partial_link.html Normal file
@@ -0,0 +1,4 @@
|
|||
<div class="link mt-3">
|
||||
<h5 class="title mb-0"><a href="###URL###">###TITLE###</a></h5>
|
||||
<a href="###URL_COMMENTS###"><span class="badge badge-secondary">###SCORE###</span></a> <small class="text-muted"><a href="###URL_COMMENTS###">###NUM_COMMENTS### comments</a> ###DATE### ###HTML_AUTHOR_URL### ###LINK_DOMAIN###</small>
|
||||
</div>
|
1 templates/partial_link_selftext.html Normal file
@@ -0,0 +1 @@
|
|||
<div class="card bg-dark mb-3"><div class="card-body md">###SELFTEXT###</div></div>
|
1 templates/partial_menu_item.html Normal file
@@ -0,0 +1 @@
|
|||
<a class="dropdown-item" href="###URL_SUB###">###SUB###</a>
|
1 templates/partial_search_link.html Normal file
@@ -0,0 +1 @@
|
|||
<a class="title mb-1" href="###URL###">###TITLE###</a>
|
1 templates/partial_subreddit_pager_link.html Normal file
@@ -0,0 +1 @@
|
|||
<li class="page-item #CSS_CLASS#"><a class="page-link" href="#URL#">#TEXT#</a></li>
|
1 templates/partial_url.html Normal file
@@ -0,0 +1 @@
|
|||
<a href="#HREF#">#INNER_HTML#</a>
|
1 templates/partial_user.html Normal file
@@ -0,0 +1 @@
|
|||
<a class="author" href="###URL_AUTHOR###">###AUTHOR###</a>
|
4 templates/partial_user_link.html Normal file
@@ -0,0 +1,4 @@
|
|||
<div class="link mt-3">
|
||||
<h5 class="title mb-0"><a href="###URL###">###TITLE###</a></h5>
|
||||
<a href="###URL_COMMENTS###"><span class="badge badge-secondary">###SCORE###</span></a> <small class="text-muted"><a href="###URL_COMMENTS###">###NUM_COMMENTS### comments</a> ###DATE### ###HTML_AUTHOR_URL### in <a href="###SUB_URL###">r/###SUB###</a></small>
|
||||
</div>
|
54 templates/search.html Normal file
@@ -0,0 +1,54 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
|
||||
<title>r/###SUB### ###TITLE###</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
|
||||
<a class="navbar-brand" href="###URL_IDX_SCORE###">r/###SUB###</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item ###URL_IDX_SCORE_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_SCORE###">score</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_IDX_CMNT_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_CMNT###">comments</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_IDX_DATE_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_DATE###">date</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_SEARCH_CSS###">
|
||||
<a class="nav-link" href="###URL_SEARCH###">search</a>
|
||||
</li>
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="###URL_SUBS###">All</a>
|
||||
###HTML_SUBS_MENU###
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<main role="main" class="container-fluid">
|
||||
<div class="links search mt-3">
|
||||
###HTML_LINKS###
|
||||
</div>
|
||||
</main>
|
||||
<footer class="container-fluid">
|
||||
<p class="small mb-0">r/###SUB### archive has ###ARCH_NUM_POSTS### posts and ###ARCH_NUM_COMMENTS### comments. <a href="###URL_PROJECT###">source code</a>.</p>
|
||||
</footer>
|
||||
<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
|
||||
</body>
|
||||
</html>
|
60 templates/subreddit.html Normal file
@@ -0,0 +1,60 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
|
||||
<title>r/###SUB### ###TITLE###</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
|
||||
<a class="navbar-brand" href="###URL_IDX_SCORE###">r/###SUB###</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item ###URL_IDX_SCORE_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_SCORE###">score</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_IDX_CMNT_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_CMNT###">comments</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_IDX_DATE_CSS###">
|
||||
<a class="nav-link" href="###URL_IDX_DATE###">date</a>
|
||||
</li>
|
||||
<li class="nav-item ###URL_SEARCH_CSS###">
|
||||
<a class="nav-link" href="###URL_SEARCH###">search</a>
|
||||
</li>
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="###URL_SUBS###">All</a>
|
||||
###HTML_SUBS_MENU###
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<main role="main" class="container-fluid">
|
||||
<ul class="pagination pagination-sm mt-3">
|
||||
###HTML_PAGER###
|
||||
</ul>
|
||||
<div class="links">
|
||||
###HTML_LINKS###
|
||||
</div>
|
||||
<ul class="pagination pagination-sm mt-3">
|
||||
###HTML_PAGER###
|
||||
</ul>
|
||||
</main>
|
||||
<footer class="container-fluid">
|
||||
<p class="small mb-0">r/###SUB### archive has ###ARCH_NUM_POSTS### posts and ###ARCH_NUM_COMMENTS### comments. <a href="###URL_PROJECT###">source code</a>.</p>
|
||||
</footer>
|
||||
<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
|
||||
</body>
|
||||
</html>
|
42 templates/user.html Normal file
@@ -0,0 +1,42 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
|
||||
<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
|
||||
<title>###TITLE###</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
|
||||
<a class="navbar-brand" href="###URL_USER###">###TITLE###</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarNav">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item dropdown">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="###URL_SUBS###">All</a>
|
||||
###HTML_SUBS_MENU###
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<main role="main" class="container-fluid">
|
||||
<div class="links">
|
||||
###HTML_LINKS###
|
||||
</div>
|
||||
</main>
|
||||
<footer class="container-fluid">
|
||||
<p class="small mb-0">archive has ###ARCH_NUM_POSTS### user posts. <a href="###URL_PROJECT###">source code</a>.</p>
|
||||
</footer>
|
||||
<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
|
||||
<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
|
||||
</body>
|
||||
</html>
|
749 write_html.py Executable file
@@ -0,0 +1,749 @@
|
|||
#! /usr/bin/env python
|
||||
from datetime import datetime, date, timedelta
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import snudown
|
||||
|
||||
url_project = 'https://github.com/libertysoft3/reddit-html-archiver'
|
||||
links_per_page = 30
|
||||
pager_skip = 10
|
||||
pager_skip_long = 100
|
||||
start_date = date(2005, 1, 1)
|
||||
end_date = datetime.today().date() + timedelta(days=1)
|
||||
source_data_links = 'links.csv'
|
||||
max_comment_depth = 8 # mostly for mobile, which might be silly
|
||||
removed_content_identifiers = ['[deleted]','deleted','[removed]','removed']
|
||||
default_sort = 'score'
|
||||
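# each sort key maps to a default value (used when the csv field is empty)
# and a url slug used for the generated index-<slug>/ directories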
sort_indexes = {
|
||||
'score': {
|
||||
'default': 1,
|
||||
'slug': 'score'
|
||||
},
|
||||
'num_comments': {
|
||||
'default': 0,
|
||||
'slug': 'comments',
|
||||
},
|
||||
'created_utc': {
|
||||
'default': 1000198000,
|
||||
'slug': 'date',
|
||||
}
|
||||
}
|
||||
|
||||
template_index = ''
|
||||
with open('templates/index.html', 'r') as file:
|
||||
template_index = file.read()
|
||||
|
||||
template_subreddit = ''
|
||||
with open('templates/subreddit.html', 'r') as file:
|
||||
template_subreddit = file.read()
|
||||
|
||||
template_link = ''
|
||||
with open('templates/link.html', 'r') as file:
|
||||
template_link = file.read()
|
||||
|
||||
template_comment = ''
|
||||
with open('templates/partial_comment.html', 'r') as file:
|
||||
template_comment = file.read()
|
||||
|
||||
template_search = ''
|
||||
with open('templates/search.html', 'r') as file:
|
||||
template_search = file.read()
|
||||
|
||||
template_user = ''
|
||||
with open('templates/user.html', 'r') as file:
|
||||
template_user = file.read()
|
||||
|
||||
template_sub_link = ''
|
||||
with open('templates/partial_menu_item.html', 'r') as file:
|
||||
template_sub_link = file.read()
|
||||
|
||||
template_user_url = ''
|
||||
with open('templates/partial_user.html', 'r') as file:
|
||||
template_user_url = file.read()
|
||||
|
||||
template_link_url = ''
|
||||
with open('templates/partial_link.html', 'r') as file:
|
||||
template_link_url = file.read()
|
||||
|
||||
template_search_link = ''
|
||||
with open('templates/partial_search_link.html', 'r') as file:
|
||||
template_search_link = file.read()
|
||||
|
||||
template_index_sub = ''
|
||||
with open('templates/partial_index_subreddit.html', 'r') as file:
|
||||
template_index_sub = file.read()
|
||||
|
||||
template_index_pager_link = ''
|
||||
with open('templates/partial_subreddit_pager_link.html', 'r') as file:
|
||||
template_index_pager_link = file.read()
|
||||
|
||||
template_selftext = ''
|
||||
with open('templates/partial_link_selftext.html', 'r') as file:
|
||||
template_selftext = file.read()
|
||||
|
||||
template_user_page_link = ''
|
||||
with open('templates/partial_user_link.html', 'r') as file:
|
||||
template_user_page_link = file.read()
|
||||
|
||||
template_url = ''
|
||||
with open('templates/partial_url.html', 'r') as file:
|
||||
template_url = file.read()
|
||||
|
||||
def generate_html(min_score=0, min_comments=0, hide_deleted_comments=False):
|
||||
delta = timedelta(days=1)
|
||||
subs = get_subs()
|
||||
stat_links = 0
|
||||
stat_filtered_links = 0
|
||||
user_index = {}
|
||||
processed_subs = []
|
||||
|
||||
for sub in subs:
|
||||
d = start_date
|
||||
sub_links = []
|
||||
stat_sub_links = 0
|
||||
stat_sub_filtered_links = 0
|
||||
stat_sub_comments = 0
|
||||
while d <= end_date:
|
||||
raw_links = load_links(d, sub)
|
||||
# print ('processing %s %s %s links' % (sub, d.strftime("%Y-%m-%d"), len(sub_links)))
|
||||
stat_links += len(raw_links)
|
||||
stat_sub_links += len(raw_links)
|
||||
for l in raw_links:
|
||||
if validate_link(l, min_score, min_comments):
|
||||
stat_filtered_links += 1
|
||||
stat_sub_filtered_links += 1
|
||||
stat_sub_comments += len(l['comments'])  # count this link's comments
|
||||
sub_links.append(l)
|
||||
if l['author'] not in user_index.keys():
|
||||
user_index[l['author']] = []
|
||||
l['subreddit'] = sub
|
||||
user_index[l['author']].append(l)
|
||||
# TODO: return comments written
|
||||
write_link_page(subs, l, sub, hide_deleted_comments)
|
||||
d += delta
|
||||
if stat_sub_filtered_links > 0:
|
||||
processed_subs.append({'name': sub, 'num_links': stat_sub_filtered_links})
|
||||
write_subreddit_pages(sub, subs, sub_links, stat_sub_filtered_links, stat_sub_comments)
|
||||
write_subreddit_search_page(sub, subs, sub_links, stat_sub_filtered_links, stat_sub_comments)
|
||||
print('%s: %s links filtered to %s' % (sub, stat_sub_links, stat_sub_filtered_links))
|
||||
write_index(processed_subs)
|
||||
write_user_page(processed_subs, user_index)
|
||||
print('all done. %s links filtered to %s' % (stat_links, stat_filtered_links))
|
||||
|
||||
def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links, stat_sub_comments):
|
||||
if len(link_index) == 0:
|
||||
return True
|
||||
|
||||
for sort in sort_indexes.keys():
|
||||
links = sorted(link_index, key=lambda k: (int(k[sort]) if k[sort] != '' else sort_indexes[sort]['default']), reverse=True)
|
||||
pages = list(chunks(links, links_per_page))
|
||||
page_num = 0
|
||||
|
||||
sort_based_prefix = '../'
|
||||
if sort == default_sort:
|
||||
sort_based_prefix = ''
|
||||
|
||||
# render subreddits list
|
||||
subs_menu_html = ''
|
||||
for sub in subs:
|
||||
sub_url = sort_based_prefix + '../' + sub + '/index.html'
|
||||
subs_menu_html += template_sub_link.replace('###URL_SUB###', sub_url).replace('###SUB###', sub)
|
||||
|
||||
for page in pages:
|
||||
page_num += 1
|
||||
# print('%s page' % (page))
|
||||
|
||||
links_html = ''
|
||||
for l in page:
|
||||
author_link_html = template_user_url
|
||||
author_url = sort_based_prefix + '../user/' + l['author'] + '.html'
|
||||
author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
|
||||
|
||||
link_url = l['url']
|
||||
link_comments_url = sort_based_prefix + l['permalink'].strip('/')
|
||||
link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
|
||||
idpath = '/'.join(list(l['id']))
|
||||
link_comments_url = link_comments_url.replace(l['id'], idpath)
|
||||
link_comments_url += '.html'
|
||||
if l['is_self'] is True or l['is_self'] == 'True':
|
||||
link_url = link_comments_url
|
||||
|
||||
index_link_data_map = {
|
||||
'###TITLE###': l['title'],
|
||||
'###URL###': link_url,
|
||||
'###URL_COMMENTS###': link_comments_url,
|
||||
'###SCORE###': l['score'],
|
||||
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
|
||||
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
|
||||
'###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
|
||||
'###HTML_AUTHOR_URL###': author_link_html,
|
||||
}
|
||||
link_html = template_link_url
|
||||
for key, value in index_link_data_map.items():
|
||||
link_html = link_html.replace(key, value)
|
||||
links_html += link_html + '\n'
|
||||
|
||||
index_page_data_map = {
|
||||
'###INCLUDE_PATH###': sort_based_prefix + '../',
|
||||
'###TITLE###': 'by ' + sort_indexes[sort]['slug'] + ' page ' + str(page_num) + ' of ' + str(len(pages)),
|
||||
'###SUB###': subreddit,
|
||||
'###ARCH_NUM_POSTS###': str(stat_sub_filtered_links),
|
||||
'###ARCH_NUM_COMMENTS###': str(stat_sub_comments),
|
||||
'###URL_SUBS###': sort_based_prefix + '../index.html',
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###URL_IDX_SCORE###': sort_based_prefix + 'index.html',
|
||||
'###URL_IDX_CMNT###': sort_based_prefix + 'index-' + sort_indexes['num_comments']['slug'] + '/index.html',
|
||||
'###URL_IDX_DATE###': sort_based_prefix + 'index-' + sort_indexes['created_utc']['slug'] + '/index.html',
|
||||
'###URL_SEARCH###': sort_based_prefix + 'search.html',
|
||||
'###URL_IDX_SCORE_CSS###': 'active' if sort == 'score' else '',
|
||||
'###URL_IDX_CMNT_CSS###': 'active' if sort == 'num_comments' else '',
|
||||
'###URL_IDX_DATE_CSS###': 'active' if sort == 'created_utc' else '',
|
||||
'###URL_SEARCH_CSS###': '',
|
||||
'###HTML_LINKS###': links_html,
|
||||
'###HTML_SUBS_MENU###': subs_menu_html,
|
||||
'###HTML_PAGER###': get_pager_html(page_num, len(pages)),
|
||||
}
|
||||
page_html = template_subreddit
|
||||
for key, value in index_page_data_map.items():
|
||||
page_html = page_html.replace(key, value)
|
||||
|
||||
|
||||
# write file
|
||||
suffix = '-' + str(page_num) + '.html'
|
||||
if page_num == 1:
|
||||
suffix = '.html'
|
||||
filename = 'index' + suffix
|
||||
if sort == default_sort:
|
||||
filepath = 'r/' + subreddit + '/' + filename
|
||||
else:
|
||||
filepath = 'r/' + subreddit + '/index-' + sort_indexes[sort]['slug'] + '/' + filename
|
||||
if not os.path.isfile(filepath):
|
||||
os.makedirs(os.path.dirname(filepath), exist_ok=True)
|
||||
with open(filepath, 'w') as file:
|
||||
file.write(page_html)
|
||||
# print('wrote %s %s, %s links' % (sort, filepath, len(page)))
|
||||
|
||||
return True
|
||||
|
||||
def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False):
|
||||
# reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
|
||||
# archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
|
||||
idpath = '/'.join(list(link['id']))
|
||||
filepath = link['permalink'].strip('/') + '.html'
|
||||
filepath = filepath.replace(link['id'], idpath)
|
||||
if os.path.isfile(filepath):
|
||||
return True
|
||||
|
||||
created = datetime.utcfromtimestamp(int(link['created_utc']))
|
||||
sorted_comments = []
|
||||
if len(link['comments']) > 0:
|
||||
sorted_comments = sort_comments(link['comments'], hide_deleted_comments)
|
||||
|
||||
# traverse up to root dir, depends on id length
|
||||
static_include_path = ''
|
||||
for i in range(len(link['id']) + 2):
|
||||
static_include_path += '../'
|
||||
|
||||
# render comments
|
||||
comments_html = ''
|
||||
for c in sorted_comments:
|
||||
css_classes = 'ml-' + (str(c['depth']) if int(c['depth']) <= max_comment_depth else str(max_comment_depth))
|
||||
if c['author'] == link['author'] and c['author'] not in removed_content_identifiers:
|
||||
css_classes += ' op'
|
||||
if c['stickied'].lower() == 'true' or c['stickied'] is True:
|
||||
css_classes += ' stickied'
|
||||
|
||||
# author link
|
||||
url = static_include_path + 'user/' + c['author'] + '.html'
|
||||
author_link_html = template_user_url.replace('###URL_AUTHOR###', url).replace('###AUTHOR###', c['author'])
|
||||
|
||||
comment_data_map = {
|
||||
'###ID###': c['id'],
|
||||
'###PARENT_ID###': c['parent_id'],
|
||||
'###DEPTH###': str(c['depth']),
|
||||
'###DATE###': created.strftime('%Y-%m-%d'),
|
||||
'###SCORE###': c['score'],
|
||||
'###BODY###': snudown.markdown(c['body'].replace('&gt;', '>')),
|
||||
'###CSS_CLASSES###': css_classes,
|
||||
'###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
|
||||
'###HTML_AUTHOR_URL###': author_link_html,
|
||||
}
|
||||
comment_html = template_comment
|
||||
for key, value in comment_data_map.items():
|
||||
comment_html = comment_html.replace(key, value)
|
||||
comments_html += comment_html + '\n'
|
||||
|
||||
# render subreddits list
|
||||
subs_menu_html = ''
|
||||
for sub in subreddits:
|
||||
sub_url = static_include_path + sub + '/index.html'
|
||||
subs_menu_html += template_sub_link.replace('###URL_SUB###', sub_url).replace('###SUB###', sub)
|
||||
|
||||
# render selftext
|
||||
selftext_html = ''
|
||||
if len(link['selftext']) > 0:
|
||||
selftext_html = template_selftext.replace('###SELFTEXT###', snudown.markdown(link['selftext'].replace('&gt;', '>')))
|
||||
|
||||
# author link
|
||||
url = static_include_path + 'user/' + link['author'] + '.html'
|
||||
author_link_html = template_user_url.replace('###URL_AUTHOR###', url).replace('###AUTHOR###', link['author'])
|
||||
|
||||
html_title = template_url.replace('#HREF#', link['url']).replace('#INNER_HTML#', link['title'])
|
||||
if link['is_self'] is True or link['is_self'].lower() == 'true':
|
||||
html_title = link['title']
|
||||
|
||||
# render link page
|
||||
link_data_map = {
|
||||
'###INCLUDE_PATH###': static_include_path,
|
||||
'###SUB###': subreddit,
|
||||
'###TITLE###': link['title'],
|
||||
'###ID###': link['id'],
|
||||
'###DATE###': created.strftime('%Y-%m-%d'),
|
||||
'###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
|
||||
'###SCORE###': link['score'],
|
||||
'###NUM_COMMENTS###': link['num_comments'],
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###URL_SUBS###': static_include_path + 'index.html',
|
||||
'###URL_SUB###': static_include_path + subreddit + '/index.html',
|
||||
'###URL_SUB_CMNT###': static_include_path + subreddit + '/index-' + sort_indexes['num_comments']['slug'] + '/index.html',
|
||||
'###URL_SUB_DATE###': static_include_path + subreddit + '/index-' + sort_indexes['created_utc']['slug'] + '/index.html',
|
||||
'###URL_SEARCH###': static_include_path + subreddit + '/search.html',
|
||||
'###HTML_SUBS_MENU###': subs_menu_html,
|
||||
'###HTML_SELFTEXT###': selftext_html,
|
||||
'###HTML_COMMENTS###': comments_html,
|
||||
'###HTML_AUTHOR_URL###': author_link_html,
|
||||
'###HTML_TITLE###': html_title,
|
||||
}
|
||||
html = template_link
|
||||
for key, value in link_data_map.items():
|
||||
html = html.replace(key, value)
|
||||
|
||||
# write html
|
||||
# reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
|
||||
# archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
|
||||
idpath = '/'.join(list(link['id']))
|
||||
filepath = link['permalink'].strip('/') + '.html'
|
||||
filepath = filepath.replace(link['id'], idpath)
|
||||
if not os.path.isfile(filepath):
|
||||
os.makedirs(os.path.dirname(filepath), exist_ok=True)
|
||||
with open(filepath, 'w') as file:
|
||||
file.write(html)
|
||||
# print('wrote %s %s' % (created.strftime('%Y-%m-%d'), filepath))
|
||||
|
||||
return True
|
||||
|
||||
def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_links, stat_sub_comments):
|
||||
if len(link_index) == 0:
|
||||
return True
|
||||
|
||||
# name sort?
|
||||
links = sorted(link_index, key=lambda k: re.sub(r'\W+', '', k['title']).lower())
|
||||
|
||||
# render subreddits list
|
||||
subs_menu_html = ''
|
||||
for sub in subs:
|
||||
sub_url = '../' + sub + '/index.html'
|
||||
subs_menu_html += template_sub_link.replace('###URL_SUB###', sub_url).replace('###SUB###', sub)
|
||||
|
||||
links_html = ''
|
||||
for l in links:
|
||||
link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
|
||||
idpath = '/'.join(list(l['id']))
|
||||
link_comments_url = link_comments_url.replace(l['id'], idpath)
|
||||
link_comments_url += '.html'
|
||||
index_link_data_map = {
|
||||
'###TITLE###': l['title'],
|
||||
'###URL###': link_comments_url,
|
||||
}
|
||||
link_html = template_search_link
|
||||
for key, value in index_link_data_map.items():
|
||||
link_html = link_html.replace(key, value)
|
||||
links_html += link_html + '\n'
|
||||
|
||||
index_page_data_map = {
|
||||
'###INCLUDE_PATH###': '../',
|
||||
'###TITLE###': 'search',
|
||||
'###SUB###': subreddit,
|
||||
'###ARCH_NUM_POSTS###': str(stat_sub_filtered_links),
|
||||
'###ARCH_NUM_COMMENTS###': str(stat_sub_comments),
|
||||
'###URL_SUBS###': '../index.html',
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###URL_IDX_SCORE###': 'index.html',
|
||||
'###URL_IDX_CMNT###': 'index-' + sort_indexes['num_comments']['slug'] + '/index.html',
|
||||
'###URL_IDX_DATE###': 'index-' + sort_indexes['created_utc']['slug'] + '/index.html',
|
||||
'###URL_SEARCH###': 'search.html',
|
||||
'###URL_IDX_SCORE_CSS###': '',
|
||||
'###URL_IDX_CMNT_CSS###': '',
|
||||
'###URL_IDX_DATE_CSS###': '',
|
||||
'###URL_SEARCH_CSS###': 'active',
|
||||
'###HTML_LINKS###': links_html,
|
||||
'###HTML_SUBS_MENU###': subs_menu_html,
|
||||
}
|
||||
page_html = template_search
|
||||
for key, value in index_page_data_map.items():
|
||||
page_html = page_html.replace(key, value)
|
||||
|
||||
# write file
|
||||
filename = 'search.html'
|
||||
filepath = 'r/' + subreddit + '/' + filename
|
||||
if not os.path.isfile(filepath):
|
||||
os.makedirs(os.path.dirname(filepath), exist_ok=True)
|
||||
with open(filepath, 'w') as file:
|
||||
file.write(page_html)
|
||||
# print('wrote %s, %s links' % (filepath, len(links)))
|
||||
return True
|
||||
|
||||
def write_user_page(subs, user_index):
|
||||
if len(user_index.keys()) == 0:
|
||||
return False
|
||||
|
||||
# subreddits list
|
||||
subs_menu_html = ''
|
||||
for sub in subs:
|
||||
sub_url = '../' + sub['name'] + '/index.html'
|
||||
subs_menu_html += template_sub_link.replace('###URL_SUB###', sub_url).replace('###SUB###', sub['name'])
|
||||
|
||||
for user in user_index.keys():
|
||||
links = user_index[user]
|
||||
links.sort(key=lambda k: (int(k['score']) if k['score'] != '' else sort_indexes['score']['default']), reverse=True)
|
||||
|
||||
links_html = ''
|
||||
for l in links:
|
||||
|
||||
author_link_html = template_user_url
|
||||
author_url = l['author'] + '.html'
|
||||
author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
|
||||
|
||||
link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
|
||||
idpath = '/'.join(list(l['id']))
|
||||
link_comments_url = link_comments_url.replace(l['id'], idpath)
|
||||
link_comments_url += '.html'
|
||||
link_url = l['url']
|
||||
if l['is_self'] is True or l['is_self'] == 'True':
|
||||
link_url = link_comments_url
|
||||
|
||||
link_data_map = {
|
||||
'###TITLE###': l['title'],
|
||||
'###URL###': link_url,
|
||||
'###URL_COMMENTS###': link_comments_url,
|
||||
'###SCORE###': l['score'],
|
||||
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
|
||||
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
|
||||
'###SUB###': l['subreddit'],
|
||||
'###SUB_URL###': '../' + l['subreddit'] + '/index.html',
|
||||
'###HTML_AUTHOR_URL###': author_link_html,
|
||||
}
|
||||
link_html = template_user_page_link
|
||||
for key, value in link_data_map.items():
|
||||
link_html = link_html.replace(key, value)
|
||||
links_html += link_html + '\n'
|
||||
|
||||
page_data_map = {
|
||||
'###INCLUDE_PATH###': '../',
|
||||
'###TITLE###': 'user/' + user,
|
||||
'###ARCH_NUM_POSTS###': str(len(links)),
|
||||
'###URL_USER###': user + '.html',
|
||||
'###URL_SUBS###': '../index.html',
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###HTML_LINKS###': links_html,
|
||||
'###HTML_SUBS_MENU###': subs_menu_html,
|
||||
}
|
||||
page_html = template_user
|
||||
for key, value in page_data_map.items():
|
||||
page_html = page_html.replace(key, value)
|
||||
|
||||
filepath = 'r/user/' + user + '.html'
|
||||
if not os.path.isfile(filepath):
|
||||
os.makedirs(os.path.dirname(filepath), exist_ok=True)
|
||||
with open(filepath, 'w') as file:
|
||||
file.write(page_html)
|
||||
# print('wrote %s' % (filepath))
|
||||
|
||||
return True
|
||||
|
||||
def write_index(subs):
|
||||
if len(subs) == 0:
|
||||
return False
|
||||
subs.sort(key=lambda k: k['name'].casefold())
|
||||
|
||||
stat_num_links = 0
|
||||
links_html = ''
|
||||
subs_menu_html = ''
|
||||
for sub in subs:
|
||||
sub_url = sub['name'] + '/index.html'
|
||||
links_html += template_index_sub.replace('#URL_SUB#', sub_url).replace('#SUB#', sub['name']).replace('#NUM_LINKS#', str(sub['num_links']))
|
||||
subs_menu_html += template_sub_link.replace('###URL_SUB###', sub_url).replace('###SUB###', sub['name'])
|
||||
stat_num_links += sub['num_links']
|
||||
|
||||
index_page_data_map = {
|
||||
'###INCLUDE_PATH###': '',
|
||||
'###TITLE###': 'subreddits',
|
||||
'###URL_SUBS###': 'index.html',
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###ARCH_NUM_POSTS###': str(stat_num_links),
|
||||
'###HTML_LINKS###': links_html,
|
||||
'###HTML_SUBS_MENU###': subs_menu_html,
|
||||
}
|
||||
page_html = template_index
|
||||
for key, value in index_page_data_map.items():
|
||||
page_html = page_html.replace(key, value)
|
||||
|
||||
filepath = 'r/index.html'
|
||||
if not os.path.isfile(filepath):
|
||||
os.makedirs(os.path.dirname(filepath), exist_ok=True)
|
||||
with open(filepath, 'w') as file:
|
||||
file.write(page_html)
|
||||
# print('wrote %s' % (filepath))
|
||||
|
||||
return True
|
||||
|
||||
# a 'top' comments sort with orphaned comments (incomplete data) rendered last
|
||||
# only remove deleted comments if no children
|
||||
#
|
||||
def sort_comments(comments, hide_deleted_comments=False):
    sorted_comments = []
    if len(comments) == 0:
        return sorted_comments
    parent_map = {}
    id_map = {}
    top_level_comments = []
    link_id = comments[0]['link_id']
    depth = 0

    for c in comments:
        c['depth'] = depth
        id_map[c['id']] = c
        parent_map[c['id']] = c['parent_id']
        # add stickied comments first
        if c['stickied'].lower() == 'true':
            sorted_comments.append(c)
        # store top level comments
        elif c['parent_id'] == c['link_id']:
            top_level_comments.append(c)

    # sort non stickied top level comments by score
    if len(top_level_comments) > 0:
        top_level_comments = sorted(top_level_comments, key=lambda k: (int(k['score']) if k['score'] != '' else 1), reverse=True)
        sorted_comments += top_level_comments

    # add each top level comment's child comments
    sorted_linear_comments = []
    for c in sorted_comments:
        if hide_deleted_comments and c['body'] in removed_content_identifiers and 't1_' + c['id'] not in parent_map.values():
            pass
        else:
            sorted_linear_comments.append(c)
            child_comments = get_comment_tree_list([], depth + 1, c, id_map, parent_map, hide_deleted_comments)
            if len(child_comments) > 0:
                sorted_linear_comments += child_comments

    # add orphaned comments whose parent is neither the link nor a fetched comment.
    # parent_id looks like 't1_abc123'; split off the type prefix to get the bare id
    # (str.strip('t1_') would strip any of the characters 't', '1', '_' instead).
    for c in comments:
        if c['parent_id'] != link_id and c['parent_id'].split('_', 1)[-1] not in id_map.keys():
            if hide_deleted_comments and c['body'] in removed_content_identifiers:
                continue
            sorted_linear_comments.append(c)

    # print('sort_comments() in %s out %s show deleted: %s' % (len(comments), len(sorted_comments), hide_deleted_comments))
    return sorted_linear_comments

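# Depth-first helper for sort_comments(): appends parent_comment's children,
# sorted by score, to tree, tagging each with its depth and recursing so the
# whole thread comes out as one flat, render-ready list.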
def get_comment_tree_list(tree, depth, parent_comment, id_map, parent_map, hide_deleted_comments):
    parent_id = 't1_' + parent_comment['id']
    child_comments = []
    for key, value in parent_map.items():
        if value == parent_id:
            if hide_deleted_comments and id_map[key]['body'] in removed_content_identifiers and 't1_' + key not in parent_map.values():
                pass
            else:
                child_comments.append(id_map[key])

    # sort children by score
    # TODO: sort by score and # of child comments
    if len(child_comments) > 0:
        child_comments = sorted(child_comments, key=lambda k: (int(k['score']) if k['score'] != '' else 1), reverse=True)
        for child_comment in child_comments:
            child_comment['depth'] = depth
            tree.append(child_comment)
            tree = get_comment_tree_list(tree, depth + 1, child_comment, id_map, parent_map, hide_deleted_comments)
    return tree

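# Illustrative filter semantics (not part of the original file): with both
# --min-score 100 and --min-comments 50 set, a post with score 300 and 5
# comments is kept (OR); with only --min-score 100 set, score must be >= 100.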
def validate_link(link, min_score=0, min_comments=0):
    if not link:
        return False
    elif 'id' not in link.keys():
        return False
    # apply multiple conditions as an OR: keep high score / low comment and high comment / low score posts
    if min_score > 0 and min_comments > 0:
        if int(link['score']) < min_score and int(link['num_comments']) < min_comments:
            return False
    else:
        if min_score > 0 and int(link['score']) < min_score:
            return False
        if min_comments > 0 and int(link['num_comments']) < min_comments:
            return False

    return True

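# Reads one day of archived data for a subreddit. Assumed on-disk layout
# (as written by fetch_links.py):
#   data/<subreddit>/YYYY/MM/DD/<source_data_links>   that day's links CSV
#   data/<subreddit>/YYYY/MM/DD/<link id>.csv         one comments CSV per link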
def load_links(date, subreddit):
    links = []
    if not date or not subreddit:
        return links

    date_path = date.strftime("%Y/%m/%d")
    daily_path = 'data/' + subreddit + '/' + date_path
    daily_links_path = daily_path + '/' + source_data_links
    if os.path.isfile(daily_links_path):
        links = []
        with open(daily_links_path, 'r') as links_file:
            links_reader = csv.DictReader(links_file)
            for link_row in links_reader:
                comments = []
                comments_file_path = daily_path + '/' + link_row['id'] + '.csv'
                if os.path.isfile(comments_file_path):
                    with open(comments_file_path, 'r') as comments_file:
                        comments_reader = csv.DictReader(comments_file)
                        for comment_row in comments_reader:
                            comments.append(comment_row)
                link_row['comments'] = comments
                links.append(link_row)
    return links

def get_subs():
    subs = []
    if not os.path.isdir('data'):
        print('ERROR: no data, run fetch_links.py first')
        return subs
    return [d.name for d in os.scandir('data') if d.is_dir()]

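# Builds pager markup for paginated listings: previous / skip-back links, a
# window of page numbers around the current page, then skip-forward / next
# links. Page 1 is index.html, later pages are index-<n>.html; the
# 'd-none d-sm-block' classes assume a Bootstrap-style stylesheet.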
def get_pager_html(page_num=1, pages=1):
    html_pager = ''

    # previous
    css = ''
    if page_num == 1:
        css = 'disabled'
    url = 'index'
    if page_num - 1 > 1:
        url += '-' + str(page_num - 1)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', '‹').replace('#CSS_CLASS#', css)

    # skip back
    css = ''
    prev_skip = page_num - pager_skip
    if prev_skip < 1:
        prev_skip = 1
    if page_num == 1:
        css = 'disabled'
    url = 'index'
    if prev_skip > 1:
        url += '-' + str(prev_skip)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', '‹‹').replace('#CSS_CLASS#', css)

    # skip back far
    css = ''
    prev_skip = page_num - pager_skip_long
    if prev_skip < 1:
        prev_skip = 1
    if page_num == 1:
        css = 'disabled'
    url = 'index'
    if prev_skip > 1:
        url += '-' + str(prev_skip)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', '‹‹‹').replace('#CSS_CLASS#', css)

    # n-1
    start = -2
    if page_num + 1 > pages:
        start -= 1
    if page_num + 2 > pages:
        start -= 1
    for prev_page_num in range(start, 0):
        if page_num + prev_page_num > 0:
            css = ''
            url = 'index'
            if page_num + prev_page_num > 1:
                url += '-' + str(page_num + prev_page_num)
            url += '.html'
            if prev_page_num < -1:
                css = 'd-none d-sm-block'
            html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', str(page_num + prev_page_num)).replace('#CSS_CLASS#', css)

    # n
    url = 'index'
    if page_num > 1:
        url += '-' + str(page_num)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', str(page_num)).replace('#CSS_CLASS#', 'active')

    # n + 1
    css = ''
    end = 3
    if page_num - 1 < 1:
        end += 1
    if page_num - 2 < 1:
        end += 1
    for next_page_num in range(1, end):
        if page_num + next_page_num <= pages:
            if next_page_num > 1:
                css = 'd-none d-sm-block'
            html_pager += template_index_pager_link.replace('#URL#', 'index' + '-' + str(page_num + next_page_num) + '.html').replace('#TEXT#', str(page_num + next_page_num)).replace('#CSS_CLASS#', css)

    # skip forward far
    next_skip = page_num + pager_skip_long
    css = ''
    if page_num == pages:
        css = 'disabled'
    if next_skip > pages:
        next_skip = pages
    url = 'index'
    if next_skip > 1:
        url += '-' + str(next_skip)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', '›››').replace('#CSS_CLASS#', css)

    # skip forward
    next_skip = page_num + pager_skip
    css = ''
    if page_num == pages:
        css = 'disabled'
    if next_skip > pages:
        next_skip = pages
    url = 'index'
    if next_skip > 1:
        url += '-' + str(next_skip)
    url += '.html'
    html_pager += template_index_pager_link.replace('#URL#', url).replace('#TEXT#', '››').replace('#CSS_CLASS#', css)

    # next
    css = ''
    next_num = page_num + 1
    if page_num == pages:
        css = 'disabled'
        next_num = pages
    html_pager += template_index_pager_link.replace('#URL#', 'index' + '-' + str(next_num) + '.html').replace('#TEXT#', '›').replace('#CSS_CLASS#', css)

    return html_pager

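# Example: list(chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]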
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

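# Illustrative invocation (flag values are examples, not defaults):
#   python write_html.py --min-score 50 --hide-deleted-comments
# renders only posts scoring at least 50 and drops deleted/removed comments
# where possible.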
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--min-score', default=0, help='only render posts with at least this score, default 0')
    parser.add_argument('--min-comments', default=0, help='only render posts with at least this many comments, default 0')
    parser.add_argument('--hide-deleted-comments', action='store_true', help='exclude deleted and removed comments where possible')
    args = parser.parse_args()

    hide_deleted_comments = args.hide_deleted_comments
    min_score = int(args.min_score)
    min_comments = int(args.min_comments)

    generate_html(min_score, min_comments, hide_deleted_comments)