comments without scores show as 'n/a'

This commit is contained in:
libertysoft3 2018-10-28 22:22:20 -07:00
parent d9d000055b
commit 67a7356ede
2 changed files with 20 additions and 16 deletions

View file

@ -16,12 +16,12 @@ requires python 3
cd reddit-html-archiver cd reddit-html-archiver
chmod u+x *.py chmod u+x *.py
### fetch reddit data from pushshift ### fetch reddit data
data is fetched by subreddit and date range. data is fetched by subreddit and date range and is stored as csv files in `data`.
./fetch_links.py politics 2017-1-1 2017-2-1 ./fetch_links.py politics 2017-1-1 2017-2-1
# or add some link/post request parameters # or add some link/post request filters
./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1 ./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
./fetch_links.py -h ./fetch_links.py -h
@ -29,19 +29,20 @@ you may need to decrease your date range or adjust `pushshift_rate_limit_per_minute
### write web pages ### write web pages
write html files for all subreddits. write html files for all subreddits to `r`.
./write_html.py ./write_html.py
# or add some output filtering # or add some output filtering
./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments ./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
./write_html.py -h ./write_html.py -h
your html archive has been written to `r`. once you are satisfied with your archive, feel free to copy or move the contents of `r` elsewhere and to delete the git repos you have created. everything in `r` is fully self-contained.
if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages. to update an html archive, delete everything in `r` aside from `r/static` and re-run `write_html.py` to regenerate everything.
### hosting the archived pages ### hosting the archived pages
copy the contents of the `r` directory to a web root or appropriately served git repo. or serve it directly. copy the contents of the `r` directory to a web root or appropriately served git repo.
### potential improvements ### potential improvements
@ -49,7 +50,9 @@ copy the contents of the `r` directory to a web root or appropriately served git
* num_comments filtering * num_comments filtering
* thumbnails or thumbnail urls * thumbnails or thumbnail urls
* media posts * media posts
* update scores from the reddit api with [praw](https://github.com/praw-dev/praw) * score update
* scores from reddit with [praw](https://github.com/praw-dev/praw)
* view on reddit.com
* real templating * real templating
* filter output per sub, individual min score and comments filters * filter output per sub, individual min score and comments filters
* js markdown url previews * js markdown url previews
@ -60,7 +63,7 @@ copy the contents of the `r` directory to a web root or appropriately served git
### see also ### see also
* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/) * [pushshift](https://github.com/pushshift/api), [r/pushshift](https://www.reddit.com/r/pushshift/)
* [psaw](https://github.com/dmarx/psaw) * [psaw](https://github.com/dmarx/psaw)
* [snudown](https://github.com/reddit/snudown) * [snudown](https://github.com/reddit/snudown)
* [redditsearch.io](https://redditsearch.io/) * [redditsearch.io](https://redditsearch.io/)

View file

@ -30,6 +30,7 @@ sort_indexes = {
'slug': 'date', 'slug': 'date',
} }
} }
missing_comment_score_label = 'n/a'
template_index = '' template_index = ''
with open('templates/index.html', 'r') as file: with open('templates/index.html', 'r') as file:
@ -174,8 +175,8 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
'###TITLE###': l['title'], '###TITLE###': l['title'],
'###URL###': link_url, '###URL###': link_url,
'###URL_COMMENTS###': link_comments_url, '###URL_COMMENTS###': link_comments_url,
'###SCORE###': l['score'], '###SCORE###': str(l['score']),
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0, '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else str(0),
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'), '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
'###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '', '###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
'###HTML_AUTHOR_URL###': author_link_html, '###HTML_AUTHOR_URL###': author_link_html,
@ -264,7 +265,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
'###PARENT_ID###': c['parent_id'], '###PARENT_ID###': c['parent_id'],
'###DEPTH###': str(c['depth']), '###DEPTH###': str(c['depth']),
'###DATE###': created.strftime('%Y-%m-%d'), '###DATE###': created.strftime('%Y-%m-%d'),
'###SCORE###': c['score'], '###SCORE###': str(c['score']) if len(str(c['score'])) > 0 else missing_comment_score_label,
'###BODY###': snudown.markdown(c['body'].replace('>','>')), '###BODY###': snudown.markdown(c['body'].replace('>','>')),
'###CSS_CLASSES###': css_classes, '###CSS_CLASSES###': css_classes,
'###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary', '###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
@ -302,8 +303,8 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
'###ID###': link['id'], '###ID###': link['id'],
'###DATE###': created.strftime('%Y-%m-%d'), '###DATE###': created.strftime('%Y-%m-%d'),
'###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a', '###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
'###SCORE###': link['score'], '###SCORE###': str(link['score']),
'###NUM_COMMENTS###': link['num_comments'], '###NUM_COMMENTS###': str(link['num_comments']),
'###URL_PROJECT###': url_project, '###URL_PROJECT###': url_project,
'###URL_SUBS###': static_include_path + 'index.html', '###URL_SUBS###': static_include_path + 'index.html',
'###URL_SUB###': static_include_path + subreddit + '/index.html', '###URL_SUB###': static_include_path + subreddit + '/index.html',
@ -428,8 +429,8 @@ def write_user_page(subs, user_index):
'###TITLE###': l['title'], '###TITLE###': l['title'],
'###URL###': link_url, '###URL###': link_url,
'###URL_COMMENTS###': link_comments_url, '###URL_COMMENTS###': link_comments_url,
'###SCORE###': l['score'], '###SCORE###': str(l['score']),
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0, '###NUM_COMMENTS###': str(l['num_comments']) if int(l['num_comments']) > 0 else str(0),
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'), '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
'###SUB###': l['subreddit'], '###SUB###': l['subreddit'],
'###SUB_URL###': '../' + l['subreddit'] + '/index.html', '###SUB_URL###': '../' + l['subreddit'] + '/index.html',