mirror of https://github.com/sys-nyx/red-arch.git (synced 2025-05-06 08:45:31 -04:00)

comments without scores show as 'n/a'

commit 67a7356ede (parent d9d000055b)
2 changed files with 20 additions and 16 deletions

README.md | 21
@@ -16,12 +16,12 @@ requires python 3
     cd reddit-html-archiver
     chmod u+x *.py
 
-### fetch reddit data from pushshift
+### fetch reddit data
 
-data is fetched by subreddit and date range.
+data is fetched by subreddit and date range and is stored as csv files in `data`.
 
     ./fetch_links.py politics 2017-1-1 2017-2-1
-    # or add some link/post request parameters
+    # or add some link/post request filters
     ./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
     ./fetch_links.py -h
 
@@ -29,19 +29,20 @@ you may need decrease your date range or adjust `pushshift_rate_limit_per_minute
 
 ### write web pages
 
-write html files for all subreddits.
+write html files for all subreddits to `r`.
 
     ./write_html.py
     # or add some output filtering
    ./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
     ./write_html.py -h
 
 
-if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages.
+your html archive has been written to `r`. once you are satisfied with your archive feel free to copy/move the contents of `r` to elsewhere and to delete the git repos you have created. everything in `r` is fully self contained.
 
+to update an html archive, delete everything in `r` aside from `r/static` and re-run `write_html.py` to regenerate everything.
 
 ### hosting the archived pages
 
-copy the contents of the `r` directory to a web root or appropriately served git repo. or serve it directly.
+copy the contents of the `r` directory to a web root or appropriately served git repo.
 
 ### potential improvements
@@ -49,7 +50,9 @@ copy the contents of the `r` directory to a web root or appropriately served git
 * num_comments filtering
 * thumbnails or thumbnail urls
 * media posts
-* update scores from the reddit api with [praw](https://github.com/praw-dev/praw)
+* score update
+* scores from reddit with [praw](https://github.com/praw-dev/praw)
+* view on reddit.com
 * real templating
 * filter output per sub, individual min score and comments filters
 * js markdown url previews
@@ -60,7 +63,7 @@ copy the contents of the `r` directory to a web root or appropriately served git
 
 ### see also
 
-* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/)
+* [pushshift](https://github.com/pushshift/api), [r/pushshift](https://www.reddit.com/r/pushshift/)
 * [psaw](https://github.com/dmarx/psaw)
 * [snudown](https://github.com/reddit/snudown)
 * [redditsearch.io](https://redditsearch.io/)
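
The update step added above (clear out `r` except `r/static`, then re-run `write_html.py`) can be scripted. The following is only an illustrative sketch, not part of the repository, and it assumes it is run from the reddit-html-archiver checkout with an existing `r` directory:

# illustrative helper (hypothetical, not in the repo): delete generated pages
# but keep r/static, then rebuild the archive with write_html.py
import shutil
import subprocess
from pathlib import Path

for entry in Path('r').iterdir():
    if entry.name == 'static':
        continue                     # keep shared static assets
    if entry.is_dir():
        shutil.rmtree(entry)         # remove a subreddit's generated pages
    else:
        entry.unlink()               # remove generated top-level files
subprocess.run(['./write_html.py'], check=True)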

write_html.py

@@ -30,6 +30,7 @@ sort_indexes = {
         'slug': 'date',
     }
 }
+missing_comment_score_label = 'n/a'
 
 template_index = ''
 with open('templates/index.html', 'r') as file:
@@ -174,8 +175,8 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
             '###TITLE###': l['title'],
             '###URL###': link_url,
             '###URL_COMMENTS###': link_comments_url,
-            '###SCORE###': l['score'],
-            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
+            '###SCORE###': str(l['score']),
+            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else str(0),
             '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
             '###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
             '###HTML_AUTHOR_URL###': author_link_html,
@@ -264,7 +265,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
             '###PARENT_ID###': c['parent_id'],
             '###DEPTH###': str(c['depth']),
             '###DATE###': created.strftime('%Y-%m-%d'),
-            '###SCORE###': c['score'],
+            '###SCORE###': str(c['score']) if len(str(c['score'])) > 0 else missing_comment_score_label,
             '###BODY###': snudown.markdown(c['body'].replace('&gt;','>')),
             '###CSS_CLASSES###': css_classes,
             '###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
@@ -302,8 +303,8 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
             '###ID###': link['id'],
             '###DATE###': created.strftime('%Y-%m-%d'),
             '###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
-            '###SCORE###': link['score'],
-            '###NUM_COMMENTS###': link['num_comments'],
+            '###SCORE###': str(link['score']),
+            '###NUM_COMMENTS###': str(link['num_comments']),
             '###URL_PROJECT###': url_project,
             '###URL_SUBS###': static_include_path + 'index.html',
             '###URL_SUB###': static_include_path + subreddit + '/index.html',
@@ -428,8 +429,8 @@ def write_user_page(subs, user_index):
             '###TITLE###': l['title'],
             '###URL###': link_url,
             '###URL_COMMENTS###': link_comments_url,
-            '###SCORE###': l['score'],
-            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
+            '###SCORE###': str(l['score']),
+            '###NUM_COMMENTS###': str(l['num_comments']) if int(l['num_comments']) > 0 else str(0),
             '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
             '###SUB###': l['subreddit'],
             '###SUB_URL###': '../' + l['subreddit'] + '/index.html',
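
Taken together, the change coerces the score and comment-count template values to strings and renders comments whose score field is empty with the new missing_comment_score_label ('n/a'). A minimal standalone sketch of that substitution, assuming the values arrive as plain strings from the csv files (not the project's actual code):

# standalone sketch of the behaviour this commit adds to write_html.py;
# score fields read from the csv files are strings and may be empty.
missing_comment_score_label = 'n/a'

def format_score(raw_score):
    # comments archived without a score have an empty field; label them 'n/a'
    s = str(raw_score)
    return s if len(s) > 0 else missing_comment_score_label

print(format_score('42'))  # -> 42
print(format_score(''))    # -> n/a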