mirror of https://github.com/sys-nyx/red-arch.git
synced 2025-05-05 16:25:31 -04:00

commit 83fb77b41c (parent 43a93e1a04)

fix subreddit name casing bugs, resolves #3

3 changed files with 22 additions and 21 deletions
 README.md      | 29 ++++++++++++++---------------
 fetch_links.py |  2 ++
 write_html.py  | 12 ++++++------
README.md

@@ -16,6 +16,13 @@ requires python 3 on linux, OSX, or Windows
     cd reddit-html-archiver
     chmod u+x *.py
 
+Windows users may need to run
+
+    chcp 65001
+    set PYTHONIOENCODING=utf-8
+
+before running `fetch_links.py` or `write_html.py` to resolve encoding errors such as 'codec can't encode character'.
+
 ### fetch reddit data
 
 data is fetched by subreddit and date range and is stored as csv files in `data`.
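A quick illustration of the failure the relocated note guards against (a sketch, not part of the commit): on a Windows console using a legacy code page, printing characters common in reddit titles raises UnicodeEncodeError, and `PYTHONIOENCODING=utf-8` overrides the stream encoding regardless of the console code page.

    # Sketch of the failure mode the README note addresses. On a Windows
    # console with a legacy code page (e.g. cp437), this fails:
    #
    #   print('\u2019')   # right single quote, common in reddit titles
    #   UnicodeEncodeError: 'charmap' codec can't encode character '\u2019' ...
    #
    # chcp 65001 switches the console itself to UTF-8, and PYTHONIOENCODING
    # forces Python's stdout/stderr encoding:
    import sys
    print(sys.stdout.encoding)  # reports 'utf-8' once the variable is set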
@@ -44,15 +51,6 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
 
 copy the contents of the `r` directory to a web root or appropriately served git repo.
 
-### Windows users
-
-Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
-
-    chcp 65001
-    set PYTHONIOENCODING=utf-8
-
-This will resolve errors encoding errors such as 'codec can't encode character'.
-
 ### potential improvements
 
 * fetch_links
@@ -62,15 +60,16 @@ This will resolve errors encoding errors such as 'codec can't encode character'.
 * score update
   * scores from reddit with [praw](https://github.com/praw-dev/praw)
 * real templating
-* filter output per sub, individual min score and comments filters
-* js markdown url previews
-* js powered search page, show no links by default
-* link and comments pages
-  * view on reddit.com
-  * show link domains
+* choose [Bootswatch](https://bootswatch.com/) theme
+* specify subreddits to output
+* show link domain/post type
 * user pages
   * add pagination, posts sorted by score, comments, date, sub
 * too many files in one directory
+* view on reddit.com
+* js powered search page, show no links by default
+* js inline media embeds/expandos
+* archive.org links
 
 ### see also
 
fetch_links.py

@@ -214,4 +214,6 @@ if __name__ == '__main__':
     if args.self_only:
         self_only = True
 
+    args.subreddit = args.subreddit.lower()
+
     fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
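For context, a minimal sketch (hypothetical helper, not in the repo) of why the subreddit argument is lowercased before fetching: the name becomes a directory under `data/`, and on a case-sensitive filesystem `AskReddit` and `askreddit` would otherwise produce two separate archives.

    import os

    def data_dir(subreddit: str) -> str:
        # fetch_links.py stores CSVs under data/<subreddit>; this helper
        # only mirrors that layout for illustration
        return os.path.join('data', subreddit.lower())

    # 'AskReddit' and 'askreddit' now map to the same archive directory
    assert data_dir('AskReddit') == data_dir('askreddit')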
write_html.py

@@ -163,7 +163,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
         author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 
         link_url = l['url']
-        link_comments_url = sort_based_prefix + l['permalink'].strip('/')
+        link_comments_url = sort_based_prefix + l['permalink'].lower().strip('/')
         link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
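Worth noting why the added `.lower()` is sufficient here: the `replace('r/' + subreddit + '/', '')` on the following line only matches because `subreddit` itself is lowercase after this commit (via the `get_subs()` and `fetch_links.py` changes). A small sketch with illustrative values:

    # After lowering the permalink, the strip/replace chain behaves like this:
    permalink = '/r/AskReddit/comments/abc1de/some_title/'   # hypothetical
    subreddit = 'askreddit'  # guaranteed lowercase elsewhere in this commit
    url = permalink.lower().strip('/').replace('r/' + subreddit + '/', '')
    print(url)  # comments/abc1de/some_title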
@@ -232,7 +232,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
     # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
     # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
     idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
     filepath = filepath.replace(link['id'], idpath)
     if os.path.isfile(filepath):
         return True
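The two comment lines in this hunk describe the id sharding that keeps any one output directory small; condensed into a runnable snippet using the same example:

    link_id = '8742iv'
    idpath = '/'.join(list(link_id))   # '8/7/4/2/i/v'

    permalink = '/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/'
    filepath = permalink.lower().strip('/') + '.html'
    filepath = filepath.replace(link_id, idpath)
    print(filepath)
    # r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html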
@@ -325,7 +325,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
     # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
     # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
     idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
     filepath = filepath.replace(link['id'], idpath)
     if not os.path.isfile(filepath):
         os.makedirs(os.path.dirname(filepath), exist_ok=True)
@@ -350,7 +350,7 @@ def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_l
 
     links_html = ''
     for l in links:
-        link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
+        link_comments_url = l['permalink'].lower().strip('/').replace('r/' + subreddit + '/', '')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
         link_comments_url += '.html'
@@ -417,7 +417,7 @@ def write_user_page(subs, user_index):
         author_url = l['author'] + '.html'
         author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 
-        link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
+        link_comments_url = '../' + l['permalink'].lower().strip('/').strip('r/')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
         link_comments_url += '.html'
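One pre-existing caveat on this line, untouched by the commit: `str.strip('r/')` treats its argument as a set of characters to remove from both ends, not as a literal prefix, so a subreddit whose name begins with `r` loses that letter too. A prefix-safe spelling exists on Python 3.9+:

    print('r/redarch/comments/abc1de'.strip('r/'))
    # edarch/comments/abc1de  -- the leading 'r' of 'redarch' is eaten

    print('r/redarch/comments/abc1de'.removeprefix('r/'))
    # redarch/comments/abc1de -- removes only the literal 'r/' prefix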
@@ -618,7 +618,7 @@ def get_subs():
         return subs
     for d in os.listdir('data'):
         if os.path.isdir('data' + '/' + d):
-            subs.append(d)
+            subs.append(d.lower())
     return subs
 
 def get_pager_html(page_num=1, pages=1):
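`get_subs()` now lowercases the names it returns, but directories created before this fix keep their on-disk casing, which paths built from the lowered name may no longer match on a case-sensitive filesystem. A hypothetical one-off migration, not part of the commit:

    import os

    # rename any mixed-case archive directories to their lowercase form;
    # assumes no two directories collide once lowered
    for d in os.listdir('data'):
        src = os.path.join('data', d)
        if os.path.isdir(src) and d != d.lower():
            os.rename(src, os.path.join('data', d.lower()))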