mirror of
https://github.com/sys-nyx/red-arch.git
synced 2025-05-06 00:35:25 -04:00
added search indexing with lunrjs to redarch.py
This commit is contained in:
parent
754edaa768
commit
a4cf65944a
1 changed file with 57 additions and 4 deletions
61
redarch.py
61
redarch.py
|
@ -4,10 +4,49 @@ import os
|
|||
import json
|
||||
import argparse
|
||||
import configparser
|
||||
from lunr import lunr
|
||||
from urllib.parse import urlparse
|
||||
from write_html import generate_html
|
||||
from watchful import return_redd_objects
|
||||
|
||||
def get_lunr_posts_index(subreddits: dict[str, list[dict]], chunk_size: int = 1000):
    """Build chunked lunr search indexes plus display metadata for all threads.

    Args:
        subreddits: Mapping of subreddit name to its list of thread dicts.
            Each thread is read for the keys ``permalink``, ``title``,
            ``score``, ``comments``, ``selftext``, ``created_utc`` and ``id``;
            lunr additionally indexes ``author``.
        chunk_size: Maximum number of threads placed in a single lunr index.
            Defaults to 1000.

    Returns:
        A ``(idxs, metadata)`` tuple. ``idxs`` is a list with one lunr index
        per chunk of threads; ``metadata`` maps each thread ``id`` to a dict
        with the keys ``path``, ``title``, ``score``, ``replies``,
        ``body_short`` and ``date``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.

    Side effects:
        Stores the computed relative HTML path on every thread dict under
        the ``path`` key and prints progress to stdout.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    print('Generating search index')
    to_index = []
    metadata = {}

    for s, threads in subreddits.items():
        for t in threads:
            # Path of the thread page: the lowercased permalink with the
            # 'r/<subreddit>' part and surrounding slashes removed.
            t['path'] = t['permalink'].lower().replace(f'r/{s}', '').strip('/') + '.html'
            metadata[t['id']] = {
                'path': t['path'],
                'title': t['title'],
                'score': t['score'],
                'replies': str(len(t['comments'])),
                # Only a short preview of the body is kept in the metadata.
                'body_short': t['selftext'][:200],
                'date': t['created_utc'],
            }
            to_index.append(t)

    # Split the documents into fixed-size chunks; one index is built per chunk.
    chunks = [
        to_index[start:start + chunk_size]
        for start in range(0, len(to_index), chunk_size)
    ]

    idxs = []
    # enumerate() supplies the chunk number directly; looking it up with
    # list.index() is a linear equality search and reports the wrong number
    # when two chunks compare equal.
    for number, chunk in enumerate(chunks, start=1):
        idxs.append(lunr(
            ref='id',
            fields=[
                'id',
                dict(field_name='title', boost=15),
                dict(field_name='selftext', boost=10),
                'score',
                'author',
            ],
            documents=chunk,
        ))
        print(f'\rCreating index chunk: {number}/{len(chunks)}', end='')
    print('')
    return idxs, metadata
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('config', type=str, help='Path to configuration file.')
|
||||
|
@ -50,7 +89,7 @@ def main():
|
|||
comments[parent_url].append(c)
|
||||
|
||||
|
||||
complete_threads = []
|
||||
complete_reddit_threads = []
|
||||
|
||||
for p in raw_posts:
|
||||
p['comments'] = []
|
||||
|
@ -60,17 +99,31 @@ def main():
|
|||
if postp in comments.keys():
|
||||
p['comments'] = comments[postp]
|
||||
|
||||
complete_threads.append(p)
|
||||
complete_reddit_threads.append(p)
|
||||
|
||||
subreddits[s.lower()] = complete_threads
|
||||
subreddits[s.lower()] = complete_reddit_threads
|
||||
|
||||
print("Total threads: ",len(raw_posts))
|
||||
print("Total comments: ", len(raw_comments))
|
||||
print("Comments missing permalinks: ", len(missing_perm))
|
||||
print("Comment chains found: ", len(comments))
|
||||
print("Threads rebuilt: ", len(complete_threads))
|
||||
print("Threads rebuilt: ", len(complete_reddit_threads))
|
||||
|
||||
idxs, metadata = get_lunr_posts_index(subreddits)
|
||||
|
||||
idx_path_list = []
|
||||
for idx in idxs:
|
||||
idx_name = f'static/js/search/idx-00{idxs.index(idx) + 1}.json'
|
||||
|
||||
idx_path_list.append(idx_name)
|
||||
print(f'\rWriting: {idx_name}',end='')
|
||||
with open(f'r/{idx_name}', 'w') as f:
|
||||
json.dump(idx.serialize(),f)
|
||||
with open('r/static/js/search/search-idx-list.json','w') as f:
|
||||
json.dump(idx_path_list, f)
|
||||
with open('r/static/js/search/metadata.json', 'w') as f:
|
||||
json.dump(metadata, f)
|
||||
print('')
|
||||
generate_html([s.lower() for s in config.sections()], subreddits)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue