mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-24 15:15:24 -04:00
Clean up
This commit is contained in:
parent
4f56c141fd
commit
cf4962fd4c
3 changed files with 10 additions and 12 deletions
|
@ -11,6 +11,8 @@ import json
|
|||
import praw
|
||||
from praw import endpoints
|
||||
|
||||
sys.path.append('personal')
|
||||
|
||||
log = discord_logging.init_logging(debug=False)
|
||||
|
||||
import utils
|
||||
|
@ -31,7 +33,7 @@ def query_pushshift(ids, bearer, object_type):
|
|||
for i in range(4):
|
||||
response = requests.get(url, headers={
|
||||
'User-Agent': "In script by /u/Watchful1",
|
||||
'Authorization': f"Bearer {bearer}"})
|
||||
'Authorization': f"Bearer {bearer}"}, timeout=15)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
if response.status_code == 403:
|
||||
|
@ -166,16 +168,11 @@ if __name__ == "__main__":
|
|||
log.error(f"Invalid type: {args.type}")
|
||||
sys.exit(2)
|
||||
|
||||
config = discord_logging.get_config()
|
||||
user_name = "Watchful12"
|
||||
reddit = praw.Reddit(
|
||||
username=user_name,
|
||||
password=discord_logging.get_config_var(config, user_name, "password"),
|
||||
client_id=discord_logging.get_config_var(config, user_name, f"client_id_1"),
|
||||
client_secret=discord_logging.get_config_var(config, user_name, f"client_secret_1"),
|
||||
user_agent=f"Remindme ingest script")
|
||||
reddit = praw.Reddit(user_name)
|
||||
|
||||
pushshift_token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjoiV2F0Y2hmdWwxIiwiZXhwaXJlcyI6MTY5MzA5OTE4OC4wMjU3MDU4fQ.HJJd73nwHArOz2lErpubUuTVd_gdJ44SfpKDjb91tIY"
|
||||
config = discord_logging.get_config()
|
||||
pushshift_token = discord_logging.get_config_var(config, user_name, "pushshift_token")
|
||||
|
||||
while start_date <= end_date:
|
||||
build_day(start_date, input_folders, args.output, object_type, reddit, pushshift_token)
|
||||
|
|
|
@ -11,6 +11,7 @@ from collections import defaultdict
|
|||
log = discord_logging.get_logger()
|
||||
|
||||
import utils
|
||||
import merge
|
||||
|
||||
NEWLINE_ENCODED = "\n".encode('utf-8')
|
||||
|
||||
|
@ -258,12 +259,12 @@ class ObjectDict:
|
|||
created_minute = created_utc.replace(second=0, microsecond=0)
|
||||
if obj['id'] in self.by_id:
|
||||
existing_obj = self.by_id[obj['id']]
|
||||
unmatched_field = utils.merge_fields(existing_obj, obj, self.obj_type)
|
||||
unmatched_field = merge.merge_fields(existing_obj, obj, self.obj_type)
|
||||
self.counts[created_minute][ingest_type][False] += 1
|
||||
return unmatched_field
|
||||
if created_utc < self.min_datetime or created_utc > self.max_datetime:
|
||||
return False
|
||||
unmatched_field = utils.parse_fields(obj, self.obj_type)
|
||||
unmatched_field = merge.parse_fields(obj, self.obj_type)
|
||||
self.by_id[obj['id']] = obj
|
||||
self.by_minute[created_minute].add(obj)
|
||||
self.counts[created_minute][ingest_type][True] += 1
|
||||
|
|
|
@ -189,7 +189,7 @@ field_actions = {
|
|||
"is_robot_indexable": FieldAction.OVERWRITE,
|
||||
"is_self": FieldAction.DONT_OVERWRITE,
|
||||
"is_survey_ad": FieldAction.ALLOW_EMPTY,
|
||||
"is_video": FieldAction.ALLOW,
|
||||
"is_video": FieldAction.OVERWRITE,
|
||||
"likes": FieldAction.ALLOW_EMPTY,
|
||||
"link_flair_background_color": FieldAction.OVERWRITE_NOT_NONE,
|
||||
"link_flair_css_class": FieldAction.OVERWRITE_NOT_NONE,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue