mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-08-15 09:35:34 -04:00
Support empty filter
This commit is contained in:
parent
f7146593a0
commit
78c1814a60
1 changed file with 15 additions and 14 deletions
|
@@ -7,7 +7,7 @@ from datetime import datetime
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
|
|
||||||
# put the path to the input file
|
# put the path to the input file
|
||||||
input_file = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2023-02.zst"
|
input_file = r"\\MYCLOUDPR4100\Public\reddit\subreddits\redditdev_submissions.zst"
|
||||||
# put the name or path to the output file. The file extension from below will be added automatically
|
# put the name or path to the output file. The file extension from below will be added automatically
|
||||||
output_file = r"\\MYCLOUDPR4100\Public\output"
|
output_file = r"\\MYCLOUDPR4100\Public\output"
|
||||||
# the format to output in, pick from the following options
|
# the format to output in, pick from the following options
|
||||||
|
@@ -233,19 +233,20 @@ if __name__ == "__main__":
|
||||||
if created > to_date:
|
if created > to_date:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
field_value = obj[field].lower()
|
if field is not None:
|
||||||
matched = False
|
field_value = obj[field].lower()
|
||||||
for value in values:
|
matched = False
|
||||||
if exact_match:
|
for value in values:
|
||||||
if value == field_value:
|
if exact_match:
|
||||||
matched = True
|
if value == field_value:
|
||||||
break
|
matched = True
|
||||||
else:
|
break
|
||||||
if value in field_value:
|
else:
|
||||||
matched = True
|
if value in field_value:
|
||||||
break
|
matched = True
|
||||||
if not matched:
|
break
|
||||||
continue
|
if not matched:
|
||||||
|
continue
|
||||||
|
|
||||||
matched_lines += 1
|
matched_lines += 1
|
||||||
if output_format == "zst":
|
if output_format == "zst":
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue