mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-22 14:20:40 -04:00
Add split blocks by minute
This commit is contained in:
parent
4ecf22aaee
commit
f35762e203
7 changed files with 318 additions and 7 deletions
|
@ -1,5 +1,7 @@
|
|||
import zstandard
|
||||
import json
|
||||
import os
|
||||
from zst_blocks import ZstBlocksFile
|
||||
|
||||
|
||||
def read_obj_zst(file_name):
|
||||
|
@ -75,6 +77,14 @@ class OutputZst:
|
|||
return True
|
||||
|
||||
|
||||
# copied from https://github.com/ArthurHeitmann/zst_blocks_format
|
||||
def read_obj_zst_blocks(file_name):
    """Lazily yield the JSON-decoded rows of a zst-blocks file.

    The file at ``file_name`` is opened in binary mode and handed to
    ``ZstBlocksFile.streamRows``. Each row it produces is decoded to text,
    stripped of surrounding whitespace and parsed with ``json.loads``.

    This is a generator: the file stays open while rows are being consumed
    and is closed once iteration finishes or the generator is closed.

    Args:
        file_name: Path of the zst-blocks file to read.

    Yields:
        The object parsed from each row's JSON text.
    """
    with open(file_name, "rb") as handle:
        for raw_row in ZstBlocksFile.streamRows(handle):
            # NOTE(review): rows are presumably bytes; decode() then uses
            # its default encoding (UTF-8) -- confirm against zst_blocks.
            text = raw_row.decode().strip()
            yield json.loads(text)
|
||||
|
||||
|
||||
def base36encode(integer: int) -> str:
|
||||
chars = '0123456789abcdefghijklmnopqrstuvwxyz'
|
||||
sign = '-' if integer < 0 else ''
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue