Initial implementation of process_month.py

This commit is contained in:
Watchful1 2024-05-17 21:35:00 -07:00
parent b54a2483dc
commit fa5f6316fb
10 changed files with 703 additions and 369 deletions

View file

@ -11,6 +11,7 @@ pymongo = {extras = ["srv"], version = "*"}
scipy = "*"
sortedcontainers = "*"
praw = "*"
multiprocessing-logging = "*"
[dev-packages]

567
Pipfile.lock generated
View file

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "da2ec602c80b176c90ccdba3624777867aabb098ec6d45f850a2d09e4c7cf22f"
"sha256": "c7bc148c6964f1aa92e8bfccaf999ec1aaf50811c93ce5fb2a92f704c022be6d"
},
"pipfile-spec": 6,
"requires": {
@ -18,92 +18,107 @@
"default": {
"certifi": {
"hashes": [
"sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082",
"sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"
"sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f",
"sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"
],
"markers": "python_version >= '3.6'",
"version": "==2023.7.22"
"version": "==2024.2.2"
},
"charset-normalizer": {
"hashes": [
"sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96",
"sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c",
"sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710",
"sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706",
"sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020",
"sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252",
"sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad",
"sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329",
"sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a",
"sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f",
"sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6",
"sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4",
"sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a",
"sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46",
"sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2",
"sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23",
"sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace",
"sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd",
"sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982",
"sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10",
"sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2",
"sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea",
"sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09",
"sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5",
"sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149",
"sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489",
"sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9",
"sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80",
"sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592",
"sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3",
"sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6",
"sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed",
"sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c",
"sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200",
"sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a",
"sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e",
"sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d",
"sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6",
"sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623",
"sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669",
"sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3",
"sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa",
"sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9",
"sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2",
"sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f",
"sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1",
"sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4",
"sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a",
"sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8",
"sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3",
"sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029",
"sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f",
"sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959",
"sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22",
"sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7",
"sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952",
"sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346",
"sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e",
"sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d",
"sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299",
"sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd",
"sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a",
"sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3",
"sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037",
"sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94",
"sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c",
"sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858",
"sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a",
"sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449",
"sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c",
"sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918",
"sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1",
"sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c",
"sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac",
"sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"
"sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027",
"sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087",
"sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786",
"sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8",
"sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09",
"sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185",
"sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574",
"sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e",
"sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519",
"sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898",
"sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269",
"sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3",
"sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f",
"sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6",
"sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8",
"sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a",
"sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73",
"sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc",
"sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714",
"sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2",
"sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc",
"sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce",
"sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d",
"sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e",
"sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6",
"sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269",
"sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96",
"sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d",
"sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a",
"sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4",
"sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77",
"sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d",
"sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0",
"sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed",
"sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068",
"sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac",
"sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25",
"sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8",
"sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab",
"sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26",
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2",
"sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db",
"sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f",
"sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5",
"sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99",
"sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c",
"sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d",
"sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811",
"sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa",
"sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a",
"sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03",
"sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b",
"sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04",
"sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c",
"sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001",
"sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458",
"sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389",
"sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99",
"sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985",
"sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537",
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238",
"sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f",
"sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d",
"sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796",
"sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a",
"sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143",
"sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8",
"sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c",
"sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5",
"sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5",
"sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711",
"sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4",
"sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6",
"sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c",
"sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7",
"sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4",
"sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b",
"sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae",
"sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12",
"sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c",
"sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae",
"sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8",
"sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887",
"sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b",
"sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4",
"sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f",
"sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5",
"sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33",
"sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519",
"sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"
],
"markers": "python_version >= '3.7'",
"version": "==3.2.0"
"version": "==3.3.2"
},
"discord-logging": {
"editable": true,
@ -112,50 +127,68 @@
},
"dnspython": {
"hashes": [
"sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8",
"sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"
"sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50",
"sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"
],
"markers": "python_version >= '3.8' and python_version < '4.0'",
"version": "==2.4.2"
"markers": "python_version >= '3.8'",
"version": "==2.6.1"
},
"idna": {
"hashes": [
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
"sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc",
"sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"
],
"markers": "python_version >= '3.5'",
"version": "==3.4"
"version": "==3.7"
},
"multiprocessing-logging": {
"hashes": [
"sha256:8a5be02b02edbd6fa6e3e89499af7680db69db9e2d8707fcd28d445fa248f23e"
],
"index": "pypi",
"version": "==0.3.4"
},
"numpy": {
"hashes": [
"sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2",
"sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55",
"sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf",
"sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01",
"sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca",
"sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901",
"sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d",
"sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4",
"sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf",
"sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380",
"sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044",
"sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545",
"sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f",
"sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f",
"sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3",
"sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364",
"sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9",
"sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418",
"sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f",
"sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295",
"sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3",
"sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187",
"sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926",
"sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357",
"sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"
"sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b",
"sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818",
"sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20",
"sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0",
"sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010",
"sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a",
"sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea",
"sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c",
"sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71",
"sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110",
"sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be",
"sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a",
"sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a",
"sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5",
"sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed",
"sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd",
"sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c",
"sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e",
"sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0",
"sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c",
"sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a",
"sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b",
"sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0",
"sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6",
"sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2",
"sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a",
"sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30",
"sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218",
"sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5",
"sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07",
"sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2",
"sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4",
"sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764",
"sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef",
"sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3",
"sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"
],
"markers": "python_version >= '3.9'",
"version": "==1.25.2"
"version": "==1.26.4"
},
"praw": {
"hashes": [
@ -167,101 +200,80 @@
},
"prawcore": {
"hashes": [
"sha256:48c17db447fa06a13ca3e722201f443031437708daa736c05a1df895fbcceff5",
"sha256:daf1ccd4b7a80dc4e6567d48336d782e94a9a6dad83770fc2edf76dc9a84f56d"
"sha256:29af5da58d85704b439ad3c820873ad541f4535e00bb98c66f0fbcc8c603065a",
"sha256:b7b2b5a1d04406e086ab4e79988dc794df16059862f329f4c6a43ed09986c335"
],
"markers": "python_version ~= '3.6'",
"version": "==2.3.0"
"markers": "python_version ~= '3.8'",
"version": "==2.4.0"
},
"pymongo": {
"extras": [
"srv"
],
"hashes": [
"sha256:076afa0a4a96ca9f77fec0e4a0d241200b3b3a1766f8d7be9a905ecf59a7416b",
"sha256:08819da7864f9b8d4a95729b2bea5fffed08b63d3b9c15b4fea47de655766cf5",
"sha256:0a1f26bc1f5ce774d99725773901820dfdfd24e875028da4a0252a5b48dcab5c",
"sha256:0f4b125b46fe377984fbaecf2af40ed48b05a4b7676a2ff98999f2016d66b3ec",
"sha256:1240edc1a448d4ada4bf1a0e55550b6292420915292408e59159fd8bbdaf8f63",
"sha256:152259f0f1a60f560323aacf463a3642a65a25557683f49cfa08c8f1ecb2395a",
"sha256:168172ef7856e20ec024fe2a746bfa895c88b32720138e6438fd765ebd2b62dd",
"sha256:1b1d7d9aabd8629a31d63cd106d56cca0e6420f38e50563278b520f385c0d86e",
"sha256:1d40ad09d9f5e719bc6f729cc6b17f31c0b055029719406bd31dde2f72fca7e7",
"sha256:21b953da14549ff62ea4ae20889c71564328958cbdf880c64a92a48dda4c9c53",
"sha256:23cc6d7eb009c688d70da186b8f362d61d5dd1a2c14a45b890bd1e91e9c451f2",
"sha256:2988ef5e6b360b3ff1c6d55c53515499de5f48df31afd9f785d788cdacfbe2d3",
"sha256:2a0aade2b11dc0c326ccd429ee4134d2d47459ff68d449c6d7e01e74651bd255",
"sha256:2b0176f9233a5927084c79ff80b51bd70bfd57e4f3d564f50f80238e797f0c8a",
"sha256:2d4fa1b01fa7e5b7bb8d312e3542e211b320eb7a4e3d8dc884327039d93cb9e0",
"sha256:3236cf89d69679eaeb9119c840f5c7eb388a2110b57af6bb6baf01a1da387c18",
"sha256:33faa786cc907de63f745f587e9879429b46033d7d97a7b84b37f4f8f47b9b32",
"sha256:37df8f6006286a5896d1cbc3efb8471ced42e3568d38e6cb00857277047b0d63",
"sha256:3a7166d57dc74d679caa7743b8ecf7dc3a1235a9fd178654dddb2b2a627ae229",
"sha256:3d79ae3bb1ff041c0db56f138c88ce1dfb0209f3546d8d6e7c3f74944ecd2439",
"sha256:3e33064f1984db412b34d51496f4ea785a9cff621c67de58e09fb28da6468a52",
"sha256:3fa3648e4f1e63ddfe53563ee111079ea3ab35c3b09cd25bc22dadc8269a495f",
"sha256:40d5f6e853ece9bfc01e9129b228df446f49316a4252bb1fbfae5c3c9dedebad",
"sha256:41771b22dd2822540f79a877c391283d4e6368125999a5ec8beee1ce566f3f82",
"sha256:435228d3c16a375274ac8ab9c4f9aef40c5e57ddb8296e20ecec9e2461da1017",
"sha256:44ee985194c426ddf781fa784f31ffa29cb59657b2dba09250a4245431847d73",
"sha256:465fd5b040206f8bce7016b01d7e7f79d2fcd7c2b8e41791be9632a9df1b4999",
"sha256:496c9cbcb4951183d4503a9d7d2c1e3694aab1304262f831d5e1917e60386036",
"sha256:49dce6957598975d8b8d506329d2a3a6c4aee911fa4bbcf5e52ffc6897122950",
"sha256:4c42748ccc451dfcd9cef6c5447a7ab727351fd9747ad431db5ebb18a9b78a4d",
"sha256:505f8519c4c782a61d94a17b0da50be639ec462128fbd10ab0a34889218fdee3",
"sha256:53f2dda54d76a98b43a410498bd12f6034b2a14b6844ca08513733b2b20b7ad8",
"sha256:56320c401f544d762fc35766936178fbceb1d9261cd7b24fbfbc8fb6f67aa8a5",
"sha256:58a63a26a1e3dc481dd3a18d6d9f8bd1d576cd1ffe0d479ba7dd38b0aeb20066",
"sha256:5caee7bd08c3d36ec54617832b44985bd70c4cbd77c5b313de6f7fce0bb34f93",
"sha256:631492573a1bef2f74f9ac0f9d84e0ce422c251644cd81207530af4aa2ee1980",
"sha256:63d8019eee119df308a075b8a7bdb06d4720bf791e2b73d5ab0e7473c115d79c",
"sha256:6422b6763b016f2ef2beedded0e546d6aa6ba87910f9244d86e0ac7690f75c96",
"sha256:681f252e43b3ef054ca9161635f81b730f4d8cadd28b3f2b2004f5a72f853982",
"sha256:6d64878d1659d2a5bdfd0f0a4d79bafe68653c573681495e424ab40d7b6d6d41",
"sha256:74c0da07c04d0781490b2915e7514b1adb265ef22af039a947988c331ee7455b",
"sha256:7591a3beea6a9a4fa3080d27d193b41f631130e3ffa76b88c9ccea123f26dc59",
"sha256:76a262c41c1a7cbb84a3b11976578a7eb8e788c4b7bfbd15c005fb6ca88e6e50",
"sha256:77cfff95c1fafd09e940b3fdcb7b65f11442662fad611d0e69b4dd5d17a81c60",
"sha256:8027c9063579083746147cf401a7072a9fb6829678076cd3deff28bb0e0f50c8",
"sha256:80a167081c75cf66b32f30e2f1eaee9365af935a86dbd76788169911bed9b5d5",
"sha256:840eaf30ccac122df260b6005f9dfae4ac287c498ee91e3e90c56781614ca238",
"sha256:8543253adfaa0b802bfa88386db1009c6ebb7d5684d093ee4edc725007553d21",
"sha256:89b3f2da57a27913d15d2a07d58482f33d0a5b28abd20b8e643ab4d625e36257",
"sha256:8e559116e4128630ad3b7e788e2e5da81cbc2344dee246af44471fa650486a70",
"sha256:9aff6279e405dc953eeb540ab061e72c03cf38119613fce183a8e94f31be608f",
"sha256:9c04b9560872fa9a91251030c488e0a73bce9321a70f991f830c72b3f8115d0d",
"sha256:9d2346b00af524757576cc2406414562cced1d4349c92166a0ee377a2a483a80",
"sha256:a253b765b7cbc4209f1d8ee16c7287c4268d3243070bf72d7eec5aa9dfe2a2c2",
"sha256:a8127437ebc196a6f5e8fddd746bd0903a400dc6b5ae35df672dd1ccc7170a2a",
"sha256:b25f7bea162b3dbec6d33c522097ef81df7c19a9300722fa6853f5b495aecb77",
"sha256:b33c17d9e694b66d7e96977e9e56df19d662031483efe121a24772a44ccbbc7e",
"sha256:b4fe46b58010115514b842c669a0ed9b6a342017b15905653a5b1724ab80917f",
"sha256:b520aafc6cb148bac09ccf532f52cbd31d83acf4d3e5070d84efe3c019a1adbf",
"sha256:b5bbb87fa0511bd313d9a2c90294c88db837667c2bda2ea3fa7a35b59fd93b1f",
"sha256:b6d2a56fc2354bb6378f3634402eec788a8f3facf0b3e7d468db5f2b5a78d763",
"sha256:bbd705d5f3c3d1ff2d169e418bb789ff07ab3c70d567cc6ba6b72b04b9143481",
"sha256:bc5d8c3647b8ae28e4312f1492b8f29deebd31479cd3abaa989090fb1d66db83",
"sha256:c3c3525ea8658ee1192cdddf5faf99b07ebe1eeaa61bf32821126df6d1b8072b",
"sha256:c9a9a39b7cac81dca79fca8c2a6479ef4c7b1aab95fad7544cc0e8fd943595a2",
"sha256:cd4c8d6aa91d3e35016847cbe8d73106e3d1c9a4e6578d38e2c346bfe8edb3ca",
"sha256:cf62da7a4cdec9a4b2981fcbd5e08053edffccf20e845c0b6ec1e77eb7fab61d",
"sha256:d67225f05f6ea27c8dc57f3fa6397c96d09c42af69d46629f71e82e66d33fa4f",
"sha256:dfcd2b9f510411de615ccedd47462dae80e82fdc09fe9ab0f0f32f11cf57eeb5",
"sha256:e1f61355c821e870fb4c17cdb318669cfbcf245a291ce5053b41140870c3e5cc",
"sha256:e249190b018d63c901678053b4a43e797ca78b93fb6d17633e3567d4b3ec6107",
"sha256:e2654d1278384cff75952682d17c718ecc1ad1d6227bb0068fd826ba47d426a5",
"sha256:e57d859b972c75ee44ea2ef4758f12821243e99de814030f69a3decb2aa86807",
"sha256:e5a27f348909235a106a3903fc8e70f573d89b41d723a500869c6569a391cff7",
"sha256:ead4f19d0257a756b21ac2e0e85a37a7245ddec36d3b6008d5bfe416525967dc",
"sha256:f076b779aa3dc179aa3ed861be063a313ed4e48ae9f6a8370a9b1295d4502111",
"sha256:f1bb3a62395ffe835dbef3a1cbff48fbcce709c78bd1f52e896aee990928432b",
"sha256:f2227a08b091bd41df5aadee0a5037673f691e2aa000e1968b1ea2342afc6880",
"sha256:f3754acbd7efc7f1b529039fcffc092a15e1cf045e31f22f6c9c5950c613ec4d",
"sha256:fe48f50fb6348511a3268a893bfd4ab5f263f5ac220782449d03cd05964d1ae7",
"sha256:fff7d17d30b2cd45afd654b3fc117755c5d84506ed25fda386494e4e0a3416e1"
"sha256:02efd1bb3397e24ef2af45923888b41a378ce00cb3a4259c5f4fc3c70497a22f",
"sha256:0d833651f1ba938bb7501f13e326b96cfbb7d98867b2d545ca6d69c7664903e0",
"sha256:12c466e02133b7f8f4ff1045c6b5916215c5f7923bc83fd6e28e290cba18f9f6",
"sha256:12d1fef77d25640cb78893d07ff7d2fac4c4461d8eec45bd3b9ad491a1115d6e",
"sha256:194065c9d445017b3c82fb85f89aa2055464a080bde604010dc8eb932a6b3c95",
"sha256:1c78f156edc59b905c80c9003e022e1a764c54fd40ac4fea05b0764f829790e2",
"sha256:1e37faf298a37ffb3e0809e77fbbb0a32b6a2d18a83c59cfc2a7b794ea1136b0",
"sha256:25eeb2c18ede63891cbd617943dd9e6b9cbccc54f276e0b2e693a0cc40f243c5",
"sha256:268d8578c0500012140c5460755ea405cbfe541ef47c81efa9d6744f0f99aeca",
"sha256:2cb77d09bd012cb4b30636e7e38d00b5f9be5eb521c364bde66490c45ee6c4b4",
"sha256:347c49cf7f0ba49ea87c1a5a1984187ecc5516b7c753f31938bf7b37462824fd",
"sha256:35b3f0c7d49724859d4df5f0445818d525824a6cd55074c42573d9b50764df67",
"sha256:37e9ea81fa59ee9274457ed7d59b6c27f6f2a5fe8e26f184ecf58ea52a019cb8",
"sha256:47a1a4832ef2f4346dcd1a10a36ade7367ad6905929ddb476459abb4fd1b98cb",
"sha256:4bdb5ffe1cd3728c9479671a067ef44dacafc3743741d4dc700c377c4231356f",
"sha256:4ffd1519edbe311df73c74ec338de7d294af535b2748191c866ea3a7c484cd15",
"sha256:5239776633f7578b81207e5646245415a5a95f6ae5ef5dff8e7c2357e6264bfc",
"sha256:5239ef7e749f1326ea7564428bf861d5250aa39d7f26d612741b1b1273227062",
"sha256:56bf8b706946952acdea0fe478f8e44f1ed101c4b87f046859e6c3abe6c0a9f4",
"sha256:65b4c00dedbd333698b83cd2095a639a6f0d7c4e2a617988f6c65fb46711f028",
"sha256:6a87eef394039765679f75c6a47455a4030870341cb76eafc349c5944408c882",
"sha256:727ad07952c155cd20045f2ce91143c7dc4fb01a5b4e8012905a89a7da554b0c",
"sha256:730778b6f0964b164c187289f906bbc84cb0524df285b7a85aa355bbec43eb21",
"sha256:743552033c63f0afdb56b9189ab04b5c1dbffd7310cf7156ab98eebcecf24621",
"sha256:7e9d9d2c0aae73aa4369bd373ac2ac59f02c46d4e56c4b6d6e250cfe85f76802",
"sha256:82102e353be13f1a6769660dd88115b1da382447672ba1c2662a0fbe3df1d861",
"sha256:827611beb6c483260d520cfa6a49662d980dfa5368a04296f65fa39e78fccea7",
"sha256:84bc00200c3cbb6c98a2bb964c9e8284b641e4a33cf10c802390552575ee21de",
"sha256:87032f818bf5052ab742812c715eff896621385c43f8f97cdd37d15b5d394e95",
"sha256:87832d6076c2c82f42870157414fd876facbb6554d2faf271ffe7f8f30ce7bed",
"sha256:87bb453ac3eb44db95cb6d5a616fbc906c1c00661eec7f55696253a6245beb8a",
"sha256:9024e1661c6e40acf468177bf90ce924d1bc681d2b244adda3ed7b2f4c4d17d7",
"sha256:9349f0bb17a31371d4cacb64b306e4ca90413a3ad1fffe73ac7cd495570d94b5",
"sha256:9385654f01a90f73827af4db90c290a1519f7d9102ba43286e187b373e9a78e9",
"sha256:9a8bd37f5dabc86efceb8d8cbff5969256523d42d08088f098753dba15f3b37a",
"sha256:9d892fb91e81cccb83f507cdb2ea0aa026ec3ced7f12a1d60f6a5bf0f20f9c1f",
"sha256:a754e366c404d19ff3f077ddeed64be31e0bb515e04f502bf11987f1baa55a16",
"sha256:b48a5650ee5320d59f6d570bd99a8d5c58ac6f297a4e9090535f6561469ac32e",
"sha256:bcf337d1b252405779d9c79978d6ca15eab3cdaa2f44c100a79221bddad97c8a",
"sha256:c44efab10d9a3db920530f7bcb26af8f408b7273d2f0214081d3891979726328",
"sha256:c72d16fede22efe7cdd1f422e8da15760e9498024040429362886f946c10fe95",
"sha256:cb6e00a79dff22c9a72212ad82021b54bdb3b85f38a85f4fc466bde581d7d17a",
"sha256:ce1a374ea0e49808e0380ffc64284c0ce0f12bd21042b4bef1af3eb7bdf49054",
"sha256:cecd2df037249d1c74f0af86fb5b766104a5012becac6ff63d85d1de53ba8b98",
"sha256:cf17ea9cea14d59b0527403dd7106362917ced7c4ec936c4ba22bd36c912c8e0",
"sha256:cf28430ec1924af1bffed37b69a812339084697fd3f3e781074a0148e6475803",
"sha256:d1bcd58669e56c08f1e72c5758868b5df169fe267501c949ee83c418e9df9155",
"sha256:d275596f840018858757561840767b39272ac96436fcb54f5cac6d245393fd97",
"sha256:d2dcf608d35644e8d276d61bf40a93339d8d66a0e5f3e3f75b2c155a421a1b71",
"sha256:d4d59776f435564159196d971aa89422ead878174aff8fe18e06d9a0bc6d648c",
"sha256:d9b6cbc037108ff1a0a867e7670d8513c37f9bcd9ee3d2464411bfabf70ca002",
"sha256:db4380d1e69fdad1044a4b8f3bb105200542c49a0dde93452d938ff9db1d6d29",
"sha256:e004527ea42a6b99a8b8d5b42b42762c3bdf80f88fbdb5c3a9d47f3808495b86",
"sha256:e6eab12c6385526d386543d6823b07187fefba028f0da216506e00f0e1855119",
"sha256:eb0642e5f0dd7e86bb358749cc278e70b911e617f519989d346f742dc9520dfb",
"sha256:f91073049c43d14e66696970dd708d319b86ee57ef9af359294eee072abaac79",
"sha256:fadc6e8db7707c861ebe25b13ad6aca19ea4d2c56bf04a26691f46c23dadf6e4",
"sha256:fc5af24fcf5fc6f7f40d65446400d45dd12bea933d0299dc9e90c5b22197f1e9",
"sha256:fcaf8c911cb29316a02356f89dbc0e0dfcc6a712ace217b6b543805690d2aefd",
"sha256:ffd4d7cb2e6c6e100e2b39606d38a9ffc934e18593dc9bb326196afc7d93ce3d"
],
"index": "pypi",
"version": "==4.5.0"
"version": "==4.7.2"
},
"requests": {
"hashes": [
@ -273,34 +285,34 @@
},
"scipy": {
"hashes": [
"sha256:0f3261f14b767b316d7137c66cc4f33a80ea05841b9c87ad83a726205b901423",
"sha256:10eb6af2f751aa3424762948e5352f707b0dece77288206f227864ddf675aca0",
"sha256:1342ca385c673208f32472830c10110a9dcd053cf0c4b7d4cd7026d0335a6c1d",
"sha256:214cdf04bbae7a54784f8431f976704ed607c4bc69ba0d5d5d6a9df84374df76",
"sha256:2b997a5369e2d30c97995dcb29d638701f8000d04df01b8e947f206e5d0ac788",
"sha256:2c91cf049ffb5575917f2a01da1da082fd24ed48120d08a6e7297dfcac771dcd",
"sha256:3aeb87661de987f8ec56fa6950863994cd427209158255a389fc5aea51fa7055",
"sha256:4447ad057d7597476f9862ecbd9285bbf13ba9d73ce25acfa4e4b11c6801b4c9",
"sha256:542a757e2a6ec409e71df3d8fd20127afbbacb1c07990cb23c5870c13953d899",
"sha256:8d9886f44ef8c9e776cb7527fb01455bf4f4a46c455c4682edc2c2cc8cd78562",
"sha256:90d3b1364e751d8214e325c371f0ee0dd38419268bf4888b2ae1040a6b266b2a",
"sha256:95763fbda1206bec41157582bea482f50eb3702c85fffcf6d24394b071c0e87a",
"sha256:ac74b1512d38718fb6a491c439aa7b3605b96b1ed3be6599c17d49d6c60fca18",
"sha256:afdb0d983f6135d50770dd979df50bf1c7f58b5b33e0eb8cf5c73c70600eae1d",
"sha256:b0620240ef445b5ddde52460e6bc3483b7c9c750275369379e5f609a1050911c",
"sha256:b133f237bd8ba73bad51bc12eb4f2d84cbec999753bf25ba58235e9fc2096d80",
"sha256:b29318a5e39bd200ca4381d80b065cdf3076c7d7281c5e36569e99273867f61d",
"sha256:b8425fa963a32936c9773ee3ce44a765d8ff67eed5f4ac81dc1e4a819a238ee9",
"sha256:d2b813bfbe8dec6a75164523de650bad41f4405d35b0fa24c2c28ae07fcefb20",
"sha256:d690e1ca993c8f7ede6d22e5637541217fc6a4d3f78b3672a6fe454dbb7eb9a7",
"sha256:e367904a0fec76433bf3fbf3e85bf60dae8e9e585ffd21898ab1085a29a04d16",
"sha256:ea932570b1c2a30edafca922345854ff2cd20d43cd9123b6dacfdecebfc1a80b",
"sha256:f28f1f6cfeb48339c192efc6275749b2a25a7e49c4d8369a28b6591da02fbc9a",
"sha256:f73102f769ee06041a3aa26b5841359b1a93cc364ce45609657751795e8f4a4a",
"sha256:fa4909c6c20c3d91480533cddbc0e7c6d849e7d9ded692918c76ce5964997898"
"sha256:05f1432ba070e90d42d7fd836462c50bf98bd08bed0aa616c359eed8a04e3922",
"sha256:09c74543c4fbeb67af6ce457f6a6a28e5d3739a87f62412e4a16e46f164f0ae5",
"sha256:0fbcf8abaf5aa2dc8d6400566c1a727aed338b5fe880cde64907596a89d576fa",
"sha256:109d391d720fcebf2fbe008621952b08e52907cf4c8c7efc7376822151820820",
"sha256:1d2f7bb14c178f8b13ebae93f67e42b0a6b0fc50eba1cd8021c9b6e08e8fb1cd",
"sha256:1e7626dfd91cdea5714f343ce1176b6c4745155d234f1033584154f60ef1ff42",
"sha256:22789b56a999265431c417d462e5b7f2b487e831ca7bef5edeb56efe4c93f86e",
"sha256:28e286bf9ac422d6beb559bc61312c348ca9b0f0dae0d7c5afde7f722d6ea13d",
"sha256:33fde20efc380bd23a78a4d26d59fc8704e9b5fd9b08841693eb46716ba13d86",
"sha256:45c08bec71d3546d606989ba6e7daa6f0992918171e2a6f7fbedfa7361c2de1e",
"sha256:4dca18c3ffee287ddd3bc8f1dabaf45f5305c5afc9f8ab9cbfab855e70b2df5c",
"sha256:5407708195cb38d70fd2d6bb04b1b9dd5c92297d86e9f9daae1576bd9e06f602",
"sha256:58569af537ea29d3f78e5abd18398459f195546bb3be23d16677fb26616cc11e",
"sha256:5e4a756355522eb60fcd61f8372ac2549073c8788f6114449b37e9e8104f15a5",
"sha256:6bf9fe63e7a4bf01d3645b13ff2aa6dea023d38993f42aaac81a18b1bda7a82a",
"sha256:8930ae3ea371d6b91c203b1032b9600d69c568e537b7988a3073dfe4d4774f21",
"sha256:9ff7dad5d24a8045d836671e082a490848e8639cabb3dbdacb29f943a678683d",
"sha256:a2f471de4d01200718b2b8927f7d76b5d9bde18047ea0fa8bd15c5ba3f26a1d6",
"sha256:ac38c4c92951ac0f729c4c48c9e13eb3675d9986cc0c83943784d7390d540c78",
"sha256:b2a3ff461ec4756b7e8e42e1c681077349a038f0686132d623fa404c0bee2551",
"sha256:b5acd8e1dbd8dbe38d0004b1497019b2dbbc3d70691e65d69615f8a7292865d7",
"sha256:b8434f6f3fa49f631fae84afee424e2483289dfc30a47755b4b4e6b07b2633a4",
"sha256:ba419578ab343a4e0a77c0ef82f088238a93eef141b2b8017e46149776dfad4d",
"sha256:d0de696f589681c2802f9090fff730c218f7c51ff49bf252b6a97ec4a5d19e8b",
"sha256:dcbb9ea49b0167de4167c40eeee6e167caeef11effb0670b554d10b1e693a8b9"
],
"index": "pypi",
"version": "==1.11.2"
"version": "==1.13.0"
},
"sortedcontainers": {
"hashes": [
@ -319,68 +331,71 @@
},
"urllib3": {
"hashes": [
"sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11",
"sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"
"sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d",
"sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"
],
"markers": "python_version >= '3.7'",
"version": "==2.0.4"
"markers": "python_version >= '3.8'",
"version": "==2.2.1"
},
"websocket-client": {
"hashes": [
"sha256:53e95c826bf800c4c465f50093a8c4ff091c7327023b10bfaff40cf1ef170eaa",
"sha256:ce54f419dfae71f4bdba69ebe65bf7f0a93fe71bc009ad3a010aacc3eebad537"
"sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526",
"sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"
],
"markers": "python_version >= '3.8'",
"version": "==1.6.2"
"version": "==1.8.0"
},
"zstandard": {
"hashes": [
"sha256:0aad6090ac164a9d237d096c8af241b8dcd015524ac6dbec1330092dba151657",
"sha256:0bdbe350691dec3078b187b8304e6a9c4d9db3eb2d50ab5b1d748533e746d099",
"sha256:0e1e94a9d9e35dc04bf90055e914077c80b1e0c15454cc5419e82529d3e70728",
"sha256:1243b01fb7926a5a0417120c57d4c28b25a0200284af0525fddba812d575f605",
"sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29",
"sha256:14e10ed461e4807471075d4b7a2af51f5234c8f1e2a0c1d37d5ca49aaaad49e8",
"sha256:1545fb9cb93e043351d0cb2ee73fa0ab32e61298968667bb924aac166278c3fc",
"sha256:1e6e131a4df2eb6f64961cea6f979cdff22d6e0d5516feb0d09492c8fd36f3bc",
"sha256:25fbfef672ad798afab12e8fd204d122fca3bc8e2dcb0a2ba73bf0a0ac0f5f07",
"sha256:2769730c13638e08b7a983b32cb67775650024632cd0476bf1ba0e6360f5ac7d",
"sha256:48b6233b5c4cacb7afb0ee6b4f91820afbb6c0e3ae0fa10abbc20000acdf4f11",
"sha256:4af612c96599b17e4930fe58bffd6514e6c25509d120f4eae6031b7595912f85",
"sha256:52b2b5e3e7670bd25835e0e0730a236f2b0df87672d99d3bf4bf87248aa659fb",
"sha256:57ac078ad7333c9db7a74804684099c4c77f98971c151cee18d17a12649bc25c",
"sha256:62957069a7c2626ae80023998757e27bd28d933b165c487ab6f83ad3337f773d",
"sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce",
"sha256:67829fdb82e7393ca68e543894cd0581a79243cc4ec74a836c305c70a5943f07",
"sha256:7d3bc4de588b987f3934ca79140e226785d7b5e47e31756761e48644a45a6766",
"sha256:7f2afab2c727b6a3d466faee6974a7dad0d9991241c498e7317e5ccf53dbc766",
"sha256:8070c1cdb4587a8aa038638acda3bd97c43c59e1e31705f2766d5576b329e97c",
"sha256:8257752b97134477fb4e413529edaa04fc0457361d304c1319573de00ba796b1",
"sha256:9980489f066a391c5572bc7dc471e903fb134e0b0001ea9b1d3eff85af0a6f1b",
"sha256:9cff89a036c639a6a9299bf19e16bfb9ac7def9a7634c52c257166db09d950e7",
"sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a",
"sha256:a9fec02ce2b38e8b2e86079ff0b912445495e8ab0b137f9c0505f88ad0d61296",
"sha256:b1367da0dde8ae5040ef0413fb57b5baeac39d8931c70536d5f013b11d3fc3a5",
"sha256:b69cccd06a4a0a1d9fb3ec9a97600055cf03030ed7048d4bcb88c574f7895773",
"sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f",
"sha256:c053b7c4cbf71cc26808ed67ae955836232f7638444d709bfc302d3e499364fa",
"sha256:cff891e37b167bc477f35562cda1248acc115dbafbea4f3af54ec70821090965",
"sha256:d12fa383e315b62630bd407477d750ec96a0f438447d0e6e496ab67b8b451d39",
"sha256:d2d61675b2a73edcef5e327e38eb62bdfc89009960f0e3991eae5cc3d54718de",
"sha256:db62cbe7a965e68ad2217a056107cc43d41764c66c895be05cf9c8b19578ce9c",
"sha256:ddb086ea3b915e50f6604be93f4f64f168d3fc3cef3585bb9a375d5834392d4f",
"sha256:df28aa5c241f59a7ab524f8ad8bb75d9a23f7ed9d501b0fed6d40ec3064784e8",
"sha256:e1e0c62a67ff425927898cf43da2cf6b852289ebcc2054514ea9bf121bec10a5",
"sha256:e6048a287f8d2d6e8bc67f6b42a766c61923641dd4022b7fd3f7439e17ba5a4d",
"sha256:e7d560ce14fd209db6adacce8908244503a009c6c39eee0c10f138996cd66d3e",
"sha256:ea68b1ba4f9678ac3d3e370d96442a6332d431e5050223626bdce748692226ea",
"sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546",
"sha256:f1b9703fe2e6b6811886c44052647df7c37478af1b4a1a9078585806f42e5b15",
"sha256:fe6c821eb6870f81d73bf10e5deed80edcac1e63fbc40610e61f340723fd5f7c",
"sha256:ff0852da2abe86326b20abae912d0367878dd0854b8931897d44cfeb18985472"
"sha256:11f0d1aab9516a497137b41e3d3ed4bbf7b2ee2abc79e5c8b010ad286d7464bd",
"sha256:1958100b8a1cc3f27fa21071a55cb2ed32e9e5df4c3c6e661c193437f171cba2",
"sha256:1a90ba9a4c9c884bb876a14be2b1d216609385efb180393df40e5172e7ecf356",
"sha256:1d43501f5f31e22baf822720d82b5547f8a08f5386a883b32584a185675c8fbf",
"sha256:23d2b3c2b8e7e5a6cb7922f7c27d73a9a615f0a5ab5d0e03dd533c477de23004",
"sha256:2612e9bb4977381184bb2463150336d0f7e014d6bb5d4a370f9a372d21916f69",
"sha256:275df437ab03f8c033b8a2c181e51716c32d831082d93ce48002a5227ec93019",
"sha256:2ac9957bc6d2403c4772c890916bf181b2653640da98f32e04b96e4d6fb3252a",
"sha256:2b11ea433db22e720758cba584c9d661077121fcf60ab43351950ded20283440",
"sha256:2fdd53b806786bd6112d97c1f1e7841e5e4daa06810ab4b284026a1a0e484c0b",
"sha256:33591d59f4956c9812f8063eff2e2c0065bc02050837f152574069f5f9f17775",
"sha256:36a47636c3de227cd765e25a21dc5dace00539b82ddd99ee36abae38178eff9e",
"sha256:39b2853efc9403927f9065cc48c9980649462acbdf81cd4f0cb773af2fd734bc",
"sha256:3db41c5e49ef73641d5111554e1d1d3af106410a6c1fb52cf68912ba7a343a0d",
"sha256:445b47bc32de69d990ad0f34da0e20f535914623d1e506e74d6bc5c9dc40bb09",
"sha256:466e6ad8caefb589ed281c076deb6f0cd330e8bc13c5035854ffb9c2014b118c",
"sha256:48f260e4c7294ef275744210a4010f116048e0c95857befb7462e033f09442fe",
"sha256:4ac59d5d6910b220141c1737b79d4a5aa9e57466e7469a012ed42ce2d3995e88",
"sha256:53866a9d8ab363271c9e80c7c2e9441814961d47f88c9bc3b248142c32141d94",
"sha256:589402548251056878d2e7c8859286eb91bd841af117dbe4ab000e6450987e08",
"sha256:68953dc84b244b053c0d5f137a21ae8287ecf51b20872eccf8eaac0302d3e3b0",
"sha256:6c25b8eb733d4e741246151d895dd0308137532737f337411160ff69ca24f93a",
"sha256:7034d381789f45576ec3f1fa0e15d741828146439228dc3f7c59856c5bcd3292",
"sha256:73a1d6bd01961e9fd447162e137ed949c01bdb830dfca487c4a14e9742dccc93",
"sha256:8226a33c542bcb54cd6bd0a366067b610b41713b64c9abec1bc4533d69f51e70",
"sha256:888196c9c8893a1e8ff5e89b8f894e7f4f0e64a5af4d8f3c410f0319128bb2f8",
"sha256:88c5b4b47a8a138338a07fc94e2ba3b1535f69247670abfe422de4e0b344aae2",
"sha256:8a1b2effa96a5f019e72874969394edd393e2fbd6414a8208fea363a22803b45",
"sha256:93e1856c8313bc688d5df069e106a4bc962eef3d13372020cc6e3ebf5e045202",
"sha256:9501f36fac6b875c124243a379267d879262480bf85b1dbda61f5ad4d01b75a3",
"sha256:959665072bd60f45c5b6b5d711f15bdefc9849dd5da9fb6c873e35f5d34d8cfb",
"sha256:a1d67d0d53d2a138f9e29d8acdabe11310c185e36f0a848efa104d4e40b808e4",
"sha256:a493d470183ee620a3df1e6e55b3e4de8143c0ba1b16f3ded83208ea8ddfd91d",
"sha256:a7ccf5825fd71d4542c8ab28d4d482aace885f5ebe4b40faaa290eed8e095a4c",
"sha256:a88b7df61a292603e7cd662d92565d915796b094ffb3d206579aaebac6b85d5f",
"sha256:a97079b955b00b732c6f280d5023e0eefe359045e8b83b08cf0333af9ec78f26",
"sha256:d22fdef58976457c65e2796e6730a3ea4a254f3ba83777ecfc8592ff8d77d303",
"sha256:d75f693bb4e92c335e0645e8845e553cd09dc91616412d1d4650da835b5449df",
"sha256:d8593f8464fb64d58e8cb0b905b272d40184eac9a18d83cf8c10749c3eafcd7e",
"sha256:d8fff0f0c1d8bc5d866762ae95bd99d53282337af1be9dc0d88506b340e74b73",
"sha256:de20a212ef3d00d609d0b22eb7cc798d5a69035e81839f549b538eff4105d01c",
"sha256:e9e9d4e2e336c529d4c435baad846a181e39a982f823f7e4495ec0b0ec8538d2",
"sha256:f058a77ef0ece4e210bb0450e68408d4223f728b109764676e1a13537d056bb0",
"sha256:f1a4b358947a65b94e2501ce3e078bbc929b039ede4679ddb0460829b12f7375",
"sha256:f9b2cde1cd1b2a10246dbc143ba49d942d14fb3d2b4bccf4618d475c65464912",
"sha256:fe3390c538f12437b859d815040763abc728955a52ca6ff9c5d4ac707c4ad98e"
],
"index": "pypi",
"version": "==0.21.0"
"version": "==0.22.0"
}
},
"develop": {}

View file

@ -15,7 +15,7 @@ import logging.handlers
sys.path.append('personal')
log = discord_logging.init_logging(debug=False)
log = discord_logging.get_logger(init=True)
import utils
import classes
@ -27,6 +27,57 @@ NEWLINE_ENCODED = "\n".encode('utf-8')
reg = re.compile(r"\d\d-\d\d-\d\d_\d\d-\d\d")
def build_month(month, input_folder, output_folder, file_type, compression_level):
    """Combine a month of per-minute zst files into one monthly zst file.

    Two passes are made over the minute files. The first pass counts objects
    and bytes so the exact content size can be declared to the compressor;
    the second pass writes every line, newline terminated, to the output.

    Args:
        month: datetime the iteration starts from. It is advanced one minute
            at a time until the same day/time in the following month.
        input_folder: folder holding ``<file_type>/<yy-mm-dd>/`` minute files.
        output_folder: folder holding the ``<file_type>`` output directory.
        file_type: "comments" or "submissions". Used verbatim as the folder
            name and to pick the "RC"/"RS" file prefix.
        compression_level: level passed to ``zstandard.ZstdCompressor``.

    Raises:
        ValueError: if ``file_type`` is not a known object type.
    """
    # The prefix used to be read from a global that only exists when this file
    # runs as a script; derive it here so the function also works when imported.
    prefixes = {"comments": "RC", "comment": "RC", "submissions": "RS", "submission": "RS"}
    if file_type not in prefixes:
        raise ValueError(f"Invalid type: {file_type}")
    prefix = prefixes[file_type]

    if month.month == 12:
        end_time = month.replace(year=month.year + 1, month=1)
    else:
        end_time = month.replace(month=month.month + 1)

    def minute_file_path(minute):
        # Path of the minute file for the given minute
        return os.path.join(input_folder, file_type, minute.strftime('%y-%m-%d'), f"{prefix}_{minute.strftime('%y-%m-%d_%H-%M')}.zst")

    # Pass 1: count, so the total size is known before compression starts
    total_objects = 0
    total_bytes = 0
    minute_iterator = month
    while minute_iterator < end_time:
        for obj, line, _ in utils.read_obj_zst_meta(minute_file_path(minute_iterator)):
            total_bytes += len(line.encode('utf-8'))
            total_bytes += 1  # the newline written after every line
            total_objects += 1
            if total_objects % 1000000 == 0:
                log.info(f"{file_type}: Counting: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {total_objects:,} : {total_bytes:,}")
        minute_iterator += timedelta(minutes=1)

    log.info(f"{file_type}: Counting: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {total_objects:,} : {total_bytes:,}")

    # Pass 2: write everything into a single frame of the declared size
    output_path = os.path.join(output_folder, file_type, f"{prefix}_{month.strftime('%Y-%m')}.zst")
    output_handle = zstandard.ZstdCompressor(level=compression_level, write_content_size=True, write_checksum=True, threads=-1).stream_writer(open(output_path, 'wb'), size=total_bytes)

    count_objects = 0
    count_bytes = 0
    minute_iterator = month
    while minute_iterator < end_time:
        for obj, line, _ in utils.read_obj_zst_meta(minute_file_path(minute_iterator)):
            line_encoded = line.encode('utf-8')
            count_bytes += len(line_encoded)
            count_bytes += 1
            output_handle.write(line_encoded)
            output_handle.write(NEWLINE_ENCODED)
            count_objects += 1
            if count_objects % 100000 == 0:
                log.info(f"{file_type}: Writing: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {count_objects:,}/{total_objects:,} : {count_bytes:,}/{total_bytes:,}")
        minute_iterator += timedelta(minutes=1)

    log.info(f"{file_type}: Writing: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {count_objects:,}/{total_objects:,} : {count_bytes:,}/{total_bytes:,}")
    output_handle.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Combine the minute files into a single month")
parser.add_argument("--type", help="The object type, either comments or submissions", required=True)
@ -57,51 +108,10 @@ if __name__ == "__main__":
log.error(f"Invalid type: {args.type}")
sys.exit(2)
total_objects = 0
total_bytes = 0
minute_iterator = month
if month.month == 12:
end_time = month.replace(year=month.year + 1, month=1)
else:
end_time = month.replace(month=month.month + 1)
while minute_iterator < end_time:
minute_file_path = os.path.join(args.input, args.type, minute_iterator.strftime('%y-%m-%d'), f"{prefix}_{minute_iterator.strftime('%y-%m-%d_%H-%M')}.zst")
for obj, line, _ in utils.read_obj_zst_meta(minute_file_path):
total_bytes += len(line.encode('utf-8'))
total_bytes += 1
total_objects += 1
if total_objects % 1000000 == 0:
log.info(f"Counting: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {total_objects:,} : {total_bytes:,}")
minute_iterator += timedelta(minutes=1)
log.info(f"Counting: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {total_objects:,} : {total_bytes:,}")
output_path = os.path.join(args.output, args.type, f"{prefix}_{month.strftime('%Y-%m')}.zst")
output_handle = zstandard.ZstdCompressor(level=level, write_content_size=True, write_checksum=True, threads=-1).stream_writer(open(output_path, 'wb'), size=total_bytes)
count_objects = 0
count_bytes = 0
minute_iterator = month
if month.month == 12:
end_time = month.replace(year=month.year + 1, month=1)
else:
end_time = month.replace(month=month.month + 1)
while minute_iterator < end_time:
minute_file_path = os.path.join(args.input, args.type, minute_iterator.strftime('%y-%m-%d'), f"{prefix}_{minute_iterator.strftime('%y-%m-%d_%H-%M')}.zst")
for obj, line, _ in utils.read_obj_zst_meta(minute_file_path):
line_encoded = line.encode('utf-8')
count_bytes += len(line_encoded)
count_bytes += 1
output_handle.write(line_encoded)
output_handle.write(NEWLINE_ENCODED)
count_objects += 1
if count_objects % 100000 == 0:
log.info(f"Writing: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {count_objects:,}/{total_objects:,} : {count_bytes:,}/{total_bytes:,}")
minute_iterator += timedelta(minutes=1)
log.info(f"Writing: {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {count_objects:,}/{total_objects:,} : {count_bytes:,}/{total_bytes:,}")
output_handle.close()
build_month(
month,
args.input,
args.output,
args.type,
level
)

View file

@ -15,7 +15,7 @@ import logging.handlers
sys.path.append('personal')
log = discord_logging.init_logging(debug=False)
log = discord_logging.get_logger(init=True)
import utils
import classes
@ -27,6 +27,18 @@ NEWLINE_ENCODED = "\n".encode('utf-8')
reg = re.compile(r"\d\d-\d\d-\d\d_\d\d-\d\d")
def get_pushshift_token(old_token):
    """Return the saved pushshift token, re-authing when it is missing or stale.

    The saved token is used as-is when it exists and differs from
    ``old_token``. Otherwise a new token is requested with ``old_token``
    and saved before being returned.
    """
    saved_token = load_pushshift_token()
    if saved_token is not None and old_token != saved_token:
        return saved_token

    log.info(f"Requesting new token")
    refreshed_token = re_auth_pushshift(old_token)
    save_pushshift_token(refreshed_token)
    return refreshed_token
def save_pushshift_token(token):
with open("pushshift.txt", 'w') as file:
file.write(token)
@ -66,7 +78,7 @@ def re_auth_pushshift(old_token):
sys.exit(1)
def query_pushshift(ids, bearer, object_type):
def query_pushshift(ids, bearer, object_type, pushshift_token_function):
object_name = "comment" if object_type == ObjectType.COMMENT else "submission"
url = f"https://api.pushshift.io/reddit/{object_name}/search?limit=1000&ids={','.join(ids)}"
log.debug(f"pushshift query: {url}")
@ -87,7 +99,7 @@ def query_pushshift(ids, bearer, object_type):
log.warning(f"Pushshift 403, trying reauth: {response.json()}")
log.warning(url)
log.warning(f"'Authorization': Bearer {bearer}")
bearer = re_auth_pushshift(bearer)
bearer = pushshift_token_function(bearer)
time.sleep(2)
if response.status_code != 200:
log.warning(f"4 requests failed with status code {response.status_code}")
@ -112,12 +124,12 @@ def end_of_day(input_minute):
return input_minute.replace(hour=0, minute=0, second=0) + timedelta(days=1)
def build_day(day_to_process, input_folders, output_folder, object_type, reddit, ignore_ids):
pushshift_token = load_pushshift_token()
log.info(f"Using pushshift token: {pushshift_token}")
def build_day(day_to_process, input_folders, output_folder, object_type, reddit, ignore_ids, pushshift_token_function):
file_type = "comments" if object_type == ObjectType.COMMENT else "submissions"
pushshift_token = pushshift_token_function(None)
log.info(f"{file_type}: Using pushshift token: {pushshift_token}")
file_minutes = {}
minute_iterator = day_to_process - timedelta(minutes=2)
end_time = end_of_day(day_to_process) + timedelta(minutes=2)
@ -131,7 +143,7 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
for file in os.listdir(merge_date_folder):
match = reg.search(file)
if not match:
log.info(f"File doesn't match regex: {file}")
log.info(f"{file_type}: File doesn't match regex: {file}")
continue
file_date = datetime.strptime(match.group(), '%y-%m-%d_%H-%M')
if file_date in file_minutes:
@ -147,7 +159,7 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
for obj in utils.read_obj_zst(ingest_file):
if objects.add_object(obj, ingest_type):
unmatched_field = True
log.info(f"Loaded {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {objects.get_counts_string_by_minute(minute_iterator, [IngestType.INGEST, IngestType.RESCAN, IngestType.DOWNLOAD])}")
log.info(f"{file_type}: Loaded {minute_iterator.strftime('%y-%m-%d_%H-%M')} : {objects.get_counts_string_by_minute(minute_iterator, [IngestType.INGEST, IngestType.RESCAN, IngestType.DOWNLOAD])}")
if minute_iterator >= end_time or objects.count_minutes() >= 11:
if minute_iterator > last_minute_of_day:
@ -156,11 +168,11 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
working_highest_minute = minute_iterator - timedelta(minutes=1)
missing_ids, start_id, end_id = objects.get_missing_ids_by_minutes(working_lowest_minute, working_highest_minute, ignore_ids)
log.debug(
f"Backfilling from: {working_lowest_minute.strftime('%y-%m-%d_%H-%M')} ({utils.base36encode(start_id)}|{start_id}) to "
f"{file_type}: Backfilling from: {working_lowest_minute.strftime('%y-%m-%d_%H-%M')} ({utils.base36encode(start_id)}|{start_id}) to "
f"{working_highest_minute.strftime('%y-%m-%d_%H-%M')} ({utils.base36encode(end_id)}|{end_id}) with {len(missing_ids)} ({end_id - start_id}) ids")
for chunk in utils.chunk_list(missing_ids, 50):
pushshift_objects, pushshift_token = query_pushshift(chunk, pushshift_token, object_type)
pushshift_objects, pushshift_token = query_pushshift(chunk, pushshift_token, object_type, pushshift_token_function)
for pushshift_object in pushshift_objects:
if objects.add_object(pushshift_object, IngestType.PUSHSHIFT):
unmatched_field = True
@ -188,7 +200,7 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
output_handle.write(NEWLINE_ENCODED)
objects.delete_object_id(obj['id'])
log.info(
f"Wrote up to {working_lowest_minute.strftime('%y-%m-%d_%H-%M')} : "
f"{file_type}: Wrote up to {working_lowest_minute.strftime('%y-%m-%d_%H-%M')} : "
f"{objects.get_counts_string_by_minute(working_lowest_minute, [IngestType.PUSHSHIFT, IngestType.BACKFILL, IngestType.MISSING])}")
output_handle.close()
working_lowest_minute += timedelta(minutes=1)
@ -197,13 +209,20 @@ def build_day(day_to_process, input_folders, output_folder, object_type, reddit,
discord_logging.flush_discord()
if unmatched_field:
log.warning(f"Unmatched field, aborting")
log.warning(f"{file_type}: Unmatched field, aborting")
discord_logging.flush_discord()
sys.exit(1)
minute_iterator += timedelta(minutes=1)
log.info(f"Finished day {day_to_process.strftime('%y-%m-%d')}: {objects.get_counts_string()}")
log.info(f"{file_type}: Finished day {day_to_process.strftime('%y-%m-%d')}: {objects.get_counts_string()}")
def merge_and_backfill(start_date, end_date, input_folders, output_folder, object_type, ignore_ids, reddit_username, pushshift_token_function):
    """Run build_day for every day from start_date through end_date inclusive.

    A single praw.Reddit instance is created from ``reddit_username`` and
    shared by every build_day call.
    """
    reddit = praw.Reddit(reddit_username)
    current_day = start_date
    while current_day <= end_date:
        build_day(
            current_day,
            input_folders,
            output_folder,
            object_type,
            reddit,
            ignore_ids,
            pushshift_token_function,
        )
        # step to midnight of the following day
        current_day = end_of_day(current_day)
if __name__ == "__main__":
@ -254,19 +273,22 @@ if __name__ == "__main__":
start_id, end_id = id_range.split("-")
ignore_ids.append((utils.base36decode(start_id), utils.base36decode(end_id)))
user_name = "Watchful12"
reddit = praw.Reddit(user_name)
discord_logging.init_discord_logging(
section_name=None,
log_level=logging.WARNING,
logging_webhook=reddit.config.custom["logging_webhook"]
section_name="Watchful12",
log_level=logging.WARNING
)
if args.pushshift is not None:
log.warning(f"Saving pushshift token: {args.pushshift}")
save_pushshift_token(args.pushshift)
while start_date <= end_date:
build_day(start_date, input_folders, args.output, object_type, reddit, ignore_ids)
start_date = end_of_day(start_date)
merge_and_backfill(
start_date,
end_date,
input_folders,
args.output,
object_type,
ignore_ids,
"Watchful12",
get_pushshift_token
)

260
personal/process_month.py Normal file
View file

@ -0,0 +1,260 @@
import sys
sys.path.append('personal')
sys.path.append('combine')
import os
import argparse
import json
import time
import logging.handlers
import requests
import praw
from datetime import datetime, timedelta
import multiprocessing_logging
import discord_logging
import multiprocessing
multiprocessing_logging.install_mp_handler()
log = discord_logging.init_logging()
import utils
from transform import split_blocks_by_minutes
from combine.merge_and_backfill import build_day, IngestType, ObjectType
from combine import build_month
def get_pushshift_token(old_token):
    """Return a usable pushshift token, refreshing the saved one when needed.

    The check-and-refresh sequence runs under the module-level
    ``pushshift_lock``. A token that differs from ``old_token`` and is
    already saved is reused; otherwise a new one is requested with
    ``old_token`` and saved.

    Args:
        old_token: the token the caller was using, or None if it has none.

    Returns:
        The token to use for the next request.

    Raises:
        ValueError: if there is no saved token and ``old_token`` is None.
    """
    # ``with`` releases the lock even when ValueError is raised or re-auth
    # throws; a bare acquire()/release() pair left it held in those cases.
    with pushshift_lock:
        saved_token = load_pushshift_token()
        if saved_token is None or saved_token == "" or old_token == saved_token:
            if old_token is None:
                log.warning("No saved or passed in token")
                save_pushshift_token("")
                raise ValueError("No saved or passed in token")

            log.info(f"Requesting new token")
            result_token = re_auth_pushshift(old_token)
            save_pushshift_token(result_token)
        else:
            result_token = saved_token

    return result_token
def save_pushshift_token(token):
    """Overwrite pushshift.txt in the working directory with ``token``."""
    with open("pushshift.txt", 'w') as token_file:
        token_file.write(token)
def load_pushshift_token():
    """Return the stripped contents of pushshift.txt, or None if it is absent."""
    token_path = "pushshift.txt"
    if os.path.exists(token_path):
        with open(token_path, 'r') as token_file:
            return token_file.read().strip()
    return None
def re_auth_pushshift(old_token):
    """Ask the pushshift auth endpoint to refresh ``old_token``.

    Returns the new access token (after saving it) when the response has
    one. In every other case ``old_token`` is returned unchanged; when the
    response says the token is still active, this waits 5 seconds first.
    """
    url = f"https://auth.pushshift.io/refresh?access_token={old_token}"
    log.warning(f"Reauth request: {url}")
    result = requests.post(url).json()
    log.warning(f"Reauth response: {str(result)}")
    discord_logging.flush_discord()

    if 'access_token' in result:
        new_token = result['access_token']
        log.warning(f"New pushshift token: {new_token}")
        save_pushshift_token(new_token)
        discord_logging.flush_discord()
        return new_token

    if 'detail' not in result:
        log.warning(f"Something went wrong re-authing")
        discord_logging.flush_discord()
        return old_token

    detail = result['detail']
    if detail == 'Access token is still active and can not be refreshed.':
        # the current token is still valid, so pause and let the caller retry with it
        log.warning(f"Access token still active, trying request again")
        time.sleep(5)
        return old_token

    log.warning(f"Reauth failed: {detail}")
    discord_logging.flush_discord()
    return old_token
def init(p_lock):
    """Pool initializer: store ``p_lock`` as the module-level pushshift_lock."""
    globals()["pushshift_lock"] = p_lock
def save_status(status_json, stages, month):
    """Write the stages dict and month to ``status_json`` as indented JSON.

    Values json cannot encode natively (such as datetimes) are written
    using their ``str()`` form.
    """
    log.debug(f"Saving status: {stages}")
    # serialize before opening so a failed dump never truncates the file
    serialized = json.dumps({"stages": stages, "month": month}, indent=4, default=str)
    with open(status_json, 'w') as handle:
        handle.write(serialized)
def load_status(status_json):
    """Load the saved stages and month, or return fresh defaults.

    Returns:
        A ``(stages, month)`` tuple. When ``status_json`` does not exist,
        every stage is unstarted and month is None. Otherwise each saved
        non-null "merge" value is parsed back into a datetime.
    """
    if not os.path.exists(status_json):
        fresh_stages = {
            object_kind: {
                "split": False,
                "merge": None,  # 24-02-01
                "build": False,
            }
            for object_kind in ("comment", "submission")
        }
        return fresh_stages, None

    with open(status_json, 'r') as handle:
        saved = json.load(handle)
    for stage in saved["stages"].values():
        merged_through = stage["merge"]
        if merged_through is not None:
            stage["merge"] = datetime.strptime(merged_through, "%Y-%m-%d %H:%M:%S")
    return saved["stages"], saved["month"]
def end_of_day(input_minute):
    """Return the start of the day after ``input_minute``.

    Hour, minute and second are zeroed and one day is added.
    """
    start_of_day = input_minute.replace(hour=0, minute=0, second=0)
    return start_of_day + timedelta(days=1)
def process(queue, base_folder, month, file_type, type_stages, reddit_username, compression_level, ignore_ids):
    """Run the outstanding split, merge and build stages for one object type.

    Progress is reported by putting ``(file_type, stage, status)`` tuples on
    ``queue``. Any exception is reported as ``(file_type, "error", str(err))``
    instead of propagating.

    Args:
        queue: queue that receives the progress tuples.
        base_folder: root folder; "reddit/blocks" and "ingest/..." live under it.
        month: the month as a "yy-mm" string.
        file_type: "comment" or "submission".
        type_stages: dict with "split" (bool), "merge" (datetime or None)
            and "build" (bool) describing what has already been done.
        reddit_username: passed to praw.Reddit.
        compression_level: zstandard level for the monthly file; an int or
            a numeric string.
        ignore_ids: passed through to build_day.

    Returns:
        False when the input file for the split stage is missing, otherwise None.
    """
    try:
        file_prefix = "RC" if file_type == "comment" else "RS"
        if not type_stages["split"]:
            split_file = os.path.join(base_folder, "reddit", "blocks", f"{file_prefix}_20{month}.zst")
            if not os.path.exists(split_file):
                log.info(f"{file_type}: File {split_file} doesn't exist, checking for blocks")
                split_file = os.path.join(base_folder, "reddit", "blocks", f"{file_prefix}_20{month}.zst_blocks")
                if not os.path.exists(split_file):
                    log.error(f"{file_type}: File {split_file} doesn't exist, aborting")
                    return False

            split_folder = os.path.join(base_folder, "ingest", "download")

            log.info(f"{file_type}: Starting {file_type} split")
            log.info(f"{file_type}: Reading from: {split_file}")
            log.info(f"{file_type}: Writing to: {split_folder}")
            split_blocks_by_minutes.split_by_minutes(split_file, split_folder)

            log.info(f"{file_type}: {file_type} split complete")
            queue.put((file_type, "split", True))

        # Keep the parsed month start separately: start_date is advanced by the
        # merge loop, but the build stage needs the original month as a datetime.
        month_start = datetime.strptime(month, "%y-%m")
        if month_start.month == 12:
            end_date = month_start.replace(year=month_start.year + 1, month=1)
        else:
            end_date = month_start.replace(month=month_start.month + 1)

        start_date = month_start
        if type_stages["merge"] is None or type_stages["merge"] < end_date:
            if type_stages["merge"] is not None:
                # resume from the last day that was recorded as merged
                start_date = type_stages["merge"]

            log.info(f"{file_type}: Starting {file_type} merge from {start_date}")

            reddit = praw.Reddit(reddit_username)

            input_folders = [
                (os.path.join(base_folder, "ingest", "ingest"), IngestType.INGEST),
                (os.path.join(base_folder, "ingest", "rescan"), IngestType.RESCAN),
                (os.path.join(base_folder, "ingest", "download"), IngestType.DOWNLOAD),
            ]
            for input_folder in input_folders:
                log.info(f"{file_type}: Reading from: {input_folder[0]} : {input_folder[1]}")
            combined_folder = os.path.join(base_folder, "ingest", "combined")
            log.info(f"{file_type}: Writing to: {combined_folder}")
            # NOTE(review): end_date is the first day of the NEXT month, so "<=" also
            # builds that day - confirm whether this should be "<"
            while start_date <= end_date:
                build_day(
                    start_date,
                    input_folders,
                    combined_folder,
                    ObjectType.COMMENT if file_type == "comment" else ObjectType.SUBMISSION,
                    reddit,
                    ignore_ids,
                    get_pushshift_token
                )
                start_date = end_of_day(start_date)
                queue.put((file_type, "merge", start_date))
            log.info(f"{file_type}: {file_type} merge complete")

        if not type_stages["build"]:
            log.info(f"{file_type}: Starting {file_type} build")

            input_folder = os.path.join(base_folder, "ingest", "combined")

            log.info(f"{file_type}: Reading from: {input_folder}")
            log.info(f"{file_type}: Writing to: {base_folder}")
            # build_month needs a datetime (it uses .month/.replace/.strftime), the
            # plural object name as folder name, and an integer compression level
            build_month.build_month(
                month_start,
                input_folder,
                base_folder,
                "comments" if file_type == "comment" else "submissions",
                int(compression_level)
            )
            queue.put((file_type, "build", True))
            log.info(f"{file_type}: {file_type} build complete")

        log.info(f"{file_type}: {file_type} all steps complete")
    except Exception as err:
        # worker boundary: report the failure to the parent instead of raising
        queue.put((file_type, "error", str(err)))
if __name__ == "__main__":
    # Imported under another name because "queue" is used as a variable below
    from queue import Empty as QueueEmpty

    parser = argparse.ArgumentParser(description="")
    parser.add_argument('month', help='Month to process')
    parser.add_argument('folder', help='Folder under which all the files are stored')
    parser.add_argument("--ignore_ids", help="Ignore ids between the id ranges listed", default=None)
    parser.add_argument("--level", help="The compression ratio to output at", default="22")
    args = parser.parse_args()

    # --ignore_ids is a comma separated list of "start-end" base36 id ranges
    ignore_ids = []
    if args.ignore_ids is not None:
        for id_range in args.ignore_ids.split(","):
            start_id, end_id = id_range.split("-")
            ignore_ids.append((utils.base36decode(start_id), utils.base36decode(end_id)))

    discord_logging.init_discord_logging(
        section_name="Watchful12",
        log_level=logging.WARNING,
    )

    # The status file lets an interrupted run resume, but only for the same month
    status_file = "process.json"
    stages, month = load_status(status_file)

    if month is not None and args.month != month:
        log.error(f"Month does not match saved month, aborting: {month} : {args.month}")
        sys.exit(0)
    month = args.month
    log.info(f"Processing {month}")

    multiprocessing.set_start_method('spawn')
    queue = multiprocessing.Manager().Queue()
    p_lock = multiprocessing.Lock()
    # One worker per object type; both share the pushshift token lock via init
    with multiprocessing.Pool(processes=2, initializer=init, initargs=(p_lock,)) as pool:
        arguments = []
        for file_type, type_stages in stages.items():
            arguments.append((queue, args.folder, month, file_type, type_stages, "Watchful12", args.level, ignore_ids))
        workers = pool.starmap_async(process, arguments, chunksize=1, error_callback=log.info)
        while not workers.ready() or not queue.empty():
            # Poll with a timeout: a worker can finish without queueing anything,
            # and a blocking get() would then wait forever.
            try:
                file_type, stage, status = queue.get(timeout=1)
            except QueueEmpty:
                continue
            if stage == "error":
                log.error(f"Error in {file_type}: {status}")
            stages[file_type][stage] = status
            save_status(status_file, stages, month)

View file

@ -8,39 +8,39 @@ from datetime import datetime
import json
import argparse
log = discord_logging.init_logging()
log = discord_logging.get_logger(init=True)
import utils
NEWLINE_ENCODED = "\n".encode('utf-8')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Take a zst_blocks file and split it by minute chunks")
parser.add_argument('--input', help='Input file', required=True)
parser.add_argument('--output', help='Output folder', required=True)
args = parser.parse_args()
def split_by_minutes(input_file, output_file):
file_type = "comments" if "RC" in input_file else "submissions"
# input_file = r"\\MYCLOUDPR4100\Public\reddit\blocks\RS_2023-10.zst_blocks"
# output_folder = r"\\MYCLOUDPR4100\Public\ingest\download"
file_type = "comments" if "RC" in args.input else "submissions"
log.info(f"Input file: {args.input}")
log.info(f"Output folder: {args.output}")
log.info(f"{file_type}: Input file: {input_file}")
log.info(f"{file_type}: Output folder: {output_file}")
previous_minute, output_handle, created_utc = None, None, None
count_objects, count_minute = 0, 0
for obj in utils.read_obj_zst_blocks(args.input):
if input_file.endswith(".zst"):
reader = utils.read_obj_zst(input_file)
elif input_file.endswith(".zst_blocks"):
reader = utils.read_obj_zst_blocks(input_file)
else:
log.error(f"{file_type}: Unsupported file type: {input_file}")
return
for obj in reader:
created_utc = datetime.utcfromtimestamp(obj["created_utc"])
current_minute = created_utc.replace(second=0)
if previous_minute is None or current_minute > previous_minute:
log.info(f"{created_utc.strftime('%y-%m-%d_%H-%M')}: {count_objects:,} : {count_minute: ,}")
log.info(f"{file_type}: {created_utc.strftime('%y-%m-%d_%H-%M')}: {count_objects:,} : {count_minute: ,}")
previous_minute = current_minute
count_minute = 0
if output_handle is not None:
output_handle.close()
output_path = os.path.join(args.output, file_type, created_utc.strftime('%y-%m-%d'))
output_path = os.path.join(output_file, file_type, created_utc.strftime('%y-%m-%d'))
if not os.path.exists(output_path):
os.makedirs(output_path)
output_path = os.path.join(output_path, f"{('RC' if file_type == 'comments' else 'RS')}_{created_utc.strftime('%y-%m-%d_%H-%M')}.zst")
@ -51,6 +51,18 @@ if __name__ == "__main__":
output_handle.write(json.dumps(obj, sort_keys=True).encode('utf-8'))
output_handle.write(NEWLINE_ENCODED)
log.info(f"{created_utc.strftime('%y-%m-%d_%H-%M')}: {count_objects:,} : {count_minute: ,}")
if created_utc is None:
log.error(f"{file_type}: {input_file} appears to be empty")
sys.exit(1)
log.info(f"{file_type}: {created_utc.strftime('%y-%m-%d_%H-%M')}: {count_objects:,} : {count_minute: ,}")
if output_handle is not None:
output_handle.close()
if __name__ == "__main__":
	# script entry point: take the input file and output folder from the command line and run the split
	parser = argparse.ArgumentParser(description="Take a zst_blocks file and split it by minute chunks")
	# both options are mandatory, so register them in a single pass
	for flag, description in (('--input', 'Input file'), ('--output', 'Output folder')):
		parser.add_argument(flag, help=description, required=True)
	args = parser.parse_args()
	split_by_minutes(input_file=args.input, output_file=args.output)

View file

@ -7,7 +7,7 @@ from datetime import datetime
import logging.handlers
# put the path to the input file, or to a folder of files to process all of them
input_file = r"\\MYCLOUDPR4100\Public\askreddit_comments.zst"
input_file = r"\\MYCLOUDPR4100\Public\reddit\subreddits23\wallstreetbets_submissions.zst"
# put the name or path to the output file. The file extension from below will be added automatically. If the input file is a folder, the output will be treated as a folder as well
output_file = r"\\MYCLOUDPR4100\Public\output"
# the format to output in, pick from the following options
@ -29,7 +29,7 @@ write_bad_lines = True
# only output items between these two dates
from_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
to_date = datetime.strptime("2025-12-31", "%Y-%m-%d")
to_date = datetime.strptime("2030-12-31", "%Y-%m-%d")
# the field to filter on, the values to filter with and whether it should be an exact match
# some examples:
@ -75,6 +75,7 @@ to_date = datetime.strptime("2025-12-31", "%Y-%m-%d")
# run the script one last time and now you have a file called "filtered_comments.csv" that only has comments from your submissions above
# if you want only top level comments instead of all comments, you can set field to "parent_id" instead of "link_id"
# change this to field = None if you don't want to filter by anything
field = "body"
values = ['']
# if you have a long list of values, you can put them in a file and put the filename here. If set, this overrides the value list above

View file

@ -14,6 +14,13 @@ import json
# the script will look for both comments and submissions files for each subreddit
folder = r"\\MYCLOUDPR4100\Public\reddit\subreddits23"
# names of the subreddits to process; each name is listed once
# (a repeated entry would presumably make the script handle that subreddit twice)
subreddits = [
	"aquarium",
	"opiates",
	"axolotls",
	"piercing",
	"titanfolk",
	"AskOuija",
	"DPH",
	"dxm",
]

View file

@ -122,4 +122,10 @@ same_subreddit_bot
SuicideAwarenessBot
thebenshapirobot
these_days_bot
totes_meta_bot
totes_meta_bot
aardBot
gifv-bot
I_Love_You-BOT
imdad_bot
metric_units
YoUaReSoHiLaRiOuS

View file

@ -17,9 +17,9 @@ import logging.handlers
# put the path to the input file
input_file_path = r"\\MYCLOUDPR4100\Public\reddit\subreddits\intel_comments.zst"
input_file_path = r"\\MYCLOUDPR4100\Public\tools\PushshiftDumps\Straight-Wrap-172_submissions.zst"
# put the path to the output file, with the csv extension
output_file_path = r"\\MYCLOUDPR4100\Public\intel_comments.csv"
output_file_path = r"\\MYCLOUDPR4100\Public\Straight-Wrap-172_submissions.csv"
# if you want a custom set of fields, put them in the following list. If you leave it empty the script will use a default set of fields
fields = []