mirror of
https://github.com/Watchful1/PushshiftDumps.git
synced 2025-07-01 18:06:48 -04:00
Add personal scripts to git
This commit is contained in:
parent
e4e8ad480c
commit
3be517ef12
12 changed files with 531 additions and 113 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,7 +1,6 @@
|
|||
.idea/*
|
||||
logs/*
|
||||
__pycache__/*
|
||||
personal/*
|
||||
*.db
|
||||
*.ini
|
||||
*.txt
|
1
Pipfile
1
Pipfile
|
@ -8,6 +8,7 @@ zstandard = "*"
|
|||
discord-logging = {editable = true, git = "https://github.com/Watchful1/DiscordLogging.git"}
|
||||
requests = "*"
|
||||
pymongo = {extras = ["srv"], version = "*"}
|
||||
scipy = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
|
|
271
Pipfile.lock
generated
271
Pipfile.lock
generated
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "e4f28d9f24e5ee1a4cbc98c2b1f4c21ee3f77e61e24cbbbba195dbd705369d73"
|
||||
"sha256": "7b82a2e74212a0cbf95cc089a5f0752f547e89f374d95e3d84ccf22b1ecbd6bd"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -25,11 +25,11 @@
|
|||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
|
||||
"sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
|
||||
"sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721",
|
||||
"sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.7"
|
||||
"version": "==2.0.9"
|
||||
},
|
||||
"discord-logging": {
|
||||
"editable": true,
|
||||
|
@ -51,121 +51,133 @@
|
|||
"markers": "python_version >= '3'",
|
||||
"version": "==3.3"
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:0b78ecfa070460104934e2caf51694ccd00f37d5e5dbe76f021b1b0b0d221823",
|
||||
"sha256:1247ef28387b7bb7f21caf2dbe4767f4f4175df44d30604d42ad9bd701ebb31f",
|
||||
"sha256:1403b4e2181fc72664737d848b60e65150f272fe5a1c1cbc16145ed43884065a",
|
||||
"sha256:170b2a0805c6891ca78c1d96ee72e4c3ed1ae0a992c75444b6ab20ff038ba2cd",
|
||||
"sha256:2e4ed57f45f0aa38beca2a03b6532e70e548faf2debbeb3291cfc9b315d9be8f",
|
||||
"sha256:32fe5b12061f6446adcbb32cf4060a14741f9c21e15aaee59a207b6ce6423469",
|
||||
"sha256:34f3456f530ae8b44231c63082c8899fe9c983fd9b108c997c4b1c8c2d435333",
|
||||
"sha256:4c9c23158b87ed0e70d9a50c67e5c0b3f75bcf2581a8e34668d4e9d7474d76c6",
|
||||
"sha256:5d95668e727c75b3f5088ec7700e260f90ec83f488e4c0aaccb941148b2cd377",
|
||||
"sha256:615d4e328af7204c13ae3d4df7615a13ff60a49cb0d9106fde07f541207883ca",
|
||||
"sha256:69077388c5a4b997442b843dbdc3a85b420fb693ec8e33020bb24d647c164fa5",
|
||||
"sha256:74b85a17528ca60cf98381a5e779fc0264b4a88b46025e6bcbe9621f46bb3e63",
|
||||
"sha256:81225e58ef5fce7f1d80399575576fc5febec79a8a2742e8ef86d7b03beef49f",
|
||||
"sha256:8890b3360f345e8360133bc078d2dacc2843b6ee6059b568781b15b97acbe39f",
|
||||
"sha256:92aafa03da8658609f59f18722b88f0a73a249101169e28415b4fa148caf7e41",
|
||||
"sha256:9864424631775b0c052f3bd98bc2712d131b3e2cd95d1c0c68b91709170890b0",
|
||||
"sha256:9e6f5f50d1eff2f2f752b3089a118aee1ea0da63d56c44f3865681009b0af162",
|
||||
"sha256:a3deb31bc84f2b42584b8c4001c85d1934dbfb4030827110bc36bfd11509b7bf",
|
||||
"sha256:ad010846cdffe7ec27e3f933397f8a8d6c801a48634f419e3d075db27acf5880",
|
||||
"sha256:b1e2312f5b8843a3e4e8224b2b48fe16119617b8fc0a54df8f50098721b5bed2",
|
||||
"sha256:bc988afcea53e6156546e5b2885b7efab089570783d9d82caf1cfd323b0bb3dd",
|
||||
"sha256:c449eb870616a7b62e097982c622d2577b3dbc800aaf8689254ec6e0197cbf1e",
|
||||
"sha256:c74c699b122918a6c4611285cc2cad4a3aafdb135c22a16ec483340ef97d573c",
|
||||
"sha256:c885bfc07f77e8fee3dc879152ba993732601f1f11de248d4f357f0ffea6a6d4",
|
||||
"sha256:e3c3e990274444031482a31280bf48674441e0a5b55ddb168f3a6db3e0c38ec8",
|
||||
"sha256:e4799be6a2d7d3c33699a6f77201836ac975b2e1b98c2a07f66a38f499cb50ce",
|
||||
"sha256:e6c76a87633aa3fa16614b61ccedfae45b91df2767cf097aa9c933932a7ed1e0",
|
||||
"sha256:e89717274b41ebd568cd7943fc9418eeb49b1785b66031bc8a7f6300463c5898",
|
||||
"sha256:f5162ec777ba7138906c9c274353ece5603646c6965570d82905546579573f73",
|
||||
"sha256:fde96af889262e85aa033f8ee1d3241e32bf36228318a61f1ace579df4e8170d"
|
||||
],
|
||||
"markers": "python_version < '3.11' and python_version >= '3.7'",
|
||||
"version": "==1.21.4"
|
||||
},
|
||||
"pymongo": {
|
||||
"extras": [
|
||||
"srv"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:02e0c088f189ca69fac094cb5f851b43bbbd7cec42114495777d4d8f297f7f8a",
|
||||
"sha256:138248c542051eb462f88b50b0267bd5286d6661064bab06faa0ef6ac30cdb4b",
|
||||
"sha256:13a7c6d055af58a1e9c505e736da8b6a2e95ccc8cec10b008143f7a536e5de8a",
|
||||
"sha256:13d74bf3435c1e58d8fafccc0d5e87f246ae2c6e9cbef4b35e32a1c3759e354f",
|
||||
"sha256:15dae01341571d0af51526b7a21648ca575e9375e16ba045c9860848dfa8952f",
|
||||
"sha256:17238115e6d37f5423b046cb829f1ca02c4ea7edb163f5b8b88e0c975dc3fec9",
|
||||
"sha256:180b405e17b90a877ea5dbc5efe7f4c171af4c89323148e100c0f12cedb86f12",
|
||||
"sha256:1821ce4e5a293313947fd017bbd2d2535aa6309680fa29b33d0442d15da296ec",
|
||||
"sha256:1a7b138a04fdd17849930dc8bf664002e17db38448850bfb96d200c9c5a8b3a1",
|
||||
"sha256:1c4e51a3b69789b6f468a8e881a13f2d1e8f5e99e41f80fd44845e6ec0f701e1",
|
||||
"sha256:1d55982e5335925c55e2b87467043866ce72bd30ea7e7e3eeed6ec3d95a806d4",
|
||||
"sha256:1fa6f08ddb6975371777f97592d35c771e713ee2250e55618148a5e57e260aff",
|
||||
"sha256:2174d3279b8e2b6d7613b338f684cd78ff7adf1e7ec5b7b7bde5609a129c9898",
|
||||
"sha256:2462a68f6675da548e333fa299d8e9807e00f95a4d198cfe9194d7be69f40c9b",
|
||||
"sha256:25fd76deabe9ea37c8360c362b32f702cc095a208dd1c5328189938ca7685847",
|
||||
"sha256:287c2a0063267c1458c4ddf528b44063ce7f376a6436eea5bccd7f625bbc3b5e",
|
||||
"sha256:2d3abe548a280b49269c7907d5b71199882510c484d680a5ea7860f30c4a695f",
|
||||
"sha256:2fa101bb23619120673899694a65b094364269e597f551a87c4bdae3a474d726",
|
||||
"sha256:2fda3b3fb5c0d159195ab834b322a23808f1b059bcc7e475765abeddee6a2529",
|
||||
"sha256:303531649fa45f96b694054c1aa02f79bda32ef57affe42c5c339336717eed74",
|
||||
"sha256:36806ee53a85c3ba73939652f2ced2961e6a77cfbae385cd83f2e24cd97964b7",
|
||||
"sha256:37a63da5ee623acdf98e6d511171c8a5827a6106b0712c18af4441ef4f11e6be",
|
||||
"sha256:3a2fcbd04273a509fa85285d9eccf17ab65ce440bd4f5e5a58c978e563cd9e9a",
|
||||
"sha256:3b40e36d3036bfe69ba63ec8e746a390721f75467085a0384b528e1dda532c69",
|
||||
"sha256:4168b6c425d783e81723fc3dc382d374a228ff29530436a472a36d9f27593e73",
|
||||
"sha256:444c00ebc20f2f9dc62e34f7dc9453dc2f5f5a72419c8dccad6e26d546c35712",
|
||||
"sha256:45d6b47d70ed44e3c40bef618ed61866c48176e7e5dff80d06d8b1a6192e8584",
|
||||
"sha256:460bdaa3f65ddb5b7474ae08589a1763b5da1a78b8348351b9ba1c63b459d67d",
|
||||
"sha256:47ed77f62c8417a86f9ad158b803f3459a636386cb9d3d4e9e7d6a82d051f907",
|
||||
"sha256:48722e91981bb22a16b0431ea01da3e1cc5b96805634d3b8d3c2a5315c1ce7f1",
|
||||
"sha256:49b0d92724d3fce1174fd30b0b428595072d5c6b14d6203e46a9ea347ae7b439",
|
||||
"sha256:4a2d73a9281faefb273a5448f6d25f44ebd311ada9eb79b6801ae890508fe231",
|
||||
"sha256:4f4bc64fe9cbd70d46f519f1e88c9e4677f7af18ab9cd4942abce2bcfa7549c3",
|
||||
"sha256:5067c04d3b19c820faac6342854d887ade58e8d38c3db79b68c2a102bbb100e7",
|
||||
"sha256:51437c77030bed72d57d8a61e22758e3c389b13fea7787c808030002bb05ca39",
|
||||
"sha256:515e4708d6567901ffc06476a38abe2c9093733f52638235d9f149579c1d3de0",
|
||||
"sha256:5183b698d6542219e4135de583b57bc6286bd37df7f645b688278eb919bfa785",
|
||||
"sha256:56feb80ea1f5334ccab9bd16a5161571ab70392e51fcc752fb8a1dc67125f663",
|
||||
"sha256:573e2387d0686976642142c50740dfc4d3494cc627e2a7d22782b99f70879055",
|
||||
"sha256:58a67b3800476232f9989e533d0244060309451b436d46670a53e6d189f1a7e7",
|
||||
"sha256:5e3833c001a04aa06a28c6fd9628256862a654c09b0f81c07734b5629bc014ab",
|
||||
"sha256:5f5fe59328838fa28958cc06ecf94be585726b97d637012f168bc3c7abe4fd81",
|
||||
"sha256:6235bf2157aa46e53568ed79b70603aa8874baa202d5d1de82fa0eb917696e73",
|
||||
"sha256:63be03f7ae1e15e72a234637ec7941ef229c7ab252c9ff6af48bba1e5418961c",
|
||||
"sha256:65f159c445761cab04b665fc448b3fc008aebc98e54fdcbfd1aff195ef1b1408",
|
||||
"sha256:67e0b2ad3692f6d0335ae231a40de55ec395b6c2e971ad6f55b162244d1ec542",
|
||||
"sha256:68409171ab2aa7ccd6e8e839233e4b8ddeec246383c9a3698614e814739356f9",
|
||||
"sha256:6a96c04ce39d66df60d9ce89f4c254c4967bc7d9e2e2c52adc58f47be826ee96",
|
||||
"sha256:6ead0126fb4424c6c6a4fdc603d699a9db7c03cdb8eac374c352a75fec8a820a",
|
||||
"sha256:6eb6789f26c398c383225e1313c8e75a7d290d323b8eaf65f3f3ddd0eb8a5a3c",
|
||||
"sha256:6f07888e3b73c0dfa46f12d098760494f5f23fd66923a6615edfe486e6a7649c",
|
||||
"sha256:6f0f0a10f128ea0898e607d351ebfabf70941494fc94e87f12c76e2894d8e6c4",
|
||||
"sha256:704879b6a54c45ad76cea7c6789c1ae7185050acea7afd15b58318fa1932ed45",
|
||||
"sha256:7117bfd8827cfe550f65a3c399dcd6e02226197a91c6d11a3540c3e8efc686d6",
|
||||
"sha256:712de1876608fd5d76abc3fc8ec55077278dd5044073fbe9492631c9a2c58351",
|
||||
"sha256:75c7ef67b4b8ec070e7a4740764f6c03ec9246b59d95e2ae45c029d41cb9efa1",
|
||||
"sha256:77dddf596fb065de29fb39992fbc81301f7fd0003be649b7fa7448c77ca53bed",
|
||||
"sha256:7abc87e45b572eb6d17a50422e69a9e5d6f13e691e821fe2312df512500faa50",
|
||||
"sha256:7d8cdd2f070c71366e64990653522cce84b08dc26ab0d1fa19aa8d14ee0cf9ba",
|
||||
"sha256:81ce5f871f5d8e82615c8bd0b34b68a9650204c8b1a04ce7890d58c98eb66e39",
|
||||
"sha256:837cdef094f39c6f4a2967abc646a412999c2540fbf5d3cce1dd3b671f4b876c",
|
||||
"sha256:849e641cfed05c75d772f9e9018f42c5fbd00655d43d52da1b9c56346fd3e4cc",
|
||||
"sha256:87114b995506e7584cf3daf891e419b5f6e7e383e7df6267494da3a76312aa22",
|
||||
"sha256:87db421c9eb915b8d9a9a13c5b2ee338350e36ee83e26ff0adfc48abc5db3ac3",
|
||||
"sha256:8851544168703fb519e95556e3b463fca4beeef7ed3f731d81a68c8268515d9d",
|
||||
"sha256:891f541c7ed29b95799da0cd249ae1db1842777b564e8205a197b038c5df6135",
|
||||
"sha256:8f87f53c9cd89010ae45490ec2c963ff18b31f5f290dc08b04151709589fe8d9",
|
||||
"sha256:9641be893ccce7d192a0094efd0a0d9f1783a1ebf314b4128f8a27bfadb8a77c",
|
||||
"sha256:979e34db4f3dc5710c18db437aaf282f691092b352e708cb2afd4df287698c76",
|
||||
"sha256:9b62d84478f471fdb0dcea3876acff38f146bd23cbdbed15074fb4622064ec2e",
|
||||
"sha256:a472ca3d43d33e596ff5836c6cc71c3e61be33f44fe1cfdab4a1100f4af60333",
|
||||
"sha256:a5dbeeea6a375fbd79448b48a54c46fc9351611a03ef8398d2a40b684ce46194",
|
||||
"sha256:a7430f3987d232e782304c109be1d0e6fff46ca6405cb2479e4d8d08cd29541e",
|
||||
"sha256:a81e52dbf95f236a0c89a5abcd2b6e1331da0c0312f471c73fae76c79d2acf6b",
|
||||
"sha256:aa434534cc91f51a85e3099dc257ee8034b3d2be77f2ca58fb335a686e3a681f",
|
||||
"sha256:ab27d6d7d41a66d9e54269a290d27cd5c74f08e9add0054a754b4821026c4f42",
|
||||
"sha256:adb37bf22d25a51b84d989a2a5c770d4514ac590201eea1cb50ce8c9c5257f1d",
|
||||
"sha256:afb16330ab6efbbf995375ad94e970fa2f89bb46bd10d854b7047620fdb0d67d",
|
||||
"sha256:b1b06038c9940a49c73db0aeb0f6809b308e198da1326171768cf68d843af521",
|
||||
"sha256:b1e6d1cf4bd6552b5f519432cce1530c09e6b0aab98d44803b991f7e880bd332",
|
||||
"sha256:bf2d9d62178bb5c05e77d40becf89c309b1966fbcfb5c306238f81bf1ec2d6a2",
|
||||
"sha256:bfd073fea04061019a103a288847846b5ef40dfa2f73b940ed61e399ca95314f",
|
||||
"sha256:c04e84ccf590933a266180286d8b6a5fc844078a5d934432628301bd8b5f9ca7",
|
||||
"sha256:c0947d7be30335cb4c3d5d0983d8ebc8294ae52503cf1d596c926f7e7183900b",
|
||||
"sha256:c2a17752f97a942bdb4ff4a0516a67c5ade1658ebe1ab2edacdec0b42e39fa75",
|
||||
"sha256:c4653830375ab019b86d218c749ad38908b74182b2863d09936aa8d7f990d30e",
|
||||
"sha256:c660fd1e4a4b52f79f7d134a3d31d452948477b7f46ff5061074a534c5805ba6",
|
||||
"sha256:cb48ff6cc6109190e1ccf8ea1fc71cc244c9185813ce7d1c415dce991cfb8709",
|
||||
"sha256:cef2675004d85d85a4ccc24730b73a99931547368d18ceeed1259a2d9fcddbc1",
|
||||
"sha256:d1b98539b0de822b6f717498e59ae3e5ae2e7f564370ab513e6d0c060753e447",
|
||||
"sha256:d6c6989c10008ac70c2bb2ad2b940fcfe883712746c89f7e3308c14c213a70d7",
|
||||
"sha256:db3efec9dcecd96555d752215797816da40315d61878f90ca39c8e269791bf17",
|
||||
"sha256:dc4749c230a71b34db50ac2481d9008bb17b67c92671c443c3b40e192fbea78e",
|
||||
"sha256:dcf906c1f7a33e4222e4bff18da1554d69323bc4dd95fe867a6fa80709ee5f93",
|
||||
"sha256:e2bccadbe313b11704160aaba5eec95d2da1aa663f02f41d2d1520d02bbbdcd5",
|
||||
"sha256:e30cce3cc86d6082c8596b3fbee0d4f54bc4d337a4fa1bf536920e2e319e24f0",
|
||||
"sha256:e5d6428b8b422ba5205140e8be11722fa7292a0bedaa8bc80fb34c92eb19ba45",
|
||||
"sha256:e841695b5dbea38909ab2dbf17e91e9a823412d8d88d1ef77f1b94a7bc551c0f",
|
||||
"sha256:eb65ec0255a0fccc47c87d44e505ef5180bfd71690bd5f84161b1f23949fb209",
|
||||
"sha256:ed20ec5a01c43254f6047c5d8124b70d28e39f128c8ad960b437644fe94e1827",
|
||||
"sha256:ed751a20840a31242e7bea566fcf93ba75bc11b33afe2777bbf46069c1af5094",
|
||||
"sha256:ef8b927813c27c3bdfc82c55682d7767403bcdadfd9f9c0fc49f4be4553a877b",
|
||||
"sha256:f43cacda46fc188f998e6d308afe1c61ff41dcb300949f4cbf731e9a0a5eb2d3",
|
||||
"sha256:f44bea60fd2178d7153deef9621c4b526a93939da30010bba24d3408a98b0f79",
|
||||
"sha256:fcc021530b7c71069132fe4846d95a3cdd74d143adc2f7e398d5fabf610f111c",
|
||||
"sha256:fe16517b275031d61261a4e3941c411fb7c46a9cd012f02381b56e7907cc9e06",
|
||||
"sha256:fe3ae4294d593da54862f0140fdcc89d1aeeb94258ca97f094119ed7f0e5882d"
|
||||
"sha256:0238e53b452ab699b5e2e3f8af2557844c80ab0d0c7a0e066226882838e72756",
|
||||
"sha256:0271bbba36bb130202e011171c1883c4c193036ad0b1e02ecfbea6837790b7de",
|
||||
"sha256:069d49b193f94bb1d748cfd8faf697060a2299f40d86bf5b6d41dd3cedf0fd48",
|
||||
"sha256:06af6e6374ee2bb70f724e09ddf9402907a3d6714828b908737948cd83e5685c",
|
||||
"sha256:0c77cd3dbe0dd9e7cdf8c93dc24e5a4fcb56e115ffb259d4f399e4aaf3f3c62d",
|
||||
"sha256:0e9a2628bcd896368ede456bcfe189d9ca65b18fb0dd91974cb734baf2e24af9",
|
||||
"sha256:12d336bdbe60982de55651be397b5e49d7eadd2aa144f11da353002cd52502ed",
|
||||
"sha256:132cc67b909835d7c230888387b4cc9596d4559a3ce90d947e03bc0b0ffe420b",
|
||||
"sha256:13d0624c13a91da71fa0d960205d93b3d98344481be865ee7cc238c972d41d73",
|
||||
"sha256:1498f388181ae5592c7b60549faaefaffc62d6e3754097576611cb642d21d37b",
|
||||
"sha256:1617fd52da7b208fe5ea176d251dd7cf1b5309e5a4272754b9599edfdf7e64e5",
|
||||
"sha256:177ed1b14aa4f84f00ebef1b0f785680fbaa610361942b23eb54f562fe4c6b34",
|
||||
"sha256:186b2ff4518c1c169fcef5047deb0e6c13a2354d143859587e745fd9f2cf68e9",
|
||||
"sha256:1ba8eb426d56556fffec53d600a8f2572589c19d50b30f61daa8f4d72ab92fbe",
|
||||
"sha256:1c153274699424e8f89f2097d5113f8cbe7898a8d62afaad0270a0f0bd0af53b",
|
||||
"sha256:1fd71b4d7070b01c7f66edc44c1ec2f8bcace2761c3a6ecc10449a40e474d2fa",
|
||||
"sha256:28afb00423e521f4b04fb8f75da7c0215e46631e821e27abf5a7176f9b671f47",
|
||||
"sha256:349c8e522e0b785f442fc9d7fc01c59f7f13f1abe9395310d0d817cff03ec034",
|
||||
"sha256:35a5843546bcbe0422f30b4b2bd5e0b630b04cc4006492c70e8168a921d94b9e",
|
||||
"sha256:38b21eddd021a943b1978b0a3d42e974956a338e3dbb88d56aeb8b8799abd6e8",
|
||||
"sha256:3a4eb0a4db8a2d960bdd5354f05e2e57530e83d333cb644fb2b7120a7a954a69",
|
||||
"sha256:40269fe6bb79fe00c8ba7c2f2d542a82711eb234c3dedb90b7e489386120e9d1",
|
||||
"sha256:426584e99af31ad2398e617c3eb0f1ebcda37f0ffb2d3e56087cdaf23a2f1689",
|
||||
"sha256:47a58f15fc70198cf95982f9699e17fec12287b90f30e90c5e2b7c1c1bc07914",
|
||||
"sha256:512059a902ea2cbcd0afac370af580e67ccd4c7e41ecaff0f0fbd03653b25ca2",
|
||||
"sha256:51664dac8d9b138259876f324adca5ab31d991acf88d1d0ffcc94f423ff2e31b",
|
||||
"sha256:59a4a5fe5379e4fa93380fd0b55bccbdbeb8d04fcfbbad8b42bd31610d5ed3ad",
|
||||
"sha256:5cbfa85a12cfe3dca21951cd432051c505ac461bd9f4a635207d982dd9df2373",
|
||||
"sha256:5fea4207fec8909e155a7948c987eac61949dbbe97fd0c388e587d06ba9bc78d",
|
||||
"sha256:6183476860511cb553a7e4c40936221b6985af7852029c84df898370ec8a028c",
|
||||
"sha256:62459b91a513a7b441cfd70ea7fd15c50b858877ca823915d32bab08fe173edb",
|
||||
"sha256:633ca2001f80900142068bab907feca99554b557ac105c74a9ed157ed38ca5d6",
|
||||
"sha256:65f8a93816dcb2202710839907759aca9eece94d9f13215686f224fcc8966f9e",
|
||||
"sha256:686c40344f7f82c4deaa4e17aa46ad97df51263be1434aeedd2d6b6f38c7f44a",
|
||||
"sha256:6cd7a4321e718cb98a7c7c475b0757e77fdaf1cdb013d7d2e781ba45219e1144",
|
||||
"sha256:6f0605b1146bc24c720aac0e806492144aea9d5a4dc956589e0544301862756a",
|
||||
"sha256:716499113650aacfe1b94d37e0a863f1e84b8d47737c74a2f44f8dfccad46952",
|
||||
"sha256:71810eade75ae1c466adc158d1fa8141040f75427b76240316d97f3c89edd72f",
|
||||
"sha256:72a0c06b76b254bdec18af9add3b8d35796dda51e64a5e0e48d40bff7b41ab13",
|
||||
"sha256:7450b25a803b0f57dae4c3fbd0df742f7f3344c3c9cabb86e4180083c3ebd893",
|
||||
"sha256:75e449ab068af63b7729195343315bc63d242166d88467314be182cc54ce235d",
|
||||
"sha256:7629abba158610cb5db6c22041b287f9398555d72bf9468d44d2efc03d837b81",
|
||||
"sha256:774b9f48bdc385af6654def31e7a7617e01b99cc8aaca1ab3ef6ea0492205e57",
|
||||
"sha256:7a091050bb8d54a5200193b4998e0cf763d083f93d97c7780963c09996f85a38",
|
||||
"sha256:7bdb66340e246b5dcddfcfe79a63ac2ec3808dc394853476f49fc785425040f4",
|
||||
"sha256:812650a2e8a08b812d6a3c937f482bd2c9355e90574964fa283b4d8ef4ae665e",
|
||||
"sha256:84eec41ed982f21ceb58689e16a630a70301eb14499c929388a5bf6464518d9d",
|
||||
"sha256:86d0e28dd5867153d9d9963a4eb17764854a925758fc2db0a814260f82fd4319",
|
||||
"sha256:87dce7c85387ca033cf76cce773ace7675550dcffc456db32a34403439e53e45",
|
||||
"sha256:8869feff59f08cd63979df26aa12343a85bdc7fbd1b79fda8ae39f31a310fa62",
|
||||
"sha256:8baf23d6a0a08b697854e5bcdf82afb91da732cf575fd47ee93945c3654132d8",
|
||||
"sha256:8da525765dbcc1b7abf1bba623f9f701d8759a8fb19594cd71a13b7b0c2c56bd",
|
||||
"sha256:9043bfb816ed50d831acc8d06469dcc41597b4f50c30e62227a93f9f9e37d6c7",
|
||||
"sha256:91c049104b51321e4e18d41edc6850d9f0890ac609b3cb3b8db86dc51666de17",
|
||||
"sha256:93c25fbb5dbc436edbb74101f4da49a42bd3af534513fdf8e75fc72ef035d5e0",
|
||||
"sha256:953129b6b952a9d22042ac23050053444624f630e1928f5f590788660905fa9c",
|
||||
"sha256:9ff0dbec451a2c6226bbd6f2bbbde438bc263e002f3f97d151c8708732ba5197",
|
||||
"sha256:a47f4b24b1360da172cae07ce90e9bd425b6db0052d76142c7fef47173a27283",
|
||||
"sha256:a57e271a0647002b5683dd0c7c2fd7f5fb939357c44396d85298e51a3561b9e3",
|
||||
"sha256:b0606d14892ae2a2b1450e37c8924381e9b64683386a9853e4467f02fd5b44b6",
|
||||
"sha256:b73ff8582964f52ab1bf1a9fdddc1454143172a0b8a9d7d6e3972dd1134f7982",
|
||||
"sha256:bf6047dea1bc8ae19fc14e01b5cb70b3810f91b100d9a535751dd3eadcd3016c",
|
||||
"sha256:c0efc5ab7d9b9e64726496bf650dbc7f1754124a48d076e5292cc5306e61a530",
|
||||
"sha256:c86a0614eda95db036fae01a89f3917d7abdc657c806bac2a32eec74724d9330",
|
||||
"sha256:c878286b1464f462616a47f315d14f02f03512c6b81cb568e996c3f1f79bff8a",
|
||||
"sha256:cd4cde3dfdd347d638171eca53ee6e787d4b1247c6e182f8616039b1df6278d5",
|
||||
"sha256:ceb9a4986f56595e73fffeef3ec037280eda938ed5fe6e4e0961656669d89b32",
|
||||
"sha256:d419e2dbc4943ad6df7ee05e707d7b2c2b512b92407bb6ff643bccbdea399c3a",
|
||||
"sha256:d66462f740dcea496bd779775688a0f805860f0b01998bb59ca22566b098ee26",
|
||||
"sha256:d7514231a03e95072b32d9b335b96253799802ab94647ce83585d5010749380a",
|
||||
"sha256:d9f61b08b60909d936c1f3a4e12c163ca71fd1a4665fc6e078afc6f54f886977",
|
||||
"sha256:da576e59f5f8a642ee26d027479325a45be45defe075b6fa7c84506dabc76883",
|
||||
"sha256:ddaf391ba74eef47eb5afbc40d0b6ddcdbdb417ec8edc8ae95352d25485076db",
|
||||
"sha256:e2b6a323ca545bcb4286d14c0bd75d9a1f5bce2fa1d7fa3621e5f71fd9b8d196",
|
||||
"sha256:e3f6faea65a73ed54111f209b4a411fe012c68f04e8bde96dd7af89b13cac92b",
|
||||
"sha256:e4e36810c541bd1976cd05452e797860b775886cf32c3e8136b9fe48c2c8ba95",
|
||||
"sha256:e5441f4c8142a250695e249e432637c14f79d856a2b60e0974da082e006c53e2",
|
||||
"sha256:e7aedefc87cb46544a3865a19c1d5ca7ddf5ec5ed7dfe162d9538d7543aef499",
|
||||
"sha256:ee2c1fd5bd57fd0092dfa31c1f9f166cf2850f191311603ce343cadcc8608d60",
|
||||
"sha256:f2b6e12f98cce588525f3db802c88f9795d294549ebfe7c2c9bb81333f533ecd",
|
||||
"sha256:f333c0d71dd892683e608f8d1731785a0aa67b1ec012b0d9fc863e8d7224f64e",
|
||||
"sha256:f3e20eb096deea92350f7198a4287d45883a62fe4459d027ce789e72ceba12ee",
|
||||
"sha256:f785375ca2b4e2192786f1e0d2a94c66900d12e780ebae1eccbbab85eb9a7054"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.12.1"
|
||||
"version": "==4.0.1"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
|
@ -175,6 +187,41 @@
|
|||
"index": "pypi",
|
||||
"version": "==2.26.0"
|
||||
},
|
||||
"scipy": {
|
||||
"hashes": [
|
||||
"sha256:033ce76ed4e9f62923e1f8124f7e2b0800db533828c853b402c7eec6e9465d80",
|
||||
"sha256:173308efba2270dcd61cd45a30dfded6ec0085b4b6eb33b5eb11ab443005e088",
|
||||
"sha256:21b66200cf44b1c3e86495e3a436fc7a26608f92b8d43d344457c54f1c024cbc",
|
||||
"sha256:2c56b820d304dffcadbbb6cbfbc2e2c79ee46ea291db17e288e73cd3c64fefa9",
|
||||
"sha256:304dfaa7146cffdb75fbf6bb7c190fd7688795389ad060b970269c8576d038e9",
|
||||
"sha256:3f78181a153fa21c018d346f595edd648344751d7f03ab94b398be2ad083ed3e",
|
||||
"sha256:4d242d13206ca4302d83d8a6388c9dfce49fc48fdd3c20efad89ba12f785bf9e",
|
||||
"sha256:5d1cc2c19afe3b5a546ede7e6a44ce1ff52e443d12b231823268019f608b9b12",
|
||||
"sha256:5f2cfc359379c56b3a41b17ebd024109b2049f878badc1e454f31418c3a18436",
|
||||
"sha256:65bd52bf55f9a1071398557394203d881384d27b9c2cad7df9a027170aeaef93",
|
||||
"sha256:7edd9a311299a61e9919ea4192dd477395b50c014cdc1a1ac572d7c27e2207fa",
|
||||
"sha256:8499d9dd1459dc0d0fe68db0832c3d5fc1361ae8e13d05e6849b358dc3f2c279",
|
||||
"sha256:866ada14a95b083dd727a845a764cf95dd13ba3dc69a16b99038001b05439709",
|
||||
"sha256:87069cf875f0262a6e3187ab0f419f5b4280d3dcf4811ef9613c605f6e4dca95",
|
||||
"sha256:93378f3d14fff07572392ce6a6a2ceb3a1f237733bd6dcb9eb6a2b29b0d19085",
|
||||
"sha256:95c2d250074cfa76715d58830579c64dff7354484b284c2b8b87e5a38321672c",
|
||||
"sha256:ab5875facfdef77e0a47d5fd39ea178b58e60e454a4c85aa1e52fcb80db7babf",
|
||||
"sha256:b0e0aeb061a1d7dcd2ed59ea57ee56c9b23dd60100825f98238c06ee5cc4467e",
|
||||
"sha256:b78a35c5c74d336f42f44106174b9851c783184a85a3fe3e68857259b37b9ffb",
|
||||
"sha256:c9e04d7e9b03a8a6ac2045f7c5ef741be86727d8f49c45db45f244bdd2bcff17",
|
||||
"sha256:ca36e7d9430f7481fc7d11e015ae16fbd5575615a8e9060538104778be84addf",
|
||||
"sha256:ceebc3c4f6a109777c0053dfa0282fddb8893eddfb0d598574acfb734a926168",
|
||||
"sha256:e2c036492e673aad1b7b0d0ccdc0cb30a968353d2c4bf92ac8e73509e1bf212c",
|
||||
"sha256:eb326658f9b73c07081300daba90a8746543b5ea177184daed26528273157294",
|
||||
"sha256:eb7ae2c4dbdb3c9247e07acc532f91077ae6dbc40ad5bd5dca0bb5a176ee9bda",
|
||||
"sha256:edad1cf5b2ce1912c4d8ddad20e11d333165552aba262c882e28c78bbc09dbf6",
|
||||
"sha256:eef93a446114ac0193a7b714ce67659db80caf940f3232bad63f4c7a81bc18df",
|
||||
"sha256:f7eaea089345a35130bc9a39b89ec1ff69c208efa97b3f8b25ea5d4c41d88094",
|
||||
"sha256:f99d206db1f1ae735a8192ab93bd6028f3a42f6fa08467d37a14eb96c9dd34a3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.7.3"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
|
||||
|
|
21
personal/comments_per_day.py
Normal file
21
personal/comments_per_day.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Stream the comment dump and log one "<date> <count>" line each time the
    # day derived from created_utc (interpreted as UTC) changes.
    # NOTE(review): this only yields true per-day totals if the dump is
    # ordered by created_utc - confirm against how the dump was produced.
    day = None
    day_comments = 0
    dump_path = r"\\MYCLOUDPR4100\Public\reddit_final\wallstreetbets_comments.zst"
    for comment in utils.read_obj_zst(dump_path):
        created_at = datetime.utcfromtimestamp(int(comment['created_utc']))
        created_day = created_at.strftime("%m/%d/%y")
        if day is None:
            # First object seen: start the tally for its day.
            day = created_day
        elif day != created_day:
            # Day changed: report the finished day, then reset the tally.
            log.info(f"{day} {day_comments}")
            day_comments = 0
            day = created_day
        day_comments += 1

    # The final day never triggers a change of day, so report it here.
    log.info(f"{day} {day_comments}")
|
33
personal/compare_lines.py
Normal file
33
personal/compare_lines.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Compare two text files line by line (ignoring trailing whitespace) and
    # log every line number at which they differ, followed by both lines.
    path_one = r"\\MYCLOUDPR4100\Public\reddit_final\RelationshipsOver35_comments_dump.txt"
    path_two = r"\\MYCLOUDPR4100\Public\reddit_final\RelationshipsOver35_comments_mongo.txt"

    file_lines = 0
    # The with-statement closes both handles even if reading or logging raises.
    with open(path_one, 'r') as file_one, open(path_two, 'r') as file_two:
        while True:
            raw_one = file_one.readline()
            raw_two = file_two.readline()
            # readline() returns "" only at end of file; a blank line is "\n".
            # Testing the raw value (before rstrip) keeps a blank line in the
            # middle of a file from ending the comparison early, and waiting
            # for both files to run out means extra trailing lines in either
            # file are reported as mismatches.
            if not raw_one and not raw_two:
                break

            file_lines += 1
            line_one = raw_one.rstrip()
            line_two = raw_two.rstrip()
            if line_one != line_two:
                # Deliberately keep going so that all mismatches are reported.
                log.info(f"lines not matching: {file_lines}")
                log.info(line_one)
                log.info(line_two)

            if file_lines % 100000 == 0:
                log.info(f"{file_lines:,}")

    # Total number of line positions compared (the longer file's line count).
    log.info(f"{file_lines:,}")
|
33
personal/count_by_subreddit.py
Normal file
33
personal/count_by_subreddit.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Split one combined zst dump into a separate zst file per subreddit and
    # report how many lines were written to each.
    subreddits = {}
    object_type = "submissions"
    folder = f"\\\\MYCLOUDPR4100\\Public\\reddit_final\\{object_type}"
    if not os.path.exists(folder):
        os.makedirs(folder)
    input_file = f"\\\\MYCLOUDPR4100\\Public\\reddit_final\\relationships_{object_type}.zst"
    input_file_size = os.stat(input_file).st_size

    total_lines = 0
    for obj, line, file_bytes_processed in utils.read_obj_zst_meta(input_file):
        subreddit_name = obj['subreddit']
        entry = subreddits.get(subreddit_name)
        if entry is None:
            # First line for this subreddit: open its output file lazily.
            output_path = os.path.join(folder, subreddit_name + f"_{object_type}.zst")
            entry = {'writer': utils.OutputZst(output_path), 'lines': 0}
            subreddits[subreddit_name] = entry

        # Copy the line through, re-adding the newline separator.
        entry['writer'].write(line)
        entry['writer'].write("\n")
        entry['lines'] += 1

        total_lines += 1
        if total_lines % 100000 == 0:
            # Progress is the share of input-file bytes consumed so far.
            percent_done = (file_bytes_processed / input_file_size) * 100
            log.info(f"{total_lines:,} lines, {percent_done:.0f}%")

    log.info(f"{total_lines:,} lines, 100%")

    # Report the per-subreddit totals and close every output file.
    for name, entry in subreddits.items():
        log.info(f"r/{name}: {entry['lines']:,} lines")
        entry['writer'].close()
|
47
personal/export_mongo.py
Normal file
47
personal/export_mongo.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import json
|
||||
|
||||
import utils
|
||||
import discord_logging
|
||||
import pymongo
|
||||
import time
|
||||
import sys
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Export every comment of one subreddit from MongoDB, oldest first, into
    # a zst file with one compact JSON object per line.
    #
    # Usage: export_mongo.py <mongo_address>
    mongo_address = sys.argv[1]  # 192.168.1.131
    client = pymongo.MongoClient(f"mongodb://{mongo_address}:27017", serverSelectionTimeoutMS=5000)
    # Run serverStatus and log the host name the server reports.
    log.info(f"Database connected at {mongo_address} on {client.admin.command('serverStatus')['host']}")

    count = 0
    start_time = time.time()
    cursor = client.reddit_database.comments.find(
        filter={"subreddit": "RelationshipsOver35"},
        projection={'_id': False},  # leave the Mongo _id out of the export
        sort=[('created_utc', pymongo.ASCENDING)]
    )
    log.info(f"Got cursor in {int(time.time() - start_time)} seconds")

    output_writer = utils.OutputZst(r"\\MYCLOUDPR4100\Public\reddit_final\RelationshipsOver35_comments.zst")
    start_time = time.time()
    try:
        for comment in cursor:
            count += 1
            # Compact separators: no whitespace between JSON tokens.
            output_writer.write(json.dumps(comment, separators=(',', ':')))
            output_writer.write("\n")
            if count % 100000 == 0:
                # "{count:,}" formats with thousands separators; the previous
                # "{count,}" formatted the tuple (count,) instead.
                log.info(f"{count:,} in {int(time.time() - start_time)} seconds")
    finally:
        # Close the writer even if the cursor or a write fails part-way.
        output_writer.close()

    log.info(f"{count:,} in {int(time.time() - start_time)} seconds")
|
||||
|
||||
|
||||
# db.comments.createIndex({subreddit:1}) // remove
|
||||
# db.comments.createIndex({subreddit:1, created_utc:1})
|
||||
# db.comments.createIndex({author:1, created_utc:1})
|
||||
# db.comments.createIndex({id:1})
|
||||
# db.submissions.createIndex({subreddit:1, created_utc:1})
|
||||
# db.submissions.createIndex({author:1, created_utc:1})
|
||||
# db.submissions.createIndex({id:1})
|
||||
# db.submissions.createIndex({created_utc:1})
|
||||
# db.comments.createIndex({created_utc:1})
|
57
personal/group_subs.py
Normal file
57
personal/group_subs.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
import json
|
||||
from datetime import datetime
|
||||
import utils
|
||||
import discord_logging
|
||||
import pymongo
|
||||
import time
|
||||
import sys
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Aggregate submissions per subreddit since a fixed start date, keep the
    # subreddits where at least 10% of submissions are flagged over_18, and
    # write them to a text file sorted by that percentage.
    #
    # Usage: group_subs.py <mongo_address>
    mongo_address = sys.argv[1]  # 192.168.1.131
    client = pymongo.MongoClient(f"mongodb://{mongo_address}:27017", serverSelectionTimeoutMS=5000)
    # Run serverStatus and log the host name the server reports.
    log.info(f"Database connected at {mongo_address} on {client.admin.command('serverStatus')['host']}")

    count = 0
    start_time = time.time()
    # Naive datetime: .timestamp() interprets it in the machine's local zone.
    start_date = int(datetime(2021, 6, 1).timestamp())
    cursor = client.reddit_database.submissions.aggregate(
        [
            # Only submissions created after the start date.
            {"$match": {"created_utc": {"$gt": start_date}}},
            # Turn over_18 into 1/0 so that it can be summed.
            {"$project": {"subreddit": 1, "over_18": {"$cond": ["$over_18", 1, 0]}}},
            # Per subreddit: total submissions and how many were over_18.
            {"$group": {"_id": "$subreddit", "countTotal": {"$count": {}}, "countNsfw": {"$sum": "$over_18"}}},
            # Drop subreddits with 100 or fewer submissions.
            {"$match": {"countTotal": {"$gt": 100}}},
        ],
        allowDiskUse=True
    )
    log.info(f"Got cursor in {int(time.time() - start_time)} seconds")

    start_time = time.time()
    subreddits = []
    for subreddit in cursor:
        # Whole-number percentage of over_18 submissions (truncated).
        subreddit['percent'] = int((subreddit['countNsfw']/subreddit['countTotal'])*100)
        if subreddit['percent'] >= 10:
            subreddits.append(subreddit)
        # Counts every aggregated subreddit, not only the ones kept.
        count += 1
        if count % 100000 == 0:
            log.info(f"{count:,} in {int(time.time() - start_time)} seconds")

    log.info(f"{count:,} in {int(time.time() - start_time)} seconds")

    # Highest percentage first; ties broken by larger total count.
    ranked = sorted(subreddits, key=lambda item: (item['percent'], item['countTotal']), reverse=True)
    # The with-statement closes the file even if a write raises.
    with open(r"\\MYCLOUDPR4100\Public\reddit_final\subreddits.txt", 'w') as file_out:
        for subreddit in ranked:
            file_out.write(f"{subreddit['_id']: <22}{subreddit['countTotal']: <8}{subreddit['countNsfw']: <8}{subreddit['percent']}%\n")
|
||||
|
||||
|
||||
# db.comments.createIndex({subreddit:1}) // remove
|
||||
# db.comments.createIndex({subreddit:1, created_utc:1})
|
||||
# db.comments.createIndex({author:1, created_utc:1})
|
||||
# db.comments.createIndex({id:1})
|
||||
# db.submissions.createIndex({subreddit:1, created_utc:1})
|
||||
# db.submissions.createIndex({author:1, created_utc:1})
|
||||
# db.submissions.createIndex({id:1})
|
||||
# db.submissions.createIndex({created_utc:1})
|
||||
# db.comments.createIndex({created_utc:1})
|
62
personal/insert_mongo.py
Normal file
62
personal/insert_mongo.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
import os
|
||||
import pymongo
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
# Bulk-load zstandard-compressed ndjson dump files into a MongoDB collection.
#
# Command line arguments:
#   1. address of the MongoDB server (port 27017 is assumed)
#   2. object type; used both as the sub-folder name under the input folder
#      and as the name of the target collection in `reddit_database`
#   3. input folder containing one sub-folder per object type
if __name__ == "__main__":
    mongo_address = sys.argv[1]  # 192.168.1.131
    client = pymongo.MongoClient(f"mongodb://{mongo_address}:27017", serverSelectionTimeoutMS=5000)

    # serverStatus is issued eagerly so an unreachable server fails here
    # rather than in the middle of the first insert.
    log.info(f"Database connected at {mongo_address} on {client.admin.command('serverStatus')['host']}")

    object_type = sys.argv[2]
    input_folder = sys.argv[3]

    # Collect every .zst file (path, size) below <input_folder>/<object_type>.
    input_files = []
    total_size = 0
    for subdir, dirs, files in os.walk(input_folder + os.sep + object_type):
        files.sort()
        for filename in files:
            input_path = os.path.join(subdir, filename)
            if input_path.endswith(".zst"):
                file_size = os.stat(input_path).st_size
                total_size += file_size
                input_files.append([input_path, file_size])

    log.info(f"Processing {len(input_files)} files of {(total_size / (2 ** 30)):.2f} gigabytes")

    collection = client.reddit_database[object_type]

    log.info(f"Using collection {object_type} which has {collection.estimated_document_count()} objects already")

    total_lines = 0
    total_bytes_processed = 0
    for input_path, input_size in input_files:
        file_lines = 0
        file_bytes_processed = 0
        created = None
        inserts = []
        for obj, line, file_bytes_processed in utils.read_obj_zst_meta(input_path):
            # Insert in batches of 10,000 documents to limit round trips.
            inserts.append(obj)
            if len(inserts) >= 10000:
                collection.insert_many(inserts)
                inserts = []

            created = datetime.utcfromtimestamp(int(obj['created_utc']))
            file_lines += 1
            if file_lines == 1:
                log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : 0% : {(total_bytes_processed / total_size) * 100:.0f}%")
            if file_lines % 100000 == 0:
                log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : {(file_bytes_processed / input_size) * 100:.0f}% : {(total_bytes_processed / total_size) * 100:.0f}%")

        # Flush the final partial batch. pymongo rejects an empty document
        # list, so skip the call when the file ended exactly on a batch
        # boundary or contained no objects at all.
        if inserts:
            collection.insert_many(inserts)
        total_lines += file_lines
        total_bytes_processed += input_size

        # `created` stays None when the file yielded no objects.
        created_text = created.strftime('%Y-%m-%d %H:%M:%S') if created is not None else "no objects"
        log.info(f"{created_text} : {total_lines:,} : 100% : {(total_bytes_processed / total_size) * 100:.0f}%")

    log.info(f"Total: {total_lines}")
|
33
personal/split_by_subreddit.py
Normal file
33
personal/split_by_subreddit.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
# Split one combined zstandard ndjson dump into one compressed file per
# subreddit. Every subreddit seen in the input gets its own OutputZst writer,
# which stays open until the whole input has been processed.
if __name__ == "__main__":
    subreddits = {}
    object_type = "submissions"
    folder = f"\\\\MYCLOUDPR4100\\Public\\reddit_final\\{object_type}"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    input_file = f"\\\\MYCLOUDPR4100\\Public\\reddit_final\\relationships_{object_type}.zst"
    input_file_size = os.stat(input_file).st_size
    total_lines = 0
    try:
        for comment, line, file_bytes_processed in utils.read_obj_zst_meta(input_file):
            subreddit_name = comment['subreddit']
            if subreddit_name not in subreddits:
                output_path = os.path.join(folder, subreddit_name + f"_{object_type}.zst")
                subreddits[subreddit_name] = {'writer': utils.OutputZst(output_path), 'lines': 0}
            subreddit = subreddits[subreddit_name]
            # The reader strips the newline, so put it back after each object.
            subreddit['writer'].write(line)
            subreddit['writer'].write("\n")
            subreddit['lines'] += 1
            total_lines += 1
            if total_lines % 100000 == 0:
                log.info(f"{total_lines:,} lines, {(file_bytes_processed / input_file_size) * 100:.0f}%")

        log.info(f"{total_lines:,} lines, 100%")
    finally:
        # Always close the writers, even if reading failed part-way through,
        # so that whatever was written is flushed as a complete zstd stream.
        for name, subreddit in subreddits.items():
            log.info(f"r/{name}: {subreddit['lines']:,} lines")
            subreddit['writer'].close()
|
25
personal/test_file.py
Normal file
25
personal/test_file.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import utils
|
||||
import discord_logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
log = discord_logging.init_logging()
|
||||
|
||||
|
||||
# Smoke test: stream a single dump file end to end, logging progress every
# 100,000 objects, to confirm the file decompresses and parses.
if __name__ == "__main__":
    file_path = r"\\MYCLOUDPR4100\Public\reddit\submissions\RS_2011-01.zst"
    file_size = os.stat(file_path).st_size

    file_lines = 0
    created = None
    for obj, line, file_bytes_processed in utils.read_obj_zst_meta(file_path):
        created = datetime.utcfromtimestamp(int(obj['created_utc']))
        file_lines += 1
        if file_lines % 100000 == 0:
            log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines:,} : {(file_bytes_processed / file_size) * 100:.0f}%")

    # `created` stays None when the file yielded no objects.
    created_text = created.strftime('%Y-%m-%d %H:%M:%S') if created is not None else "no objects"
    log.info(f"{created_text} : {file_lines:,} : 100%")
|
||||
|
60
personal/utils.py
Normal file
60
personal/utils.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
import zstandard
|
||||
import json
|
||||
|
||||
|
||||
def read_obj_zst(file_name):
    """Yield the parsed JSON object from each line of a zstandard ndjson file.

    The file is decompressed and decoded as UTF-8 in chunks of 2**27 bytes, so
    the whole file is never held in memory.

    Args:
        file_name: path of the zstandard-compressed, newline-delimited JSON file.

    Yields:
        The result of ``json.loads`` for every line, in file order. A final
        line that lacks a trailing newline is yielded as well.

    Raises:
        json.JSONDecodeError: if a line is not valid JSON.
        UnicodeDecodeError: if the decompressed data is not valid UTF-8.
    """
    import codecs  # local import so the module's import block is left untouched

    # An incremental decoder holds back a multi-byte UTF-8 sequence that is
    # split across two reads instead of failing on the partial character.
    decoder = codecs.getincrementaldecoder("utf-8")()
    with open(file_name, 'rb') as file_handle:
        reader = zstandard.ZstdDecompressor(max_window_size=2**31).stream_reader(file_handle)
        try:
            buffer = ''
            finished = False
            while not finished:
                raw_chunk = reader.read(2**27)
                if raw_chunk:
                    lines = (buffer + decoder.decode(raw_chunk)).split("\n")
                    # The last piece may be an incomplete line; keep it for
                    # the next chunk.
                    buffer = lines.pop()
                else:
                    finished = True
                    # End of stream: flush the decoder and emit whatever is
                    # left if the file did not end with a newline.
                    tail = buffer + decoder.decode(b"", final=True)
                    lines = [tail] if tail.strip() else []

                for line in lines:
                    yield json.loads(line)
        finally:
            reader.close()
|
||||
|
||||
|
||||
def read_obj_zst_meta(file_name):
    """Yield parsed objects plus raw line and file position from a zstandard ndjson file.

    Works like a line-by-line JSON reader over the decompressed stream, but
    skips lines that fail to parse and reports how far into the compressed
    file the reader has advanced, which callers use for progress reporting.

    Args:
        file_name: path of the zstandard-compressed, newline-delimited JSON file.

    Yields:
        Tuples of ``(json_object, line, position)`` where ``json_object`` is
        the parsed line, ``line`` is the raw text without its newline, and
        ``position`` is ``file_handle.tell()`` on the compressed file at the
        time of the yield. A final line that lacks a trailing newline is
        yielded as well.

    Raises:
        UnicodeDecodeError: if the decompressed data is not valid UTF-8.
    """
    import codecs  # local import so the module's import block is left untouched

    # An incremental decoder holds back a multi-byte UTF-8 sequence that is
    # split across two reads instead of failing on the partial character.
    decoder = codecs.getincrementaldecoder("utf-8")()
    with open(file_name, 'rb') as file_handle:
        reader = zstandard.ZstdDecompressor(max_window_size=2**31).stream_reader(file_handle)
        try:
            buffer = ''
            finished = False
            while not finished:
                raw_chunk = reader.read(2**27)
                if raw_chunk:
                    lines = (buffer + decoder.decode(raw_chunk)).split("\n")
                    # The last piece may be an incomplete line; keep it for
                    # the next chunk.
                    buffer = lines.pop()
                else:
                    finished = True
                    # End of stream: flush the decoder and emit whatever is
                    # left if the file did not end with a newline.
                    tail = buffer + decoder.decode(b"", final=True)
                    lines = [tail] if tail.strip() else []

                for line in lines:
                    try:
                        json_object = json.loads(line)
                    except (KeyError, json.JSONDecodeError):
                        # Unparseable lines are skipped deliberately so one
                        # bad record does not abort a long-running import.
                        continue
                    yield json_object, line, file_handle.tell()
        finally:
            reader.close()
|
||||
|
||||
|
||||
class OutputZst:
    """Write text to a zstandard-compressed file.

    Usable directly (call ``close()`` when done) or as a context manager.
    """

    def __init__(self, file_name):
        """Open ``file_name`` for binary writing and wrap it in a zstd stream writer."""
        # Keep the raw handle so close() can release it explicitly.
        self._output_file = open(file_name, 'wb')
        self.writer = zstandard.ZstdCompressor().stream_writer(self._output_file)

    def write(self, line):
        """Encode ``line`` as UTF-8 and append it to the compressed stream."""
        encoded_line = line.encode('utf-8')
        self.writer.write(encoded_line)

    def close(self):
        """Finish the compressed stream and close the underlying file."""
        try:
            self.writer.close()
        finally:
            # NOTE(review): whether the zstandard writer closes the wrapped
            # file itself appears to depend on the library version; closing an
            # already-closed file is a no-op, so this is safe either way.
            self._output_file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.close()
        # Return False so an exception raised inside the `with` body
        # propagates; returning True would silently swallow every error.
        return False
|
Loading…
Add table
Add a link
Reference in a new issue