cleaning files

This commit is contained in:
Nate Hammer 2023-04-30 18:35:36 -04:00
parent 50314b5f26
commit 9acc589f06
679 changed files with 7970 additions and 7452 deletions

View file

@ -0,0 +1,22 @@
import spacy
from collections import Counter
import re as regex
import os
from saxonche import PySaxonProcessor
# Load the spaCy pipeline named "en_core_web_lg" once, at import time, and keep
# it as the module-level `nlp` object.
# NOTE(review): this presumably requires the model package to be installed
# separately before the script is run — confirm in the project setup notes.
nlp = spacy.load("en_core_web_lg")
def entitycollector(tokens, output_path='conspPERSONentityCollector.txt'):
    """Write every entity found in *tokens* to a text file and return them.

    Each entity is written on its own line as the string form of the
    two-item list ``[text, label]`` (for example ``['Alice', 'PERSON']``).

    Args:
        tokens: Object exposing an ``ents`` iterable whose items have
            ``text`` and ``label_`` attributes and can be ordered with
            ``sorted`` (presumably a spaCy ``Doc`` -- verify against caller).
        output_path: File that receives the entity listing. Defaults to the
            original hard-coded name so existing callers are unaffected.
            The file is overwritten on every call.

    Returns:
        dict: Maps each entity text to its label. When the same text occurs
        more than once, the label of the last occurrence (in sorted order)
        is kept.
    """
    entities = {}
    lines = []
    # Collect everything first so the output file is only opened (and
    # truncated) once the entities have been read successfully.
    for ent in sorted(tokens.ents):
        lines.append(str([ent.text, ent.label_]) + '\n')
        entities[ent.text] = ent.label_

    # Explicit UTF-8: entity text can contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    return entities