# textfiles-politics/pythonCode/scrap-files/stuff.py

import spacy
from collections import Counter
import re as regex
import os
from saxonche import PySaxonProcessor
# Load the spaCy "en_core_web_lg" pipeline once, at module import time, and
# keep it in the module-level name `nlp`.
nlp = spacy.load("en_core_web_lg")
def entitycollector(tokens):
    """Write every entity found in *tokens* to a text file and return them.

    One line is written per entity to ``conspPERSONentityCollector.txt``
    (relative to the current working directory; an existing file is
    overwritten). Each line is the ``str()`` of the two-item list
    ``[text, label]``, e.g. ``['Nixon', 'PERSON']``.

    Args:
        tokens: Object exposing an ``ents`` iterable whose items have ``text``
            and ``label_`` attributes and can be ordered by ``sorted()``.
            NOTE(review): presumably a spaCy ``Doc`` -- confirm against the
            caller.

    Returns:
        dict: Maps each entity's text to its label. If the same text occurs
        more than once, the label of the last occurrence in sorted order wins.
    """
    entities = {}
    # Explicit UTF-8: entity text is free-form natural language, so relying on
    # the platform's default encoding can raise UnicodeEncodeError (or yield a
    # locale-dependent file) as soon as a non-ASCII name shows up.
    with open('conspPERSONentityCollector.txt', 'w', encoding='utf-8') as f:
        # Goes through each entity in sorted order.
        for ent in sorted(tokens.ents):
            f.write(str([ent.text, ent.label_]) + '\n')
            entities[ent.text] = ent.label_
    # Return all entities, keyed by text, with their labels.
    return entities