mirror of
https://github.com/nhammer514/textfiles-politics.git
synced 2024-12-25 15:29:25 -05:00
23 lines
688 B
Python
23 lines
688 B
Python
|
import spacy
|
||
|
from collections import Counter
|
||
|
import re as regex
|
||
|
import os
|
||
|
from saxonche import PySaxonProcessor
|
||
|
|
||
|
# Load the spaCy pipeline named "en_core_web_lg" once, at module import time,
# and bind it to the module-level name `nlp`.
nlp = spacy.load("en_core_web_lg")
|
||
|
|
||
|
def entitycollector(tokens, output_path='conspPERSONentityCollector.txt'):
    """Write every entity in ``tokens.ents`` to a text file and return them.

    Each entity is written on its own line as the ``str()`` of the two-item
    list ``[ent.text, ent.label_]``, visiting entities in ``sorted()`` order.
    The output file is overwritten on every call.

    Args:
        tokens: Object exposing an ``ents`` iterable whose items have
            ``text`` and ``label_`` attributes and can be ordered by
            ``sorted()``. NOTE(review): presumably a spaCy ``Doc`` produced
            by the module-level ``nlp`` pipeline -- confirm against callers.
        output_path: Path of the file to write. Defaults to the file name
            this function has always used.

    Returns:
        A dict mapping each entity's text to its label. If the same text
        appears more than once, the label of the last occurrence (in sorted
        order) is kept.
    """
    entities = {}
    # Explicit UTF-8: entity text may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open(output_path, 'w', encoding='utf-8') as f:
        # goes through each entity in the token list.
        for ent in sorted(tokens.ents):
            f.write(str([ent.text, ent.label_]) + '\n')
            entities[ent.text] = ent.label_
    # return all entities with its label and text.
    return entities
|
||
|
|