mirror of
https://github.com/nhammer514/textfiles-politics.git
synced 2025-06-08 23:02:41 -04:00
lots of stuff, python names, xquery to html, xml regex clean-ish
This commit is contained in:
parent
8838b667df
commit
780f7e4c00
342 changed files with 246606 additions and 843 deletions
22
pythonCode/stuff.py
Normal file
22
pythonCode/stuff.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
import spacy
|
||||
from collections import Counter
|
||||
import re as regex
|
||||
import os
|
||||
from saxonche import PySaxonProcessor
|
||||
|
||||
# Shared spaCy pipeline, loaded once when this module is imported.
# NOTE(review): presumably `en_core_web_lg` must already be installed in the
# environment for this call to succeed — confirm against project setup docs.
nlp = spacy.load("en_core_web_lg")
|
||||
|
||||
def entitycollector(tokens):
    """Write every entity in *tokens* to a text file and return a text->label map.

    Each entity found in ``tokens.ents`` is written to
    ``conspPERSONentityCollector.txt`` (created or overwritten in the current
    working directory) as one line holding the ``str()`` of
    ``[ent.text, ent.label_]``.

    Args:
        tokens: An object exposing an iterable ``ents`` attribute whose items
            have ``text`` and ``label_`` attributes and can be ordered by
            ``sorted()`` (presumably a spaCy ``Doc`` - confirm with callers).

    Returns:
        dict: Maps each entity text to its label. The file lists every
        occurrence, but the dict keeps one entry per distinct text; when the
        same text appears more than once, the label of the last occurrence in
        sorted order wins.
    """
    entities = {}
    lines = []
    # Goes through each entity in sorted order, as the entity objects define it.
    for ent in sorted(tokens.ents):
        lines.append(f"{[ent.text, ent.label_]}\n")
        entities[ent.text] = ent.label_

    # Explicit UTF-8: entity text can contain non-ASCII characters, and the
    # platform default encoding (e.g. cp1252 on Windows) may fail to encode them.
    with open('conspPERSONentityCollector.txt', 'w', encoding='utf-8') as f:
        f.writelines(lines)

    # Return all entities with their label and text.
    return entities
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue