lots of stuff, python names, xquery to html, xml regex clean-ish

This commit is contained in:
HadleighJae 2023-04-24 14:37:38 -04:00
parent 8838b667df
commit 780f7e4c00
342 changed files with 246606 additions and 843 deletions

22
pythonCode/stuff.py Normal file
View file

@ -0,0 +1,22 @@
import spacy
from collections import Counter
import re as regex
import os
from saxonche import PySaxonProcessor
# Load the spaCy pipeline named "en_core_web_lg" once, when this module is
# imported, and keep it in the module-level name `nlp`.
nlp = spacy.load("en_core_web_lg")
def entitycollector(tokens, output_path='conspPERSONentityCollector.txt'):
    """Write every entity in *tokens* to a text file and return them as a dict.

    Each entity is written on its own line as the ``str()`` of the
    two-item list ``[text, label]``, e.g. ``['Alice', 'PERSON']``.

    Args:
        tokens: Object exposing an ``ents`` iterable whose items have
            ``text`` and ``label_`` attributes and can be ordered with
            ``sorted`` (presumably a spaCy ``Doc``; confirm against caller).
        output_path: File that is created or overwritten with the entity
            listing. Defaults to the file name this function has always
            written to.

    Returns:
        A dict mapping each entity's text to its label. When the same text
        appears more than once, the entry processed later overwrites the
        earlier one.
    """
    entities = {}
    # The encoding is pinned to UTF-8 because entity text is arbitrary
    # natural-language text and the platform default encoding may not be
    # able to represent it.
    with open(output_path, 'w', encoding='utf-8') as f:
        # Goes through each entity in sorted order.
        for ent in sorted(tokens.ents):
            f.write(str([ent.text, ent.label_]) + '\n')
            entities[ent.text] = ent.label_
    return entities