2023-03-27 17:14:07 -04:00
|
|
|
import spacy
|
2023-04-30 18:35:36 -04:00
|
|
|
from spacy.pipeline import EntityRuler
|
2023-03-27 17:14:07 -04:00
|
|
|
from collections import Counter
|
2023-03-27 18:22:30 -04:00
|
|
|
import re as regex
|
2023-03-27 17:14:07 -04:00
|
|
|
import os
|
2023-03-31 13:41:41 -04:00
|
|
|
from saxonche import PySaxonProcessor
|
|
|
|
|
2023-03-29 14:22:05 -04:00
|
|
|
|
|
|
|
#### Loads all of the necessary variables and functions.
|
2023-04-30 18:35:36 -04:00
|
|
|
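# One-time setup (run once, then keep disabled): spacy.cli.download("en_core_web_lg")  -- installs the model only; it does not return a pipeline, so do not assign it to nlp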
|
2023-03-29 14:22:05 -04:00
|
|
|
# Load the "en_core_web_lg" pipeline; the model package must already be installed.
nlp = spacy.load("en_core_web_lg")
|
2023-04-30 18:35:36 -04:00
|
|
|
#########################################################################################
|
|
|
|
# ebb: After reading the NLP output, we know spaCy is making some mistakes.
|
|
|
|
# So, here let's try adding an EntityRuler to customize spaCy's classification. We need
|
|
|
|
# to configure this BEFORE we send the tokens off to nlp() for processing.
|
|
|
|
##########################################################################################
|
|
|
|
# Create the rule-based ruler (a span_ruler rather than an EntityRuler -- see the notes below) and add it before ner, intending OUR rules to take precedence
|
|
|
|
# Sources:
|
|
|
|
# W. J. B. Mattingly: https://ner.pythonhumanities.com/02_01_spaCy_Entity_Ruler.html
|
|
|
|
# spaCy documentation on NER Entity Ruler: https://spacy.io/usage/rule-based-matching#entityruler
|
|
|
|
# Settings passed to the "span_ruler" pipe when it is added to the pipeline
# (option meanings are defined in spaCy's SpanRuler documentation).
config = dict(spans_key=None, annotate_ents=True, overwrite=True, validate=True)
|
|
|
|
# Add the "span_ruler" component to the pipeline ahead of "ner", using the settings in `config`.
ruler = nlp.add_pipe("span_ruler", before="ner", config=config)
|
|
|
|
# 2023-04-07: ebb: NOTE: before="ner" setting seems to allow the spaCy NER rules to prevail over these patterns where
|
|
|
|
# there is a conflict.
|
|
|
|
# after="ner" means that the spaCy NER is TOTALLY OVERWRITTEN and invalidated by our patterns.
|
|
|
|
|
|
|
|
# Notes: Mattingly has this: ruler = nlp.add_pipe("entity_ruler", after="ner", config={"validate": True})
|
|
|
|
# But this only works when spaCy doesn't recognize a word / phrase as a named entity of any kind.
|
|
|
|
# If it recognizes a named entity but tags it wrong, we correct it with the span_ruler, not the entity_ruler
|
|
|
|
patterns = [
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^-\w+?"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^.$"}}]},
|
|
|
|
# ebb: Don't match on any single characters!
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "\^+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^\w\w$"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^[a-z]+ [a-z]+$"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^.*?__{2,}.*?$"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "CHRISTIAN(ITY|DOM)?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "CHRISTIAN NETWORK"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Catholic(ism)?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Zion(is[tm])?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Juda(ism)?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jew(s|ish)?"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Vatican"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "(IBM|CIA|FBI)"}}]},
|
|
|
|
# ebb: Pick up IBM, FBI, etc
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "(Bush|Reagan|Kennedy|Nixon|Ford|Carter|Eisenhower) Administration"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "[A-Z]{2,}[A-Z][a-z]+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "[a-z]{2,}[A-Z][a-z]+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "^.*?[a-z][A-Z].*?$"}}]},
|
|
|
|
# ebb: Above line attempts to stop matching things like Oak IslandThe Method
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT" : {"REGEX": "^[Mm\-]+$"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "\w+cia\w+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "fed\w+"}}]},
|
|
|
|
# SOCIALISMBY RICHARD
|
|
|
|
# ebb: The "^[Mm\-]+$" pattern above attempts to stop matching things like Mmm-mm or mm, etc.
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TITIOUS CONCEPTS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TORTIOUS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "THROUGHWAYS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TARGET"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "NULL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sympathisers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sympathy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "UNTIL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "UNCLE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "VCI"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "VISUALIZATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "YOGA"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ack Canfield"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "aurem concepisti"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Xref"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Wspomnienia"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Untrustworthy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Twelve Apostles"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Tsar"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Treasurer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Thunderer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Thou raisest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Thou cleansest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Thou castest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "This Leopold the Traitor"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Swore"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Swastika"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Superstrings"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Subj"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Subterrene"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sts"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Strength"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sol Deus Invictus"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Skis"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sect"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sen"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Senza Aristotele noi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Salubrious Living"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Saint vouches"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Safehaven"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SURPRISED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SURE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SUCCESSIVE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SUBSEQUENT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "STUNTS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "STREAMLINED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SPECULATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SPECULA"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SOVEREIGN"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SHOCKED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SEPT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SAVIOR"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Roman Emperor"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Revelation"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Resurrection Bone"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Relearning"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Reason"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Read again Knocking the Key Weapon"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Rea"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "RESOLVE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "RESALE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "REBELS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "RATIFICATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Qui"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Priest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Price"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Popes"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Place"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Philosophizing"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Pharmacopaeia"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Pharaoh"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Pauline Privilege"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Paul the Persecutor"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Page"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Pagan Fathers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PWs"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PROPAGANDA"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PRIVILEGES"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PRIOR RESTRAINT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PRINCE satropic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PRESERVATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "POPE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Brandt"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Eichmann"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Liebehenschel"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Rauff"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Rodl"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Sievers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Obersturmbannfuehrer Walter Rauff"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "OPIUM CULTIVATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Nutshell"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Numberless"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Num"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Northwest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Northshore"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Northeast"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ngo Dinh Diem"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ngo Din Diem"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Nay"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "NAZI CARTEL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "NATURE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Msgr"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mother Goose"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ministerially"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Martial Law Threat"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Marquis Who"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mapmakers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mao"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MIllbourn"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MEDLEY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAR"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Lop"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "LYINGLY FOUNDED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Law"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "LAW"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Keys"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "KOD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Juifs"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Judo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Joseph the Carpenter"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Jerome arraigns"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Jack Built"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JUDAICA"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JURIS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JOINT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JHVH"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JESUS THE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JESUS DIED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Izvestiya VUZ"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Iron Shins"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Iren"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Infotechnology"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Inerrancy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Imprimatur"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Imposture"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Implore"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Immortale Dei"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Illuminatus Lanz"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Illiterates Illiterates"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "INTELLECTUALS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "IMMOVABLE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "IMMIGRANT AZIZ Rehman"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ILLUNINATI USE MAFIAS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Hun"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Hoo boy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Holy Virginity"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Holy Vehm"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Hoc Signo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "His Covenant"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Henry the Navigator"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Heathens don"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "auptsturmfuehrer Truehe"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Hauptscharfuehrer Ruebe"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Hamstring Yugo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HYPERSPACE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HUMANE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HEED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HEARINGS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Guillotine"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Grupenfuehrer Globocnik"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gruppenfuehrer Katzmann"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Groom"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gregory the Great"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Grammarians"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Grammar"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gospel"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gort"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Goebbelesque"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Godhead"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Girkalinei Comm"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Geschichte der Staatstheorien"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Genus Shamanensis"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Genius"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gene Pool"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gemeter"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gaude Virgo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gamel Abdul Nasser"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gameplayers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Galicie Ukrainienne"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Gal"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GUNJAMMER"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GUILTY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GREED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GRAMS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GOVERNORS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GENIUS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GAER"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Furness"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Free Inq"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Foxx"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Foo Fighters"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Fig"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Figs"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Feverfew"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Feathers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Fathers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "FUNC"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "FOUO"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "FILES KIT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "FEVERFEW"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "FHK"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Exodus"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Eur"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Epistle clxvi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Epistle"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Enfranchised"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Endothermic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Encyc"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Encloged"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Emperor"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Emperor Vespasian"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Electromagnetically"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ecclesiastici Publici"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Eccles"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ebrietatis Enconium"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "EXTRAORDINAIRE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "EXPATRIATION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "EPISTLES"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ENORMOUS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "EMPLOY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dupes"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dueling Spymasters"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dual Amiga"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Driver"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dritte Reich"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Drang nach Osten"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dorman"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Don sez"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Divinatione Daemonorum"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Distinguished paedagogists"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dies irae"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Die Geschichte von Joel Brand"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Dew"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Deut"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Desert Rat"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Deregulatory Creep"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Der Weg des Politischen Katholizismus"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Depository"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Debasement"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Death Ray"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Das Sogenannte Boese"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CORRUPT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "COMMISSIONED THE UNDERSIGNED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CLEARFIELD"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Pagan Sibylline Oracles"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CIRCUMSTANTIAL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CDT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CANNOT"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "BATF"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "BEEN"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "BLACKMAIL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "BOURGEOIS GENTILHOMME"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Bagdad"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Balfour Declaration"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Battle"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Biblical"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Bishop"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Blade Runner"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Blackmail"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Blessing"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Briefly"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuehrer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuehrer Globocnik"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuehrer Gluecks"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuehrer Zenner"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuhrer Richard Glucks"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuehrer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Brigadefuhrer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CREATORS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Cap"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Capitula Angilrammi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Case"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Charles Mentesana Filmed"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Chillingworth"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Christian Pope"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Chronicles xxviii"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Coddled Omar Abdel Rahman"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Chronicles xxxiv"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Clem"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Commrce"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Corinthians"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Cosi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Crucifixion"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Czar"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DECRETALS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DECAY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DEPLETION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DISASTROUS THAN PEARL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DISEASES"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DOPE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Damit Ich"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Pagan Oracles"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Deceits"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Deep Cover"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Demagnetize"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Mona Lisa"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Demigod"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Demjanjuk"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Democratic Centralism"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Mein Kampf"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Mary Magdalin Gospel"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Horst Wessel Song"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Fama Fraternitatis"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Physiocrats"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Pagan"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Meo"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "PAGAN"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Obersturmfuehrer"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "MARTYRS"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jurisconsults"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jewess"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jeremiads"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jehovah's Witnesses"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jehovah's Witness"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Jehovahs Witness"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "JURORS"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Heathens"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Gypsy"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Geronimo Chiricahua Apache"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Gauleiter"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Frankists"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Framers"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Framer"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Felloweraft"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Evangelist"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Egotists"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Duce"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Babylon(ia)?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Diluvian Patriarchs"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Christiani ty"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "CAIB"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the .+?"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Infidels"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Christiani"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAHAT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mahat"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Atta Boy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Assailants"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Assassinated Robert"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Article"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Adrift"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "bacchanalian"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "behests"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Parallel"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": ""}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the user"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "NETWORK"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Attemps"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Foundation"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Iena"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "attemps"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "pgh"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Christiani"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAHAT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mahat"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Parallel"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the user"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "NETWORK"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Attemps"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Iena"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "attemps"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Advanced"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Believability"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Onesuch"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the People"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "REPRINT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "The Next Banking Crisis"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Incidently"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Martini Glass"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Customs"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Sheriff"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Greets"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "bey"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "conven"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Pro"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "THUS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TUNED CIRCUT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the supreme end"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Ebe"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Big Government"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "State"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "pseudoscientists"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "undam"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "News"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Times"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PERIODICALS"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SPEECHES"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "INCLUSION"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAJESTY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "interpet"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "manifesta"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "aspetic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "mish"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "petros"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "shevirat"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "sanguinis"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "resop"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "tryed"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "quandry"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "imbeds"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "germain"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "orleans"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "tgg"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Families"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "preparingits"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "germain"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "wintry"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "trogan"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Interested"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Time"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ACTUALLY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ACCIDENTALLY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "The Transmission"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "contra"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Mental Health"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Bill of Rights"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "MCCARREN ACT"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Emergency Detention Act"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Geneva Convention"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Official Secrets Act"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Executive Order"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "American Press freedom"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "State Constitution"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Constitution"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Martial Law"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Martial Rule"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Unalienable Rights"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Alaska Mental Health Bill"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Multilateral Protection of War Victims/Prisoners of War"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Multilateral Protection of War Victims/Civilian Persons"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Public Health Service Draft Act"}}]},
|
|
|
|
{"label": "LAW", "pattern": [{"TEXT": {"REGEX": "Uniform Mental Health Act"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Executive Order #[0-9]+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Median"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Next"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Daily"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the General"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Diplomatic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "televi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "patsy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "mike"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MODIFY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Nations"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "substanti"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "audi"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Justice"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Cancer"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Natural causes"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PLATO"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "darksuckers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Darksuckers"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "MAJESTY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SNOWBIRD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GARNET"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "AGL"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DELTA"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the King"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "EXCALIBUR"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the secret control group"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JASON"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Heart"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "sentatives"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Peter Gregory Original"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Billy Lovelady Depository"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "The World"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "John Holbrook Psychiatrist"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Paul Mandal Life"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Jackson Life Magazine"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Drug"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "thou mayest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "thou eatest"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Unity movement"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Conn"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "intel"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "xxx xxxxx"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Xxxxxx Xxxxxxxx"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Xxxxxxx Xxxxxxx"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Wie hoch"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "WOULDN"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Waar"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "WCH"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "WCR"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "yeldeth ben"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "zwite Aufseher"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "wuz"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "beeb"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "duetschen Juden"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "bison"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "enim pejor"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "enshroud"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "entorce"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "epidemica"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "blasphemously"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "bull dudu"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "bowie asst"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "canonizes"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "clarinews"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "columno"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "complicitly"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "conspiracy buff"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "creo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "decrepid"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Motor Collision"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "pro tem"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Electrotherapeutic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "megs"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "JRD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "The Wealth"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "state"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Trade"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Atomic"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Expo"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "looney"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Kingdom"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )??Office of Strategic Services"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Life Magazine"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Seattle Post Intellegencer"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Industrias Cardoen"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "di"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "VENTURE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ALLEGED REAGAN"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Constitu"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "econonic aid"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Edgar Hoover"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "fed"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Apollo Astronauts"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Temple"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CONNECTED WACKENHUT CORP"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "GREAT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Lung"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Bill"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Said"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "CAPACITOR"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SPARK"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DAMPED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ANEW"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "THEREBY REDUCING EFFICIENCY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HOWEVER"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TAPPED"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ELECTRICALLY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ELECTRODE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "TUNED CIRCUT"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "WAVELENGTH"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "HORIZONTALLY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "THE OUDIN PRIMARY"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "AUTOTRANSFORMER"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Allied"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the National"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Cheez Whiz"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "the Rich Discover Worthy"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Examiner"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "msen"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "ORG"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Physics A\. Mathematical"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "PALE"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Operation"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Order"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Command"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "jackboots"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "DARK"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "COLD"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Long Range"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "\S+@\S+\.\S+"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Human Behavior"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "SINCE LAWYERS OCCUPY"}}]},
|
|
|
|
{"label": "WORK_OF_ART", "pattern": [{"TEXT": {"REGEX": "Digha Nikaya"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "(Ludwig [Vv]an )?Beethoven"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Falangist"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Future of Freedom Foundation"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Congressional committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?The Federal Bureau of Prisons"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Red Cross"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Annals of Internal Medicine"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Houston Post"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "MKULTRA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "KTOK"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Houston Chronicle"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Concentration Camp Program"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Operation Garden Plot"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?federal government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?British intelligence"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?JASON SOCIETY"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Composite Service Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Psychological Operations Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Council on Foreign Relations"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Defense"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Mental Health Institution"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Transportation"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Justice"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?L\.E\.A\.F\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Junio Valerio Black Prince Borghese"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Las Vegas Bullet"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?C\.I\.A\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?J\.C\. Penney"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?[Aa]-albionic Research"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Law Enforcement Assistance Force"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Young Americans for Freedom"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Military Police Unit"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Annals"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Inslaw"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "WACL"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "JDL"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "SJG"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "LEAA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Newe Anthropologie"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "NSTAC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "NEWSBYTES"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "NDPC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "National Democratic Policy Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "National Security Telecommunications Advisory Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Tom Davis Research"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Civil Affairs Operations"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Civil Affairs Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Big Brother"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Big brother"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?State Youthful Offenders Division"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?The California State Bar's Standing Committee on Professional Responsibility and Conduct"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?ILLUMINATI"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Crystic Institute"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Crystic institute"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?New World Order"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Roundtables"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Club of Rome"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Tri-lateral Commission"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Committee of 300"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Alaska Bar Association"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Rose Law Firm"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?University of Wisconsin"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?University of Southern California"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?ROTC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Plunge"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Fifth International Martin Luther King Tribunal"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bayshore Inn"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Los Angeles Sheriff's Dept\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.N\. Security Council"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.N\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Daily Telegraph"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Alternative 3"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Library of Congress"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?International Congress on Mental health"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Academy of Sciences"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\. S . Secret Service"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Office of Thrift Supervision"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Comptroller of the Currency"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Urban Plunge"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Institute of Atmospheric Physics"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?State Department"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Libertarian Party"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Libertarian Party"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bank of England"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of State"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?United Grand Lodge"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Board of General Purposes"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Recon Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?G\.S\.A\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?United States Military Establishment"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?State Dept\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Sapata Oil"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?California National Guard"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?H\.U\.D\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Royal Bank of Canada"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bank of Montreal"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Rockefeller Bank"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Express"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Loeb Company"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?H\.E\.W\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Senate"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Institute of Political Studies"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?law enforcement"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Housing & Urban Development"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Education"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Dropa"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Fria Ord"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Haitian"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Astronauts"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "ISHVARA"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Wehrwirtschaftsfuehrer"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Viet Cong"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Egyptian"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Lybian"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Cuban"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Japanese"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Nicaraguan"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "African"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Indian"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Icelandic"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Russian"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "CANADIAN"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Clandestinism"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "SCIENTOLOGISTS"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Trilateralists"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Homo Sapiens"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Spaniard"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Freemason"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Freemson"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Freemasonry"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Masonic"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Vietnam Moratorium"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Homestead Air Force Base"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Moon"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Swartz Creek"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Marshall Fields"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Gulf of Mexico"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "DREAMLAND"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Pharselis"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Northlake"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Oak Cliff"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Paddington station"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "David Munson Air Base"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Southeast Asia"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Eielson Air Force Base"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "U\.S\. Army Reserves"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Bay of Pigs"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "US Air Force L\. Fletcher Prouty"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Bohemian Grove"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Broadway"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Mockingbird Lane"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Mockingbird"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "LAOS"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Hyannisport"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Lop Nor"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Gross Wannsee"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Groom Lake"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "North [A-Z][a-z]+"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Monk"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Northern"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Allanwood"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Roswell"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Westminster"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Portland"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Richmond"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Sacramento"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "St Louis"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "New Haven"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Milwaukee"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Little Rock"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Los Angeles"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Briton"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "El Paso"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Columbia"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "FORT HUNT"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Butte"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "State College"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Williamsport"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Landsdale"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Newtown Square"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Allentown"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "New Castle"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Beckley"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Alton"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Stubenville"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Buffalo"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Belleville"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Bloomington"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Carbondale"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Champaign"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Constitantinoble"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Rock Island"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Birmingham"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "IRAQ"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "CANADA"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "ENGLAND"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Danvers"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "GERMANY"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Montreal"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Canada"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Baltimore"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Anchorage"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Albuquerque"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Albany"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "CORAL GABLES"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Washington,? D\.?C\.?"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Avon Park"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Mill Point"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "El Reno"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Zagreb"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "ZAGREB"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Whiskey Flat"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "site S-4"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "LUNA"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "AREA 51"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "El Salvador air base"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Greenville County"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Toyland"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Sunday"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Sun"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "North Pole"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Qua Vieaf"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "North Viet [Nn]am"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Owl's Nest"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Dachau"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Hill Billies"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Cave Man's"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Cherry Brook Valley"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Postbridge"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "McDill Air Force Base"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Egland Air Force Base"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Nazareth"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Latin America"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "GREAT BRITAIN"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "U\.S\.S\.R"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "New York City"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Soviet Union"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Viet Nam"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "U\.S\.A\."}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "U\.S\."}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "U\.S"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Saudi Arabia"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Washington, DC"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "the Vatican"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?British Empire"}}]},
|
|
|
|
{"label": "GPE", "pattern": [{"TEXT": {"REGEX": "Pennsylvania"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Warren Commission"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Tesla Electric Company"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Hahn group"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Department of Health and Human Services"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Council of Life Insurance"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Health Insurance Association of America\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. District Court"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?United States Pacific Fleet"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Pacific Fleet"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Guard"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Christic Institute"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Nugan-Hand"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Military-Industrial complex"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Condon Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "COMMUNICATIONS CANADA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?ABC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?CBS"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Unity movement"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Covert Action"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?General Electric"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?US Army"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?US Navy"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?London Underground"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?US Justice Department"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Sur Coester"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Colorado Springs Gazette"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?SoftQuad Inc\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?United Nations World Health Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?World Health Organization"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?L\.E\.A\.A\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Justice Dept\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "(THE )?HOUSE JUDICIARY COMMITTEE"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Contemporary Research, Inc\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?British Government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?House Select Committee on Assassinations"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Office of Criminal Justice Planning"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Defense of the United States"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of Commerce"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?MAJIC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Mother Jones"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?W\.A\. Harriman & Co\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Securities C\. Commission"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Rockefeller Syndicate"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dallas Citizens Council"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Round Table groups"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Devon Unidentified Flying Objects centre"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?San Francisco Sidewalk Astronomers Association"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dartmoor Livestock Protection Society"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Animal Defence Society"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Agriculture Department"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Perpetual Hidden Government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Office of the Deputy Chief of Staff of Personnel"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Harriman Fifteen Corp\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Israeli secret service"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bell Helicopter"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Eurocheque"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?MasterCard"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?CitiCorp"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Marconi company"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Marconi"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?NRO"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Illinois Bell Telephone Company"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?UFO Magazine"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?University of Toronto"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Taymar, Inc\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Kerr McGree Nuclear Corporation"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Edward R\. Murrow Center"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Ship and Commerce Corp\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Silverado Savings & Loan"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Association for the Advancement of Science"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?G\.H\. Walker & Co\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Office of Preparedness"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Aeronautics and Space Agency"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Martin Marietta company"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?John Birch Society"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Cleveland Field Office"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "The John F\. Kennedy Center for the Performing Arts Education Program"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?United States government"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Institute of Agrarian Reform"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Institute of Mental Health"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?INRA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Massachusetts Institute of Technology"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?International UFO Bureau"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Investigations Committee on Aerial Phenomena"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dept\. of the Army"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Air Force"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?University of Arizona"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Criminal Justice System"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Sheriffs Dept\."}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Standard & Poors"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Federal Reserve"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?L\.E\.A\.A\. Newsletter"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Texas School Book Depository"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Senate Select Committee on Intelligence"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Senate Intelligence Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?House Committee on Assassinations"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?House Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?House Select Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?House of Saud"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Wackenhut"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "KPFA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "WACKENHUT CORP"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?International Agency for Research on Cancer"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?The London Times"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "WACKENHUT"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Federal Emergency Management Agency"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Special Forces Unit"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Defense Central Index"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Joint Chiefs of Staff"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Federal Information Center"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Counter-intelligence Corps"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "CIC"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?British Establishment\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "[A-Z][a-z]+ ([A-Z]\. [A-Z][a-z]+)?(, Jr\.)"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "[A-Z][a-z]+ ([A-Z]\. [A-Z][a-z]+)?( Jr\.)"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "[A-Z][a-z]+ (([A-Z]\. )?[A-Z][a-z]+([A-Z][a-z]+)?)? Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "[[A-Z][a-z]+ [A-Z]\. ([A-Z][a-z])?[A-Z][a-z]+"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nostradameus"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Harrison Edward Livingstone"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. Herbert Sawyer"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "L\. Harmon Zeigler"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\. Everett Koop"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Edward L\. van Roden"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Kirschbaum"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Raphael Shumacker"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Martin Marietta"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "DAVID MELLOR"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. W\. Willmott"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "O'Neill"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "O'Neil"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "O'Neal"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Orsenigo"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Neil Bush"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "SWAMI VIVEKANANDA"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Morris Ellowitz"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Harry Thon"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Ferrie"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Darrell W\. Garner"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Goldstein"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mona B\. Saenz"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Abraham Zapruder"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clayton Fowler"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "WILLIAM L\. LAW"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Reynolds"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Marriner S. Eccles"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "H\.S\.M\. Coxeter"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "A\.D\. Bowie"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Phillip Geraci"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\.R\. Walthers"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "George McGann"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Perry Russo"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "JOHN PHILIP NICHOLS"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "SALVADOR ALLENDE"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "D\. CASOLARO"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "AFFIDAVIT"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Koreshan"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Koresh"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Carl Oglesby"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Earline Roberts"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lee Bowers, Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Locust"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Starmaster"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nuri Al-Said"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pamela Courson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Jimmy Hendrix"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Faisal II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Milton Cooper"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mostafa A Abdelkader"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Abdul Llah"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Marc J\. Seifer"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. J\. Biggar"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stanford White"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Paul I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Paul II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ensign Ball"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "O\.H\. Cril"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Crill"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "KRLL"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "KRLLL"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "EBE"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Rockefeller III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John D\. Rockefeller III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Dennis DeConcini"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Phil Gramm"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "BILL HAMILTON"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gramm"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "DeConcini"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Michael Emerling"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Rockefeller"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Johnston"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Dodd"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Laurence Rockefeller"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John D\. Rockefeller"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John D\. Rockefeller IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John D\. Rockefeller, Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William H\. Draper III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Eduardo"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "EARL W\. BRIAN"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "RICONOSCIUTO"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ramakrishna"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Don Ecker"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Galt"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\.D\. Jackson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "G\.H\. Walker III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Daniel Gooch"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ramses II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alexander II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alexander V"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alexander IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alexander VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Breck Wall"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "CARROLL QUIGLEY"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Swami Vivekananda"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "DOOLITTLE"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Oswald II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Oswald I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Umberto I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nikola Tesla"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Tesla"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\. Hamilton Ellis"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alfred Ely Beach"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ed Vyrdolak"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Beach"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lord Milner"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Agapetus I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Agapetus II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\. Victor Raiser II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\.V\. Raiser II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Boniface VIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Boniface VII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Boniface III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Boniface II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Boniface VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gonda"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ollie North"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Oliver North"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mr\. North"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\. Howard Hunt"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "MLK"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "RFK"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "O\. W\. Judd"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\.J\. Duffner"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\.P\. Morgan"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Richard II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James McDonald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "McDonald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Saunders"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Richard III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "George Terwilliger III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "C\.E\. Koop"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Callistus I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Francis von Hapsburg"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Malcom X"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Paramahansa Yogananda"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Victor IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Carles C\. Messick III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John D\. Rockefeller IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "MARCONI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Marconi"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Bill English"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Bill Cooper"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles W\. Bailey II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles McKee"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles X"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles V"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Jerome I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mark I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John XXIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ferdinand I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hillary( Rodham)? Clinton"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "(President|Bill) Clinton"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clinton"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "Bill and Hillary Clinton"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Vincent Foster, jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nicholas I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nicholas II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nicholas V"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Vespasian"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Zajac"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sylvester I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sylvester II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Engenius IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Philip IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stephen II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stephen VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pious X"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Honorius II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sergius III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Adrian I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Adrian VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William F\. Hamilton III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James Baker III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Stamps Farish III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Elizabeth II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Anacletus II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Edward II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Edward III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Edward V"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Matthew XVI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Vyrdolak"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Thothmes III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Harry Hurt III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Napoleon III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clement VIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clement I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clement VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clement IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pius X"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pius II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pius IX"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pius XI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Pius XII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leo IX"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leo X"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leo III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leo XIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Amenhotep IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles W\. Bailey II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Constantine"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Tut-ankh-amen"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sixtus V"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sixtus IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Benedict XIV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Benedict VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Benedict IX"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Benedict XV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "George I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "George III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gregory VII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gregory I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gregory XVI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gregory IX"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Constantine II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Albert I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Albert V\. Bryan Jr"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alfonso XII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alfonso XIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gustavus III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gustav III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Felix III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Valentinian III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Innocent III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Innocent IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frederick, II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frederick II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Theodosius I"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Henry VIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Henry VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Henry IV"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Louis IX"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Louis XVI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Joseph II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Catherine II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James D Bryant II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Paul III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Paul II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Julius II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Paul VI"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Baldwin II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Urban VIII"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles Goodhue, III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Edwin Meese, III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Thomas Downing"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Thomas E\. Davis"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\.A\. Milteer"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles Cabell"}}]},
|
|
|
|
{'label': 'PERSON', 'pattern': [{'TEXT': {'REGEX': 'James "Bo" Gritz'}}]},
|
|
|
|
{'label': 'PERSON', 'pattern': [{'TEXT': {'REGEX': 'Mike "Chucky" Peters'}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James Earl Ray"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Earl Wheeler"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William S\. Thompson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "S\. Rilling"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ralph Paul"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Scott Weekly"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ford, L\.H"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "A\. Vilenkin"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Renate Viebahn"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lustick, Ian S\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Friedman, Robert I\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Howard Sprague"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mark Clark"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Owen"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Theodore Shackley"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "DeLesseps Morrison"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Betty McDonald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Clines"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "L\. F\. Prouty"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "L\. Fletcher Prouty"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Knight, Amy W\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Martindale"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. Knox"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leon Oswald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "H\. Ross Perot"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stanly R\. Larsen"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "L\.E\. Allen Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "A\. Lewis"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "D\. Fleming"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. Edgar Hoover"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "V\. Pirie"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Nancy B\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "(Ronald )?Reagan"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "P\. Gibbs"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\. Citrine"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\. Moore"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lyndon Johnson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John( F\.)? Kennedy"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Zia ul-Haque"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ronald Payne"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. Muldoon"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\. Orr"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank H\. Schwable"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. F\. Doyle"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Munson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frances Clark"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "G\. Gordon Broadbent"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. Segal"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\. Brangwyn"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Luc Montagnier"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "A\. Luchaire"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "L\. Segal"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. William Davis"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William C\. Sullivan"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William W\. Quinn"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William R\. Pabst"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Oliver Nichelson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James Worrell, Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hank Suydam"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Pabst"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "G\. Maeferren"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Dian-Lanz"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Buria"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "North"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ortho III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Swami Nikhilananda"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Santo Trafficante"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Felix Rodreguez"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stanly F\. Yoles"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "G\. B\. Chisholm"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Baron Kurt von Schroeder"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "George Bush"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Roland Harriman"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Foster Dulles"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sherman Skolnick"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Francis G\. Powers"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Kenneth O'Donnell"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\.M\. English"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Sullivan"}}]},
|
|
|
|
{'label': 'PERSON', 'pattern': [{'TEXT': {'REGEX': 'C\.L\. "Lummie" Lewis'}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Garland Slack"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Billy Lovelady"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Holbrook"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Marguerite Oswald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Andrew Cetti"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Will H\. Griffin"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Peter Gregory"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Harry Grindell-Matthews"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Grindell-Matthews"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Rosenholtz"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Karen Silkwood"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "N\. McQuire"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Highland"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Harriman"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Saxby"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ellis O\. Briggs"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Daniel Ludwig"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Robert C\. Klowers"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Tatum B\. Laird"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William F\. Buckley, Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "William Randolph Hearst, Jr\."}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Justin Dart"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Larry McDonald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mac MacPherson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "MacPherson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Callen"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lievense"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "CASTRO"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Erik Jonsson"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Charles E\. Allen"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Laurence H\. Shoup"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Rieff"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Rockefeller"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Diamond"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Donald DeFreeze"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "McGovern"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "McCarthy"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John DiNardo"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gyeorgos C\. Hatonn"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John Connally"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lyndon LaRouche"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "LaRouche"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\.H\. Bowart"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alan H\. Belmont"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Alan Belmont"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Joseph C\. Ayres"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lehrman"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John McCone"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Al Haig"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. Latimer Clark"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "T\. W\. Rammell"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Elizabeth II"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Martha Honey"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Vince Bielski"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Anton Chaitkin"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Dean Burch"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Burch"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gaius Caesar"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Gaius Chaerea"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "McCone"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Richard Nixon"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David Wise"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Zapruder"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mortimer J\. Adler"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Angela Davis"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Segals"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "COLIN WALLACE"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Thomas B\. Ross"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "JACOB G\. HORNBERGER"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Bowart"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank H\. Schwable"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Billy Goodman"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leonard Pullin"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\. J\. Biggar"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David R\. Hunter"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hugh Everett, III"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Sigmund Diamond"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "J\. Pierpont Morgan"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Montagnier"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\. Schmunger"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Mary Pat Flaherty"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Vince Bielski"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lee Harvey Oswald"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Schmunger"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Stranglove"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "James Calcutt"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Florentine Giovanni Boccaccio"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Prescott Bush"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Maxwell"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Daniel Schorr"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Allard K\. Lowenstein"}}]},
|
|
|
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Daniel Patrick Moynihan"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Institute of Health"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Department of Justice"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Yakuza"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "YAKUZA"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Citizens' Committee to Clean Up the Courts"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bank of Criminals and Conspirators International"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Department of Justice"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?U\.S\. Justice Department"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?FBI Field Office"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Bureau of Investigation"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Dallas Citizens Council"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?University of Maryland,"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Carnegie Corporation"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Katzenbach Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Democratic Congress"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Endowment for Democracy"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Congress for Cultural Freedom"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Cambridge University Press"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Association of National Security Alumni"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Avon Books"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?American Psychiatric Association"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Ministry of Defence"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Ramparts Press"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Grove Press"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Association of Scholars"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Council on Foreign Relations"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Symbionese Liberation Army"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Washington Post"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Internal Revenue Service"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Terminate With Extreme Prejudice"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Center for Strategic and International Studies"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?School of Foreign Service"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?The Anglo-American Establishment"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Centre of Eternity"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Life magazine"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Full Disclosure Newspaper"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "North American Newspaper Alliance"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Trilateral Commision"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Ruling Elite"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?US Congress"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Church Committee"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?National Academy of Sciences"}}]},
|
|
|
|
{"label": "ORG", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Ramparts"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Bab"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Century"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "The Problem"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Air Force One"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Kingdom of God"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Grassy Knoll"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Frankfurt airport"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Dealey Plaza"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Central Africa"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Fort Detrick"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Area-51"}}]},
|
|
|
|
{"label": "LOC", "pattern": [{"TEXT": {"REGEX": "Clifton suspension bridge"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "mandkind"}}]},
|
|
|
|
{"label": "NULL", "pattern": [{"TEXT": {"REGEX": "Motor"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "AIDS virus"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "AIDS"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "AIDS-1"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "AIDS VIRUS"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Human Adult Leukemia virus"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "LAV"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Malaria"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "ARC"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "HTLV-1"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "HTLV-III"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "HTLV"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "HIV"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "HIV virus"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Sheep Visna Virus"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Pneumocystis carinii pneumonia"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Kaposi's sarcoma"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Smallpox"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Polio"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Gunshot"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "LSD"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Poison"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "MHz"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "TNT"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Acquired Immuno-Deficiency Syndrome"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Equal Opportunity Employer"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "Visna"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "visna"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "visna virus"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Korean War"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "National Jury Rights Day"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Miranda Case"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "World War II"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Second World War"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "KRISTALLNACHT"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "First World War"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "World War I"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Parliament of Religions"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "JFK assassination"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Apollo 14 mission"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Apollo 11"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "([Tt]he )?Andrew Cetti Affair"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Railway Mania"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "EVENT"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "CHRISTIAN ERA"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "ante-Niccne"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Vietnam War"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Project Red-light"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Moon mission"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "UFO"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Apollo 11"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Johnson's war on poverty"}}]},
|
|
|
|
{"label": "MISC", "pattern": [{"TEXT": {"REGEX": "AIDS"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Chinesse"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Vietnamese"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "Identified Alien Crafts"}}]},
|
|
|
|
{"label": "EVENT", "pattern": [{"TEXT": {"REGEX": "IAC"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "multiculturalists"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Vietnamese"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Anglo-Saxon"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Patriots"}}]},
|
|
|
|
{"label": "NORP", "pattern": [{"TEXT": {"REGEX": "Patriot"}}]},
|
|
|
|
]
|
|
|
|
# Hand the custom pattern list built above to the span ruler in the pipeline.
ruler.add_patterns(patterns)

# Both folders are resolved relative to the directory the script is run from.
workingDir = os.getcwd()
# Folder the tagged copies are written to.
outputPath = os.path.join(workingDir, '../src-xml')
# Folder holding the untouched source collection.
CollPath = os.path.join(workingDir, '../pre-src-xml')

# Names of everything inside the source collection folder, echoed for reference.
insideDir = os.listdir(CollPath)
print(insideDir)
|
|
|
|
|
2023-03-29 14:22:05 -04:00
|
|
|
# Copies files in case they do not exist
|
|
|
|
def copyTextFiles(file):
    """Copy one file from the source collection folder into the output folder.

    The file is read as UTF-8 text from ``CollPath`` and written, under the
    same name, as UTF-8 text into ``outputPath``. An existing file of that
    name in the output folder is overwritten.

    :param file: name of a file located inside ``CollPath``.
    """
    sourceFile = os.path.join(CollPath, file)
    targetFile = os.path.join(outputPath, file)

    # Read the whole source first; the ``with`` block closes the handle, so no
    # explicit close() call is needed.
    with open(sourceFile, 'r', encoding='utf8') as inFile:
        content = inFile.readlines()

    # Progress message, printed once per copied file.
    print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~ copying " + file + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ")

    # Write the lines back out unchanged into the output folder.
    with open(targetFile, 'w', encoding='utf8') as f:
        f.writelines(content)
|
2023-03-27 17:14:07 -04:00
|
|
|
|
2023-03-29 14:22:05 -04:00
|
|
|
# Function runs through the tokens of given file. Entities are stored in array, then returned. Called by regexFile().
|
2023-03-27 17:14:07 -04:00
|
|
|
def entitycollector(tokens):
    """Collect the entities of a processed document and log them to a file.

    Each entity is logged to ``conspPERSON.txt`` in the current working
    directory as one ``[text, label]`` list per line. The file is opened in
    write mode, so every call replaces the log left by the previous call.

    :param tokens: object whose ``.ents`` attribute is an iterable of sortable
        entities, each exposing ``.text`` and ``.label_`` (in this script, the
        document returned by ``nlp()``).
    :return: dict mapping entity text to entity label. If the same text occurs
        with several labels, the occurrence that sorts last wins.
    """
    entities = {}
    logLines = []
    # Walk the entities in sorted order, building the lookup and the log together.
    for ent in sorted(tokens.ents):
        logLines.append(str([ent.text, ent.label_]) + '\n')
        entities[ent.text] = ent.label_

    # Explicit UTF-8, like every other file this script writes, so the log does
    # not depend on the platform's default encoding.
    with open('conspPERSON.txt', 'w', encoding='utf8') as f:
        f.writelines(logLines)

    return entities
|
2023-03-27 17:14:07 -04:00
|
|
|
|
2023-03-29 14:22:05 -04:00
|
|
|
# Function runs regex through given file.
|
|
|
|
def regexFile(file):
    """Tag the named entities of one XML file in the output folder, in place.

    Steps:
      1. Read the XML and pull the text of every ``<p>`` element into a single
         string with an XPath query.
      2. Reduce that string to letters and spaces and run it through ``nlp()``.
      3. For each entity whose label is one of the wrapped labels, replace
         every occurrence of the entity text in the XML with
         ``<ent type='LABEL'>text</ent>``.
      4. Write the result back to the same file and pass the file to
         ``checkTags()`` for clean-up.

    :param file: name of an XML file located inside ``outputPath``.
    """
    fileDir = os.path.join(outputPath, file)
    # Only entities carrying one of these labels are wrapped; others are ignored.
    wrappedLabels = {"PERSON", "LOC", "ORG", "GPE", "NORP", "EVENT", "WORK_OF_ART"}

    # Keep the original XML text for the wrapping step; ``with`` makes sure the
    # handle is closed instead of being left for the garbage collector.
    with open(fileDir, encoding='utf-8') as inFile:
        xml = inFile.read()

    with PySaxonProcessor(license=False) as proc:
        xp = proc.new_xpath_processor()
        node = proc.parse_xml(xml_text=xml)
        xp.set_context(xdm_item=node)
        # Whitespace-normalize each <p> and join the results into one string.
        xpath = xp.evaluate('//p ! normalize-space() => string-join()')
        # Convert to a plain Python string while the processor is still open.
        string = str(xpath)

    # Replace every run of characters that are not ASCII letters or spaces with
    # one space. The range is spelled A-Za-z because A-z would also let the
    # punctuation between 'Z' and 'a' ([ \ ] ^ _ `) through. Newlines fall under
    # this rule too, so no separate newline pass is needed.
    cleanedText = regex.sub(r'[^A-Za-z ]+', ' ', string)

    # Tokenize the cleaned text and collect {entity text: label}.
    tokens = nlp(cleanedText)
    dictEntities = entitycollector(tokens)

    wrappedText = xml
    wrappedAny = False
    for entity, label in dictEntities.items():
        if label not in wrappedLabels:
            continue
        # key_template is the markup that replaces each occurrence of the entity.
        key_template = "<ent type='" + label + "'>" + entity + "</ent>"
        # NOTE(review): this is a plain substring replacement, so it can also
        # hit text inside tag names or inside tags inserted earlier; checkTags()
        # repairs some of those cases afterwards.
        wrappedText = wrappedText.replace(entity, key_template)
        wrappedAny = True
        print("WRAPPING " + entity)

    # Write once, after all replacements, rather than once per entity. When
    # nothing was wrapped the file is left untouched.
    if wrappedAny:
        with open(fileDir, 'w', encoding='utf8') as f:
            f.write(wrappedText)

    # ebb: send the tagged file to the checkTags() function for cleaning.
    checkTags(file)
|
2023-03-29 14:22:05 -04:00
|
|
|
|
2023-04-30 18:35:36 -04:00
|
|
|
# This part of the code is a WIP.
|
|
|
|
# ebb: I just activated it, and it works! (Nice job.) I altered it just a bit. May need more regexes to match.
|
2023-03-31 15:01:49 -04:00
|
|
|
## It tries to find weird or invalid elements/tags and fix them.
|
|
|
|
def checkTags(file):
    """Clean up malformed or unwanted <ent> markup in one tagged output file.

    Reads ``file`` from the ``outputPath`` folder, passes every line through
    ``_cleanLine`` and writes the result back to the same path.

    ebb: NOTE: every line is written back whether or not a rule matched. If we
    only wrote the lines that matched a regex, the output files would be mostly
    empty. Where there's no regex match, no substitution happens and the line
    is written unchanged.

    :param file: name of the file inside ``outputPath`` to clean in place.
    """
    fileDir = os.path.join(outputPath, file)

    # Read the whole file first: the same path is reopened for writing below,
    # and opening with 'w' truncates it.
    with open(fileDir, 'r', encoding='utf8') as inFile:
        content = inFile.readlines()

    with open(fileDir, 'w', encoding='utf8') as f:
        for origLine in content:
            newLine = _cleanLine(origLine)
            if origLine != newLine:
                print("broken line found, fixing...")
                print(origLine + "\n INTO.")
                print(newLine)
            f.write(newLine)
    print("File checking finished.")


def _cleanLine(line):
    """Return ``line`` with every tag clean-up rule applied, in order."""
    newLine = line
    for pattern, replacement in _PRE_FLATTEN_RULES:
        newLine = pattern.sub(replacement, newLine)

    # ebb: We saw triple or quadruple nested <ent> tags in <ent> tags on the
    # LOTR project. Each pass removes one level of nesting, preserving only the
    # outermost tags, so keep going until a pass changes nothing. Every
    # successful substitution deletes a tag pair, so the loop always ends.
    while True:
        newLine, changed = _NESTED_ENT.subn(r"\1\2\3", newLine)
        if not changed:
            break

    for pattern, replacement in _POST_FLATTEN_RULES:
        newLine = pattern.sub(replacement, newLine)
    return newLine


# Rules applied before nested <ent> tags are flattened. ORDER MATTERS: e.g.
# "New York Times" has to be wrapped before the shorter "New York" rule runs.
# NOTE: the [A-z] class also covers the characters between 'Z' and 'a', which
# includes the underscore in a type such as WORK_OF_ART.
_PRE_FLATTEN_RULES = [
    # An entity was tagged inside an element name: <spe<ent type='ORG'>cia</ent>l>
    (regex.compile(r"(</?spe)<ent type='ORG'>(cia)</ent>(l>)"), r"\1\2\3"),
    # Force our own classification for names that were missed or mis-typed.
    (regex.compile(r"([^>])(New\s+?York\s+?Times)([^<])"), r"\1<ent type='ORG'>\2</ent>\3"),
    (regex.compile(r"([^>])(British)([^<])"), r"\1<ent type='NORP'>\2</ent>\3"),
    (regex.compile(r"([^>])(New\s+?York(\s+?City)?)([^<])"), r"\1<ent type='GPE'>\2</ent>\4"),
    # "Sunday(s)" should not be tagged as an entity at all.
    (regex.compile(r"<ent type='[A-z]+?'>(Sundays?)</ent>"), r"\1"),
    # "North ..." place names mistaken for a PERSON become a LOC.
    (regex.compile(r"<ent type='PERSON'>(North)</ent>([a-z]*\s+[A-Z][a-z]+)*"), r"<ent type='LOC'>\1\2</ent>"),
    (regex.compile(r"(North\s+o?f?\s*([A-Z][a-z]+\s+)+)"), r"<ent type='LOC'>\1</ent>"),
    # Disabled rule, kept for reference: <<ent type='ORG'>di</ent>v>
    #   (regex.compile(r"(<)<ent type='ORG'>(di)</ent>(v>)"), r"\1\2\3"),
]

# One <ent> directly nested inside another: keep the outer tag, drop the inner.
_NESTED_ENT = regex.compile(
    r"(<ent type='[A-z]+?'>[^<]*?)<ent type='[A-z]+?'>([^<]+?)</ent>([^<]*?</ent>)"
)

# Rules applied after flattening.
# NOTE: [A-Z]+ here does not match a type containing an underscore (WORK_OF_ART).
_POST_FLATTEN_RULES = [
    # The type attribute value itself got tagged: <ent type='<ent type='ORG'>ORG</ent>'>
    (regex.compile(r"(<ent type=')<ent type='ORG'>(ORG)</ent>('>)"), r"\1\2\3"),
    # Two sibling <ent> tags inside one outer <ent>: merge into a single entity.
    # The replacement must be a raw string, otherwise "\1" is the control
    # character U+0001 rather than a reference to group 1.
    (regex.compile(r"(<ent type='[A-Z]+'>)<ent type='[A-Z]+'>(\w+)</ent><ent type='[A-Z]+'>(\w+)</ent>(</ent>)"), r"\1\2 \3\4"),
    # Outer <ent> holding: word, tagged word, word, tagged word.
    (regex.compile(r"(<ent type='[A-Z]+?'>)(\w+)\s+?(<ent type='[A-Z]+?'>)(\w+)(</ent>)(\w+)(<ent type='[A-Z]+?'>)(\w+)(</ent>)(</ent>)"), r"\1\2 \4 \6 \8\9"),
    # ebb: Problem rule, left disabled: it eliminates the tags in
    # <ent type="PERSON">John Kennedy</ent>'s
    #   (regex.compile(r"<ent type='\w+'>(\w+)</ent>('\w)"), r"\1\2"),
]
|
2023-03-31 15:01:49 -04:00
|
|
|
|
2023-03-29 14:22:05 -04:00
|
|
|
# Driver loop: run the processing steps, in this order, on every entry of insideDir.
for file in insideDir:
    copyTextFiles(file)
    regexFile(file)
    #checkTags(file)
    # ebb: You don't really want to activate checkTags here,
    # because it would run over the untagged input files.
    # (checkTags is called from the tagging code instead, once the tagged file has been written.)
|