From 946f25035e76bd97c1deb22fd5aec98312d07014 Mon Sep 17 00:00:00 2001 From: ebeshero Date: Fri, 28 Apr 2023 01:26:51 -0400 Subject: [PATCH] repairing and restoring XSLT, spot fixes to personTagger.py --- pythonCode/personTagger.py | 14 ++--- xslt/htmlConverter-complete.xsl | 93 +++++++++++++++++++++++++++++++++ xslt/xml-paragraph-fixer.xsl | 17 +----- 3 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 xslt/htmlConverter-complete.xsl diff --git a/pythonCode/personTagger.py b/pythonCode/personTagger.py index ff5a25f..e6cf326 100644 --- a/pythonCode/personTagger.py +++ b/pythonCode/personTagger.py @@ -1266,11 +1266,11 @@ patterns = [ {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\.\s+?Citrine"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\.\s+?Moore"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lyndon\s+?Johnson"}}]}, - {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John\s+?Kennedy"}}]}, + {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John(\s*F\.?)\s*Kennedy"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Zia\s+?ul-Haque"}}]}, - {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ronald\s+?Payne"}}]}, - {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?Muldoon"}}]}, - {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\.\s+?Orr"}}]}, + {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ronald\s*Payne"}}]}, + {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s*Muldoon"}}]}, + {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\.\s*Orr"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?F\.\s+?Doyle"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?Munson"}}]}, @@ -1382,7 +1382,6 @@ patterns = [ {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Billy\s+?Goodman"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leonard\s+?Pullin"}}]}, - {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John\s+?F\.\s+?Kennedy"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?J\.\s+?Biggar"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?R\.\s+?Hunter"}}]}, {"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hugh\s+?Everett,\s+?III"}}]}, @@ -1523,7 +1522,7 @@ patterns = [ ruler.add_patterns(patterns) workingDir = os.getcwd() -CollPath = os.path.join(workingDir, '../regexConspTest') +CollPath = os.path.join(workingDir, '../pre-src-xml') outputPath = os.path.join(workingDir, 'personTestingOutput/') # Everything in original conspiracy directory. insideDir = os.listdir(CollPath) @@ -1636,7 +1635,8 @@ def checkTags(file): newLine = regex.sub(r"((ORG)('>)", r"\1\2\3", newLine) newLine = regex.sub(r"()(\w+)(\w+)()", "\1\2 \3\4", newLine) newLine = regex.sub(r"()(\w+)\s+?()(\w+)()(\w+)()(\w+)()()", r"\1\2 \4 \6 \8\9", newLine) - newLine = regex.sub(r"(\w+)('\w)", r"\1\2", newLine) + # ebb: Problem line below: eliminates John Kennedy's + # newLine = regex.sub(r"(\w+)('\w)", r"\1\2", newLine) # # cial> # <div> diff --git a/xslt/htmlConverter-complete.xsl b/xslt/htmlConverter-complete.xsl new file mode 100644 index 0000000..f8b60ba --- /dev/null +++ b/xslt/htmlConverter-complete.xsl @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + = + + <xsl:value-of select="$filename"/> + + + Fill in your link line for CSS and JS in the XSLT here! + + +

+ + + + + + +
+
+ + +
+ + +

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/xslt/xml-paragraph-fixer.xsl b/xslt/xml-paragraph-fixer.xsl index 810713e..12b9282 100644 --- a/xslt/xml-paragraph-fixer.xsl +++ b/xslt/xml-paragraph-fixer.xsl @@ -21,7 +21,7 @@ - + @@ -57,19 +57,4 @@ - - - - - - - - - - - - - - - \ No newline at end of file