mirror of
https://github.com/nhammer514/textfiles-politics.git
synced 2025-01-13 08:19:32 -05:00
repairing and restoring XSLT, spot fixes to personTagger.py
This commit is contained in:
parent
204363595d
commit
946f25035e
@ -1266,11 +1266,11 @@ patterns = [
|
|||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\.\s+?Citrine"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "W\.\s+?Citrine"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\.\s+?Moore"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "E\.\s+?Moore"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lyndon\s+?Johnson"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Lyndon\s+?Johnson"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John\s+?Kennedy"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John(\s*F\.?)\s*Kennedy"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Zia\s+?ul-Haque"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Zia\s+?ul-Haque"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ronald\s+?Payne"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Ronald\s*Payne"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?Muldoon"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s*Muldoon"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\.\s+?Orr"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "F\.\s*Orr"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?F\.\s+?Doyle"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?F\.\s+?Doyle"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?Munson"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?Munson"}}]},
|
||||||
@ -1382,7 +1382,6 @@ patterns = [
|
|||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Frank\s+?H\.\s+?Schwable"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Billy\s+?Goodman"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Billy\s+?Goodman"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leonard\s+?Pullin"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Leonard\s+?Pullin"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "John\s+?F\.\s+?Kennedy"}}]},
|
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?J\.\s+?Biggar"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "R\.\s+?J\.\s+?Biggar"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?R\.\s+?Hunter"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "David\s+?R\.\s+?Hunter"}}]},
|
||||||
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hugh\s+?Everett,\s+?III"}}]},
|
{"label": "PERSON", "pattern": [{"TEXT": {"REGEX": "Hugh\s+?Everett,\s+?III"}}]},
|
||||||
@ -1523,7 +1522,7 @@ patterns = [
|
|||||||
ruler.add_patterns(patterns)
|
ruler.add_patterns(patterns)
|
||||||
|
|
||||||
workingDir = os.getcwd()
|
workingDir = os.getcwd()
|
||||||
CollPath = os.path.join(workingDir, '../regexConspTest')
|
CollPath = os.path.join(workingDir, '../pre-src-xml')
|
||||||
outputPath = os.path.join(workingDir, 'personTestingOutput/')
|
outputPath = os.path.join(workingDir, 'personTestingOutput/')
|
||||||
# Everything in original conspiracy directory.
|
# Everything in original conspiracy directory.
|
||||||
insideDir = os.listdir(CollPath)
|
insideDir = os.listdir(CollPath)
|
||||||
@ -1636,7 +1635,8 @@ def checkTags(file):
|
|||||||
newLine = regex.sub(r"(<ent type=')<ent type='ORG'>(ORG)</ent>('>)", r"\1\2\3", newLine)
|
newLine = regex.sub(r"(<ent type=')<ent type='ORG'>(ORG)</ent>('>)", r"\1\2\3", newLine)
|
||||||
newLine = regex.sub(r"(<ent type='[A-Z]+'>)<ent type='[A-Z]+'>(\w+)</ent><ent type='[A-Z]+'>(\w+)</ent>(</ent>)", "\1\2 \3\4", newLine)
|
newLine = regex.sub(r"(<ent type='[A-Z]+'>)<ent type='[A-Z]+'>(\w+)</ent><ent type='[A-Z]+'>(\w+)</ent>(</ent>)", "\1\2 \3\4", newLine)
|
||||||
newLine = regex.sub(r"(<ent type='[A-Z]+?'>)(\w+)\s+?(<ent type='[A-Z+?]'>)(\w+)(</ent>)(\w+)(<ent type='[A-Z]+?'>)(\w+)(</ent>)(</ent>)", r"\1\2 \4 \6 \8\9", newLine)
|
newLine = regex.sub(r"(<ent type='[A-Z]+?'>)(\w+)\s+?(<ent type='[A-Z+?]'>)(\w+)(</ent>)(\w+)(<ent type='[A-Z]+?'>)(\w+)(</ent>)(</ent>)", r"\1\2 \4 \6 \8\9", newLine)
|
||||||
newLine = regex.sub(r"<ent type='\w+'>(\w+)</ent>('\w)", r"\1\2", newLine)
|
# ebb: Problem line below: eliminates <ent type="PERSON">John Kennedy</ent>'s
|
||||||
|
# newLine = regex.sub(r"<ent type='\w+'>(\w+)</ent>('\w)", r"\1\2", newLine)
|
||||||
#
|
#
|
||||||
# <spe<ent type='ORG'>cia</ent>l>
|
# <spe<ent type='ORG'>cia</ent>l>
|
||||||
# <<ent type='ORG'>di</ent>v>
|
# <<ent type='ORG'>di</ent>v>
|
||||||
|
93
xslt/htmlConverter-complete.xsl
Normal file
93
xslt/htmlConverter-complete.xsl
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!--
    XSLT Stage 2: HTML output for the individual collection files.

    Reads every document in the ../src-xml collection and writes one page per
    document to ../docs/collection/{filename}.html. Each page carries the shared
    site navigation followed by the transformed document content:
      * p       becomes an HTML p
      * ent     becomes a span whose class and title are the entity type
      * special becomes a span with class "special"
      * info    becomes a span with class "info" and the type as its title
    Anything else falls through to the built-in template rules.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:math="http://www.w3.org/2005/xpath-functions/math"
    xmlns="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="xs math"
    version="3.0">

    <xsl:output method="xhtml" html-version="5" omit-xml-declaration="yes" include-content-type="no" indent="yes"/>

    <!-- 2023-04-27 ebb: This is XSLT Stage 2: Outputting HTML for the individual collection files -->
    <!-- The pages are generated from this collection, not from the document the
         transformation is invoked on. -->
    <xsl:variable name="conspiracy" as="document-node()+" select="collection('../src-xml')"/>

    <!-- Entry point: emit one result document per file in the collection. -->
    <xsl:template match="/">
        <xsl:for-each select="$conspiracy">
            <!-- Last path segment of the document URI with a trailing ".xml" removed.
                 The pattern is anchored so that only the extension is stripped. -->
            <xsl:variable name="filename" as="xs:string"
                select="current() ! base-uri() ! tokenize(., '/')[last()] ! replace(., '\.xml$', '')"/>
            <!-- Serialize as XHTML/HTML5 to agree with xsl:output above; the XML method
                 would collapse empty elements into self-closing tags that HTML parsers
                 do not treat as closed. The remaining serialization settings are
                 inherited from the unnamed xsl:output declaration. -->
            <xsl:result-document method="xhtml" html-version="5" href="../docs/collection/{$filename}.html">
                <html>
                    <head>
                        <title><xsl:value-of select="$filename"/></title>
                        <link rel="stylesheet" href="../CSSstyle.css"/>
                        <!--Fill in your link line for CSS and JS in the XSLT here! -->
                        <xsl:comment>Fill in your link line for CSS and JS in the XSLT here! </xsl:comment>
                    </head>
                    <body>
                        <h1 id="title-index"><xsl:value-of select="$filename"/></h1>
                        <!-- Site navigation; links are relative to docs/collection/. -->
                        <nav id="menu">
                            <a href="../index.html">
                                <div class="button">Home</div>
                            </a>
                            <a href="../fulltext.html">
                                <div class="button">Fulltext</div>
                            </a>
                            <a href="../analysis.html">
                                <div class="button">Analysis</div>
                            </a>
                            <a href="../gallery.html">
                                <div class="button">Gallery</div>
                            </a>
                            <a href="../methods.html">
                                <div class="button">Methods</div>
                            </a>
                            <a href="../about.html">
                                <div class="button">About</div>
                            </a>
                            <a href="../GitHub.html">
                                <div class="button">GitHub <img alt="github icon"
                                        src="https://logos-download.com/wp-content/uploads/2016/09/GitHub_logo.png"
                                        width="15"/>
                                </div>
                            </a>
                        </nav>

                        <!-- Context item is the current collection document. -->
                        <xsl:apply-templates/>

                    </body>
                </html>
            </xsl:result-document>
        </xsl:for-each>
    </xsl:template>

    <!-- Paragraphs map one-to-one onto HTML paragraphs. -->
    <xsl:template match="p">
        <p>
            <xsl:apply-templates/>
        </p>
    </xsl:template>

    <!-- ebb: adding mouseover tooltip via title attribute-->
    <!-- The entity type is used both as the CSS class and as the tooltip text. -->
    <xsl:template match="ent">
        <span class="{@type}" title="{@type}">
            <xsl:apply-templates/>
        </span>
    </xsl:template>

    <!--ebb: What about the special and info XML tags? -->

    <xsl:template match="special">
        <span class="special">
            <xsl:apply-templates/>
        </span>
    </xsl:template>

    <!-- Fixed class "info"; the element's type attribute becomes the tooltip. -->
    <xsl:template match="info">
        <span class="info" title="{@type}">
            <xsl:apply-templates/>
        </span>
    </xsl:template>

</xsl:stylesheet>
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
<xsl:for-each select="$conspiracy">
|
<xsl:for-each select="$conspiracy">
|
||||||
<xsl:variable name="filename" as="xs:string" select="current() ! base-uri() ! tokenize(., '/')[last()]"/>
|
<xsl:variable name="filename" as="xs:string" select="current() ! base-uri() ! tokenize(., '/')[last()]"/>
|
||||||
<xsl:result-document method="xml" indent="yes" href="../src-xml/{$filename}">
|
<xsl:result-document method="xml" indent="yes" href="../pre-src-xml/{$filename}">
|
||||||
<!-- ebb: NEED TO LOOK UP HOW TO SET UP INDIVIDUAL RESULT DOCUMENTS output to folder -->
|
<!-- ebb: NEED TO LOOK UP HOW TO SET UP INDIVIDUAL RESULT DOCUMENTS output to folder -->
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
<xsl:when test="count(descendant::p) gt 1">
|
<xsl:when test="count(descendant::p) gt 1">
|
||||||
@ -57,19 +57,4 @@
|
|||||||
</xsl:analyze-string>
|
</xsl:analyze-string>
|
||||||
</div>
|
</div>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="info">
|
|
||||||
<info type="{@type}">
|
|
||||||
<xsl:apply-templates/>
|
|
||||||
</info>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="special">
|
|
||||||
<info type="{@type}">
|
|
||||||
<xsl:apply-templates/>
|
|
||||||
</info>
|
|
||||||
</xsl:template>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
Loading…
Reference in New Issue
Block a user