created main.py with code

2025-11-25 08:53:17 -05:00 · 2023-03-27 17:14:07 -04:00 · 2023-03-27 17:14:07 -04:00 · d18a6f5b16
commit d18a6f5b16
parent 3dfe20153d
2 changed files with 86 additions and 8 deletions
--- a/pythonCode/main.py
+++ b/pythonCode/main.py
@ -0,0 +1,80 @@
 import spacy
 from collections import Counter
 import os
 # Uncomment this line if you need the language model.
 # If you already have it, comment it out.
 # Let's try the different spaCy language models for this. We can compare _lg with _md or _sm
 # Locate the collection directory relative to the current working directory,
 # so the result depends on where the script is launched from.
 workingDir = os.getcwd()
 CollPath = os.path.join(workingDir, '../regexConsp')
 # List and print the directory contents up front as a quick check of the path.
 insideDir = os.listdir(CollPath)
 print(insideDir)
 # Load the en_core_web_lg pipeline once at module level; readTextFiles reuses
 # this single instance for every file it processes.
 nlp = spacy.load("en_core_web_lg")
 def readTextFiles(filepath):
    """Read one file, run the spaCy pipeline over it, and report its entities.

    The file is decoded as UTF-8 and its entire contents are handed to the
    module-level ``nlp`` pipeline. The entity strings gathered by
    ``entitycollector`` are printed and also returned so callers can
    aggregate them (for example with ``collections.Counter``).

    NOTE(review): the driver loop in this file passes ``.xml`` files; the text
    is fed to spaCy as-is, so any markup is not stripped first -- confirm this
    is intended.

    Args:
        filepath: Path of the file to read.

    Returns:
        The list of entity texts produced by ``entitycollector``.
    """
    # Only the read needs the handle; release it before the expensive NLP pass.
    with open(filepath, 'r', encoding='utf8') as f:
        readFile = f.read()
    # A text-mode read() already returns str, so no str() conversion is needed.
    tokens = nlp(readFile)
    listEntities = entitycollector(tokens)
    print(listEntities)
    return listEntities
 def entitycollector(tokens):
    """Print every entity found in *tokens* and return their texts.

    For each item in ``tokens.ents`` this prints the entity text, its label,
    and the result of ``spacy.explain`` for that label.

    Args:
        tokens: An object exposing an ``ents`` iterable whose items have
            ``text`` and ``label_`` attributes.

    Returns:
        A list of the entity texts, in the order they appear in ``ents``.
    """
    spans = tokens.ents
    for span in spans:
        label = span.label_
        print(span.text, label, spacy.explain(label))
    return [span.text for span in spans]
 # Run entity extraction on each .xml file in the collection directory.
 for file in os.listdir(CollPath):
    # Skip anything that is not an XML file.
    if not file.endswith(".xml"):
        continue
    filepath = f"{CollPath}/{file}"
    print(filepath)
    readTextFiles(filepath)
 # print(listCardinals)
 # cardinal_freq = Counter(listCardinals)
 # topTen = cardinal_freq.most_common(10)
 # print(topTen)
 # grimm = open('grimm.txt', 'r', encoding='utf8')
 # grimmDoc = grimm.read()
 # grimmNLP = nlp(grimmDoc)
 # grimmmSentences = grimmNLP.sents
 # def sentenceLengths(sentences):
 #     lengths = []
 #     for s in sentences:
 #         length = len(s.text)
 #         lengths.append(length)
 #     return sorted(lengths)
 # grimmLengths = sentenceLengths(grimmmSentences)
 # # print(grimmLengths)
 # maxVal = max(grimmLengths)
 # minVal = min(grimmLengths)
 # print('The shortest sentence is ' + str(minVal) + ' characters long.')
 # print('The longest sentence is ' + str(maxVal) + ' characters long.')
 # for sentence in grimmNLP.sents:
 # #    print(sentence.text)
 #     length = len(sentence.text)
 #     if length == minVal:
 #         print("The shortest sentence is: " + sentence.text)
 #     if len(sentence.text) == maxVal:
 #         print('The longest sentence is: ' + sentence.text + ' :' + str(maxVal) + 'characters')
--- a/pythonCode/test.py
+++ b/pythonCode/test.py
@ -1,8 +1,6 @@
-home = C:\Users\Nathan\AppData\Local\Programs\Python\Python311
+import os
-implementation = CPython
+
-version_info = 3.11.1.final.0
+workingDir = os.getcwd()
-virtualenv = 20.16.7
+CollPath = os.path.join(workingDir, '../regexConsp')
-include-system-site-packages = false
+insideDir = os.listdir(CollPath)
-base-prefix = C:\Users\Nathan\AppData\Local\Programs\Python\Python311
+print(insideDir)
 base-exec-prefix = C:\Users\Nathan\AppData\Local\Programs\Python\Python311
 base-executable = C:\Users\Nathan\AppData\Local\Programs\Python\Python311\python.exe