mirror of
https://github.com/nhammer514/textfiles-politics.git
synced 2025-05-07 00:56:12 -04:00
modified some code and new markdown file with notes
This commit is contained in:
parent
b5373e4b4b
commit
21283b1ebb
2 changed files with 8 additions and 1 deletions
|
@ -2,7 +2,7 @@ import spacy
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
import re as regex
|
import re as regex
|
||||||
import os
|
import os
|
||||||
# Uncomment this line if you need the language model.
|
# nlp = spacy.cli.download("en_core_web_lg")
|
||||||
# If you already have it, comment it out.
|
# If you already have it, comment it out.
|
||||||
# Let's try the different spaCy language models for this. We can compare _lg with _md or _sm
|
# Let's try the different spaCy language models for this. We can compare _lg with _md or _sm
|
||||||
workingDir = os.getcwd()
|
workingDir = os.getcwd()
|
||||||
|
@ -33,6 +33,7 @@ def entitycollector(tokens):
|
||||||
with open("outputNames.txt", 'a') as f:
|
with open("outputNames.txt", 'a') as f:
|
||||||
f.write("\n" + entity.text)
|
f.write("\n" + entity.text)
|
||||||
print("Writing in outputNames.txt: " + entity.text)
|
print("Writing in outputNames.txt: " + entity.text)
|
||||||
|
## Below includes entity values and stuff
|
||||||
# print(entity.text, entity.label_, spacy.explain(entity.label_))
|
# print(entity.text, entity.label_, spacy.explain(entity.label_))
|
||||||
entities.append(entity.text)
|
entities.append(entity.text)
|
||||||
return entities
|
return entities
|
||||||
|
|
6
pythonCode/read.md
Normal file
6
pythonCode/read.md
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
# PythonCode
|
||||||
|
## Notes
|
||||||
|
- You will need to uncomment the ```nlp = spacy.cli.download("en_core_web_lg")``` line to download the language model. You can comment it out again once it has downloaded.
|
||||||
|
- The ```re``` library provides our regex functions. It is imported under the name ```regex``` (```import re as regex```) and uses standard regular expression syntax.
|
||||||
|
- Every time ```main.py``` launches, ```outputNames.txt``` is cleared. The script will need to go through the entirety of our files, which still has to be done. Will all of the files work?
|
||||||
|
- We will need to modify the code so that it can produce new ```.xml``` files. It is probably best to output those files to a new directory once we get started on that.
|
Loading…
Add table
Add a link
Reference in a new issue