mirror of
https://github.com/DISARMFoundation/DISARMframeworks.git
synced 2025-12-17 09:14:10 -05:00
moved to datasets as CSVs
Changed from data held in Excel files to data held in CSV files. This gives us a better view of what's changed in the datasets when we push them to git.
This commit is contained in:
parent
cae9cbc55e
commit
1bc8d88b63
99 changed files with 15137 additions and 14858 deletions
|
|
@ -11,9 +11,36 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<ipython-input-5-7e799212f01e>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
|
||||
" pd.set_option('display.max_colwidth', -1)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"table README is the same\n",
|
||||
"table examples is the same\n",
|
||||
"table incidents is the same\n",
|
||||
"table incidenttechniques is the same\n",
|
||||
"table narratives is the same\n",
|
||||
"table externalgroups is the same\n",
|
||||
"table externalgroupcounters is the same\n",
|
||||
"table suggested_externalgroups is the same\n",
|
||||
"table removed_externalgroups is the same\n",
|
||||
"table forums is the same\n",
|
||||
"table tools is the same\n",
|
||||
"table removed_tools is the same\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import sqlite3 as sql\n",
|
||||
|
|
@ -25,8 +52,8 @@
|
|||
"pd.set_option('display.max_rows', 1000)\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"\n",
|
||||
"newfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER.xlsx'\n",
|
||||
"oldfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER_previous_version.xlsx'\n",
|
||||
"newfile = '../DISARM_MASTER_DATA/DISARM_DATA_MASTER.xlsx'\n",
|
||||
"oldfile = '../../DISARMframeworks_2022_08_10/DISARM_MASTER_DATA/DISARM_DATA_MASTER.xlsx'\n",
|
||||
"\n",
|
||||
"# Load dfs from file\n",
|
||||
"newdfs = {}\n",
|
||||
|
|
@ -65,7 +92,68 @@
|
|||
"\n",
|
||||
"for table in newdfs.keys():\n",
|
||||
" if newdfs[table].equals(olddfs[table]) == False:\n",
|
||||
" investigate_table(table)"
|
||||
" investigate_table(table)\n",
|
||||
" else:\n",
|
||||
" print('table {} is the same'.format(table))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>disarm_id</th>\n",
|
||||
" <th>new</th>\n",
|
||||
" <th>old</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"Empty DataFrame\n",
|
||||
"Columns: [disarm_id, new, old]\n",
|
||||
"Index: []"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Look at individual table differences\n",
|
||||
"table = 'countermeasures'\n",
|
||||
"column = 'summary'\n",
|
||||
"coldiffs = newdfs[table][column] != olddfs[table][column]\n",
|
||||
"diffcols = pd.DataFrame()\n",
|
||||
"diffcols['disarm_id'] = newdfs[table][coldiffs]['disarm_id']\n",
|
||||
"diffcols['new'] = newdfs[table][coldiffs][column]\n",
|
||||
"diffcols['old'] = olddfs[table][coldiffs][column]\n",
|
||||
"diffcols[diffcols['old'] != '']"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -73,17 +161,7 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Look at individual table differences\n",
|
||||
"table = 'countermeasures'\n",
|
||||
"column = 'summary'\n",
|
||||
"coldiffs = newdfs[table][column] != olddfs[table][column]\n",
|
||||
"diffcols = pd.DataFrame()\n",
|
||||
"diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
|
||||
"diffcols['new'] = newdfs[table][coldiffs][column]\n",
|
||||
"diffcols['old'] = olddfs[table][coldiffs][column]\n",
|
||||
"diffcols[diffcols['old'] != '']"
|
||||
]
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue