moved to datasets as CSVs

Changed from data held in Excel files to data held in CSV files. This gives us a better view of what's changed in the datasets when we push them to Git.
2025-12-17 09:14:10 -05:00 · 2022-08-25 09:50:52 -04:00 · 2022-08-25 09:50:52 -04:00 · 1bc8d88b63
commit 1bc8d88b63
parent cae9cbc55e
99 changed files with 15137 additions and 14858 deletions
--- a/CODE/compare_DISARM_excel_versions.ipynb
+++ b/CODE/compare_DISARM_excel_versions.ipynb
@ -11,9 +11,36 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-5-7e799212f01e>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
+      "  pd.set_option('display.max_colwidth', -1)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "table README is the same\n",
+      "table examples is the same\n",
+      "table incidents is the same\n",
+      "table incidenttechniques is the same\n",
+      "table narratives is the same\n",
+      "table externalgroups is the same\n",
+      "table externalgroupcounters is the same\n",
+      "table suggested_externalgroups is the same\n",
+      "table removed_externalgroups is the same\n",
+      "table forums is the same\n",
+      "table tools is the same\n",
+      "table removed_tools is the same\n"
+     ]
+    }
+   ],
   "source": [
    "import pandas as pd\n",
    "import sqlite3 as sql\n",
@ -25,8 +52,8 @@
    "pd.set_option('display.max_rows', 1000)\n",
    "pd.set_option('display.max_colwidth', -1)\n",
    "\n",
-    "newfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER.xlsx'\n",
-    "oldfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER_previous_version.xlsx'\n",
+    "newfile = '../DISARM_MASTER_DATA/DISARM_DATA_MASTER.xlsx'\n",
+    "oldfile = '../../DISARMframeworks_2022_08_10/DISARM_MASTER_DATA/DISARM_DATA_MASTER.xlsx'\n",
    "\n",
    "# Load dfs from file\n",
    "newdfs = {}\n",
@ -65,7 +92,68 @@
    "\n",
    "for table in newdfs.keys():\n",
    "    if newdfs[table].equals(olddfs[table]) == False:\n",
-    "        investigate_table(table)"
+    "        investigate_table(table)\n",
+    "    else:\n",
+    "        print('table {} is the same'.format(table))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>disarm_id</th>\n",
+       "      <th>new</th>\n",
+       "      <th>old</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [disarm_id, new, old]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Look at individual table differences\n",
+    "table = 'countermeasures'\n",
+    "column = 'summary'\n",
+    "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
+    "diffcols = pd.DataFrame()\n",
+    "diffcols['disarm_id'] = newdfs[table][coldiffs]['disarm_id']\n",
+    "diffcols['new'] = newdfs[table][coldiffs][column]\n",
+    "diffcols['old'] = olddfs[table][coldiffs][column]\n",
+    "diffcols[diffcols['old'] != '']"
   ]
  },
  {
@ -73,17 +161,7 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": [
-    "# Look at individual table differences\n",
-    "table = 'countermeasures'\n",
-    "column = 'summary'\n",
-    "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
-    "diffcols = pd.DataFrame()\n",
-    "diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
-    "diffcols['new'] = newdfs[table][coldiffs][column]\n",
-    "diffcols['old'] = olddfs[table][coldiffs][column]\n",
-    "diffcols[diffcols['old'] != '']"
-   ]
+   "source": []
  }
 ],
 "metadata": {