DISARMframeworks/CODE/check_new_table_updates.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check new and old spreadsheets of data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# make sure we didn't miss anything in mike's original sheet\n",
    "def check_diffs(col1, col2):\n",
    "    xx = col1.value_counts().index\n",
    "    yy = col2.value_counts().index\n",
    "    print('lengths are: {} and {}'.format(len(xx), len(yy)))\n",
    "    print('first list: {}'.format(xx))\n",
    "    print('second list: {}'.format(yy))\n",
    "    print('set difference: {}'.format(set(xx).difference(set(yy))))\n",
    "\n",
    "def check_ids(df, col='DISARM ID'):\n",
    "    print('null ids: {}'.format(df[df[col].isnull()]))\n",
    "    vc = df[col].value_counts().reset_index()\n",
    "    print('repeated ids: {}'.format(vc[vc[col]>1]))\n",
    "    return\n",
    "\n",
    "def check_descriptions(df1, df2):\n",
    "    merge = df1.merge(df2, left_on='DISARM ID', right_on='disarm_id')\n",
    "    return(merge[merge['summary'] != merge['Description']])\n",
    "\n",
    "df = pd.read_csv('DISARM Merged (Internal) - DISARM Framework Merged.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "phases = df[['DISARM Phase', 'DISARM Tactic', 'Description', 'DISARM ID']]\n",
    "phases = phases[phases['DISARM Tactic'].isnull()]\n",
    "phases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "check_diffs(phases['DISARM Phase'], df['DISARM Phase'])\n",
    "check_ids(phases)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tactics = df[['DISARM Tactic', 'DISARM Technique', 'Description', 'DISARM ID']]#['DISARM Tactic'].notnull()\n",
    "tactics = tactics[(tactics['DISARM Tactic'].notnull()) & (tactics['DISARM Technique'].isnull())]\n",
    "tactics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_diffs(tactics['DISARM Tactic'], df['DISARM Tactic'])\n",
    "check_ids(tactics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "techs = df[['DISARM Technique', 'DISARM Subtechnique', 'Description', 'DISARM ID']]\n",
    "techs = techs[(techs['DISARM Technique'].notnull()) & (techs['DISARM Subtechnique'].isnull())]\n",
    "techs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_diffs(techs['DISARM Technique'], df['DISARM Technique'])\n",
    "check_ids(techs)\n",
    "#techs.to_csv('temp_techs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "subs = df[['DISARM Subtechnique', 'DISARM Technique', 'Description', 'DISARM ID']]\n",
    "subs = subs[subs['DISARM Subtechnique'].notnull()]\n",
    "subs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sx = subs.merge(techs, on='DISARM Technique')\n",
    "sx.to_csv('tmp_subs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_diffs(subs['DISARM Subtechnique'], df['DISARM Subtechnique'])\n",
    "check_ids(subs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# check against originals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "MASTERDATA_DIR = '../DISARM_MASTER_DATA/'\n",
    "frameworkfile = MASTERDATA_DIR + 'DISARM_FRAMEWORKS_MASTER.xlsx'\n",
    "\n",
    "def create_excel_metadata(frameworkfile):\n",
    "    metadata = {}\n",
    "    xlsx = pd.ExcelFile(frameworkfile)\n",
    "    for sheetname in xlsx.sheet_names:\n",
    "        metadata[sheetname] = xlsx.parse(sheetname)\n",
    "        metadata[sheetname].fillna('', inplace=True)\n",
    "        metadata[sheetname].rename(columns={'amitt_id': 'disarm_id'}, inplace=True)\n",
    "    return metadata\n",
    "\n",
    "metadata = create_excel_metadata(frameworkfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata['phases']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "phases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# do union of both tables, to get new spreadsheet table\n",
    "check_descriptions(phases, metadata['phases'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "check_diffs(metadata['phases']['name'], phases['DISARM Phase'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_descriptions(tactics, metadata['tactics'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_diffs(metadata['tactics']['name'], tactics['DISARM Tactic'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "check_diffs(metadata['techniques']['name'], techs['DISARM Technique'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}