{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Check new and old spreadsheets of data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# make sure we didn't miss anything in mike's original sheet\n", "def check_diffs(col1, col2):\n", " xx = col1.value_counts().index\n", " yy = col2.value_counts().index\n", " print('lengths are: {} and {}'.format(len(xx), len(yy)))\n", " print('first list: {}'.format(xx))\n", " print('second list: {}'.format(yy))\n", " print('set difference: {}'.format(set(xx).difference(set(yy))))\n", "\n", "def check_ids(df, col='DISARM ID'):\n", " print('null ids: {}'.format(df[df[col].isnull()]))\n", " vc = df[col].value_counts().reset_index()\n", " print('repeated ids: {}'.format(vc[vc[col]>1]))\n", " return\n", "\n", "def check_descriptions(df1, df2):\n", " merge = df1.merge(df2, left_on='DISARM ID', right_on='disarm_id')\n", " return(merge[merge['summary'] != merge['Description']])\n", "\n", "df = pd.read_csv('DISARM Merged (Internal) - DISARM Framework Merged.csv')\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "phases = df[['DISARM Phase', 'DISARM Tactic', 'Description', 'DISARM ID']]\n", "phases = phases[phases['DISARM Tactic'].isnull()]\n", "phases" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "check_diffs(phases['DISARM Phase'], df['DISARM Phase'])\n", "check_ids(phases)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tactics = df[['DISARM Tactic', 'DISARM Technique', 'Description', 'DISARM ID']]#['DISARM Tactic'].notnull()\n", "tactics = tactics[(tactics['DISARM Tactic'].notnull()) & (tactics['DISARM Technique'].isnull())]\n", "tactics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_diffs(tactics['DISARM Tactic'], df['DISARM Tactic'])\n", "check_ids(tactics)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "techs = df[['DISARM Technique', 'DISARM Subtechnique', 'Description', 'DISARM ID']]\n", "techs = techs[(techs['DISARM Technique'].notnull()) & (techs['DISARM Subtechnique'].isnull())]\n", "techs" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_diffs(techs['DISARM Technique'], df['DISARM Technique'])\n", "check_ids(techs)\n", "#techs.to_csv('temp_techs.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "subs = df[['DISARM Subtechnique', 'DISARM Technique', 'Description', 'DISARM ID']]\n", "subs = subs[subs['DISARM Subtechnique'].notnull()]\n", "subs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sx = subs.merge(techs, on='DISARM Technique')\n", "sx.to_csv('tmp_subs.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_diffs(subs['DISARM Subtechnique'], df['DISARM Subtechnique'])\n", "check_ids(subs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# check against originals" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MASTERDATA_DIR = '../DISARM_MASTER_DATA/'\n", "frameworkfile = MASTERDATA_DIR + 'DISARM_FRAMEWORKS_MASTER.xlsx'\n", "oldframefile = MASTERDATA_DIR + '2022-06-30_DISARM_0_1/DISARM_FRAMEWORKS_MASTER_0_1.xlsx'\n", "\n", "def create_excel_metadata(frameworkfile):\n", " metadata = {}\n", " xlsx = pd.ExcelFile(frameworkfile)\n", " for sheetname in xlsx.sheet_names:\n", " metadata[sheetname] = xlsx.parse(sheetname)\n", " metadata[sheetname].fillna('', inplace=True)\n", " return metadata\n", "\n", "metadata = create_excel_metadata(frameworkfile)\n", "oldmeta = create_excel_metadata(oldframefile)\n", "print('Current keys: {}'.format(metadata.keys()))\n", "print('Old keys: {}'.format(oldmeta.keys()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metadata['phases']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "phases" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# do union of both tables, to get new spreadsheet table\n", "check_descriptions(phases, metadata['phases'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "check_diffs(metadata['phases']['name'], phases['DISARM Phase'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_descriptions(tactics, metadata['tactics'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_diffs(metadata['tactics']['name'], tactics['DISARM Tactic'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "check_diffs(metadata['techniques']['name'], techs['DISARM Technique'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check against last version of framework" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def check_version_descriptions(df1, df2):\n", " merge = df1.merge(df2, on='disarm_id')\n", " merge['name_changed'] = merge['name_x'] != merge['name_y']\n", " merge['summary_changed'] = merge['summary_x'] != merge['summary_y']\n", " return merge[(merge['name_changed']) | (merge['summary_changed'])][['disarm_id', 'name_changed', 'summary_changed', 'name_x', \n", " 'name_y', 'summary_x', 'summary_y']]\n", "\n", "check_version_descriptions(metadata['phases'], oldmeta['phases'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "check_version_descriptions(metadata['tactics'], oldmeta['tactics'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "check_version_descriptions(metadata['techniques'], oldmeta['techniques'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }