{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check new and old spreadsheets of data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>DISARM Phase</th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>DISARM Technique</th>\n",
       "      <th>DISARM Subtechnique</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "      <th>Feedback from EEAS</th>\n",
       "      <th>Action to Do</th>\n",
       "      <th>Action Taken</th>\n",
       "      <th>Where did it come from?</th>\n",
       "      <th>Where from more info</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Plan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Envision the desired outcome. Lay out effectiv...</td>\n",
       "      <td>P01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AMITT</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>36</td>\n",
       "      <td>Prepare</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Activities conducted before execution to impro...</td>\n",
       "      <td>P02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>126</td>\n",
       "      <td>Execute</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Run the action, from initial exposure to wrap-...</td>\n",
       "      <td>P03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>263</td>\n",
       "      <td>Assess</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Assess effectiveness of action, for use in fut...</td>\n",
       "      <td>P04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>Plan</td>\n",
       "      <td>Plan Objectives</td>\n",
       "      <td>Dismiss</td>\n",
       "      <td>Discredit Credible Sources</td>\n",
       "      <td>Plan to delegitimize the media landscape and d...</td>\n",
       "      <td>ST0001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AMITT</td>\n",
       "      <td>T0067 Plan to Discredit Credible Sources</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>184</td>\n",
       "      <td>Execute</td>\n",
       "      <td>Maximize Exposure</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Maximize exposure of the target audience to in...</td>\n",
       "      <td>TA12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>204</td>\n",
       "      <td>Execute</td>\n",
       "      <td>Drive Online Harms</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Actions taken by an influence operation to har...</td>\n",
       "      <td>TA13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>221</td>\n",
       "      <td>Execute</td>\n",
       "      <td>Drive Offline Activity</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Move incident/campaign from online to offline....</td>\n",
       "      <td>TA14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>235</td>\n",
       "      <td>Execute</td>\n",
       "      <td>Persist in the Information Space</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Persist in the Information Space refers to tak...</td>\n",
       "      <td>TA15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>264</td>\n",
       "      <td>Assess</td>\n",
       "      <td>Assess Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA16</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>277 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0 DISARM Phase                     DISARM Tactic  \\\n",
       "0             1         Plan                               NaN   \n",
       "1            36      Prepare                               NaN   \n",
       "2           126      Execute                               NaN   \n",
       "3           263       Assess                               NaN   \n",
       "4             7         Plan                   Plan Objectives   \n",
       "..          ...          ...                               ...   \n",
       "272         184      Execute                 Maximize Exposure   \n",
       "273         204      Execute                Drive Online Harms   \n",
       "274         221      Execute            Drive Offline Activity   \n",
       "275         235      Execute  Persist in the Information Space   \n",
       "276         264       Assess              Assess Effectiveness   \n",
       "\n",
       "    DISARM Technique         DISARM Subtechnique  \\\n",
       "0                NaN                         NaN   \n",
       "1                NaN                         NaN   \n",
       "2                NaN                         NaN   \n",
       "3                NaN                         NaN   \n",
       "4            Dismiss  Discredit Credible Sources   \n",
       "..               ...                         ...   \n",
       "272              NaN                         NaN   \n",
       "273              NaN                         NaN   \n",
       "274              NaN                         NaN   \n",
       "275              NaN                         NaN   \n",
       "276              NaN                         NaN   \n",
       "\n",
       "                                           Description DISARM ID  \\\n",
       "0    Envision the desired outcome. Lay out effectiv...       P01   \n",
       "1    Activities conducted before execution to impro...       P02   \n",
       "2    Run the action, from initial exposure to wrap-...       P03   \n",
       "3    Assess effectiveness of action, for use in fut...       P04   \n",
       "4    Plan to delegitimize the media landscape and d...    ST0001   \n",
       "..                                                 ...       ...   \n",
       "272  Maximize exposure of the target audience to in...      TA12   \n",
       "273  Actions taken by an influence operation to har...      TA13   \n",
       "274  Move incident/campaign from online to offline....      TA14   \n",
       "275  Persist in the Information Space refers to tak...      TA15   \n",
       "276                                                NaN      TA16   \n",
       "\n",
       "     Feedback from EEAS  Action to Do  Action Taken Where did it come from?  \\\n",
       "0                   NaN           NaN           NaN                   AMITT   \n",
       "1                   NaN           NaN           NaN                     NaN   \n",
       "2                   NaN           NaN           NaN                     NaN   \n",
       "3                   NaN           NaN           NaN                     NaN   \n",
       "4                   NaN           NaN           NaN                   AMITT   \n",
       "..                  ...           ...           ...                     ...   \n",
       "272                 NaN           NaN           NaN                     NaN   \n",
       "273                 NaN           NaN           NaN                     NaN   \n",
       "274                 NaN           NaN           NaN                     NaN   \n",
       "275                 NaN           NaN           NaN                     NaN   \n",
       "276                 NaN           NaN           NaN                     NaN   \n",
       "\n",
       "                         Where from more info  \n",
       "0                                         NaN  \n",
       "1                                         NaN  \n",
       "2                                         NaN  \n",
       "3                                         NaN  \n",
       "4    T0067 Plan to Discredit Credible Sources  \n",
       "..                                        ...  \n",
       "272                                       NaN  \n",
       "273                                       NaN  \n",
       "274                                       NaN  \n",
       "275                                       NaN  \n",
       "276                                       NaN  \n",
       "\n",
       "[277 rows x 12 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# make sure we didn't miss anything in mike's original sheet\n",
    "def check_diffs(col1, col2):\n",
    "    xx = col1.value_counts().index\n",
    "    yy = col2.value_counts().index\n",
    "    print('lengths are: {} and {}'.format(len(xx), len(yy)))\n",
    "    print('first list: {}'.format(xx))\n",
    "    print('second list: {}'.format(yy))\n",
    "    print('set difference: {}'.format(set(xx).difference(set(yy))))\n",
    "\n",
    "def check_ids(df, col='DISARM ID'):\n",
    "    print('null ids: {}'.format(df[df[col].isnull()]))\n",
    "    vc = df[col].value_counts().reset_index()\n",
    "    print('repeated ids: {}'.format(vc[vc[col]>1]))\n",
    "    return\n",
    "\n",
    "def check_descriptions(df1, df2):\n",
    "    merge = df1.merge(df2, left_on='DISARM ID', right_on='disarm_id')\n",
    "    return(merge[merge['summary'] != merge['Description']])\n",
    "\n",
    "df = pd.read_csv('DISARM Merged (Internal) - DISARM Framework Merged.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Phase</th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Plan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Envision the desired outcome. Lay out effectiv...</td>\n",
       "      <td>P01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Prepare</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Activities conducted before execution to impro...</td>\n",
       "      <td>P02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Execute</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Run the action, from initial exposure to wrap-...</td>\n",
       "      <td>P03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Assess</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Assess effectiveness of action, for use in fut...</td>\n",
       "      <td>P04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  DISARM Phase DISARM Tactic  \\\n",
       "0         Plan           NaN   \n",
       "1      Prepare           NaN   \n",
       "2      Execute           NaN   \n",
       "3       Assess           NaN   \n",
       "\n",
       "                                         Description DISARM ID  \n",
       "0  Envision the desired outcome. Lay out effectiv...       P01  \n",
       "1  Activities conducted before execution to impro...       P02  \n",
       "2  Run the action, from initial exposure to wrap-...       P03  \n",
       "3  Assess effectiveness of action, for use in fut...       P04  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "phases = df[['DISARM Phase', 'DISARM Tactic', 'Description', 'DISARM ID']]\n",
    "phases = phases[phases['DISARM Tactic'].isnull()]\n",
    "phases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 4 and 4\n",
      "first list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')\n",
      "second list: Index(['Execute', 'Prepare', 'Plan', 'Assess'], dtype='object')\n",
      "set difference: set()\n",
      "null ids: Empty DataFrame\n",
      "Columns: [DISARM Phase, DISARM Tactic, Description, DISARM ID]\n",
      "Index: []\n",
      "repeated ids: Empty DataFrame\n",
      "Columns: [index, DISARM ID]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "check_diffs(phases['DISARM Phase'], df['DISARM Phase'])\n",
    "check_ids(phases)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>DISARM Technique</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>261</th>\n",
       "      <td>Plan Strategy</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Define the desired end state, i.e. the set of ...</td>\n",
       "      <td>TA01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>262</th>\n",
       "      <td>Plan Objectives</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Set clearly defined, measurable, and achievabl...</td>\n",
       "      <td>TA02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>263</th>\n",
       "      <td>Target Audience Analysis</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Identifying and analyzing the target audience ...</td>\n",
       "      <td>TA03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>264</th>\n",
       "      <td>Develop Narratives</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The promotion of beneficial master narratives ...</td>\n",
       "      <td>TA04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>265</th>\n",
       "      <td>Develop Content</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>Establish Social Assets</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Establishing information assets generates mess...</td>\n",
       "      <td>TA06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>267</th>\n",
       "      <td>Establish Legitimacy</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268</th>\n",
       "      <td>Microtarget</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Target very specific populations of people</td>\n",
       "      <td>TA08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>Select Channels and Affordances</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Selecting platforms and affordances assesses w...</td>\n",
       "      <td>TA09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>270</th>\n",
       "      <td>Conduct Pump Priming</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Release content on a targetted small scale, pr...</td>\n",
       "      <td>TA10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271</th>\n",
       "      <td>Deliver Content</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Release content to general public or larger po...</td>\n",
       "      <td>TA11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>Maximize Exposure</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Maximize exposure of the target audience to in...</td>\n",
       "      <td>TA12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>Drive Online Harms</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Actions taken by an influence operation to har...</td>\n",
       "      <td>TA13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>Drive Offline Activity</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Move incident/campaign from online to offline....</td>\n",
       "      <td>TA14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>Persist in the Information Space</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Persist in the Information Space refers to tak...</td>\n",
       "      <td>TA15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>Assess Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        DISARM Tactic DISARM Technique  \\\n",
       "261                     Plan Strategy              NaN   \n",
       "262                   Plan Objectives              NaN   \n",
       "263          Target Audience Analysis              NaN   \n",
       "264                Develop Narratives              NaN   \n",
       "265                   Develop Content              NaN   \n",
       "266           Establish Social Assets              NaN   \n",
       "267              Establish Legitimacy              NaN   \n",
       "268                       Microtarget              NaN   \n",
       "269   Select Channels and Affordances              NaN   \n",
       "270              Conduct Pump Priming              NaN   \n",
       "271                   Deliver Content              NaN   \n",
       "272                 Maximize Exposure              NaN   \n",
       "273                Drive Online Harms              NaN   \n",
       "274            Drive Offline Activity              NaN   \n",
       "275  Persist in the Information Space              NaN   \n",
       "276              Assess Effectiveness              NaN   \n",
       "\n",
       "                                           Description DISARM ID  \n",
       "261  Define the desired end state, i.e. the set of ...      TA01  \n",
       "262  Set clearly defined, measurable, and achievabl...      TA02  \n",
       "263  Identifying and analyzing the target audience ...      TA03  \n",
       "264  The promotion of beneficial master narratives ...      TA04  \n",
       "265                                                NaN      TA05  \n",
       "266  Establishing information assets generates mess...      TA06  \n",
       "267                                                NaN      TA07  \n",
       "268         Target very specific populations of people      TA08  \n",
       "269  Selecting platforms and affordances assesses w...      TA09  \n",
       "270  Release content on a targetted small scale, pr...      TA10  \n",
       "271  Release content to general public or larger po...      TA11  \n",
       "272  Maximize exposure of the target audience to in...      TA12  \n",
       "273  Actions taken by an influence operation to har...      TA13  \n",
       "274  Move incident/campaign from online to offline....      TA14  \n",
       "275  Persist in the Information Space refers to tak...      TA15  \n",
       "276                                                NaN      TA16  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tactics = df[['DISARM Tactic', 'DISARM Technique', 'Description', 'DISARM ID']]#['DISARM Tactic'].notnull()\n",
    "tactics = tactics[(tactics['DISARM Tactic'].notnull()) & (tactics['DISARM Technique'].isnull())]\n",
    "tactics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 16 and 16\n",
      "first list: Index(['Drive Online Harms', 'Maximize Exposure', 'Conduct Pump Priming',\n",
      "       'Deliver Content', 'Persist in the Information Space',\n",
      "       'Assess Effectiveness', 'Establish Social Assets',\n",
      "       'Target Audience Analysis', 'Develop Narratives', 'Plan Strategy',\n",
      "       'Select Channels and Affordances', 'Develop Content', 'Plan Objectives',\n",
      "       'Microtarget', 'Establish Legitimacy', 'Drive Offline Activity'],\n",
      "      dtype='object')\n",
      "second list: Index(['Develop Content', 'Establish Social Assets',\n",
      "       'Select Channels and Affordances', 'Persist in the Information Space',\n",
      "       'Target Audience Analysis', 'Maximize Exposure', 'Drive Online Harms',\n",
      "       'Establish Legitimacy', 'Assess Effectiveness',\n",
      "       'Drive Offline Activity', 'Deliver Content', 'Develop Narratives',\n",
      "       'Plan Objectives', 'Conduct Pump Priming', 'Microtarget',\n",
      "       'Plan Strategy'],\n",
      "      dtype='object')\n",
      "set difference: set()\n",
      "null ids: Empty DataFrame\n",
      "Columns: [DISARM Tactic, DISARM Technique, Description, DISARM ID]\n",
      "Index: []\n",
      "repeated ids: Empty DataFrame\n",
      "Columns: [index, DISARM ID]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "check_diffs(tactics['DISARM Tactic'], df['DISARM Tactic'])\n",
    "check_ids(tactics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Technique</th>\n",
       "      <th>DISARM Subtechnique</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>Determine Target Audiences</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165</th>\n",
       "      <td>Determine Strategic Ends</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>Dismiss</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Push back against criticism by dismissing your...</td>\n",
       "      <td>T0003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167</th>\n",
       "      <td>Distort</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Twist the narrative. Take information, or arti...</td>\n",
       "      <td>T0004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>168</th>\n",
       "      <td>Distract</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Shift attention to a different narrative or ac...</td>\n",
       "      <td>T0005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>256</th>\n",
       "      <td>Exploit TOS/Content Moderation</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>257</th>\n",
       "      <td>Play the long game</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Play the long game refers to two phenomena: 1....</td>\n",
       "      <td>T0094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>Measure Performance</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260</th>\n",
       "      <td>Measure Effectiveness Indicators (or KPIs)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>T0097</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>97 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               DISARM Technique DISARM Subtechnique  \\\n",
       "164                  Determine Target Audiences                 NaN   \n",
       "165                    Determine Strategic Ends                 NaN   \n",
       "166                                     Dismiss                 NaN   \n",
       "167                                     Distort                 NaN   \n",
       "168                                    Distract                 NaN   \n",
       "..                                          ...                 ...   \n",
       "256              Exploit TOS/Content Moderation                 NaN   \n",
       "257                          Play the long game                 NaN   \n",
       "258                         Measure Performance                 NaN   \n",
       "259                       Measure Effectiveness                 NaN   \n",
       "260  Measure Effectiveness Indicators (or KPIs)                 NaN   \n",
       "\n",
       "                                           Description DISARM ID  \n",
       "164                                                NaN     T0001  \n",
       "165                                                NaN     T0002  \n",
       "166  Push back against criticism by dismissing your...     T0003  \n",
       "167  Twist the narrative. Take information, or arti...     T0004  \n",
       "168  Shift attention to a different narrative or ac...     T0005  \n",
       "..                                                 ...       ...  \n",
       "256                                                NaN     T0093  \n",
       "257  Play the long game refers to two phenomena: 1....     T0094  \n",
       "258                                                NaN     T0095  \n",
       "259                                                NaN     T0096  \n",
       "260                                                NaN     T0097  \n",
       "\n",
       "[97 rows x 4 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep technique-level rows only: a technique name is present, a subtechnique name is not.\n",
    "techs = df.loc[\n",
    "    df['DISARM Technique'].notna() & df['DISARM Subtechnique'].isna(),\n",
    "    ['DISARM Technique', 'DISARM Subtechnique', 'Description', 'DISARM ID'],\n",
    "]\n",
    "techs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 97 and 97\n",
      "first list: Index(['Leverage Echo Chambers/Filter Bubbles',\n",
      "       'Map Target Audience Information Environment', 'Physical Violence',\n",
      "       'Direct Users to Alternative Platforms', 'Develop Audio-based Content',\n",
      "       'Develop Owned Media Assets', 'Leverage Conspiracy Theory Narratives',\n",
      "       'Develop Text-based Content', 'Develop Video-based Content',\n",
      "       'Deliver Ads', 'Leverage Content Farm',\n",
      "       'Bookmarking and Content Curation', 'Use Search Engine Optimization',\n",
      "       'Livestream', 'Respond to Breaking News Event or Active Crisis',\n",
      "       'Recruit bad actors', 'Incentivize Sharing', 'Media Sharing Networks',\n",
      "       'Identify Social and Technical Vulnerabilities',\n",
      "       'Create Inauthentic Accounts', 'Prepare fundraising campaigns', 'Email',\n",
      "       'Online polls', 'Purchase Targeted Advertisements',\n",
      "       'Obtain Private Documents', 'Play the long game', 'Build Network',\n",
      "       ' Social Networks', 'Discussion Forums', 'Divide',\n",
      "       'Create Inauthentic Social Media Pages and Groups',\n",
      "       'Conduct fundraising', 'Acquire/ recruit Network', 'Cross-Posting',\n",
      "       'Leverage Existing Narratives', 'Traditional Media',\n",
      "       'Conceal Operational Activity', 'Create Clickbait',\n",
      "       'Reuse Existing Content', 'Measure Effectiveness', 'Distort',\n",
      "       'Conceal Infrastructure', 'Generate information pollution',\n",
      "       'Develop New Narratives', 'Bait legitimate influencers',\n",
      "       'Facilitate State Propaganda', 'Post Content', 'Co-opt Trusted Sources',\n",
      "       'Seed distortions', 'Compromise legitimate accounts',\n",
      "       'Blogging and Publishing Networks', 'Platform Filtering',\n",
      "       'Determine Target Audiences', 'Dismiss', 'Consumer Review Networks',\n",
      "       'Merchandising/ Advertising', 'Develop Image-based Content',\n",
      "       'Prepare Assets Impersonating Legitimate Entities',\n",
      "       'Flooding the Information Space', 'Distract', 'Measure Performance',\n",
      "       'Distort facts', 'Use fake experts', 'Comment or Reply on Content',\n",
      "       'Create fake experts', 'Create personas', 'Seed Kernel of truth',\n",
      "       'Conceal People', 'Attract Traditional Media',\n",
      "       'Determine Strategic Ends', 'Create inauthentic websites',\n",
      "       'Cultivate ignorant agents', 'Create Localized Content',\n",
      "       'Create hashtags and search artifacts',\n",
      "       'Censor social media as a political force', 'Dismay',\n",
      "       'Segment Audiences', 'Establish Inauthentic News Sites',\n",
      "       'Organize Events', 'Harass', 'Suppress Opposition',\n",
      "       'Control Information Environment through Offensive Cyberspace Operations',\n",
      "       'Infiltrate Existing Networks', 'Formal Diplomatic Channels',\n",
      "       'Chat apps', 'Encourage Attendance at Events',\n",
      "       'Develop Competing Narratives', 'Employ Commercial Analytic Firms',\n",
      "       'Degrade Adversary', 'Amplify Existing Narrative',\n",
      "       'Measure Effectiveness Indicators (or KPIs)',\n",
      "       'Exploit TOS/Content Moderation',\n",
      "       'Prepare Physical Broadcast Capabilities',\n",
      "       'Integrate Target Audience Vulnerabilities into Narrative',\n",
      "       'Manipulate Platform Algorithm', 'Trial content',\n",
      "       'Demand insurmountable proof'],\n",
      "      dtype='object')\n",
      "second list: Index(['Conceal Operational Activity',\n",
      "       'Identify Social and Technical Vulnerabilities',\n",
      "       'Flooding the Information Space', ' Social Networks',\n",
      "       'Segment Audiences', 'Conceal People',\n",
      "       'Map Target Audience Information Environment', 'Conceal Infrastructure',\n",
      "       'Measure Effectiveness', 'Harass',\n",
      "       'Control Information Environment through Offensive Cyberspace Operations',\n",
      "       'Reuse Existing Content', 'Create Inauthentic Accounts',\n",
      "       'Develop Image-based Content', 'Obtain Private Documents',\n",
      "       'Co-opt Trusted Sources', 'Recruit bad actors',\n",
      "       'Leverage Echo Chambers/Filter Bubbles', 'Build Network',\n",
      "       'Post Content', 'Measure Performance', 'Traditional Media',\n",
      "       'Cross-Posting', 'Media Sharing Networks', 'Suppress Opposition',\n",
      "       'Develop Text-based Content', 'Leverage Content Farm',\n",
      "       'Infiltrate Existing Networks', 'Physical Violence', 'Livestream',\n",
      "       'Organize Events', 'Establish Inauthentic News Sites', 'Distort facts',\n",
      "       'Leverage Conspiracy Theory Narratives', 'Develop Audio-based Content',\n",
      "       'Incentivize Sharing', 'Deliver Ads', 'Generate information pollution',\n",
      "       'Acquire/ recruit Network', 'Encourage Attendance at Events',\n",
      "       'Develop Video-based Content', 'Prepare fundraising campaigns',\n",
      "       'Measure Effectiveness Indicators (or KPIs)',\n",
      "       'Exploit TOS/Content Moderation',\n",
      "       'Prepare Assets Impersonating Legitimate Entities', 'Chat apps',\n",
      "       'Discussion Forums', 'Comment or Reply on Content',\n",
      "       'Merchandising/ Advertising', 'Manipulate Platform Algorithm',\n",
      "       'Conduct fundraising', 'Create fake experts', 'Dismiss',\n",
      "       'Create personas', 'Develop Owned Media Assets',\n",
      "       'Direct Users to Alternative Platforms', 'Create Clickbait',\n",
      "       'Leverage Existing Narratives', 'Cultivate ignorant agents',\n",
      "       'Play the long game', 'Develop New Narratives',\n",
      "       'Use Search Engine Optimization', 'Purchase Targeted Advertisements',\n",
      "       'Use fake experts', 'Determine Target Audiences',\n",
      "       'Facilitate State Propaganda', 'Create Localized Content',\n",
      "       'Attract Traditional Media', 'Trial content',\n",
      "       'Create inauthentic websites', 'Bait legitimate influencers', 'Divide',\n",
      "       'Create Inauthentic Social Media Pages and Groups',\n",
      "       'Seed Kernel of truth', 'Censor social media as a political force',\n",
      "       'Seed distortions', 'Bookmarking and Content Curation',\n",
      "       'Degrade Adversary', 'Consumer Review Networks', 'Platform Filtering',\n",
      "       'Create hashtags and search artifacts', 'Develop Competing Narratives',\n",
      "       'Distort', 'Prepare Physical Broadcast Capabilities',\n",
      "       'Amplify Existing Narrative', 'Determine Strategic Ends',\n",
      "       'Employ Commercial Analytic Firms',\n",
      "       'Integrate Target Audience Vulnerabilities into Narrative', 'Dismay',\n",
      "       'Formal Diplomatic Channels', 'Blogging and Publishing Networks',\n",
      "       'Compromise legitimate accounts', 'Distract', 'Online polls', 'Email',\n",
      "       'Respond to Breaking News Event or Active Crisis',\n",
      "       'Demand insurmountable proof'],\n",
      "      dtype='object')\n",
      "set difference: set()\n",
      "null ids: Empty DataFrame\n",
      "Columns: [DISARM Technique, DISARM Subtechnique, Description, DISARM ID]\n",
      "Index: []\n",
      "repeated ids: Empty DataFrame\n",
      "Columns: [index, DISARM ID]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "# Compare the technique names kept in `techs` against the full technique column,\n",
    "# then report null or repeated DISARM IDs (both helpers print their findings).\n",
    "check_diffs(techs['DISARM Technique'], df['DISARM Technique'])\n",
    "check_ids(techs)\n",
    "# Optional export for manual inspection:\n",
    "#techs.to_csv('temp_techs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Subtechnique</th>\n",
       "      <th>DISARM Technique</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Discredit Credible Sources</td>\n",
       "      <td>Dismiss</td>\n",
       "      <td>Plan to delegitimize the media landscape and d...</td>\n",
       "      <td>ST0001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Monitor Social Media Analytics</td>\n",
       "      <td>Map Target Audience Information Environment</td>\n",
       "      <td>An influence operation may use social media an...</td>\n",
       "      <td>ST0002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Evaluate Media Surveys</td>\n",
       "      <td>Map Target Audience Information Environment</td>\n",
       "      <td>An influence operation may evaluate its own or...</td>\n",
       "      <td>ST0003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Identify Trending Topics/Hashtags</td>\n",
       "      <td>Map Target Audience Information Environment</td>\n",
       "      <td>An influence operation may identify trending h...</td>\n",
       "      <td>ST0004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Conduct Web Traffic Analysis</td>\n",
       "      <td>Map Target Audience Information Environment</td>\n",
       "      <td>An influence operation may conduct web traffic...</td>\n",
       "      <td>ST0005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>Awareness</td>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ST0156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>Knowledge</td>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ST0157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>161</th>\n",
       "      <td>Action/attitude</td>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ST0158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>Message reach</td>\n",
       "      <td>Measure Effectiveness Indicators (or KPIs)</td>\n",
       "      <td>Monitor and evaluate message reach in misinfor...</td>\n",
       "      <td>ST0159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>163</th>\n",
       "      <td>Social media engagement</td>\n",
       "      <td>Measure Effectiveness Indicators (or KPIs)</td>\n",
       "      <td>Monitor and evaluate social media engagement i...</td>\n",
       "      <td>ST0160</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>160 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   DISARM Subtechnique  \\\n",
       "4           Discredit Credible Sources   \n",
       "5       Monitor Social Media Analytics   \n",
       "6               Evaluate Media Surveys   \n",
       "7    Identify Trending Topics/Hashtags   \n",
       "8         Conduct Web Traffic Analysis   \n",
       "..                                 ...   \n",
       "159                          Awareness   \n",
       "160                          Knowledge   \n",
       "161                    Action/attitude   \n",
       "162                      Message reach   \n",
       "163            Social media engagement   \n",
       "\n",
       "                                DISARM Technique  \\\n",
       "4                                        Dismiss   \n",
       "5    Map Target Audience Information Environment   \n",
       "6    Map Target Audience Information Environment   \n",
       "7    Map Target Audience Information Environment   \n",
       "8    Map Target Audience Information Environment   \n",
       "..                                           ...   \n",
       "159                        Measure Effectiveness   \n",
       "160                        Measure Effectiveness   \n",
       "161                        Measure Effectiveness   \n",
       "162   Measure Effectiveness Indicators (or KPIs)   \n",
       "163   Measure Effectiveness Indicators (or KPIs)   \n",
       "\n",
       "                                           Description DISARM ID  \n",
       "4    Plan to delegitimize the media landscape and d...    ST0001  \n",
       "5    An influence operation may use social media an...    ST0002  \n",
       "6    An influence operation may evaluate its own or...    ST0003  \n",
       "7    An influence operation may identify trending h...    ST0004  \n",
       "8    An influence operation may conduct web traffic...    ST0005  \n",
       "..                                                 ...       ...  \n",
       "159                                                NaN    ST0156  \n",
       "160                                                NaN    ST0157  \n",
       "161                                                NaN    ST0158  \n",
       "162  Monitor and evaluate message reach in misinfor...    ST0159  \n",
       "163  Monitor and evaluate social media engagement i...    ST0160  \n",
       "\n",
       "[160 rows x 4 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep subtechnique-level rows only: every row that carries a subtechnique name.\n",
    "subs = df.loc[\n",
    "    df['DISARM Subtechnique'].notna(),\n",
    "    ['DISARM Subtechnique', 'DISARM Technique', 'Description', 'DISARM ID'],\n",
    "]\n",
    "subs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach each subtechnique's parent technique row. Columns shared by both frames\n",
    "# get pandas' default _x (subtechnique side) / _y (technique side) suffixes.\n",
    "# validate='many_to_one' makes the merge fail loudly if a technique name is repeated\n",
    "# in `techs`, instead of silently duplicating subtechnique rows in the export.\n",
    "sx = subs.merge(techs, on='DISARM Technique', validate='many_to_one')\n",
    "sx.to_csv('tmp_subs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 159 and 159\n",
      "first list: Index(['Conceal Network Identity', 'Co-opt Influencers', 'Newspaper',\n",
      "       'Utilize Academic/Pseudoscientific Justifications',\n",
      "       'Legacy web content', 'Audio sharing', 'Utilize Butterfly Attack',\n",
      "       'Demographic Segmentation', 'Political Segmentation',\n",
      "       'Create Organizations',\n",
      "       ...\n",
      "       'Generate Content Unrelated to Narrative', 'Hijack existing hashtag',\n",
      "       'Delete Opposing Content', 'Backstop personas ',\n",
      "       'Co-Opt Trusted Individuals', 'Post Across Groups',\n",
      "       'Identify Data Voids', 'Create fake research', 'Content Focused',\n",
      "       'Share Memes'],\n",
      "      dtype='object', length=159)\n",
      "second list: Index(['Conceal Network Identity', 'Co-opt Influencers', 'Newspaper',\n",
      "       'Utilize Academic/Pseudoscientific Justifications',\n",
      "       'Legacy web content', 'Audio sharing', 'Utilize Butterfly Attack',\n",
      "       'Demographic Segmentation', 'Political Segmentation',\n",
      "       'Create Organizations',\n",
      "       ...\n",
      "       'Generate Content Unrelated to Narrative', 'Hijack existing hashtag',\n",
      "       'Delete Opposing Content', 'Backstop personas ',\n",
      "       'Co-Opt Trusted Individuals', 'Post Across Groups',\n",
      "       'Identify Data Voids', 'Create fake research', 'Content Focused',\n",
      "       'Share Memes'],\n",
      "      dtype='object', length=159)\n",
      "set difference: set()\n",
      "null ids: Empty DataFrame\n",
      "Columns: [DISARM Subtechnique, Description, DISARM ID]\n",
      "Index: []\n",
      "repeated ids: Empty DataFrame\n",
      "Columns: [index, DISARM ID]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "# Same consistency checks as for techniques, applied to the subtechnique rows.\n",
    "check_diffs(subs['DISARM Subtechnique'], df['DISARM Subtechnique'])\n",
    "check_ids(subs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check against the original DISARM master spreadsheets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Current keys: dict_keys(['FRAMEWORK_ADMIN_README', 'FRAMEWORK_ADMIN CODES', 'phases', 'tactics', 'techniques', 'subtechniques', 'frameworks', 'tasks', 'detections', 'countermeasures', 'playbooks', 'responsetypes', 'metatechniques', 'actortypes', 'sectors', 'resources', 'MOE', 'MOP'])\n",
      "Old keys: dict_keys(['ADMIN_README', 'ADMIN CODES', 'phases', 'tactics', 'techniques', 'countermeasures', 'frameworks', 'tasks', 'detections', 'playbooks', 'responsetypes', 'metatechniques', 'actortypes', 'actortype_framework', 'actortype_sector', 'sectors', 'resources', 'MOE', 'MOP'])\n"
     ]
    }
   ],
   "source": [
    "MASTERDATA_DIR = '../DISARM_MASTER_DATA/'\n",
    "frameworkfile = MASTERDATA_DIR + 'DISARM_FRAMEWORKS_MASTER.xlsx'\n",
    "oldframefile = MASTERDATA_DIR + '2022-06-30_DISARM_0_1/DISARM_FRAMEWORKS_MASTER_0_1.xlsx'\n",
    "\n",
    "def create_excel_metadata(frameworkfile):\n",
    "    metadata = {}\n",
    "    xlsx = pd.ExcelFile(frameworkfile)\n",
    "    for sheetname in xlsx.sheet_names:\n",
    "        metadata[sheetname] = xlsx.parse(sheetname)\n",
    "        metadata[sheetname].fillna('', inplace=True)\n",
    "    return metadata\n",
    "\n",
    "# Parse both workbooks, then list their sheet names for a quick structural comparison.\n",
    "metadata = create_excel_metadata(frameworkfile)\n",
    "oldmeta = create_excel_metadata(oldframefile)\n",
    "print(f'Current keys: {metadata.keys()}')\n",
    "print(f'Old keys: {oldmeta.keys()}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name</th>\n",
       "      <th>name_DE</th>\n",
       "      <th>rank</th>\n",
       "      <th>summary</th>\n",
       "      <th>summary_DE</th>\n",
       "      <th>longname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>P01</td>\n",
       "      <td>Plan</td>\n",
       "      <td>Planung</td>\n",
       "      <td>1</td>\n",
       "      <td>Envision the desired outcome. Lay out effectiv...</td>\n",
       "      <td></td>\n",
       "      <td>P01 - Plan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>P02</td>\n",
       "      <td>Prepare</td>\n",
       "      <td>Vorbereitung</td>\n",
       "      <td>2</td>\n",
       "      <td>Activities conducted before execution to impro...</td>\n",
       "      <td></td>\n",
       "      <td>P02 - Prepare</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>P03</td>\n",
       "      <td>Execute</td>\n",
       "      <td>Durchführung</td>\n",
       "      <td>3</td>\n",
       "      <td>Run the action, from initial exposure to wrap-...</td>\n",
       "      <td></td>\n",
       "      <td>P03 - Execute</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>P04</td>\n",
       "      <td>Assess</td>\n",
       "      <td>Auswertung</td>\n",
       "      <td>4</td>\n",
       "      <td>Evaluate effectiveness of action, for use in f...</td>\n",
       "      <td></td>\n",
       "      <td>P04 - Assess</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  disarm_id     name       name_DE  rank  \\\n",
       "0       P01     Plan       Planung     1   \n",
       "1       P02  Prepare  Vorbereitung     2   \n",
       "2       P03  Execute  Durchführung     3   \n",
       "3       P04   Assess    Auswertung     4   \n",
       "\n",
       "                                             summary summary_DE       longname  \n",
       "0  Envision the desired outcome. Lay out effectiv...                P01 - Plan  \n",
       "1  Activities conducted before execution to impro...             P02 - Prepare  \n",
       "2  Run the action, from initial exposure to wrap-...             P03 - Execute  \n",
       "3  Evaluate effectiveness of action, for use in f...              P04 - Assess  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Phases sheet as loaded from the current master workbook.\n",
    "metadata['phases']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Phase</th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Plan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Envision the desired outcome. Lay out effectiv...</td>\n",
       "      <td>P01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Prepare</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Activities conducted before execution to impro...</td>\n",
       "      <td>P02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Execute</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Run the action, from initial exposure to wrap-...</td>\n",
       "      <td>P03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Assess</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Assess effectiveness of action, for use in fut...</td>\n",
       "      <td>P04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  DISARM Phase DISARM Tactic  \\\n",
       "0         Plan           NaN   \n",
       "1      Prepare           NaN   \n",
       "2      Execute           NaN   \n",
       "3       Assess           NaN   \n",
       "\n",
       "                                         Description DISARM ID  \n",
       "0  Envision the desired outcome. Lay out effectiv...       P01  \n",
       "1  Activities conducted before execution to impro...       P02  \n",
       "2  Run the action, from initial exposure to wrap-...       P03  \n",
       "3  Assess effectiveness of action, for use in fut...       P04  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `phases` is built in an earlier cell; shown here for comparison with the master sheet.\n",
    "phases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Phase</th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name</th>\n",
       "      <th>name_DE</th>\n",
       "      <th>rank</th>\n",
       "      <th>summary</th>\n",
       "      <th>summary_DE</th>\n",
       "      <th>longname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Assess</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Assess effectiveness of action, for use in fut...</td>\n",
       "      <td>P04</td>\n",
       "      <td>P04</td>\n",
       "      <td>Assess</td>\n",
       "      <td>Auswertung</td>\n",
       "      <td>4</td>\n",
       "      <td>Evaluate effectiveness of action, for use in f...</td>\n",
       "      <td></td>\n",
       "      <td>P04 - Assess</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  DISARM Phase DISARM Tactic  \\\n",
       "3       Assess           NaN   \n",
       "\n",
       "                                         Description DISARM ID disarm_id  \\\n",
       "3  Assess effectiveness of action, for use in fut...       P04       P04   \n",
       "\n",
       "     name     name_DE  rank  \\\n",
       "3  Assess  Auswertung     4   \n",
       "\n",
       "                                             summary summary_DE      longname  \n",
       "3  Evaluate effectiveness of action, for use in f...             P04 - Assess  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compare phase descriptions from the new sheet with the summaries in the master sheet.\n",
    "# NOTE(review): judging by the output, only rows whose text differs are returned - confirm in check_descriptions.\n",
    "# do union of both tables, to get new spreadsheet table\n",
    "check_descriptions(phases, metadata['phases'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 4 and 4\n",
      "first list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')\n",
      "second list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')\n",
      "set difference: set()\n"
     ]
    }
   ],
   "source": [
    "# Confirm the phase names agree between the master sheet and the new spreadsheet.\n",
    "check_diffs(metadata['phases']['name'], phases['DISARM Phase'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISARM Tactic</th>\n",
       "      <th>DISARM Technique</th>\n",
       "      <th>Description</th>\n",
       "      <th>DISARM ID</th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name</th>\n",
       "      <th>name_DE</th>\n",
       "      <th>phase_id</th>\n",
       "      <th>rank</th>\n",
       "      <th>summary</th>\n",
       "      <th>summary_DE</th>\n",
       "      <th>longname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Plan Objectives</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Set clearly defined, measurable, and achievabl...</td>\n",
       "      <td>TA02</td>\n",
       "      <td>TA02</td>\n",
       "      <td>Objective Planning</td>\n",
       "      <td>objektive Planung</td>\n",
       "      <td>P01</td>\n",
       "      <td>2</td>\n",
       "      <td>Set clearly defined, measurable, and achievabl...</td>\n",
       "      <td></td>\n",
       "      <td>TA02 - Objective Planning</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Target Audience Analysis</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Identifying and analyzing the target audience ...</td>\n",
       "      <td>TA03</td>\n",
       "      <td>TA03</td>\n",
       "      <td>Develop People</td>\n",
       "      <td>Menschen entwickeln</td>\n",
       "      <td>P02</td>\n",
       "      <td>4</td>\n",
       "      <td>Develop online and offline users and agents, i...</td>\n",
       "      <td></td>\n",
       "      <td>TA03 - Develop People</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Develop Narratives</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The promotion of beneficial master narratives ...</td>\n",
       "      <td>TA04</td>\n",
       "      <td>TA04</td>\n",
       "      <td>Develop Networks</td>\n",
       "      <td>Netzwerke entwickeln</td>\n",
       "      <td>P02</td>\n",
       "      <td>5</td>\n",
       "      <td>Develop online and offline communities and tra...</td>\n",
       "      <td></td>\n",
       "      <td>TA04 - Develop Networks</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Develop Content</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA05</td>\n",
       "      <td>TA05</td>\n",
       "      <td>Microtargeting</td>\n",
       "      <td>individualisierte Wählerwerbung</td>\n",
       "      <td>P02</td>\n",
       "      <td>6</td>\n",
       "      <td>Target very specific populations of people</td>\n",
       "      <td></td>\n",
       "      <td>TA05 - Microtargeting</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Establish Social Assets</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Establishing information assets generates mess...</td>\n",
       "      <td>TA06</td>\n",
       "      <td>TA06</td>\n",
       "      <td>Develop Content</td>\n",
       "      <td>Inhalte entwickeln</td>\n",
       "      <td>P02</td>\n",
       "      <td>7</td>\n",
       "      <td>Create and acquire content used in incident</td>\n",
       "      <td></td>\n",
       "      <td>TA06 - Develop Content</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Establish Legitimacy</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TA07</td>\n",
       "      <td>TA07</td>\n",
       "      <td>Channel Selection</td>\n",
       "      <td>Kanalauswahl</td>\n",
       "      <td>P02</td>\n",
       "      <td>8</td>\n",
       "      <td>Set up specific delivery, amplification and ma...</td>\n",
       "      <td></td>\n",
       "      <td>TA07 - Channel Selection</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Microtarget</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Target very specific populations of people</td>\n",
       "      <td>TA08</td>\n",
       "      <td>TA08</td>\n",
       "      <td>Pump Priming</td>\n",
       "      <td>Ankurbelung</td>\n",
       "      <td>P03</td>\n",
       "      <td>9</td>\n",
       "      <td>Release content on a targetted small scale, pr...</td>\n",
       "      <td></td>\n",
       "      <td>TA08 - Pump Priming</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Select Channels and Affordances</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Selecting platforms and affordances assesses w...</td>\n",
       "      <td>TA09</td>\n",
       "      <td>TA09</td>\n",
       "      <td>Exposure</td>\n",
       "      <td></td>\n",
       "      <td>P03</td>\n",
       "      <td>10</td>\n",
       "      <td>Release content to general public or push to l...</td>\n",
       "      <td></td>\n",
       "      <td>TA09 - Exposure</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Conduct Pump Priming</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Release content on a targetted small scale, pr...</td>\n",
       "      <td>TA10</td>\n",
       "      <td>TA10</td>\n",
       "      <td>Drive Offline Activity</td>\n",
       "      <td></td>\n",
       "      <td>P03</td>\n",
       "      <td>11</td>\n",
       "      <td>Move incident into offline world</td>\n",
       "      <td></td>\n",
       "      <td>TA10 - Drive Offline Activity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Deliver Content</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Release content to general public or larger po...</td>\n",
       "      <td>TA11</td>\n",
       "      <td>TA11</td>\n",
       "      <td>Persistence</td>\n",
       "      <td></td>\n",
       "      <td>P03</td>\n",
       "      <td>12</td>\n",
       "      <td>Keep incident 'alive', beyond the incident cre...</td>\n",
       "      <td></td>\n",
       "      <td>TA11 - Persistence</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Maximize Exposure</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Maximize exposure of the target audience to in...</td>\n",
       "      <td>TA12</td>\n",
       "      <td>TA12</td>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td></td>\n",
       "      <td>P04</td>\n",
       "      <td>13</td>\n",
       "      <td>Measure effectiveness of incident, for use in ...</td>\n",
       "      <td></td>\n",
       "      <td>TA12 - Measure Effectiveness</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Drive Online Harms</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Actions taken by an influence operation to har...</td>\n",
       "      <td>TA13</td>\n",
       "      <td>TA13</td>\n",
       "      <td>Conduct Center of Gravity Analysis</td>\n",
       "      <td></td>\n",
       "      <td>P01</td>\n",
       "      <td>3</td>\n",
       "      <td>Recon/research to identify \"the source of powe...</td>\n",
       "      <td></td>\n",
       "      <td>TA13 - Conduct Center of Gravity Analysis</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      DISARM Tactic DISARM Technique  \\\n",
       "1                   Plan Objectives              NaN   \n",
       "2          Target Audience Analysis              NaN   \n",
       "3                Develop Narratives              NaN   \n",
       "4                   Develop Content              NaN   \n",
       "5           Establish Social Assets              NaN   \n",
       "6              Establish Legitimacy              NaN   \n",
       "7                       Microtarget              NaN   \n",
       "8   Select Channels and Affordances              NaN   \n",
       "9              Conduct Pump Priming              NaN   \n",
       "10                  Deliver Content              NaN   \n",
       "11                Maximize Exposure              NaN   \n",
       "12               Drive Online Harms              NaN   \n",
       "\n",
       "                                          Description DISARM ID disarm_id  \\\n",
       "1   Set clearly defined, measurable, and achievabl...      TA02      TA02   \n",
       "2   Identifying and analyzing the target audience ...      TA03      TA03   \n",
       "3   The promotion of beneficial master narratives ...      TA04      TA04   \n",
       "4                                                 NaN      TA05      TA05   \n",
       "5   Establishing information assets generates mess...      TA06      TA06   \n",
       "6                                                 NaN      TA07      TA07   \n",
       "7          Target very specific populations of people      TA08      TA08   \n",
       "8   Selecting platforms and affordances assesses w...      TA09      TA09   \n",
       "9   Release content on a targetted small scale, pr...      TA10      TA10   \n",
       "10  Release content to general public or larger po...      TA11      TA11   \n",
       "11  Maximize exposure of the target audience to in...      TA12      TA12   \n",
       "12  Actions taken by an influence operation to har...      TA13      TA13   \n",
       "\n",
       "                                  name                          name_DE  \\\n",
       "1                   Objective Planning                objektive Planung   \n",
       "2                       Develop People              Menschen entwickeln   \n",
       "3                     Develop Networks             Netzwerke entwickeln   \n",
       "4                       Microtargeting  individualisierte Wählerwerbung   \n",
       "5                      Develop Content               Inhalte entwickeln   \n",
       "6                    Channel Selection                     Kanalauswahl   \n",
       "7                         Pump Priming                      Ankurbelung   \n",
       "8                             Exposure                                    \n",
       "9               Drive Offline Activity                                    \n",
       "10                         Persistence                                    \n",
       "11               Measure Effectiveness                                    \n",
       "12  Conduct Center of Gravity Analysis                                    \n",
       "\n",
       "   phase_id  rank                                            summary  \\\n",
       "1       P01     2  Set clearly defined, measurable, and achievabl...   \n",
       "2       P02     4  Develop online and offline users and agents, i...   \n",
       "3       P02     5  Develop online and offline communities and tra...   \n",
       "4       P02     6         Target very specific populations of people   \n",
       "5       P02     7        Create and acquire content used in incident   \n",
       "6       P02     8  Set up specific delivery, amplification and ma...   \n",
       "7       P03     9  Release content on a targetted small scale, pr...   \n",
       "8       P03    10  Release content to general public or push to l...   \n",
       "9       P03    11                   Move incident into offline world   \n",
       "10      P03    12  Keep incident 'alive', beyond the incident cre...   \n",
       "11      P04    13  Measure effectiveness of incident, for use in ...   \n",
       "12      P01     3  Recon/research to identify \"the source of powe...   \n",
       "\n",
       "   summary_DE                                   longname  \n",
       "1                              TA02 - Objective Planning  \n",
       "2                                  TA03 - Develop People  \n",
       "3                                TA04 - Develop Networks  \n",
       "4                                  TA05 - Microtargeting  \n",
       "5                                 TA06 - Develop Content  \n",
       "6                               TA07 - Channel Selection  \n",
       "7                                    TA08 - Pump Priming  \n",
       "8                                        TA09 - Exposure  \n",
       "9                          TA10 - Drive Offline Activity  \n",
       "10                                    TA11 - Persistence  \n",
       "11                          TA12 - Measure Effectiveness  \n",
       "12             TA13 - Conduct Center of Gravity Analysis  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "check_descriptions(tactics, metadata['tactics'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 13 and 16\n",
      "first list: Index(['Strategic Planning', 'Persistence', 'Microtargeting',\n",
      "       'Channel Selection', 'Develop Networks', 'Objective Planning',\n",
      "       'Measure Effectiveness', 'Exposure',\n",
      "       'Conduct Center of Gravity Analysis', 'Drive Offline Activity',\n",
      "       'Pump Priming', 'Develop Content', 'Develop People'],\n",
      "      dtype='object')\n",
      "second list: Index(['Drive Online Harms', 'Maximize Exposure', 'Conduct Pump Priming',\n",
      "       'Deliver Content', 'Persist in the Information Space',\n",
      "       'Assess Effectiveness', 'Establish Social Assets',\n",
      "       'Target Audience Analysis', 'Develop Narratives', 'Plan Strategy',\n",
      "       'Select Channels and Affordances', 'Develop Content', 'Plan Objectives',\n",
      "       'Microtarget', 'Establish Legitimacy', 'Drive Offline Activity'],\n",
      "      dtype='object')\n",
      "set difference: {'Develop Networks', 'Channel Selection', 'Pump Priming', 'Measure Effectiveness', 'Conduct Center of Gravity Analysis', 'Exposure', 'Objective Planning', 'Develop People', 'Microtargeting', 'Persistence', 'Strategic Planning'}\n"
     ]
    }
   ],
   "source": [
    "check_diffs(metadata['tactics']['name'], tactics['DISARM Tactic'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lengths are: 71 and 97\n",
      "first list: Index(['Backstop personas', 'Message reach', 'Twitter bots amplify',\n",
      "       'Demand unsurmountable proof', 'Behaviour changes',\n",
      "       'Create fake videos and images', 'Play the long game',\n",
      "       'Purchase advertisements', 'Manipulate online polls',\n",
      "       'Facilitate State Propaganda', 'Find echo chambers', 'Clickbait',\n",
      "       'Pinterest', 'Seed distortions', 'Cow online opinion leaders',\n",
      "       'Segment audiences',\n",
      "       'Dedicated channels disseminate information pollution',\n",
      "       'Organise remote rallies and events', 'Prepare fundraising campaigns',\n",
      "       'Muzzle social media as a political force',\n",
      "       'Twitter trolls amplify and manipulate',\n",
      "       'Create pseudoscientific or disingenuous research',\n",
      "       'Develop Narrative Concepts', 'Cheerleading domestic social media ops',\n",
      "       '5Ds (dismiss, distort, distract, dismay, divide)',\n",
      "       'Leverage Existing Narratives', 'Continue to amplify', 'Reddit',\n",
      "       'Trial content', 'LinkedIn', 'Leak altered documents',\n",
      "       'Conspiracy narratives', 'Use hashtag',\n",
      "       'Generate information pollution', 'Analyze existing communities',\n",
      "       'Fabricate social media comment', 'Cultivate ignorant agents',\n",
      "       'YouTube', 'Compromise legitimate account',\n",
      "       'Create competing narratives', 'Kernel of Truth',\n",
      "       'Respond to breaking news event', 'Twitter',\n",
      "       'Plan to discredit credible sources', 'Distort facts',\n",
      "       'Use physical broadcast capabilities', 'Use fake experts', 'Facebook',\n",
      "       'Create fake experts', 'Devise Competing Narratives', 'Instagram',\n",
      "       'Bait legitimate influencers', 'Respond to active crisis',\n",
      "       'Create hashtags', 'Flooding', 'Legacy web content',\n",
      "       'Create fake Social Media Profiles / Pages / Groups',\n",
      "       'Create fake websites', 'Deny involvement',\n",
      "       'Tertiary sites amplify news', 'Adapt existing narratives',\n",
      "       'Search Engine Optimization', 'Degrade adversary', 'Use concealment',\n",
      "       'Sell merchandising', 'Conduct Fundraising Campaigns',\n",
      "       'Social media engagement', 'Use SMS/ WhatsApp/ Chat apps', 'Memes',\n",
      "       'WhatsApp', 'Create fake or imposter news sites'],\n",
      "      dtype='object')\n",
      "second list: Index(['Leverage Echo Chambers/Filter Bubbles',\n",
      "       'Map Target Audience Information Environment', 'Physical Violence',\n",
      "       'Direct Users to Alternative Platforms', 'Develop Audio-based Content',\n",
      "       'Develop Owned Media Assets', 'Leverage Conspiracy Theory Narratives',\n",
      "       'Develop Text-based Content', 'Develop Video-based Content',\n",
      "       'Deliver Ads', 'Leverage Content Farm',\n",
      "       'Bookmarking and Content Curation', 'Use Search Engine Optimization',\n",
      "       'Livestream', 'Respond to Breaking News Event or Active Crisis',\n",
      "       'Recruit bad actors', 'Incentivize Sharing', 'Media Sharing Networks',\n",
      "       'Identify Social and Technical Vulnerabilities',\n",
      "       'Create Inauthentic Accounts', 'Prepare fundraising campaigns', 'Email',\n",
      "       'Online polls', 'Purchase Targeted Advertisements',\n",
      "       'Obtain Private Documents', 'Play the long game', 'Build Network',\n",
      "       ' Social Networks', 'Discussion Forums', 'Divide',\n",
      "       'Create Inauthentic Social Media Pages and Groups',\n",
      "       'Conduct fundraising', 'Acquire/ recruit Network', 'Cross-Posting',\n",
      "       'Leverage Existing Narratives', 'Traditional Media',\n",
      "       'Conceal Operational Activity', 'Create Clickbait',\n",
      "       'Reuse Existing Content', 'Measure Effectiveness', 'Distort',\n",
      "       'Conceal Infrastructure', 'Generate information pollution',\n",
      "       'Develop New Narratives', 'Bait legitimate influencers',\n",
      "       'Facilitate State Propaganda', 'Post Content', 'Co-opt Trusted Sources',\n",
      "       'Seed distortions', 'Compromise legitimate accounts',\n",
      "       'Blogging and Publishing Networks', 'Platform Filtering',\n",
      "       'Determine Target Audiences', 'Dismiss', 'Consumer Review Networks',\n",
      "       'Merchandising/ Advertising', 'Develop Image-based Content',\n",
      "       'Prepare Assets Impersonating Legitimate Entities',\n",
      "       'Flooding the Information Space', 'Distract', 'Measure Performance',\n",
      "       'Distort facts', 'Use fake experts', 'Comment or Reply on Content',\n",
      "       'Create fake experts', 'Create personas', 'Seed Kernel of truth',\n",
      "       'Conceal People', 'Attract Traditional Media',\n",
      "       'Determine Strategic Ends', 'Create inauthentic websites',\n",
      "       'Cultivate ignorant agents', 'Create Localized Content',\n",
      "       'Create hashtags and search artifacts',\n",
      "       'Censor social media as a political force', 'Dismay',\n",
      "       'Segment Audiences', 'Establish Inauthentic News Sites',\n",
      "       'Organize Events', 'Harass', 'Suppress Opposition',\n",
      "       'Control Information Environment through Offensive Cyberspace Operations',\n",
      "       'Infiltrate Existing Networks', 'Formal Diplomatic Channels',\n",
      "       'Chat apps', 'Encourage Attendance at Events',\n",
      "       'Develop Competing Narratives', 'Employ Commercial Analytic Firms',\n",
      "       'Degrade Adversary', 'Amplify Existing Narrative',\n",
      "       'Measure Effectiveness Indicators (or KPIs)',\n",
      "       'Exploit TOS/Content Moderation',\n",
      "       'Prepare Physical Broadcast Capabilities',\n",
      "       'Integrate Target Audience Vulnerabilities into Narrative',\n",
      "       'Manipulate Platform Algorithm', 'Trial content',\n",
      "       'Demand insurmountable proof'],\n",
      "      dtype='object')\n",
      "set difference: {'Degrade adversary', 'Twitter trolls amplify and manipulate', 'Create pseudoscientific or disingenuous research', 'Message reach', 'Develop Narrative Concepts', 'Continue to amplify', 'Behaviour changes', 'Cheerleading domestic social media ops', '5Ds (dismiss, distort, distract, dismay, divide)', 'Create fake websites', 'Muzzle social media as a political force', 'LinkedIn', 'Deny involvement', 'Conspiracy narratives', 'Organise remote rallies and events', 'Reddit', 'Instagram', 'Demand unsurmountable proof', 'Purchase advertisements', 'Use hashtag', 'Find echo chambers', 'Memes', 'Manipulate online polls', 'Clickbait', 'Pinterest', 'Create fake or imposter news sites', 'Cow online opinion leaders', 'Flooding', 'Create fake Social Media Profiles / Pages / Groups', 'Tertiary sites amplify news', 'Adapt existing narratives', 'Search Engine Optimization', 'Legacy web content', 'Sell merchandising', 'Create competing narratives', 'Conduct Fundraising Campaigns', 'Social media engagement', 'Use SMS/ WhatsApp/ Chat apps', 'Fabricate social media comment', 'WhatsApp', 'Use concealment', 'Create hashtags', 'Use physical broadcast capabilities', 'Segment audiences', 'Respond to active crisis', 'Compromise legitimate account', 'Twitter bots amplify', 'Kernel of Truth', 'Analyze existing communities', 'Respond to breaking news event', 'YouTube', 'Twitter', 'Plan to discredit credible sources', 'Dedicated channels disseminate information pollution', 'Facebook', 'Create fake videos and images', 'Leak altered documents', 'Devise Competing Narratives', 'Backstop personas'}\n"
     ]
    }
   ],
   "source": [
    "check_diffs(metadata['techniques']['name'], techs['DISARM Technique'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check against the previous version of the framework"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name_changed</th>\n",
       "      <th>summary_changed</th>\n",
       "      <th>name_x</th>\n",
       "      <th>name_y</th>\n",
       "      <th>summary_x</th>\n",
       "      <th>summary_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>P04</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>Assess</td>\n",
       "      <td>Assess</td>\n",
       "      <td>Evaluate effectiveness of action, for use in f...</td>\n",
       "      <td>Evaluate effectiveness of action, for use in f...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  disarm_id  name_changed  summary_changed  name_x  name_y  \\\n",
       "3       P04         False             True  Assess  Assess   \n",
       "\n",
       "                                           summary_x  \\\n",
       "3  Evaluate effectiveness of action, for use in f...   \n",
       "\n",
       "                                           summary_y  \n",
       "3  Evaluate effectiveness of action, for use in f...  "
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def check_version_descriptions(df1, df2):\n",
    "    \"\"\"List entries whose name or summary differs between two versions.\n",
    "\n",
    "    The two frames are inner-joined on 'disarm_id', so only ids present in\n",
    "    both frames are compared; ids found in just one frame are not reported.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df1, df2 : pandas.DataFrame\n",
    "        Each must contain 'disarm_id', 'name' and 'summary' columns. After\n",
    "        the merge, df1 values carry the '_x' suffix and df2 values '_y'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The merged rows where the name or the summary differs, restricted to\n",
    "        the columns disarm_id, name_changed, summary_changed, name_x, name_y,\n",
    "        summary_x and summary_y. A value missing on both sides is treated\n",
    "        as unchanged; a value missing on one side only counts as a change.\n",
    "    \"\"\"\n",
    "    compared = df1.merge(df2, on='disarm_id')\n",
    "    for field in ('name', 'summary'):\n",
    "        left = compared[field + '_x']\n",
    "        right = compared[field + '_y']\n",
    "        # NaN != NaN evaluates to True, so without this mask an entry that is\n",
    "        # empty in both versions would be reported as changed.\n",
    "        both_missing = left.isna() & right.isna()\n",
    "        compared[field + '_changed'] = (left != right) & ~both_missing\n",
    "    report_columns = ['disarm_id', 'name_changed', 'summary_changed',\n",
    "                      'name_x', 'name_y', 'summary_x', 'summary_y']\n",
    "    is_changed = compared['name_changed'] | compared['summary_changed']\n",
    "    # Single .loc selection instead of chained [...][...] indexing.\n",
    "    return compared.loc[is_changed, report_columns]\n",
    "\n",
    "check_version_descriptions(metadata['phases'], oldmeta['phases'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name_changed</th>\n",
       "      <th>summary_changed</th>\n",
       "      <th>name_x</th>\n",
       "      <th>name_y</th>\n",
       "      <th>summary_x</th>\n",
       "      <th>summary_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TA01</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>Plan Strategy</td>\n",
       "      <td>Strategic Planning</td>\n",
       "      <td>Define the desired end state, i.e. the set of ...</td>\n",
       "      <td>Define the desired end state, i.e. the set of ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TA02</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Plan Objectives</td>\n",
       "      <td>Objective Planning</td>\n",
       "      <td>Set clearly defined, measurable, and achievabl...</td>\n",
       "      <td>Set clearly defined, measurable, and achievabl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TA05</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>Microtarget</td>\n",
       "      <td>Microtargeting</td>\n",
       "      <td>Target very specific populations of people</td>\n",
       "      <td>Target very specific populations of people</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TA06</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>Develop Content</td>\n",
       "      <td>Develop Content</td>\n",
       "      <td></td>\n",
       "      <td>Create and acquire content used in incident</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TA07</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Select Channels and Affordances</td>\n",
       "      <td>Channel Selection</td>\n",
       "      <td>Selecting platforms and affordances assesses w...</td>\n",
       "      <td>Set up specific delivery, amplification and ma...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>TA08</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Conduct Pump Priming</td>\n",
       "      <td>Pump Priming</td>\n",
       "      <td>Release content on a targetted small scale, pr...</td>\n",
       "      <td>Release content on a targetted small scale, pr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>TA09</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Deliver Content</td>\n",
       "      <td>Exposure</td>\n",
       "      <td>Release content to general public or larger po...</td>\n",
       "      <td>Release content to general public or push to l...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>TA10</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>Drive Offline Activity</td>\n",
       "      <td>Drive Offline Activity</td>\n",
       "      <td>Move incident/campaign from online to offline....</td>\n",
       "      <td>Move incident into offline world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>TA11</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Persist in the Information Environment</td>\n",
       "      <td>Persistence</td>\n",
       "      <td>Persist in the Information Space refers to tak...</td>\n",
       "      <td>Keep incident 'alive', beyond the incident cre...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>TA12</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Assess Effectiveness</td>\n",
       "      <td>Measure Effectiveness</td>\n",
       "      <td>Assess effectiveness of action, for use in fut...</td>\n",
       "      <td>Measure effectiveness of incident, for use in ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>TA13</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>Target Audience Analysis</td>\n",
       "      <td>Conduct Center of Gravity Analysis</td>\n",
       "      <td>Identifying and analyzing the target audience ...</td>\n",
       "      <td>Recon/research to identify \"the source of powe...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   disarm_id  name_changed  summary_changed  \\\n",
       "0       TA01          True            False   \n",
       "1       TA02          True             True   \n",
       "2       TA05          True            False   \n",
       "3       TA06         False             True   \n",
       "4       TA07          True             True   \n",
       "5       TA08          True             True   \n",
       "6       TA09          True             True   \n",
       "7       TA10         False             True   \n",
       "8       TA11          True             True   \n",
       "9       TA12          True             True   \n",
       "10      TA13          True             True   \n",
       "\n",
       "                                    name_x  \\\n",
       "0                            Plan Strategy   \n",
       "1                          Plan Objectives   \n",
       "2                              Microtarget   \n",
       "3                          Develop Content   \n",
       "4          Select Channels and Affordances   \n",
       "5                     Conduct Pump Priming   \n",
       "6                          Deliver Content   \n",
       "7                   Drive Offline Activity   \n",
       "8   Persist in the Information Environment   \n",
       "9                     Assess Effectiveness   \n",
       "10                Target Audience Analysis   \n",
       "\n",
       "                                name_y  \\\n",
       "0                   Strategic Planning   \n",
       "1                   Objective Planning   \n",
       "2                       Microtargeting   \n",
       "3                      Develop Content   \n",
       "4                    Channel Selection   \n",
       "5                         Pump Priming   \n",
       "6                             Exposure   \n",
       "7               Drive Offline Activity   \n",
       "8                          Persistence   \n",
       "9                Measure Effectiveness   \n",
       "10  Conduct Center of Gravity Analysis   \n",
       "\n",
       "                                            summary_x  \\\n",
       "0   Define the desired end state, i.e. the set of ...   \n",
       "1   Set clearly defined, measurable, and achievabl...   \n",
       "2          Target very specific populations of people   \n",
       "3                                                       \n",
       "4   Selecting platforms and affordances assesses w...   \n",
       "5   Release content on a targetted small scale, pr...   \n",
       "6   Release content to general public or larger po...   \n",
       "7   Move incident/campaign from online to offline....   \n",
       "8   Persist in the Information Space refers to tak...   \n",
       "9   Assess effectiveness of action, for use in fut...   \n",
       "10  Identifying and analyzing the target audience ...   \n",
       "\n",
       "                                            summary_y  \n",
       "0   Define the desired end state, i.e. the set of ...  \n",
       "1   Set clearly defined, measurable, and achievabl...  \n",
       "2          Target very specific populations of people  \n",
       "3         Create and acquire content used in incident  \n",
       "4   Set up specific delivery, amplification and ma...  \n",
       "5   Release content on a targetted small scale, pr...  \n",
       "6   Release content to general public or push to l...  \n",
       "7                    Move incident into offline world  \n",
       "8   Keep incident 'alive', beyond the incident cre...  \n",
       "9   Measure effectiveness of incident, for use in ...  \n",
       "10  Recon/research to identify \"the source of powe...  "
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "check_version_descriptions(metadata['tactics'], oldmeta['tactics'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>disarm_id</th>\n",
       "      <th>name_changed</th>\n",
       "      <th>summary_changed</th>\n",
       "      <th>name_x</th>\n",
       "      <th>name_y</th>\n",
       "      <th>summary_x</th>\n",
       "      <th>summary_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [disarm_id, name_changed, summary_changed, name_x, name_y, summary_x, summary_y]\n",
       "Index: []"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "check_version_descriptions(metadata['techniques'], oldmeta['techniques'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}