# Create AMITT website sql

Creates the database used as a data source in the AMITT website tests: either a SQLite file (`amittsite.sqlite`) or a local PostgreSQL database, depending on `dbasetype`.

In [20]:
# Get AMITT variables

import pandas as pd
import sqlite3 as sql
import os
from sqlalchemy import create_engine
from generate_amitt_ttps import Amitt
pd.set_option('display.max_rows', 1000)


# Generate AMITT datasets
amitt = Amitt()

# Database backend to populate. Only 'sqlite' and 'postgresql_local' have a
# connection set up below; any other value is rejected.
dbasetype = 'postgresql_local' #'sqlite' #'heroku'


# Create connection to database
if dbasetype == 'sqlite':
    # Generate minimal sqlite database from the Amitt variables
    conn = sql.connect('amittsite.sqlite')
elif dbasetype == 'postgresql_local':
    # SQLAlchemy engine for the local PostgreSQL database 'amittsite'
    conn = create_engine('postgresql://sara:@localhost:5432/amittsite')
else:
    # Fail here instead of leaving `conn` undefined: without this, the rest
    # of the notebook would run without writing a single table.
    raise ValueError('Unsupported dbasetype: {!r}'.format(dbasetype))

    
# Add table to database    
def _quote_sql_identifier(identifier):
    """Return ``identifier`` wrapped in double quotes for use in SQL text."""
    return '"{}"'.format(str(identifier).replace('"', '""'))


def add_table(dataframe, tablename, columns):
    """Write selected columns of a dataframe to the database as text.

    Every value in the selected columns is converted with ``str`` and a
    1-based integer ``id`` column is added. The result is written through
    the module-level ``conn``, in the way selected by the module-level
    ``dbasetype``:

    * ``'sqlite'``: the table is dropped if present, recreated with an
      ``id INTEGER PRIMARY KEY AUTOINCREMENT`` column plus one
      ``TEXT NOT NULL`` column per entry in ``columns``, filled, and the
      transaction is committed.
    * ``'postgresql_local'``: the table is replaced via
      ``DataFrame.to_sql(..., if_exists='replace')``.

    Args:
        dataframe: source pandas DataFrame.
        tablename: name of the database table to (re)create.
        columns: list of column names to copy from ``dataframe``.

    Returns:
        The stringified copy that was written, including the ``id`` column.

    Raises:
        ValueError: if ``dbasetype`` is not one of the supported backends.
    """
    # Reject unknown backends up front; otherwise nothing would be written
    # and the caller would not be told.
    if dbasetype not in ('sqlite', 'postgresql_local'):
        raise ValueError('Unsupported dbasetype: {!r}'.format(dbasetype))

    # Create sql-appropriate table from dataframe.
    # Series.map(str) is the element-wise conversion; DataFrame.applymap,
    # used previously, is deprecated in recent pandas releases.
    newtable = dataframe[columns].copy()
    for col in columns:
        newtable[col] = newtable[col].map(str)
    newtable['id'] = range(1, len(newtable) + 1)

    # send to database
    if dbasetype == 'sqlite':
        # Quote identifiers so that reserved words such as "group" are
        # accepted as table or column names.
        quoted_table = _quote_sql_identifier(tablename)
        coldefs = ', '.join(
            '{} TEXT NOT NULL'.format(_quote_sql_identifier(col))
            for col in columns
        )
        conn.execute('DROP TABLE IF EXISTS {}'.format(quoted_table))
        conn.execute(
            'CREATE TABLE {} (id INTEGER PRIMARY KEY AUTOINCREMENT, {});'.format(
                quoted_table, coldefs
            )
        )
        newtable.to_sql(tablename, conn, index=False, if_exists='append')
        conn.commit()
    else:
        newtable.to_sql(tablename, conn, index=False, if_exists='replace')
    return newtable


# Build a cross-table
def object_tactics_techniques(objectcol, objecttable, crosstable):
    """Split an object cross-table into technique links and tactic links.

    ``crosstable`` holds ``amitt_id`` / ``technique_id`` pairs in which the
    ``technique_id`` value may be blank, a tactic id (starting with 'TA') or
    anything else, which is treated as a technique id.

    Args:
        objectcol: name given to the ``amitt_id`` column in both outputs.
        objecttable: frame with ``amitt_id`` and ``tactic_id`` columns; its
            rows become the links flagged ``main_tactic == 'Y'``.
        crosstable: frame with ``amitt_id`` and ``technique_id`` columns.

    Returns:
        A ``(technique_links, tactic_links)`` tuple of new dataframes; the
        inputs are not modified.
    """
    is_tactic_id = crosstable['technique_id'].str.startswith('TA')
    is_filled = crosstable['technique_id'] != ''

    # objects to techniques: non-blank ids that are not tactic ids
    technique_links = crosstable[is_filled & (~is_tactic_id)].rename(
        columns={'amitt_id': objectcol})
    technique_links = technique_links.assign(summary='N/A')

    # objects to tactics: cross-table rows first, flagged 'N' ...
    listed_tactics = crosstable[is_tactic_id].rename(
        columns={'amitt_id': objectcol, 'technique_id': 'tactic_id'})
    listed_tactics = listed_tactics.assign(main_tactic='N')

    # ... then each object's own tactic from the object table, flagged 'Y'
    own_tactics = objecttable[['amitt_id', 'tactic_id']].rename(
        columns={'amitt_id': objectcol})
    own_tactics = own_tactics.assign(main_tactic='Y')

    tactic_links = pd.concat(
        [listed_tactics, own_tactics], ignore_index=True, sort=False)
    tactic_links = tactic_links.assign(summary='N/A')
    return technique_links, tactic_links

# Load all the tables - Heroku needs them in correct order...
# Each call writes one table through add_table() and rebinds `newtable` to
# the frame that was written, so only the most recent one is kept.

# -- frameworks --

#newtable = add_table(amitt.df_actortypes, 'actor_type', ['amitt_id', 'sector_id', 'framework_id', 'name', 'summary'])
newtable = add_table(amitt.df_counters, 'counter', ['amitt_id', 'tactic_id', 'metatechnique_id', 'name', 'summary'])
newtable = add_table(amitt.df_detections, 'detection', ['amitt_id', 'tactic_id', 'name', 'summary'])
newtable = add_table(amitt.df_frameworks, 'framework', ['amitt_id', 'name', 'summary'])
newtable = add_table(amitt.df_metatechniques, 'metatechnique', ['amitt_id', 'name', 'summary'])
newtable = add_table(amitt.df_phases, 'phase', ['amitt_id', 'name', 'rank', 'summary'])
newtable = add_table(amitt.df_playbook, 'playbook', ['amitt_id', 'object_id', 'name', 'summary'])
newtable = add_table(amitt.df_resources, 'resource', ['amitt_id', 'name', 'summary', 'resource_type'])
newtable = add_table(amitt.df_responsetypes, 'responsetype', ['amitt_id', 'name', 'summary'])
#newtable = add_table(amitt.df_sector, 'sector', ['amitt_id', 'name', 'summary'])
newtable = add_table(amitt.df_tactics, 'tactic', ['amitt_id', 'phase_id', 'name', 'rank', 'summary'])
newtable = add_table(amitt.df_tasks, 'task', ['amitt_id', 'tactic_id', 'framework_id', 'name', 'summary'])
newtable = add_table(amitt.df_techniques, 'technique', ['amitt_id', 'tactic_id', 'name', 'summary'])

# Counter link tables: the counter cross-table is split into technique links
# and tactic links (ids starting with 'TA'), each stored as its own table.
(ctech, ctact) = object_tactics_techniques('counter_id', amitt.df_counters, amitt.cross_counterid_techniqueid)
newtable = add_table(ctech, 'counter_technique', ['counter_id', 'technique_id', 'summary'])
newtable = add_table(ctact, 'counter_tactic', ['counter_id', 'tactic_id', 'main_tactic', 'summary'])

# Detection link tables, built the same way from the detection cross-table.
(dtech, dtact) = object_tactics_techniques('detection_id', amitt.df_detections, amitt.cross_detectionid_techniqueid)
newtable = add_table(dtech, 'detection_technique', ['detection_id', 'technique_id', 'summary'])
newtable = add_table(dtact, 'detection_tactic', ['detection_id', 'tactic_id', 'main_tactic', 'summary'])

# -- datasets --

example_columns = ['amitt_id', 'object_id', 'name', 'summary']
newtable = add_table(amitt.df_examples, 'example', example_columns)

# dataset
# reference - create this from other tables. 
# Also need incidentcounter etc - create from data
group_columns = [
    'amitt_id', 'name', 'url', 'summary',
    'sector', 'primary_role', 'secondary_role',
    'primary_subject', 'secondary_subject',
    'volunteers', 'region', 'country',
    'twitter_handle',
]
newtable = add_table(amitt.df_groups, 'group', group_columns)

incident_columns = [
    'amitt_id', 'name', 'summary',
    'year_started', 'attributions_seen',
    'found_in_country', 'objecttype',
]
newtable = add_table(amitt.df_incidents, 'incident', incident_columns)

tool_columns = [
    'amitt_id', 'name', 'summary',
    'organization', 'url', 'category',
    'disinformation_use', 'cogseccollab_use',
    'function', 'code_url', 'artifacts',
    'automation', 'platform', 'accessibility',
]
newtable = add_table(amitt.df_tools, 'tool', tool_columns)

# incidenttechnique crosstable: keep the id/name/summary columns plus the two
# foreign keys, renamed to the column names used in the database.
it_source_columns = ['amitt_id', 'name', 'summary', 'amitt_id_incident', 'amitt_id_technique']
it = (
    vars(amitt)['it'][it_source_columns]
    .copy()
    .rename(columns={'amitt_id_incident': 'incident_id',
                     'amitt_id_technique': 'technique_id'})
)
incident_technique_columns = ['amitt_id', 'name', 'summary', 'incident_id', 'technique_id']
newtable = add_table(it, 'incident_technique', incident_technique_columns)

# Users table: written through add_table() with one test account, except for
# sqlite, where an empty table is created directly and the connection closed.
if dbasetype != 'sqlite':
    dfusers = pd.DataFrame({'username': ['test'], 'password': ['testing']})
    usertable = add_table(dfusers, 'users', ['username', 'password'])
else:
    conn.execute("DROP TABLE IF EXISTS {}".format('users'))
    conn.execute('''CREATE TABLE users (id INTEGER PRIMARY KEY AUTOINCREMENT, username TEXT NOT NULL UNIQUE, password TEXT NOT NULL);''')
    conn.close()

# Show the last table assigned to `newtable`
newtable

Unnamed: 0,amitt_id,name,summary,incident_id,technique_id,id
0,IT00000001,buy FB targeted ads,,I00002,T0018,1
1,IT00000010,Targeted FB paid ads,,I00005,T0018,2
2,IT00000057,Targeted FB paid ads,,I00017,T0018,3
3,IT00000002,"Promote ""funding"" campaign",,I00002,T0017,4
4,IT00000003,create web-site - information pollution,,I00002,T0019,5
5,IT00000014,RT & Sputnik generate information pollution,,I00005,T0019,6
6,IT00000031,RT & Sputnik generate information pollution (r...,,I00007,T0019,7
7,IT00000037,RT & Sputnik generate information pollution,,I00009,T0019,8
8,IT00000061,RT & Sputnik generate information pollution,,I00017,T0019,9
9,IT00000082,RT & Sputnik generate information pollution (s...,,I00029,T0019,10


## test area

In [21]:
# List the attributes held by the amitt object, then look at one of them
amitt_vars = vars(amitt)
print('{}\n'.format(amitt_vars.keys()))

testdfname = 'it'
testdf = amitt_vars[testdfname]
print('{} columns: {}'.format(testdfname, testdf.columns))
testdf

dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_groups', 'df_tools', 'df_examples', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid', 'cross_detectionid_techniqueid', 'cross_detectionid_resourceid', 'cross_detectionid_actortypeid'])

it columns: Index(['amitt_id', 'name', 'summary', 'amitt_id_incident', 'name_incident',
       'amitt_id_technique', 'name_technique'],
      dtype='object')


Unnamed: 0,amitt_id,name,summary,amitt_id_incident,name_incident,amitt_id_technique,name_technique
0,IT00000001,buy FB targeted ads,,I00002,#VaccinateUS,T0018,Paid targeted ads
1,IT00000010,Targeted FB paid ads,,I00005,Brexit vote,T0018,Paid targeted ads
2,IT00000057,Targeted FB paid ads,,I00017,US presidential elections,T0018,Paid targeted ads
3,IT00000002,"Promote ""funding"" campaign",,I00002,#VaccinateUS,T0017,Promote online funding
4,IT00000003,create web-site - information pollution,,I00002,#VaccinateUS,T0019,Generate information pollution
5,IT00000014,RT & Sputnik generate information pollution,,I00005,Brexit vote,T0019,Generate information pollution
6,IT00000031,RT & Sputnik generate information pollution (r...,,I00007,Incirlik terrorists,T0019,Generate information pollution
7,IT00000037,RT & Sputnik generate information pollution,,I00009,PhilippinesExpert,T0019,Generate information pollution
8,IT00000061,RT & Sputnik generate information pollution,,I00017,US presidential elections,T0019,Generate information pollution
9,IT00000082,RT & Sputnik generate information pollution (s...,,I00029,MH17 investigation,T0019,Generate information pollution


In [22]:
# Incident/technique links with the foreign-key columns renamed
it_source_columns = ['amitt_id', 'name', 'summary', 'amitt_id_incident', 'amitt_id_technique']
it = (
    vars(amitt)['it'][it_source_columns]
    .copy()
    .rename(columns={'amitt_id_incident': 'incident_id',
                     'amitt_id_technique': 'technique_id'})
)
it

Unnamed: 0,amitt_id,name,summary,incident_id,technique_id
0,IT00000001,buy FB targeted ads,,I00002,T0018
1,IT00000010,Targeted FB paid ads,,I00005,T0018
2,IT00000057,Targeted FB paid ads,,I00017,T0018
3,IT00000002,"Promote ""funding"" campaign",,I00002,T0017
4,IT00000003,create web-site - information pollution,,I00002,T0019
5,IT00000014,RT & Sputnik generate information pollution,,I00005,T0019
6,IT00000031,RT & Sputnik generate information pollution (r...,,I00007,T0019
7,IT00000037,RT & Sputnik generate information pollution,,I00009,T0019
8,IT00000061,RT & Sputnik generate information pollution,,I00017,T0019
9,IT00000082,RT & Sputnik generate information pollution (s...,,I00029,T0019


In [23]:
# Count the incident rows for each objecttype value
dii = vars(amitt)['df_incidents']
dii.loc[:, 'objecttype'].value_counts()

incident    44
campaign    16
apt          2
tactic       1
Name: objecttype, dtype: int64

In [24]:
# Show only the incident rows whose objecttype is 'apt'
dii.loc[dii['objecttype'] == 'apt']

Unnamed: 0,amitt_id,name,objecttype,summary,year_started,attributions_seen,found_in_country,urls,notes,when_added,found_via,longname
30,I00031,antivax,apt,,2018,Russia,World,https://ajph.aphapublications.org/doi/pdf/10.2...,,2019-02-24,OII,I00031 - antivax
32,I00033,China 50cent Army,apt,50cent Army is a CCP “tool” - “massive secret ...,2014,China,China,https://gking.harvard.edu/files/gking/files/ho...,"campaign (multiple incidents, e.g. Shanshan ri...",2019-02-24,OII,I00033 - China 50cent Army


In [25]:
# Rebind `dii` to the techniques table (it previously held the incidents
# table); the grid-building cell below groups this frame by tactic_id.
dii = vars(amitt)['df_techniques']
dii

Unnamed: 0,amitt_id,name,tactic_id,summary,longname
0,T0001,"5Ds (dismiss, distort, distract, dismay, divide)",TA01,"Nimmo's ""4Ds of propaganda"": dismiss, distort,...","T0001 - 5Ds (dismiss, distort, distract, disma..."
1,T0002,Facilitate State Propaganda,TA01,Organize citizens around pro-state messaging. ...,T0002 - Facilitate State Propaganda
2,T0003,Leverage Existing Narratives,TA01,"Use or adapt existing narrative themes, where ...",T0003 - Leverage Existing Narratives
3,T0004,Competing Narratives,TA01,Advance competing narratives connected to same...,T0004 - Competing Narratives
4,T0005,Center of Gravity Analysis,TA02,"Recon/research to identify ""the source of powe...",T0005 - Center of Gravity Analysis
5,T0006,Create Master Narratives,TA02,The promotion of beneficial master narratives ...,T0006 - Create Master Narratives
6,T0007,Create fake Social Media Profiles / Pages / Gr...,TA03,Create key social engineering assets needed to...,T0007 - Create fake Social Media Profiles / Pa...
7,T0008,Create fake or imposter news sites,TA03,Modern computational propaganda makes use of a...,T0008 - Create fake or imposter news sites
8,T0009,Create fake experts,TA03,Stories planted or promoted in computational p...,T0009 - Create fake experts
9,T0010,Cultivate ignorant agents,TA04,"Cultivate propagandists for a cause, the goals...",T0010 - Cultivate ignorant agents


In [26]:
# Generate arrays used to create D3 grids
# One list of technique ids per tactic ...
ids_by_tactic = dii.groupby('tactic_id')['amitt_id']
dflists = ids_by_tactic.apply(list).reset_index()
# ... spread into one column per position, with short lists padded by ''
dfidgrid = pd.DataFrame(dflists['amitt_id'].to_list())
tactic_column = dflists[['tactic_id']]
dfgrid = pd.concat([tactic_column, dfidgrid], axis=1).fillna('')
# First inner list is the tactic ids; each later list is one grid row
gridarray = [series.to_list() for _, series in dfgrid.items()]
gridarray

[['TA01',
  'TA02',
  'TA03',
  'TA04',
  'TA05',
  'TA06',
  'TA07',
  'TA08',
  'TA09',
  'TA10',
  'TA11',
  'TA12'],
 ['T0001',
  'T0005',
  'T0007',
  'T0010',
  'T0016',
  'T0019',
  'T0029',
  'T0039',
  'T0047',
  'T0057',
  'T0058',
  'T0062'],
 ['T0002',
  'T0006',
  'T0008',
  'T0011',
  'T0017',
  'T0020',
  'T0030',
  'T0040',
  'T0048',
  'T0061',
  'T0059',
  'T0063'],
 ['T0003',
  '',
  'T0009',
  'T0012',
  'T0018',
  'T0021',
  'T0031',
  'T0041',
  'T0049',
  '',
  'T0060',
  'T0064'],
 ['T0004',
  '',
  '',
  'T0013',
  '',
  'T0022',
  'T0032',
  'T0042',
  'T0050',
  '',
  '',
  ''],
 ['', '', '', 'T0014', '', 'T0023', 'T0033', 'T0043', 'T0051', '', '', ''],
 ['', '', '', 'T0015', '', 'T0024', 'T0034', 'T0044', 'T0052', '', '', ''],
 ['', '', '', '', '', 'T0025', 'T0035', 'T0045', 'T0053', '', '', ''],
 ['', '', '', '', '', 'T0026', 'T0036', 'T0046', 'T0054', '', '', ''],
 ['', '', '', '', '', 'T0027', 'T0037', '', 'T0055', '', '', ''],
 ['', '', '', '', '', 'T002

In [50]:
# Index the links by their amitt_id, then build an {amitt_id: name} dict
it.index = it['amitt_id']
names_row = it[['name']].T
names_row.to_dict('records')[0]

{'IT00000001': 'buy FB targeted ads',
 'IT00000010': 'Targeted FB paid ads',
 'IT00000057': 'Targeted FB paid ads',
 'IT00000002': 'Promote "funding" campaign',
 'IT00000003': 'create web-site - information pollution',
 'IT00000014': 'RT & Sputnik generate information pollution',
 'IT00000031': 'RT & Sputnik generate information pollution (report an unreported false story/event)',
 'IT00000037': 'RT & Sputnik generate information pollution ',
 'IT00000061': 'RT & Sputnik generate information pollution',
 'IT00000082': 'RT & Sputnik generate information pollution (synthetic media)',
 'IT00000094': 'RT & Sputnik generate information pollution',
 'IT00000120': 'RT & Sputnik generate information pollution',
 'IT00000137': 'RT & Sputnik generate information pollution (synthetic media)',
 'IT00000146': 'RT & Sputnik generate information pollution (synthetic media)',
 'IT00000156': 'RT & Sputnik generate information pollution',
 'IT00000180': 'RT & Sputnik generate information pollution (synt