# Check new and old spreadsheets of data

In [1]:
import pandas as pd

# make sure we didn't miss anything in mike's original sheet
def check_diffs(col1, col2):
    xx = col1.value_counts().index
    yy = col2.value_counts().index
    print('lengths are: {} and {}'.format(len(xx), len(yy)))
    print('first list: {}'.format(xx))
    print('second list: {}'.format(yy))
    print('set difference: {}'.format(set(xx).difference(set(yy))))

def check_ids(df, col='DISARM ID'):
    print('null ids: {}'.format(df[df[col].isnull()]))
    vc = df[col].value_counts().reset_index()
    print('repeated ids: {}'.format(vc[vc[col]>1]))
    return

def check_descriptions(df1, df2):
    merge = df1.merge(df2, left_on='DISARM ID', right_on='disarm_id')
    return(merge[merge['summary'] != merge['Description']])

df = pd.read_csv('DISARM Merged (Internal) - DISARM Framework Merged.csv')
df

Unnamed: 0.1,Unnamed: 0,DISARM Phase,DISARM Tactic,DISARM Technique,DISARM Subtechnique,Description,DISARM ID,Feedback from EEAS,Action to Do,Action Taken,Where did it come from?,Where from more info
0,1,Plan,,,,Envision the desired outcome. Lay out effectiv...,P01,,,,AMITT,
1,36,Prepare,,,,Activities conducted before execution to impro...,P02,,,,,
2,126,Execute,,,,"Run the action, from initial exposure to wrap-...",P03,,,,,
3,263,Assess,,,,"Assess effectiveness of action, for use in fut...",P04,,,,,
4,7,Plan,Plan Objectives,Dismiss,Discredit Credible Sources,Plan to delegitimize the media landscape and d...,ST0001,,,,AMITT,T0067 Plan to Discredit Credible Sources
...,...,...,...,...,...,...,...,...,...,...,...,...
272,184,Execute,Maximize Exposure,,,Maximize exposure of the target audience to in...,TA12,,,,,
273,204,Execute,Drive Online Harms,,,Actions taken by an influence operation to har...,TA13,,,,,
274,221,Execute,Drive Offline Activity,,,Move incident/campaign from online to offline....,TA14,,,,,
275,235,Execute,Persist in the Information Space,,,Persist in the Information Space refers to tak...,TA15,,,,,


In [2]:
phases = df[['DISARM Phase', 'DISARM Tactic', 'Description', 'DISARM ID']]
phases = phases[phases['DISARM Tactic'].isnull()]
phases

Unnamed: 0,DISARM Phase,DISARM Tactic,Description,DISARM ID
0,Plan,,Envision the desired outcome. Lay out effectiv...,P01
1,Prepare,,Activities conducted before execution to impro...,P02
2,Execute,,"Run the action, from initial exposure to wrap-...",P03
3,Assess,,"Assess effectiveness of action, for use in fut...",P04


In [3]:
check_diffs(phases['DISARM Phase'], df['DISARM Phase'])
check_ids(phases)

lengths are: 4 and 4
first list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')
second list: Index(['Execute', 'Prepare', 'Plan', 'Assess'], dtype='object')
set difference: set()
null ids: Empty DataFrame
Columns: [DISARM Phase, DISARM Tactic, Description, DISARM ID]
Index: []
repeated ids: Empty DataFrame
Columns: [index, DISARM ID]
Index: []


In [4]:
tactics = df[['DISARM Tactic', 'DISARM Technique', 'Description', 'DISARM ID']]#['DISARM Tactic'].notnull()
tactics = tactics[(tactics['DISARM Tactic'].notnull()) & (tactics['DISARM Technique'].isnull())]
tactics

Unnamed: 0,DISARM Tactic,DISARM Technique,Description,DISARM ID
261,Plan Strategy,,"Define the desired end state, i.e. the set of ...",TA01
262,Plan Objectives,,"Set clearly defined, measurable, and achievabl...",TA02
263,Target Audience Analysis,,Identifying and analyzing the target audience ...,TA03
264,Develop Narratives,,The promotion of beneficial master narratives ...,TA04
265,Develop Content,,,TA05
266,Establish Social Assets,,Establishing information assets generates mess...,TA06
267,Establish Legitimacy,,,TA07
268,Microtarget,,Target very specific populations of people,TA08
269,Select Channels and Affordances,,Selecting platforms and affordances assesses w...,TA09
270,Conduct Pump Priming,,"Release content on a targetted small scale, pr...",TA10


In [5]:
check_diffs(tactics['DISARM Tactic'], df['DISARM Tactic'])
check_ids(tactics)

lengths are: 16 and 16
first list: Index(['Drive Online Harms', 'Maximize Exposure', 'Conduct Pump Priming',
       'Deliver Content', 'Persist in the Information Space',
       'Assess Effectiveness', 'Establish Social Assets',
       'Target Audience Analysis', 'Develop Narratives', 'Plan Strategy',
       'Select Channels and Affordances', 'Develop Content', 'Plan Objectives',
       'Microtarget', 'Establish Legitimacy', 'Drive Offline Activity'],
      dtype='object')
second list: Index(['Develop Content', 'Establish Social Assets',
       'Select Channels and Affordances', 'Persist in the Information Space',
       'Target Audience Analysis', 'Maximize Exposure', 'Drive Online Harms',
       'Establish Legitimacy', 'Assess Effectiveness',
       'Drive Offline Activity', 'Deliver Content', 'Develop Narratives',
       'Plan Objectives', 'Conduct Pump Priming', 'Microtarget',
       'Plan Strategy'],
      dtype='object')
set difference: set()
null ids: Empty DataFrame
Columns: [D

In [6]:
techs = df[['DISARM Technique', 'DISARM Subtechnique', 'Description', 'DISARM ID']]
techs = techs[(techs['DISARM Technique'].notnull()) & (techs['DISARM Subtechnique'].isnull())]
techs

Unnamed: 0,DISARM Technique,DISARM Subtechnique,Description,DISARM ID
164,Determine Target Audiences,,,T0001
165,Determine Strategic Ends,,,T0002
166,Dismiss,,Push back against criticism by dismissing your...,T0003
167,Distort,,"Twist the narrative. Take information, or arti...",T0004
168,Distract,,Shift attention to a different narrative or ac...,T0005
...,...,...,...,...
256,Exploit TOS/Content Moderation,,,T0093
257,Play the long game,,Play the long game refers to two phenomena: 1....,T0094
258,Measure Performance,,,T0095
259,Measure Effectiveness,,,T0096


In [7]:
check_diffs(techs['DISARM Technique'], df['DISARM Technique'])
check_ids(techs)
#techs.to_csv('temp_techs.csv', index=False)

lengths are: 97 and 97
first list: Index(['Leverage Echo Chambers/Filter Bubbles',
       'Map Target Audience Information Environment', 'Physical Violence',
       'Direct Users to Alternative Platforms', 'Develop Audio-based Content',
       'Develop Owned Media Assets', 'Leverage Conspiracy Theory Narratives',
       'Develop Text-based Content', 'Develop Video-based Content',
       'Deliver Ads', 'Leverage Content Farm',
       'Bookmarking and Content Curation', 'Use Search Engine Optimization',
       'Livestream', 'Respond to Breaking News Event or Active Crisis',
       'Recruit bad actors', 'Incentivize Sharing', 'Media Sharing Networks',
       'Identify Social and Technical Vulnerabilities',
       'Create Inauthentic Accounts', 'Prepare fundraising campaigns', 'Email',
       'Online polls', 'Purchase Targeted Advertisements',
       'Obtain Private Documents', 'Play the long game', 'Build Network',
       ' Social Networks', 'Discussion Forums', 'Divide',
       'Create I

In [19]:
subs = df[['DISARM Subtechnique', 'DISARM Technique', 'Description', 'DISARM ID']]
subs = subs[subs['DISARM Subtechnique'].notnull()]
subs

Unnamed: 0,DISARM Subtechnique,DISARM Technique,Description,DISARM ID
4,Discredit Credible Sources,Dismiss,Plan to delegitimize the media landscape and d...,ST0001
5,Monitor Social Media Analytics,Map Target Audience Information Environment,An influence operation may use social media an...,ST0002
6,Evaluate Media Surveys,Map Target Audience Information Environment,An influence operation may evaluate its own or...,ST0003
7,Identify Trending Topics/Hashtags,Map Target Audience Information Environment,An influence operation may identify trending h...,ST0004
8,Conduct Web Traffic Analysis,Map Target Audience Information Environment,An influence operation may conduct web traffic...,ST0005
...,...,...,...,...
159,Awareness,Measure Effectiveness,,ST0156
160,Knowledge,Measure Effectiveness,,ST0157
161,Action/attitude,Measure Effectiveness,,ST0158
162,Message reach,Measure Effectiveness Indicators (or KPIs),Monitor and evaluate message reach in misinfor...,ST0159


In [21]:
sx = subs.merge(techs, on='DISARM Technique')
sx.to_csv('tmp_subs.csv', index=False)

In [9]:
check_diffs(subs['DISARM Subtechnique'], df['DISARM Subtechnique'])
check_ids(subs)

lengths are: 159 and 159
first list: Index(['Conceal Network Identity', 'Co-opt Influencers', 'Newspaper',
       'Utilize Academic/Pseudoscientific Justifications',
       'Legacy web content', 'Audio sharing', 'Utilize Butterfly Attack',
       'Demographic Segmentation', 'Political Segmentation',
       'Create Organizations',
       ...
       'Generate Content Unrelated to Narrative', 'Hijack existing hashtag',
       'Delete Opposing Content', 'Backstop personas ',
       'Co-Opt Trusted Individuals', 'Post Across Groups',
       'Identify Data Voids', 'Create fake research', 'Content Focused',
       'Share Memes'],
      dtype='object', length=159)
second list: Index(['Conceal Network Identity', 'Co-opt Influencers', 'Newspaper',
       'Utilize Academic/Pseudoscientific Justifications',
       'Legacy web content', 'Audio sharing', 'Utilize Butterfly Attack',
       'Demographic Segmentation', 'Political Segmentation',
       'Create Organizations',
       ...
       'Generate

# check against originals

In [25]:
MASTERDATA_DIR = '../DISARM_MASTER_DATA/'
frameworkfile = MASTERDATA_DIR + 'DISARM_FRAMEWORKS_MASTER.xlsx'
oldframefile = MASTERDATA_DIR + '2022-06-30_DISARM_0_1/DISARM_FRAMEWORKS_MASTER_0_1.xlsx'

def create_excel_metadata(frameworkfile):
    metadata = {}
    xlsx = pd.ExcelFile(frameworkfile)
    for sheetname in xlsx.sheet_names:
        metadata[sheetname] = xlsx.parse(sheetname)
        metadata[sheetname].fillna('', inplace=True)
    return metadata

metadata = create_excel_metadata(frameworkfile)
oldmeta = create_excel_metadata(oldframefile)
print('Current keys: {}'.format(metadata.keys()))
print('Old keys: {}'.format(oldmeta.keys()))

Current keys: dict_keys(['FRAMEWORK_ADMIN_README', 'FRAMEWORK_ADMIN CODES', 'phases', 'tactics', 'techniques', 'subtechniques', 'frameworks', 'tasks', 'detections', 'countermeasures', 'playbooks', 'responsetypes', 'metatechniques', 'actortypes', 'sectors', 'resources', 'MOE', 'MOP'])
Old keys: dict_keys(['ADMIN_README', 'ADMIN CODES', 'phases', 'tactics', 'techniques', 'countermeasures', 'frameworks', 'tasks', 'detections', 'playbooks', 'responsetypes', 'metatechniques', 'actortypes', 'actortype_framework', 'actortype_sector', 'sectors', 'resources', 'MOE', 'MOP'])


In [11]:
metadata['phases']

Unnamed: 0,disarm_id,name,name_DE,rank,summary,summary_DE,longname
0,P01,Plan,Planung,1,Envision the desired outcome. Lay out effectiv...,,P01 - Plan
1,P02,Prepare,Vorbereitung,2,Activities conducted before execution to impro...,,P02 - Prepare
2,P03,Execute,Durchführung,3,"Run the action, from initial exposure to wrap-...",,P03 - Execute
3,P04,Assess,Auswertung,4,"Evaluate effectiveness of action, for use in f...",,P04 - Assess


In [12]:
phases

Unnamed: 0,DISARM Phase,DISARM Tactic,Description,DISARM ID
0,Plan,,Envision the desired outcome. Lay out effectiv...,P01
1,Prepare,,Activities conducted before execution to impro...,P02
2,Execute,,"Run the action, from initial exposure to wrap-...",P03
3,Assess,,"Assess effectiveness of action, for use in fut...",P04


In [13]:
# do union of both tables, to get new spreadsheet table
check_descriptions(phases, metadata['phases'])

Unnamed: 0,DISARM Phase,DISARM Tactic,Description,DISARM ID,disarm_id,name,name_DE,rank,summary,summary_DE,longname
3,Assess,,"Assess effectiveness of action, for use in fut...",P04,P04,Assess,Auswertung,4,"Evaluate effectiveness of action, for use in f...",,P04 - Assess


In [14]:
check_diffs(metadata['phases']['name'], phases['DISARM Phase'])

lengths are: 4 and 4
first list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')
second list: Index(['Assess', 'Execute', 'Plan', 'Prepare'], dtype='object')
set difference: set()


In [15]:
check_descriptions(tactics, metadata['tactics'])

Unnamed: 0,DISARM Tactic,DISARM Technique,Description,DISARM ID,disarm_id,name,name_DE,phase_id,rank,summary,summary_DE,longname
1,Plan Objectives,,"Set clearly defined, measurable, and achievabl...",TA02,TA02,Objective Planning,objektive Planung,P01,2,"Set clearly defined, measurable, and achievabl...",,TA02 - Objective Planning
2,Target Audience Analysis,,Identifying and analyzing the target audience ...,TA03,TA03,Develop People,Menschen entwickeln,P02,4,"Develop online and offline users and agents, i...",,TA03 - Develop People
3,Develop Narratives,,The promotion of beneficial master narratives ...,TA04,TA04,Develop Networks,Netzwerke entwickeln,P02,5,Develop online and offline communities and tra...,,TA04 - Develop Networks
4,Develop Content,,,TA05,TA05,Microtargeting,individualisierte Wählerwerbung,P02,6,Target very specific populations of people,,TA05 - Microtargeting
5,Establish Social Assets,,Establishing information assets generates mess...,TA06,TA06,Develop Content,Inhalte entwickeln,P02,7,Create and acquire content used in incident,,TA06 - Develop Content
6,Establish Legitimacy,,,TA07,TA07,Channel Selection,Kanalauswahl,P02,8,"Set up specific delivery, amplification and ma...",,TA07 - Channel Selection
7,Microtarget,,Target very specific populations of people,TA08,TA08,Pump Priming,Ankurbelung,P03,9,"Release content on a targetted small scale, pr...",,TA08 - Pump Priming
8,Select Channels and Affordances,,Selecting platforms and affordances assesses w...,TA09,TA09,Exposure,,P03,10,Release content to general public or push to l...,,TA09 - Exposure
9,Conduct Pump Priming,,"Release content on a targetted small scale, pr...",TA10,TA10,Drive Offline Activity,,P03,11,Move incident into offline world,,TA10 - Drive Offline Activity
10,Deliver Content,,Release content to general public or larger po...,TA11,TA11,Persistence,,P03,12,"Keep incident 'alive', beyond the incident cre...",,TA11 - Persistence


In [16]:
check_diffs(metadata['tactics']['name'], tactics['DISARM Tactic'])

lengths are: 13 and 16
first list: Index(['Strategic Planning', 'Persistence', 'Microtargeting',
       'Channel Selection', 'Develop Networks', 'Objective Planning',
       'Measure Effectiveness', 'Exposure',
       'Conduct Center of Gravity Analysis', 'Drive Offline Activity',
       'Pump Priming', 'Develop Content', 'Develop People'],
      dtype='object')
second list: Index(['Drive Online Harms', 'Maximize Exposure', 'Conduct Pump Priming',
       'Deliver Content', 'Persist in the Information Space',
       'Assess Effectiveness', 'Establish Social Assets',
       'Target Audience Analysis', 'Develop Narratives', 'Plan Strategy',
       'Select Channels and Affordances', 'Develop Content', 'Plan Objectives',
       'Microtarget', 'Establish Legitimacy', 'Drive Offline Activity'],
      dtype='object')
set difference: {'Develop Networks', 'Channel Selection', 'Pump Priming', 'Measure Effectiveness', 'Conduct Center of Gravity Analysis', 'Exposure', 'Objective Planning', 'Develop

In [17]:
check_diffs(metadata['techniques']['name'], techs['DISARM Technique'])

lengths are: 71 and 97
first list: Index(['Backstop personas', 'Message reach', 'Twitter bots amplify',
       'Demand unsurmountable proof', 'Behaviour changes',
       'Create fake videos and images', 'Play the long game',
       'Purchase advertisements', 'Manipulate online polls',
       'Facilitate State Propaganda', 'Find echo chambers', 'Clickbait',
       'Pinterest', 'Seed distortions', 'Cow online opinion leaders',
       'Segment audiences',
       'Dedicated channels disseminate information pollution',
       'Organise remote rallies and events', 'Prepare fundraising campaigns',
       'Muzzle social media as a political force',
       'Twitter trolls amplify and manipulate',
       'Create pseudoscientific or disingenuous research',
       'Develop Narrative Concepts', 'Cheerleading domestic social media ops',
       '5Ds (dismiss, distort, distract, dismay, divide)',
       'Leverage Existing Narratives', 'Continue to amplify', 'Reddit',
       'Trial content', 'LinkedIn'

# Check against last version of framework

In [45]:
def check_version_descriptions(df1, df2):
    merge = df1.merge(df2, on='disarm_id')
    merge['name_changed'] = merge['name_x'] != merge['name_y']
    merge['summary_changed'] = merge['summary_x'] != merge['summary_y']
    return merge[(merge['name_changed']) | (merge['summary_changed'])][['disarm_id', 'name_changed', 'summary_changed', 'name_x', 
                                                                        'name_y', 'summary_x', 'summary_y']]

check_version_descriptions(metadata['phases'], oldmeta['phases'])

Unnamed: 0,disarm_id,name_changed,summary_changed,name_x,name_y,summary_x,summary_y
3,P04,False,True,Assess,Assess,"Evaluate effectiveness of action, for use in f...","Evaluate effectiveness of action, for use in f..."


In [46]:
check_version_descriptions(metadata['tactics'], oldmeta['tactics'])

Unnamed: 0,disarm_id,name_changed,summary_changed,name_x,name_y,summary_x,summary_y
0,TA01,True,False,Plan Strategy,Strategic Planning,"Define the desired end state, i.e. the set of ...","Define the desired end state, i.e. the set of ..."
1,TA02,True,True,Plan Objectives,Objective Planning,"Set clearly defined, measurable, and achievabl...","Set clearly defined, measurable, and achievabl..."
2,TA05,True,False,Microtarget,Microtargeting,Target very specific populations of people,Target very specific populations of people
3,TA06,False,True,Develop Content,Develop Content,,Create and acquire content used in incident
4,TA07,True,True,Select Channels and Affordances,Channel Selection,Selecting platforms and affordances assesses w...,"Set up specific delivery, amplification and ma..."
5,TA08,True,True,Conduct Pump Priming,Pump Priming,"Release content on a targetted small scale, pr...","Release content on a targetted small scale, pr..."
6,TA09,True,True,Deliver Content,Exposure,Release content to general public or larger po...,Release content to general public or push to l...
7,TA10,False,True,Drive Offline Activity,Drive Offline Activity,Move incident/campaign from online to offline....,Move incident into offline world
8,TA11,True,True,Persist in the Information Environment,Persistence,Persist in the Information Space refers to tak...,"Keep incident 'alive', beyond the incident cre..."
9,TA12,True,True,Assess Effectiveness,Measure Effectiveness,"Assess effectiveness of action, for use in fut...","Measure effectiveness of incident, for use in ..."


In [47]:
check_version_descriptions(metadata['techniques'], oldmeta['techniques'])

Unnamed: 0,disarm_id,name_changed,summary_changed,name_x,name_y,summary_x,summary_y
