mirror of
https://github.com/DISARMFoundation/DISARMframeworks.git
synced 2024-12-20 21:34:17 -05:00
22abaf93d8
Took a copy of the current AMITT github repository - we'll be updating this and merging the SPICE branch back in Rebranded to DISARM Moved generated pages to their own folder, to make looking at the repository less confusing
111 lines
3.7 KiB
Plaintext
111 lines
3.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
"# Fix the problem with Excel file changes\n",
|
|
"\n",
"Background: DISARM's master dataset is kept in an Excel file. Changes to that file don't show up in GitHub diffs, so it's difficult to tell what has changed between versions. The code below checks for those differences - run it repeatedly until the versions align. "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import sqlite3 as sql\n",
|
|
"from generate_DISARM_pages import Disarm\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"from sklearn.feature_extraction.text import CountVectorizer\n",
"# Display options: show up to 1000 rows and never truncate cell text.\n",
"pd.set_option('display.max_rows', 1000)\n",
"# None means 'no limit'; the old -1 sentinel is deprecated since pandas 1.0.\n",
"pd.set_option('display.max_colwidth', None)\n",
"\n",
"# Workbooks to compare: the current master data and its previous version.\n",
"newfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER.xlsx'\n",
"oldfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER_previous_version.xlsx'\n",
|
|
"\n",
"# Load dfs from file\n",
"def load_sheets(path):\n",
"    \"\"\"Return {sheet name: DataFrame} for every sheet in the workbook at `path`.\n",
"\n",
"    Empty cells are replaced with '' so that blanks in the two workbooks\n",
"    compare as equal (NaN != NaN would otherwise flag every blank cell).\n",
"    \"\"\"\n",
"    # The context manager closes the workbook handle once all sheets are parsed.\n",
"    with pd.ExcelFile(path) as xlsx:\n",
"        return {sheetname: xlsx.parse(sheetname).fillna('') for sheetname in xlsx.sheet_names}\n",
"\n",
"newdfs = load_sheets(newfile)\n",
"olddfs = load_sheets(oldfile)\n",
|
|
"\n",
"# Report sheets that exist in only one of the two workbooks\n",
"addedtables = newdfs.keys() - olddfs.keys()\n",
"losttables = olddfs.keys() - newdfs.keys()\n",
"if addedtables or losttables:\n",
"    print(\n",
"        'Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables)\n",
"    )\n",
|
|
"\n",
"def investigate_table(table):\n",
"    \"\"\"Print a summary of how sheet `table` differs between the new and old workbooks.\n",
"\n",
"    Reports, in order: column headings present in only one version, a row-count\n",
"    mismatch, and each shared column whose values differ. Reads the module-level\n",
"    `newdfs` and `olddfs` dicts and returns None.\n",
"    \"\"\"\n",
"    print('\\n\\nTable {} is changed'.format(table))\n",
"    new, old = newdfs[table], olddfs[table]\n",
"\n",
"    # Column headings\n",
"    coldiffs = set(new.columns).symmetric_difference(set(old.columns))\n",
"    if coldiffs:\n",
"        print('column differences: {}'.format(coldiffs))\n",
"\n",
"    # length\n",
"    if len(new) != len(old):\n",
"        print('length differences: new {} old {}'.format(len(new), len(old)))\n",
"\n",
"    # column by column\n",
"    # Compare only the rows both versions have: pandas refuses to compare Series of\n",
"    # different lengths, and the extra rows are already covered by the length report.\n",
"    shared_rows = min(len(new), len(old))\n",
"    for column in new.columns:\n",
"        if column not in old.columns:\n",
"            continue  # added column: already listed under column differences\n",
"        newvalues = new[column].iloc[:shared_rows]\n",
"        oldvalues = old[column].iloc[:shared_rows]\n",
"        if (newvalues != oldvalues).any():\n",
"            print('Differences in column {}'.format(column))\n",
|
|
"\n",
"# Only sheets present in both workbooks can be compared cell by cell; sheets that\n",
"# exist in just one of them are reported by the table-changes check.\n",
"for table in newdfs.keys():\n",
"    if table in olddfs and not newdfs[table].equals(olddfs[table]):\n",
"        investigate_table(table)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Look at individual table differences\n",
"table = 'countermeasures'\n",
"column = 'summary'\n",
"newtable, oldtable = newdfs[table], olddfs[table]\n",
"coldiffs = newtable[column] != oldtable[column]\n",
"\n",
"# One row per changed cell: its id plus the new and old text side by side.\n",
"# NOTE(review): the id column is still called 'amitt_id' here - confirm that\n",
"# matches the column name in the rebranded spreadsheet.\n",
"diffcols = pd.DataFrame({\n",
"    'amitt_id': newtable.loc[coldiffs, 'amitt_id'],\n",
"    'new': newtable.loc[coldiffs, column],\n",
"    'old': oldtable.loc[coldiffs, column],\n",
"})\n",
"\n",
"# Show only the changes where the old cell was not empty\n",
"diffcols[diffcols['old'] != '']"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|