mirror of
https://github.com/DISARMFoundation/DISARMframeworks.git
synced 2024-12-26 15:59:52 -05:00
111 lines
3.7 KiB
Plaintext
111 lines
3.7 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Fix the problem with Excel file changes\n",
|
||
|
"\n",
|
||
|
"Background: DISARM's master dataset is in an Excel file. Changes to it don't show up in GitHub, so it's difficult to tell what has changed between versions. The code below checks for those differences - use it repeatedly until the versions align. "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import sqlite3 as sql\n",
|
||
|
"from generate_DISARM_pages import Disarm\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np\n",
|
||
|
"import os\n",
|
||
|
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
||
|
"pd.set_option('display.max_rows', 1000)\n",
|
||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||
|
"\n",
|
||
|
"newfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER.xlsx'\n",
|
||
|
"oldfile = '../DISARM_MASTER_DATA/DISARM_FRAMEWORKS_MASTER_previous_version.xlsx'\n",
|
||
|
"\n",
|
||
|
"# Load dfs from file\n",
|
||
|
"newdfs = {}\n",
|
||
|
"newxlsx = pd.ExcelFile(newfile)\n",
|
||
|
"for sheetname in newxlsx.sheet_names:\n",
|
||
|
" newdfs[sheetname] = newxlsx.parse(sheetname)\n",
|
||
|
" newdfs[sheetname].fillna('', inplace=True)\n",
|
||
|
"\n",
|
||
|
"olddfs = {}\n",
|
||
|
"oldxlsx = pd.ExcelFile(oldfile)\n",
|
||
|
"for sheetname in oldxlsx.sheet_names:\n",
|
||
|
" olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
|
||
|
" olddfs[sheetname].fillna('', inplace=True)\n",
|
||
|
"\n",
|
||
|
"addedtables = newdfs.keys() - olddfs.keys()\n",
|
||
|
"losttables = olddfs.keys() - newdfs.keys()\n",
|
||
|
"if len(addedtables) + len(losttables) > 0:\n",
|
||
|
" print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
|
||
|
"\n",
|
||
|
"def investigate_table(table):\n",
|
||
|
" print('\\n\\nTable {} is changed'.format(table))\n",
|
||
|
" # Column headings\n",
|
||
|
" coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
|
||
|
" if len(coldiffs) > 0:\n",
|
||
|
" print('column differences: {}'.format(coldiffs))\n",
|
||
|
" # length\n",
|
||
|
" if len(newdfs[table]) != len(olddfs[table]):\n",
|
||
|
" print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
|
||
|
"\n",
|
||
|
" # column by column\n",
|
||
|
" for column in newdfs[table].columns:\n",
|
||
|
" coldiffs = newdfs[table][column] != olddfs[table][column]\n",
|
||
|
" if len(newdfs[table][coldiffs]) > 0:\n",
|
||
|
" print('Differences in column {}'.format(column))\n",
|
||
|
" return\n",
|
||
|
"\n",
|
||
|
"for table in newdfs.keys():\n",
|
||
|
" if newdfs[table].equals(olddfs[table]) == False:\n",
|
||
|
" investigate_table(table)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Look at individual table differences\n",
|
||
|
"table = 'countermeasures'\n",
|
||
|
"column = 'summary'\n",
|
||
|
"coldiffs = newdfs[table][column] != olddfs[table][column]\n",
|
||
|
"diffcols = pd.DataFrame()\n",
|
||
|
"diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
|
||
|
"diffcols['new'] = newdfs[table][coldiffs][column]\n",
|
||
|
"diffcols['old'] = olddfs[table][coldiffs][column]\n",
|
||
|
"diffcols[diffcols['old'] != '']"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 4
|
||
|
}
|