{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read in two files" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0JC139807/17/201621:46:298TAC93D IS FHP MUTUAL AID CHANNEL
1JC139807/17/201621:48:33OSP COMMANDERS BRIEFING THEIR WILL BE NO CLEVE...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 JC1398 07/17/2016 21:46:29 \n", "1 JC1398 07/17/2016 21:48:33 \n", "\n", " 4 \n", "0 8TAC93D IS FHP MUTUAL AID CHANNEL \n", "1 OSP COMMANDERS BRIEFING THEIR WILL BE NO CLEVE... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.read_csv(\"output.csv\", sep='|', header=None)\n", "df1.head(2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0JC139807/17/201621:46:298TAC93D IS FHP MUTUAL AID CHANNEL
1JC139807/17201621:48:33NaNOSP COMMANDERS BRIEFING THEIR WILL BE NO CLEVE...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 JC1398 07/17/2016 21:46:29 \n", "1 JC1398 07/172016 21:48:33 NaN \n", "\n", " 4 \n", "0 8TAC93D IS FHP MUTUAL AID CHANNEL \n", "1 OSP COMMANDERS BRIEFING THEIR WILL BE NO CLEVE... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.read_csv(\"output2.csv\", sep='|', header=None)\n", "df2.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## You can get True/False for every cell just by comparing\n", "\n", "`True` means the two files differ in that cell. Watch out: `NaN != NaN` is also `True`, so a cell that is empty in both files still shows up as different (row 3, column 3 below)." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0FalseFalseFalseFalseFalse
1FalseTrueFalseTrueFalse
2FalseFalseFalseFalseFalse
3FalseFalseFalseTrueFalse
4TrueFalseFalseTrueTrue
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "0 False False False False False\n", "1 False True False True False\n", "2 False False False False False\n", "3 False False False True False\n", "4 True False False True True" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 != df2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## So we ask for every cell where the two files differ (everything else becomes NaN)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0NaNNaNNaNNaNNaN
1NaN07/17/2016NaNNaN
2NaNNaNNaNNaNNaN
3NaNNaNNaNNaNNaN
4SmartCADNaNNaNNaNOLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 NaN NaN NaN NaN \n", "1 NaN 07/17/2016 NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 SmartCAD NaN NaN NaN \n", "\n", " 4 \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 OLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =... " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1 != df2]" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0NaNNaNNaNNaNNaN
1NaN07/172016NaNNaNNaN
2NaNNaNNaNNaNNaN
3NaNNaNNaNNaNNaN
4SMARTCADNaNNaNNaNOLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 NaN NaN NaN NaN \n", "1 NaN 07/172016 NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 SMARTCAD NaN NaN NaN \n", "\n", " 4 \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 OLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =... " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2[df1 != df2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## We take those two dataframes and add them together\n", "\n", "The cells are strings, so `+` glues the two values together; anything that is NaN on either side stays NaN." ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0NaNNaNNaNNaNNaN
1NaN07/17/201607/172016NaNNaNNaN
2NaNNaNNaNNaNNaN
3NaNNaNNaNNaNNaN
4SmartCADSMARTCADNaNNaNNaNOLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 NaN NaN NaN NaN \n", "1 NaN 07/17/201607/172016 NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 SmartCADSMARTCAD NaN NaN NaN \n", "\n", " 4 \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 OLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =... " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1 != df2] + df2[df1 != df2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Might as well add in a 'vs' for comparison and drop the all-NaN rows\n", "\n", "This isn't the best: `NaN != NaN` is `True`, so cells that are empty in both files get flagged as different, and a cell that is NaN in only one file comes out as NaN after the string addition, so that difference is silently lost (see row 1, column 3)." ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
1NaN07/17/2016 vs 07/172016NaNNaNNaN
4SmartCAD vs SMARTCADNaNNaNNaNOLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =...
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "1 NaN 07/17/2016 vs 07/172016 NaN NaN \n", "4 SmartCAD vs SMARTCAD NaN NaN NaN \n", "\n", " 4 \n", "1 NaN \n", "4 OLD DISP CODES: CODE1 = MOA, CODE2 = , CODE3 =... " ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1 != df2].add(\" vs \").add(df2[df1 != df2]).dropna(how='all')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }