{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL | \n",
" PC | \n",
" SAL-RATE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
"
\n",
" \n",
" 3 | \n",
" 868 | \n",
" K D AABY | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 10209 | \n",
" X | \n",
" $12.00 | \n",
"
\n",
" \n",
" 4 | \n",
" 56 | \n",
" I D AADIL | \n",
" POLICE DEPARTMENT | \n",
" 71012 | \n",
" A | \n",
" $46953.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL PC SAL-RATE\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X $5.00\n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X $5.00\n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A $51955.00\n",
"3 868 K D AABY DEPARTMENT OF CITYWIDE ADM 10209 X $12.00\n",
"4 56 I D AADIL POLICE DEPARTMENT 71012 A $46953.00"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"Civil_List_2014.csv\", \n",
" names=[\"DPT\",\"NAME\",\"ADDRESS\",\"TTL\", \"PC\",\"SAL-RATE\"], \n",
" skiprows=1,\n",
" dtype=col_types)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL | \n",
" PC | \n",
" SAL-RATE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL # | \n",
" PC | \n",
" SAL-RATE | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 2 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 3 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
"
\n",
" \n",
" 4 | \n",
" 868 | \n",
" K D AABY | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 10209 | \n",
" X | \n",
" $12.00 | \n",
"
\n",
" \n",
" 5 | \n",
" 56 | \n",
" I D AADIL | \n",
" POLICE DEPARTMENT | \n",
" 71012 | \n",
" A | \n",
" $46953.00 | \n",
"
\n",
" \n",
" 6 | \n",
" 69 | \n",
" M AAKIRI | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 56056 | \n",
" A | \n",
" $33000.00 | \n",
"
\n",
" \n",
" 7 | \n",
" 464 | \n",
" A AALAI | \n",
" CUNY QUEENSBOROUGH COMMUNI | \n",
" 4607 | \n",
" N | \n",
" $73.53 | \n",
"
\n",
" \n",
" 8 | \n",
" 998 | \n",
" A V AALEVIK | \n",
" N.Y.C. TRANSIT AUTHORITY | \n",
" 402 | \n",
" 2 | \n",
" $33280.00 | \n",
"
\n",
" \n",
" 9 | \n",
" 998 | \n",
" M AAMIR | \n",
" N.Y.C. TRANSIT AUTHORITY | \n",
" 00T07 | \n",
" 4 | \n",
" $60878.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL PC \\\n",
"0 DPT NAME ADDRESS TTL # PC \n",
"1 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"2 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"3 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A \n",
"4 868 K D AABY DEPARTMENT OF CITYWIDE ADM 10209 X \n",
"5 56 I D AADIL POLICE DEPARTMENT 71012 A \n",
"6 69 M AAKIRI HRA/DEPARTMENT OF SOCIAL S 56056 A \n",
"7 464 A AALAI CUNY QUEENSBOROUGH COMMUNI 4607 N \n",
"8 998 A V AALEVIK N.Y.C. TRANSIT AUTHORITY 402 2 \n",
"9 998 M AAMIR N.Y.C. TRANSIT AUTHORITY 00T07 4 \n",
"\n",
" SAL-RATE \n",
"0 SAL-RATE \n",
"1 $5.00 \n",
"2 $5.00 \n",
"3 $51955.00 \n",
"4 $12.00 \n",
"5 $46953.00 \n",
"6 $33000.00 \n",
"7 $73.53 \n",
"8 $33280.00 \n",
"9 $60878.00 "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 269838\n",
"unique 27533\n",
"top $76488.00\n",
"freq 26024\n",
"Name: SAL-RATE, dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['SAL-RATE'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"ename": "KeyError",
"evalue": "'PC'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 1944\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1945\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1946\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas/index.c:4154)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas/index.c:4018)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'PC'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PC'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1995\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1996\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1997\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1998\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1999\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2002\u001b[0m \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2003\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2004\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2006\u001b[0m \u001b[0;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 1348\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1349\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1350\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1351\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1352\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m 3288\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3290\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3291\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3292\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 1945\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1946\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1947\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1948\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1949\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas/index.c:4154)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas/index.c:4018)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/hashtable.pyx\u001b[0m in \u001b[0;36mpandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'PC'"
]
}
],
"source": [
"df['PC'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['DPT ', 'NAME ', 'ADDRESS ', 'TTL # ', 'PC ', 'SAL-RATE'], dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'DPT'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"DPT \".strip()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DPT\n",
"NAME\n",
"ADDRESS\n",
"TTL #\n",
"PC\n",
"SAL-RATE\n"
]
}
],
"source": [
"for col in df.columns:\n",
" print(col.strip())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['DPT', 'NAME', 'ADDRESS', 'TTL #', 'PC', 'SAL-RATE']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# it's a list, so - []\n",
"# it's a loop, so - [for col in df.columns]\n",
"# col.strip(), so - [col.strip() for col in columns]\n",
"\n",
"[col.strip() for col in df.columns]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.columns = [col.strip() for col in df.columns]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['DPT', 'NAME', 'ADDRESS', 'TTL #', 'PC', 'SAL-RATE'], dtype='object')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 100521\n",
"D 54881\n",
"X 24403\n",
"4 23259\n",
"3 19866\n",
"1 13083\n",
"K 7635\n",
"F 6361\n",
"N 4671\n",
"U 3409\n",
"B 2817\n",
"BW 1766\n",
"E 1300\n",
"W 1257\n",
"G 1085\n",
"I 730\n",
"2 721\n",
"M 708\n",
"O 536\n",
"WK 488\n",
"P 307\n",
"L 25\n",
"S 4\n",
"0 3\n",
"V 2\n",
"Name: PC, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['PC'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 269838\n",
"unique 27533\n",
"top $76488.00\n",
"freq 26024\n",
"Name: SAL-RATE, dtype: object"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['SAL-RATE'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"76488.0\n",
"5.0\n",
"1500.0\n"
]
}
],
"source": [
"def money_to_float(money_str):\n",
" return float(money_str.replace(\"$\",\"\").replace(\",\",\"\"))\n",
"\n",
"print(money_to_int(\"$76488.00\"))\n",
"print(money_to_int(\"$5.00\"))\n",
"print(money_to_int(\"$1,500.00\"))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 5.00\n",
"1 5.00\n",
"2 51955.00\n",
"3 12.00\n",
"4 46953.00\n",
"5 33000.00\n",
"6 73.53\n",
"7 33280.00\n",
"8 60878.00\n",
"9 69272.00\n",
"10 89960.00\n",
"11 38324.00\n",
"12 39019.00\n",
"13 369.92\n",
"14 66127.00\n",
"15 64250.00\n",
"16 32510.00\n",
"17 2.65\n",
"18 109087.00\n",
"19 111606.00\n",
"20 5.00\n",
"21 56555.00\n",
"22 51643.00\n",
"23 77015.00\n",
"24 76488.00\n",
"25 67627.00\n",
"26 68095.00\n",
"27 66127.00\n",
"28 76689.00\n",
"29 48153.00\n",
" ... \n",
"269808 85287.00\n",
"269809 65564.00\n",
"269810 56937.00\n",
"269811 81439.00\n",
"269812 118506.00\n",
"269813 42064.00\n",
"269814 33.18\n",
"269815 116364.00\n",
"269816 98072.00\n",
"269817 107563.00\n",
"269818 37150.00\n",
"269819 74646.00\n",
"269820 64040.00\n",
"269821 76488.00\n",
"269822 83802.00\n",
"269823 17.16\n",
"269824 85667.00\n",
"269825 46.89\n",
"269826 67.42\n",
"269827 51916.00\n",
"269828 57514.00\n",
"269829 74605.00\n",
"269830 74605.00\n",
"269831 64403.00\n",
"269832 51936.00\n",
"269833 47.58\n",
"269834 87738.00\n",
"269835 78683.00\n",
"269836 76488.00\n",
"269837 5.00\n",
"Name: SAL-RATE, dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#money_to_float(df['SAL-RATE'])\n",
"df['SAL-RATE'].apply(money_to_float)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['salary'] = df['SAL-RATE'].apply(money_to_float)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL # | \n",
" PC | \n",
" SAL-RATE | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
" 51955.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 868 | \n",
" K D AABY | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 10209 | \n",
" X | \n",
" $12.00 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 56 | \n",
" I D AADIL | \n",
" POLICE DEPARTMENT | \n",
" 71012 | \n",
" A | \n",
" $46953.00 | \n",
" 46953.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL # PC SAL-RATE \\\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X $5.00 \n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X $5.00 \n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A $51955.00 \n",
"3 868 K D AABY DEPARTMENT OF CITYWIDE ADM 10209 X $12.00 \n",
"4 56 I D AADIL POLICE DEPARTMENT 71012 A $46953.00 \n",
"\n",
" salary \n",
"0 5.0 \n",
"1 5.0 \n",
"2 51955.0 \n",
"3 12.0 \n",
"4 46953.0 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 269838.000000\n",
"mean 54143.707486\n",
"std 32345.656289\n",
"min 0.500000\n",
"25% 36899.000000\n",
"50% 57855.000000\n",
"75% 76488.000000\n",
"max 300000.000000\n",
"Name: salary, dtype: float64"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['salary'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEACAYAAACUMoD1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+MXeV95/H3BxObUIyBZmFWJmSIwMS0pMZpnKzYKrch\nGEgV7I0Enaa7xo0jRQEab9N0sSNV3lRVCKhJ3XZl/xFQx7YaWV6kBkdFtsPi21UifnjBjgn24tkf\npoyDJynGrKKoCMN3/zjPxcfjuTPHM+fee+6Zz0u68jnfc557n+/c63nmPN9z7lFEYGZmNlPn9boD\nZmZWDx5QzMysFB5QzMysFB5QzMysFB5QzMysFB5QzMysFIUHFEnnSdovaWda3yBpVNLz6XFbbt/1\nkkYkHZa0PBdfKumgpCOSNubicyVtT22eknRVWQmamVl3nMsRylrgxXGxb0fE0vTYBSBpMXAXsBi4\nHdgkSWn/zcCaiFgELJJ0a4qvAU5ExLXARuCh6aVjZma9UmhAkXQl8Gng4fGbJth9BbA9Ik5FxFFg\nBFgmaQCYHxH70n5bgZW5NlvS8qPAzYUzMDOzSih6hPKXwJ8A4y+rv0/SAUkPS1qQYguBV3L7HEux\nhcBoLj6aYme0iYi3gZOSLiuchZmZ9dyUA4qk3wHGIuIAZx6RbAI+GBFLgOPAt0rs10RHPmZmVmHn\nF9jnJuAOSZ8G3gvMl7Q1Ilbl9vkO8P20fAx4f27blSnWLp5v81NJc4CLI+LE+I5I8hePmZlNQ0R0\n/A/1KY9QIuJrEXFVRHwQGAKejIhVqSbS8lngJ2l5JzCUzty6GrgGeDYijgNvSFqWivSrgMdybe5O\ny3cCT7bv0c6zHhde+Bvs3r2biOjrx4YNG3reB+fn3Jxf/R7dUuQIpZ2HJC0B3gGOAl8EiIhDknYA\nh4C3gHvidEb3AsPABcDjkc4MAx4BtkkaAV4jG7ja+MxZkfe8529mkEZ1HD16tNdd6Kg651fn3MD5\nWTHnNKBExD8C/5iWV02y3wPAAxPEnwNumCD+JtmpxmZm1qd8pXyFrF69utdd6Kg651fn3MD5WTHq\n5vzaTGVF+bP7u2DBcnbs+CrLly+foJWZ2ewmiahCUd66p9ls9roLHVXn/OqcGzg/K8YDipmZlcJT\nXmZmNecpLzMz6yseUCqk7vO4dc6vzrmB87NiPKCYmVkpXEMxM6s511DMzKyveECpkLrP49Y5vzrn\nBs7PivGAYmZmpXANxcys5lxDMTOzvuIBpULqPo9b5/zqnBs4PyvGA4qZmZWi8IAi6TxJz0vamdYv\nlbRH0kuSdktakNt3vaQRSYclLc/Fl0o6KOmIpI25+FxJ21ObpyRdVVaC/aTRaPS6Cx1V5/zqnBs4\nPyvmXI5Q1pLd1rdlHfBERFxHdg/49QCSrie7++Ji4HZgU7qHPMBmYE1ELAIWSbo1xdcAJyLiWmAj\n8NA08zEzsx4pNKBIuhL4NPBwLrwC2JKWtwAr0/IdwPaIOBURR4ERYJmkAWB+ROxL+23Ntck/16PA\nzeeeSv+r+zxunfOrc27g/KyYokcofwn8CWees3tFRIwBRMRx4PIUXwi8ktvvWIotBEZz8dEUO6NN\nRLwNnJR0WfE0zMys186fagdJvwOMRcQBSY1Jdi3zgpZJzpdeDQym5UuAJe9uaf2V0ZoP7bf1Vqwq\n/XF+xdcbjUal+uP8Znd+zWaT4eFhAAYHB+mWKS9slPQN4N8Dp4D3AvOBvwd+E2hExFiaztobEYsl\nrQMiIh5M7XcBG4CXW/uk+BDwiYj4UmufiHhG0hzg1Yi4fFxXfGGjmdk0VObCxoj4WkRcFREfBIaA\nJyPiPwDfJztcALgbeCwt7wSG0plbVwPXAM+mabE3JC1LRfpV49rcnZbvJCvyzzqtvzDqqs751Tk3\ncH5WzJRTXpP4JrBD0ufJjj7uAoiIQ5J2kJ0R9hZwT5w+DLoXGAYuAB6PiF0p/giwTdII8BrZwGVm\nZn3E3+VlZlZzlZnyMjMzK8IDSoXUfR63zvnVOTdwflaMBxQzMyuFayhmZjXnGoqZmfUVDygVUvd5\n3DrnV+fcwPlZMR5QzMysFK6hmJnVnGsoZmbWVzygVEjd53HrnF+dcwPnZ8V4QDEzs1K4hmJmVnOu\noZiZWV/xgFIhdZ/HrXN+dc4NnJ8V4wHFzMxK4RqKmVnNVaaGImmepGck7Zf0YrrHPJI2SBqV9Hx6\n3JZrs17SiKTDkpbn4kslHZR0RNLGXHyupO2pzVOSrio7UTMz66wi95R/E/jtiLgR+DDwSUk3pc3f\njoil6bELQNJistsBLwZuBzale8gDbAbWRMQiYJGkW1N8DXAiIq4FNgIPlZRfX+n0PO7AwCCSJnwM\nDAx29LWh3vPUdc4NnJ8VU6iGEhG/TIvzUpvX0/pEh1ArgO0RcSoijgIjwDJJA8D8iNiX9tsKrMy1\n2ZKWHwVuPpckrJixsZfJpgzPfmTbzMymr9CAIuk8SfuB40AzIg6lTfdJOiDpYUkLUmwh8Equ+bEU\nWwiM5uKjKXZGm4h4Gzgp6bLpJNTPGo1Gr7vQUXXOr865gfOzYs4vslNEvAPcKOliYI+kTwCbgD+L\niJD058C3gC+U1K9JikergcG0fAmw5N0trcPW1ofD62euZ5pAI7fMu+u97p/Xve71ctabzSbDw8MA\nDA4O0jURcU4P4E+BPx4X+wBwMC2vA+7PbdsFfAwYAA7n4kPA5vw+aXkO8LM2rx0QZz0WLLgldu/e\nHf1u7969HX3+dj+/7EFHXzui8/n1Up1zi3B+/S79/z7n3/fn+ihyltf7WtNZkt4L3AIcSDWRls8C\nP0nLO4GhdObW1cA1wLMRcRx4Q9KyVKRfBTyWa3N3Wr4TeLLAWGhmZhUy5XUokm4gK5iLrOayLSL+\nQtJWsvmmd4CjwBcjYiy1WU925tZbwNqI2JPiHwGGgQuAxyNibYrPA7YBNwKvAUORFfTH98XXocxA\nNo63e7/FVJ8FM+tP3boOxRc2ziIeUMxmp8pc2Gjd0yqq1VWd86tzbuD8rBgPKGZmVgpPec0invIy\nm5085WVmZn3FA0qF1H0et8751Tk3cH5WjAcUMzMrhWsos4hrKGazk2soZmbWVzygVEhZ87jt7nvS\na3Wep65zbuD8rJhC3zZs/eX0fU/G6/2gYmb15RpKDbWvlbiGYjYbuYZiZmZ9xQNKhdR9HrfO+dU5\nN3B+VowHFDMzK4VrKDXkGoqZ5bmGYmZmfaXILYDnSXpG0n5JL0r6RopfKmmPpJck7W7dJjhtWy9p\nRNJhSctz8aWSDko6ImljLj5X0vbU5ilJV5WdaD+o+zxunfOrc27g/KyYKQeUiHgT+O2IuBH4MPBJ\nSTcB64AnIuI6snvArweQdD1wF7AYuB3YpNNX1W0G1kTEImCRpFtTfA1wIiKuBTYCD5WVoJmZdUeh\nKa+I+GVanJfavA6sILvXPOnflWn5DmB7RJxK94UfAZZJGgDmR8S+tN/WXJv8cz0K3DytbPpco9Ho\ndRc6qs751Tk3cH5WTKEBRdJ5kvYDx4FmRBwCroiIMYCIOA5cnnZfCLySa34sxRYCo7n4aIqd0SYi\n3gZOSrpsWhmZmVlPFPrqlYh4B7hR0sXAbkkNzj5dqMxThCY5G2E1MJiWLwGWvLulNQ/a+muj39Y3\nbtzIkiVLZvx8p7XWG+NijQm390t+VVzP/+yr0B/nN7vzazabDA8PAzA4OEjXRMQ5PYA/Bb4KHCY7\nSgEYAA6n5XXA/bn9dwEfy++T4kPA5vw+aXkO8LM2rx0QZz0WLLgldu/eHf1u7969pTxPu59T+3i2\nrdPKyq+K6pxbhPPrd+n/9zn/vj/XR5GzvN7XOoNL0nuBW4D9wE6ywwWAu4HH0vJOYCiduXU1cA3w\nbGTTYm9IWpaK9KvGtbk7Ld9JVuSfdVp/adRVnfOrc27g/KyYIlNe/xrYkgaB84BtEfHfUk1lh6TP\nAy+TndlFRByStAM4BLwF3JNGSIB7gWHgAuDxiNiV4o8A2ySNAK+RHb2YmVkfKXLa8AsRsTQiboyI\n34iIv0jxExHxqYi4LiKWR8TJXJsHIuKaiFgcEXty8eci4oaIuDYi1ubib0bEXSn+8cjODpt1zq6B\n1Eud86tzbuD8rBhfKW9mZqXwd3nVkL/Ly8zy/F1eZmbWVzygVEjd53HrnF+dcwPnZ8V4QDEzs1K4\nhlJDrqGYWZ5rKGZm1lc8oFRI3edx65xfnXMD52fFeEAxM7NSuIZSQ66hmFmeayhmZtZXPKBUSN3n\nceucX51zA+dnxXhAMTOzUriGUkOuoZhZnmsofWhgYBBJEz4GBgZ73T0zs44qcsfGKyU9KelFSS9I\n+sMU3yBpVNLz6XFbrs16SSOSDktanosvlXRQ0hFJG3PxuZK2pzZPSbqq7ES7YWzsZbIjgLMf2bbJ\n1X0et8751Tk3cH5WTJEjlFPAVyLi14B/A9wn6UNp27fTzbeWtu6+KGkx2d0bFwO3A5vS3R4BNgNr\nImIRsEjSrSm+BjgREdcCG4GHykjOzMy6p8gdG49HxIG0/AvgMLAwbZ5oTm4FsD0iTqU7L44AyyQN\nAPMjYl/abyuwMtdmS1p+FLh5Grn0vbrf17rO+dU5N3B+Vsw51VAkDQJLgGdS6D5JByQ9LGlBii0E\nXsk1O5ZiC4HRXHyU0wPTu20i4m3gpKTLzqVvZmbWW4UHFEkXkR09rE1HKpuAD0bEEuA48K0S+9Xx\nsxGqqO7zuHXOr865gfOzYs4vspOk88kGk20R8RhARPw8t8t3gO+n5WPA+3PbrkyxdvF8m59KmgNc\nHBEnJu7NamAwLV9CdsCUaX0oWoev3V5PvQAauWXeXZ+q/YEDB0rpz5l9Of36M+3fTNfLys/rXvf6\n5OvNZpPh4WEABgcH6ZZC16FI2gr8c0R8JRcbiIjjafmPgI9GxOckXQ/8HfAxsqmsHwDXRkRIehr4\nMrAP+AfgryNil6R7gF+PiHskDQErI2Jogn5U+jqU9td/QDev8/B1KGaW163rUKY8QpF0E/D7wAuS\n9pP9Rvoa8DlJS4B3gKPAFwEi4pCkHcAh4C3gnjj9m+peYBi4AHi8dWYY8AiwTdII8Bpw1mBiZmbV\nVuQsrx9FxJyIWBIRN7ZOEY6IVRHx4RRfGRFjuTYPRMQ1EbE4Ivbk4s9FxA0RcW1ErM3F34yIu1L8\n4+nssFnn7CmreqlzfnXODZyfFeMr5c3MrBT+Lq8SuYZiZlXk7/IyM7O+4gGlQuo+j1vn/OqcGzg/\nK8YDipmZlcI1lBK5hmJmVeQaipmZ9RUPKBVS93ncOudX59zA+VkxHlDMzKwUrqGUyDUUM6si11DM\nzKyveECpkLrP49Y5vzrnBs7PivGAYmZmpXANpUSuoZhZFbmGUjvzkDThY2BgsNedMzObMQ8oXfMm\n2dHB2Y+xsZeB+s/j1jm/OucGzs+KmXJAkXSlpCclvSjpBUlfTvFLJe2R9JKk3ZIW5NqslzQi6bCk\n5bn4UkkHJR2RtDEXnytpe2rzlKSryk7UzMw6a8oaiqQBYCAiDki6CHgOWAH8AfBaRDwk6X7g0ohY\nl7un/EeBK4EnOH1P+WeA+yJin6THgb+KiN2SvgTckO4p/7vAv6vjPeW7Vb9wDcXM8ipTQ4mI4xFx\nIC3/AjhMNlCsALak3bYAK9PyHcD2iDiVbuU7AixLA9P8iNiX9tuaa5N/rkeBm2eSlJmZdd851VAk\nDQJLgKeBK1r3kY+I48DlabeFwCu5ZsdSbCEwmouPptgZbSLibeCkpMvOpW91UPd53DrnV+fcwPlZ\nMYUHlDTd9SiwNh2pjJ8fKXO+pOOHZmZmVq7zi+wk6XyywWRbRDyWwmOSroiIsTSd9bMUPwa8P9f8\nyhRrF8+3+amkOcDFEXFi4t6sBgbT8iVkB0yZ1l8ZjUajJ+upF0Ajt0yB9VzrZnPG/TmzL/nXm7x/\n3fj5lJFfFdcbjUal+uP8Znd+zWaT4eFhAAYHB+mWQhc2StoK/HNEfCUXexA4EREPtinKf4xsKusH\nnC7KPw18GdgH/APw1xGxS9I9wK+novwQsNJF+U70w0V5s9moMkV5STcBvw98UtJ+Sc9Lug14ELhF\n0ktkRfRvAkTEIWAHcAh4HLgnTv+muhd4BDgCjETErhR/BHifpBHgPwLrykqwn5x9hFEvdc6vzrmB\n87NippzyiogfAXPabP5UmzYPAA9MEH8OuGGC+JvAXVP1xczMqsvf5VUiT3mZWRVVZsrLzMysCA8o\nFVL3edw651fn3MD5WTEeUMzMrBSuoZTINRQzqyLXUGxSAwODbe+vYmbWCx5QKuRc5nGze6hMfH+V\nqqrzPHWdcwPnZ8V4QDEzs1K4hlKibtZQpvdarqGYzUauocwqE99v3veaN7N+4gGlElr3m9/LRPea\nr4s6z1PXOTdwflaMBxQzMyuFayglmkkNpV3No9374xqKmRXlGoqZmfUVDyiV0ux1BzqqzvPUdc4N\nnJ8V4wHFzMxKUeSOjY9IGpN0MBfbIGk03b2xdQfH1rb1kkYkHZa0PBdfKumgpCOSNubicyVtT22e\nknRVmQn2l0avO9BR+XvL102dcwPnZ8UUOUL5W+DWCeLfjoil6bELQNJisjsvLgZuBzbp9JdLbQbW\nRMQiYJGk1nOuIbs3/bXARuCh6adTNxNfn+Lv6zKzKppyQImIHwKvT7Bpot9qK4DtEXEqIo4CI8Ay\nSQPA/IjYl/bbCqzMtdmSlh8luz/9LNUct966PqV/vq9rMnWep65zbuD8rJiZ1FDuk3RA0sOSFqTY\nQuCV3D7HUmwhMJqLj6bYGW0i4m3gpKTLZtAvMzPrgfOn2W4T8GcREZL+HPgW8IWS+jTFfM5qYDAt\nXwIseXdL66+M1nxot9dTLzhdC2mmf6daz8u3n+nzjd8++fN14+fTbDZ79v50cr3RaFSqP85vdufX\nbDYZHh4GYHBwkG4pdGGjpA8A34+ID0+2TdI6ICLiwbRtF7ABeBnYGxGLU3wI+EREfKm1T0Q8I2kO\n8GpEXN6mH7PuwsZuPp8vbDSrp6pd2ChyRw6pJtLyWeAnaXknMJTO3LoauAZ4NiKOA29IWpaK9KuA\nx3Jt7k7LdwJPTiuTWmj2ugMd1foLqo7qnBs4PytmyikvSd8lmxP5VUn/RHbE8duSlgDvAEeBLwJE\nxCFJO4BDwFvAPXH6z957gWHgAuDx1plhwCPANkkjwGvAUCmZmZlZV/m7vErkKS8zq6KqTXmZmZlN\nygNKpTR73YGOqvM8dZ1zA+dnxXhAMTOzUriGUiLXUMysilxDMTOzvuIBpVKave5AR9V5nrrOuYHz\ns2I8oJiZWSlcQymRayhmVkWuoZiZWV/xgFIpzV53oKPqPE9d59zA+VkxHlDMzKwUrqGUyDUUM6si\n11DMzKyveECplGavO9BRdZ6nrnNu4PysGA8o0zAwMIiksx5mZrOZayjT6wdVqHm4hmJmRVSmhiLp\nEUljkg7mYpdK2iPpJUm7JS3IbVsvaUTSYUnLc/Glkg5KOiJpYy4+V9L21OYpSVeVmaCZmXVHkSmv\nvwVuHRdbBzwREdeR3QN+PYCk64G7gMXA7cAmnZ4L2gysiYhFwCJJredcA5yIiGuBjcBDM8inzzV7\n3YGOqvM8dZ1zA+dnxUw5oETED4HXx4VXAFvS8hZgZVq+A9geEaci4igwAiyTNADMj4h9ab+tuTb5\n53oUuHkaeZiZWY9Ntyh/eUSMAUTEceDyFF8IvJLb71iKLQRGc/HRFDujTUS8DZyUdNk0+9XnGr3u\nQEc1Go1ed6Fj6pwbOD8r5vySnqfMau4UhaPVwGBavgRY8u6W1mFr68PRqfXTWuuNcbFGm+3t1qvx\nfN36+Xnd617v7Hqz2WR4eBiAwcFBuiYipnwAHwAO5tYPA1ek5QHgcFpeB9yf228X8LH8Pik+BGzO\n75OW5wA/m6QfAXHWY8GCW2L37t3RLe360T5edNvekp/v3Np02t69ezv+Gr1S59winF+/S/+/C/2+\nn8mj6JSXOPPIYSfZoQLA3cBjufhQOnPrauAa4NnIpsXekLQsFelXjWtzd1q+k6zIb2ZmfWbK61Ak\nfZdsTuRXgTFgA/A94L8C7wdeBu6KiJNp//VkZ269BayNiD0p/hFgGLgAeDwi1qb4PGAbcCPwGjAU\nWUF/or74OpQOPt9UnwUz60/dug5lyhpKRHyuzaZPtdn/AeCBCeLPATdMEH+T7FRjMzPrY/7qlUpp\n9roDE2r3VTOSGBgYLPw8Z5/UUB91zg2cnxVT1lleVmNjYy/TbqpsbMzfYWZmGX+X1/T6QRVqHuU+\n3wXAm222Melr9dNnyGw2qkwNxWaLN5l8gDIzm5xrKJXS7HUHOqrO89R1zg2cnxXjAcXMzErhGsr0\n+kHvax5VeL5sWz99hsxmo8rcD8VsOso61djM+ocHlEpp9roDpTl9qnH+sReItK1e6j4H7/ysCA8o\nZmZWCtdQptcP+rPm0b0aSvufUfs2ZtYZrqGYmVlf8YBSKc1ed6DDmr3uQMfUfQ7e+VkRHlDMzKwU\nrqFMrx/0Z83DNRSz2cg1FDMz6yszGlAkHZX0Y0n7JT2bYpdK2iPpJUm7JS3I7b9e0oikw5KW5+JL\nJR2UdETSxpn0qb81e92BDmv2ugMdU/c5eOdnRcz0COUdoBERN0bEshRbBzwREdeR3R9+PYCk68nu\nzLgYuB3YlO4vD7AZWBMRi4BFkm6dYb/MzKzLZjqgaILnWAFsSctbgJVp+Q5ge0ScSveMHwGWSRoA\n5kfEvrTf1lybWabR6w50WKPXHeiYRqPR6y50lPOzImY6oATwA0n7JH0hxa6IiDGAiDgOXJ7iC4FX\ncm2PpdhCYDQXH00xMzPrIzO9wdZNEfGqpH8F7JH0Emef2lPy6TyrgcG0fAmw5N0trXnQ1l8bnVo/\nrbXeGBdrtNnebr1lI1k+ZT1f2f2bfPv4n9fZ+7fym3j/fl7Pfzaq0B/nN7vzazabDA8PAzA4OEjX\nREQpD2AD8MfAYbKjFIAB4HBaXgfcn9t/F/Cx/D4pPgRsbvMaAXHWY8GCW2L37t3RLe360T5edNve\nkp+v7P5NvK34z2jvpG362d69e3vdhY5yfv0t/Z8r7fd9u8e0p7wkXSjporT8K8By4AVgJ9lhBMDd\nwGNpeScwJGmupKuBa4BnI5sWe0PSslSkX5VrM8s0et2BaZg34VfUT6zRzY51Vd3n4J2fFTGTKa8r\ngL/PLjbkfODvImKPpP8B7JD0eeBlsjO7iIhDknYAh4C3gHvSyAlwLzAMXAA8HhG7ZtAv66p296L3\nfejNZptpH6FExP+NiCWRnTJ8Q0R8M8VPRMSnIuK6iFgeESdzbR6IiGsiYnFE7MnFn0vPcW1ErJ1Z\nSv2s2esOdFiz1x3omLpfx+D8rAhfKW9mZqXwgFIpjV53oMMaU+7R7tbBVb9tcN3n4J2fFTHT04bN\nSnX61sHj467JmFWdj1AqpdnrDnRYs9cd6Ji6z8E7PyvCA4qZmZXCA0ob7eby219jUYZGB5+7Chq9\n7kDH1H0O3vlZEa6htNFuLj/j+fyZmdfhgdnMesFHKJXS7HUHOqyZ/m1dDDnRoz/VfQ7e+VkRHlDM\nzKwUvqd8+9ei9/dsr/rzdfO1fB96s+nyPeXNzKyveECplGavO9BhzV53oGPqPgfv/KwIDyjWJyb+\nmvx++FoWs9nCNZT2r0UV6gbVfr5uvtbkbfrpc2zWba6hmJlZX6nMgCLpNkn/U9IRSff3uj+90ex1\nBzqs2aHn7f10WN3n4J2fFVGJAUXSecB/AW4Ffg34PUkf6m2veuFArzvQYZ3Kr/2FkmNjx0sdaNp9\nJc9nPvPZspKppAMH6v3ZrHt+3VKJAQVYBoxExMsR8RawHVjR6Rftzfd1Tebk1Lv0tV7kN/Fgk321\nzsQm+1yc/kqeMx+/+MXrHc+kl06erPdns+75dUtVBpSFwCu59dEU66h2vxz6+StArKj202TT+1zM\nmda022SDl89es37Td18OefHFnzkr9i//sp+hodW8/vqrZ20777wLeeedX3ajayU42usOdNjRXncg\np3XkMpHpHKG+3fb5xsYumOKo99zaTfaZns62om2+/vWvn7Htiis+wPHjRyds12+OHj3a6y7UQiVO\nG5b0ceA/R8RtaX0dEBHx4Lj9et9ZM7M+1I3ThqsyoMwBXgJuBl4FngV+LyIO97RjZmZWWCWmvCLi\nbUn3AXvI6jqPeDAxM+svlThCMTOz/leVs7ym1E8XPko6KunHkvZLejbFLpW0R9JLknZLWpDbf72k\nEUmHJS3PxZdKOphy3piLz5W0PbV5StJVHc7nEUljkg7mYl3JR9Ldaf+XJK3qUm4bJI1Kej49buvH\n3NJrXCnpSUkvSnpB0pdTvC7v3/j8/jDF+/49lDRP0jPp98iLkr6R4tV97yKi8g+yge9/AR8A3kN2\nhdyHet2vSfr7f4BLx8UeBP5TWr4f+GZavh7YTzb9OJjybB05PgN8NC0/Dtyalr8EbErLvwts73A+\n/xZYAhzsZj7ApcD/BhYAl7SWu5DbBuArE+y7uJ9yS68zACxJyxeR1So/VKP3r11+tXgPgQvTv3OA\np4Gbqvze9csRSk8ufJwBcfbR3wpgS1reAqxMy3eQvYmnIuIoMAIskzQAzI+IfWm/rbk2+ed6lOxk\nho6JiB8C46/c62Q+n0zLtwJ7IuKNiDhJVmN79y/NMrTJDSY+d3gFfZQbQEQcj4gDafkXwGHgSurz\n/k2UX+satr5/DyOidT73PLLfKa9T4feuXwaUnlz4OAMB/EDSPklfSLErImIMsv8EwOUpPj63Yym2\nkCzPlnzO77aJiLeBk5Iu60Qik7i8g/m8kfJp91zdcJ+kA5Iezk0p9HVukgbJjsaeprOfx57kmMvv\nmRTq+/dQ0nmS9gPHgWZEHKLC712/DCj95qaIWAp8GrhX0m9x9tVrZZ4N0avvismrUz6bgA9GxBKy\n/8jfKvG5e5KbpIvI/gJdm/6Sr9XncYL8avEeRsQ7EXEj2VHlb0lqUOH3rl8GlGNAvvB8ZYpVUkS8\nmv79OfBWH67hAAABpUlEQVQ9sim7MUlXAKRD0J+l3Y8B7881b+XWLn5GG2XX8FwcESc6kkx73cin\nJ+97RPw80kQy8B2y9++Mfo7rT6Vzk3Q+2S/bbRHxWArX5v2bKL+6vYcR8f/Iah+/SZXfuzILSJ16\nkBWkWkX5uWRF+cW97lebvl4IXJSWfwX4EbCcrJB2f7QvpM0FrubMQtrTZP8RlD5Mt6X4PZwupA3R\n4aJ8ep1B4IXcesfz4czCYGv5ki7kNpBb/iPgu/2aW3qtrcC3x8Xq9P5NlF/fv4fA+0iFcOC9wH8n\nq5dW9r3r6C+hkj80t5GdwTECrOt1fybp59VkA95+4IVWX4HLgCdSDnvybw6wPr35h4HlufhH0nOM\nAH+Vi88DdqT408Bgh3P6LvBTsi/A+ifgD9KHrOP5AKtT/Aiwqku5bQUOpvfxe2Rz1n2XW3qNm8i+\naKz1mXw+/V/qyuexC+9fu/z6/j0Ebkj57Ad+DHw1xSv73vnCRjMzK0W/1FDMzKziPKCYmVkpPKCY\nmVkpPKCYmVkpPKCYmVkpPKCYmVkpPKCYmVkpPKCYmVkp/j/UFBBz/D9ECQAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df['salary'].hist(bins=50)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"N.Y.C. TRANSIT AUTHORITY 56932\n",
"POLICE DEPARTMENT 51453\n",
"FIRE DEPARTMENT 15848\n",
"DEPARTMENT OF CITYWIDE ADM 15739\n",
"HRA/DEPARTMENT OF SOCIAL S 13676\n",
"DEPARTMENT OF EDUCATION 11689\n",
"N.Y.C. HOUSING AUTHORITY 11501\n",
"DEPARTMENT OF CORRECTION 10188\n",
"DEPARTMENT OF SANITATION 9666\n",
"DEPARTMENT OF PARKS & RECR 7261\n",
"ADMINISTRATION FOR CHILDRE 6033\n",
"DEPARTMENT OF ENVIRONMENTA 5717\n",
"DEPARTMENT OF HEALTH AND M 5676\n",
"DEPARTMENT OF TRANSPORTATI 4589\n",
"CUNY LAGUARDIA COMMUNITY C 3379\n",
"CUNY KINGSBOROUGH COMMMUNI 3260\n",
"CUNY MANHATTAN COMMUNITY C 2974\n",
"TRIBOROUGH BRIDGE AND TUNN 2254\n",
"HOUSING PRESERVATION & DEV 2078\n",
"CUNY QUEENSBOROUGH COMMUNI 2030\n",
"DEPARTMENT OF HOMELESS SER 1966\n",
"DEPARTMENT OF FINANCE 1953\n",
"CUNY BRONX COMMUNITY COLLE 1923\n",
"LAW DEPARTMENT 1514\n",
"DISTRICT ATTORNEY-NEW YORK 1384\n",
"CUNY HOSTOS COMMUNITY COLL 1295\n",
"DEPARTMENT OF DESIGN AND C 1193\n",
"DEPARTMENT OF INFORMATION 1162\n",
"DEPARTMENT OF BUILDINGS 1111\n",
"DISTRICT ATTORNEY-KINGS CO 1062\n",
" ... \n",
"COMMUNITY BOARD NO.12-BROO 3\n",
"COMMUNITY BOARD NO.4-QUEEN 3\n",
"COMMUNITY BOARD NO.1-BROOK 3\n",
"COMMUNITY BOARD NO.13-QUEE 3\n",
"COMMUNITY BOARD NO.12-BRON 3\n",
"COMMUNITY BOARD NO.11-QUEE 3\n",
"COMMUNITY BOARD NO.2-BROOK 3\n",
"COMMUNITY BOARD NO.14-QUEE 3\n",
"COMMUNITY BOARD NO.7-MANHA 3\n",
"COMMUNITY BOARD N0.9-MANHA 3\n",
"COMMUNITY BOARD NO.6-BROOK 3\n",
"COMMUNITY BOARD NO.3-BROOK 3\n",
"COMMUNITY BOARD NO.8-BRONX 3\n",
"COMMUNITY BOARD NO.11-BROO 3\n",
"COMMUNITY BOARD NO.4-MANHA 3\n",
"COMMUNITY BOARD NO.8-BROOK 3\n",
"COMMUNITY BOARD NO.12-QUEE 3\n",
"COMMUNITY BOARD NO.10-BROO 3\n",
"COMMUNITY BOARD NO.1-RICHM 3\n",
"COMMUNITY BOARD NO.14-BROO 3\n",
"COMMUNITY BOARD NO.6-BRONX 2\n",
"COMMUNITY BOARD NO.9-QUEEN 2\n",
"COMMUNITY BOARD NO.15-BROO 2\n",
"COMMUNITY BOARD NO.1 BRONX 2\n",
"CUNY MEDGAR EVERS COLLEGE 2\n",
"COMMUNITY BOARD NO.9-BROOK 2\n",
"COMMUNITY BOARD NO.3-BRONX 2\n",
"COMMUNITY BOARD NO.6-MANHA 2\n",
"COMMUNITY BOARD NO.16-BROO 2\n",
"COMMUNITY BOARD NO.5-BRONX 2\n",
"Name: ADDRESS, dtype: int64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['ADDRESS'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL # | \n",
" PC | \n",
" SAL-RATE | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
" 51955.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 868 | \n",
" K D AABY | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 10209 | \n",
" X | \n",
" $12.00 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 56 | \n",
" I D AADIL | \n",
" POLICE DEPARTMENT | \n",
" 71012 | \n",
" A | \n",
" $46953.00 | \n",
" 46953.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL # PC SAL-RATE \\\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X $5.00 \n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X $5.00 \n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A $51955.00 \n",
"3 868 K D AABY DEPARTMENT OF CITYWIDE ADM 10209 X $12.00 \n",
"4 56 I D AADIL POLICE DEPARTMENT 71012 A $46953.00 \n",
"\n",
" salary \n",
"0 5.0 \n",
"1 5.0 \n",
"2 51955.0 \n",
"3 12.0 \n",
"4 46953.0 "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 002 | \n",
" OFFICE OF THE MAYOR | \n",
" N | \n",
"
\n",
" \n",
" 1 | \n",
" 003 | \n",
" BOARD OF ELECTIONS | \n",
" Y | \n",
"
\n",
" \n",
" 2 | \n",
" 004 | \n",
" CAMPAIGN FINANCE BOARD | \n",
" Y | \n",
"
\n",
" \n",
" 3 | \n",
" 005 | \n",
" BOARD OF ESTIMATE-SECRETARY | \n",
" Y | \n",
"
\n",
" \n",
" 4 | \n",
" 006 | \n",
" MAYOR'S OFFICE OF OPERATIONS | \n",
" N | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code agency is_mayoral\n",
"0 002 OFFICE OF THE MAYOR N\n",
"1 003 BOARD OF ELECTIONS Y\n",
"2 004 CAMPAIGN FINANCE BOARD Y\n",
"3 005 BOARD OF ESTIMATE-SECRETARY Y\n",
"4 006 MAYOR'S OFFICE OF OPERATIONS N"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agencies_df = pd.read_csv(\"cleaned-agencies.csv\", dtype='str')\n",
"agencies_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 002 | \n",
" OFFICE OF THE MAYOR | \n",
" N | \n",
"
\n",
" \n",
" 1 | \n",
" 003 | \n",
" BOARD OF ELECTIONS | \n",
" Y | \n",
"
\n",
" \n",
" 2 | \n",
" 004 | \n",
" CAMPAIGN FINANCE BOARD | \n",
" Y | \n",
"
\n",
" \n",
" 3 | \n",
" 005 | \n",
" BOARD OF ESTIMATE-SECRETARY | \n",
" Y | \n",
"
\n",
" \n",
" 4 | \n",
" 006 | \n",
" MAYOR'S OFFICE OF OPERATIONS | \n",
" N | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code agency is_mayoral\n",
"0 002 OFFICE OF THE MAYOR N\n",
"1 003 BOARD OF ELECTIONS Y\n",
"2 004 CAMPAIGN FINANCE BOARD Y\n",
"3 005 BOARD OF ESTIMATE-SECRETARY Y\n",
"4 006 MAYOR'S OFFICE OF OPERATIONS N"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"col_types = { 'code': 'str' }\n",
"agencies_df = pd.read_csv(\"cleaned-agencies.csv\", dtype=col_types)\n",
"agencies_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -999 | \n",
" THIS IS FAKE | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" -999 | \n",
" THIS IS ALSO FAKE | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" -999 | \n",
" THESE ARE ALL FAKE | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" 002 | \n",
" OFFICE OF THE MAYOR | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" 003 | \n",
" BOARD OF ELECTIONS | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code agency is_mayoral\n",
"0 -999 THIS IS FAKE False\n",
"1 -999 THIS IS ALSO FAKE True\n",
"2 -999 THESE ARE ALL FAKE False\n",
"3 002 OFFICE OF THE MAYOR False\n",
"4 003 BOARD OF ELECTIONS True"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"col_types = { 'code': 'str' }\n",
"agencies_df = pd.read_csv(\"cleaned-agencies.csv\", dtype=col_types, true_values='Y', false_values='N')\n",
"agencies_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" NaN | \n",
" THIS IS FAKE | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" NaN | \n",
" THIS IS ALSO FAKE | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" NaN | \n",
" THESE ARE ALL FAKE | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" 002 | \n",
" OFFICE OF THE MAYOR | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" 003 | \n",
" BOARD OF ELECTIONS | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code agency is_mayoral\n",
"0 NaN THIS IS FAKE False\n",
"1 NaN THIS IS ALSO FAKE True\n",
"2 NaN THESE ARE ALL FAKE False\n",
"3 002 OFFICE OF THE MAYOR False\n",
"4 003 BOARD OF ELECTIONS True"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"col_types = { 'code': 'str' }\n",
"agencies_df = pd.read_csv(\"cleaned-agencies.csv\", na_values=[\"-999\"], dtype=col_types, true_values='Y', false_values='N')\n",
"agencies_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -999 | \n",
" THIS IS FAKE | \n",
" N | \n",
"
\n",
" \n",
" 1 | \n",
" -999 | \n",
" THIS IS ALSO FAKE | \n",
" Y | \n",
"
\n",
" \n",
" 2 | \n",
" -999 | \n",
" THESE ARE ALL FAKE | \n",
" N | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" OFFICE OF THE MAYOR | \n",
" N | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" BOARD OF ELECTIONS | \n",
" Y | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code agency is_mayoral\n",
"0 -999 THIS IS FAKE N\n",
"1 -999 THIS IS ALSO FAKE Y\n",
"2 -999 THESE ARE ALL FAKE N\n",
"3 2 OFFICE OF THE MAYOR N\n",
"4 3 BOARD OF ELECTIONS Y"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agencies_df = pd.read_csv(\"cleaned-agencies.csv\")\n",
"agencies_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL | \n",
" PC | \n",
" SAL-RATE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
"
\n",
" \n",
" 3 | \n",
" 868 | \n",
" K D AABY | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 10209 | \n",
" X | \n",
" $12.00 | \n",
"
\n",
" \n",
" 4 | \n",
" 56 | \n",
" I D AADIL | \n",
" POLICE DEPARTMENT | \n",
" 71012 | \n",
" A | \n",
" $46953.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL PC SAL-RATE\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X $5.00\n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X $5.00\n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A $51955.00\n",
"3 868 K D AABY DEPARTMENT OF CITYWIDE ADM 10209 X $12.00\n",
"4 56 I D AADIL POLICE DEPARTMENT 71012 A $46953.00"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL | \n",
" PC | \n",
" SAL-RATE | \n",
" code | \n",
" agency | \n",
" is_mayoral | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [DPT, NAME, ADDRESS, TTL, PC, SAL-RATE, code, agency, is_mayoral]\n",
"Index: []"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.merge(agencies_df, left_on='DPT', right_on='code')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}