{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## What bad columns looks like\n",
    "\n",
    "Sometimes columns have extra spaces or are just plain odd, even if they look normal."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DPT</th>\n",
       "      <th>NAME</th>\n",
       "      <th>ADDRESS</th>\n",
       "      <th>TTL #</th>\n",
       "      <th>PC</th>\n",
       "      <th>SAL-RATE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69</td>\n",
       "      <td>A E A-AWOSOGBA</td>\n",
       "      <td>HRA/DEPARTMENT OF SOCIAL S</td>\n",
       "      <td>52311</td>\n",
       "      <td>A</td>\n",
       "      <td>$51955.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   DPT             NAME                        ADDRESS  TTL #    PC        \\\n",
       "0       868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "1       868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "2        69  A E A-AWOSOGBA  HRA/DEPARTMENT OF SOCIAL S    52311        A   \n",
       "\n",
       "    SAL-RATE  \n",
       "0      $5.00  \n",
       "1      $5.00  \n",
       "2  $51955.00  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"Civil_List_2014.csv\").head(3)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['DPT     ', 'NAME    ', 'ADDRESS ', 'TTL #   ', 'PC      ', 'SAL-RATE'], dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Overwriting columns\n",
    "\n",
    "In order to fix them, you have a few options. Once thing you can do is just *overwrite them* with new ones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DPT</th>\n",
       "      <th>NAME</th>\n",
       "      <th>ADDRESS</th>\n",
       "      <th>TTL #</th>\n",
       "      <th>PC</th>\n",
       "      <th>SAL-RATE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69</td>\n",
       "      <td>A E A-AWOSOGBA</td>\n",
       "      <td>HRA/DEPARTMENT OF SOCIAL S</td>\n",
       "      <td>52311</td>\n",
       "      <td>A</td>\n",
       "      <td>$51955.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   DPT             NAME                        ADDRESS  TTL #    PC        \\\n",
       "0       868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "1       868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "2        69  A E A-AWOSOGBA  HRA/DEPARTMENT OF SOCIAL S    52311        A   \n",
       "\n",
       "    SAL-RATE  \n",
       "0      $5.00  \n",
       "1      $5.00  \n",
       "2  $51955.00  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"Civil_List_2014.csv\").head(3)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['DPT     ', 'NAME    ', 'ADDRESS ', 'TTL #   ', 'PC      ', 'SAL-RATE'], dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this case it might make sense to use a list comprehension to strip all of the extra spaces."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.columns = [col.strip() for col in df.columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['DPT', 'NAME', 'ADDRESS', 'TTL #', 'PC', 'SAL-RATE'], dtype='object')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mass renaming\n",
    "\n",
    "You can also just pass in a new list of columns if you don't like what they come in as."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DPT</th>\n",
       "      <th>NAME</th>\n",
       "      <th>ADDRESS</th>\n",
       "      <th>TTL #</th>\n",
       "      <th>PC</th>\n",
       "      <th>SAL-RATE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69</td>\n",
       "      <td>A E A-AWOSOGBA</td>\n",
       "      <td>HRA/DEPARTMENT OF SOCIAL S</td>\n",
       "      <td>52311</td>\n",
       "      <td>A</td>\n",
       "      <td>$51955.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   DPT             NAME                        ADDRESS  TTL #    PC        \\\n",
       "0       868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "1       868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM    12702        X   \n",
       "2        69  A E A-AWOSOGBA  HRA/DEPARTMENT OF SOCIAL S    52311        A   \n",
       "\n",
       "    SAL-RATE  \n",
       "0      $5.00  \n",
       "1      $5.00  \n",
       "2  $51955.00  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"Civil_List_2014.csv\").head(3)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Rename all of the columns, keeping them in order\n",
    "df.columns = ['Department', 'Name', 'Address', 'Title', 'Pay Class', 'Salary Rate']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Department</th>\n",
       "      <th>Name</th>\n",
       "      <th>Address</th>\n",
       "      <th>Title</th>\n",
       "      <th>Pay Class</th>\n",
       "      <th>Salary Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69</td>\n",
       "      <td>A E A-AWOSOGBA</td>\n",
       "      <td>HRA/DEPARTMENT OF SOCIAL S</td>\n",
       "      <td>52311</td>\n",
       "      <td>A</td>\n",
       "      <td>$51955.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Department            Name                     Address  Title Pay Class  \\\n",
       "0         868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM  12702         X   \n",
       "1         868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM  12702         X   \n",
       "2          69  A E A-AWOSOGBA  HRA/DEPARTMENT OF SOCIAL S  52311         A   \n",
       "\n",
       "  Salary Rate  \n",
       "0       $5.00  \n",
       "1       $5.00  \n",
       "2   $51955.00  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rename on import\n",
    "\n",
    "You also set their names when you're reading in the csv."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Department</th>\n",
       "      <th>Name</th>\n",
       "      <th>Address</th>\n",
       "      <th>Title</th>\n",
       "      <th>Pay Class</th>\n",
       "      <th>Salary Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>DPT</td>\n",
       "      <td>NAME</td>\n",
       "      <td>ADDRESS</td>\n",
       "      <td>TTL #</td>\n",
       "      <td>PC</td>\n",
       "      <td>SAL-RATE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Department            Name                     Address     Title Pay Class  \\\n",
       "0   DPT             NAME                        ADDRESS   TTL #     PC         \n",
       "1        868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM     12702         X   \n",
       "2        868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM     12702         X   \n",
       "\n",
       "  Salary Rate  \n",
       "0    SAL-RATE  \n",
       "1       $5.00  \n",
       "2       $5.00  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read in the csv, passing names= to set the column names\n",
    "df = pd.read_csv(\"Civil_List_2014.csv\", names=[\"Department\", \"Name\", \"Address\", \"Title\", \"Pay Class\", \"Salary Rate\"]).head(3)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Unfortunately this leaves you with the first row of *actual* headers inside of your data. When usings `names=` in `read_csv`, add `skiprows=1` to skip the first row (the header row)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Department</th>\n",
       "      <th>Name</th>\n",
       "      <th>Address</th>\n",
       "      <th>Title</th>\n",
       "      <th>Pay Class</th>\n",
       "      <th>Salary Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>868</td>\n",
       "      <td>B J  SANDIFORD</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>868</td>\n",
       "      <td>C A  WIGFALL</td>\n",
       "      <td>DEPARTMENT OF CITYWIDE ADM</td>\n",
       "      <td>12702</td>\n",
       "      <td>X</td>\n",
       "      <td>$5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69</td>\n",
       "      <td>A E A-AWOSOGBA</td>\n",
       "      <td>HRA/DEPARTMENT OF SOCIAL S</td>\n",
       "      <td>52311</td>\n",
       "      <td>A</td>\n",
       "      <td>$51955.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Department            Name                     Address  Title Pay Class  \\\n",
       "0         868  B J  SANDIFORD  DEPARTMENT OF CITYWIDE ADM  12702         X   \n",
       "1         868    C A  WIGFALL  DEPARTMENT OF CITYWIDE ADM  12702         X   \n",
       "2          69  A E A-AWOSOGBA  HRA/DEPARTMENT OF SOCIAL S  52311         A   \n",
       "\n",
       "  Salary Rate  \n",
       "0       $5.00  \n",
       "1       $5.00  \n",
       "2   $51955.00  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pass in names=, but also skiprows=1\n",
    "df = pd.read_csv(\"Civil_List_2014.csv\", skiprows=1, names=[\"Department\", \"Name\", \"Address\", \"Title\", \"Pay Class\", \"Salary Rate\"]).head(3)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}