{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use `.apply` to send a column of every row to a function\n",
"\n",
"You can use `.apply` to send a single column to a function. This is useful when cleaning up data - converting formats, altering values etc."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL # | \n",
" PC | \n",
" SAL-RATE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL # PC \\\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A \n",
"\n",
" SAL-RATE \n",
"0 $5.00 \n",
"1 $5.00 \n",
"2 $51955.00 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# What's our data look like?\n",
"df = pd.read_csv(\"Civil_List_2014.csv\").head(3)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 5.0\n",
"1 5.0\n",
"2 51955.0\n",
"Name: SAL-RATE, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get rid of $ and , in the SAL-RATE, then convert it to a float\n",
"def money_to_float(money_str):\n",
" return float(money_str.replace(\"$\",\"\").replace(\",\",\"\"))\n",
"\n",
"df['SAL-RATE'].apply(money_to_float)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Save the result in a new column\n",
"df['salary'] = df['SAL-RATE'].apply(money_to_float)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DPT | \n",
" NAME | \n",
" ADDRESS | \n",
" TTL # | \n",
" PC | \n",
" SAL-RATE | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 868 | \n",
" B J SANDIFORD | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 868 | \n",
" C A WIGFALL | \n",
" DEPARTMENT OF CITYWIDE ADM | \n",
" 12702 | \n",
" X | \n",
" $5.00 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 69 | \n",
" A E A-AWOSOGBA | \n",
" HRA/DEPARTMENT OF SOCIAL S | \n",
" 52311 | \n",
" A | \n",
" $51955.00 | \n",
" 51955.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DPT NAME ADDRESS TTL # PC \\\n",
"0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X \n",
"2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A \n",
"\n",
" SAL-RATE salary \n",
"0 $5.00 5.0 \n",
"1 $5.00 5.0 \n",
"2 $51955.00 51955.0 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Take a peek\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use `.apply` with `axis=1` to send every single row to a function\n",
"\n",
"You can also send an **entire row at a time** instead of just a single column. Use this if you need to use **multiple columns to get a result**."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" height | \n",
" width | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 40.0 | \n",
" 10 | \n",
"
\n",
" \n",
" 1 | \n",
" 20.0 | \n",
" 9 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" height width\n",
"0 40.0 10\n",
"1 20.0 9\n",
"2 3.4 4"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a dataframe from a list of dictionaries\n",
"rectangles = [\n",
" { 'height': 40, 'width': 10 },\n",
" { 'height': 20, 'width': 9 },\n",
" { 'height': 3.4, 'width': 4 }\n",
"]\n",
"\n",
"rectangles_df = pd.DataFrame(rectangles)\n",
"rectangles_df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 400.0\n",
"1 180.0\n",
"2 13.6\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use the height and width to calculate the area\n",
"def calculate_area(row):\n",
" return row['height'] * row['width']\n",
"\n",
"rectangles_df.apply(calculate_area, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" height | \n",
" width | \n",
" area | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 40.0 | \n",
" 10 | \n",
" 400.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 20.0 | \n",
" 9 | \n",
" 180.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.4 | \n",
" 4 | \n",
" 13.6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" height width area\n",
"0 40.0 10 400.0\n",
"1 20.0 9 180.0\n",
"2 3.4 4 13.6"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use .apply to save the new column if we'd like\n",
"rectangles_df['area'] = rectangles_df.apply(calculate_area, axis=1)\n",
"rectangles_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}