{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Use `.apply` to send a column of every row to a function\n", "\n", "You can use `.apply` to send a single column to a function. This is useful when cleaning up data - converting formats, altering values etc." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DPTNAMEADDRESSTTL #PCSAL-RATE
0868B J SANDIFORDDEPARTMENT OF CITYWIDE ADM12702X$5.00
1868C A WIGFALLDEPARTMENT OF CITYWIDE ADM12702X$5.00
269A E A-AWOSOGBAHRA/DEPARTMENT OF SOCIAL S52311A$51955.00
\n", "
" ], "text/plain": [ " DPT NAME ADDRESS TTL # PC \\\n", "0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X \n", "1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X \n", "2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A \n", "\n", " SAL-RATE \n", "0 $5.00 \n", "1 $5.00 \n", "2 $51955.00 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# What's our data look like?\n", "df = pd.read_csv(\"Civil_List_2014.csv\").head(3)\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 5.0\n", "1 5.0\n", "2 51955.0\n", "Name: SAL-RATE, dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get rid of $ and , in the SAL-RATE, then convert it to a float\n", "def money_to_float(money_str):\n", " return float(money_str.replace(\"$\",\"\").replace(\",\",\"\"))\n", "\n", "df['SAL-RATE'].apply(money_to_float)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Save the result in a new column\n", "df['salary'] = df['SAL-RATE'].apply(money_to_float)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DPTNAMEADDRESSTTL #PCSAL-RATEsalary
0868B J SANDIFORDDEPARTMENT OF CITYWIDE ADM12702X$5.005.0
1868C A WIGFALLDEPARTMENT OF CITYWIDE ADM12702X$5.005.0
269A E A-AWOSOGBAHRA/DEPARTMENT OF SOCIAL S52311A$51955.0051955.0
\n", "
" ], "text/plain": [ " DPT NAME ADDRESS TTL # PC \\\n", "0 868 B J SANDIFORD DEPARTMENT OF CITYWIDE ADM 12702 X \n", "1 868 C A WIGFALL DEPARTMENT OF CITYWIDE ADM 12702 X \n", "2 69 A E A-AWOSOGBA HRA/DEPARTMENT OF SOCIAL S 52311 A \n", "\n", " SAL-RATE salary \n", "0 $5.00 5.0 \n", "1 $5.00 5.0 \n", "2 $51955.00 51955.0 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Take a peek\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Use `.apply` with `axis=1` to send every single row to a function\n", "\n", "You can also send an **entire row at a time** instead of just a single column. Use this if you need to use **multiple columns to get a result**." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
heightwidth
040.010
120.09
23.44
\n", "
" ], "text/plain": [ " height width\n", "0 40.0 10\n", "1 20.0 9\n", "2 3.4 4" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create a dataframe from a list of dictionaries\n", "rectangles = [\n", " { 'height': 40, 'width': 10 },\n", " { 'height': 20, 'width': 9 },\n", " { 'height': 3.4, 'width': 4 }\n", "]\n", "\n", "rectangles_df = pd.DataFrame(rectangles)\n", "rectangles_df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 400.0\n", "1 180.0\n", "2 13.6\n", "dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use the height and width to calculate the area\n", "def calculate_area(row):\n", " return row['height'] * row['width']\n", "\n", "rectangles_df.apply(calculate_area, axis=1)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
heightwidtharea
040.010400.0
120.09180.0
23.4413.6
\n", "
" ], "text/plain": [ " height width area\n", "0 40.0 10 400.0\n", "1 20.0 9 180.0\n", "2 3.4 4 13.6" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use .apply to save the new column if we'd like\n", "rectangles_df['area'] = rectangles_df.apply(calculate_area, axis=1)\n", "rectangles_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 1 }