Debugging Classwork

import pandas as pd
import requests
import dateutil.parser
import datetime

Walkthrough #1

mytime = "0926P"

dateutil.parser.parse(mytime)

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-5-4234f0872e0f> in <module>()
----> 1 dateutil.parser.parse(mytime)


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
   1162         return parser(parserinfo).parse(timestr, **kwargs)
   1163     else:
-> 1164         return DEFAULTPARSER.parse(timestr, **kwargs)
   1165 
   1166 


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    575                 repl['day'] = monthrange(cyear, cmonth)[1]
    576 
--> 577         ret = default.replace(**repl)
    578 
    579         if res.weekday is not None and not res.day:


ValueError: hour must be in 0..23

dateutil.parser.parse(mytime + "M")

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-6-dc0a828bca5e> in <module>()
----> 1 dateutil.parser.parse(mytime + "M")


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
   1162         return parser(parserinfo).parse(timestr, **kwargs)
   1163     else:
-> 1164         return DEFAULTPARSER.parse(timestr, **kwargs)
   1165 
   1166 


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    575                 repl['day'] = monthrange(cyear, cmonth)[1]
    576 
--> 577         ret = default.replace(**repl)
    578 
    579         if res.weekday is not None and not res.day:


ValueError: hour must be in 0..23

# strptime matches the format against the string starting at its first
# character, so the value has to begin with the hour digits for the
# "%IBLAHBLAH%M%p" layout to apply.
newtime = "09BLAHBLAH26PM"
#dateutil.parser.parse(newtime)
datetime.datetime.strptime(newtime,"%IBLAHBLAH%M%p")

datetime.datetime(1900, 1, 1, 21, 26)

def convert_to_time(str_time):
    """Parse a compact 12-hour time such as "0929P" into a datetime.

    The input is two hour digits, two minute digits and a one-letter
    A/P marker. No date is present in the input, so the result carries
    strptime's default date of 1900-01-01.
    """
    # %p only recognises the full "AM"/"PM" spelling, so complete the
    # one-letter marker before parsing.
    compact_format = "%I%M%p"
    return datetime.datetime.strptime(str_time + "M", compact_format)

timestring = "0929P"
convert_to_time(timestring)

datetime.datetime(1900, 1, 1, 21, 29)

convert_to_time("1201A")

datetime.datetime(1900, 1, 1, 0, 1)

Walkthrough #2

# Read in the dataframe
times_df = pd.read_csv("times-and-serials.csv")
times_df.head()

	Owner	Time	Serial Number
0	Daniella	8:35 AM	7754
1	Carleen	7:46 AM	6881
2	Daron	1:35 AM	4509
3	Cherly	1:35 AM	2310
4	Manda	1:35 AM	4362

# Try out a way of parsing one of the dates
datetime.datetime.strptime("8:35 AM", "%I:%M %p")

datetime.datetime(1900, 1, 1, 8, 35)

import numpy as np

# Build a function using that method
def time_to_datetime(str_time):
    """Convert a clock string like "8:35 AM" into a datetime.

    Surrounding whitespace is ignored. The result carries strptime's
    default date of 1900-01-01 because the input has no date part.

    Returns np.nan instead of raising when the value cannot be used:
    the '-999' placeholder, text that does not match "%I:%M %p"
    (e.g. "GERTRUDE", "45:18 PM", "0:17 AM"), or a non-string value.
    """
    try:
        # The sentinel check stays inside the try so that values whose
        # comparison or truth test raises are treated as missing too.
        if str_time == '-999':
            return np.nan
        return datetime.datetime.strptime(str_time.strip(), "%I:%M %p")
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a string (no .strip()).
        # ValueError: a string that does not match the expected format.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return np.nan

# Apply that method to the 'Time' column of the dataframe
times_df['Time'].apply(time_to_datetime)

0    1900-01-01 08:35:00
1    1900-01-01 07:46:00
2    1900-01-01 01:35:00
3    1900-01-01 01:35:00
4    1900-01-01 01:35:00
5    1900-01-01 12:57:00
6    1900-01-01 03:49:00
7                    NaT
8    1900-01-01 00:36:00
9    1900-01-01 17:19:00
10   1900-01-01 21:20:00
11   1900-01-01 09:32:00
12   1900-01-01 17:19:00
13   1900-01-01 09:57:00
14   1900-01-01 21:33:00
15                   NaT
16   1900-01-01 20:32:00
17   1900-01-01 21:19:00
18   1900-01-01 00:58:00
19   1900-01-01 06:40:00
20   1900-01-01 02:47:00
21   1900-01-01 17:19:00
22   1900-01-01 15:38:00
23                   NaT
24   1900-01-01 05:58:00
25   1900-01-01 04:16:00
26   1900-01-01 09:03:00
27   1900-01-01 22:19:00
28   1900-01-01 21:04:00
29                   NaT
30   1900-01-01 09:12:00
31   1900-01-01 01:17:00
32   1900-01-01 22:35:00
33   1900-01-01 13:39:00
34   1900-01-01 20:58:00
35   1900-01-01 12:45:00
36                   NaT
37   1900-01-01 20:19:00
38   1900-01-01 18:41:00
39   1900-01-01 07:40:00
40   1900-01-01 09:43:00
41   1900-01-01 10:51:00
42   1900-01-01 12:07:00
43   1900-01-01 22:58:00
44   1900-01-01 16:31:00
Name: Time, dtype: datetime64[ns]

# Apply that function to the 'Time' column and store the result in a new column
times_df['converted_time'] = times_df['Time'].apply(time_to_datetime)

# Let's take a peek at our new column
times_df.head(10)

	Owner	Time	Serial Number	converted_time
0	Daniella	8:35 AM	7754	1900-01-01 08:35:00
1	Carleen	7:46 AM	6881	1900-01-01 07:46:00
2	Daron	1:35 AM	4509	1900-01-01 01:35:00
3	Cherly	1:35 AM	2310	1900-01-01 01:35:00
4	Manda	1:35 AM	4362	1900-01-01 01:35:00
5	Keri	12:57 PM	3360	1900-01-01 12:57:00
6	Frank	3:49 AM	5901	1900-01-01 03:49:00
7	Berneice	-999	6995	NaT
8	Janis	12:36 AM	4788	1900-01-01 00:36:00
9	Tosha	5:19 PM	2585	1900-01-01 17:19:00

# Let's look at all of the rows where the time conversion
# didn't end up working out
times_df[pd.isnull(times_df['converted_time'])]

	Owner	Time	Serial Number	converted_time
7	Berneice	-999	6995	NaT
15	Renato	GERTRUDE	3226	NaT
23	Monserrate	45:18 PM	5634	NaT
29	Brianne	527	0	NaT
36	Meggan	0:17 AM	5241	NaT

# don't do this, it won't work:
#   if whatever == 'NaN'
# do this instead: pd.isnull(whatever)

import numpy as np

nan

%pdb on

Automatic pdb calling has been turned ON

Walkthrough #4

I want to make sure my Plate ID is a string. Can’t lose the leading zeroes!
I don’t think anyone’s car was built in 0AD. Discard the ‘0’s as NaN.
I want the dates to be dates! Read the read_csv documentation to find out how to make pandas automatically parse dates.
“Date first observed” is a pretty weird column, but it seems like it has a date hiding inside. Using a function with .apply, transform the string (e.g. “20140324”) into a Python date. Make the 0’s show up as NaN.
“Violation time” is… not a time. Make it a time.
There sure are a lot of colors of cars, too bad so many of them are the same. Make “BLK” and “BLACK”, “WT” and “WHITE”, and any other combinations that you notice, the same.
Join the data with the Parking Violations Code dataset from the NYC Open Data site.

read_csv documentation can be found at http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html

# import numpy as np

df = pd.read_csv("../million-violations.csv", nrows=1000000)
df.head()