Beta
Working with Dates and Times in Python
Run the hidden code cell below to import the data used in this course.
Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Add your code snippets here
# Import datetime
from datetime import datetime
# Build a fixed, naive reference timestamp: 1 October 2017 at 15:19:13.
dt = datetime(2017, 10, 1, 15, 19, 13)
# Show it in ISO 8601 form (YYYY-MM-DDTHH:MM:SS).
print(dt.isoformat())
# .replace() returns a new datetime with the given fields swapped;
# dt itself is left unchanged.
dt_old = dt.replace(year=1917)
# Call .isoformat() here as well: printing the object directly goes through
# str(), which separates date and time with a space instead of "T" and is
# therefore not the ISO 8601 form this step is meant to show.
print(dt_old.isoformat())
# Import the datetime class
from datetime import datetime
# Raw timestamp text, laid out as YYYY-MM-DD HH:MM:SS.
s = '2017-02-03 00:00:01'

# strptime directives that mirror that layout field by field.
fmt = '%Y-%m-%d %H:%M:%S'

# Parse the text into a naive datetime and display it.
d = datetime.strptime(s, fmt)
print(d)
# ISO 8601 layout written out as explicit strftime directives.
fmt_iso = "%Y-%m-%dT%H:%M:%S"

# Render the same datetime twice, once with .isoformat() and once with
# .strftime(fmt_iso), one per line so the two can be compared by eye.
# NOTE(review): first_start is not defined in this section; presumably it is
# created by the hidden data-loading cell - confirm.
print(first_start.isoformat(), first_start.strftime(fmt_iso), sep="\n")
# Starting timestamps, expressed as seconds since the Unix epoch.
timestamps = [1514665153, 1514664543]

# Convert each timestamp into a naive datetime. datetime.fromtimestamp()
# without a tz argument interprets the value in the machine's local timezone,
# so the printed wall-clock values depend on where this runs.
dts = [datetime.fromtimestamp(ts) for ts in timestamps]

# Print results
print(dts)
# Length of every trip in seconds.
# Subtracting two datetimes yields a timedelta; .total_seconds() turns that
# into a single float covering the whole elapsed span.
# NOTE(review): onebike_datetimes is not defined in this section; it is used
# here as an iterable of dicts with 'start' and 'end' datetime values.
onebike_durations = [
    (trip['end'] - trip['start']).total_seconds()
    for trip in onebike_datetimes
]
# Import datetime, timedelta, timezone
from datetime import datetime, timedelta, timezone
# Fixed-offset timezone for Pacific Standard Time. PST is UTC-8, i.e. eight
# hours *behind* UTC, so the offset passed to timezone() must be negative.
pst = timezone(timedelta(hours=-8))
# October 1, 2017 at 15:26:26, UTC-8
dt = datetime(2017, 10, 1, 15, 26, 26, tzinfo=pst)
# For an aware datetime, .isoformat() appends the UTC offset (here -08:00).
print(dt.isoformat())
# Import tz
from dateutil import tz
# Look up timezone objects by their IANA database names.
et = tz.gettz('America/New_York')   # US Eastern Time
sm = tz.gettz('Pacific/Apia')       # Samoa

# Start of the first recorded trip, exactly as stored.
# NOTE(review): onebike_datetimes is defined outside this section; this
# assumes its 'start' values are timezone-aware datetimes - confirm.
local = onebike_datetimes[0]['start']

# Re-express that datetime in the Samoa timezone.
notlocal = local.astimezone(sm)

# Show both renderings, one per line, to make the difference visible.
print(local.isoformat(), notlocal.isoformat(), sep="\n")
# Import datetime, timedelta, tz, timezone
from datetime import datetime, timedelta, timezone
from dateutil import tz
# Midnight on March 12, 2017 in New York, and the datetime six hours after it
# according to timedelta arithmetic.
start = datetime(2017, 3, 12, tzinfo=tz.gettz('America/New_York'))
end = start + timedelta(hours=6)
print(" to ".join([start.isoformat(), end.isoformat()]))

# Elapsed hours as computed by subtracting the two datetimes directly.
# Both share the same tzinfo object, so per the datetime docs the subtraction
# ignores tzinfo and compares the wall-clock fields only.
print((end - start).total_seconds() / 3600)
# Midnight on March 12, 2017 in New York, and the datetime six hours after it
# according to timedelta arithmetic.
start = datetime(2017, 3, 12, tzinfo=tz.gettz('America/New_York'))
end = start + timedelta(hours=6)
print(" to ".join([start.isoformat(), end.isoformat()]))

# Elapsed hours from a direct subtraction (same tzinfo on both sides, so only
# the wall-clock fields are compared).
print((end - start).total_seconds() / 3600)

# Elapsed hours after converting both endpoints to UTC first; printing this
# next to the figure above shows whether the two calculations agree.
print(
    (end.astimezone(timezone.utc) - start.astimezone(timezone.utc)).total_seconds()
    / 3600
)
# Trip lengths in seconds, computed in UTC.
# NOTE(review): onebike_datetimes and tz (dateutil) come from outside this
# section; trips are used as dicts with 'start' and 'end' datetimes.
trip_durations = []
for trip in onebike_datetimes:
    # When the start is later than the end, mark the end with fold=1
    # (tz.enfold) so it is read as the second occurrence of a repeated
    # wall-clock time. Note that this updates the trip dict in place.
    if trip['start'] > trip['end']:
        trip['end'] = tz.enfold(trip['end'])
    # Convert both endpoints to UTC before subtracting.
    start = trip['start'].astimezone(timezone.utc)
    end = trip['end'].astimezone(timezone.utc)
    # Elapsed time between the two UTC instants, in seconds.
    trip_length_seconds = (end - start).total_seconds()
    trip_durations.append(trip_length_seconds)

# Take the shortest trip duration
print("Shortest trip: " + str(min(trip_durations)))
# Import pandas
import pandas as pd
# Read the bike-share CSV into `rides`, asking pandas to parse the two
# timestamp columns as datetimes rather than leaving them as strings.
rides = pd.read_csv(
    'capital-onebike.csv',
    parse_dates=['Start date', 'End date'],
)

# Display the first row (position 0) as a sanity check on the load.
print(rides.iloc[0])
# Per-ride elapsed time: end minus start.
ride_durations = rides['End date'] - rides['Start date']

# Store the elapsed time as seconds in a new 'Duration' column.
rides['Duration'] = ride_durations.dt.total_seconds()

# Peek at the first few values of the new column.
print(rides['Duration'].head())
# Boolean mask: True where a ride starts and ends at the same station.
joyrides = (rides['Start station'] == rides['End station'])

# Summing a boolean mask counts its True entries.
print("{} rides were joyrides".format(joyrides.sum()))

# Median duration across every ride.
print(
    "The median duration overall was {:.2f} seconds"
    .format(rides['Duration'].median())
)

# Median duration restricted to the rows selected by the joyrides mask.
print(
    "The median duration for joyrides was {:.2f} seconds"
    .format(rides.loc[joyrides, 'Duration'].median())
)
# Import matplotlib
import matplotlib.pyplot as plt
# Bucket rides by calendar day of 'Start date', count the rows in each
# bucket, and plot the counts with the y-axis fixed to 0-15.
(
    rides
    .resample('D', on='Start date')
    .size()
    .plot(ylim=[0, 15])
)

# Render the figure.
plt.show()
# Import matplotlib
import matplotlib.pyplot as plt
# Bucket rides by month of 'Start date', count the rows in each bucket,
# and plot the counts with the y-axis fixed to 0-150.
(
    rides
    .resample('M', on='Start date')
    .size()
    .plot(ylim=[0, 150])
)

# Render the figure.
plt.show()
# Monthly buckets keyed on 'Start date', narrowed to the 'Member type' column.
monthly_rides = rides.resample('M', on='Start date')['Member type']

# Share of each member type per month: per-type counts divided by the
# total number of rides in that month.
print(
    monthly_rides.value_counts()
    / monthly_rides.size()
)
# Split rides by 'Member type' first, then bucket each group by month of
# 'Start date'.
grouped = (
    rides
    .groupby('Member type')
    .resample('M', on='Start date')
)

# Median 'Duration' for every (member type, month) combination.
print(grouped['Duration'].median())
# Attach the America/New_York timezone to the 'Start date' column.
# ambiguous='NaT' makes wall-clock times that occur twice (the repeated hour
# when clocks go back) become NaT instead of raising an error.
rides['Start date'] = rides['Start date'].dt.tz_localize(
    'America/New_York',
    ambiguous='NaT',
)

# Show the first value of the localized column.
print(rides['Start date'].iloc[0])
# First value before the conversion, for comparison.
print(rides['Start date'].iloc[0])

# Re-express the 'Start date' column in the Europe/London timezone.
# tz_convert requires the column to be timezone-aware already.
rides['Start date'] = rides['Start date'].dt.tz_convert('Europe/London')

# First value again, now shown on the London clock.
print(rides['Start date'].iloc[0])
# Name of the weekday on which each ride started.
rides['Ride start weekday'] = rides['Start date'].dt.day_name()

# Median ride duration for each weekday.
print(
    rides
    .groupby('Ride start weekday')['Duration']
    .median()
)
# Gap between rides: shift 'End date' down by one row so each row lines up
# with the previous ride's end, then subtract that from this ride's start.
# The first row has no predecessor, so its value is missing.
# NOTE(review): if 'Start date' was made timezone-aware by an earlier cell
# while 'End date' is still naive, this subtraction raises a TypeError -
# confirm both columns agree on timezone-awareness before running.
rides['Time since'] = rides['Start date'] - rides['End date'].shift()

# Convert the timedelta to a plain number of seconds, which is easier to
# aggregate.
rides['Time since'] = rides['Time since'].dt.total_seconds()

# Bucket rides by month of 'Start date'.
monthly = rides.resample('M', on='Start date')

# Average gap between rides per month, converted from seconds to hours.
print(monthly['Time since'].mean() / 3600)
Explore Datasets
Use the DataFrames imported in the first cell to explore the data and practice your skills!
- Count how many hurricanes made landfall each year in Florida using `florida_hurricane_dates`.
- Reload the dataset `datasets/capital-onebike.csv` so that it correctly parses date and time columns.
- Calculate the average trip duration of bike rentals on weekends in `rides`. Compare it with the average trip duration of bike rentals on weekdays.