Beta
Time Series Analysis in Python
Predicting NY Average Temperatures 1872 - 2046
Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import (
plot_acf,
plot_pacf,
plot_predict,
)
from statsmodels.tsa.arima.model import ARIMA
Load temp_NY dataframe, provided by NOAA
- We'll set the index to the DATE column
- Display the head() and tail() of the data
# Read the NOAA average-temperature CSV, using the DATE column as the index.
temp_NY = pd.read_csv("datasets/NOAA_TAVG.csv", index_col="DATE")
# Show the first and last rows together (rendered as the cell's output).
temp_NY.head(), temp_NY.tail()
Convert DATE (index) to_datetime(), %Y format
# Parse the index values with the year-only '%Y' format so the frame is
# indexed by datetimes instead of the raw values read from the CSV.
temp_NY.index = pd.to_datetime(temp_NY.index, format="%Y")
temp_NY.head()
Display the min and max dates
# Smallest and largest index values, i.e. the span covered by the data
# (rendered as the cell's output).
temp_NY.index.min(), temp_NY.index.max()
Plot the average temperatures
# Plot the temperature data, then label the current axes via pyplot.
temp_NY.plot()
# title() has to be called: assigning a string to `plt.title` would replace
# the pyplot function itself and leave the chart without a title.
plt.title("Average Temperature in New York (1870-2016)")
plt.xlabel("Year")
plt.ylabel("Temperature (F)")
plt.show()
Apply the adfuller (ADF) test and print the p-value
# Run the ADF test (statsmodels `adfuller`) on the TAVG column and print the
# p-value, taken from element 1 of the returned result and rounded to
# 3 decimal places.
result = adfuller(temp_NY['TAVG'])
print(f"P-value for the ADF test is {round(result[1],3)}")
Show change in avg temperature
# First difference of the series (each row minus the previous row); the first
# row has no predecessor and becomes NaN, so it is dropped.
chg_temp = temp_NY.diff().dropna()