# Overview of ARIMA Prediction

ARIMA stands for AutoRegressive Integrated Moving Average, an algorithm used to predict future values of a time series from its past values. To produce accurate predictions, ARIMA requires the series to be "stationary", meaning that statistical properties such as the mean, variance, and covariance remain unchanged over time.

## Time Series Analysis - ARIMA Model

In [84]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): no visible cell assigns `df` — presumably it was loaded earlier
# (e.g. from a price CSV); confirm before running the notebook top to bottom.
df

Out[84]:
Date Open High Low Close Adj Close Volume
0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0
1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0
2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0
3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0
4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
256 2021-07-12 1.187366 1.188213 1.183670 1.187296 1.187296 0
257 2021-07-13 1.186493 1.187790 1.179343 1.186521 1.186521 0
258 2021-07-14 1.177440 1.182801 1.177260 1.177537 1.177537 0
259 2021-07-15 1.183334 1.185115 1.180596 1.183334 1.183334 0
260 2021-07-16 1.181307 1.182300 1.179400 1.181181 1.181181 0

261 rows × 7 columns

In [85]:
# `pandas.datetime` was deprecated in pandas 1.0 and removed in 2.0. It was only
# an alias of the standard-library class, so import that directly instead.
from datetime import datetime

# Parse the Date column and use it as the index so the x-axis of the plot
# below is a real time axis.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.index = df['Date']

plt.figure(figsize=(16, 8))
plt.plot(df['Close'], label='Close Price history')
# The label passed above is only rendered once a legend is requested.
plt.legend()
plt.show()

### Plot a graph using ARIMA model

In [86]:
# `pandas.datetime` was deprecated in pandas 1.0 and removed in 2.0. It was only
# an alias of the standard-library class, so import that directly instead.
from datetime import datetime
from pandas.plotting import autocorrelation_plot


def parser(x):
    """Parse ``x`` from 'YYYY-MM-DD' text into pandas datetime values.

    Args:
        x: A single date string or a sequence/Series of date strings in
            ``%Y-%m-%d`` format.

    Returns:
        A ``pandas.Timestamp`` for a scalar input, or the datetime-typed
        equivalent (``DatetimeIndex`` / ``Series``) for a collection.

    Raises:
        ValueError: If a value does not match the ``%Y-%m-%d`` format.
    """
    # Parse the argument itself; the previous body ignored ``x`` and always
    # re-parsed the global ``df.Date`` column.
    return pd.to_datetime(x, format='%Y-%m-%d')

# autocorrelation_plot works on a single time series, so pass the closing
# prices rather than the whole frame (which also holds the datetime Date
# column and several price columns).
autocorrelation_plot(df['Close'])
# plt.plot() with no arguments draws nothing; render the figure instead.
plt.show()

In [4]:
# Print the leading rows of the frame, then draw it with pandas' default plot.
preview = df.head()
print(preview)
df.plot()
plt.show()

                Open      High       Low     Close  Adj Close  Volume
Date
2020-07-17  1.138650  1.144165  1.137889  1.139212   1.139212       0
2020-07-20  1.143955  1.146789  1.140524  1.144296   1.144296       0
2020-07-21  1.145764  1.149801  1.142622  1.145869   1.145869       0
2020-07-22  1.153509  1.160093  1.150854  1.153403   1.153403       0
2020-07-23  1.156671  1.162426  1.154215  1.156872   1.156872       0

In [64]:
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns

# Date formatter that renders dates as zero-padded month / two-digit year
# (strftime pattern '%m/%y', e.g. 07/20).
my_year_month_fmt = mdates.DateFormatter('%m/%y')


### Calculation of moving average

In [65]:
# Short-window simple moving average: mean over a 20-row rolling window.
# NOTE(review): `data` is not assigned in the visible cells — presumably it is
# the price frame loaded earlier; confirm.
short_window = data.rolling(window=20)
short_rolling = short_window.mean()

In [8]:
# Long-window simple moving average: mean over a 100-row rolling window.
long_window = data.rolling(window=100)
long_rolling = long_window.mean()
# Display the trailing rows of the result.
long_rolling.tail()

In [66]:
# Line plot of the Close column with grid lines on an 8x5 figure.
data['Close'].plot(grid=True,figsize=(8,5))

In [67]:
# 42-row and 252-row simple moving averages of Close, rounded to 2 decimals.
# Each column is NaN until its window has filled (41 and 251 leading rows).
data['42d'] = np.round(data['Close'].rolling(window=42).mean(), 2)
data['252d'] = np.round(data['Close'].rolling(window=252).mean(), 2)
# Call tail(): the bare attribute `data.tail` only echoes the bound method
# instead of showing the last rows.
data.tail()

In [68]:
# Overlay Close with its 42-row and 252-row moving averages on one gridded
# 8x5 figure.
data[['Close','42d','252d']].plot(grid=True,figsize=(8,5))

In [70]:
# Spread between the short (42-row) and long (252-row) moving averages.
data['42-252'] = data['42d'] - data['252d']

# Threshold the spread must exceed (in either direction) to take a position.
# NOTE(review): if `data` holds the EUR/USD closes shown earlier (~1.1-1.2),
# a threshold of 50 can never trigger and Stance stays 0 — confirm the scale.
X = 50

# +1 when the spread is above X, -1 when it is below -X, otherwise 0
# (NaN spreads compare False on both tests and therefore map to 0).
spread = data['42-252']
data['Stance'] = np.select([spread > X, spread < -X], [1, -1], default=0)

# Count how many rows fall into each stance.
data['Stance'].value_counts()

In [78]:
# Plot the Stance column over the index.
data['Stance'].plot()

In [88]:
# Row-over-row log return of the closing price.
close = data['Close']
data['Market Returns'] = np.log(close / close.shift(1))

# Strategy return: the previous row's stance applied to the current return.
lagged_stance = data['Stance'].shift(1)
data['Strategy'] = data['Market Returns'] * lagged_stance

# Cumulative log returns of the market versus the strategy.
cumulative = data[['Market Returns', 'Strategy']].cumsum()
cumulative.plot(grid=True, figsize=(8, 5))

In [80]:
# Spread between the 42-row and 252-row moving averages.
data["diff"] = data["42d"].sub(data["252d"])

# Close and the spread, each on its own subplot.
panel = data[["Close", "diff"]]
panel.plot(subplots=True, figsize=(8, 5))

In [81]:
# Threshold the moving-average spread must exceed to generate a signal.
# NOTE(review): confirm this scale suits the prices in `data`; for values
# around 1.1-1.2 a spread of 100 is unreachable.
sigdiff = 100.0

# +1 when the spread is above the threshold, otherwise 0 ...
data["Signal"] = np.where(data["diff"] > sigdiff, 1, 0)
# ... then -1 when it is below the negative threshold. Fall back to the
# existing Signal rather than 0, otherwise the +1 entries set on the previous
# line are wiped out and Signal can never be 1.
data["Signal"] = np.where(data["diff"] < -sigdiff, -1, data["Signal"])

# Close, spread and signal, each on its own subplot.
data[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 8))

In [82]:
# Row-over-row log return of the closing price.
data["Returns"] = np.log(data["Close"] / data["Close"].shift(1))

# Lag the signal by one row before applying it. Signal on a given row is
# derived from that row's Close, so multiplying it by the same row's return
# would trade on information not yet available (look-ahead bias). This matches
# the Stance-based strategy computed earlier in the notebook, which also uses
# shift(1).
data["Strategy"] = data["Signal"].shift(1) * data["Returns"]

# Cumulative strategy log return.
data["Earnings"] = data["Strategy"].cumsum()
data[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))

