• +91-9872993883
• +91-8283824812
• info@ris-ai.com

# Overview of ARIMA Prediction

ARIMA stands for AutoRegressive Integrated Moving Average, an algorithm used to predict future values of a time series from its past values. To produce accurate predictions, ARIMA requires the dataset to be "stationary", meaning that statistical properties such as mean, variance, and covariance remain unchanged over time.

## Time Series Analysis - ARIMA Model

In [84]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Display the DataFrame as the cell output.
# NOTE(review): `df` is not assigned in any visible cell - presumably it is
# loaded earlier (e.g. from a CSV of daily quotes); confirm before re-running.
df

Out[84]:
Date Open High Low Close Adj Close Volume
0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0
1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0
2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0
3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0
4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
... ... ... ... ... ... ... ...
256 2021-07-12 1.187366 1.188213 1.183670 1.187296 1.187296 0
257 2021-07-13 1.186493 1.187790 1.179343 1.186521 1.186521 0
258 2021-07-14 1.177440 1.182801 1.177260 1.177537 1.177537 0
259 2021-07-15 1.183334 1.185115 1.180596 1.183334 1.183334 0
260 2021-07-16 1.181307 1.182300 1.179400 1.181181 1.181181 0

261 rows × 7 columns

In [85]:
# `pandas.datetime` was deprecated in pandas 1.0 and later removed; the
# standard-library class is the same object it used to re-export.
from datetime import datetime

# Parse the 'Date' strings into real timestamps and use them as the index so
# that the plot below gets a proper time axis.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.index = df['Date']

# Line chart of the closing price over the whole period.
plt.figure(figsize=(16, 8))
plt.plot(df['Close'], label='Close Price history')

Out[85]:
[<matplotlib.lines.Line2D at 0x7f886b129828>]

### Plot a graph using ARIMA model

In [86]:
# `pandas.datetime` was deprecated in pandas 1.0 and later removed; the
# standard-library class is the same object it used to re-export.
from datetime import datetime
from pandas.plotting import autocorrelation_plot

def parser(x):
    """Convert ``YYYY-MM-DD`` date text into pandas datetime values.

    Args:
        x: A single date string, or a list/Series of date strings, in
            ``%Y-%m-%d`` format.

    Returns:
        Whatever ``pd.to_datetime`` returns for ``x``: a ``Timestamp`` for a
        scalar, a ``DatetimeIndex`` for a list, a ``Series`` for a ``Series``.

    Raises:
        ValueError: If a value does not match the ``%Y-%m-%d`` format.
    """
    # Parse the argument itself; the previous body ignored `x` and always
    # re-parsed the global `df.Date` column.
    return pd.to_datetime(x, format='%Y-%m-%d')

# Autocorrelation is defined for a single time series, so plot the closing
# price rather than passing the whole DataFrame.
autocorrelation_plot(df['Close'])
# Render the figure; a bare `plt.plot()` with no arguments draws nothing.
plt.show()

Out[86]:
[]
In [4]:
# Print the first five rows, then draw a line chart of the frame's columns.
print(df.head(n=5))
df.plot()
plt.show()

                Open      High       Low     Close  Adj Close  Volume
Date
2020-07-17  1.138650  1.144165  1.137889  1.139212   1.139212       0
2020-07-20  1.143955  1.146789  1.140524  1.144296   1.144296       0
2020-07-21  1.145764  1.149801  1.142622  1.145869   1.145869       0
2020-07-22  1.153509  1.160093  1.150854  1.153403   1.153403       0
2020-07-23  1.156671  1.162426  1.154215  1.156872   1.156872       0

In [64]:
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns

# Axis date formatter that renders tick labels as month/two-digit-year ('%m/%y').
my_year_month_fmt = mdates.DateFormatter('%m/%y')


Out[64]:
Date Open High Low Close Adj Close Volume
0 2020-07-17 1.138650 1.144165 1.137889 1.139212 1.139212 0
1 2020-07-20 1.143955 1.146789 1.140524 1.144296 1.144296 0
2 2020-07-21 1.145764 1.149801 1.142622 1.145869 1.145869 0
3 2020-07-22 1.153509 1.160093 1.150854 1.153403 1.153403 0
4 2020-07-23 1.156671 1.162426 1.154215 1.156872 1.156872 0
5 2020-07-24 1.159501 1.164009 1.158171 1.159608 1.159608 0
6 2020-07-27 1.165257 1.177953 1.165257 1.165257 1.165257 0
7 2020-07-28 1.176651 1.177579 1.170100 1.176928 1.176928 0
8 2020-07-29 1.172058 1.177899 1.171495 1.171880 1.171880 0
9 2020-07-30 1.178689 1.180735 1.173268 1.178287 1.178287 0

### Calculation of moving average

In [65]:
# Calculating the short-window simple moving average.
# Each value is the mean of the current row and the 19 rows before it, so the
# first 19 rows are NaN until the 20-row window is full.
short_rolling = data.rolling(window=20).mean()
# Display the first 20 rows so the first complete window (row 19) is visible,
# mirroring the `.tail()` call used for the long-window average.
short_rolling.head(20)

Out[65]:
Open High Low Close Adj Close Volume
0 NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN
5 NaN NaN NaN NaN NaN NaN
6 NaN NaN NaN NaN NaN NaN
7 NaN NaN NaN NaN NaN NaN
8 NaN NaN NaN NaN NaN NaN
9 NaN NaN NaN NaN NaN NaN
10 NaN NaN NaN NaN NaN NaN
11 NaN NaN NaN NaN NaN NaN
12 NaN NaN NaN NaN NaN NaN
13 NaN NaN NaN NaN NaN NaN
14 NaN NaN NaN NaN NaN NaN
15 NaN NaN NaN NaN NaN NaN
16 NaN NaN NaN NaN NaN NaN
17 NaN NaN NaN NaN NaN NaN
18 NaN NaN NaN NaN NaN NaN
19 1.169727 1.174497 1.166116 1.169748 1.169748 0.0
In [8]:
# Long-window simple moving average: mean over a trailing window of 100 rows.
long_rolling = data.rolling(100).mean()
# Show the last five rows, where the window is fully populated.
long_rolling.tail(n=5)

Out[8]:
Open High Low Close Adj Close Volume
256 1.200753 1.203508 1.197616 1.200819 1.200819 0.0
257 1.200451 1.203207 1.197272 1.200517 1.200517 0.0
258 1.200073 1.202861 1.196934 1.200141 1.200141 0.0
259 1.199737 1.202470 1.196583 1.199803 1.199803 0.0
260 1.199390 1.202111 1.196285 1.199454 1.199454 0.0
In [66]:
# Line chart of the closing price with grid lines enabled.
data.loc[:, 'Close'].plot(figsize=(8, 5), grid=True)

Out[66]:
<AxesSubplot:>
In [67]:
# 42-row and 252-row simple moving averages of the close, rounded to two
# decimals. With a 252-row window only rows from index 251 onward are non-NaN.
data['42d'] = np.round(data['Close'].rolling(window=42).mean(), 2)
data['252d'] = np.round(data['Close'].rolling(window=252).mean(), 2)
# Call tail() so the last rows are displayed; the bare attribute `data.tail`
# only shows the bound method's repr.
data.tail()

Out[67]:
In [68]:
# Overlay the closing price with its 42-row and 252-row moving averages.
data.loc[:, ['Close', '42d', '252d']].plot(figsize=(8, 5), grid=True)

Out[68]:
In [70]:
# Spread between the short (42-row) and long (252-row) moving averages.
data['42-252'] = data['42d'].sub(data['252d'])
# Threshold the spread must exceed before a position is taken.
# NOTE(review): the displayed prices are around 1.1-1.2, so a spread of 50 looks
# unreachable and every stance would be 0 - confirm the intended scale.
X = 50
# +1 when the spread is above X, -1 when it is below -X, otherwise 0
# (rows where the spread is NaN also get 0).
data['Stance'] = np.select(
    [data['42-252'] < -X, data['42-252'] > X],
    [-1, 1],
    default=0,
)
data['Stance'].value_counts()

Out[70]:
In [78]:
# Plot the stance column against the index.
data.loc[:, 'Stance'].plot()

Out[78]:
In [88]:
# Log return of the close relative to the previous row.
data['Market Returns'] = np.log(data['Close'].div(data['Close'].shift(1)))
# Strategy return: the previous row's stance applied to the current row's return.
data['Strategy'] = data['Stance'].shift(1).mul(data['Market Returns'])
# Cumulative log returns of the market and of the strategy on one chart.
data.loc[:, ['Market Returns', 'Strategy']].cumsum().plot(figsize=(8, 5), grid=True)

Out[88]:
In [80]:
# Spread between the 42-row and 252-row moving averages.
data["diff"] = data["42d"].sub(data["252d"])
# Close price and spread, each in its own subplot.
data.loc[:, ["Close", "diff"]].plot(figsize=(8, 5), subplots=True)

Out[80]:
In [81]:
# Threshold the moving-average spread must exceed before a signal is raised.
# NOTE(review): the displayed prices are around 1.1-1.2, so a spread of 100 looks
# unreachable - confirm the intended scale.
sigdiff = 100.0
# +1 when the spread is above the threshold, otherwise 0.
data["Signal"] = np.where(data["diff"] > sigdiff, 1, 0)
# -1 when the spread is below the negative threshold. Fall back to the existing
# signal rather than 0, which would overwrite every +1 set above.
data["Signal"] = np.where(data["diff"] < -sigdiff, -1, data["Signal"])
data[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 8))

Out[81]:
In [82]:
# Log return of the close relative to the previous row.
data["Returns"] = np.log(data["Close"] / data["Close"].shift(1))

# Apply the previous row's signal to the current row's return. Using the
# same-row signal would trade on information not yet available (look-ahead
# bias); this also matches how 'Stance' is shifted for the earlier strategy.
data["Strategy"] = data["Signal"].shift(1) * data["Returns"]
# Running total of the strategy's log returns.
data["Earnings"] = data["Strategy"].cumsum()
data[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))

Out[82]: