StockPrediction/autocorr_plot.py

135 lines
4.5 KiB
Python
Raw Permalink Normal View History

2024-05-11 21:10:37 +00:00
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
# Eight colours sampled from seaborn's "rocket" palette.
colors = sns.color_palette("rocket", 8)
# Ram's palette, if desired; each entry is annotated with its list index and name.
seshadri = [
    '#c3121e',  # 0 sangre
    '#0348a1',  # 1 neptune
    '#ffb01c',  # 2 pumpkin
    '#027608',  # 3 clover
    '#0193b0',  # 4 denim
    '#9c5300',  # 5 cocoa
    '#949c01',  # 6 cumin
    '#7104b5',  # 7 berry
]
# Load the MSFT table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
stock_data = pd.read_pickle("data/MSFT_data.pkl")

# Open-to-close return of each row, expressed in percent.
open_column = stock_data["Open"]
close_column = stock_data["Close"]
daily_returns = ((close_column - open_column) / open_column).to_numpy() * 100

# Raw price columns (Open, High, Low, Close) and traded volume as numpy arrays.
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

# Scalers shared by the normalisation steps further down the script.
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()

# Stack returns and volume as two rows, then transpose so that each row of
# `features` is (return, volume) for one row of `stock_data`.
returns_and_volume = (daily_returns, volume)
features = np.vstack(returns_and_volume).T
# ---------------------------------------------------------------------------
# Disabled alternatives, kept for reference.
# ---------------------------------------------------------------------------
# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T
# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)
# Adding prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))

# Standardised (return, volume) columns; every moving-average variant below
# appends its own columns to this array.
part_features = std_scaler.fit_transform(features)

# Adding SMAs (disabled). The first 49 / 199 rows are dropped because the
# 50- / 200-row rolling means are not defined there.
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))
#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))

# Adding EMAs (active variant): exponential moving averages of the close
# with spans of 20 and 50.
close_for_ema = stock_data["Close"]
EMA_20 = close_for_ema.ewm(span=20, adjust=False).mean()
EMA_50 = close_for_ema.ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
# Flatten to a single column before scaling so that both EMAs are mapped with
# one shared min/max, then restore the two-column layout.
ema_single_column = EMAs.reshape(-1, 1)
norm_EMAs = minmax_scaler.fit_transform(ema_single_column).reshape(-1, 2)
# Three-EMA variant (disabled).
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)

# Final feature matrix; columns are (return, volume, EMA20, EMA50).
norm_features = np.hstack((part_features, norm_EMAs))
# Correlation heatmap of the normalised features.
# Columns 0..3 of `norm_features` are labelled in this order.
feature_names = ("Daily Returns", "Volume", "EMA20", "EMA50")
dfeat = dict(zip(feature_names, norm_features.T))
corr = pd.DataFrame(dfeat).corr()

fig = plt.figure(num=1, figsize=(11, 10))
# Fix the colour scale to the full [-1, 1] correlation range, centred on 0.
sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap="mako")
plt.tick_params(labelsize=14)

# savefig does not create missing directories, so make sure the output
# folder exists before writing the image.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/Correlation_EMAs.png", dpi=300)
# Build the binary target: Y[i] is 1.0 when the return of the FOLLOWING row
# (daily_returns[i + 1]) is non-negative and 0.0 otherwise. The last row has
# no successor, so Y has one entry fewer than daily_returns.
# The comparison is vectorised; NaN returns compare False and map to 0.0.
Y = (daily_returns[1:] >= 0).astype(float)
# When moving averages with windows up to 200 are used, drop the same
# leading rows from the labels.
#Y = Y[49:]
#Y = Y[199:]
# Print both shapes so the row counts of the features and labels can be compared.
print(norm_features.shape, Y.shape)
# Autocorrelation of returns, absolute returns and volume on one set of axes.
fig, ax = plt.subplots(figsize=(15,10))

# Tick styling: labels on the bottom/left only, inward ticks on all four sides.
ax.minorticks_on()
ax.tick_params(labelsize=14, labelbottom=True, labeltop=False, labelright=False, labelleft=True)
for tick_kind, tick_length in (("minor", 5), ("major", 10)):
    ax.tick_params(direction='in', which=tick_kind, length=tick_length,
                   bottom=True, top=True, left=True, right=True)

# Show only the first 500 lags; set before plotting so the limit is fixed.
ax.set_xlim([0, 500])
#ax.set_ylim([-0.5, 0.5])

# (series, line colour, legend label) for each curve, drawn in this order.
curves = (
    (daily_returns, seshadri[0], "Daily Returns"),
    (np.abs(daily_returns), seshadri[1], "Absolute Daily Returns"),
    (volume, seshadri[2], "Volume"),
)
for curve_values, curve_color, curve_label in curves:
    pd.plotting.autocorrelation_plot(curve_values, ax=ax, color=curve_color, label=curve_label)

# Switch the grid off again after the plotting calls.
ax.grid(False)
ax.set_xlabel(r'Lag', fontsize=14)
ax.set_ylabel(r'Autocorrelation',fontsize=14) # label the y axis
# Place the legend explicitly in the upper-right corner of the axes.
ax.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99))

# savefig does not create missing directories, so make sure the output
# folder exists before writing the image.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/Autocorrelation_returns_volume_abs.png", dpi=300)