135 lines
4.5 KiB
Python
135 lines
4.5 KiB
Python
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import pandas as pd
|
||
|
import seaborn as sns
|
||
|
|
||
|
import yfinance as yf
|
||
|
from datetime import datetime
|
||
|
import os, sys
|
||
|
|
||
|
from sklearn import preprocessing
|
||
|
|
||
|
# Eight colors taken from seaborn's "rocket" palette.
colors = sns.color_palette("rocket", n_colors=8)

# Ram's colors, if desired (position in the list -> color name).
seshadri = [
    '#c3121e',  # 0 sangre
    '#0348a1',  # 1 neptune
    '#ffb01c',  # 2 pumpkin
    '#027608',  # 3 clover
    '#0193b0',  # 4 denim
    '#9c5300',  # 5 cocoa
    '#949c01',  # 6 cumin
    '#7104b5',  # 7 berry
]
|
||
|
|
||
|
# Load the pickled price table. The statements below read its "Open", "High",
# "Low", "Close" and "Volume" columns.
# NOTE(review): unpickling can execute arbitrary code -- only load files you
# produced yourself or otherwise trust.
stock_data = pd.read_pickle("data/MSFT_data.pkl")

# Open-to-close return of every row, expressed in percent.
daily_returns = (
    stock_data["Close"].sub(stock_data["Open"]).div(stock_data["Open"]).to_numpy() * 100
)

# The four price columns as one 2-D array (column order: Open, High, Low, Close).
prices = stock_data.loc[:, ["Open", "High", "Low", "Close"]].to_numpy()

# Traded volume as a 1-D array.
volume = stock_data.loc[:, "Volume"].to_numpy()
|
||
|
|
||
|
# One scaler of each kind. std_scaler is fitted at the end of this section;
# minmax_scaler is fitted later on the moving-average columns.
std_scaler = preprocessing.StandardScaler()
minmax_scaler = preprocessing.MinMaxScaler()

# Raw feature matrix: one row per observation, columns = (daily return, volume).
features = np.column_stack((daily_returns, volume))

# ---------------------------------------------------------------------------
# Alternative normalisations, kept (disabled) for experimentation.
# ---------------------------------------------------------------------------

# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T

# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)

# Adding prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))

# ---------------------------------------------------------------------------
# Active path: standardised returns/volume. Necessary for the moving-average
# variants, which append their own columns to this matrix.
# ---------------------------------------------------------------------------
part_features = std_scaler.fit_transform(features)
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Adding SMAs (disabled). The leading rows are dropped because a rolling mean
# over N rows is undefined for the first N-1 rows.
# ---------------------------------------------------------------------------
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))

#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))

# ---------------------------------------------------------------------------
# Adding EMAs (active): exponential moving averages of the close, spans 20/50.
# ---------------------------------------------------------------------------
EMA_20, EMA_50 = (
    stock_data["Close"].ewm(span=span, adjust=False).mean() for span in (20, 50)
)
EMAs = np.column_stack((EMA_20, EMA_50))

# The scaler is fitted on the values flattened into a single column, so both
# EMA columns are mapped with one shared min/max; the result is then folded
# back into its original two-column shape.
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(EMAs.shape)

# Three-EMA variant (disabled)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)

# Final feature matrix; columns: daily return, volume, EMA20, EMA50.
norm_features = np.concatenate((part_features, norm_EMAs), axis=1)
|
||
|
|
||
|
# Correlation heatmap of the four normalised feature columns.
# Column order of norm_features: daily return, volume, EMA20, EMA50.
dfeat = {
    "Daily Returns": norm_features[:, 0],
    "Volume": norm_features[:, 1],
    "EMA20": norm_features[:, 2],
    "EMA50": norm_features[:, 3],
}

# Pairwise correlation matrix (DataFrame.corr with its default method).
corr = pd.DataFrame(dfeat).corr()

fig = plt.figure(1, (11, 10))
# Colour scale pinned to [-1, 1] and centred on 0.
sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap="mako")
plt.tick_params(labelsize=14)

# savefig does not create missing directories; make sure the target exists so
# the script does not fail after all the computation has been done.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/Correlation_EMAs.png", dpi=300)
|
||
|
|
||
|
# Binary targets: Y[i] is 1.0 when the daily return of row i+1 is non-negative
# and 0.0 otherwise. The last row has no "next" row, so Y holds one element
# fewer than daily_returns. Vectorised equivalent of looping over every index.
Y = (daily_returns[1:] >= 0).astype(float)

# For when moving averages up to 200 are used: drop the same leading rows that
# were removed from the feature matrix.
#Y = Y[49:]
#Y = Y[199:]

# NOTE(review): norm_features keeps one row more than Y (its last row has no
# label); presumably the consumer drops that row -- confirm before training.
print(norm_features.shape, Y.shape)
|
||
|
|
||
|
# Autocorrelation of daily returns, absolute daily returns and volume,
# drawn on a single axis and saved to plots/.
fig, ax = plt.subplots(figsize=(15, 10))

# Tick styling: minor ticks on, inward ticks on all four sides, labels only
# on the bottom and left axes.
ax.minorticks_on()
ax.tick_params(labelsize=14, labelbottom=True, labeltop=False, labelright=False, labelleft=True)
ax.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
ax.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)

# Only show the first 500 lags.
ax.set_xlim(0, 500)
#ax.set_ylim([-0.5, 0.5])

# One curve per series, all on the same axis.
for _series, _color, _label in (
    (daily_returns, seshadri[0], "Daily Returns"),
    (np.abs(daily_returns), seshadri[1], "Absolute Daily Returns"),
    (volume, seshadri[2], "Volume"),
):
    pd.plotting.autocorrelation_plot(_series, ax=ax, color=_color, label=_label)

# Grid, axis labels and legend are set after plotting so that these settings
# are the ones that end up in the saved figure.
ax.grid(False)
ax.set_xlabel(r'Lag', fontsize=14)
ax.set_ylabel(r'Autocorrelation', fontsize=14)

# Legend pinned just inside the upper-right corner of the axes.
ax.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99))

# savefig does not create missing directories; make sure the target exists.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/Autocorrelation_returns_volume_abs.png", dpi=300)
|