# StockPrediction/LSTM_advanced_returns.py
# (file-viewer header: 230 lines, 7.0 KiB, Python)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Plot palettes.
# `colors`: eight shades sampled from seaborn's "rocket" palette.
colors = sns.color_palette("rocket", 8)
# `seshadri`: Ram's hand-picked hex colors, indexed 0-7 (use if desired).
seshadri = [
    '#c3121e',  # 0 sangre
    '#0348a1',  # 1 neptune
    '#ffb01c',  # 2 pumpkin
    '#027608',  # 3 clover
    '#0193b0',  # 4 denim
    '#9c5300',  # 5 cocoa
    '#949c01',  # 6 cumin
    '#7104b5',  # 7 berry
]
# Let numpy print arrays of up to one million elements in full instead of
# abbreviating them with "...".
np.set_printoptions(threshold=1000000)
def enlarge_lag(to_enlarge, time_window=1):
    """Append lagged copies of the features to every row of a 2-D array.

    Output row ``i`` is the concatenation of input rows
    ``i + time_window - 1, i + time_window - 2, ..., i`` (newest first), so
    each output row carries the current observation followed by its
    ``time_window - 1`` predecessors.

    Args:
        to_enlarge: 2-D array-like of shape ``(n_rows, n_features)``.
        time_window: Number of consecutive rows merged into one output row.
            Must be at least 1.

    Returns:
        Array of shape ``(n_rows - time_window + 1, time_window * n_features)``.
        When ``time_window`` exceeds ``n_rows`` the result has zero rows but
        keeps the same number of columns, so it stays 2-D.

    Raises:
        ValueError: If ``time_window`` is smaller than 1.
    """
    if time_window < 1:
        raise ValueError("time_window must be >= 1, got %r" % (time_window,))

    data = np.asarray(to_enlarge)
    n_rows, n_features = data.shape
    n_windows = n_rows - time_window + 1
    if n_windows <= 0:
        # Not enough rows for a single window: return an empty 2-D array
        # rather than a shapeless 1-D one.
        return np.empty((0, time_window * n_features), dtype=data.dtype)

    # Block `lag` holds the rows shifted back by `lag` steps; stacking the
    # blocks side by side puts the newest observation in the first columns.
    lagged_blocks = [
        data[time_window - 1 - lag:n_rows - lag]
        for lag in range(time_window)
    ]
    return np.hstack(lagged_blocks)
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
# Fraction of the samples assigned to the training set.
train_quota = 0.8
# Number of consecutive days merged into one sample. The first command-line
# argument selects it; without an argument it defaults to 3.
if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 3
if time_window < 1:
    raise ValueError("time_window must be >= 1, got %d" % time_window)

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()

# ---------------------------------------------------------------------------
# Feature scaling
# ---------------------------------------------------------------------------
# Despite its name, `minmax_scaler` is currently a StandardScaler; the
# min-max variant is kept below as a disabled alternative.
#minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler = preprocessing.StandardScaler()
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
# Disabled experiment: exponential moving averages as extra features.
#EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
#EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
# One row per day, columns = (close price, volume), each column scaled.
# NOTE(review): the scaler is fitted on the whole history, so statistics of
# the test period leak into the training features; consider fitting on the
# training rows only.
norm_features = minmax_scaler.fit_transform(np.vstack((price, volume)).T)
#norm_features = np.hstack((norm_features, norm_EMAs))

# ---------------------------------------------------------------------------
# Targets: direction of the next day's price change
# ---------------------------------------------------------------------------
# rets[i] = price[i + 1] - price[i]; the label is 1.0 when the change is
# non-negative and 0.0 otherwise.
rets = np.diff(price)
bin_rets = (rets >= 0).astype(float)
bin_rets_np = np.array(bin_rets)
#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))
print("occai")
print(rets)
print(bin_rets)
print("ocai")
# merge data into 2d numpy array
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
# Y[i] = norm_features[i+1, 0]
Y = bin_rets

# ---------------------------------------------------------------------------
# Lag expansion and train/test split
# ---------------------------------------------------------------------------
if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    # The first lagged sample ends on day `time_window - 1`, so drop the
    # labels of the earlier days to keep features and labels aligned.
    Y = Y[time_window - 1:]
train_size = int(norm_features.shape[0] * train_quota)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size].reshape(-1, 1)
# The last feature row has no following day, hence no label: exclude it.
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:].reshape(-1, 1)
def LSTM_model(n_timesteps=None):
    """Build the (uncompiled) LSTM binary classifier.

    Layers: LSTM(20) -> Dense(10, relu) -> Dense(5, relu) -> Dense(1, sigmoid).
    The network takes sequences of ``n_timesteps`` steps with one value per
    step.

    Args:
        n_timesteps: Length of the input sequence. When omitted, the number
            of columns of the module-level ``X_train`` is used.

    Returns:
        The assembled ``Sequential`` model; the caller compiles and fits it.
    """
    if n_timesteps is None:
        n_timesteps = X_train.shape[1]

    model = Sequential()
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first layer, which newer Keras releases discourage.
    model.add(Input(shape=(n_timesteps, 1)))
    model.add(LSTM(units=20))
    # Disabled alternatives tried for the first layers:
    #model.add(Dense(units = 20, activation="relu", input_shape=(X_train.shape[1],)))
    #model.add(Dropout(0.3))
    #model.add(LSTM(units=50, return_sequences=True))
    #model.add(Dropout(0.2))
    model.add(Dense(units=10, activation="relu"))
    model.add(Dense(units=5, activation="relu"))
    # Single sigmoid unit: output in (0, 1), matching the 0/1 labels.
    model.add(Dense(units=1, activation="sigmoid"))
    return model
# Build the network, print its layer summary, then configure training:
# Adam optimizer, binary cross-entropy loss, accuracy as reported metric.
model = LSTM_model()
model.summary()
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['accuracy'])

# Disabled: reuse previously saved weights instead of retraining.
#if os.path.exists("./checkpoints/checkpoint"):
# model.load_weights("./checkpoints/my_checkpoint")
#else:
fit_options = dict(shuffle=True, epochs=50, batch_size=32)
model.fit(X_train, Y_train, **fit_options)
#model.save_weights("./checkpoints/my_checkpoint")

# Score the held-out samples: flatten the model outputs to a 1-D array,
# then print the values returned by evaluate() on the test set.
prediction = model.predict(X_test).flatten()
print("pred: ", prediction)
test_metrics = model.evaluate(X_test, Y_test)
print(test_metrics)
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
# # the predicted price is tomorrow's price; I want to compare it with tomorrow's actual return
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE su 300 gg: ", rmse)
#print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
# Plot the held-out labels against the model outputs and save the figure.
fig = plt.figure(1, figsize=(12,10))
# Y_test holds 0/1 direction labels and `prediction` the sigmoid outputs,
# so the legend and y-axis describe directions, not prices.
plt.plot(Y_test, color=seshadri[0], label="Actual direction (1 = up or flat, 0 = down)")
plt.plot(prediction, color=seshadri[1], label="Predicted probability of up move")
#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
# Inward ticks on all four sides; minor ticks half as long as major ones.
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Direction label / predicted probability', fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # legend pinned just inside the upper-right corner
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")