230 lines
7.0 KiB
Python
230 lines
7.0 KiB
Python
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import pandas as pd
|
||
|
import seaborn as sns
|
||
|
|
||
|
import yfinance as yf
|
||
|
from datetime import datetime
|
||
|
import os, sys
|
||
|
|
||
|
from sklearn import preprocessing
|
||
|
|
||
|
from tensorflow.keras.models import Sequential
|
||
|
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
|
||
|
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
|
||
|
|
||
|
# Plot palettes.
# `colors`: eight entries from seaborn's "rocket" palette.
colors = sns.color_palette("rocket", n_colors=8)

# `seshadri`: alternative hand-picked palette ("Ram's colors"), if desired.
seshadri = [
    '#c3121e',  # 0 sangre
    '#0348a1',  # 1 neptune
    '#ffb01c',  # 2 pumpkin
    '#027608',  # 3 clover
    '#0193b0',  # 4 denim
    '#9c5300',  # 5 cocoa
    '#949c01',  # 6 cumin
    '#7104b5',  # 7 berry
]

# Raise numpy's summarisation threshold so the debug prints further down show
# arrays of up to one million elements in full.
np.set_printoptions(threshold=10**6)
|
||
|
|
||
|
def enlarge_lag(to_enlarge, time_window=1):
    """Append lagged copies of the features to every row.

    Output row ``i`` is the concatenation of input rows
    ``i + time_window - 1, i + time_window - 2, ..., i`` (most recent first),
    so each output row holds ``time_window`` consecutive observations.

    Args:
        to_enlarge: 2-D array-like of shape ``(n_rows, n_features)``.
        time_window: number of consecutive rows merged into one output row;
            must be at least 1.

    Returns:
        A new array of shape
        ``(max(n_rows - time_window + 1, 0), time_window * n_features)``.
        When the input has fewer rows than ``time_window`` the result is an
        empty 2-D array with the correct number of columns.

    Raises:
        ValueError: if ``to_enlarge`` is not 2-D or ``time_window`` < 1.
    """
    data = np.asarray(to_enlarge)
    if data.ndim != 2:
        raise ValueError(
            f"to_enlarge must be a 2-D array, got {data.ndim} dimension(s)"
        )
    if time_window < 1:
        raise ValueError(f"time_window must be >= 1, got {time_window}")

    n_rows, n_features = data.shape
    n_windows = n_rows - time_window + 1
    if n_windows <= 0:
        # Not enough rows for a single window: keep the result 2-D so that
        # callers can still rely on ``.shape[1]``.
        return np.empty((0, time_window * n_features), dtype=data.dtype)

    # The slice for a given lag holds, for every window, the row that lies
    # ``lag`` steps before the window's most recent row; stacking the slices
    # side by side yields the "most recent first" layout.
    return np.hstack(
        [data[time_window - 1 - lag:n_rows - lag] for lag in range(time_window)]
    )
|
||
|
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Data preparation: load prices, scale features, build labels, split.
# ---------------------------------------------------------------------------

# Fraction of the (windowed) samples used for training; the rest is the test set.
train_quota = 0.8

# Number of consecutive days merged into one sample. It is read from the first
# command-line argument; without an argument it defaults to 3, the value the
# script previously forced regardless of what was passed on the command line.
if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 3

# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
stock_data = pd.read_pickle("data/MSFT_data.pkl")

price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()

# NOTE: despite its name, `minmax_scaler` is a StandardScaler. The name is
# kept because it is a module-level variable.
minmax_scaler = preprocessing.StandardScaler()
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

# One row per day, columns = (close price, volume).
raw_features = np.column_stack((price, volume))

# Windows of size <= 1 are treated as "no lag", exactly like the check below.
effective_window = max(time_window, 1)
n_samples = raw_features.shape[0] - effective_window + 1
train_size = int(n_samples * train_quota)
if train_size < 1:
    raise ValueError(
        f"not enough data ({raw_features.shape[0]} rows) for "
        f"time_window={time_window}"
    )

# Fit the scaler only on the raw rows that feed training samples (training
# sample k uses raw rows k .. k + window - 1), so that no statistics from the
# test period leak into the features. The fitted scaler is then applied to the
# whole series.
n_fit_rows = train_size + effective_window - 1
minmax_scaler.fit(raw_features[:n_fit_rows])
norm_features = minmax_scaler.transform(raw_features)

# Labels: 1 when the next day's close is not lower than today's, else 0.
rets = np.diff(price)
bin_rets = (rets >= 0).astype(float)
bin_rets_np = np.array(bin_rets)

# Debug output.
print("occai")
print(rets)
print(bin_rets)
print("ocai")

Y = bin_rets

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    # Windowed row k ends on day k + time_window - 1, so drop the labels that
    # belong to the days consumed by the first window.
    Y = Y[time_window - 1:]

X_train = norm_features[:train_size]
Y_train = Y[:train_size].reshape(-1, 1)

# The last feature row has no "next day" and therefore no label: drop it.
X_test = norm_features[train_size:-1]
Y_test = Y[train_size:].reshape(-1, 1)
|
||
|
|
||
|
def LSTM_model():
    """Build and return the uncompiled binary classifier.

    Architecture: LSTM(20) -> Dense(10, relu) -> Dense(5, relu)
    -> Dense(1, sigmoid).

    The input shape is ``(X_train.shape[1], 1)``, read from the module-level
    ``X_train``, so ``X_train`` must exist before this function is called.
    """
    n_steps = X_train.shape[1]
    layers = [
        LSTM(units=20, input_shape=(n_steps, 1)),
        Dense(units=10, activation="relu"),
        Dense(units=5, activation="relu"),
        Dense(units=1, activation="sigmoid"),
    ]
    return Sequential(layers)
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Build, train and evaluate the classifier.
# ---------------------------------------------------------------------------
model = LSTM_model()
model.summary()
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['accuracy'])

# Checkpoint loading/saving is currently disabled: the model is retrained from
# scratch on every run.
# NOTE(review): X_train / X_test are 2-D here while the LSTM declares a trailing
# axis of size 1; this presumably relies on Keras adjusting the input rank
# -- TODO confirm for the Keras version in use.
fit_options = {"shuffle": True, "epochs": 50, "batch_size": 32}
model.fit(X_train, Y_train, **fit_options)

# One sigmoid output per test sample, flattened to 1-D.
raw_scores = model.predict(X_test)
prediction = raw_scores.flatten()
print("pred: ", prediction)

test_metrics = model.evaluate(X_test, Y_test)
print(test_metrics)
|
||
|
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
|
||
|
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
|
||
|
#print(predicted_rets)
|
||
|
#counter = 0
|
||
|
#for i in range(prediction.shape[0]-1):
|
||
|
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
|
||
|
# counter = counter + 1
|
||
|
|
||
|
#print("acc: ", counter/prediction.shape[0])
|
||
|
|
||
|
|
||
|
|
||
|
#test_prices = price[time_window - 1 + train_size:]
|
||
|
#pred_ret = []
|
||
|
#actual_ret = []
|
||
|
#for j in range(len(test_prices) - 1):
|
||
|
# # the predicted price is tomorrow's price; I want to compare it with tomorrow's actual return
|
||
|
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
|
||
|
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
|
||
|
#
|
||
|
#pred_ret_np = np.array(pred_ret)
|
||
|
#actual_ret_np = np.array(actual_ret)
|
||
|
#
|
||
|
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
|
||
|
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
|
||
|
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
|
||
|
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
|
||
|
#print(sign_comp)
|
||
|
#print(sign_comp_red_nottoomuch)
|
||
|
#print(sign_comp_red)
|
||
|
#print(sign_comp_red_alot)
|
||
|
|
||
|
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
|
||
|
#mape = calculate_mape(test_prices[1:], predicted_prices)
|
||
|
#
|
||
|
#print("RMSE: ", rmse)
|
||
|
#print("MAPE: ", mape)
|
||
|
#
|
||
|
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
|
||
|
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
|
||
|
#
|
||
|
#print("RMSE su 300 gg: ", rmse)
|
||
|
#print("MAPE su 300 gg: ", mape)
|
||
|
|
||
|
#plt.plot(pred_ret, color=seshadri[0])
|
||
|
#plt.plot(daily_returns[1:], color=seshadri[1])
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Plot the test labels against the model output.
# ---------------------------------------------------------------------------
fig = plt.figure(1, figsize=(12, 10))

# Y_test holds the binary up/down labels and `prediction` the sigmoid outputs,
# so the legend and axis describe directions/probabilities, not prices.
plt.plot(Y_test, color=seshadri[0], label="Actual direction (1 = up)")
plt.plot(prediction, color=seshadri[1], label="Predicted probability of an up move")

# Tick styling: inward ticks on all four sides, minor ticks enabled.
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)

plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Direction / predicted probability', fontsize=14)

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99))

# Create the output directory first so a missing folder does not make the
# script fail after the whole training run.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)

plt.show()
|
||
|
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
|
||
|
# f.write(f"{time_window};{train_score};{score};\n")
|