StockPrediction/MultiLayer_Perceptron.py

119 lines
3.3 KiB
Python
Raw Permalink Normal View History

2024-05-11 21:10:37 +00:00
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Aggiunta EMA
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# per quando su usano ma fino a 200
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
#lets try sklearn
from sklearn.neural_network import MLPClassifier
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)
train_score = clf.score(X_train, Y_train)
score = clf.score(X_test, Y_test)
print("sklearn score, all default: ", score, " train ", train_score)
with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
f.write(f"{time_window};{train_score};{score};\n")