StockPrediction/MultiLayer_Perceptron.py

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

# Eight colors taken from seaborn's "rocket" palette.
colors = sns.color_palette("rocket", 8)

# Ram's colors, if desired; each entry is annotated with its index and nickname.
seshadri = [
    '#c3121e',  # 0 sangre
    '#0348a1',  # 1 neptune
    '#ffb01c',  # 2 pumpkin
    '#027608',  # 3 clover
    '#0193b0',  # 4 denim
    '#9c5300',  # 5 cocoa
    '#949c01',  # 6 cumin
    '#7104b5',  # 7 berry
]

# Fraction of the samples intended for the training split.
train_quota = 0.8

def enlarge_lag(to_enlarge, time_window=1):
    """Stack each row with its predecessors into one lagged feature row.

    Output row ``i`` is the concatenation of input rows
    ``i + time_window - 1, i + time_window - 2, ..., i`` (most recent first),
    so it has ``time_window * n_columns`` entries.

    Args:
        to_enlarge: 2-D NumPy array of shape ``(n_rows, n_columns)``.
        time_window: Number of consecutive rows merged into each output row.

    Returns:
        2-D array of shape ``(n_rows - time_window + 1, time_window * n_columns)``.
        When ``time_window`` exceeds ``n_rows`` the result has zero rows but
        keeps the expected column count.

    Raises:
        ValueError: If ``time_window`` is smaller than 1.
    """
    if time_window < 1:
        raise ValueError("time_window must be at least 1")

    n_rows, n_cols = to_enlarge.shape
    n_windows = n_rows - time_window + 1
    if n_windows <= 0:
        # Not enough rows for a single window: return an empty 2-D array
        # instead of a shapeless 1-D one.
        return np.empty((0, time_window * n_cols), dtype=to_enlarge.dtype)

    # Reverse each slice so the newest row comes first, then flatten it.
    return np.array([
        to_enlarge[start:start + time_window][::-1].ravel()
        for start in range(n_windows)
    ])


# Number of consecutive rows merged into each sample: taken from the first
# command-line argument when one is given, otherwise 1.
time_window = int(sys.argv[1]) if len(sys.argv) > 1 else 1

# Uncomment to override the value for manual runs.
#time_window = 10

# Load the pickled MSFT price table.
stock_data = pd.read_pickle("data/MSFT_data.pkl")

# Raw columns as NumPy arrays.
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

# Same-row return: relative move from Open to Close.
open_price = stock_data["Open"]
close_price = stock_data["Close"]
daily_returns = ((close_price - open_price) / open_price).to_numpy()

std_scaler = preprocessing.StandardScaler()
minmax_scaler = preprocessing.MinMaxScaler()

# One row per sample, columns = (return, volume).
features = np.column_stack((daily_returns, volume))

# Return and volume are standardized column by column.
# NOTE(review): both scalers are fitted on the full history, i.e. including
# the rows that end up in the test split further down.
part_features = std_scaler.fit_transform(features)

# Add the exponential moving averages of the close (spans 20 and 50).
EMA_20, EMA_50 = (
    stock_data["Close"].ewm(span=span, adjust=False).mean() for span in (20, 50)
)
EMAs = np.column_stack((EMA_20, EMA_50))
# Flatten to a single column before scaling so both EMAs share one min/max,
# then restore the two-column layout.
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(EMAs.shape)

# Variant with an additional 200-span EMA (disabled):
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)

# Final feature matrix: standardized (return, volume) followed by the scaled EMAs.
norm_features = np.concatenate((part_features, norm_EMAs), axis=1)


# Binary labels for the following row: Y[i] is 1 when daily_returns[i + 1]
# is non-negative and 0 otherwise.  The last row has no successor, so there
# is one label fewer than there are feature rows.
Y = (daily_returns[1:] >= 0).astype(float)

# For when MAs up to 200 are used
#Y = Y[49:]
#Y = Y[199:]

print(norm_features.shape, Y.shape)

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    # Window i ends at original row i + time_window - 1, so the labels of the
    # first time_window - 1 rows are dropped to keep X and Y aligned.
    Y = Y[time_window-1:]

# Chronological split driven by the module-level train_quota.
train_size = int(norm_features.shape[0] * train_quota)
X_train = norm_features[:train_size]
Y_train = Y[:train_size]

# The final feature row has no label (see above), hence the -1.
X_test = norm_features[train_size:-1]
Y_test = Y[train_size:]


# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()


#lets try sklearn
from sklearn.neural_network import MLPClassifier

# Results file; one line is appended per run.
results_path = "plots/data/MLP_20_10_5_2.csv"
# Create the output directory before training so that a missing folder cannot
# discard the scores after a long fit.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# Earlier alternative, kept for reference:
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)

# MLP with four hidden layers of 20, 10, 5 and 2 units.
clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)
train_score = clf.score(X_train, Y_train)
score = clf.score(X_test, Y_test)
print("sklearn score, all default: ", score, " train ", train_score)

# Append "time_window;train_score;test_score;" for this run.
with open(results_path, "a") as f:
    f.write(f"{time_window};{train_score};{score};\n")
Final Commit 2024-05-11 21:10:37 +00:00
			`import numpy as np`
			`import matplotlib.pyplot as plt`
			`import pandas as pd`
			`import seaborn as sns`

			`import yfinance as yf`
			`from datetime import datetime`
			`import os, sys`

			`from sklearn import preprocessing`

			`#bodacious colors`
			`colors=sns.color_palette("rocket", 8)`
			`#Ram's colors, if desired`
			`seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']`
			`# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry`

			`train_quota = 0.8`

			`def enlarge_lag(to_enlarge, time_window=1):`
			`# to_enlarge is the data already present, should be a numpy array`
			`enlarged = []`
			`for i in range(to_enlarge.shape[0] - time_window + 1):`
			`new_element = []`
			`for j in range(time_window):`
			`new_element.extend(to_enlarge[i + time_window - 1 - j, :])`
			`enlarged.append(new_element)`

			`return np.array(enlarged)`


			`if len(sys.argv) > 1:`
			`time_window = int(sys.argv[1])`
			`else:`
			`time_window = 1`

			`#time_window = 10`

			`stock_data = pd.read_pickle("data/MSFT_data.pkl")`

			`daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()`
			`prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()`
			`volume = stock_data["Volume"].to_numpy()`

			`minmax_scaler = preprocessing.MinMaxScaler()`
			`std_scaler = preprocessing.StandardScaler()`

			`features = np.vstack((daily_returns, volume)).T`

			`# Necessary for MAs`
			`part_features = std_scaler.fit_transform(features)`

			`# Aggiunta EMA`
			`EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()`
			`EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()`
			`EMAs = np.vstack((EMA_20, EMA_50)).T`
			`norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)`

			`#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()`
			`#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T`
			`#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)`
			`norm_features = np.hstack((part_features, norm_EMAs))`


			`# merge data into 2d numpy array`
			`Y = np.zeros(features.shape[0] - 1)`


			`for i in range(Y.size):`
			`if daily_returns[i+1] >= 0:`
			`Y[i] = 1`
			`else:`
			`Y[i] = 0`

			`# per quando su usano ma fino a 200`
			`#Y = Y[49:]`
			`#Y = Y[199:]`

			`print(norm_features.shape, Y.shape)`

			`if time_window > 1:`
			`norm_features = enlarge_lag(norm_features, time_window)`
			`Y = Y[time_window-1:]`

			`train_size = int(norm_features.shape[0] * 0.8)`
			`X_train = norm_features[:train_size, ]`
			`Y_train = Y[:train_size]`

			`X_test = norm_features[train_size:-1, ]`
			`Y_test = Y[train_size:]`



			`# Iterations vs Accuracy plot`
			`#plt.figure()`
			`#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)`
			`#plt.xlabel("Iterations")`
			`#plt.ylabel("Accuracy")`
			`#`
			`## Iterations vs Loss plot`
			`#plt.figure()`
			`#plt.plot(np.arange(0, len(acc_array)) * 100, losses)`
			`#plt.xlabel("Iterations")`
			`#plt.ylabel("Losses")`
			`#`
			`#plt.show()`



			`#lets try sklearn`
			`from sklearn.neural_network import MLPClassifier`
			`#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)`
			`clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)`
			`train_score = clf.score(X_train, Y_train)`
			`score = clf.score(X_test, Y_test)`
			`print("sklearn score, all default: ", score, " train ", train_score)`

			`with open("plots/data/MLP_20_10_5_2.csv", "a") as f:`
			`f.write(f"{time_window};{train_score};{score};\n")`