Merge pull request 'dev' (#1) from dev into main

Reviewed-on: #1
master_roby3 2024-05-11 21:19:15 +00:00
commit b0fe8c7a37
51 changed files with 26440 additions and 0 deletions

222
LSTM.py 100644

@ -0,0 +1,222 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
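# Example: with time_window=3, row i of the result is the concatenation
# [x[i+2], x[i+1], x[i]] (most recent day first), so the output has shape
# (n_rows - time_window + 1, time_window * n_features).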
#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
"""
Calculate the Root Mean Squared Error (RMSE)
"""
rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
return rmse
def calculate_mape(y_true, y_pred):
"""
Calculate the Mean Absolute Percentage Error (MAPE) %
"""
y_pred, y_true = np.array(y_pred), np.array(y_true)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
return mape
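# Quick check: y_true=[100, 200], y_pred=[110, 190] gives
# mean(0.10, 0.05) * 100 = 7.5 %.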
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
features = np.vstack((price, volume)).T
# Necessary for MAs
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))
# Build the targets: next day's normalized closing price
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
Y[i] = norm_features[i+1, 0]
time_window = 20  # NOTE: hard-coded override of the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
print(norm_features.shape, Y.shape)
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
return model
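# Note: input_shape=(X_train.shape[1], 1) declares a sequence of
# time_window lagged prices with one feature per step, i.e. each sample
# is expected in shape (time_window, 1).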
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="mean_squared_error"
)
# Save weights only for best model
checkpointer = ModelCheckpoint(
filepath = 'weights_best.hdf5',
verbose = 2,
save_best_only = True
)
if os.path.exists("./checkpoints/checkpoint"):
model.load_weights("./checkpoints/my_checkpoint")
else:
model.fit(
X_train,
Y_train,
epochs=25,
batch_size = 32,
callbacks = [checkpointer]
)
model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test)
predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
test_prices = price[time_window - 1 + train_size:]
pred_ret = []
actual_ret = []
for j in range(len(test_prices) - 1):
# the predicted price is tomorrow's price; compare it with tomorrow's actual return
pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
pred_ret_np = np.array(pred_ret)
actual_ret_np = np.array(actual_ret)
sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
print(sign_comp)
print(sign_comp_red_nottoomuch)
print(sign_comp_red)
print(sign_comp_red_alot)
rmse = calculate_rmse(test_prices[1:], predicted_prices)
mape = calculate_mape(test_prices[1:], predicted_prices)
print("RMSE: ", rmse)
print("MAPE: ", mape)
rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
print("RMSE su 300 gg: ", rmse)
print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(test_prices, color=seshadri[0], label="Registered Closing Price")
plt.plot(predicted_prices, color=seshadri[1], label="Prediction")
#plot params
plt.xlim([0,1200])
plt.ylim([100,400])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Price (USD)',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/First_Attempt_LSTM_2.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")

238
LSTM_advanced.py 100644

@ -0,0 +1,238 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
np.set_printoptions(threshold=100)
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
"""
Calculate the Root Mean Squared Error (RMSE)
"""
rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
return rmse
def calculate_mape(y_true, y_pred):
"""
Calculate the Mean Absolute Percentage Error (MAPE) %
"""
y_pred, y_true = np.array(y_pred), np.array(y_true)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
return mape
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
#features = np.vstack((price, volume)).T
# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
if r >= 0:
bin_rets[i] = 1
else:
bin_rets[i] = 0
bin_rets_np = np.array(bin_rets)
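# (equivalent vectorized form: bin_rets = (rets >= 0).astype(float))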
#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))
print("occai")
print(rets)
print(bin_rets)
print("ocai")
# Build the targets: binary next-day return direction
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
# Y[i] = norm_features[i+1, 0]
Y = bin_rets
time_window = 20  # NOTE: hard-coded override of the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 20, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
#model.add(LSTM(units=50, return_sequences=True))
#model.add(Dropout(0.2))
model.add(LSTM(units=20))
model.add(Dropout(0.2))
model.add(Dense(units=5))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation="sigmoid"))
return model
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="mean_squared_error"
)
#if os.path.exists("./checkpoints/checkpoint"):
# model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
X_train,
Y_train,
shuffle=True,
epochs=20,
batch_size=20
)
#model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test)
print(prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
# # the predicted price is tomorrow's price; compare it with tomorrow's actual return
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE su 300 gg: ", rmse)
#print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Return Direction")
plt.plot(prediction, color=seshadri[1], label="Prediction")
#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Return direction / predicted probability',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")


@ -0,0 +1,230 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
np.set_printoptions(threshold=1000000)
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
#minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler = preprocessing.StandardScaler()  # NOTE: despite the name, this is now a StandardScaler
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
#EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
#EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(np.vstack((price, volume)).T)
#norm_features = np.hstack((norm_features, norm_EMAs))
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
if r >= 0:
bin_rets[i] = 1
else:
bin_rets[i] = 0
bin_rets_np = np.array(bin_rets)
#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))
print("occai")
print(rets)
print(bin_rets)
print("ocai")
# Build the targets: binary next-day return direction
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
# Y[i] = norm_features[i+1, 0]
Y = bin_rets
time_window = 3  # NOTE: hard-coded override of the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size].reshape(-1, 1)
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:].reshape(-1, 1)
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 20, input_shape=(X_train.shape[1], 1)))
#model.add(Dense(units = 20, activation="relu", input_shape=(X_train.shape[1],)))
#model.add(Dropout(0.3))
#model.add(LSTM(units=50, return_sequences=True))
#model.add(Dropout(0.2))
model.add(Dense(units=10, activation="relu"))
model.add(Dense(units=5, activation="relu"))
model.add(Dense(units=1, activation="sigmoid"))
return model
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="binary_crossentropy",
metrics=['accuracy']
)
#if os.path.exists("./checkpoints/checkpoint"):
# model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
X_train,
Y_train,
shuffle=True,
epochs=50,
batch_size=32
)
#model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test).flatten()
print("pred: ", prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
# # the predicted price is tomorrow's price; compare it with tomorrow's actual return
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE su 300 gg: ", rmse)
#print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Return Direction")
plt.plot(prediction, color=seshadri[1], label="Prediction")
#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Return direction / predicted probability',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")


@ -0,0 +1,119 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
# Build the targets: 1 if the next day's return is non-negative, else 0
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for runs that use MAs up to 200 (trim the targets to match)
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# let's try sklearn
from sklearn.neural_network import MLPClassifier
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)
train_score = clf.score(X_train, Y_train)
score = clf.score(X_test, Y_test)
print("sklearn score, all default: ", score, " train ", train_score)
with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
f.write(f"{time_window};{train_score};{score};\n")


@ -0,0 +1,7 @@
# Stock Price Prediction
This is a simple stock price prediction project that tests various configurations.
The final goal is to study how performance improves as the technology and the methods used change.
Final try

145
appunti.md 100644

@ -0,0 +1,145 @@
# Notes on project development
The reference ticker for the first part of the study is Microsoft. Why:
* highly capitalized
* long-lived
* it is a tech stock, but one that does not undergo dynamics too "strange" compared to normal market behavior (e.g. Tesla)
First, we test the performance of a simple, untrained model that tries to predict the sign of the next day's return from the previous day's return (+ follows + and - follows -); see the sketch below.
A small bar chart is also included to give an idea of the distribution of the returns.
winrate detected: 0.47638123852445335
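For reference, a minimal sketch of this baseline (assuming the same `data/MSFT_data.pkl` and open-to-close returns used by the scripts in this repo):

```python
import numpy as np
import pandas as pd

stock_data = pd.read_pickle("data/MSFT_data.pkl")
rets = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()

# predict that tomorrow's return sign repeats today's, and count the hits
winrate = np.mean(np.sign(rets[1:]) == np.sign(rets[:-1]))
print("winrate detected:", winrate)
```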
First tests with logistic regression, adding past days as features: a slight increase, but too many days leads to overfitting
Trying the same test while adding some indicators (e.g. moving averages) -> there is a slight improvement (see the sketch below)
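A minimal sketch of this setup, with a hypothetical `time_window` and train split (the repo's actual runs live in the logistic_regression scripts, which stack past days with `enlarge_lag`):

```python
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

stock_data = pd.read_pickle("data/MSFT_data.pkl")
rets = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()

time_window = 5  # hypothetical lag depth
# each row stacks the last time_window daily returns, oldest first
X = np.column_stack([rets[i:len(rets) - time_window + i] for i in range(time_window)])
Y = (rets[time_window:] >= 0).astype(int)  # sign of the next day's return

split = int(len(X) * 0.8)
clf = LogisticRegression().fit(X[:split], Y[:split])
print("test accuracy:", clf.score(X[split:], Y[split:]))
```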
In the MLP data file names, the numbers are the hidden layer sizes, in order of depth
First, a very simple test; architecture below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 20, 50) 10400
dropout (Dropout) (None, 20, 50) 0
lstm_1 (LSTM) (None, 20, 50) 20200
dropout_1 (Dropout) (None, 20, 50) 0
lstm_2 (LSTM) (None, 50) 20200
dropout_2 (Dropout) (None, 50) 0
dense (Dense) (None, 1) 51
=================================================================
Total params: 50,851
Trainable params: 50,851
Non-trainable params: 0
just 25 epochs and a 0.8 / 0.2 split
the resulting plot is the one shown
with data (win rate on returns):
whole testing set (): 0.4991624790619765
first 200 days: 0.605
first 100 days: 0.58
first 50 days: 0.66
over the whole set I get
RMSE: 76.4 (dollars?)
MAPE: 21.8 %
over 300 days:
RMSE over 300 days: 6.4 $
MAPE over 300 days: 2.9 %
In the presentation, first show the plot with MAPE and RMSE over the whole set with some remarks, then expand with the specific MAPE and RMSE + win rate over fewer days
in the first advanced version the architecture is:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10, 10) 480
dropout (Dropout) (None, 10, 10) 0
lstm_1 (LSTM) (None, 10) 840
dropout_1 (Dropout) (None, 10) 0
dense (Dense) (None, 5) 55
dropout_2 (Dropout) (None, 5) 0
dense_1 (Dense) (None, 1) 6
=================================================================
Total params: 1,381
Trainable params: 1,381
Non-trainable params: 0
_________________________________________________________________
LSTM advanced 2: training data reduced to 2000 days, architecture:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10, 20) 1760
dropout (Dropout) (None, 10, 20) 0
lstm_1 (LSTM) (None, 20) 3280
dropout_1 (Dropout) (None, 20) 0
dense (Dense) (None, 5) 105
dropout_2 (Dropout) (None, 5) 0
dense_1 (Dense) (None, 1) 6
=================================================================
Total params: 5,151
Trainable params: 5,151
Non-trainable params: 0
results
RMSE: 10.799429328578809
MAPE: 3.1894335488381116
RMSE over 300 days: 11.607057105021592
MAPE over 300 days: 3.591834377775106
50 training epochs
but the win rate on the next day's return is still ~0.5
Version 3 has a much simplified architecture and keeps a time window of only 5 days:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10) 480
dropout (Dropout) (None, 10) 0
dense (Dense) (None, 1) 11
=================================================================
Total params: 491
Trainable params: 491
Non-trainable params: 0
_________________________________________________________________
RMSE: 12.955399161548117
MAPE: 3.7480157718302904
RMSE over 300 days: 11.019121338505466
MAPE over 300 days: 3.3382726092879706
not much is gained in win rate
semilog histogram

134
autocorr_plot.py 100644

@ -0,0 +1,134 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy() * 100
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T
# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)
# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))
#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
dfeat = {"Daily Returns" : norm_features[:,0],
"Volume" : norm_features[:,1],
"EMA20" : norm_features[:,2],
"EMA50" : norm_features[:,3]
}
corr = pd.DataFrame(dfeat).corr()
fig = plt.figure(1, (11, 10))
sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap="mako")
plt.tick_params(labelsize=14)
plt.savefig("plots/Correlation_EMAs.png", dpi=300)
# Build the targets: 1 if the next day's return is non-negative, else 0
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for runs that use MAs up to 200 (trim the targets to match)
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
fig, ax = plt.subplots(figsize=(15,10))
#plot params
#plt.xlim([-12,12])
#plt.ylim([-0.5,16])
ax.minorticks_on()
ax.tick_params(labelsize=14)
ax.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
ax.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
ax.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
ax.set_xlim([0, 500])
#ax.set_ylim([-0.5, 0.5])
pd.plotting.autocorrelation_plot(daily_returns, ax=ax, color=seshadri[0], label="Daily Returns")
pd.plotting.autocorrelation_plot(np.abs(daily_returns), ax=ax, color=seshadri[1], label="Absolute Daily Returns")
pd.plotting.autocorrelation_plot(volume, ax=ax, color=seshadri[2], label="Volume")
ax.grid(False)
ax.set_xlabel(r'Lag', fontsize=14)
ax.set_ylabel(r'Autocorrelation',fontsize=14) # label the y axis
ax.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/Autocorrelation_returns_volume_abs.png", dpi=300)

5993
data/AAPL_data.csv 100644

File diff suppressed because it is too large Load Diff

BIN
data/AAPL_data.pkl 100644

Binary file not shown.

5993
data/IXIC_data.csv 100644

File diff suppressed because it is too large Load Diff

BIN
data/IXIC_data.pkl 100644

Binary file not shown.

5993
data/MSFT_data.csv 100644

File diff suppressed because it is too large Load Diff

BIN
data/MSFT_data.pkl 100644

Binary file not shown.

6036
data/daily_MSFT.csv 100644

File diff suppressed because it is too large Load Diff


@ -0,0 +1,228 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
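# Gradient sketch, for reference: with P = sigmoid(X @ w + b) and the mean
# cross-entropy loss above, dL/dz_i = (P_i - Y_i) / m, so
#   grad_w = X.T @ (P - Y) / m   and   grad_b = (P - Y).mean(),
# matching the update rules inside logreg_train below.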
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the values of accuracy and loss are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T
# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)
# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))
#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
# Build the targets: 1 if the next day's return is non-negative, else 0
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for runs that use MAs up to 200 (trim the targets to match)
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
#if time_window > 1:
# X_train = enlarge_lag(X_train)
# Y_train = Y_train[time_window-1:]
#
# X_test = enlarge_lag(X_test)
# Y_test = Y_test[time_window-1:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# The training accuracy of the model is the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_EMA_20_50.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")


@ -0,0 +1,203 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the values of accuracy and loss are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Build the targets: 1 if the next day's return is non-negative, else 0
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
import copy
if time_window > 1:
large_rets = enlarge_lag(part_features[:, 0].reshape(-1, 1), time_window)
Y = Y[time_window-1:]
else:
large_rets = copy.deepcopy(part_features[:, 0].reshape(-1, 1))
part_features = np.hstack((large_rets, part_features[time_window-1:, 1].reshape(-1, 1)))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
norm_features = np.hstack((part_features, norm_EMAs[time_window-1:,]))
print(norm_features.shape, Y.shape)
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
#if time_window > 1:
# X_train = enlarge_lag(X_train)
# Y_train = Y_train[time_window-1:]
#
# X_test = enlarge_lag(X_test)
# Y_test = Y_test[time_window-1:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# The training accuracy of the model is the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_EMA_20_50_only_daily_enlarged.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")


@ -0,0 +1,170 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the values of accuracy and loss are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy().reshape(-1,1)
# Build the targets: 1 if the next day's return is non-negative, else 0
Y = np.zeros(daily_returns.shape[0] - 1)
print(daily_returns.shape, Y.shape)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
import copy
norm_features = copy.deepcopy(daily_returns)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# The training accuracy of the model is the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_only_rets.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")


@ -0,0 +1,8 @@
#!/bin/bash
for i in $(seq 1 50);
do
echo "Running with time window $i"
python3 logistic_regression_enlarge_only_rets.py $i
done


@ -0,0 +1,8 @@
#!/bin/bash
for i in $(seq 1 50);
do
echo "Running with time window $i"
python3 MultiLayer_Perceptron.py $i
done


BIN
plots/MLP_50_20.png 100644

Binary file not shown.



@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5157521385353641;0.5325542570951586;
2;0.5070951585976627;0.5317195325542571;
3;0.6231218697829716;0.48955722639933164;
4;0.6103109997912753;0.4653299916457811;
5;0.6471816283924844;0.4903926482873851;
6;0.6604719148047609;0.4928989139515455;
7;0.6641604010025063;0.5137844611528822;
8;0.7013366750208856;0.520066889632107;
9;0.6897848339252142;0.48327759197324416;
10;0.6136648558295027;0.46321070234113715;
11;0.6587251828631139;0.4866220735785953;
12;0.7215719063545151;0.5259197324414716;
13;0.6684782608695652;0.4811715481171548;
14;0.748693288730922;0.5188284518828452;
15;0.740694270179841;0.5154811715481171;
16;0.6801924283622673;0.47447698744769873;
17;0.7684100418410041;0.5238493723849372;
18;0.7566945606694561;0.5008375209380235;
19;0.7928436911487758;0.5309882747068677;
20;0.8139388865634156;0.49413735343383586;
21;0.6206824366757379;0.5301507537688442;
22;0.7889447236180904;0.507537688442211;
23;0.6379815745393634;0.4660519698239732;
24;0.6751832460732984;0.5071248952221291;
25;0.6258902387934646;0.46437552388935455;
26;0.7301487534045673;0.49706621961441744;
27;0.6787510477787091;0.46689019279128247;
28;0.8658843252305113;0.47651006711409394;
29;0.7048836721861245;0.535234899328859;
30;0.8633123689727463;0.5075503355704698;
31;0.8228140071293772;0.5067114093959731;
32;0.8181627516778524;0.5058724832214765;
33;0.6447147651006712;0.5104953820319059;
34;0.8833647996643591;0.4802686817800168;
35;0.8063365505665128;0.5071368597816961;
36;0.8818467995802728;0.5146935348446684;
37;0.6358102434928632;0.5331654072208228;
38;0.8717464315701091;0.5117647058823529;
39;0.9048918748687802;0.47394957983193275;
40;0.6698866022679546;0.5319327731092437;
41;0.6784289014912833;0.4689075630252101;
42;0.7008403361344537;0.5210084033613446;
43;0.8613445378151261;0.49705634987384356;
44;0.7066610632485817;0.5021026072329688;
45;0.6353509878100042;0.5365853658536586;
46;0.6771074206432626;0.5256518082422204;
47;0.5971404541631623;0.47434819175777965;
48;0.8092935239697224;0.4983164983164983;
49;0.9335436382754995;0.4983164983164983;
50;0.9091291543962978;0.5244107744107744;


@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.547673690799082;0.5217028380634391;
2;0.6400250417362271;0.508347245409015;
3;0.715567612687813;0.5472013366750209;
4;0.7424337299102484;0.4803675856307435;
5;0.7749478079331942;0.48370927318295737;
6;0.8275214032157027;0.4954051796157059;
7;0.8105680868838764;0.4928989139515455;
8;0.8335421888053467;0.504180602006689;
9;0.8272404428660957;0.49414715719063546;
10;0.8763058921855411;0.4891304347826087;
11;0.864158829676071;0.5150501672240803;
12;0.8858695652173914;0.5066889632107023;
13;0.9015468227424749;0.5213389121338912;
14;0.8873092201547146;0.5129707112970712;
15;0.90987034713509;0.5288702928870292;
16;0.9115247856097051;0.5171548117154812;
17;0.9156903765690376;0.47280334728033474;
18;0.9217573221757323;0.5117252931323283;
19;0.9378531073446328;0.48576214405360135;
20;0.9158643784010047;0.474036850921273;
21;0.9522712999790663;0.4949748743718593;
22;0.9711055276381909;0.5293132328308208;
23;0.9384422110552764;0.5037720033528919;
24;0.9759162303664921;0.5155071248952221;
25;0.9733975701717638;0.5146689019279128;
26;0.9664781060129898;0.48365465213746855;
27;0.972338642078793;0.5037720033528919;
28;0.9867979882648784;0.4714765100671141;
29;0.9811360301823517;0.4790268456375839;
30;0.9656184486373166;0.5201342281879194;
31;0.9746278045711889;0.4983221476510067;
32;0.9351929530201343;0.5192953020134228;
33;0.9729446308724832;0.4903442485306465;
34;0.9815397524648626;0.48446683459277917;
35;0.9326479227864037;0.5088161209068011;
36;0.985099685204617;0.4945424013434089;
37;0.9685138539042821;0.5155331654072208;
38;0.9901343408900084;0.4613445378151261;
39;0.9494016376233466;0.5042016806722689;
40;0.9647207055858883;0.5100840336134453;
41;0.9798361688720857;0.5;
42;0.992436974789916;0.5058823529411764;
43;0.9897058823529412;0.49032800672834315;
44;0.9815087203193948;0.5172413793103449;
45;0.9836065573770492;0.5029436501261564;
46;0.9882278747109523;0.496215306980656;
47;0.9960050462573591;0.5273338940285954;
48;1.0;0.5025252525252525;
49;0.9707676130389065;0.49326599326599324;
50;0.9728649558266723;0.4941077441077441;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5366158981848529;0.5217028380634391;
2;0.5899415692821369;0.5350584307178631;
3;0.6750834724540902;0.5087719298245614;
4;0.7261532039240242;0.48120300751879697;
5;0.7701461377870563;0.49958228905597324;
6;0.7584046773856755;0.5087719298245614;
7;0.7644110275689223;0.5430242272347535;
8;0.8398078529657477;0.49414715719063546;
9;0.8748694380614164;0.5016722408026756;
10;0.9352277475971584;0.47240802675585286;
11;0.9264367816091954;0.4807692307692308;
12;0.9423076923076923;0.520066889632107;
13;0.9412625418060201;0.4794979079497908;
14;0.9272423165377378;0.507949790794979;
15;0.958594730238394;0.5196652719665272;
16;0.9811754862999372;0.497071129707113;
17;0.992887029288703;0.5263598326359833;
18;0.9947698744769874;0.509212730318258;
19;0.9922577945176815;0.49581239530988275;
20;0.9945583926329008;0.525963149078727;
21;0.9920452166631777;0.509212730318258;
22;0.9972780569514238;0.5117252931323283;
23;0.9886934673366834;0.49958088851634536;
24;0.9912041884816754;0.5046102263202011;
25;0.9981147884373691;0.509639564124057;
26;0.9974858579509742;0.5004191114836547;
27;0.9945515507124896;0.5297569153394803;
28;1.0;0.5033557046979866;
29;0.9997904003353595;0.5260067114093959;
30;0.99958071278826;0.5243288590604027;
31;1.0;0.47818791946308725;
32;1.0;0.5151006711409396;
33;0.9991610738255033;0.5264483627204031;
34;1.0;0.4979009235936188;
35;0.9987410826689047;0.5029387069689337;
36;1.0;0.5188916876574308;
37;1.0;0.4869857262804366;
38;1.0;0.49411764705882355;
39;1.0;0.5117647058823529;
40;1.0;0.5243697478991597;
41;1.0;0.5184873949579832;
42;0.9997899159663866;0.49747899159663866;
43;1.0;0.5063078216989066;
44;1.0;0.48107653490328006;
45;1.0;0.4920100925147183;
46;1.0;0.5088309503784693;
47;0.9997897392767031;0.4953742640874685;
48;1.0;0.515993265993266;
49;1.0;0.4772727272727273;
50;1.0;0.5067340067340067;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.075944085124135;0.5367278797996661;
2;51.81552587646077;0.5392320534223706;
3;51.982470784641066;0.5380116959064327;
4;52.118555625130455;0.545530492898914;
5;52.4008350730689;0.5396825396825397;
6;52.41177698893297;0.5338345864661654;
7;52.903091060985794;0.5405179615705932;
8;52.75689223057645;0.5392976588628763;
9;53.45728013369543;0.540133779264214;
10;53.5311324697033;0.5409698996655519;
11;53.45872518286311;0.5384615384615384;
12;54.117892976588635;0.5351170568561873;
13;54.0133779264214;0.5364016736401673;
14;54.129207610286436;0.5338912133891214;
15;53.95232120451694;0.5364016736401673;
16;54.1518510771805;0.5338912133891214;
17;54.16317991631799;0.5372384937238494;
18;53.80753138075314;0.533500837520938;
19;53.65139150449885;0.5343383584589615;
20;54.227710339054;0.5284757118927973;
21;54.30186309399204;0.5309882747068677;
22;54.25041876046901;0.5293132328308208;
23;54.14572864321608;0.5305951383067896;
24;54.575916230366495;0.5322715842414082;
25;54.18935902806871;0.5305951383067896;
26;54.03310287031218;0.5314333612740989;
27;54.37971500419112;0.5255658005029338;
28;54.715004191114836;0.5285234899328859;
29;54.51687277300357;0.5310402684563759;
30;54.75890985324947;0.5302013422818792;
31;54.41392325435102;0.5293624161073825;
32;55.39010067114094;0.5209731543624161;
33;55.45302013422819;0.5256087321578505;
34;55.67442836165303;0.5222502099076406;
35;55.686109945446916;0.5155331654072208;
36;56.222455403987404;0.5205709487825357;
37;56.549118387909324;0.5163727959697733;
38;56.27623845507976;0.519327731092437;
39;56.0781020365316;0.5134453781512605;
40;56.425871482570344;0.5134453781512605;
41;56.2906952320941;0.5117647058823529;
42;56.596638655462186;0.5134453781512605;
43;56.84873949579832;0.5138772077375946;
44;56.81865938222316;0.5096719932716569;
45;56.36822194199244;0.5105130361648444;
46;56.54824469203279;0.511354079058032;
47;56.53910849453322;0.511354079058032;
48;56.64423885618166;0.5126262626262627;
49;56.88748685594112;0.5126262626262627;
50;56.710138830458554;0.5058922558922558;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.77378964941569;0.5375626043405676;
3;51.982470784641066;0.5396825396825397;
4;51.99332080985181;0.5405179615705932;
5;52.35908141962422;0.5355054302422724;
6;52.59970766339528;0.5304928989139516;
7;52.86131996658312;0.5388471177944862;
8;52.673350041771094;0.5426421404682275;
9;53.39461040317527;0.5418060200668896;
10;53.51023819473464;0.540133779264214;
11;53.396029258098224;0.544314381270903;
12;54.03428093645485;0.5409698996655519;
13;53.992474916387955;0.5414225941422595;
14;54.10830022998119;0.5430962343096234;
15;54.01505646173149;0.5422594142259414;
16;54.17276720351391;0.5422594142259414;
17;54.16317991631799;0.5405857740585774;
18;53.661087866108794;0.5435510887772195;
19;53.7769407825905;0.5452261306532663;
20;54.39514441188782;0.541038525963149;
21;54.17626125183169;0.5385259631490787;
22;54.10385259631491;0.5402010050251256;
23;54.29229480737019;0.5406538139145013;
24;54.51308900523561;0.5389773679798826;
25;54.18935902806871;0.539815590947192;
26;53.86549340037712;0.5406538139145013;
27;54.48449287510477;0.539815590947192;
28;54.882648784576695;0.5461409395973155;
29;54.47495284007545;0.5394295302013423;
30;54.67505241090147;0.5461409395973155;
31;54.246173201929125;0.5411073825503355;
32;55.45302013422819;0.5444630872483222;
33;55.369127516778526;0.5373635600335852;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.306400839454355;0.5289672544080605;
37;56.42317380352645;0.5214105793450882;
38;56.507136859781696;0.5176470588235295;
39;56.330044089859335;0.5176470588235295;
40;56.32087358252835;0.5260504201680672;
41;56.20667926906112;0.5235294117647059;
42;56.57563025210084;0.5184873949579832;
43;56.72268907563025;0.5180824222035324;
44;56.881697835679766;0.5147182506307821;
45;56.32618747372846;0.5088309503784693;
46;56.443136430523445;0.5088309503784693;
47;56.53910849453322;0.5046257359125316;
48;56.72834314550042;0.5075757575757576;
49;56.92954784437434;0.5084175084175084;
50;56.62599915860328;0.5092592592592593;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;51.794657762938236;0.5383973288814691;
3;52.00333889816361;0.5396825396825397;
4;52.01419327906491;0.5421888053467001;
5;52.4008350730689;0.5371762740183793;
6;52.516182919189816;0.531328320802005;
7;52.88220551378446;0.5363408521303258;
8;52.694235588972425;0.5426421404682275;
9;53.45728013369543;0.5409698996655519;
10;53.5311324697033;0.540133779264214;
11;53.37513061650993;0.5418060200668896;
12;54.05518394648829;0.540133779264214;
13;53.90886287625418;0.5414225941422595;
14;54.10830022998119;0.5414225941422595;
15;53.97323295692179;0.5414225941422595;
16;54.089102698180305;0.5405857740585774;
17;54.121338912133886;0.5422594142259414;
18;53.74476987447699;0.5393634840871022;
19;53.75601590290856;0.542713567839196;
20;54.41607367099205;0.5393634840871022;
21;54.13439397111157;0.5385259631490787;
22;54.166666666666664;0.5393634840871022;
23;54.29229480737019;0.5381391450125733;
24;54.51308900523561;0.5381391450125733;
25;54.16841223292836;0.5406538139145013;
26;53.90739576786089;0.5414920368818106;
27;54.505448449287506;0.5389773679798826;
28;54.84073763621124;0.5444630872483222;
29;54.495912806539515;0.5369127516778524;
30;54.67505241090147;0.5444630872483222;
31;54.2881107150346;0.5394295302013423;
32;55.369127516778526;0.5419463087248322;
33;55.39010067114094;0.5340050377833753;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.507136859781696;0.5222502099076406;
38;56.486146095717885;0.519327731092437;
39;56.30904891874869;0.5168067226890757;
40;56.34187316253675;0.5260504201680672;
41;56.18567527830288;0.5260504201680672;
42;56.53361344537815;0.5201680672268908;
43;56.785714285714285;0.5197645079899075;
44;56.839672200042024;0.5130361648444071;
45;56.32618747372846;0.5105130361648444;
46;56.485179735127176;0.511354079058032;
47;56.518082422203534;0.5063078216989066;
48;56.72834314550042;0.5084175084175084;
49;56.8664563617245;0.5134680134680135;
50;56.6470340765671;0.5058922558922558;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;52.10767946577629;0.5392320534223706;
3;52.27462437395659;0.5396825396825397;
4;52.43164266332707;0.5388471177944862;
5;52.546972860125265;0.5355054302422724;
6;52.28648987262476;0.5329991645781119;
7;52.02589807852965;0.5338345864661654;
8;52.54803675856308;0.5317725752508361;
9;51.932316691038224;0.5359531772575251;
10;52.08942749686586;0.5309364548494984;
11;52.18390804597701;0.5317725752508361;
12;52.65468227424749;0.5267558528428093;
13;52.612876254180605;0.5263598326359833;
14;52.64478360861384;0.5263598326359833;
15;52.676704307821;0.5263598326359833;
16;52.66680610750889;0.5263598326359833;
17;52.86610878661088;0.5238493723849372;
18;52.78242677824267;0.52428810720268;
19;52.395898723582334;0.5251256281407035;
20;52.51151109250733;0.525963149078727;
21;52.54343730374712;0.525963149078727;
22;52.68006700167505;0.5251256281407035;
23;52.701005025125625;0.5247275775356245;
24;52.33507853403141;0.5297569153394803;
25;52.785923753665685;0.5230511316010059;
26;52.44081290592919;0.5238893545683152;
27;52.724224643755235;0.5238893545683152;
28;52.47275775356245;0.5310402684563759;
29;52.79815552295116;0.5243288590604027;
30;52.64150943396226;0.5251677852348994;
31;52.48479765149927;0.5268456375838926;
32;52.57969798657718;0.5335570469798657;
33;53.73322147651006;0.5096557514693535;
34;53.63960562198448;0.5130142737195634;
35;53.86067981535879;0.5138539042821159;
36;53.5781741867786;0.5188916876574308;
37;53.778337531486144;0.5071368597816961;
38;53.56842989084802;0.5159663865546219;
39;53.68465252991812;0.5100840336134453;
40;53.77992440151197;0.5084033613445378;
41;53.68620037807184;0.5109243697478991;
42;53.739495798319325;0.5092436974789916;
43;53.508403361344534;0.5046257359125316;
44;54.12901870140786;0.5054667788057191;
45;53.90920554854981;0.5096719932716569;
46;54.172797981921384;0.5021026072329688;
47;54.14213624894869;0.5054667788057191;
48;53.973927670311184;0.51010101010101;
49;53.52260778128286;0.5143097643097643;
50;53.82835506941524;0.5109427609427609;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05006473888649;0.538860103626943;
2;52.363479387006265;0.538860103626943;
3;52.09412780656304;0.5345423143350605;
4;52.26683937823834;0.5367329299913569;
5;52.47246814942776;0.5358686257562663;
6;52.6133909287257;0.5341400172860847;
7;52.99200691294016;0.5375972342264477;
8;53.34917891097667;0.5393258426966292;
9;53.284356093344854;0.5397923875432526;
10;53.16619840069159;0.5397923875432526;
11;53.17769130998703;0.5389273356401384;
12;53.729729729729726;0.5406574394463668;
13;53.61159169550172;0.5389273356401384;
14;53.67647058823529;0.5385281385281385;
15;53.77460523469608;0.5428571428571428;
16;53.82951103418434;0.5454545454545454;
17;53.408353170309454;0.5463203463203463;
18;53.961038961038966;0.5445887445887446;
19;53.74458874458874;0.5450606585788561;
20;53.799523706429966;0.5441941074523396;
21;54.04937202252057;0.5450606585788561;
22;54.12605588044185;0.5415944540727903;
23;54.24610051993067;0.5424610051993067;
24;54.24610051993067;0.546400693842151;
25;54.58288190682556;0.5437987857762359;
26;54.65973125270914;0.5403295750216826;
27;54.19466724474312;0.5420641803989592;
28;54.48829141370338;0.5394622723330442;
29;54.48829141370338;0.5399305555555556;
30;54.304923010193015;0.546875;
31;54.36008676789588;0.5425347222222222;
32;55.131264916467785;0.5477430555555556;
33;55.90277777777778;0.5364583333333334;
34;55.533854166666664;0.5334491746307559;
35;55.285435207293254;0.5351867940920938;
36;55.68823273990448;0.5317115551694179;
37;55.591748099891426;0.5325803649000869;
38;55.60382276281495;0.5325803649000869;
39;55.60382276281495;0.5356521739130434;
40;56.00695198783402;0.5339130434782609;
41;56.171229900043464;0.5278260869565218;
42;56.55292327754836;0.5252173913043479;
43;56.608695652173914;0.5278260869565218;
44;56.34782608695652;0.5274151436031331;
45;56.53402913676886;0.5282854656222803;
46;56.32883862548934;0.5317667536988686;
47;56.66739177724603;0.5274151436031331;
48;56.87554395126197;0.5300261096605744;
49;56.83202785030461;0.5174216027874564;
50;56.735582154515775;0.5156794425087108;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.04038704249053;0.5412457912457912;
2;51.90406059330949;0.5412457912457912;
3;52.02020202020202;0.5370370370370371;
4;52.20959595959596;0.5459140690817186;
5;52.68364554830563;0.5408593091828138;
6;52.71578947368422;0.5341196293176074;
7;52.76900400084228;0.5391743892165122;
8;52.906486941870256;0.540016849199663;
9;53.517270429654594;0.5370994940978078;
10;53.52854434379608;0.5370994940978078;
11;53.64517488411293;0.5379426644182125;
12;54.03582718651212;0.5396290050590219;
13;54.15261382799326;0.5396290050590219;
14;54.15261382799326;0.5409282700421941;
15;54.079696394686906;0.5434599156118144;
16;54.344158582876425;0.5417721518987342;
17;54.16578780847922;0.5451476793248945;
18;53.92405063291139;0.5417721518987342;
19;53.9873417721519;0.5396959459459459;
20;54.10424140113948;0.5396959459459459;
21;54.15787252005065;0.5388513513513513;
22;54.084863837872064;0.5388513513513513;
23;53.69510135135135;0.5422297297297297;
24;53.80067567567568;0.5435333896872359;
25;54.14994720168954;0.5409974640743872;
26;53.95014786649768;0.5452240067624683;
27;54.53200929642933;0.5486052409129332;
28;54.62806424344886;0.5469146238377007;
29;54.52240067624683;0.5431472081218274;
30;54.449376453181145;0.5439932318104906;
31;54.60887949260042;0.5346869712351946;
32;55.17022626348065;0.5431472081218274;
33;55.52030456852792;0.5397631133671743;
34;55.647208121827404;0.5309060118543607;
35;55.74360059234187;0.529212531752752;
36;56.03046974185357;0.5275190516511431;
37;56.00000000000001;0.5275190516511431;
38;55.82133784928027;0.5207451312447079;
39;55.927180355630824;0.5245762711864407;
40;56.15075164090621;0.5245762711864407;
41;55.781448538754766;0.5288135593220339;
42;56.15335733954671;0.5271186440677966;
43;56.92796610169491;0.5203389830508475;
44;56.525423728813564;0.5173876166242578;
45;56.55859292222929;0.5165394402035624;
46;56.358626536668076;0.5165394402035624;
47;56.32817468730125;0.5148430873621713;
48;56.59457167090755;0.5173876166242578;
49;56.46734520780322;0.5118845500848896;
50;56.182396606574756;0.5144312393887945;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;50.69893594825787;0.5317195325542571;
2;50.70951585976628;0.5317195325542571;
3;50.688647746243745;0.5321637426900585;
4;50.699227718639115;0.5321637426900585;
5;50.68893528183715;0.5321637426900585;
6;50.69951973272082;0.5321637426900585;
7;50.71010860484545;0.5321637426900585;
8;50.71010860484545;0.532608695652174;
9;50.69981199080844;0.532608695652174;
10;50.68951107396573;0.532608695652174;
11;50.67920585161965;0.532608695652174;
12;50.68979933110368;0.532608695652174;
13;50.68979933110368;0.5330543933054394;
14;50.700397240225804;0.5330543933054394;
15;50.71099958176495;0.5330543933054394;
16;50.700690232169;0.5330543933054394;
17;50.7112970711297;0.5330543933054394;
18;50.7112970711297;0.533500837520938;
19;50.700983469345054;0.533500837520938;
20;50.69066555043952;0.533500837520938;
21;50.680343311701904;0.533500837520938;
22;50.69095477386934;0.533500837520938;
23;50.69095477386934;0.5331098072087175;
24;50.680628272251305;0.5331098072087175;
25;50.69124423963134;0.5331098072087175;
26;50.68091347161114;0.5331098072087175;
27;50.69153394803018;0.5331098072087175;
28;50.69153394803018;0.5327181208053692;
29;50.7021588765458;0.5327181208053692;
30;50.712788259958074;0.5327181208053692;
31;50.7234221010694;0.5327181208053692;
32;50.73406040268457;0.5327181208053692;
33;50.713087248322154;0.5331654072208228;
34;50.72372561359345;0.5331654072208228;
35;50.7343684431389;0.5331654072208228;
36;50.724029380902415;0.5331654072208228;
37;50.71368597816961;0.5331654072208228;
38;50.73467674223342;0.5327731092436975;
39;50.72433340331723;0.5327731092436975;
40;50.734985300293985;0.5327731092436975;
41;50.72463768115942;0.5327731092436975;
42;50.71428571428571;0.5327731092436975;
43;50.71428571428571;0.5323801513877208;
44;50.724942214751;0.5323801513877208;
45;50.735603194619586;0.5323801513877208;
46;50.72524700441454;0.5323801513877208;
47;50.71488645920942;0.5323801513877208;
48;50.69386038687973;0.5328282828282829;
49;50.683491062039955;0.5328282828282829;
50;50.69415229280606;0.5328282828282829;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.85726210350584;0.5375626043405676;
3;52.00333889816361;0.5388471177944862;
4;52.01419327906491;0.5388471177944862;
5;52.35908141962422;0.5355054302422724;
6;52.62058884944665;0.5304928989139516;
7;52.98663324979115;0.5371762740183793;
8;52.7360066833751;0.5426421404682275;
9;53.39461040317527;0.5409698996655519;
10;53.489343919765986;0.5409698996655519;
11;53.333333333333336;0.5426421404682275;
12;53.992474916387955;0.5392976588628763;
13;54.03428093645485;0.5414225941422595;
14;54.15011499059168;0.5405857740585774;
15;54.077791718946045;0.5414225941422595;
16;54.19368332984731;0.5414225941422595;
17;54.14225941422595;0.5397489539748954;
18;53.59832635983264;0.5443886097152428;
19;53.672316384180796;0.5443886097152428;
20;54.353285893679356;0.5402010050251256;
21;54.239062172911865;0.5385259631490787;
22;54.082914572864325;0.5343383584589615;
23;54.22948073701842;0.537300922045264;
24;54.47120418848167;0.5356244761106455;
25;54.18935902806871;0.5364626990779547;
26;53.84454221663524;0.5356244761106455;
27;54.40067057837384;0.539815590947192;
28;54.90360435875943;0.5444630872483222;
29;54.43303290714735;0.537751677852349;
30;54.65408805031446;0.5427852348993288;
31;54.26714195848186;0.5394295302013423;
32;55.3481543624161;0.5436241610738255;
33;55.39010067114094;0.5340050377833753;
34;55.779316131739044;0.5197313182199832;
35;55.77003776751993;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.36020151133502;0.5239294710327456;
38;56.44416456759026;0.5235294117647059;
39;56.37203443208062;0.5184873949579832;
40;56.34187316253675;0.5235294117647059;
41;56.24868725057761;0.5235294117647059;
42;56.596638655462186;0.5126050420168067;
43;56.76470588235294;0.5147182506307821;
44;56.90271065349863;0.5138772077375946;
45;56.284153005464475;0.5138772077375946;
46;56.42211477822157;0.511354079058032;
47;56.53910849453322;0.5079899074852817;
48;56.749369217830115;0.5067340067340067;
49;56.992639327024186;0.5092592592592593;
50;56.56289440471182;0.51010101010101;

Binary file not shown. (image added, 294 KiB)

Binary file not shown. (image added, 279 KiB)

Binary file not shown. (image added, 632 KiB)

Binary file not shown. (image added, 292 KiB)

61
plotter.py 100644
View File

@ -0,0 +1,61 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

# Results file to plot; all the results files are semicolon-separated.
data = pd.read_csv("plots/data/MLP_20_10_5_2.csv", sep=";")
#data = pd.read_csv("plots/data/logistic_regression.csv", sep=";")
#data_SMA = pd.read_csv("plots/data/logistic_regression_SMA.csv", sep=";")
#data_SMA_20_50 = pd.read_csv("plots/data/logistic_regression_SMA_20_50.csv", sep=";")
#data_EMA = pd.read_csv("plots/data/logistic_regression_EMA.csv", sep=";")
#data_EMA_20_50 = pd.read_csv("plots/data/logistic_regression_EMA_20_50.csv", sep=";")
print(data)

fig = plt.figure(1, figsize=(15, 10))
# NOTE: the *100 assumes both accuracy columns hold fractions; several results
# files in this PR store training_accuracy in percent already (see the
# normalization sketch after this script).
plt.plot(data["time_window"], data["training_accuracy"] * 100, color=seshadri[0], label="Training Accuracy", linewidth=2)
plt.plot(data["time_window"], data["testing_accuracy"] * 100, color=seshadri[1], label="Testing Accuracy", linewidth=2)
#plt.plot(data["time_window"], data["testing_accuracy"]*100, color=seshadri[0], label="Returns and Volume", linewidth=2)
#plt.plot(data_SMA_20_50["time_window"], data_SMA_20_50["testing_accuracy"]*100, color=seshadri[1], label="With SMA 20 and 50 candles", linewidth=2)
#plt.plot(data_SMA["time_window"], data_SMA["testing_accuracy"]*100, color=seshadri[2], label="With SMA 20, 50 and 200 candles", linewidth=2)
#plt.plot(data_EMA_20_50["time_window"], data_EMA_20_50["testing_accuracy"]*100, color=seshadri[3], label="With EMA 20 and 50 candles", linewidth=2)
#plt.plot(data_EMA["time_window"], data_EMA["testing_accuracy"]*100, color=seshadri[4], label="With EMA 20, 50 and 200 candles", linewidth=2)

# plot params
plt.xlim([0, 50])
#plt.ylim([50, 60])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.grid(True)

plt.xlabel(r'Lag (Days)', fontsize=14)
plt.ylabel(r'Accuracy (%)', fontsize=14)
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99))
plt.savefig("plots/MLP_20_10_5_2.png", dpi=300)
plt.show()
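The results files in this PR are not uniform: some store training_accuracy as a fraction (e.g. 0.54) while others store it in percent (e.g. 54.1), and testing_accuracy is always a fraction. A hedged normalization sketch that could run before the two plt.plot calls above; the threshold heuristic is an assumption for illustration, not part of the original script:

import pandas as pd

def to_percent(series: pd.Series) -> pd.Series:
    # Heuristic (assumption): a column whose maximum is <= 1 holds fractions
    # and is scaled by 100; anything larger is taken to be percent already.
    return series * 100.0 if series.max() <= 1.0 else series

# Applied to the frame loaded above, before plotting:
# data["training_accuracy"] = to_percent(data["training_accuracy"])
# data["testing_accuracy"] = to_percent(data["testing_accuracy"])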

View File

@ -0,0 +1,91 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime
import os

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

# Load cached MSFT data if available, otherwise download and cache it.
#stock_data = pd.read_csv("data/daily_MSFT.csv").iloc[::-1].reset_index(drop=True)
if os.path.isfile("data/MSFT_data.pkl"):
    stock_data = pd.read_pickle("data/MSFT_data.pkl")
elif os.path.isfile("data/MSFT_data.csv"):
    stock_data = pd.read_csv("data/MSFT_data.csv")
else:
    start_date = datetime(2000, 1, 1)
    end_date = datetime(2023, 10, 26)
    stock_data = yf.download('MSFT', start=start_date, end=end_date)
    stock_data.to_pickle("data/MSFT_data.pkl")
    stock_data.to_csv("data/MSFT_data.csv")

daily_returns = stock_data["Close"] - stock_data["Open"]

# win_lose[i] = 1 when day i and day i+1 move in the same direction.
# Use positional indexing (.iloc), since stock_data carries a DatetimeIndex.
win_lose = np.zeros(daily_returns.size - 1)
for index, return_ in enumerate(daily_returns.iloc[:-1]):
    next_return = daily_returns.iloc[index + 1]
    if (return_ > 0 and next_return > 0) or (return_ < 0 and next_return < 0):
        win_lose[index] = 1
    else:
        win_lose[index] = 0
win_rate = np.count_nonzero(win_lose == 1) / win_lose.size
print(win_rate)

percent_returns = daily_returns / stock_data["Open"] * 100

fig = plt.figure(1, figsize=(15, 10))
plt.hist(percent_returns, bins=120, range=(-12, 12), facecolor=seshadri[0], alpha=0.8, edgecolor="white", label="Percentage daily returns occurrences")

# plot params
plt.xlim([-12, 12])
#plt.ylim([-0.5, 16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)

plt.xlabel(r'Percentage daily return', fontsize=14)
plt.ylabel(r'Occurrences', fontsize=14)
plt.yscale('log')  # log counts: the tails are orders of magnitude rarer than the center
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99))
plt.savefig("plots/MSFT_daily_occs_semilogx.png", dpi=300)
plt.show()
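The consecutive-direction loop above can also be written without an explicit Python loop. A minimal vectorized sketch, assuming it runs right after the script above (it reuses daily_returns) and reproducing the loop's behavior, including zero-return days counting as 0:

import numpy as np

returns = daily_returns.to_numpy()
# Same direction on consecutive days: equal nonzero signs.
same_direction = np.sign(returns[:-1]) == np.sign(returns[1:])
# returns[:-1] != 0 excludes flat days, matching the loop, which never
# counts a zero return as a win.
win_lose_vec = np.where((returns[:-1] != 0) & same_direction, 1, 0)
print(win_lose_vec.mean())  # same value as win_rate above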