dev #1

Merged
master_roby3 merged 7 commits from dev into main 2024-05-11 21:19:15 +00:00
51 changed files with 26440 additions and 0 deletions

222
LSTM.py 100644
View File

@ -0,0 +1,222 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
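# Illustration (hypothetical shapes, not part of the original script):
# on input of shape (N, F) with time_window=T, enlarge_lag returns shape
# (N - T + 1, F * T); each output row concatenates day i+T-1 first, then
# the T-1 preceding days i+T-2, ..., i.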
#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
"""
Calculate the Root Mean Squared Error (RMSE)
"""
rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
return rmse
def calculate_mape(y_true, y_pred):
"""
Calculate the Mean Absolute Percentage Error (MAPE) %
"""
y_pred, y_true = np.array(y_pred), np.array(y_true)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
return mape
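# For reference: RMSE = sqrt(mean((y_true - y_pred)^2)) and
# MAPE = mean(|y_true - y_pred| / |y_true|) * 100; note that MAPE is
# undefined whenever y_true contains a zero.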
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
features = np.vstack((price, volume)).T
# Necessary for MAs
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
Y[i] = norm_features[i+1, 0]
time_window = 20  # NOTE: hardcoded for this run; this overrides the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
print(norm_features.shape, Y.shape)
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
return model
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="mean_squared_error"
)
# Save weights only for best model
checkpointer = ModelCheckpoint(
filepath = 'weights_best.hdf5',
verbose = 2,
save_best_only = True
)
if os.path.exists("./checkpoints/checkpoint"):
model.load_weights("./checkpoints/my_checkpoint")
else:
model.fit(
X_train,
Y_train,
epochs=25,
batch_size = 32,
callbacks = [checkpointer]
)
model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test)
predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
test_prices = price[time_window - 1 + train_size:]
pred_ret = []
actual_ret = []
for j in range(len(test_prices) - 1):
    # the predicted price is tomorrow's price; I want to compare it with tomorrow's actual return
pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
pred_ret_np = np.array(pred_ret)
actual_ret_np = np.array(actual_ret)
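# Directional win rate: the fraction of test days on which the predicted and
# the realized next-day returns agree in sign, computed on the whole test set
# and on progressively shorter prefixes below.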
sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
print(sign_comp)
print(sign_comp_red_nottoomuch)
print(sign_comp_red)
print(sign_comp_red_alot)
rmse = calculate_rmse(test_prices[1:], predicted_prices)
mape = calculate_mape(test_prices[1:], predicted_prices)
print("RMSE: ", rmse)
print("MAPE: ", mape)
rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
print("RMSE su 300 gg: ", rmse)
print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(test_prices, color=seshadri[0], label="Registered Closing Price")
plt.plot(predicted_prices, color=seshadri[1], label="Prediction")
#plot params
plt.xlim([0,1200])
plt.ylim([100,400])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Price (USD)',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/First_Attempt_LSTM_2.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")

238
LSTM_advanced.py 100644
View File

@ -0,0 +1,238 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
np.set_printoptions(threshold=100)
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
"""
Calculate the Root Mean Squared Error (RMSE)
"""
rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
return rmse
def calculate_mape(y_true, y_pred):
"""
Calculate the Mean Absolute Percentage Error (MAPE) %
"""
y_pred, y_true = np.array(y_pred), np.array(y_true)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
return mape
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
#features = np.vstack((price, volume)).T
# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
if r >= 0:
bin_rets[i] = 1
else:
bin_rets[i] = 0
bin_rets_np = np.array(bin_rets)
#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))
print("occai")
print(rets)
print(bin_rets)
print("ocai")
# merge data into 2d numpy array
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
# Y[i] = norm_features[i+1, 0]
Y = bin_rets
time_window = 20  # NOTE: hardcoded for this run; this overrides the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 20, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
#model.add(LSTM(units=50, return_sequences=True))
#model.add(Dropout(0.2))
model.add(LSTM(units=20))
model.add(Dropout(0.2))
model.add(Dense(units=5))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation="sigmoid"))
return model
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="mean_squared_error"
)
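# NOTE: this variant trains a sigmoid output with a mean-squared-error loss;
# the later classification variant in this PR switches to binary
# cross-entropy with an accuracy metric.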
#if os.path.exists("./checkpoints/checkpoint"):
# model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
X_train,
Y_train,
shuffle=True,
epochs=20,
batch_size=20
)
#model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test)
print(prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
# # the predicted price is tomorrow's price; I want to compare it with tomorrow's actual return
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE su 300 gg: ", rmse)
#print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Direction (1 = up)")
plt.plot(prediction, color=seshadri[1], label="Predicted Probability")
#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Direction / predicted probability',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")

View File

@ -0,0 +1,230 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
np.set_printoptions(threshold=1000000)
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
train_quota = 0.8
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
stock_data = pd.read_pickle("data/MSFT_data.pkl")
price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
#minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler = preprocessing.StandardScaler()
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
#EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
#EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(np.vstack((price, volume)).T)
#norm_features = np.hstack((norm_features, norm_EMAs))
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
if r >= 0:
bin_rets[i] = 1
else:
bin_rets[i] = 0
bin_rets_np = np.array(bin_rets)
#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))
print("occai")
print(rets)
print(bin_rets)
print("ocai")
# merge data into 2d numpy array
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
# Y[i] = norm_features[i+1, 0]
Y = bin_rets
time_window = 3  # NOTE: hardcoded for this run; this overrides the command-line value parsed above
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size].reshape(-1, 1)
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:].reshape(-1, 1)
def LSTM_model():
model = Sequential()
model.add(LSTM(units = 20, input_shape=(X_train.shape[1], 1)))
#model.add(Dense(units = 20, activation="relu", input_shape=(X_train.shape[1],)))
#model.add(Dropout(0.3))
#model.add(LSTM(units=50, return_sequences=True))
#model.add(Dropout(0.2))
model.add(Dense(units=10, activation="relu"))
model.add(Dense(units=5, activation="relu"))
model.add(Dense(units=1, activation="sigmoid"))
return model
model = LSTM_model()
model.summary()
model.compile(
optimizer="adam",
loss="binary_crossentropy",
metrics=['accuracy']
)
#if os.path.exists("./checkpoints/checkpoint"):
# model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
X_train,
Y_train,
shuffle=True,
epochs=50,
batch_size=32
)
#model.save_weights("./checkpoints/my_checkpoint")
prediction = model.predict(X_test).flatten()
print("pred: ", prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
# if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
# counter = counter + 1
#print("acc: ", counter/prediction.shape[0])
#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
# # the predicted price is tomorrow's price; I want to compare it with tomorrow's actual return
# pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
# actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)
#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE su 300 gg: ", rmse)
#print("MAPE su 300 gg: ", mape)
#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])
fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Direction (1 = up)")
plt.plot(prediction, color=seshadri[1], label="Predicted Probability")
#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Direction / predicted probability',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)
plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
# f.write(f"{time_window};{train_score};{score};\n")

View File

@ -0,0 +1,119 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for when MAs up to 200 are used
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# let's try sklearn
from sklearn.neural_network import MLPClassifier
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)
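# The (20,10,5,2) tuple gives the hidden-layer sizes in depth order; per
# appunti.md, these are the numbers encoded in the result-file name below.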
train_score = clf.score(X_train, Y_train)
score = clf.score(X_test, Y_test)
print("sklearn score, all default: ", score, " train ", train_score)
with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
f.write(f"{time_window};{train_score};{score};\n")

View File

@ -0,0 +1,7 @@
# Stock Price Prediction
This is a simple project for a stock price prediction tool that tests various configurations.
The final goal is to study the performance improvement obtained by changing the technology and the methods used.
Final try

145
appunti.md 100644
View File

@ -0,0 +1,145 @@
# Notes on the development of the project
The reference stock for the first part of the study is Microsoft. Why:
* highly capitalized
* long-lived
* it is a tech stock, but one that is not subject to overly "strange" dynamics compared to normal market behavior (e.g., Tesla)
First of all, the performance of a simple, untrained model is tested: it tries to predict the sign of the next day's return from the previous day's return (+ follows + and - follows -).
A small bar chart is also included to give an idea of the distribution of the returns.
winrate detected: 0.47638123852445335
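A minimal sketch of this untrained baseline (illustrative code with placeholder data; in the scripts, `daily_returns` is computed from the MSFT open/close series):

```python
import numpy as np

# Baseline: predict that tomorrow's return keeps today's sign (+ follows +, - follows -).
daily_returns = np.random.default_rng(0).normal(size=1000)  # placeholder returns
predicted_sign = np.sign(daily_returns[:-1])  # today's sign, used as the forecast
actual_sign = np.sign(daily_returns[1:])      # tomorrow's realized sign
win_rate = np.mean(predicted_sign == actual_sign)
print("winrate detected:", win_rate)
```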
First tests with logistic regression, adding past days as features: a slight increase; too many days leads to overfitting.
Try the same test but adding some indicators (e.g., moving averages) -> there is a slight improvement.
In the MLP data file names, the numbers are the hidden layer sizes, in order of depth.
A first, very simple test; architecture below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 20, 50) 10400
dropout (Dropout) (None, 20, 50) 0
lstm_1 (LSTM) (None, 20, 50) 20200
dropout_1 (Dropout) (None, 20, 50) 0
lstm_2 (LSTM) (None, 50) 20200
dropout_2 (Dropout) (None, 50) 0
dense (Dense) (None, 1) 51
=================================================================
Total params: 50,851
Trainable params: 50,851
Non-trainable params: 0
a simple 25 epochs and a 0.8 / 0.2 split
the plot obtained is the one shown
with data (win rate on the returns):
whole testing set (): 0.4991624790619765
first 200 days: 0.605
first 100 days: 0.58
first 50 days: 0.66
over the whole set I have
RMSE: 76.4 (dollars?)
MAPE: 21.8 %
over 300 days:
RMSE over 300 days: 6.4 $
MAPE over 300 days: 2.9 %
In the presentation I will first show the plot with MAPE and RMSE over the whole set, with some remarks, then widen the scope with the specific MAPE and RMSE values + the win rate over fewer days.
in the first advanced model the architecture is:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10, 10) 480
dropout (Dropout) (None, 10, 10) 0
lstm_1 (LSTM) (None, 10) 840
dropout_1 (Dropout) (None, 10) 0
dense (Dense) (None, 5) 55
dropout_2 (Dropout) (None, 5) 0
dense_1 (Dense) (None, 1) 6
=================================================================
Total params: 1,381
Trainable params: 1,381
Non-trainable params: 0
_________________________________________________________________
LSTM advanced 2: training data reduced to 2000 days, architecture:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10, 20) 1760
dropout (Dropout) (None, 10, 20) 0
lstm_1 (LSTM) (None, 20) 3280
dropout_1 (Dropout) (None, 20) 0
dense (Dense) (None, 5) 105
dropout_2 (Dropout) (None, 5) 0
dense_1 (Dense) (None, 1) 6
=================================================================
Total params: 5,151
Trainable params: 5,151
Non-trainable params: 0
results
RMSE: 10.799429328578809
MAPE: 3.1894335488381116
RMSE over 300 days: 11.607057105021592
MAPE over 300 days: 3.591834377775106
training of 50 epochs
but the win rate on the next day's return is still ~0.5
Number 3 has a much simplified architecture and keeps a time window of only 5 days:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 10) 480
dropout (Dropout) (None, 10) 0
dense (Dense) (None, 1) 11
=================================================================
Total params: 491
Trainable params: 491
Non-trainable params: 0
_________________________________________________________________
RMSE: 12.955399161548117
MAPE: 3.7480157718302904
RMSE over 300 days: 11.019121338505466
MAPE over 300 days: 3.3382726092879706
not much is gained in win rate
semilog histogram

134
autocorr_plot.py 100644
View File

@ -0,0 +1,134 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy() * 100
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T
# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)
# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))
#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
dfeat = {"Daily Returns" : norm_features[:,0],
"Volume" : norm_features[:,1],
"EMA20" : norm_features[:,2],
"EMA50" : norm_features[:,3]
}
corr = pd.DataFrame(dfeat).corr()
fig = plt.figure(1, (11, 10))
sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap="mako")
plt.tick_params(labelsize=14)
plt.savefig("plots/Correlation_EMAs.png", dpi=300)
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for when MAs up to 200 are used
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
fig, ax = plt.subplots(figsize=(15,10))
#plot params
#plt.xlim([-12,12])
#plt.ylim([-0.5,16])
ax.minorticks_on()
ax.tick_params(labelsize=14)
ax.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
ax.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
ax.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
ax.set_xlim([0, 500])
#ax.set_ylim([-0.5, 0.5])
pd.plotting.autocorrelation_plot(daily_returns, ax=ax, color=seshadri[0], label="Daily Returns")
pd.plotting.autocorrelation_plot(np.abs(daily_returns), ax=ax, color=seshadri[1], label="Absolute Daily Returns")
pd.plotting.autocorrelation_plot(volume, ax=ax, color=seshadri[2], label="Volume")
ax.grid(False)
ax.set_xlabel(r'Lag', fontsize=14)
ax.set_ylabel(r'Autocorrelation',fontsize=14) # label the y axis
ax.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/Autocorrelation_returns_volume_abs.png", dpi=300)

5993
data/AAPL_data.csv 100644

File diff suppressed because it is too large

BIN
data/AAPL_data.pkl 100644

Binary file not shown.

5993
data/IXIC_data.csv 100644

File diff suppressed because it is too large

BIN
data/IXIC_data.pkl 100644

Binary file not shown.

5993
data/MSFT_data.csv 100644

File diff suppressed because it is too large

BIN
data/MSFT_data.pkl 100644

Binary file not shown.

6036
data/daily_MSFT.csv 100644

File diff suppressed because it is too large

View File

@ -0,0 +1,228 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
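# The gradients used in logreg_train below follow from p = sigmoid(x @ w + b)
# and the mean cross-entropy: grad_b = mean(P - Y), grad_w = X.T @ (P - Y) / m.
# (The lambda_ L2-penalty argument is accepted but not used in this version.)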
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the accuracy and loss values are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T
# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)
# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))
#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
# for when MAs up to 200 are used
#Y = Y[49:]
#Y = Y[199:]
print(norm_features.shape, Y.shape)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
#if time_window > 1:
# X_train = enlarge_lag(X_train)
# Y_train = Y_train[time_window-1:]
#
# X_test = enlarge_lag(X_test)
# Y_test = Y_test[time_window-1:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# Training accuracy of the model: the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_EMA_20_50.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")

View File

@ -0,0 +1,203 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the accuracy and loss values are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()
minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()
features = np.vstack((daily_returns, volume)).T
# Necessary for MAs
part_features = std_scaler.fit_transform(features)
# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
import copy
if time_window > 1:
large_rets = enlarge_lag(part_features[:, 0].reshape(-1, 1), time_window)
Y = Y[time_window-1:]
else:
large_rets = copy.deepcopy(part_features[:, 0].reshape(-1, 1))
part_features = np.hstack((large_rets, part_features[time_window-1:, 1].reshape(-1, 1)))
# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)
norm_features = np.hstack((part_features, norm_EMAs[time_window-1:,]))
print(norm_features.shape, Y.shape)
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
#if time_window > 1:
# X_train = enlarge_lag(X_train)
# Y_train = Y_train[time_window-1:]
#
# X_test = enlarge_lag(X_test)
# Y_test = Y_test[time_window-1:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# Training accuracy of the model: the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_EMA_20_50_only_daily_enlarged.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")

View File

@ -0,0 +1,170 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from datetime import datetime
import os, sys
from sklearn import preprocessing
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
train_quota = 0.8
def enlarge_lag(to_enlarge, time_window=1):
# to_enlarge is the data already present, should be a numpy array
enlarged = []
for i in range(to_enlarge.shape[0] - time_window + 1):
new_element = []
for j in range(time_window):
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
enlarged.append(new_element)
return np.array(enlarged)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def logreg_inference(x, w, b):
z = (x @ w) + b
p = sigmoid(z)
return p
def cross_entropy(P, Y):
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
# The training samples are defined as such (each row of X is a sample):
# X[0, :] -> Y[0]
# X[1, :] -> Y[1]
m, n = X.shape
# Initial values for the parameters
w = np.zeros(n)
b = 0
# Initial values for the "previous loss" and "convergence" variables, used to check convergence
prec_loss = 0
convergence = 0
for step in range(steps):
P = logreg_inference(X, w, b)
loss = cross_entropy(P, Y)
if step % 1000 == 0:
print(step, loss)
# Difference between the previous loss and the current loss
diff = np.absolute(prec_loss - loss)
prec_loss = loss
if diff < 0.00001:
# If convergence is reached, the algorithm is stopped
convergence = step
break
# Derivative of the loss function with respect to bias
grad_b = (P - Y).mean()
# Gradient of the loss function with respect to weights
grad_w = (X.T @ (P - Y)) / m
w -= lr * grad_w
b -= lr * grad_b
# Every 100 iterations, the accuracy and loss values are saved for plotting
if step%100 == 0:
Yhat = (P > 0.5)
acc_array.append((Y == Yhat).mean() * 100)
losses.append(loss)
# Print the iterations needed for convergence before returning
print("Convergence = ", convergence)
return w, b
if len(sys.argv) > 1:
time_window = int(sys.argv[1])
else:
time_window = 1
#time_window = 10
stock_data = pd.read_pickle("data/MSFT_data.pkl")
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy().reshape(-1,1)
# merge data into 2d numpy array
Y = np.zeros(daily_returns.shape[0] - 1)
print(daily_returns.shape, Y.shape)
for i in range(Y.size):
if daily_returns[i+1] >= 0:
Y[i] = 1
else:
Y[i] = 0
import copy
norm_features = copy.deepcopy(daily_returns)
if time_window > 1:
norm_features = enlarge_lag(norm_features, time_window)
Y = Y[time_window-1:]
train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]
# Lists to save accuracy and loss
acc_array = []
losses = []
w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)
# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# Training accuracy of the model: the last value recorded in the array
print("Training Acc: ", acc_array[-1])
P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)
# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)
with open("plots/data/logistic_regression_only_rets.csv", "a") as f:
f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")

View File

@ -0,0 +1,8 @@
#!/bin/bash
for i in $(seq 1 50);
do
echo "Running with time window $i"
python3 logistic_regression_enlarge_only_rets.py $i
done

View File

@ -0,0 +1,8 @@
#!/bin/bash
for i in $(seq 1 50);
do
echo "Running with time window $i"
python3 MultiLayer_Perceptron.py $i
done

Binary plot files not shown (12 new images, 120 KiB to 510 KiB).

BIN
plots/MLP_50_20.png 100644

Binary file not shown (new image, 276 KiB).

Binary plot files not shown (2 new images, 142 KiB and 130 KiB).

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5157521385353641;0.5325542570951586;
2;0.5070951585976627;0.5317195325542571;
3;0.6231218697829716;0.48955722639933164;
4;0.6103109997912753;0.4653299916457811;
5;0.6471816283924844;0.4903926482873851;
6;0.6604719148047609;0.4928989139515455;
7;0.6641604010025063;0.5137844611528822;
8;0.7013366750208856;0.520066889632107;
9;0.6897848339252142;0.48327759197324416;
10;0.6136648558295027;0.46321070234113715;
11;0.6587251828631139;0.4866220735785953;
12;0.7215719063545151;0.5259197324414716;
13;0.6684782608695652;0.4811715481171548;
14;0.748693288730922;0.5188284518828452;
15;0.740694270179841;0.5154811715481171;
16;0.6801924283622673;0.47447698744769873;
17;0.7684100418410041;0.5238493723849372;
18;0.7566945606694561;0.5008375209380235;
19;0.7928436911487758;0.5309882747068677;
20;0.8139388865634156;0.49413735343383586;
21;0.6206824366757379;0.5301507537688442;
22;0.7889447236180904;0.507537688442211;
23;0.6379815745393634;0.4660519698239732;
24;0.6751832460732984;0.5071248952221291;
25;0.6258902387934646;0.46437552388935455;
26;0.7301487534045673;0.49706621961441744;
27;0.6787510477787091;0.46689019279128247;
28;0.8658843252305113;0.47651006711409394;
29;0.7048836721861245;0.535234899328859;
30;0.8633123689727463;0.5075503355704698;
31;0.8228140071293772;0.5067114093959731;
32;0.8181627516778524;0.5058724832214765;
33;0.6447147651006712;0.5104953820319059;
34;0.8833647996643591;0.4802686817800168;
35;0.8063365505665128;0.5071368597816961;
36;0.8818467995802728;0.5146935348446684;
37;0.6358102434928632;0.5331654072208228;
38;0.8717464315701091;0.5117647058823529;
39;0.9048918748687802;0.47394957983193275;
40;0.6698866022679546;0.5319327731092437;
41;0.6784289014912833;0.4689075630252101;
42;0.7008403361344537;0.5210084033613446;
43;0.8613445378151261;0.49705634987384356;
44;0.7066610632485817;0.5021026072329688;
45;0.6353509878100042;0.5365853658536586;
46;0.6771074206432626;0.5256518082422204;
47;0.5971404541631623;0.47434819175777965;
48;0.8092935239697224;0.4983164983164983;
49;0.9335436382754995;0.4983164983164983;
50;0.9091291543962978;0.5244107744107744;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.547673690799082;0.5217028380634391;
2;0.6400250417362271;0.508347245409015;
3;0.715567612687813;0.5472013366750209;
4;0.7424337299102484;0.4803675856307435;
5;0.7749478079331942;0.48370927318295737;
6;0.8275214032157027;0.4954051796157059;
7;0.8105680868838764;0.4928989139515455;
8;0.8335421888053467;0.504180602006689;
9;0.8272404428660957;0.49414715719063546;
10;0.8763058921855411;0.4891304347826087;
11;0.864158829676071;0.5150501672240803;
12;0.8858695652173914;0.5066889632107023;
13;0.9015468227424749;0.5213389121338912;
14;0.8873092201547146;0.5129707112970712;
15;0.90987034713509;0.5288702928870292;
16;0.9115247856097051;0.5171548117154812;
17;0.9156903765690376;0.47280334728033474;
18;0.9217573221757323;0.5117252931323283;
19;0.9378531073446328;0.48576214405360135;
20;0.9158643784010047;0.474036850921273;
21;0.9522712999790663;0.4949748743718593;
22;0.9711055276381909;0.5293132328308208;
23;0.9384422110552764;0.5037720033528919;
24;0.9759162303664921;0.5155071248952221;
25;0.9733975701717638;0.5146689019279128;
26;0.9664781060129898;0.48365465213746855;
27;0.972338642078793;0.5037720033528919;
28;0.9867979882648784;0.4714765100671141;
29;0.9811360301823517;0.4790268456375839;
30;0.9656184486373166;0.5201342281879194;
31;0.9746278045711889;0.4983221476510067;
32;0.9351929530201343;0.5192953020134228;
33;0.9729446308724832;0.4903442485306465;
34;0.9815397524648626;0.48446683459277917;
35;0.9326479227864037;0.5088161209068011;
36;0.985099685204617;0.4945424013434089;
37;0.9685138539042821;0.5155331654072208;
38;0.9901343408900084;0.4613445378151261;
39;0.9494016376233466;0.5042016806722689;
40;0.9647207055858883;0.5100840336134453;
41;0.9798361688720857;0.5;
42;0.992436974789916;0.5058823529411764;
43;0.9897058823529412;0.49032800672834315;
44;0.9815087203193948;0.5172413793103449;
45;0.9836065573770492;0.5029436501261564;
46;0.9882278747109523;0.496215306980656;
47;0.9960050462573591;0.5273338940285954;
48;1.0;0.5025252525252525;
49;0.9707676130389065;0.49326599326599324;
50;0.9728649558266723;0.4941077441077441;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5366158981848529;0.5217028380634391;
2;0.5899415692821369;0.5350584307178631;
3;0.6750834724540902;0.5087719298245614;
4;0.7261532039240242;0.48120300751879697;
5;0.7701461377870563;0.49958228905597324;
6;0.7584046773856755;0.5087719298245614;
7;0.7644110275689223;0.5430242272347535;
8;0.8398078529657477;0.49414715719063546;
9;0.8748694380614164;0.5016722408026756;
10;0.9352277475971584;0.47240802675585286;
11;0.9264367816091954;0.4807692307692308;
12;0.9423076923076923;0.520066889632107;
13;0.9412625418060201;0.4794979079497908;
14;0.9272423165377378;0.507949790794979;
15;0.958594730238394;0.5196652719665272;
16;0.9811754862999372;0.497071129707113;
17;0.992887029288703;0.5263598326359833;
18;0.9947698744769874;0.509212730318258;
19;0.9922577945176815;0.49581239530988275;
20;0.9945583926329008;0.525963149078727;
21;0.9920452166631777;0.509212730318258;
22;0.9972780569514238;0.5117252931323283;
23;0.9886934673366834;0.49958088851634536;
24;0.9912041884816754;0.5046102263202011;
25;0.9981147884373691;0.509639564124057;
26;0.9974858579509742;0.5004191114836547;
27;0.9945515507124896;0.5297569153394803;
28;1.0;0.5033557046979866;
29;0.9997904003353595;0.5260067114093959;
30;0.99958071278826;0.5243288590604027;
31;1.0;0.47818791946308725;
32;1.0;0.5151006711409396;
33;0.9991610738255033;0.5264483627204031;
34;1.0;0.4979009235936188;
35;0.9987410826689047;0.5029387069689337;
36;1.0;0.5188916876574308;
37;1.0;0.4869857262804366;
38;1.0;0.49411764705882355;
39;1.0;0.5117647058823529;
40;1.0;0.5243697478991597;
41;1.0;0.5184873949579832;
42;0.9997899159663866;0.49747899159663866;
43;1.0;0.5063078216989066;
44;1.0;0.48107653490328006;
45;1.0;0.4920100925147183;
46;1.0;0.5088309503784693;
47;0.9997897392767031;0.4953742640874685;
48;1.0;0.515993265993266;
49;1.0;0.4772727272727273;
50;1.0;0.5067340067340067;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.075944085124135;0.5367278797996661;
2;51.81552587646077;0.5392320534223706;
3;51.982470784641066;0.5380116959064327;
4;52.118555625130455;0.545530492898914;
5;52.4008350730689;0.5396825396825397;
6;52.41177698893297;0.5338345864661654;
7;52.903091060985794;0.5405179615705932;
8;52.75689223057645;0.5392976588628763;
9;53.45728013369543;0.540133779264214;
10;53.5311324697033;0.5409698996655519;
11;53.45872518286311;0.5384615384615384;
12;54.117892976588635;0.5351170568561873;
13;54.0133779264214;0.5364016736401673;
14;54.129207610286436;0.5338912133891214;
15;53.95232120451694;0.5364016736401673;
16;54.1518510771805;0.5338912133891214;
17;54.16317991631799;0.5372384937238494;
18;53.80753138075314;0.533500837520938;
19;53.65139150449885;0.5343383584589615;
20;54.227710339054;0.5284757118927973;
21;54.30186309399204;0.5309882747068677;
22;54.25041876046901;0.5293132328308208;
23;54.14572864321608;0.5305951383067896;
24;54.575916230366495;0.5322715842414082;
25;54.18935902806871;0.5305951383067896;
26;54.03310287031218;0.5314333612740989;
27;54.37971500419112;0.5255658005029338;
28;54.715004191114836;0.5285234899328859;
29;54.51687277300357;0.5310402684563759;
30;54.75890985324947;0.5302013422818792;
31;54.41392325435102;0.5293624161073825;
32;55.39010067114094;0.5209731543624161;
33;55.45302013422819;0.5256087321578505;
34;55.67442836165303;0.5222502099076406;
35;55.686109945446916;0.5155331654072208;
36;56.222455403987404;0.5205709487825357;
37;56.549118387909324;0.5163727959697733;
38;56.27623845507976;0.519327731092437;
39;56.0781020365316;0.5134453781512605;
40;56.425871482570344;0.5134453781512605;
41;56.2906952320941;0.5117647058823529;
42;56.596638655462186;0.5134453781512605;
43;56.84873949579832;0.5138772077375946;
44;56.81865938222316;0.5096719932716569;
45;56.36822194199244;0.5105130361648444;
46;56.54824469203279;0.511354079058032;
47;56.53910849453322;0.511354079058032;
48;56.64423885618166;0.5126262626262627;
49;56.88748685594112;0.5126262626262627;
50;56.710138830458554;0.5058922558922558;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.77378964941569;0.5375626043405676;
3;51.982470784641066;0.5396825396825397;
4;51.99332080985181;0.5405179615705932;
5;52.35908141962422;0.5355054302422724;
6;52.59970766339528;0.5304928989139516;
7;52.86131996658312;0.5388471177944862;
8;52.673350041771094;0.5426421404682275;
9;53.39461040317527;0.5418060200668896;
10;53.51023819473464;0.540133779264214;
11;53.396029258098224;0.544314381270903;
12;54.03428093645485;0.5409698996655519;
13;53.992474916387955;0.5414225941422595;
14;54.10830022998119;0.5430962343096234;
15;54.01505646173149;0.5422594142259414;
16;54.17276720351391;0.5422594142259414;
17;54.16317991631799;0.5405857740585774;
18;53.661087866108794;0.5435510887772195;
19;53.7769407825905;0.5452261306532663;
20;54.39514441188782;0.541038525963149;
21;54.17626125183169;0.5385259631490787;
22;54.10385259631491;0.5402010050251256;
23;54.29229480737019;0.5406538139145013;
24;54.51308900523561;0.5389773679798826;
25;54.18935902806871;0.539815590947192;
26;53.86549340037712;0.5406538139145013;
27;54.48449287510477;0.539815590947192;
28;54.882648784576695;0.5461409395973155;
29;54.47495284007545;0.5394295302013423;
30;54.67505241090147;0.5461409395973155;
31;54.246173201929125;0.5411073825503355;
32;55.45302013422819;0.5444630872483222;
33;55.369127516778526;0.5373635600335852;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.306400839454355;0.5289672544080605;
37;56.42317380352645;0.5214105793450882;
38;56.507136859781696;0.5176470588235295;
39;56.330044089859335;0.5176470588235295;
40;56.32087358252835;0.5260504201680672;
41;56.20667926906112;0.5235294117647059;
42;56.57563025210084;0.5184873949579832;
43;56.72268907563025;0.5180824222035324;
44;56.881697835679766;0.5147182506307821;
45;56.32618747372846;0.5088309503784693;
46;56.443136430523445;0.5088309503784693;
47;56.53910849453322;0.5046257359125316;
48;56.72834314550042;0.5075757575757576;
49;56.92954784437434;0.5084175084175084;
50;56.62599915860328;0.5092592592592593;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;51.794657762938236;0.5383973288814691;
3;52.00333889816361;0.5396825396825397;
4;52.01419327906491;0.5421888053467001;
5;52.4008350730689;0.5371762740183793;
6;52.516182919189816;0.531328320802005;
7;52.88220551378446;0.5363408521303258;
8;52.694235588972425;0.5426421404682275;
9;53.45728013369543;0.5409698996655519;
10;53.5311324697033;0.540133779264214;
11;53.37513061650993;0.5418060200668896;
12;54.05518394648829;0.540133779264214;
13;53.90886287625418;0.5414225941422595;
14;54.10830022998119;0.5414225941422595;
15;53.97323295692179;0.5414225941422595;
16;54.089102698180305;0.5405857740585774;
17;54.121338912133886;0.5422594142259414;
18;53.74476987447699;0.5393634840871022;
19;53.75601590290856;0.542713567839196;
20;54.41607367099205;0.5393634840871022;
21;54.13439397111157;0.5385259631490787;
22;54.166666666666664;0.5393634840871022;
23;54.29229480737019;0.5381391450125733;
24;54.51308900523561;0.5381391450125733;
25;54.16841223292836;0.5406538139145013;
26;53.90739576786089;0.5414920368818106;
27;54.505448449287506;0.5389773679798826;
28;54.84073763621124;0.5444630872483222;
29;54.495912806539515;0.5369127516778524;
30;54.67505241090147;0.5444630872483222;
31;54.2881107150346;0.5394295302013423;
32;55.369127516778526;0.5419463087248322;
33;55.39010067114094;0.5340050377833753;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.507136859781696;0.5222502099076406;
38;56.486146095717885;0.519327731092437;
39;56.30904891874869;0.5168067226890757;
40;56.34187316253675;0.5260504201680672;
41;56.18567527830288;0.5260504201680672;
42;56.53361344537815;0.5201680672268908;
43;56.785714285714285;0.5197645079899075;
44;56.839672200042024;0.5130361648444071;
45;56.32618747372846;0.5105130361648444;
46;56.485179735127176;0.511354079058032;
47;56.518082422203534;0.5063078216989066;
48;56.72834314550042;0.5084175084175084;
49;56.8664563617245;0.5134680134680135;
50;56.6470340765671;0.5058922558922558;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;52.10767946577629;0.5392320534223706;
3;52.27462437395659;0.5396825396825397;
4;52.43164266332707;0.5388471177944862;
5;52.546972860125265;0.5355054302422724;
6;52.28648987262476;0.5329991645781119;
7;52.02589807852965;0.5338345864661654;
8;52.54803675856308;0.5317725752508361;
9;51.932316691038224;0.5359531772575251;
10;52.08942749686586;0.5309364548494984;
11;52.18390804597701;0.5317725752508361;
12;52.65468227424749;0.5267558528428093;
13;52.612876254180605;0.5263598326359833;
14;52.64478360861384;0.5263598326359833;
15;52.676704307821;0.5263598326359833;
16;52.66680610750889;0.5263598326359833;
17;52.86610878661088;0.5238493723849372;
18;52.78242677824267;0.52428810720268;
19;52.395898723582334;0.5251256281407035;
20;52.51151109250733;0.525963149078727;
21;52.54343730374712;0.525963149078727;
22;52.68006700167505;0.5251256281407035;
23;52.701005025125625;0.5247275775356245;
24;52.33507853403141;0.5297569153394803;
25;52.785923753665685;0.5230511316010059;
26;52.44081290592919;0.5238893545683152;
27;52.724224643755235;0.5238893545683152;
28;52.47275775356245;0.5310402684563759;
29;52.79815552295116;0.5243288590604027;
30;52.64150943396226;0.5251677852348994;
31;52.48479765149927;0.5268456375838926;
32;52.57969798657718;0.5335570469798657;
33;53.73322147651006;0.5096557514693535;
34;53.63960562198448;0.5130142737195634;
35;53.86067981535879;0.5138539042821159;
36;53.5781741867786;0.5188916876574308;
37;53.778337531486144;0.5071368597816961;
38;53.56842989084802;0.5159663865546219;
39;53.68465252991812;0.5100840336134453;
40;53.77992440151197;0.5084033613445378;
41;53.68620037807184;0.5109243697478991;
42;53.739495798319325;0.5092436974789916;
43;53.508403361344534;0.5046257359125316;
44;54.12901870140786;0.5054667788057191;
45;53.90920554854981;0.5096719932716569;
46;54.172797981921384;0.5021026072329688;
47;54.14213624894869;0.5054667788057191;
48;53.973927670311184;0.51010101010101;
49;53.52260778128286;0.5143097643097643;
50;53.82835506941524;0.5109427609427609;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05006473888649;0.538860103626943;
2;52.363479387006265;0.538860103626943;
3;52.09412780656304;0.5345423143350605;
4;52.26683937823834;0.5367329299913569;
5;52.47246814942776;0.5358686257562663;
6;52.6133909287257;0.5341400172860847;
7;52.99200691294016;0.5375972342264477;
8;53.34917891097667;0.5393258426966292;
9;53.284356093344854;0.5397923875432526;
10;53.16619840069159;0.5397923875432526;
11;53.17769130998703;0.5389273356401384;
12;53.729729729729726;0.5406574394463668;
13;53.61159169550172;0.5389273356401384;
14;53.67647058823529;0.5385281385281385;
15;53.77460523469608;0.5428571428571428;
16;53.82951103418434;0.5454545454545454;
17;53.408353170309454;0.5463203463203463;
18;53.961038961038966;0.5445887445887446;
19;53.74458874458874;0.5450606585788561;
20;53.799523706429966;0.5441941074523396;
21;54.04937202252057;0.5450606585788561;
22;54.12605588044185;0.5415944540727903;
23;54.24610051993067;0.5424610051993067;
24;54.24610051993067;0.546400693842151;
25;54.58288190682556;0.5437987857762359;
26;54.65973125270914;0.5403295750216826;
27;54.19466724474312;0.5420641803989592;
28;54.48829141370338;0.5394622723330442;
29;54.48829141370338;0.5399305555555556;
30;54.304923010193015;0.546875;
31;54.36008676789588;0.5425347222222222;
32;55.131264916467785;0.5477430555555556;
33;55.90277777777778;0.5364583333333334;
34;55.533854166666664;0.5334491746307559;
35;55.285435207293254;0.5351867940920938;
36;55.68823273990448;0.5317115551694179;
37;55.591748099891426;0.5325803649000869;
38;55.60382276281495;0.5325803649000869;
39;55.60382276281495;0.5356521739130434;
40;56.00695198783402;0.5339130434782609;
41;56.171229900043464;0.5278260869565218;
42;56.55292327754836;0.5252173913043479;
43;56.608695652173914;0.5278260869565218;
44;56.34782608695652;0.5274151436031331;
45;56.53402913676886;0.5282854656222803;
46;56.32883862548934;0.5317667536988686;
47;56.66739177724603;0.5274151436031331;
48;56.87554395126197;0.5300261096605744;
49;56.83202785030461;0.5174216027874564;
50;56.735582154515775;0.5156794425087108;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.04038704249053;0.5412457912457912;
2;51.90406059330949;0.5412457912457912;
3;52.02020202020202;0.5370370370370371;
4;52.20959595959596;0.5459140690817186;
5;52.68364554830563;0.5408593091828138;
6;52.71578947368422;0.5341196293176074;
7;52.76900400084228;0.5391743892165122;
8;52.906486941870256;0.540016849199663;
9;53.517270429654594;0.5370994940978078;
10;53.52854434379608;0.5370994940978078;
11;53.64517488411293;0.5379426644182125;
12;54.03582718651212;0.5396290050590219;
13;54.15261382799326;0.5396290050590219;
14;54.15261382799326;0.5409282700421941;
15;54.079696394686906;0.5434599156118144;
16;54.344158582876425;0.5417721518987342;
17;54.16578780847922;0.5451476793248945;
18;53.92405063291139;0.5417721518987342;
19;53.9873417721519;0.5396959459459459;
20;54.10424140113948;0.5396959459459459;
21;54.15787252005065;0.5388513513513513;
22;54.084863837872064;0.5388513513513513;
23;53.69510135135135;0.5422297297297297;
24;53.80067567567568;0.5435333896872359;
25;54.14994720168954;0.5409974640743872;
26;53.95014786649768;0.5452240067624683;
27;54.53200929642933;0.5486052409129332;
28;54.62806424344886;0.5469146238377007;
29;54.52240067624683;0.5431472081218274;
30;54.449376453181145;0.5439932318104906;
31;54.60887949260042;0.5346869712351946;
32;55.17022626348065;0.5431472081218274;
33;55.52030456852792;0.5397631133671743;
34;55.647208121827404;0.5309060118543607;
35;55.74360059234187;0.529212531752752;
36;56.03046974185357;0.5275190516511431;
37;56.00000000000001;0.5275190516511431;
38;55.82133784928027;0.5207451312447079;
39;55.927180355630824;0.5245762711864407;
40;56.15075164090621;0.5245762711864407;
41;55.781448538754766;0.5288135593220339;
42;56.15335733954671;0.5271186440677966;
43;56.92796610169491;0.5203389830508475;
44;56.525423728813564;0.5173876166242578;
45;56.55859292222929;0.5165394402035624;
46;56.358626536668076;0.5165394402035624;
47;56.32817468730125;0.5148430873621713;
48;56.59457167090755;0.5173876166242578;
49;56.46734520780322;0.5118845500848896;
50;56.182396606574756;0.5144312393887945;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;50.69893594825787;0.5317195325542571;
2;50.70951585976628;0.5317195325542571;
3;50.688647746243745;0.5321637426900585;
4;50.699227718639115;0.5321637426900585;
5;50.68893528183715;0.5321637426900585;
6;50.69951973272082;0.5321637426900585;
7;50.71010860484545;0.5321637426900585;
8;50.71010860484545;0.532608695652174;
9;50.69981199080844;0.532608695652174;
10;50.68951107396573;0.532608695652174;
11;50.67920585161965;0.532608695652174;
12;50.68979933110368;0.532608695652174;
13;50.68979933110368;0.5330543933054394;
14;50.700397240225804;0.5330543933054394;
15;50.71099958176495;0.5330543933054394;
16;50.700690232169;0.5330543933054394;
17;50.7112970711297;0.5330543933054394;
18;50.7112970711297;0.533500837520938;
19;50.700983469345054;0.533500837520938;
20;50.69066555043952;0.533500837520938;
21;50.680343311701904;0.533500837520938;
22;50.69095477386934;0.533500837520938;
23;50.69095477386934;0.5331098072087175;
24;50.680628272251305;0.5331098072087175;
25;50.69124423963134;0.5331098072087175;
26;50.68091347161114;0.5331098072087175;
27;50.69153394803018;0.5331098072087175;
28;50.69153394803018;0.5327181208053692;
29;50.7021588765458;0.5327181208053692;
30;50.712788259958074;0.5327181208053692;
31;50.7234221010694;0.5327181208053692;
32;50.73406040268457;0.5327181208053692;
33;50.713087248322154;0.5331654072208228;
34;50.72372561359345;0.5331654072208228;
35;50.7343684431389;0.5331654072208228;
36;50.724029380902415;0.5331654072208228;
37;50.71368597816961;0.5331654072208228;
38;50.73467674223342;0.5327731092436975;
39;50.72433340331723;0.5327731092436975;
40;50.734985300293985;0.5327731092436975;
41;50.72463768115942;0.5327731092436975;
42;50.71428571428571;0.5327731092436975;
43;50.71428571428571;0.5323801513877208;
44;50.724942214751;0.5323801513877208;
45;50.735603194619586;0.5323801513877208;
46;50.72524700441454;0.5323801513877208;
47;50.71488645920942;0.5323801513877208;
48;50.69386038687973;0.5328282828282829;
49;50.683491062039955;0.5328282828282829;
50;50.69415229280606;0.5328282828282829;

View File

@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.85726210350584;0.5375626043405676;
3;52.00333889816361;0.5388471177944862;
4;52.01419327906491;0.5388471177944862;
5;52.35908141962422;0.5355054302422724;
6;52.62058884944665;0.5304928989139516;
7;52.98663324979115;0.5371762740183793;
8;52.7360066833751;0.5426421404682275;
9;53.39461040317527;0.5409698996655519;
10;53.489343919765986;0.5409698996655519;
11;53.333333333333336;0.5426421404682275;
12;53.992474916387955;0.5392976588628763;
13;54.03428093645485;0.5414225941422595;
14;54.15011499059168;0.5405857740585774;
15;54.077791718946045;0.5414225941422595;
16;54.19368332984731;0.5414225941422595;
17;54.14225941422595;0.5397489539748954;
18;53.59832635983264;0.5443886097152428;
19;53.672316384180796;0.5443886097152428;
20;54.353285893679356;0.5402010050251256;
21;54.239062172911865;0.5385259631490787;
22;54.082914572864325;0.5343383584589615;
23;54.22948073701842;0.537300922045264;
24;54.47120418848167;0.5356244761106455;
25;54.18935902806871;0.5364626990779547;
26;53.84454221663524;0.5356244761106455;
27;54.40067057837384;0.539815590947192;
28;54.90360435875943;0.5444630872483222;
29;54.43303290714735;0.537751677852349;
30;54.65408805031446;0.5427852348993288;
31;54.26714195848186;0.5394295302013423;
32;55.3481543624161;0.5436241610738255;
33;55.39010067114094;0.5340050377833753;
34;55.779316131739044;0.5197313182199832;
35;55.77003776751993;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.36020151133502;0.5239294710327456;
38;56.44416456759026;0.5235294117647059;
39;56.37203443208062;0.5184873949579832;
40;56.34187316253675;0.5235294117647059;
41;56.24868725057761;0.5235294117647059;
42;56.596638655462186;0.5126050420168067;
43;56.76470588235294;0.5147182506307821;
44;56.90271065349863;0.5138772077375946;
45;56.284153005464475;0.5138772077375946;
46;56.42211477822157;0.511354079058032;
47;56.53910849453322;0.5079899074852817;
48;56.749369217830115;0.5067340067340067;
49;56.992639327024186;0.5092592592592593;
50;56.56289440471182;0.51010101010101;

Binary file not shown. (added image, 294 KiB)

Binary file not shown. (added image, 279 KiB)

Binary file not shown. (added image, 632 KiB)

Binary file not shown. (added image, 292 KiB)

61
plotter.py 100644
View File

@ -0,0 +1,61 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
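# the sweep result CSVs are semicolon-separated with columns: time_window;training_accuracy;testing_accuracy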
data = pd.read_csv("plots/data/MLP_20_10_5_2.csv", sep=";")
#data = pd.read_csv("plots/data/logistic_regression.csv", sep=";")
#data_SMA = pd.read_csv("plots/data/logistic_regression_SMA.csv", sep=";")
#data_SMA_20_50 = pd.read_csv("plots/data/logistic_regression_SMA_20_50.csv", sep=";")
#data_EMA = pd.read_csv("plots/data/logistic_regression_EMA.csv", sep=";")
#data_EMA_20_50 = pd.read_csv("plots/data/logistic_regression_EMA_20_50.csv", sep=";")
print(data)
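# plot training/testing accuracy against the lag window, rescaled to percent
# (assumes both columns are stored as fractions, as in the logistic-regression CSVs)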
fig = plt.figure(1, figsize=(15,10))
plt.plot(data["time_window"], data["training_accuracy"]*100, color=seshadri[0], label="Training Accuracy", linewidth=2)
plt.plot(data["time_window"], data["testing_accuracy"]*100, color=seshadri[1], label="Testing Accuracy", linewidth=2)
#plt.plot(data["time_window"], data["testing_accuracy"]*100, color=seshadri[0], label="Returns and Volume", linewidth=2)
#plt.plot(data_SMA_20_50["time_window"], data_SMA_20_50["testing_accuracy"]*100, color=seshadri[1], label="With SMA 20 and 50 candles", linewidth=2)
#plt.plot(data_SMA["time_window"], data_SMA["testing_accuracy"]*100, color=seshadri[2], label="With SMA 20, 50 and 200 candles", linewidth=2)
#plt.plot(data_EMA_20_50["time_window"], data_EMA_20_50["testing_accuracy"]*100, color=seshadri[3], label="With EMA 20 and 50 candles", linewidth=2)
#plt.plot(data_EMA["time_window"], data_EMA["testing_accuracy"]*100, color=seshadri[4], label="With EMA 20, 50 and 200 candles", linewidth=2)
#plot params
plt.xlim([0, 50])
#plt.ylim([50, 60])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.grid(True)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Lag (Days)', fontsize=14)
plt.ylabel(r'Accuracy (%)',fontsize=14) # label the y axis
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/MLP_20_10_5_2.png", dpi=300)
plt.show()

View File

@ -0,0 +1,91 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime
import os
#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
#stock_data = pd.read_csv("data/daily_MSFT.csv").iloc[::-1].reset_index(drop=True)
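# load cached MSFT data if available (pickle first, then CSV); otherwise download it with yfinance and cache both formats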
if os.path.isfile("data/MSFT_data.pkl"):
    stock_data = pd.read_pickle("data/MSFT_data.pkl")
elif os.path.isfile("data/MSFT_data.csv"):
    stock_data = pd.read_csv("data/MSFT_data.csv")
else:
    start_date = datetime(2000, 1, 1)
    end_date = datetime(2023, 10, 26)
    stock_data = yf.download('MSFT', start=start_date, end=end_date)
    stock_data.to_pickle("data/MSFT_data.pkl")
    stock_data.to_csv("data/MSFT_data.csv")
daily_returns = stock_data["Close"] - stock_data["Open"]
win_lose = np.zeros(daily_returns.size - 1)
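# win_lose[i] is 1 when the sign of the daily return persists from day i to day i+1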
for index, return_ in enumerate(daily_returns[:-1]):
    if (return_ > 0 and daily_returns.iloc[index + 1] > 0) or (return_ < 0 and daily_returns.iloc[index + 1] < 0):
        win_lose[index] = 1
    else:
        win_lose[index] = 0
win_rate = np.count_nonzero(win_lose == 1) / win_lose.size
print(win_rate)
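# express daily returns as a percentage of the open for the histogram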
percent_returns = daily_returns / stock_data["Open"] * 100
fig = plt.figure(1, figsize=(15,10))
plt.hist(percent_returns, bins = 120, range=(-12,12), facecolor=seshadri[0], alpha=0.8, edgecolor="white", label="Percentage daily returns occurrences")
#plt.plot(stock_data.index, stock_data["Close"], linestyle="-", color=seshadri[0])
#plt.plot(stock_data.index, stock_data["Adj Close"], linestyle="-", color=seshadri[1])
#plt.plot(stock_data.index, stock_data["close"] - 20, linestyle="-", color=seshadri[2])
#plt.plot(stock_data.index, stock_data["close"] - 30, linestyle="-", color=seshadri[3])
#plt.plot(stock_data.index, stock_data["close"] - 40, linestyle="-", color=seshadri[4])
#plt.plot(stock_data.index, stock_data["close"] - 50, linestyle="-", color=seshadri[5])
#plt.plot(stock_data.index, stock_data["close"] - 60, linestyle="-", color=seshadri[6])
#plt.plot(stock_data.index, stock_data["close"] - 70, linestyle="-", color=seshadri[7])
#plt.show()
#plot params
plt.xlim([-12,12])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)
plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)
plt.xlabel(r'Percentage daily return', fontsize=14)
plt.ylabel(r'Occurrences',fontsize=14) # label the y axis
plt.yscale('log')
plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/MSFT_daily_occs_semilogx.png", dpi=300)
plt.show()