dev #1

@ -0,0 +1,222 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry


def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array;
    # each output row concatenates a row with the time_window - 1 rows before it
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)


#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
    """
    Calculate the Root Mean Squared Error (RMSE)
    """
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    return rmse


def calculate_mape(y_true, y_pred):
    """
    Calculate the Mean Absolute Percentage Error (MAPE) %
    """
    y_pred, y_true = np.array(y_pred), np.array(y_true)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mape


train_quota = 0.8

if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()

minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

features = np.vstack((price, volume)).T

# Necessary for MAs
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))

# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)

for i in range(Y.size):
    Y[i] = norm_features[i+1, 0]

time_window = 20  # NOTE: overrides the command-line value parsed above

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

print(norm_features.shape, Y.shape)

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]

# LSTM layers expect 3D input: (samples, timesteps, features)
X_train = X_train.reshape(-1, X_train.shape[1], 1)
X_test = X_test.reshape(-1, X_test.shape[1], 1)

def LSTM_model():
    model = Sequential()

    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(Dropout(0.2))

    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(units=50))
    model.add(Dropout(0.2))

    model.add(Dense(units=1))

    return model

model = LSTM_model()
model.summary()
model.compile(
    optimizer="adam",
    loss="mean_squared_error"
)

# Save weights only for best model
checkpointer = ModelCheckpoint(
    filepath='weights_best.hdf5',
    verbose=2,
    save_best_only=True
)

if os.path.exists("./checkpoints/checkpoint"):
    model.load_weights("./checkpoints/my_checkpoint")
else:
    model.fit(
        X_train,
        Y_train,
        epochs=25,
        batch_size=32,
        callbacks=[checkpointer]
    )

    model.save_weights("./checkpoints/my_checkpoint")

prediction = model.predict(X_test)
predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()

counter = 0

#for i in range(prediction.shape[0]-1):
#    if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
#        counter = counter + 1

#print("acc: ", counter/prediction.shape[0])


test_prices = price[time_window - 1 + train_size:]

pred_ret = []
actual_ret = []
for j in range(len(test_prices) - 1):
    # the predicted price is tomorrow's price; compare it with tomorrow's actual return
    pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
    actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])

pred_ret_np = np.array(pred_ret)
actual_ret_np = np.array(actual_ret)

sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])

print(sign_comp)
print(sign_comp_red_nottoomuch)
print(sign_comp_red)
print(sign_comp_red_alot)

rmse = calculate_rmse(test_prices[1:], predicted_prices)
mape = calculate_mape(test_prices[1:], predicted_prices)

print("RMSE: ", rmse)
print("MAPE: ", mape)

rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
mape = calculate_mape(test_prices[1:301], predicted_prices[:300])

print("RMSE over 300 days: ", rmse)
print("MAPE over 300 days: ", mape)

#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])

fig = plt.figure(1, figsize=(12,10))
plt.plot(test_prices, color=seshadri[0], label="Registered Closing Price")
plt.plot(predicted_prices, color=seshadri[1], label="Prediction")

#plot params
plt.xlim([0,1200])
plt.ylim([100,400])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Price (USD)', fontsize=14) # label the y axis

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)

plt.savefig("plots/First_Attempt_LSTM_2.png", dpi=300)

plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
#    f.write(f"{time_window};{train_score};{score};\n")

@ -0,0 +1,238 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

np.set_printoptions(threshold=100)


def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)


#### Calculate the metrics RMSE and MAPE ####
def calculate_rmse(y_true, y_pred):
    """
    Calculate the Root Mean Squared Error (RMSE)
    """
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    return rmse


def calculate_mape(y_true, y_pred):
    """
    Calculate the Mean Absolute Percentage Error (MAPE) %
    """
    y_pred, y_true = np.array(y_pred), np.array(y_true)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mape


train_quota = 0.8

if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()
daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()

minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

#features = np.vstack((price, volume)).T

# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(price.reshape(-1, 1))

# binary labels: 1 if the next close-to-close return is non-negative, 0 otherwise
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
    if r >= 0:
        bin_rets[i] = 1
    else:
        bin_rets[i] = 0

bin_rets_np = np.array(bin_rets)

#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))

print("rets: ", rets)
print("bin_rets: ", bin_rets)

# merge data into 2d numpy array
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
#    Y[i] = norm_features[i+1, 0]

Y = bin_rets

time_window = 20  # NOTE: overrides the command-line value parsed above

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]

# LSTM layers expect 3D input: (samples, timesteps, features)
X_train = X_train.reshape(-1, X_train.shape[1], 1)
X_test = X_test.reshape(-1, X_test.shape[1], 1)

def LSTM_model():
    model = Sequential()

    model.add(LSTM(units=20, return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(Dropout(0.2))

    #model.add(LSTM(units=50, return_sequences=True))
    #model.add(Dropout(0.2))

    model.add(LSTM(units=20))
    model.add(Dropout(0.2))

    model.add(Dense(units=5))
    model.add(Dropout(0.3))

    model.add(Dense(units=1, activation="sigmoid"))

    return model

model = LSTM_model()
model.summary()
model.compile(
    optimizer="adam",
    loss="mean_squared_error"  # MSE on binary labels; binary_crossentropy is the usual choice for this setup
)

#if os.path.exists("./checkpoints/checkpoint"):
#    model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
    X_train,
    Y_train,
    shuffle=True,
    epochs=20,
    batch_size=20
)

#model.save_weights("./checkpoints/my_checkpoint")

prediction = model.predict(X_test)
print(prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
#    if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
#        counter = counter + 1

#print("acc: ", counter/prediction.shape[0])


#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
#    # the predicted price is tomorrow's price; compare it with tomorrow's actual return
#    pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
#    actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)

#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE over 300 days: ", rmse)
#print("MAPE over 300 days: ", mape)

#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])

fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Return Direction")
plt.plot(prediction, color=seshadri[1], label="Prediction")

#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Return direction (1 = up)', fontsize=14) # label the y axis

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)

plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)

plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
#    f.write(f"{time_window};{train_score};{score};\n")

@ -0,0 +1,230 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

np.set_printoptions(threshold=1000000)


def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)


train_quota = 0.8

if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

stock_data = pd.read_pickle("data/MSFT_data.pkl")

price = stock_data["Close"].to_numpy()
volume = stock_data["Volume"].to_numpy()

#minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler = preprocessing.StandardScaler()  # note: name kept from an earlier version; this is a StandardScaler
sec_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

#EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
#EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)

#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)

# Necessary for MAs
#norm_features = np.hstack((minmax_scaler.fit_transform(price.reshape(-1, 1)), sec_scaler.fit_transform(volume.reshape(-1, 1))))
norm_features = minmax_scaler.fit_transform(np.vstack((price, volume)).T)
#norm_features = np.hstack((norm_features, norm_EMAs))

# binary labels: 1 if the next close-to-close return is non-negative, 0 otherwise
rets = np.diff(price)
bin_rets = np.zeros(len(rets))
for i, r in enumerate(rets):
    if r >= 0:
        bin_rets[i] = 1
    else:
        bin_rets[i] = 0

bin_rets_np = np.array(bin_rets)

#norm_rets = sec_scaler.fit_transform(rets.reshape(-1, 1))

print("rets: ", rets)
print("bin_rets: ", bin_rets)

# merge data into 2d numpy array
#Y = np.zeros(norm_features.shape[0] - 1)
#for i in range(Y.size):
#    Y[i] = norm_features[i+1, 0]

Y = bin_rets

time_window = 3  # NOTE: overrides the command-line value parsed above

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size].reshape(-1, 1)

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:].reshape(-1, 1)

# LSTM layers expect 3D input: (samples, timesteps, features)
X_train = X_train.reshape(-1, X_train.shape[1], 1)
X_test = X_test.reshape(-1, X_test.shape[1], 1)

def LSTM_model():
    model = Sequential()

    model.add(LSTM(units=20, input_shape=(X_train.shape[1], 1)))
    #model.add(Dense(units = 20, activation="relu", input_shape=(X_train.shape[1],)))
    #model.add(Dropout(0.3))

    #model.add(LSTM(units=50, return_sequences=True))
    #model.add(Dropout(0.2))

    model.add(Dense(units=10, activation="relu"))

    model.add(Dense(units=5, activation="relu"))

    model.add(Dense(units=1, activation="sigmoid"))

    return model

model = LSTM_model()
model.summary()
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy']
)

#if os.path.exists("./checkpoints/checkpoint"):
#    model.load_weights("./checkpoints/my_checkpoint")
#else:
model.fit(
    X_train,
    Y_train,
    shuffle=True,
    epochs=50,
    batch_size=32
)

#model.save_weights("./checkpoints/my_checkpoint")

prediction = model.predict(X_test).flatten()
print("pred: ", prediction)
print(model.evaluate(X_test, Y_test))
#predicted_prices = minmax_scaler.inverse_transform(prediction).flatten()
#predicted_rets = sec_scaler.inverse_transform(prediction).flatten()
#print(predicted_rets)
#counter = 0
#for i in range(prediction.shape[0]-1):
#    if (prediction[i+1,] - prediction[i,] > 0 and predicted_prices[i+1,] - predicted_prices[i,] > 0) or (prediction[i+1,] - prediction[i,] < 0 and predicted_prices[i+1,] - predicted_prices[i,] < 0):
#        counter = counter + 1

#print("acc: ", counter/prediction.shape[0])


#test_prices = price[time_window - 1 + train_size:]
#pred_ret = []
#actual_ret = []
#for j in range(len(test_prices) - 1):
#    # the predicted price is tomorrow's price; compare it with tomorrow's actual return
#    pred_ret.append((predicted_prices[j] - test_prices[j])/test_prices[j])
#    actual_ret.append((test_prices[j+1] - test_prices[j])/test_prices[j])
#
#pred_ret_np = np.array(pred_ret)
#actual_ret_np = np.array(actual_ret)
#
#sign_comp = np.sum(np.sign(pred_ret_np) == np.sign(actual_ret_np))/len(pred_ret_np)
#sign_comp_red_nottoomuch = np.sum(np.sign(pred_ret_np[:200]) == np.sign(actual_ret_np[:200]))/len(pred_ret_np[:200])
#sign_comp_red = np.sum(np.sign(pred_ret_np[:100]) == np.sign(actual_ret_np[:100]))/len(pred_ret_np[:100])
#sign_comp_red_alot = np.sum(np.sign(pred_ret_np[:50]) == np.sign(actual_ret_np[:50]))/len(pred_ret_np[:50])
#print(sign_comp)
#print(sign_comp_red_nottoomuch)
#print(sign_comp_red)
#print(sign_comp_red_alot)

#rmse = calculate_rmse(test_prices[1:], predicted_prices)
#mape = calculate_mape(test_prices[1:], predicted_prices)
#
#print("RMSE: ", rmse)
#print("MAPE: ", mape)
#
#rmse = calculate_rmse(test_prices[1:301], predicted_prices[:300])
#mape = calculate_mape(test_prices[1:301], predicted_prices[:300])
#
#print("RMSE over 300 days: ", rmse)
#print("MAPE over 300 days: ", mape)

#plt.plot(pred_ret, color=seshadri[0])
#plt.plot(daily_returns[1:], color=seshadri[1])

fig = plt.figure(1, figsize=(12,10))
plt.plot(Y_test, color=seshadri[0], label="Actual Return Direction")
plt.plot(prediction, color=seshadri[1], label="Prediction")

#plot params
#plt.xlim([0,450])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

plt.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

plt.xlabel(r'Days (from last training)', fontsize=14)
plt.ylabel(r'Return direction (1 = up)', fontsize=14) # label the y axis

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)

plt.savefig("plots/LSTM_advanced_rets_1.png", dpi=300)

plt.show()
#with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
#    f.write(f"{time_window};{train_score};{score};\n")

@ -0,0 +1,119 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

train_quota = 0.8

def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)


if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()

features = np.vstack((daily_returns, volume)).T

# Necessary for MAs
part_features = std_scaler.fit_transform(features)

# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)

#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))

# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)

for i in range(Y.size):
    if daily_returns[i+1] >= 0:
        Y[i] = 1
    else:
        Y[i] = 0

# for the runs that use MAs up to 200
#Y = Y[49:]
#Y = Y[199:]

print(norm_features.shape, Y.shape)

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]


# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()


# let's try sklearn
from sklearn.neural_network import MLPClassifier
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
clf = MLPClassifier(hidden_layer_sizes=(20,10,5,2), max_iter=30000, verbose=True).fit(X_train, Y_train)
train_score = clf.score(X_train, Y_train)
score = clf.score(X_test, Y_test)
print("sklearn score, all default: ", score, " train ", train_score)

with open("plots/data/MLP_20_10_5_2.csv", "a") as f:
    f.write(f"{time_window};{train_score};{score};\n")

@ -0,0 +1,7 @@

# Stock Price Prediction

This is a simple stock price prediction project that tests various model configurations.

The final goal is to study how performance changes as the technology and the methods used are varied.
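
The scripts read `data/MSFT_data.pkl` and take an optional time-window (lag) argument on the command line, as in the provided sweep scripts; the window values below are arbitrary examples:

```
python3 MultiLayer_Perceptron.py 10
python3 logistic_regression_enlarge_only_rets.py 5
```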

Final try

@ -0,0 +1,145 @@

# Notes on the development of the project

The reference ticker for the first part of the study is Microsoft. Why:
* highly capitalized
* long-lived
* it is a tech stock, but it does not undergo dynamics too "strange" relative to normal market behavior (e.g., Tesla)

First, we test the performance of a simple, untrained model that tries to predict the sign of the next day's return from the sign of the previous day's return (+ follows + and - follows -).

A small bar chart is also included to give an idea of the distribution of the returns.

winrate detected: 0.47638123852445335
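
A minimal sketch of how this baseline can be scored (assuming the same `data/MSFT_data.pkl` frame the scripts use; the winrate above comes from the project's own run):

```python
import numpy as np
import pandas as pd

stock_data = pd.read_pickle("data/MSFT_data.pkl")
returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()

# predict that tomorrow's sign repeats today's sign, then measure the hit rate
predicted = np.sign(returns[:-1])
actual = np.sign(returns[1:])
print("winrate:", (predicted == actual).mean())
```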

First tests with logistic regression, adding past days as features: a slight increase in accuracy, but too many days leads to overfitting.

Trying the same test while adding some metric (e.g., moving averages) -> there is a slight improvement (the sketch below shows how the lagged features are built).
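
For reference, the lagged features are built with the repo's `enlarge_lag`: each row is concatenated with the `time_window - 1` rows that precede it. A toy run:

```python
import numpy as np

def enlarge_lag(to_enlarge, time_window=1):
    # concatenate each row with the time_window - 1 rows before it
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)
    return np.array(enlarged)

X = np.arange(5).reshape(-1, 1)      # 5 days, 1 feature
print(enlarge_lag(X, time_window=3))
# [[2 1 0]
#  [3 2 1]
#  [4 3 2]]  -> today's value first, then the two previous days
```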

In the MLP data file names, the numbers are the sizes of the hidden layers, in order of depth.
A first very simple test, architecture below:
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 20, 50)            10400

dropout (Dropout)            (None, 20, 50)            0

lstm_1 (LSTM)                (None, 20, 50)            20200

dropout_1 (Dropout)          (None, 20, 50)            0

lstm_2 (LSTM)                (None, 50)                20200

dropout_2 (Dropout)          (None, 50)                0

dense (Dense)                (None, 1)                 51

=================================================================
Total params: 50,851
Trainable params: 50,851
Non-trainable params: 0
a simple 25 epochs and a 0.8 / 0.2 split
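
The summary above matches the model in the first LSTM script; a sketch of the same stack (assuming the lagged, min-max-scaled closing price as input, shaped (samples, 20, 1)):

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

def LSTM_model():
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(20, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    return model

model = LSTM_model()
model.compile(optimizer="adam", loss="mean_squared_error")
model.summary()  # reproduces the 50,851-parameter table above
```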

the resulting plot is that one

with data (win rate on the returns):
whole testing set (): 0.4991624790619765
first 200 days: 0.605
first 100 days: 0.58
first 50 days: 0.66

over the whole set I get
RMSE: 76.4 (dollars; RMSE carries the same units as the price)
MAPE: 21.8 %

over 300 days:
RMSE over 300 days: 6.4 $
MAPE over 300 days: 2.9 %

In the presentation I first show the plot with MAPE and RMSE over everything, with some remarks, then zoom in with the specific MAPE and RMSE plus the win rate over fewer days.


In the first advanced one the architecture is:
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 10, 10)            480

dropout (Dropout)            (None, 10, 10)            0

lstm_1 (LSTM)                (None, 10)                840

dropout_1 (Dropout)          (None, 10)                0

dense (Dense)                (None, 5)                 55

dropout_2 (Dropout)          (None, 5)                 0

dense_1 (Dense)              (None, 1)                 6

=================================================================
Total params: 1,381
Trainable params: 1,381
Non-trainable params: 0
_________________________________________________________________


LSTM advanced 2: training data reduced to 2000 days, arch:
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 10, 20)            1760

dropout (Dropout)            (None, 10, 20)            0

lstm_1 (LSTM)                (None, 20)                3280

dropout_1 (Dropout)          (None, 20)                0

dense (Dense)                (None, 5)                 105

dropout_2 (Dropout)          (None, 5)                 0

dense_1 (Dense)              (None, 1)                 6

=================================================================
Total params: 5,151
Trainable params: 5,151
Non-trainable params: 0

results
RMSE: 10.799429328578809
MAPE: 3.1894335488381116
RMSE over 300 days: 11.607057105021592
MAPE over 300 days: 3.591834377775106

training of 50 epochs
but the win rate on the next day's return is still ~0.5


Number 3 has a much simplified architecture and keeps a time window of only 5 days:
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 10)                480

dropout (Dropout)            (None, 10)                0

dense (Dense)                (None, 1)                 11

=================================================================
Total params: 491
Trainable params: 491
Non-trainable params: 0
_________________________________________________________________
RMSE: 12.955399161548117
MAPE: 3.7480157718302904
RMSE over 300 days: 11.019121338505466
MAPE over 300 days: 3.3382726092879706

not much is gained in win rate

semilog histogram

@ -0,0 +1,134 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

stock_data = pd.read_pickle("data/MSFT_data.pkl")

daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy() * 100
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()

features = np.vstack((daily_returns, volume)).T

# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T

# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)

# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))

# Necessary for MAs
part_features = std_scaler.fit_transform(features)

# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))

#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))

# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)

#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))

dfeat = {"Daily Returns" : norm_features[:,0],
         "Volume" : norm_features[:,1],
         "EMA20" : norm_features[:,2],
         "EMA50" : norm_features[:,3]
         }

corr = pd.DataFrame(dfeat).corr()
fig = plt.figure(1, (11, 10))
sns.heatmap(corr, vmin=-1, vmax=1, center=0, cmap="mako")
plt.tick_params(labelsize=14)

plt.savefig("plots/Correlation_EMAs.png", dpi=300)

# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)

for i in range(Y.size):
    if daily_returns[i+1] >= 0:
        Y[i] = 1
    else:
        Y[i] = 0

# for the runs that use MAs up to 200
#Y = Y[49:]
#Y = Y[199:]

print(norm_features.shape, Y.shape)

fig, ax = plt.subplots(figsize=(15,10))

#plot params
#plt.xlim([-12,12])
#plt.ylim([-0.5,16])
ax.minorticks_on()
ax.tick_params(labelsize=14)
ax.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

ax.tick_params(direction='in', which='minor', length=5, bottom=True, top=True, left=True, right=True)
ax.tick_params(direction='in', which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

ax.set_xlim([0, 500])
#ax.set_ylim([-0.5, 0.5])

pd.plotting.autocorrelation_plot(daily_returns, ax=ax, color=seshadri[0], label="Daily Returns")
pd.plotting.autocorrelation_plot(np.abs(daily_returns), ax=ax, color=seshadri[1], label="Absolute Daily Returns")
pd.plotting.autocorrelation_plot(volume, ax=ax, color=seshadri[2], label="Volume")

ax.grid(False)
ax.set_xlabel(r'Lag', fontsize=14)
ax.set_ylabel(r'Autocorrelation', fontsize=14) # label the y axis

ax.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)
plt.savefig("plots/Autocorrelation_returns_volume_abs.png", dpi=300)

@ -0,0 +1,228 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

train_quota = 0.8

def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def logreg_inference(x, w, b):
    z = (x @ w) + b
    p = sigmoid(z)
    return p


def cross_entropy(P, Y):
    return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()


def logreg_train(X, Y, lambda_, lr=1e-4, steps=100000):
    # lambda_ (the regularization strength) is accepted but not used in this version
    # The training samples are defined as such (each row of X is a sample):
    # X[0, :] -> Y[0]
    # X[1, :] -> Y[1]

    m, n = X.shape

    # Initial values for the parameters
    w = np.zeros(n)
    b = 0

    # Initial values for the "precedent loss" and "convergence" variables, used to check convergence
    prec_loss = 0
    convergence = 0

    for step in range(steps):
        P = logreg_inference(X, w, b)
        loss = cross_entropy(P, Y)

        if step % 1000 == 0:
            print(step, loss)

        # Difference between "precedent loss" and "current loss"
        diff = np.absolute(prec_loss - loss)
        prec_loss = loss
        if diff < 0.00001:
            # If convergence is reached, the algorithm is stopped
            convergence = step
            break

        # Derivative of the loss function with respect to bias
        grad_b = (P - Y).mean()

        # Gradient of the loss function with respect to weights
        grad_w = (X.T @ (P - Y)) / m

        w -= lr * grad_w
        b -= lr * grad_b

        # Every 100 iterations the values of accuracy and loss are saved for plotting
        if step % 100 == 0:
            Yhat = (P > 0.5)
            acc_array.append((Y == Yhat).mean() * 100)
            losses.append(loss)

    # Print the iterations needed for convergence before returning
    print("Convergence = ", convergence)

    return w, b


if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()

features = np.vstack((daily_returns, volume)).T

# Scale volume data to obtain better results
#minmax_scaler = preprocessing.MinMaxScaler()
#norm_ret = std_scaler.fit_transform(daily_returns.reshape(-1,1)).flatten()
#norm_vol = minmax_scaler.fit_transform(volume.reshape(-1,1)).flatten()
#norm_features = np.vstack((norm_ret, norm_vol)).T

# Volumes and returns only
#norm_features = std_scaler.fit_transform(features)

# Add prices
#norm_prices = minmax_scaler.fit_transform(prices.reshape(-1, 1)).reshape(-1, 4)
#norm_ret_and_vol = std_scaler.fit_transform(features)
#norm_features = np.hstack((norm_ret_and_vol, norm_prices))

# Necessary for MAs
part_features = std_scaler.fit_transform(features)

# Add SMAs
#SMA_20 = stock_data["Close"].rolling(20).mean().to_numpy()
#SMA_50 = stock_data["Close"].rolling(50).mean().to_numpy()
#SMA_200 = stock_data["Close"].rolling(200).mean().to_numpy()
#SMAs = np.vstack((SMA_20, SMA_50)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[49:, ].reshape(-1, 1)).reshape(-1, 2)
#norm_features = np.hstack((part_features[49:, ], norm_SMAs))

#SMAs = np.vstack((SMA_20, SMA_50, SMA_200)).T
#norm_SMAs = minmax_scaler.fit_transform(SMAs[199:, ].reshape(-1, 1)).reshape(-1, 3)
#norm_features = np.hstack((part_features[199:, ], norm_SMAs))

# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)

#EMA_200 = stock_data["Close"].ewm(span=200, adjust=False).mean()
#EMAs = np.vstack((EMA_20, EMA_50, EMA_200)).T
#norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 3)
norm_features = np.hstack((part_features, norm_EMAs))

# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)

for i in range(Y.size):
    if daily_returns[i+1] >= 0:
        Y[i] = 1
    else:
        Y[i] = 0

# for the runs that use MAs up to 200
#Y = Y[49:]
#Y = Y[199:]

print(norm_features.shape, Y.shape)

if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]

#if time_window > 1:
#    X_train = enlarge_lag(X_train)
#    Y_train = Y_train[time_window-1:]
#
#    X_test = enlarge_lag(X_test)
#    Y_test = Y_test[time_window-1:]


# Lists to save accuracy and loss
acc_array = []
losses = []

w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)

# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# Training accuracy of the model: the last value recorded in the array
print("Training Acc: ", acc_array[-1])

P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)

# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)

with open("plots/data/logistic_regression_EMA_20_50.csv", "a") as f:
    f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")

@ -0,0 +1,203 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import yfinance as yf
from datetime import datetime
import os, sys

from sklearn import preprocessing

# bodacious colors
colors = sns.color_palette("rocket", 8)
# Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

train_quota = 0.8

def enlarge_lag(to_enlarge, time_window=1):
    # to_enlarge is the data already present, should be a numpy array
    enlarged = []
    for i in range(to_enlarge.shape[0] - time_window + 1):
        new_element = []
        for j in range(time_window):
            new_element.extend(to_enlarge[i + time_window - 1 - j, :])
        enlarged.append(new_element)

    return np.array(enlarged)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def logreg_inference(x, w, b):
    z = (x @ w) + b
    p = sigmoid(z)
    return p


def cross_entropy(P, Y):
    return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()


def logreg_train(X, Y, lambda_, lr=1e-4, steps=100000):
    # lambda_ (the regularization strength) is accepted but not used in this version
    # The training samples are defined as such (each row of X is a sample):
    # X[0, :] -> Y[0]
    # X[1, :] -> Y[1]

    m, n = X.shape

    # Initial values for the parameters
    w = np.zeros(n)
    b = 0

    # Initial values for the "precedent loss" and "convergence" variables, used to check convergence
    prec_loss = 0
    convergence = 0

    for step in range(steps):
        P = logreg_inference(X, w, b)
        loss = cross_entropy(P, Y)

        if step % 1000 == 0:
            print(step, loss)

        # Difference between "precedent loss" and "current loss"
        diff = np.absolute(prec_loss - loss)
        prec_loss = loss
        if diff < 0.00001:
            # If convergence is reached, the algorithm is stopped
            convergence = step
            break

        # Derivative of the loss function with respect to bias
        grad_b = (P - Y).mean()

        # Gradient of the loss function with respect to weights
        grad_w = (X.T @ (P - Y)) / m

        w -= lr * grad_w
        b -= lr * grad_b

        # Every 100 iterations the values of accuracy and loss are saved for plotting
        if step % 100 == 0:
            Yhat = (P > 0.5)
            acc_array.append((Y == Yhat).mean() * 100)
            losses.append(loss)

    # Print the iterations needed for convergence before returning
    print("Convergence = ", convergence)

    return w, b


if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy()
prices = stock_data[["Open", "High", "Low", "Close"]].to_numpy()
volume = stock_data["Volume"].to_numpy()

minmax_scaler = preprocessing.MinMaxScaler()
std_scaler = preprocessing.StandardScaler()

features = np.vstack((daily_returns, volume)).T

# Necessary for MAs
part_features = std_scaler.fit_transform(features)

# merge data into 2d numpy array
Y = np.zeros(features.shape[0] - 1)

for i in range(Y.size):
    if daily_returns[i+1] >= 0:
        Y[i] = 1
    else:
        Y[i] = 0

import copy

if time_window > 1:
    large_rets = enlarge_lag(part_features[:, 0].reshape(-1, 1), time_window)
    Y = Y[time_window-1:]
else:
    large_rets = copy.deepcopy(part_features[:, 0].reshape(-1, 1))

part_features = np.hstack((large_rets, part_features[time_window-1:, 1].reshape(-1, 1)))


# Add EMAs
EMA_20 = stock_data["Close"].ewm(span=20, adjust=False).mean()
EMA_50 = stock_data["Close"].ewm(span=50, adjust=False).mean()
EMAs = np.vstack((EMA_20, EMA_50)).T
norm_EMAs = minmax_scaler.fit_transform(EMAs.reshape(-1, 1)).reshape(-1, 2)

norm_features = np.hstack((part_features, norm_EMAs[time_window-1:,]))


print(norm_features.shape, Y.shape)


train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]

#if time_window > 1:
#    X_train = enlarge_lag(X_train)
#    Y_train = Y_train[time_window-1:]
#
#    X_test = enlarge_lag(X_test)
#    Y_test = Y_test[time_window-1:]


# Lists to save accuracy and loss
acc_array = []
losses = []

w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)

# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()
# Training accuracy of the model: the last value recorded in the array
print("Training Acc: ", acc_array[-1])

P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)

# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)

with open("plots/data/logistic_regression_EMA_20_50_only_daily_enlarged.csv", "a") as f:
    f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")

@ -0,0 +1,170 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
|
||||
import yfinance as yf
|
||||
from datetime import datetime
|
||||
import os, sys
|
||||
|
||||
from sklearn import preprocessing
|
||||
|
||||
#bodacious colors
|
||||
colors=sns.color_palette("rocket", 8)
|
||||
#Ram's colors, if desired
|
||||
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
|
||||
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry
|
||||
|
||||
train_quota = 0.8
|
||||
|
||||
def enlarge_lag(to_enlarge, time_window=1):
|
||||
# to_enlarge is the data already present, should be a numpy array
|
||||
enlarged = []
|
||||
for i in range(to_enlarge.shape[0] - time_window + 1):
|
||||
new_element = []
|
||||
for j in range(time_window):
|
||||
new_element.extend(to_enlarge[i + time_window - 1 - j, :])
|
||||
enlarged.append(new_element)
|
||||
|
||||
return np.array(enlarged)
|
||||
|
||||
def sigmoid(z):
|
||||
return 1 / (1 + np.exp(-z))
|
||||
|
||||
|
||||
def logreg_inference(x, w, b):
|
||||
z = (x @ w) + b
|
||||
p = sigmoid(z)
|
||||
return p
|
||||
|
||||
|
||||
def cross_entropy(P, Y):
|
||||
return (-Y * np.log(P) - (1 - Y) * np.log(1 - P)).mean()
|
||||
|
||||
|
||||
def logreg_train(X, Y, lambda_, lr = 1e-4, steps=100000):
|
||||
# The training samples are defined as such (each row of X is a sample):
|
||||
# X[0, :] -> Y[0]
|
||||
# X[1, :] -> Y[1]
|
||||
|
||||
m, n = X.shape
|
||||
|
||||
# Initial values for the parameters
|
||||
w = np.zeros(n)
|
||||
b = 0
|
||||
|
||||
# Initial values for the "precedent loss" and "convergence" variables, used to check convergence
|
||||
prec_loss = 0
|
||||
convergence = 0
|
||||
|
||||
for step in range(steps):
|
||||
P = logreg_inference(X, w, b)
|
||||
loss = cross_entropy(P, Y)
|
||||
|
||||
|
||||
if step % 1000 == 0:
|
||||
print(step, loss)
|
||||
|
||||
# Difference between "precedent loss" and "current loss"
|
||||
diff = np.absolute(prec_loss - loss)
|
||||
prec_loss = loss
|
||||
if diff < 0.00001:
|
||||
# If convergence is reached, the algorithm is stopped
|
||||
convergence = step
|
||||
break
|
||||
|
||||
# Derivative of the loss function with respect to bias
|
||||
grad_b = (P - Y).mean()
|
||||
|
||||
# Gradient of the loss function with respect to weights
|
||||
grad_w = (X.T @ (P - Y)) / m
|
||||
|
||||
w -= lr * grad_w
|
||||
b -= lr * grad_b
|
||||
|
||||
# Every 100 iteration the values of accuracy and loss are saved for plotting
|
||||
if step%100 == 0:
|
||||
Yhat = (P > 0.5)
|
||||
acc_array.append((Y == Yhat).mean() * 100)
|
||||
losses.append(loss)
|
||||
|
||||
# Print the iterations needed for convergence before returning
|
||||
print("Convergence = ", convergence)
|
||||
|
||||
return w, b
|
||||
|
||||
|
||||
if len(sys.argv) > 1:
    time_window = int(sys.argv[1])
else:
    time_window = 1

#time_window = 10

stock_data = pd.read_pickle("data/MSFT_data.pkl")

daily_returns = ((stock_data["Close"] - stock_data["Open"]) / stock_data["Open"]).to_numpy().reshape(-1,1)


# merge data into 2d numpy array
Y = np.zeros(daily_returns.shape[0] - 1)

print(daily_returns.shape, Y.shape)

# Y[i] is 1 when the next day's return is non-negative, 0 otherwise
for i in range(Y.size):
    if daily_returns[i+1] >= 0:
        Y[i] = 1
    else:
        Y[i] = 0

norm_features = copy.deepcopy(daily_returns)
if time_window > 1:
    norm_features = enlarge_lag(norm_features, time_window)
    Y = Y[time_window-1:]

train_size = int(norm_features.shape[0] * 0.8)
X_train = norm_features[:train_size, ]
Y_train = Y[:train_size]

# norm_features has one more row than Y, so the last feature row
# (which has no next-day label) is excluded from the test set
X_test = norm_features[train_size:-1, ]
Y_test = Y[train_size:]

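## Baseline note (added, not in the original): the share of "up" days is what
## a constant "always predict up" classifier would score, so it is the number
## the test accuracy has to beat.
#print("Baseline (always up):", Y_test.mean())
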
# Lists to save accuracy and loss (filled inside logreg_train)
acc_array = []
losses = []

w, b = logreg_train(X_train, Y_train, 0.0, 1e-3, 1000000)
print("Weights: ", w)
print("Bias: ", b)

# Iterations vs Accuracy plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, acc_array)
#plt.xlabel("Iterations")
#plt.ylabel("Accuracy")
#
## Iterations vs Loss plot
#plt.figure()
#plt.plot(np.arange(0, len(acc_array)) * 100, losses)
#plt.xlabel("Iterations")
#plt.ylabel("Losses")
#
#plt.show()

# The training accuracy of the model is the last value recorded in the array
print("Training Acc: ", acc_array[-1])

P_test = logreg_inference(X_test, w, b)
Yhat_test = (P_test > 0.5)
accuracy_test = (Y_test == Yhat_test).mean()
print("Test accuracy: ", 100*accuracy_test)


# let's try sklearn
#from sklearn.linear_model import LogisticRegression
#classifier = LogisticRegression(random_state=0, solver="saga").fit(X_train, Y_train)
#score = classifier.score(X_test, Y_test)
#print("sklearn score, all default: ", score)

# Note: acc_array holds percentages while accuracy_test is a fraction,
# so the two accuracy columns written below are on different scales.
with open("plots/data/logistic_regression_only_rets.csv", "a") as f:
    f.write(f"{time_window};{acc_array[-1]};{accuracy_test};\n")
@ -0,0 +1,8 @@
#!/bin/bash

for i in $(seq 1 50);
do
    echo "Running with time window $i"
    python3 logistic_regression_enlarge_only_rets.py $i
done
@ -0,0 +1,8 @@
#!/bin/bash

for i in $(seq 1 50);
do
    echo "Running with time window $i"
    python3 MultiLayer_Perceptron.py $i
done
[15 plot images added (binary files, 120-510 KiB)]
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5157521385353641;0.5325542570951586;
2;0.5070951585976627;0.5317195325542571;
3;0.6231218697829716;0.48955722639933164;
4;0.6103109997912753;0.4653299916457811;
5;0.6471816283924844;0.4903926482873851;
6;0.6604719148047609;0.4928989139515455;
7;0.6641604010025063;0.5137844611528822;
8;0.7013366750208856;0.520066889632107;
9;0.6897848339252142;0.48327759197324416;
10;0.6136648558295027;0.46321070234113715;
11;0.6587251828631139;0.4866220735785953;
12;0.7215719063545151;0.5259197324414716;
13;0.6684782608695652;0.4811715481171548;
14;0.748693288730922;0.5188284518828452;
15;0.740694270179841;0.5154811715481171;
16;0.6801924283622673;0.47447698744769873;
17;0.7684100418410041;0.5238493723849372;
18;0.7566945606694561;0.5008375209380235;
19;0.7928436911487758;0.5309882747068677;
20;0.8139388865634156;0.49413735343383586;
21;0.6206824366757379;0.5301507537688442;
22;0.7889447236180904;0.507537688442211;
23;0.6379815745393634;0.4660519698239732;
24;0.6751832460732984;0.5071248952221291;
25;0.6258902387934646;0.46437552388935455;
26;0.7301487534045673;0.49706621961441744;
27;0.6787510477787091;0.46689019279128247;
28;0.8658843252305113;0.47651006711409394;
29;0.7048836721861245;0.535234899328859;
30;0.8633123689727463;0.5075503355704698;
31;0.8228140071293772;0.5067114093959731;
32;0.8181627516778524;0.5058724832214765;
33;0.6447147651006712;0.5104953820319059;
34;0.8833647996643591;0.4802686817800168;
35;0.8063365505665128;0.5071368597816961;
36;0.8818467995802728;0.5146935348446684;
37;0.6358102434928632;0.5331654072208228;
38;0.8717464315701091;0.5117647058823529;
39;0.9048918748687802;0.47394957983193275;
40;0.6698866022679546;0.5319327731092437;
41;0.6784289014912833;0.4689075630252101;
42;0.7008403361344537;0.5210084033613446;
43;0.8613445378151261;0.49705634987384356;
44;0.7066610632485817;0.5021026072329688;
45;0.6353509878100042;0.5365853658536586;
46;0.6771074206432626;0.5256518082422204;
47;0.5971404541631623;0.47434819175777965;
48;0.8092935239697224;0.4983164983164983;
49;0.9335436382754995;0.4983164983164983;
50;0.9091291543962978;0.5244107744107744;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.547673690799082;0.5217028380634391;
2;0.6400250417362271;0.508347245409015;
3;0.715567612687813;0.5472013366750209;
4;0.7424337299102484;0.4803675856307435;
5;0.7749478079331942;0.48370927318295737;
6;0.8275214032157027;0.4954051796157059;
7;0.8105680868838764;0.4928989139515455;
8;0.8335421888053467;0.504180602006689;
9;0.8272404428660957;0.49414715719063546;
10;0.8763058921855411;0.4891304347826087;
11;0.864158829676071;0.5150501672240803;
12;0.8858695652173914;0.5066889632107023;
13;0.9015468227424749;0.5213389121338912;
14;0.8873092201547146;0.5129707112970712;
15;0.90987034713509;0.5288702928870292;
16;0.9115247856097051;0.5171548117154812;
17;0.9156903765690376;0.47280334728033474;
18;0.9217573221757323;0.5117252931323283;
19;0.9378531073446328;0.48576214405360135;
20;0.9158643784010047;0.474036850921273;
21;0.9522712999790663;0.4949748743718593;
22;0.9711055276381909;0.5293132328308208;
23;0.9384422110552764;0.5037720033528919;
24;0.9759162303664921;0.5155071248952221;
25;0.9733975701717638;0.5146689019279128;
26;0.9664781060129898;0.48365465213746855;
27;0.972338642078793;0.5037720033528919;
28;0.9867979882648784;0.4714765100671141;
29;0.9811360301823517;0.4790268456375839;
30;0.9656184486373166;0.5201342281879194;
31;0.9746278045711889;0.4983221476510067;
32;0.9351929530201343;0.5192953020134228;
33;0.9729446308724832;0.4903442485306465;
34;0.9815397524648626;0.48446683459277917;
35;0.9326479227864037;0.5088161209068011;
36;0.985099685204617;0.4945424013434089;
37;0.9685138539042821;0.5155331654072208;
38;0.9901343408900084;0.4613445378151261;
39;0.9494016376233466;0.5042016806722689;
40;0.9647207055858883;0.5100840336134453;
41;0.9798361688720857;0.5;
42;0.992436974789916;0.5058823529411764;
43;0.9897058823529412;0.49032800672834315;
44;0.9815087203193948;0.5172413793103449;
45;0.9836065573770492;0.5029436501261564;
46;0.9882278747109523;0.496215306980656;
47;0.9960050462573591;0.5273338940285954;
48;1.0;0.5025252525252525;
49;0.9707676130389065;0.49326599326599324;
50;0.9728649558266723;0.4941077441077441;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;0.5366158981848529;0.5217028380634391;
2;0.5899415692821369;0.5350584307178631;
3;0.6750834724540902;0.5087719298245614;
4;0.7261532039240242;0.48120300751879697;
5;0.7701461377870563;0.49958228905597324;
6;0.7584046773856755;0.5087719298245614;
7;0.7644110275689223;0.5430242272347535;
8;0.8398078529657477;0.49414715719063546;
9;0.8748694380614164;0.5016722408026756;
10;0.9352277475971584;0.47240802675585286;
11;0.9264367816091954;0.4807692307692308;
12;0.9423076923076923;0.520066889632107;
13;0.9412625418060201;0.4794979079497908;
14;0.9272423165377378;0.507949790794979;
15;0.958594730238394;0.5196652719665272;
16;0.9811754862999372;0.497071129707113;
17;0.992887029288703;0.5263598326359833;
18;0.9947698744769874;0.509212730318258;
19;0.9922577945176815;0.49581239530988275;
20;0.9945583926329008;0.525963149078727;
21;0.9920452166631777;0.509212730318258;
22;0.9972780569514238;0.5117252931323283;
23;0.9886934673366834;0.49958088851634536;
24;0.9912041884816754;0.5046102263202011;
25;0.9981147884373691;0.509639564124057;
26;0.9974858579509742;0.5004191114836547;
27;0.9945515507124896;0.5297569153394803;
28;1.0;0.5033557046979866;
29;0.9997904003353595;0.5260067114093959;
30;0.99958071278826;0.5243288590604027;
31;1.0;0.47818791946308725;
32;1.0;0.5151006711409396;
33;0.9991610738255033;0.5264483627204031;
34;1.0;0.4979009235936188;
35;0.9987410826689047;0.5029387069689337;
36;1.0;0.5188916876574308;
37;1.0;0.4869857262804366;
38;1.0;0.49411764705882355;
39;1.0;0.5117647058823529;
40;1.0;0.5243697478991597;
41;1.0;0.5184873949579832;
42;0.9997899159663866;0.49747899159663866;
43;1.0;0.5063078216989066;
44;1.0;0.48107653490328006;
45;1.0;0.4920100925147183;
46;1.0;0.5088309503784693;
47;0.9997897392767031;0.4953742640874685;
48;1.0;0.515993265993266;
49;1.0;0.4772727272727273;
50;1.0;0.5067340067340067;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.075944085124135;0.5367278797996661;
2;51.81552587646077;0.5392320534223706;
3;51.982470784641066;0.5380116959064327;
4;52.118555625130455;0.545530492898914;
5;52.4008350730689;0.5396825396825397;
6;52.41177698893297;0.5338345864661654;
7;52.903091060985794;0.5405179615705932;
8;52.75689223057645;0.5392976588628763;
9;53.45728013369543;0.540133779264214;
10;53.5311324697033;0.5409698996655519;
11;53.45872518286311;0.5384615384615384;
12;54.117892976588635;0.5351170568561873;
13;54.0133779264214;0.5364016736401673;
14;54.129207610286436;0.5338912133891214;
15;53.95232120451694;0.5364016736401673;
16;54.1518510771805;0.5338912133891214;
17;54.16317991631799;0.5372384937238494;
18;53.80753138075314;0.533500837520938;
19;53.65139150449885;0.5343383584589615;
20;54.227710339054;0.5284757118927973;
21;54.30186309399204;0.5309882747068677;
22;54.25041876046901;0.5293132328308208;
23;54.14572864321608;0.5305951383067896;
24;54.575916230366495;0.5322715842414082;
25;54.18935902806871;0.5305951383067896;
26;54.03310287031218;0.5314333612740989;
27;54.37971500419112;0.5255658005029338;
28;54.715004191114836;0.5285234899328859;
29;54.51687277300357;0.5310402684563759;
30;54.75890985324947;0.5302013422818792;
31;54.41392325435102;0.5293624161073825;
32;55.39010067114094;0.5209731543624161;
33;55.45302013422819;0.5256087321578505;
34;55.67442836165303;0.5222502099076406;
35;55.686109945446916;0.5155331654072208;
36;56.222455403987404;0.5205709487825357;
37;56.549118387909324;0.5163727959697733;
38;56.27623845507976;0.519327731092437;
39;56.0781020365316;0.5134453781512605;
40;56.425871482570344;0.5134453781512605;
41;56.2906952320941;0.5117647058823529;
42;56.596638655462186;0.5134453781512605;
43;56.84873949579832;0.5138772077375946;
44;56.81865938222316;0.5096719932716569;
45;56.36822194199244;0.5105130361648444;
46;56.54824469203279;0.511354079058032;
47;56.53910849453322;0.511354079058032;
48;56.64423885618166;0.5126262626262627;
49;56.88748685594112;0.5126262626262627;
50;56.710138830458554;0.5058922558922558;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.77378964941569;0.5375626043405676;
3;51.982470784641066;0.5396825396825397;
4;51.99332080985181;0.5405179615705932;
5;52.35908141962422;0.5355054302422724;
6;52.59970766339528;0.5304928989139516;
7;52.86131996658312;0.5388471177944862;
8;52.673350041771094;0.5426421404682275;
9;53.39461040317527;0.5418060200668896;
10;53.51023819473464;0.540133779264214;
11;53.396029258098224;0.544314381270903;
12;54.03428093645485;0.5409698996655519;
13;53.992474916387955;0.5414225941422595;
14;54.10830022998119;0.5430962343096234;
15;54.01505646173149;0.5422594142259414;
16;54.17276720351391;0.5422594142259414;
17;54.16317991631799;0.5405857740585774;
18;53.661087866108794;0.5435510887772195;
19;53.7769407825905;0.5452261306532663;
20;54.39514441188782;0.541038525963149;
21;54.17626125183169;0.5385259631490787;
22;54.10385259631491;0.5402010050251256;
23;54.29229480737019;0.5406538139145013;
24;54.51308900523561;0.5389773679798826;
25;54.18935902806871;0.539815590947192;
26;53.86549340037712;0.5406538139145013;
27;54.48449287510477;0.539815590947192;
28;54.882648784576695;0.5461409395973155;
29;54.47495284007545;0.5394295302013423;
30;54.67505241090147;0.5461409395973155;
31;54.246173201929125;0.5411073825503355;
32;55.45302013422819;0.5444630872483222;
33;55.369127516778526;0.5373635600335852;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.306400839454355;0.5289672544080605;
37;56.42317380352645;0.5214105793450882;
38;56.507136859781696;0.5176470588235295;
39;56.330044089859335;0.5176470588235295;
40;56.32087358252835;0.5260504201680672;
41;56.20667926906112;0.5235294117647059;
42;56.57563025210084;0.5184873949579832;
43;56.72268907563025;0.5180824222035324;
44;56.881697835679766;0.5147182506307821;
45;56.32618747372846;0.5088309503784693;
46;56.443136430523445;0.5088309503784693;
47;56.53910849453322;0.5046257359125316;
48;56.72834314550042;0.5075757575757576;
49;56.92954784437434;0.5084175084175084;
50;56.62599915860328;0.5092592592592593;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;51.794657762938236;0.5383973288814691;
3;52.00333889816361;0.5396825396825397;
4;52.01419327906491;0.5421888053467001;
5;52.4008350730689;0.5371762740183793;
6;52.516182919189816;0.531328320802005;
7;52.88220551378446;0.5363408521303258;
8;52.694235588972425;0.5426421404682275;
9;53.45728013369543;0.5409698996655519;
10;53.5311324697033;0.540133779264214;
11;53.37513061650993;0.5418060200668896;
12;54.05518394648829;0.540133779264214;
13;53.90886287625418;0.5414225941422595;
14;54.10830022998119;0.5414225941422595;
15;53.97323295692179;0.5414225941422595;
16;54.089102698180305;0.5405857740585774;
17;54.121338912133886;0.5422594142259414;
18;53.74476987447699;0.5393634840871022;
19;53.75601590290856;0.542713567839196;
20;54.41607367099205;0.5393634840871022;
21;54.13439397111157;0.5385259631490787;
22;54.166666666666664;0.5393634840871022;
23;54.29229480737019;0.5381391450125733;
24;54.51308900523561;0.5381391450125733;
25;54.16841223292836;0.5406538139145013;
26;53.90739576786089;0.5414920368818106;
27;54.505448449287506;0.5389773679798826;
28;54.84073763621124;0.5444630872483222;
29;54.495912806539515;0.5369127516778524;
30;54.67505241090147;0.5444630872483222;
31;54.2881107150346;0.5394295302013423;
32;55.369127516778526;0.5419463087248322;
33;55.39010067114094;0.5340050377833753;
34;55.695405915670236;0.5197313182199832;
35;55.74905581200168;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.507136859781696;0.5222502099076406;
38;56.486146095717885;0.519327731092437;
39;56.30904891874869;0.5168067226890757;
40;56.34187316253675;0.5260504201680672;
41;56.18567527830288;0.5260504201680672;
42;56.53361344537815;0.5201680672268908;
43;56.785714285714285;0.5197645079899075;
44;56.839672200042024;0.5130361648444071;
45;56.32618747372846;0.5105130361648444;
46;56.485179735127176;0.511354079058032;
47;56.518082422203534;0.5063078216989066;
48;56.72834314550042;0.5084175084175084;
49;56.8664563617245;0.5134680134680135;
50;56.6470340765671;0.5058922558922558;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05508032547465;0.5392320534223706;
2;52.10767946577629;0.5392320534223706;
3;52.27462437395659;0.5396825396825397;
4;52.43164266332707;0.5388471177944862;
5;52.546972860125265;0.5355054302422724;
6;52.28648987262476;0.5329991645781119;
7;52.02589807852965;0.5338345864661654;
8;52.54803675856308;0.5317725752508361;
9;51.932316691038224;0.5359531772575251;
10;52.08942749686586;0.5309364548494984;
11;52.18390804597701;0.5317725752508361;
12;52.65468227424749;0.5267558528428093;
13;52.612876254180605;0.5263598326359833;
14;52.64478360861384;0.5263598326359833;
15;52.676704307821;0.5263598326359833;
16;52.66680610750889;0.5263598326359833;
17;52.86610878661088;0.5238493723849372;
18;52.78242677824267;0.52428810720268;
19;52.395898723582334;0.5251256281407035;
20;52.51151109250733;0.525963149078727;
21;52.54343730374712;0.525963149078727;
22;52.68006700167505;0.5251256281407035;
23;52.701005025125625;0.5247275775356245;
24;52.33507853403141;0.5297569153394803;
25;52.785923753665685;0.5230511316010059;
26;52.44081290592919;0.5238893545683152;
27;52.724224643755235;0.5238893545683152;
28;52.47275775356245;0.5310402684563759;
29;52.79815552295116;0.5243288590604027;
30;52.64150943396226;0.5251677852348994;
31;52.48479765149927;0.5268456375838926;
32;52.57969798657718;0.5335570469798657;
33;53.73322147651006;0.5096557514693535;
34;53.63960562198448;0.5130142737195634;
35;53.86067981535879;0.5138539042821159;
36;53.5781741867786;0.5188916876574308;
37;53.778337531486144;0.5071368597816961;
38;53.56842989084802;0.5159663865546219;
39;53.68465252991812;0.5100840336134453;
40;53.77992440151197;0.5084033613445378;
41;53.68620037807184;0.5109243697478991;
42;53.739495798319325;0.5092436974789916;
43;53.508403361344534;0.5046257359125316;
44;54.12901870140786;0.5054667788057191;
45;53.90920554854981;0.5096719932716569;
46;54.172797981921384;0.5021026072329688;
47;54.14213624894869;0.5054667788057191;
48;53.973927670311184;0.51010101010101;
49;53.52260778128286;0.5143097643097643;
50;53.82835506941524;0.5109427609427609;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.05006473888649;0.538860103626943;
2;52.363479387006265;0.538860103626943;
3;52.09412780656304;0.5345423143350605;
4;52.26683937823834;0.5367329299913569;
5;52.47246814942776;0.5358686257562663;
6;52.6133909287257;0.5341400172860847;
7;52.99200691294016;0.5375972342264477;
8;53.34917891097667;0.5393258426966292;
9;53.284356093344854;0.5397923875432526;
10;53.16619840069159;0.5397923875432526;
11;53.17769130998703;0.5389273356401384;
12;53.729729729729726;0.5406574394463668;
13;53.61159169550172;0.5389273356401384;
14;53.67647058823529;0.5385281385281385;
15;53.77460523469608;0.5428571428571428;
16;53.82951103418434;0.5454545454545454;
17;53.408353170309454;0.5463203463203463;
18;53.961038961038966;0.5445887445887446;
19;53.74458874458874;0.5450606585788561;
20;53.799523706429966;0.5441941074523396;
21;54.04937202252057;0.5450606585788561;
22;54.12605588044185;0.5415944540727903;
23;54.24610051993067;0.5424610051993067;
24;54.24610051993067;0.546400693842151;
25;54.58288190682556;0.5437987857762359;
26;54.65973125270914;0.5403295750216826;
27;54.19466724474312;0.5420641803989592;
28;54.48829141370338;0.5394622723330442;
29;54.48829141370338;0.5399305555555556;
30;54.304923010193015;0.546875;
31;54.36008676789588;0.5425347222222222;
32;55.131264916467785;0.5477430555555556;
33;55.90277777777778;0.5364583333333334;
34;55.533854166666664;0.5334491746307559;
35;55.285435207293254;0.5351867940920938;
36;55.68823273990448;0.5317115551694179;
37;55.591748099891426;0.5325803649000869;
38;55.60382276281495;0.5325803649000869;
39;55.60382276281495;0.5356521739130434;
40;56.00695198783402;0.5339130434782609;
41;56.171229900043464;0.5278260869565218;
42;56.55292327754836;0.5252173913043479;
43;56.608695652173914;0.5278260869565218;
44;56.34782608695652;0.5274151436031331;
45;56.53402913676886;0.5282854656222803;
46;56.32883862548934;0.5317667536988686;
47;56.66739177724603;0.5274151436031331;
48;56.87554395126197;0.5300261096605744;
49;56.83202785030461;0.5174216027874564;
50;56.735582154515775;0.5156794425087108;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;52.04038704249053;0.5412457912457912;
2;51.90406059330949;0.5412457912457912;
3;52.02020202020202;0.5370370370370371;
4;52.20959595959596;0.5459140690817186;
5;52.68364554830563;0.5408593091828138;
6;52.71578947368422;0.5341196293176074;
7;52.76900400084228;0.5391743892165122;
8;52.906486941870256;0.540016849199663;
9;53.517270429654594;0.5370994940978078;
10;53.52854434379608;0.5370994940978078;
11;53.64517488411293;0.5379426644182125;
12;54.03582718651212;0.5396290050590219;
13;54.15261382799326;0.5396290050590219;
14;54.15261382799326;0.5409282700421941;
15;54.079696394686906;0.5434599156118144;
16;54.344158582876425;0.5417721518987342;
17;54.16578780847922;0.5451476793248945;
18;53.92405063291139;0.5417721518987342;
19;53.9873417721519;0.5396959459459459;
20;54.10424140113948;0.5396959459459459;
21;54.15787252005065;0.5388513513513513;
22;54.084863837872064;0.5388513513513513;
23;53.69510135135135;0.5422297297297297;
24;53.80067567567568;0.5435333896872359;
25;54.14994720168954;0.5409974640743872;
26;53.95014786649768;0.5452240067624683;
27;54.53200929642933;0.5486052409129332;
28;54.62806424344886;0.5469146238377007;
29;54.52240067624683;0.5431472081218274;
30;54.449376453181145;0.5439932318104906;
31;54.60887949260042;0.5346869712351946;
32;55.17022626348065;0.5431472081218274;
33;55.52030456852792;0.5397631133671743;
34;55.647208121827404;0.5309060118543607;
35;55.74360059234187;0.529212531752752;
36;56.03046974185357;0.5275190516511431;
37;56.00000000000001;0.5275190516511431;
38;55.82133784928027;0.5207451312447079;
39;55.927180355630824;0.5245762711864407;
40;56.15075164090621;0.5245762711864407;
41;55.781448538754766;0.5288135593220339;
42;56.15335733954671;0.5271186440677966;
43;56.92796610169491;0.5203389830508475;
44;56.525423728813564;0.5173876166242578;
45;56.55859292222929;0.5165394402035624;
46;56.358626536668076;0.5165394402035624;
47;56.32817468730125;0.5148430873621713;
48;56.59457167090755;0.5173876166242578;
49;56.46734520780322;0.5118845500848896;
50;56.182396606574756;0.5144312393887945;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;50.69893594825787;0.5317195325542571;
2;50.70951585976628;0.5317195325542571;
3;50.688647746243745;0.5321637426900585;
4;50.699227718639115;0.5321637426900585;
5;50.68893528183715;0.5321637426900585;
6;50.69951973272082;0.5321637426900585;
7;50.71010860484545;0.5321637426900585;
8;50.71010860484545;0.532608695652174;
9;50.69981199080844;0.532608695652174;
10;50.68951107396573;0.532608695652174;
11;50.67920585161965;0.532608695652174;
12;50.68979933110368;0.532608695652174;
13;50.68979933110368;0.5330543933054394;
14;50.700397240225804;0.5330543933054394;
15;50.71099958176495;0.5330543933054394;
16;50.700690232169;0.5330543933054394;
17;50.7112970711297;0.5330543933054394;
18;50.7112970711297;0.533500837520938;
19;50.700983469345054;0.533500837520938;
20;50.69066555043952;0.533500837520938;
21;50.680343311701904;0.533500837520938;
22;50.69095477386934;0.533500837520938;
23;50.69095477386934;0.5331098072087175;
24;50.680628272251305;0.5331098072087175;
25;50.69124423963134;0.5331098072087175;
26;50.68091347161114;0.5331098072087175;
27;50.69153394803018;0.5331098072087175;
28;50.69153394803018;0.5327181208053692;
29;50.7021588765458;0.5327181208053692;
30;50.712788259958074;0.5327181208053692;
31;50.7234221010694;0.5327181208053692;
32;50.73406040268457;0.5327181208053692;
33;50.713087248322154;0.5331654072208228;
34;50.72372561359345;0.5331654072208228;
35;50.7343684431389;0.5331654072208228;
36;50.724029380902415;0.5331654072208228;
37;50.71368597816961;0.5331654072208228;
38;50.73467674223342;0.5327731092436975;
39;50.72433340331723;0.5327731092436975;
40;50.734985300293985;0.5327731092436975;
41;50.72463768115942;0.5327731092436975;
42;50.71428571428571;0.5327731092436975;
43;50.71428571428571;0.5323801513877208;
44;50.724942214751;0.5323801513877208;
45;50.735603194619586;0.5323801513877208;
46;50.72524700441454;0.5323801513877208;
47;50.71488645920942;0.5323801513877208;
48;50.69386038687973;0.5328282828282829;
49;50.683491062039955;0.5328282828282829;
50;50.69415229280606;0.5328282828282829;
@ -0,0 +1,51 @@
time_window;training_accuracy;testing_accuracy;
1;51.99248904652618;0.5375626043405676;
2;51.85726210350584;0.5375626043405676;
3;52.00333889816361;0.5388471177944862;
4;52.01419327906491;0.5388471177944862;
5;52.35908141962422;0.5355054302422724;
6;52.62058884944665;0.5304928989139516;
7;52.98663324979115;0.5371762740183793;
8;52.7360066833751;0.5426421404682275;
9;53.39461040317527;0.5409698996655519;
10;53.489343919765986;0.5409698996655519;
11;53.333333333333336;0.5426421404682275;
12;53.992474916387955;0.5392976588628763;
13;54.03428093645485;0.5414225941422595;
14;54.15011499059168;0.5405857740585774;
15;54.077791718946045;0.5414225941422595;
16;54.19368332984731;0.5414225941422595;
17;54.14225941422595;0.5397489539748954;
18;53.59832635983264;0.5443886097152428;
19;53.672316384180796;0.5443886097152428;
20;54.353285893679356;0.5402010050251256;
21;54.239062172911865;0.5385259631490787;
22;54.082914572864325;0.5343383584589615;
23;54.22948073701842;0.537300922045264;
24;54.47120418848167;0.5356244761106455;
25;54.18935902806871;0.5364626990779547;
26;53.84454221663524;0.5356244761106455;
27;54.40067057837384;0.539815590947192;
28;54.90360435875943;0.5444630872483222;
29;54.43303290714735;0.537751677852349;
30;54.65408805031446;0.5427852348993288;
31;54.26714195848186;0.5394295302013423;
32;55.3481543624161;0.5436241610738255;
33;55.39010067114094;0.5340050377833753;
34;55.779316131739044;0.5197313182199832;
35;55.77003776751993;0.5214105793450882;
36;56.28541448058761;0.5264483627204031;
37;56.36020151133502;0.5239294710327456;
38;56.44416456759026;0.5235294117647059;
39;56.37203443208062;0.5184873949579832;
40;56.34187316253675;0.5235294117647059;
41;56.24868725057761;0.5235294117647059;
42;56.596638655462186;0.5126050420168067;
43;56.76470588235294;0.5147182506307821;
44;56.90271065349863;0.5138772077375946;
45;56.284153005464475;0.5138772077375946;
46;56.42211477822157;0.511354079058032;
47;56.53910849453322;0.5079899074852817;
48;56.749369217830115;0.5067340067340067;
49;56.992639327024186;0.5092592592592593;
50;56.56289440471182;0.51010101010101;
[4 plot images added (binary files, 279-632 KiB)]
@ -0,0 +1,61 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import os

#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

data = pd.read_csv("plots/data/MLP_20_10_5_2.csv", sep=";")
#data = pd.read_csv("plots/data/logistic_regression.csv", sep=";")
#data_SMA = pd.read_csv("plots/data/logistic_regression_SMA.csv", sep=";")
#data_SMA_20_50 = pd.read_csv("plots/data/logistic_regression_SMA_20_50.csv", sep=";")
#data_EMA = pd.read_csv("plots/data/logistic_regression_EMA.csv", sep=";")
#data_EMA_20_50 = pd.read_csv("plots/data/logistic_regression_EMA_20_50.csv", sep=";")

print(data)

fig = plt.figure(1, figsize=(15,10))
# The *100 below assumes the CSV stores accuracies as fractions; note that
# some of the logistic regression CSVs store training_accuracy already as a percentage.
plt.plot(data["time_window"], data["training_accuracy"]*100, color=seshadri[0], label="Training Accuracy", linewidth=2)
plt.plot(data["time_window"], data["testing_accuracy"]*100, color=seshadri[1], label="Testing Accuracy", linewidth=2)


#plt.plot(data["time_window"], data["testing_accuracy"]*100, color=seshadri[0], label="Returns and Volume", linewidth=2)
#plt.plot(data_SMA_20_50["time_window"], data_SMA_20_50["testing_accuracy"]*100, color=seshadri[1], label="With SMA 20 and 50 candles", linewidth=2)
#plt.plot(data_SMA["time_window"], data_SMA["testing_accuracy"]*100, color=seshadri[2], label="With SMA 20, 50 and 200 candles", linewidth=2)
#plt.plot(data_EMA_20_50["time_window"], data_EMA_20_50["testing_accuracy"]*100, color=seshadri[3], label="With EMA 20 and 50 candles", linewidth=2)
#plt.plot(data_EMA["time_window"], data_EMA["testing_accuracy"]*100, color=seshadri[4], label="With EMA 20, 50 and 200 candles", linewidth=2)


#plot params
plt.xlim([0, 50])
#plt.ylim([50, 60])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)
#plt.grid(True)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

plt.xlabel(r'Lag (Days)', fontsize=14)
plt.ylabel(r'Accuracy (%)',fontsize=14) # label the y axis

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)

plt.savefig("plots/MLP_20_10_5_2.png", dpi=300)

plt.show()
@ -0,0 +1,91 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import yfinance as yf
from datetime import datetime

import os

#bodacious colors
colors=sns.color_palette("rocket", 8)
#Ram's colors, if desired
seshadri = ['#c3121e', '#0348a1', '#ffb01c', '#027608', '#0193b0', '#9c5300', '#949c01', '#7104b5']
# 0sangre, 1neptune, 2pumpkin, 3clover, 4denim, 5cocoa, 6cumin, 7berry

#stock_data = pd.read_csv("data/daily_MSFT.csv").iloc[::-1].reset_index(drop=True)

# Load cached MSFT data if available, otherwise download it once and cache it
if os.path.isfile("data/MSFT_data.pkl"):
    stock_data = pd.read_pickle("data/MSFT_data.pkl")
elif os.path.isfile("data/MSFT_data.csv"):
    stock_data = pd.read_csv("data/MSFT_data.csv")
else:
    start_date = datetime(2000, 1, 1)
    end_date = datetime(2023, 10, 26)
    stock_data = yf.download('MSFT', start=start_date, end=end_date)
    stock_data.to_pickle("data/MSFT_data.pkl")
    stock_data.to_csv("data/MSFT_data.csv")


daily_returns = stock_data["Close"] - stock_data["Open"]
win_lose = np.zeros(daily_returns.size - 1)

# win_lose[index] is 1 when day index and day index + 1 move in the same direction
for index, return_ in enumerate(daily_returns[:-1]):
    # .iloc is used because the Series has a DatetimeIndex, so integer lookup must be positional
    if (return_ > 0 and daily_returns.iloc[index + 1] > 0) or (return_ < 0 and daily_returns.iloc[index + 1] < 0):
        win_lose[index] = 1
    else:
        win_lose[index] = 0

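## Vectorized equivalent (added sketch, not in the original): two consecutive
## days move in the same direction exactly when the product of their returns
## is positive.
#win_lose = (daily_returns.values[:-1] * daily_returns.values[1:] > 0).astype(float)
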
win_rate = np.count_nonzero(win_lose == 1) / win_lose.size

print(win_rate)

percent_returns = daily_returns / stock_data["Open"] * 100

fig = plt.figure(1, figsize=(15,10))
plt.hist(percent_returns, bins = 120, range=(-12,12), facecolor=seshadri[0], alpha=0.8, edgecolor="white", label="Percentage daily returns occurrences")

#plt.plot(stock_data.index, stock_data["Close"], linestyle="-", color=seshadri[0])
#plt.plot(stock_data.index, stock_data["Adj Close"], linestyle="-", color=seshadri[1])
#plt.plot(stock_data.index, stock_data["close"] - 20, linestyle="-", color=seshadri[2])
#plt.plot(stock_data.index, stock_data["close"] - 30, linestyle="-", color=seshadri[3])
#plt.plot(stock_data.index, stock_data["close"] - 40, linestyle="-", color=seshadri[4])
#plt.plot(stock_data.index, stock_data["close"] - 50, linestyle="-", color=seshadri[5])
#plt.plot(stock_data.index, stock_data["close"] - 60, linestyle="-", color=seshadri[6])
#plt.plot(stock_data.index, stock_data["close"] - 70, linestyle="-", color=seshadri[7])
#plt.show()


#plot params
plt.xlim([-12,12])
#plt.ylim([-0.5,16])
plt.minorticks_on()
plt.tick_params(labelsize=14)
plt.tick_params(labelbottom=True, labeltop=False, labelright=False, labelleft=True)
#xticks = np.arange(0, 1e4,10)
#yticks = np.arange(0,16.1,4)

plt.tick_params(direction='in',which='minor', length=5, bottom=True, top=True, left=True, right=True)
plt.tick_params(direction='in',which='major', length=10, bottom=True, top=True, left=True, right=True)
#plt.xticks(xticks)
#plt.yticks(yticks)

#plt.text(1,325, f'y={Decimal(coefs[3]):.4f}x$^3$+{Decimal(coefs[2]):.2f}x$^2$+{Decimal(coefs[1]):.2f}x+{Decimal(coefs[0]):.1f}',fontsize =13)

plt.xlabel(r'Percentage daily return', fontsize=14)
plt.ylabel(r'Occurrences',fontsize=14) # label the y axis

plt.yscale('log')

plt.legend(fontsize=14, loc="upper right", bbox_to_anchor=(0.99, 0.99)) # add the legend (will default to 'best' location)

plt.savefig("plots/MSFT_daily_occs_semilogx.png", dpi=300)

plt.show()