55 lines
1.5 KiB
Python
55 lines
1.5 KiB
Python
from itertools import tee
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
from pandas import isna
|
|
from prepare_financial_data import *
|
|
|
|
# `os` is imported explicitly here: the file otherwise only gets it if the
# wildcard import from prepare_financial_data happens to re-export it.
import os

# Absolute directory of this script (symlinks resolved); used further down
# as the location for the saved scatter-matrix image.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Load the price data for the requested date range.
# NOTE(review): `output_file` is presumably a local cache written/read by
# load_financial_data -- confirm in prepare_financial_data.
goog_data = load_financial_data(
    start_date='2001-01-01',
    end_date='2018-01-01',
    output_file='goog_data_large.pkl',
)

# Derive the regression inputs (X) and the value to predict (Y) from the
# loaded prices.
X, Y = create_regression_trading_condition(goog_data)

# Discard the final observation from both features and target.
# NOTE(review): presumably the last row has no usable target -- confirm in
# create_regression_trading_condition.
X, Y = X[:-1], Y[:-1]

# Attach the target as a column on the price index, then drop the last row so
# the frame has the same number of rows as the trimmed X / Y.
# NOTE(review): Y has already been shortened by one element at this point while
# goog_data has not; how pd.Series handles that depends on Y's type -- confirm.
goog_data = goog_data.assign(Target=pd.Series(Y, index=goog_data.index))
print(goog_data)
goog_data = goog_data[:-1]
print(goog_data)

# Pairwise scatter plots of the two features and the target, with kernel
# density estimates on the diagonal, saved next to this script.
# NOTE(review): 'Open-Close' and 'High-Low' are assumed to have been added to
# goog_data by create_regression_trading_condition -- confirm.
pd.plotting.scatter_matrix(
    goog_data[['Open-Close', 'High-Low', 'Target']],
    diagonal='kde',
)
plt.savefig("{}/scatter_matrix.png".format(dir_path))

""" Split 80% of available data into training feature value and target variable; and remaining 20% of dataset into out-sample testing feature value """
|
|
|
|
|
|
# Partition features and target into a training portion and a held-out test
# portion; split_ratio=0.8 requests the 80% / 20% split described above.
X_train, X_test, Y_train, Y_test = create_train_split_group(
    X, Y, split_ratio=0.8
)

from sklearn import linear_model

# Fit an ordinary-least-squares model on the training portion only.
# `fit` returns the estimator itself, so `ols` is the fitted model.
ols = linear_model.LinearRegression().fit(X_train, Y_train)

print('Coefficients: ', ols.coef_)

""" Performance Matrices """
|
|
|
|
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error of the fitted model on the training data.
print("Mean squared error: {}".format(mean_squared_error(Y_train, ols.predict(X_train))))

# Dump the held-out target and features for inspection.
print(Y_test)
print(X_test)

# Explained variance (R^2) on the held-out data: 1 is perfect prediction.
# This report was previously disabled because it concatenated a str with the
# float returned by r2_score (a TypeError); formatting the value fixes that.
print("Variance score: {}".format(r2_score(Y_test, ols.predict(X_test))))

# `np` is imported explicitly here: the file otherwise only gets it if the
# wildcard import from prepare_financial_data happens to re-export it.
import numpy as np

# Model output for every row of X (training and test portions alike), stored
# alongside the prices.
goog_data['Predicted_Signal'] = ols.predict(X)

# Log return of the close relative to the previous row's close; the first row
# has no predecessor, so shift(1) leaves it as NaN.
goog_data['GOOG_Returns'] = np.log(goog_data['Close'] / goog_data['Close'].shift(1))

def calculate_return(df, split) |