learn-algorithmic-trading/courses/sources/sec2/ols.py

55 lines
1.5 KiB
Python
Raw Permalink Normal View History

import os
from itertools import tee

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import isna

# Star import stays last so any names it exports keep taking precedence.
from prepare_financial_data import *
# Directory that holds this script; the scatter-matrix image is saved there.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Fetch the price data through the project helper.
# NOTE(review): presumably GOOG daily prices cached in the given pickle --
# confirm against prepare_financial_data.load_financial_data.
goog_data = load_financial_data(
    start_date='2001-01-01',
    end_date='2018-01-01',
    output_file='goog_data_large.pkl',
)

# Build the regression features (X) and target (Y), then drop the final
# entry of each.
# NOTE(review): presumably the last row has no valid target -- confirm
# against create_regression_trading_condition.
X, Y = create_regression_trading_condition(goog_data)
X, Y = X[:-1], Y[:-1]

# Attach the target as a 'Target' column indexed like the price frame, then
# drop the frame's last row as well.  The frame is printed before and after
# the trim.
goog_data = goog_data.assign(Target=pd.Series(Y, index=goog_data.index))
print(goog_data)
goog_data = goog_data[:-1]
print(goog_data)

# Pairwise scatter plots of the two features and the target, with KDE curves
# on the diagonal, written next to this script.
scatter_columns = ['Open-Close', 'High-Low', 'Target']
pd.plotting.scatter_matrix(goog_data[scatter_columns], diagonal='kde')
plt.savefig(dir_path + "/scatter_matrix.png")
# Split 80% of the available data into training features and targets; the
# remaining 20% is held out as the out-of-sample test set.
X_train, X_test, Y_train, Y_test = create_train_split_group(X, Y, split_ratio=0.8)

# Fit an ordinary-least-squares linear regression on the training split.
from sklearn import linear_model
ols = linear_model.LinearRegression()
ols.fit(X_train, Y_train)
print('Coefficients: ', ols.coef_)

# Performance metrics
from sklearn.metrics import mean_squared_error, r2_score
# In-sample mean squared error (measured on the training split).
print("Mean squared error: {}".format(mean_squared_error(Y_train, ols.predict(X_train))))
print(Y_test)
print(X_test)
# Explained variance score on the held-out split: 1 is perfect prediction.
# Built with str.format because concatenating the float returned by
# r2_score onto a str raises TypeError.
print("Variance score: {}".format(r2_score(Y_test, ols.predict(X_test))))
# Store the fitted model's prediction for every row of X alongside the prices.
goog_data['Predicted_Signal'] = ols.predict(X)

# Log return of each close relative to the previous row's close; the first
# row has no predecessor, so its value is NaN.
close_prices = goog_data['Close']
goog_data['GOOG_Returns'] = np.log(close_prices / close_prices.shift(1))
def calculate_return(df, split)