from itertools import tee import pandas as pd import matplotlib.pyplot as plt from pandas import isna from prepare_financial_data import * dir_path = os.path.dirname(os.path.realpath(__file__)) goog_data = load_financial_data( start_date = '2001-01-01', end_date = '2018-01-01', output_file = 'goog_data_large.pkl' ) X, Y = create_regression_trading_condition(goog_data) X = X[:-1] Y = Y[:-1] goog_data = goog_data.assign(Target=pd.Series(Y, index=goog_data.index)) print(goog_data) goog_data = goog_data[:-1] print(goog_data) pd.plotting.scatter_matrix(goog_data[['Open-Close', 'High-Low', 'Target']], diagonal='kde') plt.savefig(dir_path + "/scatter_matrix.png") """ Split 80% of available data into training feature value and target variable; and remaining 20% of dataset into out-sample testing feature value """ X_train, X_test, Y_train, Y_test = create_train_split_group(X,Y,split_ratio=0.8) from sklearn import linear_model ols = linear_model.LinearRegression() ols.fit(X_train, Y_train) print('Coefficients: ', ols.coef_) """ Performance Matrices """ from sklearn.metrics import mean_squared_error, r2_score # The mean squared error print("Mean squared error: {}".format(mean_squared_error(Y_train, ols.predict(X_train)))) print(Y_test) print(X_test) # Explained variance score: 1 is perfect prediction # print("Variance score: " + (r2_score(Y_test, ols.predict(X_test)))) goog_data['Predicted_Signal'] = ols.predict(X) goog_data['GOOG_Returns'] = np.log(goog_data['Close']/goog_data['Close'].shift(1)) def calculate_return(df, split)