"""Helpers for loading cached price data and deriving feature/target sets.

The functions here load a daily price DataFrame (from a local pickle cache,
falling back to a download), derive two spread features from it, and split
the resulting feature/target pair chronologically.
"""

import os

import numpy as np
import pandas as pd
from pandas_datareader import data
from sklearn.model_selection import train_test_split

# Directory that contains this module.
dir_path = os.path.dirname(os.path.realpath(__file__))

# Names of the derived feature columns, in the order they are returned in X.
_FEATURE_COLUMNS = ['Open-Close', 'High-Low']


def load_financial_data(start_date, end_date, output_file, *,
                        ticker='GOOG', source='yahoo'):
    """Return price data for *ticker*, using *output_file* as a pickle cache.

    If *output_file* exists it is read and returned as-is (the date range and
    ticker are NOT checked against the cached content). Otherwise the data is
    downloaded through ``pandas_datareader`` and written to *output_file*
    before being returned.

    Args:
        start_date: Start of the range passed to ``DataReader``.
        end_date: End of the range passed to ``DataReader``.
        output_file: Path of the pickle file used as the cache.
        ticker: Symbol to download on a cache miss. Defaults to ``'GOOG'``.
        source: ``pandas_datareader`` data source name. Defaults to
            ``'yahoo'``.

    Returns:
        The cached or freshly downloaded DataFrame.
    """
    try:
        # NOTE(security): unpickling can execute arbitrary code; only point
        # output_file at cache files this program wrote itself.
        df = pd.read_pickle(output_file)
    except FileNotFoundError:
        print(f"File not found...downloading the {ticker} data")
        df = data.DataReader(ticker, source, start_date, end_date)
        df.to_pickle(output_file)
    else:
        print(f"File data found...reading {ticker} data")
    return df


def _add_price_spread_features(df):
    """Add the spread columns to *df* in place and return a NaN-free copy.

    The caller's DataFrame gains the 'Open-Close' and 'High-Low' columns;
    this in-place mutation is kept on purpose so existing callers that read
    those columns from their own frame keep working. The returned frame is
    the result of ``dropna()`` (rows containing any NaN removed).
    """
    df['Open-Close'] = df.Open - df.Close
    df['High-Low'] = df.High - df.Low
    return df.dropna()


def create_classification_trading_condition(df):
    """Build features and an up/down label for the next row's close.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.
            It is modified in place: 'Open-Close' and 'High-Low' are added.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a DataFrame holding the two spread
        features (NaN rows dropped) and ``Y`` is a NumPy array that is ``1``
        where the next row's ``Close`` is greater than the current one and
        ``-1`` otherwise. The final row has no successor, so its comparison
        against NaN is False and it is labelled ``-1``.
    """
    cleaned = _add_price_spread_features(df)
    X = cleaned[_FEATURE_COLUMNS]
    next_close = cleaned['Close'].shift(-1)
    Y = np.where(next_close > cleaned['Close'], 1, -1)
    return (X, Y)


def create_regression_trading_condition(df):
    """Build features and a next-row close-price change target.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.
            It is modified in place: 'Open-Close' and 'High-Low' are added.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a DataFrame holding the two spread
        features (NaN rows dropped) and ``Y`` is a Series equal to the next
        row's ``Close`` minus the current ``Close``. The final element of
        ``Y`` is NaN because the last row has no successor.
    """
    cleaned = _add_price_spread_features(df)
    # Kept from the original implementation: dumps the cleaned frame to stdout.
    print(cleaned)
    X = cleaned[_FEATURE_COLUMNS]
    Y = cleaned['Close'].shift(-1) - cleaned['Close']
    return (X, Y)


def create_train_split_group(X, Y, split_ratio=0.8):
    """Split *X* and *Y* into train and test groups without shuffling.

    Args:
        X: Feature set.
        Y: Target values aligned with *X*.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
            Defaults to ``0.8``.

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, Y_train,
        Y_test``. With ``shuffle=False`` the original row order is kept, so
        the training group is the leading part of the data.
    """
    # Split dataset into two groups
    return train_test_split(X, Y, shuffle=False, train_size=split_ratio)