39 lines
1.2 KiB
Python
39 lines
1.2 KiB
Python
import pandas as pd
|
|
import os as os
|
|
from pandas_datareader import data
|
|
from sklearn.model_selection import train_test_split
|
|
import numpy as np
|
|
|
|
# Directory that contains this module; realpath() resolves symlinks first,
# and the head of os.path.split() is the directory part of that path.
dir_path = os.path.split(os.path.realpath(__file__))[0]
|
|
|
|
def load_financial_data(start_date, end_date, output_file, ticker='GOOG'):
    """Load price data for ``ticker``, using ``output_file`` as a pickle cache.

    If ``output_file`` can be read with ``pd.read_pickle`` it is returned
    as-is. Otherwise the data is downloaded through
    ``data.DataReader(ticker, 'yahoo', start_date, end_date)``, written to
    ``output_file`` and returned.

    Args:
        start_date: First date of the requested range (forwarded to
            ``DataReader``; only used when downloading).
        end_date: Last date of the requested range (forwarded to
            ``DataReader``; only used when downloading).
        output_file: Path of the pickle file used as the cache.
        ticker: Symbol to download. Defaults to ``'GOOG'``, which was the
            previously hard-coded value.

    Returns:
        The object loaded from the cache, or the freshly downloaded data.

    Note:
        A cache hit ignores ``start_date``, ``end_date`` and ``ticker``: the
        file content is returned unchanged, so delete the file (or use a
        different path) to force a refresh.
        Pickle files can execute arbitrary code when loaded; only point
        ``output_file`` at files you created yourself.
    """
    try:
        df = pd.read_pickle(output_file)
        print("File data found...reading {} data".format(ticker))
    except FileNotFoundError:
        # No cache yet: download once and persist for subsequent calls.
        print("File not found...downloading the {} data".format(ticker))
        df = data.DataReader(ticker, 'yahoo', start_date, end_date)
        df.to_pickle(output_file)
    return df
|
|
|
|
def create_classification_trading_condition(df):
    """Build features and up/down labels for a classification model.

    Two columns, 'Open-Close' and 'High-Low', are added to the passed-in
    ``df`` itself (the caller's frame is modified). Rows containing missing
    values are then dropped from a local copy before features and labels
    are extracted.

    Args:
        df: DataFrame with 'Open', 'High', 'Low' and 'Close' columns.

    Returns:
        A tuple ``(X, Y)``. ``X`` is a DataFrame holding the 'Open-Close'
        and 'High-Low' columns. ``Y`` is a NumPy array with ``1`` where the
        next row's 'Close' is greater than the current row's 'Close' and
        ``-1`` otherwise. The final row has no following row, so its
        comparison is against NaN and it is always labelled ``-1``.
    """
    # These assignments intentionally target the caller's frame.
    df['Open-Close'] = df['Open'] - df['Close']
    df['High-Low'] = df['High'] - df['Low']

    cleaned = df.dropna()
    features = cleaned[['Open-Close', 'High-Low']]

    close = cleaned['Close']
    next_close = close.shift(-1)
    labels = np.where(next_close > close, 1, -1)
    return features, labels
|
|
|
|
def create_regression_trading_condition(df):
    """Build features and next-row price-change targets for regression.

    Two columns, 'Open-Close' and 'High-Low', are added to the passed-in
    ``df`` itself (the caller's frame is modified). Rows containing missing
    values are then dropped from a local copy before features and targets
    are extracted.

    Args:
        df: DataFrame with 'Open', 'High', 'Low' and 'Close' columns.

    Returns:
        A tuple ``(X, Y)``. ``X`` is a DataFrame holding the 'Open-Close'
        and 'High-Low' columns. ``Y`` is a Series with the next row's
        'Close' minus the current row's 'Close'. The final entry of ``Y``
        is NaN because there is no following row; callers that feed ``Y``
        to an estimator must handle that entry themselves.
    """
    # These assignments intentionally target the caller's frame.
    df['Open-Close'] = df.Open - df.Close
    df['High-Low'] = df.High - df.Low
    df = df.dropna()
    # The former debugging print of the whole frame was removed: it flooded
    # stdout on every call and the classification sibling never printed.
    X = df[['Open-Close', 'High-Low']]
    Y = df['Close'].shift(-1) - df['Close']
    return X, Y
|
|
|
|
def create_train_split_group(X, Y, split_ratio=0.8):
    """Split features and targets into train and test groups, in order.

    Args:
        X: Feature data.
        Y: Target data aligned with ``X``.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
            Defaults to 0.8.

    Returns:
        Whatever ``train_test_split(X, Y, ...)`` returns; per scikit-learn's
        documentation that is the train/test pieces of ``X`` followed by
        the train/test pieces of ``Y``.
    """
    # shuffle=False disables shuffling before the split is made.
    split_options = {'shuffle': False, 'train_size': split_ratio}
    return train_test_split(X, Y, **split_options)