learn-algorithmic-trading/courses/sources/sec2/prepare_financial_data.py

39 lines
1.2 KiB
Python

import pandas as pd
import os as os
from pandas_datareader import data
from sklearn.model_selection import train_test_split
import numpy as np
# Directory containing this script; realpath() canonicalizes the path
# (resolving symlinks) before the directory part is taken.
dir_path = os.path.dirname(os.path.realpath(__file__))
def load_financial_data(start_date, end_date, output_file, ticker='GOOG'):
    """Load daily price data for *ticker*, using a pickle file as a cache.

    If ``output_file`` exists it is read and returned as-is. Otherwise the
    data is downloaded from the ``'yahoo'`` source of pandas_datareader for
    the given date range, written to ``output_file`` and returned.

    Args:
        start_date: First date of the range, passed through to ``DataReader``.
        end_date: Last date of the range, passed through to ``DataReader``.
        output_file: Path of the pickle file used as the cache.
        ticker: Symbol to download. Defaults to ``'GOOG'``, which is what
            this function always used before the parameter existed.

    Returns:
        The DataFrame read from the cache or freshly downloaded.

    Note:
        The cache is keyed only by ``output_file``. An existing file is
        returned without checking that it holds data for ``ticker`` or for
        the requested date range, so use a distinct file per symbol/range.
    """
    try:
        # SECURITY: unpickling can execute arbitrary code. Only point
        # output_file at cache files this program created itself.
        df = pd.read_pickle(output_file)
    except FileNotFoundError:
        print(f"File not found...downloading the {ticker} data")
        df = data.DataReader(ticker, 'yahoo', start_date, end_date)
        df.to_pickle(output_file)
    else:
        print(f"File data found...reading {ticker} data")
    return df
def create_classification_trading_condition(df):
    """Build features and up/down labels for a classification model.

    Two feature columns, ``'Open-Close'`` and ``'High-Low'``, are added to
    ``df`` in place (the caller's DataFrame gains these columns). Rows that
    contain any NaN are then dropped from a working copy before the
    features and labels are extracted.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the two-column feature DataFrame and
        ``Y`` is a NumPy array holding ``1`` where the next row's close is
        greater than the current close and ``-1`` otherwise.

    Note:
        The final row has no following close (the shifted value is NaN), so
        the comparison is False there and its label is always ``-1``.
    """
    df['Open-Close'] = df['Open'] - df['Close']
    df['High-Low'] = df['High'] - df['Low']

    cleaned = df.dropna()
    features = cleaned[['Open-Close', 'High-Low']]

    current_close = cleaned['Close']
    next_close = current_close.shift(-1)
    labels = np.where(next_close > current_close, 1, -1)
    return features, labels
def create_regression_trading_condition(df):
    """Build features and next-row price-change targets for regression.

    Two feature columns, ``'Open-Close'`` and ``'High-Low'``, are added to
    ``df`` in place (the caller's DataFrame gains these columns). Rows that
    contain any NaN are then dropped from a working copy before the
    features and target are extracted.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the two-column feature DataFrame and
        ``Y`` is a Series holding the next row's close minus the current
        close.

    Note:
        ``Y`` is computed after the NaN rows are dropped, so its last entry
        is NaN (there is no following close). ``X`` and ``Y`` keep the same
        length; callers that fit a model must handle that trailing NaN.
    """
    # The previous debug print of the whole DataFrame was removed: it
    # flooded stdout on every call and had no effect on the result.
    df['Open-Close'] = df['Open'] - df['Close']
    df['High-Low'] = df['High'] - df['Low']

    cleaned = df.dropna()
    features = cleaned[['Open-Close', 'High-Low']]
    target = cleaned['Close'].shift(-1) - cleaned['Close']
    return features, target
def create_train_split_group(X, Y, split_ratio=0.8):
    """Split features and targets into train and test groups, in order.

    Args:
        X: Feature data.
        Y: Target data aligned with ``X``.
        split_ratio: Passed to scikit-learn as ``train_size``
            (default ``0.8``).

    Returns:
        Whatever ``train_test_split(X, Y, ...)`` returns; per the
        scikit-learn documentation this is the train/test pieces of ``X``
        followed by the train/test pieces of ``Y``.
    """
    # Shuffling is disabled so rows are not reordered before the split.
    split_options = {'train_size': split_ratio, 'shuffle': False}
    return train_test_split(X, Y, **split_options)