import os import time import statistics import pandas as pd import numpy as np from pandas_datareader import data dir_path = os.path.dirname(os.path.realpath(__file__)) start_date = '2001-01-01' end_date = '2018-01-01' SRC_DATA_FILENAME = dir_path + '/goog_data_large.pkl' try: goog_data = pd.read_pickle(SRC_DATA_FILENAME) print('File data found...reading GOOG data') except FileNotFoundError: print('File not found...downloading the GOOG data') goog_data = data.DataReader('GOOG', 'yahoo', start_date, end_date) goog_data.to_pickle(SRC_DATA_FILENAME) goog_data_signal = pd.DataFrame(index=goog_data.index) goog_data_signal['price'] = goog_data['Adj Close'] close = goog_data_signal['price'] exe_start_time = time.time() """ Calculate mean of each month's return """ goog_monthly_return = goog_data['Adj Close'].pct_change().groupby( [goog_data['Adj Close'].index.year, goog_data['Adj Close'].index.month]).mean() print(goog_monthly_return) goog_monthly_return_list = [] for i in range(len(goog_monthly_return)): goog_monthly_return_list.append({'month':goog_monthly_return.index[i][1],'monthly_return': goog_monthly_return[i]}) goog_monthly_return_list=pd.DataFrame(goog_monthly_return_list,\ columns=('month','monthly_return')) print(goog_monthly_return_list) goog_monthly_return_list.boxplot(column='monthly_return', by='month') """ Visulization """ import matplotlib.pyplot as plt ax = plt.gca() labels = [item.get_text() for item in ax.get_xticklabels()] print(labels) labels = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] ax.set_xticklabels(labels) ax.set_ylabel('GOOG return') plt.tick_params(axis='both', which='major', labelsize=7) plt.title("GOOG Monthly return 2001-2018") plt.suptitle("") plt.savefig(dir_path + '/seasonality.png') plt.show()