Source code for src.CombinedModel

"""
Code for the combined model approach.

@author: Shashank Swaminathan
"""

from src.BayesReg import GPM
from src.StockRNN import StockRNN
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date

# Time-of-day suffix representing midnight.
ZERO_TIME = " 00:00:00"

# Device string passed to ``model.to()`` when moving the LSTM onto the GPU.
DEVICE = "cuda"

# Message printed when the model cannot be moved onto DEVICE.
TO_GPU_FAIL_MSG = (
    "Unable to successfully run model.to('{}'). If running in Colaboratory, make sure "
    "that you have enabled the GPU in your settings".format(DEVICE)
)

class CombinedModel:
    r"""
    Class for handling combined model operations.

    Holds one ``StockRNN`` (LSTM) instance and one ``GPM`` (Gaussian process)
    instance and blends their predictions as a weighted sum.
    """

    def __init__(self, ticker, comp_tickers):
        r"""
        init function. It will set up the StockRNN and GPM classes.

        :param ticker: Ticker of stocks to predict
        :param comp_tickers: List of tickers to compare desired ticker
            against. Used for StockRNN only.
        """
        self.srnn = StockRNN(ticker,
                             to_compare=comp_tickers,
                             train_start_date=datetime(2012, 1, 1),
                             train_end_date=datetime.today(),
                             try_load_weights=False)
        self.cms = GPM(ticker)

    def train(self, start_date, pred_start, pred_end, mw=0.5, n_epochs=10):
        r"""
        Main training function. It runs both the LSTM and GP models and
        stores results in attributes.

        Sets ``n_days_pred`` (business days from ``pred_start`` through
        ``pred_end``, inclusive) and ``train_end`` (the day before
        ``pred_start``) before delegating to :meth:`_combo_shot`.

        :param start_date: Training start date (for GP model only). Provide
            as datetime object.
        :param pred_start: Date to start predictions from. Provide as
            datetime object.
        :param pred_end: Date to end predictions. Provide as datetime object.
        :param mw: Model weight. Used to do weighted average between GP and
            LSTM. 0 is for only the LSTM, and 1 is for only the GP. Defaults
            to 0.5 (equal split).
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.
        :returns: (Mean predictions [t, y], Upper/lower bounds of 2 std [t, y])
        """
        dt_ps = date(pred_start.year, pred_start.month, pred_start.day)
        dt_pe = date(pred_end.year, pred_end.month, pred_end.day)
        # busday_count covers the half-open range [dt_ps, dt_pe); the end
        # date is added only when it is a business day itself, so a weekend
        # pred_end no longer inflates the count by one.
        self.n_days_pred = (np.busday_count(dt_ps, dt_pe)
                            + int(np.is_busday(dt_pe)))
        self.train_end = pred_start - pd.Timedelta(1, "D")

        return self._combo_shot(start_date, pred_start, pred_end,
                                mw=mw, n_epochs=n_epochs)

    def _combo_shot(self, start_date, pred_start, pred_end, mw=0.5,
                    n_epochs=10):
        r"""
        Helper function to actually do the combo model training. Runs the
        two models individually, aligns the two results in time, then adds
        the two generated distributions as a weighted sum. Sets attribute
        combo_vals equal to the result.

        Reads ``self.n_days_pred`` and ``self.train_end``, which
        :meth:`train` sets before calling this helper.

        :param start_date: Training start date (for GP model only). Provide
            as datetime object.
        :param pred_start: Date to start predictions from. Provide as
            datetime object.
        :param pred_end: Date to end predictions. Provide as datetime object.
        :param mw: Model weight. Used to do weighted average between GP and
            LSTM. 0 is for only the LSTM, and 1 is for only the GP. Defaults
            to 0.5 (equal split).
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.
        :returns: The value stored in ``combo_vals``:
            ([time, mean prediction], [time, upper/lower bounds]).
        """
        self._srnn_train(pred_start, self.n_days_pred, n_epochs=n_epochs)
        self._cms_train(start_date, self.train_end, pred_end)

        # Only the trailing n_days_pred GP values are blended with the LSTM
        # output; both mean and std use the same linear weighting.
        m_combo = (self.m_cms[-self.n_days_pred:] * mw
                   + self.m_srnn * (1 - mw))
        std_combo = (self.std_cms[-self.n_days_pred:] * mw
                     + self.std_srnn * (1 - mw))

        xy_pred = [self.times, m_combo]
        upper = m_combo + 2 * std_combo
        lower = m_combo - 2 * std_combo
        # Band outline: forward along the lower bound, back along the upper.
        band_x = np.append(self.times, self.times[::-1])
        band_y = np.append(lower, upper[::-1])
        std_bounds = [band_x, band_y]

        self.combo_vals = (xy_pred, std_bounds)
        # Returned so that train() actually yields the documented result
        # instead of None.
        return self.combo_vals

    def _srnn_train(self, pred_start, n_days_pred, n_epochs=10):
        r"""
        Helper function to train the LSTM using the StockRNN class.
        Generates upper and lower bounds of prediction based on mean and
        std. deviation. Sets attribute srnn_vals equal to result. Result is
        of form: ([time, mean prediction], [time, upper/lower bounds],
        [time, actual data prior to prediction], [time, actual data during
        prediction]).

        Also sets ``times``, ``m_srnn`` and ``std_srnn``.

        :param pred_start: Date to start predictions from. Provide as
            datetime object.
        :param n_days_pred: Number of days to predict ahead. Will only
            predict on business days.
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.
        """
        srdf = self.srnn.companies[0].data_frame
        srdfdt = pd.to_datetime(srdf.Date)
        # Positional offset of the first row at/after pred_start.
        raw_p_st_idx = srdfdt.searchsorted(pred_start)
        # Shifted by the frame's first index label before being handed to
        # pred_in_conj.
        p_st_idx = raw_p_st_idx + srdf.index[0]
        # Uses the n_days_pred argument (not the attribute) so the helper
        # does not depend on train() having been called first.
        raw_p_e_idx = raw_p_st_idx + n_days_pred

        # NOTE(review): the whitespace of this try/except was lost in the
        # source this was recovered from; the CPU fallback call is kept in
        # the AssertionError handler only - confirm against the original.
        try:
            self.srnn.to(DEVICE)
            self.srnn.__togpu__(True)
        except RuntimeError:
            print(TO_GPU_FAIL_MSG)
        except AssertionError:
            print(TO_GPU_FAIL_MSG)
            self.srnn.__togpu__(False)

        self.srnn.do_training(num_epochs=n_epochs)
        self.m_srnn, self.std_srnn = self.srnn.pred_in_conj(p_st_idx,
                                                            n_days_pred)
        self.times = srdf.Date.iloc[raw_p_st_idx:raw_p_e_idx]
        self.m_srnn = np.array(self.m_srnn)
        self.std_srnn = np.array(self.std_srnn)

        # Window of history shown before the prediction. Both bounds are
        # clamped at 0 so a prediction start fewer than 50 rows into the
        # frame cannot turn into a negative (wrap-around) slice.
        # NOTE(review): the window stops one row short of raw_p_st_idx, so
        # the last pre-prediction row is omitted - confirm this is intended.
        hist_start = max(raw_p_st_idx - 50, 0)
        hist_end = max(raw_p_st_idx - 1, 0)
        times_td = srdf.Date.iloc[hist_start:hist_end]
        td_srnn = srdf.Close.iloc[hist_start:hist_end]
        a_srnn = srdf.Close.iloc[raw_p_st_idx:raw_p_e_idx]

        xy_pred = [self.times, self.m_srnn]
        upper = self.m_srnn + 2 * self.std_srnn
        lower = self.m_srnn - 2 * self.std_srnn
        # Band outline: forward along the lower bound, back along the upper.
        band_x = np.append(self.times, self.times[::-1])
        band_y = np.append(lower, upper[::-1])
        std_bounds = [band_x, band_y]
        train_data = [times_td, td_srnn]
        test_data = [self.times, a_srnn]

        self.srnn_vals = (xy_pred, std_bounds, train_data, test_data)

    def _cms_train(self, start_date, train_end, pred_end):
        r"""
        Helper function to train the GP model using the GPM class. Sets
        attribute cms_vals equal to result. Result is of form: ([time, mean
        prediction], [time, upper/lower bounds], [time, actual data prior to
        prediction], [time, actual data during prediction]).

        Also sets ``m_cms`` and ``std_cms``.

        :param start_date: Training start date (for GP model only). Provide
            as datetime object.
        :param train_end: Date to end training. Provide as datetime object.
        :param pred_end: Date to end predictions. Provide as datetime
            object. Assumes predictions begin right after training.
        """
        xy_pred, std_bounds, train_data, test_data = self.cms.go(
            start_date=start_date, split_date=train_end, end_date=pred_end)
        # NOTE(review): indexes 1 and 2 are read as mean and std, so
        # GPM.go presumably returns xy_pred as [time, mean, std] - confirm.
        self.m_cms = xy_pred[1]
        self.std_cms = xy_pred[2]
        self.cms_vals = (xy_pred, std_bounds, train_data, test_data)