# Source code for src.CombinedModel

"""
Code for the combined model approach.

@author: Shashank Swaminathan
"""

from src.BayesReg import GPM
from src.StockRNN import StockRNN
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date

# Midnight time-of-day suffix. Not referenced in the visible part of this
# module; presumably appended to date strings elsewhere -- TODO confirm.
ZERO_TIME = " 00:00:00"

# Device string handed to the LSTM model's ``.to()`` call (selects the GPU).
DEVICE = "cuda"

# Message printed when moving the model to DEVICE fails.
TO_GPU_FAIL_MSG = (
    "Unable to successfully run model.to('{}'). If running in Colaboratory, make sure "
    "that you have enabled the GPU in your settings"
).format(DEVICE)

class CombinedModel:
    r"""
    Class for handling combined model operations.

    Holds one StockRNN (LSTM) model and one GPM (Gaussian process) model for the same ticker and blends their predictions as a weighted average.
    """
    def __init__(self, ticker, comp_tickers):
        r"""
        init function. It will set up the StockRNN and GPM classes.

        :param ticker: Ticker of stocks to predict
        :param comp_tickers: List of tickers to compare desired ticker against. Used for StockRNN only.
        """
        self.srnn = StockRNN(ticker, to_compare=comp_tickers,
                             train_start_date=datetime(2012, 1, 1),
                             train_end_date=datetime.today(),
                             try_load_weights=False)
        self.cms = GPM(ticker)

    def train(self, start_date, pred_start, pred_end, mw=0.5, n_epochs=10):
        r"""
        Main training function. It runs both the LSTM and GP models and stores results in attributes.

        Sets the attributes n_days_pred (number of business days from pred_start to pred_end, both inclusive) and train_end (the calendar day before pred_start) before delegating to _combo_shot.

        :param start_date: Training start date (for GP model only). Provide as datetime object.
        :param pred_start: Date to start predictions from. Provide as datetime object.
        :param pred_end: Date to end predictions. Provide as datetime object.
        :param mw: Model weight. Used to do weighted average between GP and LSTM. 0 is for only the LSTM, and 1 is for only the GP. Defaults to 0.5 (equal split).
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.

        :returns: (Mean predictions [t, y], Upper/lower bounds of 2 std [t, y])
        """
        # Drop any time-of-day component so the count works on whole days.
        first_day = np.datetime64(
            date(pred_start.year, pred_start.month, pred_start.day), "D")
        last_day = np.datetime64(
            date(pred_end.year, pred_end.month, pred_end.day), "D")
        # busday_count excludes its end date, so count up to the day after
        # pred_end. (Adding 1 to the exclusive count instead would over-count
        # whenever pred_end falls on a weekend.)
        self.n_days_pred = int(
            np.busday_count(first_day, last_day + np.timedelta64(1, "D")))

        self.train_end = pred_start - pd.Timedelta(1, "D")
        return self._combo_shot(start_date, pred_start, pred_end,
                                mw=mw, n_epochs=n_epochs)

    def _combo_shot(self, start_date, pred_start, pred_end, mw=0.5, n_epochs=10):
        r"""
        Helper function to actually do the combo model training. Runs the two models individually, aligns the two results in time, then adds the two generated distributions as a weighted sum. Sets attribute combo_vals equal to the result.

        Expects the attributes n_days_pred and train_end to be set already (train does this).

        :param start_date: Training start date (for GP model only). Provide as datetime object.
        :param pred_start: Date to start predictions from. Provide as datetime object.
        :param pred_end: Date to end predictions. Provide as datetime object.
        :param mw: Model weight. Used to do weighted average between GP and LSTM. 0 is for only the LSTM, and 1 is for only the GP. Defaults to 0.5 (equal split).
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.

        :returns: (Mean predictions [t, y], Upper/lower bounds of 2 std [t, y]), the same tuple stored in combo_vals.
        """
        self._srnn_train(pred_start, self.n_days_pred, n_epochs=n_epochs)
        self._cms_train(start_date, self.train_end, pred_end)

        # Only the trailing n_days_pred GP values are blended with the LSTM
        # output; both the means and the std. deviations are mixed linearly.
        m_combo = self.m_cms[-self.n_days_pred:]*mw + self.m_srnn*(1 - mw)
        std_combo = self.std_cms[-self.n_days_pred:]*mw + self.std_srnn*(1 - mw)

        xy_pred = [self.times, m_combo]
        upper = m_combo + 2*std_combo
        lower = m_combo - 2*std_combo
        # Closed outline of the band: lower edge forward in time, then the
        # upper edge backward in time.
        band_x = np.append(self.times, self.times[::-1])
        band_y = np.append(lower, upper[::-1])
        std_bounds = [band_x, band_y]
        self.combo_vals = (xy_pred, std_bounds)
        return self.combo_vals

    def _srnn_train(self, pred_start, n_days_pred, n_epochs=10):
        r"""
        Helper function to train the LSTM using the StockRNN class. Generates upper and lower bounds of prediction based on mean and std. deviation. Sets attribute srnn_vals equal to result. Result is of form: ([time, mean prediction], [time, upper/lower bounds], [time, actual data prior to prediction], [time, actual data during prediction]).

        Also sets the attributes times, m_srnn and std_srnn.

        :param pred_start: Date to start predictions from. Provide as datetime object.
        :param n_days_pred: Number of days to predict ahead. Will only predict on business days.
        :param n_epochs: Number of epochs to train the LSTM. Defaults to 10.
        """
        srdf = self.srnn.companies[0].data_frame
        srdfdt = pd.to_datetime(srdf.Date)
        # Position of the first row dated on/after pred_start.
        raw_p_st_idx = srdfdt.searchsorted(pred_start)
        # Same location expressed relative to the frame's first index label.
        p_st_idx = raw_p_st_idx + srdf.index[0]
        raw_p_e_idx = raw_p_st_idx + n_days_pred
        try:
            self.srnn.to(DEVICE)
            self.srnn.__togpu__(True)
        except (RuntimeError, AssertionError):
            # Either exception means the model could not be moved to DEVICE,
            # so report it and switch the model's GPU flag off in both cases.
            print(TO_GPU_FAIL_MSG)
            self.srnn.__togpu__(False)

        self.srnn.do_training(num_epochs=n_epochs)
        self.m_srnn, self.std_srnn = self.srnn.pred_in_conj(p_st_idx, n_days_pred)
        self.times = srdf.Date.iloc[raw_p_st_idx:raw_p_e_idx]
        self.m_srnn = np.array(self.m_srnn)
        self.std_srnn = np.array(self.std_srnn)

        # Window of actual data leading up to the prediction start. Both ends
        # are clamped at 0 so that a short history cannot turn into a negative
        # (wrap-around) slice index.
        # NOTE(review): the slice end is exclusive, so the row immediately
        # before the prediction start is left out -- confirm this is intended.
        td_start = max(raw_p_st_idx - 50, 0)
        td_end = max(raw_p_st_idx - 1, 0)
        times_td = srdf.Date.iloc[td_start:td_end]
        td_srnn = srdf.Close.iloc[td_start:td_end]
        a_srnn = srdf.Close.iloc[raw_p_st_idx:raw_p_e_idx]

        xy_pred = [self.times, self.m_srnn]
        upper = self.m_srnn + 2*self.std_srnn
        lower = self.m_srnn - 2*self.std_srnn
        # Closed outline of the band: lower edge forward, upper edge backward.
        band_x = np.append(self.times, self.times[::-1])
        band_y = np.append(lower, upper[::-1])
        std_bounds = [band_x, band_y]
        train_data = [times_td, td_srnn]
        test_data = [self.times, a_srnn]
        self.srnn_vals = (xy_pred, std_bounds, train_data, test_data)

    def _cms_train(self, start_date, train_end, pred_end):
        r"""
        Helper function to train the GP model using the GPM class. Sets attribute cms_vals equal to result. Result is of form: ([time, mean prediction], [time, upper/lower bounds], [time, actual data prior to prediction], [time, actual data during prediction]).

        Also sets the attributes m_cms and std_cms.

        :param start_date: Training start date (for GP model only). Provide as datetime object.
        :param train_end: Date to end training. Provide as datetime object.
        :param pred_end: Date to end predictions. Provide as datetime object. Assumes predictions begin right after training.
        """
        xy_pred, std_bounds, train_data, test_data = self.cms.go(start_date=start_date,
                                                                 split_date=train_end,
                                                                 end_date=pred_end)
        self.m_cms = xy_pred[1]
        # NOTE(review): element 2 is used as the std. deviation, which assumes
        # GPM.go returns xy_pred as [time, mean, std] -- confirm against GPM.
        self.std_cms = xy_pred[2]
        self.cms_vals = (xy_pred, std_bounds, train_data, test_data)