Source code for btgym.research.gps.strategy

import numpy as np

import backtrader as bt
from btgym.research.strategy_gen_4 import DevStrat_4_12
from btgym.research.gps.oracle import Oracle, Oracle2

from gym import spaces
from btgym import DictSpace


class GuidedStrategy_0_0(DevStrat_4_12):
    """
    Augments observation state with expert action predictions estimated by accessing entire episode data
    (=cheating).
    """
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplier

    state_ext_scale = np.linspace(3e3, 1e3, num=6)

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external': spaces.Box(low=-100, high=100, shape=(time_dim, 1, 6), dtype=np.float32),
            'internal': spaces.Box(low=-2, high=2, shape=(avg_period, 1, 5), dtype=np.float32),
            'datetime': spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                    'trial_num': spaces.Box(shape=(), low=0, high=10 ** 10, dtype=np.uint32),
                    'trial_type': spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                    'sample_num': spaces.Box(shape=(), low=0, high=10 ** 10, dtype=np.uint32),
                    'first_row': spaces.Box(shape=(), low=0, high=10 ** 10, dtype=np.uint32),
                    'timestamp': spaces.Box(shape=(), low=0, high=np.finfo(np.float64).max, dtype=np.float64),
                }
            )
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
        # Expert parameters:
        expert_config={
            'time_threshold': 5,  # minimum peak estimation radius in number of environment steps
            'pips_threshold': 5,  # minimum peak estimation value in number of quota points
            'pips_scale': 1e-4,   # value of single quota point relative to price value
            'kernel_size': 5,     # gaussian_over_action tails size in number of env. steps
            'kernel_stddev': 1,   # gaussian_over_action standard deviation
        },
    )

    def __init__(self, **kwargs):
        super(GuidedStrategy_0_0, self).__init__(**kwargs)
        self.expert = Oracle(action_space=np.arange(len(self.p.portfolio_actions)), **self.p.expert_config)
        # self.expert = Oracle2(action_space=np.arange(len(self.p.portfolio_actions)), **self.p.expert_config)
        self.expert_actions = None
        self.current_expert_action = None

    def nextstart(self):
        """
        Overrides base method, augmenting it with estimating expert actions before the actual episode starts.
        """
        # This value shows how many episode records we need to spend
        # to estimate the first environment observation:
        self.inner_embedding = self.data.close.buflen()
        self.log.info('Inner time embedding: {}'.format(self.inner_embedding))

        # Now that we know the exact maximum possible episode length,
        # we can extract relevant episode data and make expert predictions.
        # Note: `.as_matrix()` is deprecated in newer pandas; `.values` is the modern equivalent:
        data = self.datas[0].p.dataname.as_matrix()[self.inner_embedding:, :]

        # Note: need to form a sort of environment 'custom candles' by taking min and max price values over every
        # skip_frame period; this is done inside the Oracle class;
        # TODO: shift actions forward to eliminate one-point prediction lag?
        # expert_actions is a matrix representing a discrete distribution over action probabilities
        # of size [max_env_steps, action_space_size]:
        self.expert_actions = self.expert.fit(episode_data=data, resampling_factor=self.p.skip_frame)

    def get_expert_state(self):
        """
        Returns the expert action distribution for the current environment step.
        """
        self.current_expert_action = self.expert_actions[self.env_iteration]
        # print('Strat_iteration:', self.iteration)
        # print('Env_iteration:', self.env_iteration)
        return self.current_expert_action

    # def get_state(self):
    #     # Update inner state statistic and compose state:
    #     self.update_broker_stat()
    #
    #     self.state = {
    #         'external': self.get_external_state(),
    #         'internal': self.get_internal_state(),
    #         'datetime': self.get_datetime_state(),
    #         'expert': self.get_expert_state(),
    #         'metadata': self.get_metadata_state(),
    #     }
    #
    #     return self.state

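# Example (hypothetical): consuming the extra 'expert' observation key that GuidedStrategy_0_0 adds.
# The helper below is illustrative only and not part of btgym's API; the normalisation step assumes
# the raw Oracle scores need not sum to one.
def expert_targets(observation):
    """Turn the expert score vector into a soft probability target and a greedy action label."""
    scores = np.asarray(observation['expert'], dtype=np.float64)
    probs = scores / max(scores.sum(), 1e-8)  # normalise to a probability distribution
    return probs, int(np.argmax(probs))       # soft target, hard (argmax) expert action

# Typical use inside a training loop, with `env` running GuidedStrategy_0_0:
#   obs = env.reset()
#   target_probs, target_action = expert_targets(obs)
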
class ExpertObserver(bt.observer.Observer):
    """
    Keeps track of expert-advised actions. Single data_feed.
    """

    lines = ('buy', 'sell', 'hold', 'close')

    plotinfo = dict(plot=True, subplot=True, plotname='Expert Actions', plotymargin=.8)

    plotlines = dict(
        buy=dict(marker='^', markersize=4.0, color='cyan', fillstyle='full'),
        sell=dict(marker='v', markersize=4.0, color='magenta', fillstyle='full'),
        hold=dict(marker='.', markersize=1.0, color='gray', fillstyle='full'),
        close=dict(marker='o', markersize=4.0, color='blue', fillstyle='full')
    )

    def next(self):
        # Mark the argmax of the expert action distribution estimated by the owner strategy:
        action = np.argmax(self._owner.current_expert_action)
        if action == 0:
            self.lines.hold[0] = 0
        elif action == 1:
            self.lines.buy[0] = 1
        elif action == 2:
            self.lines.sell[0] = -1
        elif action == 3:
            self.lines.close[0] = 0

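# Example (hypothetical) wiring: attach the guided strategy and the expert-action observer to a
# backtrader engine in the standard backtrader way; the parameter override shown is illustrative.
# The configured engine is then handed to a btgym environment (typically as its `engine` argument,
# together with a dataset); exact environment kwargs depend on the setup.
if __name__ == '__main__':
    engine = bt.Cerebro()
    engine.addstrategy(GuidedStrategy_0_0, drawdown_call=10)  # illustrative override of a class default
    engine.addobserver(ExpertObserver)  # plots argmax of the expert action distribution each step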