# Source code for btgym.research.gps.policy

import tensorflow as tf
from btgym.algorithms.policy.stacked_lstm import AacStackedRL2Policy
from btgym.algorithms.nn.layers import noisy_linear


class GuidedPolicy_0_0(AacStackedRL2Policy):
    """
    Guided policy: simple configuration wrapper around Stacked LSTM architecture.

    The class only pins a set of default hyper-parameters, forwards them to
    the parent ``AacStackedRL2Policy`` constructor and then exposes the
    ``'expert'`` entry of ``self.on_state_in`` as ``self.expert_actions``.
    """

    def __init__(
        self,
        conv_2d_layer_config=(
            (32, (3, 1), (2, 1)),
            (32, (3, 1), (2, 1)),
            (64, (3, 1), (2, 1)),
            (64, (3, 1), (2, 1))
        ),
        lstm_class_ref=tf.contrib.rnn.LayerNormBasicLSTMCell,
        lstm_layers=(256, 256),
        lstm_2_init_period=50,
        linear_layer_ref=noisy_linear,
        **kwargs
    ):
        """
        Build the policy with guided-policy defaults.

        All named arguments are passed through unchanged to the parent
        constructor; their exact semantics are defined there.

        Args:
            conv_2d_layer_config:   per-layer convolution settings, one
                                    3-item tuple per layer; presumably
                                    (num_filters, kernel_shape, stride)
                                    -- TODO confirm against parent class.
            lstm_class_ref:         LSTM cell class to use.
            lstm_layers:            tuple of LSTM layer sizes
                                    (presumably units per layer -- verify).
            lstm_2_init_period:     forwarded as is; meaning is defined by
                                    the parent class.
            linear_layer_ref:       linear layer constructor to use.
            **kwargs:               any other keyword arguments accepted by
                                    the parent constructor.
        """
        super(GuidedPolicy_0_0, self).__init__(
            conv_2d_layer_config=conv_2d_layer_config,
            lstm_class_ref=lstm_class_ref,
            lstm_layers=lstm_layers,
            lstm_2_init_period=lstm_2_init_period,
            linear_layer_ref=linear_layer_ref,
            **kwargs
        )
        # NOTE(review): `on_state_in` is not assigned in this class, so it is
        # presumably created by the parent constructor and must contain an
        # 'expert' entry -- confirm that the state space provides it.
        self.expert_actions = self.on_state_in['expert']