Source code for btgym.research.gps.loss

import tensorflow as tf


[docs]def guided_aac_loss_def_0_0(pi_actions, expert_actions, name='on_policy/aac', verbose=False, **kwargs): """ Cross-entropy imitation loss on expert actions. Args: pi_actions: tensor holding policy actions logits expert_actions: tensor holding expert actions probability distribution name: loss op name scope Returns: tensor holding estimated imitation loss; list of related tensorboard summaries. """ with tf.name_scope(name + '/guided_loss'): # Loss over expert action's distribution: neg_pi_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2( logits=pi_actions, labels=tf.argmax(expert_actions, axis=-1) ) loss = tf.reduce_mean(neg_pi_log_prob) if verbose: summaries = [tf.summary.scalar('actions_ce', loss)] else: summaries = [] return loss, summaries
[docs]def guided_aac_loss_def_0_1(pi_actions, expert_actions, name='on_policy/aac', verbose=False, **kwargs): """ Cross-entropy imitation loss on {`buy`, `sell`} subset of expert actions. Args: pi_actions: tensor holding policy actions logits expert_actions: tensor holding expert actions probability distribution name: loss op name scope Returns: tensor holding estimated imitation loss; list of related tensorboard summaries. """ with tf.name_scope(name + '/guided_loss'): # Loss over expert buy/ sell: # Cross-entropy on subset?... neg_pi_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2( logits=pi_actions[..., 1:3], labels=expert_actions[..., 1:3] ) loss = tf.reduce_mean(neg_pi_log_prob) if verbose: summaries = [tf.summary.scalar('actions_ce', loss)] else: summaries = [] return loss, summaries
[docs]def guided_aac_loss_def_0_3(pi_actions, expert_actions, name='on_policy/aac', verbose=False, **kwargs): """ MSE imitation loss on {`buy`, `sell`} subset of expert actions. Args: pi_actions: tensor holding policy actions logits expert_actions: tensor holding expert actions probability distribution name: loss op name scope Returns: tensor holding estimated imitation loss; list of related tensorboard summaries. """ with tf.name_scope(name + '/guided_loss'): if 'guided_lambda' in kwargs.keys(): guided_lambda = kwargs['guided_lambda'] else: guided_lambda = 1.0 # Loss over expert buy/ sell: loss = tf.losses.mean_squared_error( labels=expert_actions[..., 1:3], predictions=tf.nn.softmax(pi_actions)[..., 1:3], ) * guided_lambda if verbose: summaries = [tf.summary.scalar('actions_mse', loss)] else: summaries = [] return loss, summaries