Source code for btgym.algorithms.envs

# Original code is taken from OpenAI repository under MIT licence:
# https://github.com/openai/universe-starter-agent

import numpy as np
import cv2
import gym
from gym import spaces
from btgym import DictSpace, ActionDictSpace


def _process_frame42(frame):
    frame = frame[34:34+160, :160]
    # Resize by half, then down to 42x42 (essentially mipmapping). If
    # we resize directly we lose pixels that, when mapped to 42x42,
    # aren't close enough to the pixel boundary.
    frame = cv2.resize(frame, (80, 80))
    frame = cv2.resize(frame, (42, 42))
    frame = frame.mean(2)
    frame = frame.astype(np.float32)
    frame *= (1.0 / 255.0)
    frame = np.reshape(frame, [42, 42, 1])
    return frame
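
# A minimal usage sketch (not part of the original module), assuming raw Atari
# frames are 210x160 RGB uint8 arrays; it shows the preprocessing collapsing a
# full-size frame into a normalized single-channel 42x42 observation:
#
#     raw_frame = np.random.randint(0, 256, size=(210, 160, 3), dtype=np.uint8)
#     processed = _process_frame42(raw_frame)
#     assert processed.shape == (42, 42, 1)
#     assert 0.0 <= processed.min() <= processed.max() <= 1.0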


class AtariRescale42x42(gym.ObservationWrapper):
    """
    Gym wrapper, pipes Atari into BTgym algorithms, as the latter expect observations to be DictSpace.
    Makes Atari environment return state as dictionary with single key 'external' holding
    grayscale 42x42 visual output normalized to [0, 1].
    """
    # TODO: IN PROGRESS: dict observation space, include metadata etc.

    def __init__(self, env_id=None):
        """
        Args:
            env_id:     conventional Gym id.
        """
        assert "." not in env_id  # universe environments have dots in names.
        env = gym.make(env_id)
        super(AtariRescale42x42, self).__init__(env)
        self.observation_space = DictSpace(
            {'external': spaces.Box(0.0, 1.0, [42, 42, 1], dtype=np.float32)}
        )
        self.asset_names = ['atari_player']
        num_actions = self.action_space.n
        self.action_space = ActionDictSpace(
            base_actions=list(np.arange(num_actions)),
            assets=self.asset_names
        )

    def observation(self, observation):
        return {'external': _process_frame42(observation)}

    def get_initial_action(self):
        return {asset: 0 for asset in self.asset_names}

    def step(self, action):
        # TODO: fix it
        action = action[self.asset_names[0]]
        observation, reward, done, info = self.env.step(action)
        reward = np.asarray(reward)
        return self.observation(observation), reward, done, info
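
# A brief usage sketch, assuming a locally installed Atari environment such as
# 'PongNoFrameskip-v4' (the id is illustrative); it shows the dict-shaped
# observation and action interface the wrapper exposes:
#
#     env = AtariRescale42x42('PongNoFrameskip-v4')
#     obs = env.reset()                          # {'external': float32 array, shape (42, 42, 1)}
#     action = env.get_initial_action()          # {'atari_player': 0}
#     obs, reward, done, info = env.step(action)
#     env.close()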