Source code for genrl.environments.atari_wrappers

from typing import Tuple

import gym
import numpy as np
from gym.core import Wrapper


class NoopReset(Wrapper):
    """
    Some Atari environments always reset to the same initial state. To
    introduce stochasticity, we take a random number of no-op (empty)
    actions at the start of each episode.

    :param env: Atari environment
    :param max_noops: Maximum number of no-ops to be taken
    :type env: Gym Environment
    :type max_noops: int
    """

    def __init__(self, env: gym.Env, max_noops: int = 30):
        super(NoopReset, self).__init__(env)
        self.max_noops = max_noops
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == "NOOP"
    def reset(self) -> np.ndarray:
        """
        Resets the state of the environment, then performs the no-op
        action a random number of times to introduce stochasticity.

        :returns: Initial state
        :rtype: NumPy array
        """
        self.env.reset()
        noops = np.random.randint(1, self.max_noops + 1)
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset()
        return obs
    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
        """
        Steps through the underlying Atari environment for the given action.

        :param action: Action taken by the agent
        :type action: NumPy array
        :returns: Current state, reward (for frameskip number of actions),
            done, info
        """
        return self.env.step(action)
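
# A minimal usage sketch (illustrative, not part of the original module):
# wrapping a standard Atari environment with NoopReset. The environment id
# "BreakoutNoFrameskip-v4" and the helper name `_demo_noop_reset` are
# assumptions; running it requires gym's Atari extras (`pip install gym[atari]`).
def _demo_noop_reset() -> None:
    env = NoopReset(gym.make("BreakoutNoFrameskip-v4"), max_noops=30)
    obs = env.reset()  # each episode now starts after 1..max_noops NOOP steps
    print(obs.shape)  # raw Atari frames, e.g. (210, 160, 3)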
class FireReset(Wrapper):
    """
    Some Atari environments do not do anything until a specific action
    (the FIRE action) is taken, so we take that action on reset, before
    training starts.

    :param env: Atari environment
    :type env: Gym Environment
    """

    def __init__(self, env: gym.Env):
        super(FireReset, self).__init__(env)
    def reset(self) -> np.ndarray:
        """
        Resets the state of the environment and presses FIRE (if the
        environment supports it) so that the episode actually begins.

        :returns: Initial state
        :rtype: NumPy array
        """
        observation = self.env.reset()
        action_meanings = self.env.unwrapped.get_action_meanings()
        # Check the length before indexing to avoid an IndexError on
        # environments with fewer than two actions
        if len(action_meanings) >= 3 and action_meanings[1] == "FIRE":
            self.env.step(1)
            observation, _, _, _ = self.env.step(2)
        return observation
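
# A minimal usage sketch (illustrative, not part of the original module):
# the two wrappers compose, so episodes start stochastically and with FIRE
# already pressed. `_demo_fire_reset` is a hypothetical helper and
# "BreakoutNoFrameskip-v4" is again an assumed environment id.
def _demo_fire_reset() -> None:
    env = FireReset(NoopReset(gym.make("BreakoutNoFrameskip-v4")))
    obs = env.reset()  # FIRE (action 1) is taken automatically on reset
    obs, reward, done, info = env.step(env.action_space.sample())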