Source code for genrl.environments.atari_wrappers

from typing import Tuple

import gym
import numpy as np
from gym.core import Wrapper


class NoopReset(Wrapper):
    """
    Some Atari environments always reset to the same initial state. To
    introduce stochasticity, we take a random number of no-op (empty)
    actions at the start of each episode.

    :param env: Atari environment
    :param max_noops: Maximum number of no-ops to be taken
    :type env: Gym Environment
    :type max_noops: int
    """

    def __init__(self, env: gym.Env, max_noops: int = 30):
        super(NoopReset, self).__init__(env)
        self.max_noops = max_noops
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == "NOOP"
    def reset(self) -> np.ndarray:
        """
        Resets the state of the environment, then performs the no-op
        action a random number of times to introduce stochasticity.

        :returns: Initial state
        :rtype: NumPy array
        """
        self.env.reset()
        noops = np.random.randint(1, self.max_noops + 1)
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset()
        return obs
    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
        """
        Steps through the underlying Atari environment for the given action.

        :param action: Action taken by the agent
        :type action: NumPy array
        :returns: Current state, reward (for frameskip number of actions),
            done, info
        """
        return self.env.step(action)
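
# A minimal usage sketch (illustrative, not part of the original module):
# wrapping a standard Atari environment with NoopReset. The environment id
# "BreakoutNoFrameskip-v4" and the helper name `_demo_noop_reset` are
# assumptions; running it requires gym's Atari extras (`pip install gym[atari]`).
def _demo_noop_reset() -> None:
    env = NoopReset(gym.make("BreakoutNoFrameskip-v4"), max_noops=30)
    obs = env.reset()  # each episode now starts after 1..max_noops NOOP steps
    print(obs.shape)  # raw Atari frames, e.g. (210, 160, 3)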
class FireReset(Wrapper):
    """
    Some Atari environments do not do anything until a specific action
    (the FIRE action) is taken, so we take that action on reset, before
    training starts.

    :param env: Atari environment
    :type env: Gym Environment
    """

    def __init__(self, env: gym.Env):
        super(FireReset, self).__init__(env)
    def reset(self) -> np.ndarray:
        """
        Resets the state of the environment and presses FIRE (if the
        environment supports it) so that the episode actually begins.

        :returns: Initial state
        :rtype: NumPy array
        """
        observation = self.env.reset()
        action_meanings = self.env.unwrapped.get_action_meanings()
        # Check the length before indexing to avoid an IndexError on
        # environments with fewer than two actions
        if len(action_meanings) >= 3 and action_meanings[1] == "FIRE":
            self.env.step(1)
            observation, _, _, _ = self.env.step(2)
        return observation
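
# A minimal usage sketch (illustrative, not part of the original module):
# the two wrappers compose, so episodes start stochastically and with FIRE
# already pressed. `_demo_fire_reset` is a hypothetical helper and
# "BreakoutNoFrameskip-v4" is again an assumed environment id.
def _demo_fire_reset() -> None:
    env = FireReset(NoopReset(gym.make("BreakoutNoFrameskip-v4")))
    obs = env.reset()  # FIRE (action 1) is taken automatically on reset
    obs, reward, done, info = env.step(env.action_space.sample())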