Source code for genrl.utils.models

from typing import Tuple

import numpy as np


class TabularModel:
    """
    Sample-based tabular model class for deterministic, discrete environments

    For every (state, action) pair passed to :meth:`add` the model keeps the
    most recently supplied next state and reward; an earlier entry for the
    same pair is overwritten.

    :param s_dim: environment state dimension (size of the tables' state axis)
    :param a_dim: environment action dimension (size of the tables' action axis)
    :type s_dim: int
    :type a_dim: int
    """

    def __init__(self, s_dim: int, a_dim: int):
        self.s_dim = s_dim
        self.a_dim = a_dim

        # Next-state table. The platform index type is used because a narrow
        # dtype such as uint8 cannot hold state indices above 255.
        self.s_model = np.zeros((s_dim, a_dim), dtype=np.intp)
        # Reward table.
        self.r_model = np.zeros((s_dim, a_dim))
        # Explicit record of which (state, action) pairs have been added.
        # The stored values cannot serve this purpose: next state 0 and
        # reward 0 are legitimate entries that look identical to the initial
        # zero fill.
        self._visited = np.zeros((s_dim, a_dim), dtype=bool)

    def add(
        self,
        state: np.ndarray,
        action: np.ndarray,
        reward: float,
        next_state: np.ndarray,
    ) -> None:
        """
        add transition to model

        ``state`` and ``action`` are used directly as indices into the
        model tables.

        :param state: state
        :param action: action
        :param reward: reward
        :param next_state: next state
        :type state: int
        :type action: int
        :type reward: float
        :type next_state: int
        """
        self.s_model[state, action] = next_state
        self.r_model[state, action] = reward
        self._visited[state, action] = True

    def sample(self) -> Tuple:
        """
        sample state action pair from model

        A state is drawn uniformly from the states that have at least one
        recorded action, then an action is drawn uniformly from the actions
        recorded for that state.

        :returns: state and action
        :rtype: int; int
        :raises ValueError: if no transition has been added yet
        """
        visited_states = np.flatnonzero(self._visited.any(axis=1))
        if visited_states.size == 0:
            raise ValueError(
                "cannot sample from an empty model; add a transition first"
            )
        # select random visited state
        state = np.random.choice(visited_states)
        # random action among those recorded for that state
        action = np.random.choice(np.flatnonzero(self._visited[state]))
        return state, action

    def step(self, state: np.ndarray, action: np.ndarray) -> Tuple:
        """
        return consequence of action at state

        The values are read straight from the tables, so a pair that was
        never added yields the initial zero entries.

        :param state: state
        :param action: action
        :type state: int
        :type action: int
        :returns: reward and next state
        :rtype: float; int
        """
        reward = self.r_model[state, action]
        next_state = self.s_model[state, action]
        return reward, next_state

    def is_empty(self) -> bool:
        """
        Check if the model has been updated or not

        :returns: True if no transition has been added yet
        :rtype: bool
        """
        return not self._visited.any()


# Maps a model's string identifier to the class that implements it.
model_registry = {
    "tabular": TabularModel,
}


def get_model_from_name(name_: str):
    """
    get model object from name

    :param name_: name of the model ['tabular']
    :type name_: str
    :returns: the entry registered under ``name_`` in ``model_registry``
    :raises NotImplementedError: if ``name_`` is not a registered model name
    """
    try:
        return model_registry[name_]
    except KeyError:
        # Raise rather than return the exception class: a returned class
        # would be handed to the caller as if it were a model.
        raise NotImplementedError(
            f"unknown model {name_!r}; available models: {sorted(model_registry)}"
        ) from None