Source code for genrl.agents.bandits.multiarmed.gaussian_mab

from typing import Tuple

import numpy as np

from genrl.core.bandit import MultiArmedBandit


class GaussianMAB(MultiArmedBandit):
    """
    Contextual Bandit with categorical context and Gaussian reward distribution

    :param bandits: Number of bandits
    :param arms: Number of arms in each bandit
    :param reward_means: Mean of the Gaussian reward distribution for each arm
    :param context_type: Type of context to return (default: "tensor")
    :type bandits: int
    :type arms: int
    :type reward_means: numpy.ndarray
    :type context_type: str
    """

    def __init__(
        self,
        bandits: int = 10,
        arms: int = 5,
        reward_means: np.ndarray = None,
        context_type: str = "tensor",
    ):
        super(GaussianMAB, self).__init__(bandits, arms, context_type)
        if reward_means is not None:
            self.reward_means = reward_means
        else:
            # Default: draw each arm's reward mean uniformly from [0, 1)
            self.reward_means = np.random.random(size=(bandits, arms))

    def _compute_reward(self, action: int) -> Tuple[float, float]:
        """
        Takes an action in the bandit and returns the sampled reward

        The reward is sampled from a Gaussian distribution centered at the
        chosen arm's mean, with unit variance.

        :param action: The action to take
        :type action: int
        :returns: Reward sampled for the action taken and the maximum mean reward
        :rtype: Tuple[float, float]
        """
        reward_mean = self.reward_means[self.curr_bandit, action]
        reward = np.random.normal(reward_mean)
        return reward, max(self.reward_means[self.curr_bandit])
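
A minimal sketch of the reward model implemented by _compute_reward, written in plain NumPy so it runs without genrl. The bandits, arms, curr_bandit, and action names mirror the attributes above; the specific index values are hypothetical choices for illustration:

import numpy as np

bandits, arms = 10, 5
reward_means = np.random.random(size=(bandits, arms))  # one mean per (bandit, arm)

curr_bandit = 3  # index of the active bandit (hypothetical)
action = 2       # arm pulled on that bandit (hypothetical)

# Reward ~ N(mean, 1): np.random.normal's scale parameter defaults to 1.0
reward = np.random.normal(reward_means[curr_bandit, action])
max_reward = reward_means[curr_bandit].max()  # best achievable mean reward

print(reward, max_reward)

The second return value (the maximum mean reward on the current bandit) gives the benchmark against which the sampled reward can be compared, e.g. for regret tracking.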