Source code for genrl.agents.bandits.contextual.fixed

from typing import List

import numpy as np
import torch

from genrl.agents.bandits.contextual.base import DCBAgent
from genrl.utils.data_bandits.base import DataBasedBandit


class FixedAgent(DCBAgent):
    def __init__(
        self, bandit: DataBasedBandit, p: List[float] = None, device: str = "cpu"
    ):
        """A fixed policy agent for deep contextual bandits.

        Args:
            bandit (DataBasedBandit): Bandit to solve.
            p (List[float], optional): List of probabilities for each action.
                Defaults to None, which implies the action is sampled uniformly.
            device (str): Device to use for tensor operations.
                "cpu" for cpu or "cuda" for cuda. Defaults to "cpu".

        Raises:
            ValueError: Raised if the length of the given probabilities is not
                equal to the number of actions available in the given bandit.
        """
        super(FixedAgent, self).__init__(bandit, device)
        if p is None:
            p = [1 / self.n_actions for _ in range(self.n_actions)]
        elif len(p) != self.n_actions:
            raise ValueError(f"p should be of length {self.n_actions}")
        self.p = p
        self.t = 0

    def select_action(self, context: torch.Tensor) -> int:
        """Select an action based on fixed probabilities.

        Args:
            context (torch.Tensor): The context vector to select an action for.
                This agent does not consider the context vector.

        Returns:
            int: The action to take.
        """
        self.t += 1
        return np.random.choice(range(self.n_actions), p=self.p)

    def update_db(self, *args, **kwargs):
        pass

    def update_params(self, *args, **kwargs):
        pass
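A minimal usage sketch follows. The concrete bandit class, its import path, and its constructor arguments (CovertypeDataBandit(download=True)), as well as the reset()/step() interaction pattern, are assumptions based on genrl's DataBasedBandit interface; substitute whichever data bandit is available in your installation.

# Minimal usage sketch (assumptions noted above): run FixedAgent on a data-based bandit.
from genrl.agents.bandits.contextual.fixed import FixedAgent
from genrl.utils.data_bandits import CovertypeDataBandit  # assumed import path

bandit = CovertypeDataBandit(download=True)  # assumed constructor; any DataBasedBandit subclass should work
agent = FixedAgent(bandit, p=None)  # p=None -> uniform probabilities over bandit.n_actions

context = bandit.reset()  # assumed: reset() returns the initial context tensor
for _ in range(10):
    action = agent.select_action(context)  # context is ignored by this fixed policy
    context, reward = bandit.step(action)  # assumed: step() returns (next context, reward)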