bandit

Provides the `Bandit` class for bandit simulations.

`Bandit(arms, rng=None, seed=None)`

Multi-armed bandit with one or more arms.

This class wraps around a list of arms, each of which has a reward distribution. It provides an interface for interacting with the arms, such as playing a specific arm, querying for the optimal arm, and computing regret from a given choice.

Parameters:

Name	Type	Description	Default
`arms`	`list[Arm]`	A list of arms for the bandit.	required
`rng`	`Generator \| None`	A random number generator.	`None`
`seed`	`int \| None`	A seed for random number generation if `rng` is not provided.	`None`

Source code in mabby/bandit.py

def __init__(
    self, arms: list[Arm], rng: Generator | None = None, seed: int | None = None
):
    """Initializes a bandit with a given set of arms.

    Args:
        arms: A list of arms for the bandit. The list is stored by reference,
            not copied.
        rng: A random number generator. When provided, it is used as-is and
            ``seed`` is ignored.
        seed: A seed for random number generation if ``rng`` is not provided.
    """
    self._arms = arms
    # Compare against ``None`` explicitly so that a supplied generator is never
    # discarded because of how it happens to evaluate in a boolean context.
    self._rng = rng if rng is not None else np.random.default_rng(seed)

`means: list[float]` `property`

The means of the arms.

Returns:

Type	Description
`list[float]`	A list containing the mean of each arm.

`__getitem__(i)`

Returns an arm by index.

Parameters:

Name	Type	Description	Default
`i`	`int`	The index of the arm to get.	required

Returns:

Type	Description
`Arm`	The arm at the given index.

Source code in mabby/bandit.py

def __getitem__(self, i: int) -> Arm:
    """Looks up one of the bandit's arms by position.

    Args:
        i: The index of the arm to get.

    Returns:
        The arm stored at index ``i``.
    """
    arm = self._arms[i]
    return arm

`__iter__()`

Returns an iterator over the bandit's arms.

Source code in mabby/bandit.py

def __iter__(self) -> Iterable[Arm]:
    """Creates an iterator that walks the bandit's arms in order."""
    # NOTE(review): the value returned here is an iterator; ``Iterator[Arm]``
    # would be a more precise annotation if that name is available in the file.
    arm_iterator = iter(self._arms)
    return arm_iterator

`__len__()`

Returns the number of arms.

Source code in mabby/bandit.py

def __len__(self) -> int:
    """Counts how many arms the bandit holds."""
    arm_count = len(self._arms)
    return arm_count

`__repr__()`

Returns a string representation of the bandit.

Source code in mabby/bandit.py

def __repr__(self) -> str:
    """Builds the bandit's string representation from its list of arms."""
    return f"{self._arms!r}"

`best_arm()`

Returns the index of the optimal arm.

The optimal arm is the arm with the greatest expected reward. If several arms tie for the greatest expected reward, one of them is chosen at random.

Returns:

Type	Description
`int`	The index of the optimal arm.

Source code in mabby/bandit.py

def best_arm(self) -> int:
    """Picks the index of the optimal arm.

    The optimal arm is the one whose expected reward is greatest. The choice
    is delegated to ``random_argmax``, which is handed the bandit's own random
    number generator; per the original documentation, ties between equally
    good arms are broken at random.

    Returns:
        The index of the optimal arm.
    """
    expected_rewards = self.means
    return random_argmax(expected_rewards, rng=self._rng)

`is_opt(choice)`

Returns whether a given choice is optimal.

Parameters:

Name	Type	Description	Default
`choice`	`int`	The index of the chosen arm.	required

Returns:

Type	Description
`bool`	`True` if the arm has the greatest expected reward, `False` otherwise.

Source code in mabby/bandit.py

def is_opt(self, choice: int) -> bool:
    """Returns whether a given choice is optimal.

    Args:
        choice: The index of the chosen arm.

    Returns:
        ``True`` if the arm has the greatest expected reward, ``False`` otherwise.
    """
    best_mean = np.max(self.means)
    # Comparing against a NumPy scalar yields ``numpy.bool_``; convert it so the
    # declared ``bool`` return type actually holds (e.g. for ``is True`` checks).
    return bool(best_mean == self._arms[choice].mean)

`play(i)`

Plays an arm by index.

Parameters:

Name	Type	Description	Default
`i`	`int`	The index of the arm to play.	required

Returns:

Type	Description
`float`	The reward from playing the arm.

Source code in mabby/bandit.py

def play(self, i: int) -> float:
    """Pulls the arm at the given index.

    The arm is looked up through the bandit's own indexing and played with the
    bandit's random number generator.

    Args:
        i: The index of the arm to play.

    Returns:
        The reward from playing the arm.
    """
    chosen_arm = self[i]
    reward = chosen_arm.play(self._rng)
    return reward

`regret(choice)`

Returns the regret from a given choice.

The regret is computed as the difference between the expected reward from the optimal arm and the expected reward from the chosen arm.

Parameters:

Name	Type	Description	Default
`choice`	`int`	The index of the chosen arm.	required

Returns:

Type	Description
`float`	The computed regret value.

Source code in mabby/bandit.py

def regret(self, choice: int) -> float:
    """Computes the regret incurred by a given choice.

    Regret is the gap between the largest mean among all arms and the mean of
    the arm that was chosen.

    Args:
        choice: The index of the chosen arm.

    Returns:
        The computed regret value.
    """
    best_mean = np.max(self.means)
    chosen_mean = self._arms[choice].mean
    return best_mean - chosen_mean

bandit

Bandit(arms, rng=None, seed=None)

means: list[float] property

__getitem__(i)

__iter__()

__len__()

__repr__()

best_arm()

is_opt(choice)

play(i)

regret(choice)

`Bandit(arms, rng=None, seed=None)`

`means: list[float]` `property`

`__getitem__(i)`

`__iter__()`

`__len__()`

`__repr__()`

`best_arm()`

`is_opt(choice)`

`play(i)`

`regret(choice)`