Source code for neurogym.envs.bandit

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Multi-arm Bandit task
TODO: add the actual papers
"""
import numpy as np

import neurogym as ngym
from neurogym import spaces


class Bandit(ngym.TrialEnv):
    """Multi-arm bandit task.

    On each trial, the agent is presented with multiple choices. Each
    option (arm) yields a reward of a certain magnitude with a certain
    probability.

    Args:
        n: int, the number of choices (arms)
        p: tuple of length n, the probability of each arm yielding a reward
        rewards: tuple of length n, the reward magnitude of each option
            when rewarded
    """
    metadata = {
        'paper_link': 'https://www.nature.com/articles/s41593-018-0147-8',
        'paper_name': 'Prefrontal cortex as a meta-reinforcement learning'
                      ' system',
        'tags': ['n-alternative']
    }

    def __init__(self, dt=100, n=2, p=(.5, .5), rewards=None, timing=None):
        super().__init__(dt=dt)
        if timing is not None:
            print('Warning: Bandit task does not require timing variable.')

        if rewards is not None:
            self.rewards = np.array(rewards)
        else:
            self.rewards = np.ones(n)  # reward magnitude of 1 for every arm
        self.n = n
        self.p = np.array(p)  # reward probability of each arm

        self.observation_space = spaces.Box(
            -np.inf, np.inf, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Discrete(n)

    def _new_trial(self, **kwargs):
        trial = {'p': self.p, 'rewards': self.rewards}
        trial.update(kwargs)
        # The observation carries no information about the arms: it is a
        # single zero on every trial.
        self.ob = np.zeros((1, self.observation_space.shape[0]))
        return trial

    def _step(self, action):
        trial = self.trial
        ob = self.ob[0]
        # Bernoulli draw: with probability p[action] the chosen arm pays
        # out its reward magnitude, otherwise the reward is 0.
        reward = (self.rng.random() < trial['p'][action]) \
            * trial['rewards'][action]
        # Every step ends the trial; bandit trials are single-step.
        info = {'new_trial': True}
        return ob, reward, False, info
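
A minimal usage sketch (not part of the module). It assumes the gym-style interface that neurogym's TrialEnv exposes above, where reset() returns an observation and step() returns a 4-tuple; the random-policy loop and variable names are illustrative only:

import numpy as np
from neurogym.envs.bandit import Bandit

env = Bandit(n=2, p=(0.8, 0.2))  # arm 0 pays off 80% of the time
ob = env.reset()
total = np.zeros(2)
counts = np.zeros(2)
for _ in range(1000):
    action = env.action_space.sample()  # uniform random policy
    ob, reward, done, info = env.step(action)
    total[action] += reward
    counts[action] += 1
# Empirical reward rates should approach p = (0.8, 0.2)
print(total / counts)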