fix typos
ddbourgin committed Apr 14, 2020
1 parent 8dbe385 commit e6748dc
Showing 2 changed files with 77 additions and 77 deletions.
122 changes: 61 additions & 61 deletions numpy_ml/bandits/bandits.py
@@ -21,6 +21,29 @@ def __repr__(self):
params = ", ".join(["{}={}".format(k, v) for (k, v) in HP.items() if k != "id"])
return "{}({})".format(HP["id"], params)

@property
def hyperparameters(self):
"""A dictionary of the bandit hyperparameters"""
return {}

@abstractmethod
def oracle_payoff(self, context=None):
"""
Return the expected reward for an optimal agent.
Parameters
----------
context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
The current context matrix for each of the bandit arms, if
applicable. Default is None.
Returns
-------
optimal_rwd : float
The expected reward under an optimal policy.
"""
pass

def pull(self, arm_id, context=None):
"""
"Pull" (i.e., sample from) a given arm's payoff distribution.
@@ -43,24 +66,6 @@ def pull(self, arm_id, context=None):
self.step += 1
return self._pull(arm_id, context)

@abstractmethod
def oracle_payoff(self, context=None):
"""
Return the expected reward for an optimal agent.
Parameters
----------
context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
The current context matrix for each of the bandit arms, if
applicable. Default is None.
Returns
-------
optimal_rwd : float
The expected reward under an optimal policy.
"""
pass

def reset(self):
"""Reset the bandit step and action counters to zero."""
self.step = 0
@@ -69,11 +74,6 @@ def reset(self):
def _pull(self, arm_id):
pass

@property
def hyperparameters(self):
"""A dictionary of the bandit hyperparameters"""
return {}


class MultinomialBandit(Bandit):
def __init__(self, payoffs, payoff_probs):
@@ -114,11 +114,6 @@ def hyperparameters(self):
"payoff_probs": self.payoff_probs,
}

def _pull(self, arm_id, context):
payoffs = self.payoffs[arm_id]
probs = self.payoff_probs[arm_id]
return np.random.choice(payoffs, p=probs)

def oracle_payoff(self, context=None):
"""
Return the expected reward for an optimal agent.
@@ -135,6 +130,11 @@ def oracle_payoff(self, context=None):
"""
return self.best_ev

def _pull(self, arm_id, context):
payoffs = self.payoffs[arm_id]
probs = self.payoff_probs[arm_id]
return np.random.choice(payoffs, p=probs)


class BernoulliBandit(Bandit):
def __init__(self, payoff_probs):
@@ -168,9 +168,6 @@ def hyperparameters(self):
"payoff_probs": self.payoff_probs,
}

def _pull(self, arm_id, context):
return int(np.random.rand() <= self.payoff_probs[arm_id])

def oracle_payoff(self, context=None):
"""
Return the expected reward for an optimal agent.
@@ -187,6 +184,9 @@ def oracle_payoff(self, context=None):
"""
return self.best_ev

def _pull(self, arm_id, context):
return int(np.random.rand() <= self.payoff_probs[arm_id])


class GaussianBandit(Bandit):
def __init__(self, payoff_dists, payoff_probs):
@@ -286,15 +286,6 @@ def __init__(self, G, start_vertex, end_vertex):
placeholder = [None] * len(self.paths)
super().__init__(placeholder, placeholder)

def _calc_arm_evs(self):
I2V = self.G.get_vertex
evs = np.zeros(len(self.paths))
for p_ix, path in enumerate(self.paths):
for ix, v_i in enumerate(path[:-1]):
e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
evs[p_ix] -= e.weight
return evs

@property
def hyperparameters(self):
"""A dictionary of the bandit hyperparameters"""
@@ -305,15 +296,6 @@ def hyperparameters(self):
"start_vertex": self.start_vertex,
}

def _pull(self, arm_id, context):
reward = 0
I2V = self.G.get_vertex
path = self.paths[arm_id]
for ix, v_i in enumerate(path[:-1]):
e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
reward -= e.weight
return reward

def oracle_payoff(self, context=None):
"""
Return the expected reward for an optimal agent.
@@ -330,6 +312,24 @@ def oracle_payoff(self, context=None):
"""
return self.best_ev

def _calc_arm_evs(self):
I2V = self.G.get_vertex
evs = np.zeros(len(self.paths))
for p_ix, path in enumerate(self.paths):
for ix, v_i in enumerate(path[:-1]):
e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
evs[p_ix] -= e.weight
return evs

def _pull(self, arm_id, context):
reward = 0
I2V = self.G.get_vertex
path = self.paths[arm_id]
for ix, v_i in enumerate(path[:-1]):
e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
reward -= e.weight
return reward


class ContextualBernoulliBandit(Bandit):
def __init__(self, context_probs):
@@ -379,12 +379,6 @@ def get_context(self):
context[np.random.choice(D), :] = 1
return random_one_hot_matrix(1, D).ravel()

def _pull(self, arm_id, context):
D, K = self.context_probs.shape
arm_probs = context[:, arm_id] @ self.context_probs
arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
return arm_rwds[arm_id]

def oracle_payoff(self, context):
"""
Return the expected reward for an optimal agent.
@@ -402,6 +396,12 @@ def oracle_payoff(self, context):
"""
return context[:, 0] @ self.best_ev

def _pull(self, arm_id, context):
D, K = self.context_probs.shape
arm_probs = context[:, arm_id] @ self.context_probs
arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
return arm_rwds[arm_id]


class ContextualLinearBandit(Bandit):
def __init__(self, K, D, payoff_variance=1):
@@ -484,12 +484,6 @@ def get_context(self):
"""
return np.random.normal(size=(self.D, self.K))

def _pull(self, arm_id, context):
K, thetas = self.K, self.thetas
self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
return (self.arm_evs + self._noise)[arm_id]

def oracle_payoff(self, context):
"""
Return the expected reward for an optimal agent.
@@ -507,3 +501,9 @@ def oracle_payoff(self, context):
"""
best_arm = np.argmax(self.arm_evs)
return self.arm_evs[best_arm]

def _pull(self, arm_id, context):
K, thetas = self.K, self.thetas
self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
return (self.arm_evs + self._noise)[arm_id]
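
For context, a minimal usage sketch (not part of the diff) of the bandit API as it reads after this commit. The module path numpy_ml.bandits.bandits and the example payoff probabilities are assumptions; only the BernoulliBandit(payoff_probs), pull(arm_id, context=None), oracle_payoff(context=None), and reset() signatures come from the diff above.

import numpy as np
from numpy_ml.bandits.bandits import BernoulliBandit  # assumed import path

# Three Bernoulli arms with example success probabilities
bandit = BernoulliBandit(payoff_probs=[0.1, 0.5, 0.9])

bandit.reset()                    # zero the step / action counters
reward = bandit.pull(2)           # sample a 0/1 payoff from arm 2
optimal = bandit.oracle_payoff()  # expected reward of the best arm
print(reward, optimal)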
32 changes: 16 additions & 16 deletions numpy_ml/bandits/policies.py
@@ -32,14 +32,6 @@ def parameters(self):
"""A dictionary containing the current policy parameters"""
pass

@abstractmethod
def _initialize_params(self, bandit):
"""
Initialize any policy-specific parameters that depend on information
from the bandit environment.
"""
pass

def act(self, bandit, context=None):
"""
Select an arm and sample from its payoff distribution.
@@ -68,17 +60,17 @@ def act(self, bandit, context=None):
self._update_params(arm_id, rwd, context)
return rwd, arm_id

def _pull_arm(self, bandit, arm_id, context):
"""Execute a bandit action and return the received reward."""
self.step += 1
return bandit.pull(arm_id, context)

def reset(self):
"""Reset the policy parameters and counters to their initial states."""
self.step = 0
self._reset_params()
self.is_initialized = False

def _pull_arm(self, bandit, arm_id, context):
"""Execute a bandit action and return the received reward."""
self.step += 1
return bandit.pull(arm_id, context)

@abstractmethod
def _select_arm(self, bandit, context):
"""Select an arm based on the current context"""
@@ -89,6 +81,14 @@ def _update_params(self, bandit, context):
"""Update the policy parameters after an interaction"""
pass

@abstractmethod
def _initialize_params(self, bandit):
"""
Initialize any policy-specific parameters that depend on information
from the bandit environment.
"""
pass

@abstractmethod
def _reset_params(self):
"""
@@ -267,7 +267,7 @@ def _update_params(self, arm_id, reward, context=None):
def _reset_params(self):
"""
Reset any model-specific parameters. This gets called within the
public `self.reset()` method.
public :method:`reset` method.
"""
self.ev_estimates = {}
self.pull_counts = defaultdict(lambda: 0)
@@ -282,7 +282,7 @@ def __init__(self, alpha=1, beta=1):
Notes
-----
The policy assumes independent Beta priors on the Bernoulli arm payoff
probabilities, :math:`\\theta`:
probabilities, :math:`\theta`:
.. math::
@@ -414,7 +414,7 @@ def __init__(self, alpha=1):
Notes
-----
LinUCB is only defined for :class:`ContextualLinearBandit <numpy_ml.bandits.bandits.ContextualLinearBandit>` environments.
LinUCB is only defined for :class:`ContextualLinearBandit <numpy_ml.bandits.ContextualLinearBandit>` environments.
References
----------
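
Similarly, a sketch (not part of the diff) of the policy/bandit interaction loop implied by act and reset in policies.py. The class name ThompsonSamplingBetaBinomial and the import paths are assumptions about the surrounding library; only the act(bandit, context=None) -> (reward, arm_id), reset(), and oracle_payoff() behavior comes from this commit.

from numpy_ml.bandits.bandits import BernoulliBandit            # assumed import path
from numpy_ml.bandits.policies import ThompsonSamplingBetaBinomial  # assumed class name

bandit = BernoulliBandit(payoff_probs=[0.2, 0.7, 0.4])
policy = ThompsonSamplingBetaBinomial(alpha=1, beta=1)  # Beta(1, 1) prior on each arm

n_steps, total_reward = 1000, 0.0
for _ in range(n_steps):
    rwd, arm_id = policy.act(bandit)  # select an arm, pull it, update the posterior
    total_reward += rwd

# Regret relative to always pulling the best arm in expectation
print(n_steps * bandit.oracle_payoff() - total_reward)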
