Skip to content

Commit

Permalink
miscellaneous documentation formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
ddbourgin committed Apr 6, 2020
1 parent ada1788 commit 24c8c76
Showing 1 changed file with 28 additions and 17 deletions.
45 changes: 28 additions & 17 deletions numpy_ml/bandits/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ def __repr__(self):

@property
def hyperparameters(self):
"""A dictionary containing the policy hyperparameters"""
return {}

@property
def parameters(self):
"""A dictionary containing the current policy parameters"""
return {}

def act(self, bandit):
Expand All @@ -32,15 +34,15 @@ def act(self, bandit):
Parameters
----------
bandit : :class:`bandit.Bandit` instance
The multi-arm bandit to act upon
bandit : :class:`Bandit <numpy_ml.bandits.bandits.Bandit>` instance
The multi-armed bandit to act upon
Returns
-------
rwd : float
The reward received after pulling `arm_id`
The reward received after pulling ``arm_id``.
arm_id : int
The arm that was pulled to generate `rwd`
The arm that was pulled to generate ``rwd``.
"""
if not self.is_initialized:
self.pull_counts = {i: 0 for i in range(bandit.n_arms)}
Expand Down Expand Up @@ -147,15 +149,15 @@ def __init__(self, C=1, ev_prior=0.5):
.. math::
UCB(a, t) = EV_t(a) + C \sqrt{\\frac{2 \log t}{N_t(a)}}
\\text{UCB}(a, t) = \\text{EV}_t(a) + C \sqrt{\\frac{2 \log t}{N_t(a)}}
where `UCB(a, t)` is the upper confidence bound on the expected value
of arm `a` at time `t`, :math:`EV_t(a)` is the average of the rewards
received so far from pulling arm `a`, `C` is a
the confidence upper bound of the estimate for `UCB(a, t)` (for
logarithmic regret bounds, `C` must equal 1), and `N_t(a)` is the
number of times arm `a` has been pulled during the previous `t - 1`
timesteps.
where :math:`\\text{UCB}(a, t)` is the upper confidence bound on the
expected value of arm `a` at time `t`, :math:`\\text{EV}_t(a)` is the
average of the rewards received so far from pulling arm `a`, `C` is a
parameter controlling the confidence upper bound of the estimate for
:math:`\\text{UCB}(a, t)` (for logarithmic regret bounds, `C` must
equal 1), and :math:`N_t(a)` is the number of times arm `a` has been
pulled during the previous `t - 1` timesteps.
References
----------
Expand Down Expand Up @@ -209,26 +211,35 @@ def _reset_params(self):
class ThompsonSamplingBetaBinomial(BanditPolicyBase):
def __init__(self, alpha=1, beta=1):
"""
A conjugate Thompson sampling policy for multi-arm bandits with
A conjugate Thompson sampling [1]_ [2]_ policy for multi-armed bandits with
Bernoulli likelihoods.
Notes
-----
The policy assumes independent Beta priors on the arm payoff
probabilities, :math:`theta`:
probabilities, :math:`\\theta`:
..math::
.. math::
\\theta_k \sim \\text{Beta}(\\alpha_k, \\beta_k)
where :math:`k \in 1,\ldots,K` indexes arms in the MAB and
where :math:`k \in \{1,\ldots,K \}` indexes arms in the MAB and
:math:`\\theta_k` is the parameter of the Bernoulli likelihood
for arm `k`. The sampler proceeds by selecting actions in proportion to
the posterior probability that they are optimal. Thanks to the
conjugacy between the Beta prior and Bernoulli likelihood the posterior
for each arm is also Beta-distributed and can be sampled from
efficiently.
References
----------
.. [1] Thompson, W. (1933). On the likelihood that one unknown
probability exceeds another in view of the evidence of two samples.
*Biometrika, 25(3/4)*, 285-294.
.. [2] Chapelle, O., & Li, L. (2011). An empirical evaluation of
Thompson sampling. *Advances in Neural Information Processing
Systems, 24*, 2249-2257.
Parameters
----------
alpha : float or list of length `K`
Expand Down Expand Up @@ -261,7 +272,7 @@ def hyperparameters(self):

def _initialize_params(self, bandit):
bhp = bandit.hyperparameters
assert bhp["id"] == "MultiArmedBanditBernoulliPayoff"
assert bhp["id"] == "MABBernoulliPayoff"

# initialize the model prior
if isinstance(self.alpha, numbers.Number):
Expand Down

0 comments on commit 24c8c76

Please sign in to comment.