Skip to content

Commit

Permalink
Merge pull request #1068 from minrk/cache-404
Browse files Browse the repository at this point in the history
cache 404 responses from GitHub API
  • Loading branch information
betatim authored Feb 25, 2020
2 parents bb441a7 + 00b0576 commit 239e939
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 2 deletions.
12 changes: 12 additions & 0 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,12 @@ class GitHubRepoProvider(RepoProvider):
# shared cache for resolved refs
cache = Cache(1024)

# separate cache with max age for 404 results
# 404s don't have ETags, so we want them to expire at some point
# to avoid caching a 404 forever since e.g. a missing repo or branch
# may be created later
cache_404 = Cache(1024, max_age=300)

hostname = Unicode('github.com',
config=True,
help="""The GitHub hostname to use
Expand Down Expand Up @@ -769,10 +775,16 @@ def get_resolved_ref(self):
etag = cached['etag']
self.log.debug("Cache hit for %s: %s", api_url, etag)
else:
cache_404 = self.cache_404.get(api_url)
if cache_404:
self.log.debug("Cache hit for 404 on %s", api_url)
return None
etag = None

resp = yield self.github_api_request(api_url, etag=etag)
if resp is None:
self.log.debug("Caching 404 on %s", api_url)
self.cache_404.set(api_url, True)
return None
if resp.code == 304:
self.log.info("Using cached ref for %s: %s", api_url, cached['sha'])
Expand Down
46 changes: 46 additions & 0 deletions binderhub/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from unittest import mock

from binderhub import utils


Expand Down Expand Up @@ -57,3 +59,47 @@ def test_rendezvous_redistribution():
# the initial distribution of keys should be roughly the same
# We pick 30 because it is "about right"
assert abs(start_in["b1"] - start_in["b2"]) < 30


def test_cache():
cache = utils.Cache(max_size=2)
cache.set('a', 1)
cache.set('b', 2)
assert 'a' in cache
cache.set('c', 3)
assert 'a' not in cache
cache.set('b', 3)
assert 'b' in cache
cache.set('d', 4)
assert 'b' in cache
assert 'c' not in cache
assert len(cache) == 2


def test_cache_expiry():
cache = utils.Cache(2, max_age=10)
before_now = cache._now

def later():
return before_now() + 20

expired = mock.patch.object(cache, '_now', later)

cache.set('a', 1)
assert 'a' in cache
assert cache.get('a')
with expired:
assert not cache.get('a')
assert 'a' not in cache
assert 'a' not in cache._ages

cache.set('a', 1)
# no max age means no expiry
with expired, mock.patch.object(cache, 'max_age', 0):
assert cache.get('a')

# retrieving an item does *not* update the age
before_age = cache._ages['a']
with mock.patch.object(cache, '_now', lambda: before_now() + 1):
cache.get('a')
assert cache._ages['a'] == before_age
24 changes: 22 additions & 2 deletions binderhub/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Miscellaneous utilities"""
from collections import OrderedDict
from hashlib import blake2b
import time

from traitlets import Integer, TraitError

Expand Down Expand Up @@ -88,15 +89,28 @@ def validate(self, obj, value):
class Cache(OrderedDict):
"""Basic LRU Cache with get/set"""

def __init__(self, max_size=1024):
def __init__(self, max_size=1024, max_age=0):
self.max_size = max_size
self.max_age = max_age
self._ages = {}

def _now(self):
return time.perf_counter()

def _check_expired(self, key):
if not self.max_age:
return False
if self._ages[key] + self.max_age < self._now():
self.pop(key)
return True
return False

def get(self, key, default=None):
"""Get an item from the cache
same as dict.get
"""
if key in self:
if key in self and not self._check_expired(key):
self.move_to_end(key)
return super().get(key, default)

Expand All @@ -107,11 +121,17 @@ def set(self, key, value):
- if full, delete the oldest item
"""
self[key] = value
self._ages[key] = self._now()
self.move_to_end(key)
if len(self) > self.max_size:
first_key = next(iter(self))
self.pop(first_key)

def pop(self, key):
result = super().pop(key)
self._ages.pop(key)
return result


def url_path_join(*pieces):
"""Join components of url into a relative url.
Expand Down

0 comments on commit 239e939

Please sign in to comment.