Skip to content

Commit

Permalink
Merge pull request #2698 from manics/aws-curvenote-binder
Browse files Browse the repository at this point in the history
Deploy binderHub and other K8s apps on AWS curvenote
  • Loading branch information
manics authored Nov 1, 2023
2 parents 4c5ea09 + 4042775 commit fed3d1d
Show file tree
Hide file tree
Showing 17 changed files with 1,157 additions and 23 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ jobs:
run: |
curl -sf https://raw.githubusercontent.com/helm/helm/HEAD/scripts/get-helm-3 | DESIRED_VERSION=${HELM_VERSION} bash
helm dependency update ./mybinder
helm dependency update ./mybinder-kube-system
# Action Repo: https://github.com/sliteteam/github-action-git-crypt-unlock
- name: "Stage 2: Unlock git-crypt secrets"
Expand Down Expand Up @@ -290,6 +291,7 @@ jobs:
run: |
curl -sf https://raw.githubusercontent.com/helm/helm/HEAD/scripts/get-helm-3 | DESIRED_VERSION=${HELM_VERSION} bash
helm dependency update ./mybinder
helm dependency update ./mybinder-kube-system
- name: "Stage 2: Unlock git-crypt secrets"
uses: sliteteam/github-action-git-crypt-unlock@8b1fa3ccc81e322c5c45fbab261eee46513fd3f8
Expand Down
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ __pycache__
config/common/datacenter-*.yaml
secrets/banned_hosts.txt
secrets/config/common/bans.yaml
mybinder/charts
mybinder/requirements.lock
mybinder/Chart.lock
mybinder*/charts
mybinder*/requirements.lock
mybinder*/Chart.lock

.ipynb_checkpoints

Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
mybinder/templates/
terraform/aws/curvenote/cni/
1 change: 1 addition & 0 deletions chartpress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ charts:
valuesPath: minesweeper.image
tc-init:
valuesPath: binderhub.jupyterhub.singleuser.initContainers.0.image
- name: mybinder-kube-system
31 changes: 31 additions & 0 deletions config-kube-system/curvenote.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Values for two AWS add-ons: the AWS Load Balancer Controller and the EBS CSI driver.
#
# Install the newer AWS Load Balancer Controller:
# https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html
aws-load-balancer-controller:
enabled: true
# NOTE(review): presumably the EKS cluster name; the IAM role ARNs below use the same "binderhub" prefix — confirm.
clusterName: binderhub
clusterSecretsPermissions:
# NOTE(review): looks like this lets the controller access every Secret in the cluster — confirm that is required.
allowAllSecrets: true
enableShield: false
enableWaf: false
enableWafv2: false
# NOTE(review): debug-level logging is enabled; confirm it is still wanted once the deployment is stable.
logLevel: debug
serviceAccount:
# Must match the service account name used by the IRSA (IAM Roles for Service Accounts) role below
name: aws-load-balancer-controller
annotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-load-balancer-controller"

# EBS CSI driver, with a single StorageClass named "ebs-sc".
aws-ebs-csi-driver:
enabled: true
controller:
serviceAccount:
# Must match the service account name used by the IRSA (IAM Roles for Service Accounts) role below
name: ebs-csi-controller-sa
annotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-ebs-csi-controller-sa"
storageClasses:
- name: ebs-sc
# Marking this class as default leaves EKS with two default StorageClasses,
# so always set the storage class explicitly in each PVC to be sure which one is used.
annotations:
storageclass.kubernetes.io/is-default-class: "true"
324 changes: 324 additions & 0 deletions config/curvenote.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
# Deployment values for the "curvenote" project.
projectName: curvenote

binderhub:
config:
BinderHub:
hub_url: https://hub.binder.curvenote.dev
hub_url_local: http://proxy-public
badge_base_url: https://mybinder.org
sticky_builds: true
# Same registry host as registry.url below, plus the "binderhub/" path prefix.
image_prefix: 166088433508.dkr.ecr.us-east-2.amazonaws.com/binderhub/
# log_level: DEBUG
# TODO: set CPU requests as well.
# The memory request is what limits the number of builds per node. Sizing rule:
# dind memory request + KubernetesBuildExecutor.memory_request * builds_per_node ~= node memory
KubernetesBuildExecutor:
memory_request: "2G"

LaunchQuota:
total_quota: 10

# Configures the ExternalRegistryHelper class defined in extraConfig
# (10-external-registry-helper); this overrides the class's default service_url.
ExternalRegistryHelper:
service_url: http://curvenote-binderhub-container-registry-helper:8080

extraEnv:
# Read via getenv() as the default auth_token of the ExternalRegistryHelper class in extraConfig.
BINDERHUB_CONTAINER_REGISTRY_HELPER_AUTH_TOKEN:
valueFrom:
secretKeyRef:
name: curvenote-binderhub-container-registry-helper
key: auth_token

registry:
url: 166088433508.dkr.ecr.us-east-2.amazonaws.com
# NOTE(review): static credentials are empty; presumably they are fetched at runtime by
# ExternalRegistryHelper.get_credentials (defined in extraConfig) — confirm.
username: ""
password: ""

# Left commented out: the 01-eventlog entry in extraConfig notes that event logging stays
# disabled until the GOOGLE_APPLICATION_CREDENTIALS secret is available.
# extraVolumes:
# - name: secrets
# secret:
# secretName: events-archiver-secrets
# extraVolumeMounts:
# - name: secrets
# mountPath: /secrets
# readOnly: true
# extraEnv:
# GOOGLE_APPLICATION_CREDENTIALS: /secrets/service-account.json

extraConfig:
01-eventlog: |
# Disabled until GOOGLE_APPLICATION_CREDENTIALS secret is available
# and secrets/events-archiver/curvenote.json is created
10-external-registry-helper: |
import json
from os import getenv
from tornado import httpclient
from traitlets import Unicode
from binderhub.registry import DockerRegistry
class ExternalRegistryHelper(DockerRegistry):
    """Registry client that delegates its lookups to a registry helper micro-service.

    Each operation is an HTTP request to ``service_url`` carrying a bearer
    token; responses are decoded from JSON.
    """

    service_url = Unicode(
        "http://binderhub-container-registry-helper:8080",
        allow_none=False,
        help="The URL of the registry helper micro-service.",
        config=True,
    )

    # The default is read from the environment when the class body is executed.
    auth_token = Unicode(
        getenv("BINDERHUB_CONTAINER_REGISTRY_HELPER_AUTH_TOKEN"),
        help="The auth token to use when accessing the registry helper micro-service.",
        config=True,
    )

    async def _request(self, endpoint, **kwargs):
        """Call ``endpoint`` on the helper service and return the decoded JSON body.

        ``endpoint`` is appended to ``service_url``. Extra keyword arguments
        are forwarded unchanged to ``AsyncHTTPClient.fetch``. Errors raised by
        the fetch are not caught here, so callers handle them.
        """
        http = httpclient.AsyncHTTPClient()
        response = await http.fetch(
            f"{self.service_url}{endpoint}",
            headers={"Authorization": f"Bearer {self.auth_token}"},
            **kwargs,
        )
        return json.loads(response.body.decode("utf-8"))

    async def _get_image(self, image, tag):
        """Return the helper's JSON for ``image:tag``, or None if it answers 404."""
        image_path = f"/image/{image}:{tag}"
        self.log.debug(f"Checking whether image exists: {image_path}")
        try:
            return await self._request(image_path)
        except httpclient.HTTPError as err:
            # Only "not found" means the image is absent; anything else is a real failure.
            if err.code != 404:
                raise
            return None

    async def get_image_manifest(self, image, tag):
        """
        Look up the manifest for ``image:tag``, creating the repository if needed.

        The helper is first asked whether the container repository exists.
        If it does, the image lookup result is returned (None when the image
        itself is missing). If it does not, the repository is created through
        the helper and None is returned.

        The container repository name may differ from the BinderHub image
        name. E.g. Oracle Container Registry (OCIR) uses the form
            OCIR_NAMESPACE/OCIR_REPOSITORY_NAME:TAG
        The registry helper handles these extra components automatically, so
        BinderHub repository names such as OCIR_NAMESPACE/OCIR_REPOSITORY_NAME
        can be used directly without removing them.
        """
        repo_path = f"/repo/{image}"
        self.log.debug(f"Checking whether repository exists: {repo_path}")
        try:
            existing_repo = await self._request(repo_path)
        except httpclient.HTTPError as err:
            if err.code != 404:
                raise
            existing_repo = None

        if not existing_repo:
            self.log.debug(f"Creating repository: {repo_path}")
            await self._request(repo_path, method="POST", body="")
            return None

        return await self._get_image(image, tag)

    async def get_credentials(self, image, tag):
        """
        Request registry credentials for ``image:tag`` from the helper.

        Returns a dictionary holding whichever of the login fields
        "username", "password" and "registry" the helper supplied, or None
        when the helper answers 404 for the token endpoint.
        """
        token_path = f"/token/{image}:{tag}"
        self.log.debug(f"Getting registry token: {token_path}")
        try:
            payload = await self._request(token_path, method="POST", body="")
        except httpclient.HTTPError as err:
            if err.code != 404:
                raise
            return None

        # Log only the field names, never the credential values.
        self.log.debug(f"Token: {tuple(payload.keys())}")
        login_fields = ("username", "password", "registry")
        return {
            field: value
            for field, value in payload.items()
            if field in login_fields
        }
c.BinderHub.registry_class = ExternalRegistryHelper
ingress:
hosts:
- binder.curvenote.dev

# JupyterHub values: hub, singleuser and proxy network policies, ingress, scheduling and culling.
jupyterhub:
hub:
db:
pvc:
# Storage class is set explicitly rather than relying on a cluster default.
storageClassName: ebs-sc
config:
KubeSpawner:
extra_pod_config:
enableServiceLinks: false
image_pull_policy: Always
extraPodSpec:
priorityClassName: binderhub-core
networkPolicy:
ingress:
# With the AWS VPC CNI, a network policy only works when the service port and the
# pod port have the same name and the same port number:
# https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations
- from:
- podSelector:
matchLabels:
hub.jupyter.org/network-access-hub: "true"
# The port restriction below is left commented out: for unknown reasons the
# hub <-> notebook traffic is partially blocked if it is included:
# ports:
# # service/hub port name is "hub"
# # pod/hub port name is "http"
# - port: 8081
# protocol: TCP

singleuser:
networkPolicy:
ingress:
# With the AWS VPC CNI, a network policy only works when the service port and the
# pod port have the same name and the same port number:
# https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations
- from:
- podSelector:
matchLabels:
hub.jupyter.org/network-access-singleuser: "true"
ports:
# The pod port name is "notebook-port", but for an unknown reason that
# doesn't work, so the port number is given instead.
- port: 8888
protocol: TCP

proxy:
chp:
extraPodSpec:
priorityClassName: binderhub-core
networkPolicy:
ingress:
# With the AWS VPC CNI, a network policy only works when the service port and the
# pod port have the same name and the same port number:
# https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations
- from:
- podSelector:
matchLabels:
hub.jupyter.org/network-access-proxy-api: "true"
ports:
# service/proxy-api port doesn't have a name
# proxy/pod port name is "api"
- port: 8001
protocol: TCP
# NOTE(review): `from` has no entries in this rule, which presumably leaves the
# source unrestricted for the public proxy ports below — confirm.
- from:
ports:
# The two port numbers differ, so both are listed:
# service/proxy-public port is 80
# proxy/pod port is 8000
- port: 8000
protocol: TCP
- port: 80
protocol: TCP

ingress:
hosts:
- hub.binder.curvenote.dev
tls:
- secretName: kubelego-tls-hub
hosts:
- hub.binder.curvenote.dev
scheduling:
userPlaceholder:
enabled: false
userScheduler:
enabled: false
cull:
# maxAge is 3600 (1 hour), kept short since we're just testing
maxAge: 3600

# Image cleaner thresholds; the threshold type is "absolute" and sizes are given in bytes.
imageCleaner:
enabled: true
# Upper limit: 40e9 bytes (40GB)
imageGCThresholdHigh: 40e9
# Lower threshold: 30e9 bytes (30GB)
imageGCThresholdLow: 30e9
imageGCThresholdType: "absolute"

cryptnono:
enabled: true

# Monitoring: grafana is disabled for this deployment, prometheus is enabled.
grafana:
enabled: false
ingress:
# The grafana host entries are commented out, so both `hosts` keys below have no entries.
hosts:
# - grafana.binder.curvenote.dev
tls:
- hosts:
# - grafana.binder.curvenote.dev
secretName: kubelego-tls-grafana
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: prometheus
orgId: 1
type: prometheus
# Same hostname as the prometheus server ingress host configured below.
url: https://prometheus.binder.curvenote.dev
# NOTE(review): confirm the "direct" access mode is supported by the Grafana version in use.
access: direct
isDefault: true
editable: false

prometheus:
enabled: true
server:
persistentVolume:
size: 50Gi
retention: 30d
ingress:
hosts:
- prometheus.binder.curvenote.dev
tls:
- hosts:
- prometheus.binder.curvenote.dev
secretName: kubelego-tls-prometheus

# Ingress controller, static site, and the remaining per-component switches.
ingress-nginx:
controller:
service:
annotations:
# NOTE(review): presumably requests an internet-facing (public) AWS load balancer for
# the controller Service — confirm against the load balancer controller docs.
service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing"

static:
ingress:
hosts:
- static.binder.curvenote.dev

analyticsPublisher:
enabled: false

minesweeper:
enabled: true

# "binderhub-core" is the name referenced by priorityClassName in the hub and
# proxy (chp) extraPodSpec settings.
priorityClasses:
binderhub-core: 10000

binderhub-container-registry-helper:
enabled: true
# auth_token is not set here: Autogenerated
# auth_token: Autogenerated
replicaCount: 2
serviceAccount:
name: binderhub-container-registry-helper
annotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-binderhub-ecr"

awsEcrRegistryCleaner:
enabled: true
serviceAccount:
annotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-binderhub-ecr-registry-cleaner"
Loading

0 comments on commit fed3d1d

Please sign in to comment.