Skip to content

Commit

Permalink
Merge pull request #1778 from consideRatio/kube-scheduler-rbac
Browse files Browse the repository at this point in the history
Updates to user-scheduler's coupling to the kube-scheduler binary
  • Loading branch information
consideRatio authored Sep 21, 2020
2 parents 436966b + dda741a commit 3d6e757
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 69 deletions.
3 changes: 0 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,6 @@ docker images to be pushed to a dedicated registry before they can be accessed
by the pods in the Kubernetes cluster, until [this
issue](https://github.com/rancher/k3d/issues/113) is resolved.

For this setup to work, make `registry.local` point to `127.0.0.1` (localhost)
by adding an entry in `/etc/hosts` or its equivalent in Windows.

__Install__

```shell
Expand Down
12 changes: 0 additions & 12 deletions jupyterhub/templates/scheduling/user-scheduler/_helpers.tpl

This file was deleted.

22 changes: 22 additions & 0 deletions jupyterhub/templates/scheduling/user-scheduler/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@ metadata:
labels:
{{- include "jupyterhub.labels" . | nindent 4 }}
data:
# ref: https://kubernetes.io/docs/reference/scheduling/config/
config.yaml: |
apiVersion: kubescheduler.config.k8s.io/v1beta1
kind: KubeSchedulerConfiguration
leaderElection:
resourceLock: endpoints
resourceName: user-scheduler-lock
resourceNamespace: {{ .Release.Namespace }}
profiles:
- schedulerName: {{ .Release.Name }}-user-scheduler
plugins:
{{- if .Values.scheduling.userScheduler.plugins }}
{{- .Values.scheduling.userScheduler.plugins | toYaml | trimSuffix "\n" | nindent 10 }}
{{- else }}
score:
disabled:
- name: NodeResourcesLeastAllocated
- name: NodeResourcesBalancedAllocation
enabled:
- name: NodeResourcesMostAllocated
{{- end }}
{{- $defaultPolicy := .Files.Get "files/userscheduler-defaultpolicy.yaml" | fromYaml }}
policy.cfg: {{ .Values.scheduling.userScheduler.policy | default $defaultPolicy | toJson | quote }}
{{- end }}
36 changes: 29 additions & 7 deletions jupyterhub/templates/scheduling/user-scheduler/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@ spec:
template:
metadata:
labels:
{{- /* Changes here will cause the Deployment to restart the pods. */}}
{{- include "jupyterhub.matchLabels" . | nindent 8 }}
annotations:
# This lets us autorestart when the configmap changes!
checksum/config-map: {{ include (print $.Template.BasePath "/scheduling/user-scheduler/configmap.yaml") . | sha256sum }}
spec:
{{- if .Values.rbac.enabled }}
Expand All @@ -27,18 +25,42 @@ spec:
{{- end }}
nodeSelector: {{ toJson .Values.scheduling.userScheduler.nodeSelector }}
{{- include "jupyterhub.coreAffinity" . | nindent 6 }}
volumes:
- name: config
configMap:
name: user-scheduler
containers:
- name: user-scheduler
image: {{ include "jupyterhub.scheduler.image" . }}
# NOTE: When the kube-scheduler 1.17+ binaries fail to find CSINode
# resource in the cluster, they won't start scheduling. Due to
# this, we fall back to the latest functional version with its
# legacy configuration format. This fallback can be removed when
# we assume k8s 1.17 where CSINode is generally available.
{{- if .Capabilities.APIVersions.Has "storage.k8s.io/v1/CSINode" }}
image: {{ .Values.scheduling.userScheduler.image.name }}:{{ .Values.scheduling.userScheduler.image.tag }}
{{- else }}
image: {{ .Values.scheduling.userScheduler.image.name }}:v1.16.15
{{- end }}
command:
- /usr/local/bin/kube-scheduler
# NOTE: --leader-elect-... (new) and --lock-object-... (deprecated)
# flags are silently ignored in favor of what's defined in the
# passed KubeSchedulerConfiguration whenever --config is
# passed.
#
# ref: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-scheduler/
{{- if .Capabilities.APIVersions.Has "storage.k8s.io/v1/CSINode" }}
- --config=/etc/user-scheduler/config.yaml
{{- else }}
- --scheduler-name={{ .Release.Name }}-user-scheduler
- --policy-configmap=user-scheduler
- --policy-configmap-namespace={{ .Release.Namespace }}
- --lock-object-name=user-scheduler
- --policy-config-file=/etc/user-scheduler/policy.cfg
- --lock-object-name=user-scheduler-lock
- --lock-object-namespace={{ .Release.Namespace }}
- --leader-elect-resource-lock=configmaps
{{- end }}
- --v={{ .Values.scheduling.userScheduler.logLevel | default 4 }}
volumeMounts:
- mountPath: /etc/user-scheduler
name: config
livenessProbe:
httpGet:
path: /healthz
Expand Down
217 changes: 181 additions & 36 deletions jupyterhub/templates/scheduling/user-scheduler/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,58 +7,203 @@ metadata:
labels:
{{- include "jupyterhub.labels" . | nindent 4 }}
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ .Release.Name }}-user-scheduler-base
labels:
{{- $_ := merge (dict "componentSuffix" "-base") . }}
{{- include "jupyterhub.labels" $_ | nindent 4 }}
subjects:
- kind: ServiceAccount
name: user-scheduler
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: system:kube-scheduler
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ .Release.Name }}-user-scheduler-complementary
name: {{ .Release.Name }}-user-scheduler
labels:
{{- $_ := merge (dict "componentSuffix" "-complementary") . }}
{{- include "jupyterhub.labels" $_ | nindent 4 }}
{{- include "jupyterhub.labels" . | nindent 4 }}
rules:
# Support leader elections
- apiGroups: [""]
resourceNames: ["user-scheduler"]
resources: ["configmaps"]
verbs: ["get", "update"]
# Workaround for missing permission in system:kube-scheduler as of k8s 1.10.4
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
# Workaround for missing permission with rancher local-path-provisioner
- apiGroups: [""]
resources: ["persistentvolume", "persistentvolumeclaims"]
verbs: ["update"]
# Copied from the system:kube-scheduler ClusterRole of the k8s version
# matching the kube-scheduler binary we use. A modification of two resource
# name references from kube-scheduler to user-scheduler-lock was made.
#
# NOTE: These rules have been unchanged between 1.12 and 1.15, then changed in
# 1.16 and in 1.17, but unchanged in 1.18 and 1.19.
#
# ref: https://github.com/kubernetes/kubernetes/blob/v1.19.0/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml#L696-L829
- apiGroups:
- ""
- events.k8s.io
resources:
- events
verbs:
- create
- patch
- update
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- create
- apiGroups:
- coordination.k8s.io
resourceNames:
- user-scheduler-lock
resources:
- leases
verbs:
- get
- update
- apiGroups:
- ""
resources:
- endpoints
verbs:
- create
- apiGroups:
- ""
resourceNames:
- user-scheduler-lock
resources:
- endpoints
verbs:
- get
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- delete
- get
- list
- watch
- apiGroups:
- ""
resources:
- bindings
- pods/binding
verbs:
- create
- apiGroups:
- ""
resources:
- pods/status
verbs:
- patch
- update
- apiGroups:
- ""
resources:
- replicationcontrollers
- services
verbs:
- get
- list
- watch
- apiGroups:
- apps
- extensions
resources:
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- get
- list
- watch
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
- persistentvolumes
verbs:
- get
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- storage.k8s.io
resources:
- csinodes
verbs:
- get
- list
- watch

# Copied from the system:volume-scheduler ClusterRole of the k8s version
# matching the kube-scheduler binary we use.
#
# NOTE: These rules have not changed between 1.12 and 1.19.
#
# ref: https://github.com/kubernetes/kubernetes/blob/v1.19.0/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml#L1213-L1240
- apiGroups:
- ""
resources:
- persistentvolumes
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- get
- list
- patch
- update
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ .Release.Name }}-user-scheduler-complementary
name: {{ .Release.Name }}-user-scheduler
labels:
{{- $_ := merge (dict "componentSuffix" "-complementary") . }}
{{- include "jupyterhub.labels" $_ | nindent 4 }}
{{- include "jupyterhub.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: user-scheduler
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: {{ .Release.Name }}-user-scheduler-complementary
name: {{ .Release.Name }}-user-scheduler
apiGroup: rbac.authorization.k8s.io
{{- end }}
{{- end }}
14 changes: 3 additions & 11 deletions jupyterhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -316,18 +316,10 @@ scheduling:
enabled: true
replicas: 2
logLevel: 4
## policy:
## Allows you to provide custom YAML/JSON to render into a JSON policy.cfg,
## a configuration file for the kube-scheduler binary.
## NOTE: The kube-scheduler binary in the kube-scheduler image we are
## currently using may be version bumped. It would for example happen if we
## increase the lowest supported k8s version for the helm chart. At this
## point, the provided policy.cfg may require a change along with that due
## to breaking changes in the kube-scheduler binary.
policy: {}
plugins: {}
image:
name: gcr.io/google_containers/kube-scheduler-amd64
tag: v1.16.11
name: k8s.gcr.io/kube-scheduler
tag: v1.19.1
nodeSelector: {}
pdb:
enabled: true
Expand Down

0 comments on commit 3d6e757

Please sign in to comment.