Skip to content

Commit

Permalink
webhook: auto inject the rdma resource to pod
Browse files Browse the repository at this point in the history
Signed-off-by: cyclinder <qifeng.guo@daocloud.io>
  • Loading branch information
cyclinder committed Jun 28, 2024
1 parent 5ff6c82 commit 7f26f0b
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 13 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ unittest-tests: check_test_label
--cover --coverprofile=./coverage.out --covermode set \
--json-report unittestreport.json \
-randomize-suites -randomize-all --keep-going --timeout=1h -p \
--output-interceptor-mode=none \
-vv -r $(ROOT_DIR)/pkg $(ROOT_DIR)/cmd
$(QUIET) go tool cover -html=./coverage.out -o coverage-all.html

Expand Down
34 changes: 34 additions & 0 deletions charts/spiderpool/templates/tls.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,40 @@ webhooks:
- spidercoordinators
sideEffects: None
{{- end }}
{{- if .Values.dra.enabled }}
- admissionReviewVersions:
- v1
clientConfig:
service:
name: {{ .Values.spiderpoolController.name | trunc 63 | trimSuffix "-" }}
namespace: {{ .Release.Namespace }}
path: /mutate-pods
port: {{ .Values.spiderpoolController.webhookPort }}
{{- if (eq .Values.spiderpoolController.tls.method "provided") }}
caBundle: {{ .Values.spiderpoolController.tls.provided.tlsCa | required "missing spiderpoolController.tls.provided.tlsCa" }}
{{- else if (eq .Values.spiderpoolController.tls.method "auto") }}
caBundle: {{ .ca.Cert | b64enc }}
{{- end }}
failurePolicy: Ignore
name: pod.spidernet.io
objectSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: NotIn
values:
- {{ include "spiderpool.name" . }}
rules:
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- pods
sideEffects: None
{{- end }}
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
Expand Down
3 changes: 2 additions & 1 deletion cmd/spiderpool-agent/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,10 @@ func initAgentServiceManagers(ctx context.Context) {
agentContext.NSManager = nsManager

logger.Debug("Begin to initialize Pod manager")
podManager, err := podmanager.NewPodManager(
podManager, err := podmanager.NewPodManager(false,
agentContext.CRDManager.GetClient(),
agentContext.CRDManager.GetAPIReader(),
nil,
)
if err != nil {
logger.Fatal(err.Error())
Expand Down
3 changes: 3 additions & 0 deletions cmd/spiderpool-controller/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,15 @@ func initControllerServiceManagers(ctx context.Context) {

logger.Debug("Begin to initialize Pod manager")
podManager, err := podmanager.NewPodManager(
controllerContext.Cfg.DraEnabled,
controllerContext.CRDManager.GetClient(),
controllerContext.CRDManager.GetAPIReader(),
controllerContext.CRDManager,
)
if err != nil {
logger.Fatal(err.Error())
}

controllerContext.PodManager = podManager

logger.Info("Begin to initialize StatefulSet manager")
Expand Down
205 changes: 198 additions & 7 deletions pkg/podmanager/pod_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,25 @@ package podmanager
import (
"context"
"fmt"
"os"

init_cmd "github.com/spidernet-io/spiderpool/cmd/spiderpool-init/cmd"
spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1"
crdclientset "github.com/spidernet-io/spiderpool/pkg/k8s/client/clientset/versioned"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
k8s_resource "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
apitypes "k8s.io/apimachinery/pkg/types"
"k8s.io/utils/strings/slices"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"github.com/spidernet-io/spiderpool/pkg/constant"
"github.com/spidernet-io/spiderpool/pkg/logutils"
Expand All @@ -24,25 +35,47 @@ type PodManager interface {
GetPodByName(ctx context.Context, namespace, podName string, cached bool) (*corev1.Pod, error)
ListPods(ctx context.Context, cached bool, opts ...client.ListOption) (*corev1.PodList, error)
GetPodTopController(ctx context.Context, pod *corev1.Pod) (types.PodTopController, error)
admission.CustomDefaulter
admission.CustomValidator
}

type podManager struct {
client client.Client
apiReader client.Reader
enableDra bool
client client.Client
apiReader client.Reader
SpiderClient crdclientset.Interface
}

func NewPodManager(client client.Client, apiReader client.Reader) (PodManager, error) {
var _ webhook.CustomValidator = &podManager{}

func NewPodManager(enableDra bool, client client.Client, apiReader client.Reader, mgr ctrl.Manager) (PodManager, error) {
if client == nil {
return nil, fmt.Errorf("k8s client %w", constant.ErrMissingRequiredParam)
}
if apiReader == nil {
return nil, fmt.Errorf("api reader %w", constant.ErrMissingRequiredParam)
}

return &podManager{
client: client,
apiReader: apiReader,
}, nil
spiderClient, err := crdclientset.NewForConfig(ctrl.GetConfigOrDie())
if err != nil {
return nil, err
}

pm := &podManager{
enableDra: enableDra,
client: client,
apiReader: apiReader,
SpiderClient: spiderClient,
}

if enableDra && mgr != nil {
return pm, ctrl.NewWebhookManagedBy(mgr).
For(&corev1.Pod{}).
WithDefaulter(pm).
Complete()
}

return pm, nil
}

func (pm *podManager) GetPodByName(ctx context.Context, namespace, podName string, cached bool) (*corev1.Pod, error) {
Expand Down Expand Up @@ -238,3 +271,161 @@ func (pm *podManager) GetPodTopController(ctx context.Context, pod *corev1.Pod)
UID: podOwner.UID,
}, nil
}

// Default implements admission.CustomDefaulter.
func (pw *podManager) Default(ctx context.Context, obj runtime.Object) error {
// Avoids affecting the time of pod creation when dra is not enabled
if !pw.enableDra {
return nil
}

logger := logutils.FromContext(ctx)
pod := obj.(*corev1.Pod)
mutateLogger := logger.Named("Mutating").With(
zap.String("Pod", pod.Name))
mutateLogger.Sugar().Debugf("Request Pod: %+v", *pod)

return pw.injectPodResources(logutils.IntoContext(ctx, mutateLogger), mutateLogger, pod)
}

func (pw *podManager) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
return nil, nil
}

func (pw *podManager) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
return nil, nil
}

// ValidateDelete will implement something just like kubernetes Foreground cascade deletion to delete the MultusConfig corresponding net-attach-def firstly
// Since the MultusConf doesn't have Finalizer, you could delete it as soon as possible and we can't filter it to delete the net-attach-def at first.
func (pw *podManager) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
return nil, nil
}
func (pw *podManager) injectPodResources(ctx context.Context, l *zap.Logger, pod *corev1.Pod) error {
if pod.Spec.ResourceClaims == nil {
return nil
}

staticNics, err := pw.getStaticNicsFromSpiderClaimParameter(ctx, pod)
if err != nil {
l.Error(err.Error())
return err
}

if len(staticNics) == 0 {
l.Debug("spiderClaimParameter no staticNics configure, exit")
return nil
}

resourceMap, err := pw.getResourceMapFromStaticNics(ctx, staticNics)
if err != nil {
l.Error("error get resourceMap for the staticNics", zap.Error(err))
return err
}

if len(resourceMap) == 0 {
l.Debug("staticNics no rdma resource claimed, exit")
return nil
}

l.Info("find pod has dra claim with staticNics and rdma resources claim, try to inject rdma resource to pod resources")
InjectRdmaResourceToPod(resourceMap, pod)
l.Debug("Finish inject resource to pod", zap.Any("Pod", pod))
return nil
}

func (pw *podManager) getStaticNicsFromSpiderClaimParameter(ctx context.Context, pod *corev1.Pod) ([]spiderpoolv2beta1.StaticNic, error) {
for _, rc := range pod.Spec.ResourceClaims {
if rc.Source.ResourceClaimTemplateName != nil {
var rct resourcev1alpha2.ResourceClaimTemplate
if err := pw.apiReader.Get(ctx, apitypes.NamespacedName{Namespace: pod.Namespace, Name: *rc.Source.ResourceClaimTemplateName}, &rct); err != nil {
return nil, err
}

if rct.Spec.Spec.ResourceClassName == constant.DRADriverName && rct.Spec.Spec.ParametersRef.APIGroup == constant.SpiderpoolAPIGroup &&
rct.Spec.Spec.ParametersRef.Kind == constant.KindSpiderClaimParameter {

spc, err := pw.SpiderClient.SpiderpoolV2beta1().SpiderClaimParameters(pod.Namespace).Get(ctx, rct.Spec.Spec.ParametersRef.Name, metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get spiderClaimParameter for pod %s/%s: %v", pod.Namespace, pod.Name, err)
}
return spc.Spec.StaticNics, nil
}
}
}
return []spiderpoolv2beta1.StaticNic{}, nil
}

func (pw *podManager) getResourceMapFromStaticNics(ctx context.Context, staticNics []spiderpoolv2beta1.StaticNic) (map[string]bool, error) {
resourceMap := make(map[string]bool)
for _, nic := range staticNics {
if nic.Namespace == "" {
nic.Namespace = os.Getenv(init_cmd.ENVNamespace)
}

smc, err := pw.SpiderClient.SpiderpoolV2beta1().SpiderMultusConfigs(nic.Namespace).Get(ctx, nic.MultusConfigName, metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get spiderMultusConfigs %s/%s: %v", nic.Namespace, nic.MultusConfigName, err)
}

resourceName := pw.resourceName(smc)
if resourceName == "" {
continue
}

if _, ok := resourceMap[resourceName]; !ok {
resourceMap[resourceName] = false
}
}
return resourceMap, nil
}

// resourceName return the resourceName for given spiderMultusConfig
func (pw *podManager) resourceName(smc *spiderpoolv2beta1.SpiderMultusConfig) string {
switch *smc.Spec.CniType {
case constant.MacvlanCNI:
if smc.Spec.MacvlanConfig != nil && smc.Spec.MacvlanConfig.EnableRdma {
return smc.Spec.MacvlanConfig.RdmaResourceName
}
case constant.IPVlanCNI:
if smc.Spec.IPVlanConfig != nil && smc.Spec.IPVlanConfig.EnableRdma {
return smc.Spec.IPVlanConfig.RdmaResourceName
}
case constant.SriovCNI:
if smc.Spec.SriovConfig != nil {
return smc.Spec.SriovConfig.ResourceName
}
case constant.IBSriovCNI:
if smc.Spec.IbSriovConfig != nil {
return smc.Spec.IbSriovConfig.ResourceName
}
}
return ""
}

func InjectRdmaResourceToPod(resourceMap map[string]bool, pod *corev1.Pod) {
for _, c := range pod.Spec.Containers {
for resource := range resourceMap {
if resourceMap[resource] {
// the resource has found in pod, skip
continue
}

// try to find the resource in container resources.requests
if _, ok := c.Resources.Requests[corev1.ResourceName(resource)]; ok {
resourceMap[resource] = true
} else {
if _, ok := c.Resources.Limits[corev1.ResourceName(resource)]; ok {
resourceMap[resource] = true
}
}
}
}

for resource, found := range resourceMap {
if !found {
// inject the resource to the pod.containers[0].resources.requests
pod.Spec.Containers[0].Resources.Requests[corev1.ResourceName(resource)] = k8s_resource.MustParse("1")
}
}
}
3 changes: 2 additions & 1 deletion pkg/podmanager/pod_manager_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ var _ = BeforeSuite(func() {
}).
Build()

podManager, err = podmanager.NewPodManager(
podManager, err = podmanager.NewPodManager(true,
fakeClient,
fakeAPIReader,
nil,
)
Expect(err).NotTo(HaveOccurred())
})
Loading

0 comments on commit 7f26f0b

Please sign in to comment.