Add a pod mutating webhook to auto inject the pod network resources #4179

Open · wants to merge 1 commit into base: main
4 changes: 4 additions & 0 deletions charts/spiderpool/templates/deployment.yaml
@@ -187,6 +187,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPIDERPOOL_DEPLOYMENT_NAME
value: {{ .Values.spiderpoolController.name | quote }}
- name: SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT
value: {{ .Values.spiderpoolController.enablePodNetworkResourceInject | quote }}
{{- with .Values.spiderpoolController.extraEnv }}
{{- toYaml . | nindent 8 }}
{{- end }}
3 changes: 3 additions & 0 deletions charts/spiderpool/values.yaml
@@ -669,6 +669,9 @@
## @param spiderpoolController.webhookPort the http port for spiderpoolController webhook
webhookPort: 5722

## @param spiderpoolController.enablePodNetworkResourceInject enable injecting network resources into pods

> Check failure on line 672 in charts/spiderpool/values.yaml (GitHub Actions / lint_chart_against_release_image / chart-lint-test): 672:95 [trailing-spaces] trailing spaces
enablePodNetworkResourceInject: false

prometheus:
## @param spiderpoolController.prometheus.enabled enable spiderpool Controller to collect metrics
enabled: false
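To turn the feature on at install time, the new chart value can be set through a values override. A minimal sketch, assuming a standard Helm install of this chart:

```yaml
# values override (sketch): enables the pod network-resource injection webhook,
# which the chart passes to the controller as SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT
spiderpoolController:
  enablePodNetworkResourceInject: true
```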
21 changes: 13 additions & 8 deletions cmd/spiderpool-controller/cmd/config.go
@@ -99,6 +99,7 @@ var envInfo = []envConf{
{"SPIDERPOOL_MULTUS_CONFIG_INFORMER_RESYNC_PERIOD", "60", false, nil, nil, &controllerContext.Cfg.MultusConfigInformerResyncPeriod},
{"SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME", "kube-system/cilium-config", false, &controllerContext.Cfg.CiliumConfigName, nil, nil},

{"SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT", "false", false, nil, &controllerContext.Cfg.InjectPodNetworkResource, nil},
{"SPIDERPOOL_IPPOOL_INFORMER_RESYNC_PERIOD", "300", false, nil, nil, &controllerContext.Cfg.IPPoolInformerResyncPeriod},
{"SPIDERPOOL_IPPOOL_INFORMER_WORKERS", "3", true, nil, nil, &controllerContext.Cfg.IPPoolInformerWorkers},
{"SPIDERPOOL_AUTO_IPPOOL_HANDLER_MAX_WORKQUEUE_LENGTH", "10000", true, nil, nil, &controllerContext.Cfg.IPPoolInformerMaxWorkQueueLength},
@@ -128,16 +129,20 @@ type Config struct {
GopsListenPort string
PyroscopeAddress string
DefaultCniConfDir string
// CiliumConfigName is formatted by namespace and name,default is kube-system/cilium-config
// CiliumConfigName is formatted by namespace and name
// default is kube-system/cilium-config
CiliumConfigName string

ControllerPodNamespace string
ControllerPodName string
DefaultCoordinatorName string
LeaseDuration int
LeaseRenewDeadline int
LeaseRetryPeriod int
LeaseRetryGap int
InjectPodNetworkResource bool

ControllerDeploymentName string
ControllerPodNamespace string
ControllerPodName string
DefaultCoordinatorName string
LeaseDuration int
LeaseRenewDeadline int
LeaseRetryPeriod int
LeaseRetryGap int

IPPoolMaxAllocatedIPs int

14 changes: 14 additions & 0 deletions cmd/spiderpool-controller/cmd/daemon.go
@@ -268,6 +268,20 @@ func initControllerServiceManagers(ctx context.Context) {
}
controllerContext.PodManager = podManager

if controllerContext.Cfg.InjectPodNetworkResource {
logger.Debug("Begin to init Pod MutatingWebhook")
if err := podmanager.InitPodWebhook(controllerContext.CRDManager.GetClient(),
controllerContext.CRDManager, controllerContext.Cfg.ControllerDeploymentName); err != nil {
logger.Fatal(err.Error())
}
} else {
logger.Debug("InjectPodNetworkResource is disabled, try to remove the pod part in the MutatingWebhook")
if err := podmanager.RemovePodMutatingWebhook(controllerContext.CRDManager.GetClient(),
controllerContext.Cfg.ControllerDeploymentName); err != nil {
logger.Fatal(err.Error())
}
}

logger.Info("Begin to initialize StatefulSet manager")
statefulSetManager, err := statefulsetmanager.NewStatefulSetManager(
controllerContext.CRDManager.GetClient(),
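For orientation, the object that `InitPodWebhook` registers and `RemovePodMutatingWebhook` cleans up is a pod entry in the controller's MutatingWebhookConfiguration. The sketch below shows a plausible shape of that entry; the webhook name, service name, namespace, selector key, and path are assumptions for illustration and are not taken from this PR:

```yaml
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
  name: spiderpool-controller            # assumed: derived from SPIDERPOOL_DEPLOYMENT_NAME
webhooks:
  - name: pods.spiderpool.spidernet.io   # assumed webhook name
    admissionReviewVersions: ["v1"]
    sideEffects: None
    failurePolicy: Ignore                # assumed: avoids blocking pod creation if the webhook is down
    objectSelector:                      # only mutate pods that opt in via the label
      matchExpressions:
        - key: network.spidernet.io/inject-network-resources
          operator: Exists
    clientConfig:
      service:
        name: spiderpool-controller      # assumed service name
        namespace: kube-system           # assumed namespace
        path: /mutate--v1-pod            # assumed controller-runtime-style path
        port: 5722                       # matches spiderpoolController.webhookPort in values.yaml
    rules:
      - apiGroups: [""]
        apiVersions: ["v1"]
        operations: ["CREATE"]
        resources: ["pods"]
```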
1 change: 1 addition & 0 deletions docs/reference/spiderpool-controller.md
@@ -32,6 +32,7 @@ Run the spiderpool controller daemon.
| SPIDERPOOL_CNI_CONFIG_DIR | /etc/cni/net.d | The host path of the cni config directory. |
| SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME | kube-system/cilium-config. | The cilium's configMap, default is kube-system/cilium-config. |
| SPIDERPOOL_COORDINATOR_DEFAULT_NAME | default | the name of default spidercoordinator CR |
| SPIDERPOOL_ENABLE_POD_NETWORK_RESOURCE_INJECT | false | Enable/disable injecting network resources into pods. |

## spiderpool-controller shutdown

72 changes: 38 additions & 34 deletions docs/usage/install/ai/get-started-macvlan-zh_CN.md
@@ -214,7 +214,9 @@

3. Create the CNI configuration and the corresponding ippool resources

For Ethernet networks, configure all the GPU-affinity macvlan interfaces and create the corresponding IP address pools. The following example configures the interface and IP address pool affinitized to GPU1.
For Ethernet networks, configure all the GPU-affinity macvlan interfaces and create the corresponding IP address pools. To reduce the complexity of configuring multiple NICs for AI applications, Spiderpool supports classifying a group of NIC configurations with labels; users only need to add a specific label to a Pod, and Spiderpool's webhook will automatically inject the corresponding NICs and network resources into the Pod.

The following example configures the interface and IP address pool affinitized to GPU1:

```shell
$ cat <<EOF | kubectl apply -f -
@@ -223,29 +225,35 @@
metadata:
name: gpu1-net11
spec:
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
---
apiVersion: spiderpool.spidernet.io/v2beta1
kind: SpiderMultusConfig
metadata:
name: gpu1-macvlan
namespace: spiderpool
labels:
auto-spiderpool-inject: ""
spec:
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
ippools:
ipv4: ["gpu1-net11"]
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
enableRdma: true
rdmaResourceName: "spidernet.io/shared_cx5_gpu1"
ippools:
ipv4: ["gpu1-net11"]
EOF
```

In the configuration above, the `auto-spiderpool-inject: ""` label is added to the SpiderMultusConfig resources for all 8 NICs. The label key is a user-defined value and must match the value of the `network.spidernet.io/inject-network-resources` label added to the Pod.

## Create a Test Application

1. Create a DaemonSet application on the specified nodes.
In the following example, the annotation `v1.multus-cni.io/default-network` specifies the default calico interface for control-plane communication, and the annotation `k8s.v1.cni.cncf.io/networks` attaches the 8 GPU-affinity NICs for RDMA communication and configures the 8 kinds of RDMA resources
In the following example, the annotation `v1.multus-cni.io/default-network` specifies the default calico interface for control-plane communication, and the label `network.spidernet.io/inject-network-resources: auto-spiderpool-inject` tells Spiderpool to automatically inject the 8 GPU-affinity NICs into the Pod for RDMA communication, along with the 8 kinds of RDMA resources

```shell
$ helm repo add spiderchart https://spidernet-io.github.io/charts
@@ -271,36 +279,32 @@
- worker2

# macvlan interfaces
extraAnnotations:
k8s.v1.cni.cncf.io/networks: |-
[{"name":"gpu1-macvlan","namespace":"spiderpool"},
{"name":"gpu2-macvlan","namespace":"spiderpool"},
{"name":"gpu3-macvlan","namespace":"spiderpool"},
{"name":"gpu4-macvlan","namespace":"spiderpool"},
{"name":"gpu5-macvlan","namespace":"spiderpool"},
{"name":"gpu6-macvlan","namespace":"spiderpool"},
{"name":"gpu7-macvlan","namespace":"spiderpool"},
{"name":"gpu8-macvlan","namespace":"spiderpool"}]

# macvlan resource
resources:
limits:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
extraLabels:
network.spidernet.io/inject-network-resources: auto-spiderpool-inject
EOF

$ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml
```

During the creation of the container's network namespace, Spiderpool performs a connectivity test against the gateway on the macvlan interface. If all Pods of the above application start successfully, the network interfaces on every node are reachable and normal RDMA communication is possible.

Once the Pods are Running, you can check whether the RDMA resources were successfully injected into a Pod:

```yaml
# macvlan resource
resources:
requests:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
```

2. Check the network namespace status of the container.

You can enter the network namespace of any Pod to confirm that it has 9 network interfaces:
72 changes: 37 additions & 35 deletions docs/usage/install/ai/get-started-macvlan.md
@@ -215,7 +215,9 @@ The network planning for the cluster is as follows:

3. Create CNI configuration and proper IP pool resources

For Ethernet networks, please configure the Macvlan network interfaces associated with all GPUs and create corresponding IP address pools. The example below shows the configuration for the network interface and IP address pool associated with GPU1.
For Ethernet networks, please configure macvlan interfaces for all GPU-affinity NICs and create the corresponding IP address pools. To reduce the complexity of configuring multiple network interfaces for AI applications, Spiderpool supports classifying a group of interface configurations through labels: users only need to add a specific label to a Pod, and Spiderpool will automatically inject the corresponding interfaces and network resources into the Pod through a webhook.

The example below shows the configuration for the network interface and IP address pool associated with GPU1.

```shell
$ cat <<EOF | kubectl apply -f -
@@ -224,29 +226,33 @@ The network planning for the cluster is as follows:
metadata:
name: gpu1-net11
spec:
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
gateway: 172.16.11.254
subnet: 172.16.11.0/16
ips:
- 172.16.11.1-172.16.11.200
---
apiVersion: spiderpool.spidernet.io/v2beta1
kind: SpiderMultusConfig
metadata:
name: gpu1-macvlan
namespace: spiderpool
labels:
auto-spiderpool-inject: ""
spec:
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
ippools:
ipv4: ["gpu1-net11"]
cniType: macvlan
macvlan:
master: ["enp11s0f0np0"]
ippools:
ipv4: ["gpu1-net11"]
EOF
```

The above configuration adds the `auto-spiderpool-inject: ""` label to the SpiderMultusConfig resources corresponding to the 8 NICs. The label key is a user-defined value that must match the value of the `network.spidernet.io/inject-network-resources` label added to the Pod.
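For illustration, a Pod that carries the opt-in label might look like the sketch below after mutation; the pod name, image, and the exact injected annotation and resource contents are assumptions that depend on which SpiderMultusConfig objects match:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: rdma-demo                         # hypothetical pod
  labels:
    network.spidernet.io/inject-network-resources: auto-spiderpool-inject
  annotations:
    # injected by the Spiderpool webhook from the matching SpiderMultusConfigs (assumed)
    k8s.v1.cni.cncf.io/networks: '[{"name":"gpu1-macvlan","namespace":"spiderpool"}]'
spec:
  containers:
    - name: app
      image: docker.io/library/busybox    # hypothetical image
      resources:
        limits:
          spidernet.io/shared_cx5_gpu1: 1 # injected RDMA resource (assumed)
```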

## Create a Test Application

1. Create a DaemonSet application on specified nodes.
In the following example, the annotation field `v1.multus-cni.io/default-network` specifies the use of the default Calico network card for control plane communication. The annotation field `k8s.v1.cni.cncf.io/networks` connects to the 8 network cards affinitized to the GPU for RDMA communication, and configures 8 types of RDMA resources.
In the following example, the annotation field `v1.multus-cni.io/default-network` specifies the use of the default Calico network card for control plane communication, and the label `network.spidernet.io/inject-network-resources: auto-spiderpool-inject` tells Spiderpool to automatically inject the 8 GPU-affinity network interfaces into the Pod for RDMA communication and to configure the 8 types of RDMA resources.

```shell
$ helm repo add spiderchart https://spidernet-io.github.io/charts
@@ -271,30 +277,9 @@ The network planning for the cluster is as follows:
- worker1
- worker2

# interfaces
extraAnnotations:
k8s.v1.cni.cncf.io/networks: |-
[{"name":"gpu1-macvlan","namespace":"spiderpool"},
{"name":"gpu2-macvlan","namespace":"spiderpool"},
{"name":"gpu3-macvlan","namespace":"spiderpool"},
{"name":"gpu4-macvlan","namespace":"spiderpool"},
{"name":"gpu5-macvlan","namespace":"spiderpool"},
{"name":"gpu6-macvlan","namespace":"spiderpool"},
{"name":"gpu7-macvlan","namespace":"spiderpool"},
{"name":"gpu8-macvlan","namespace":"spiderpool"}]

# resource
resources:
limits:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
# macvlan interfaces
extraLabels:
network.spidernet.io/inject-network-resources: auto-spiderpool-inject
EOF

$ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml
@@ -303,6 +288,23 @@ The network planning for the cluster is as follows:
During the creation of the network namespace for the container, Spiderpool will perform connectivity tests on the gateway of the macvlan interface.
If all Pods of the above application start successfully, it indicates successful connectivity of the network cards on each node, allowing normal RDMA communication.

Once the Pod is Running, you can check whether the RDMA resources were successfully injected into the Pod:

```yaml
# macvlan resource
resources:
requests:
spidernet.io/shared_cx5_gpu1: 1
spidernet.io/shared_cx5_gpu2: 1
spidernet.io/shared_cx5_gpu3: 1
spidernet.io/shared_cx5_gpu4: 1
spidernet.io/shared_cx5_gpu5: 1
spidernet.io/shared_cx5_gpu6: 1
spidernet.io/shared_cx5_gpu7: 1
spidernet.io/shared_cx5_gpu8: 1
#nvidia.com/gpu: 1
```

2. Check the network namespace status of the container.

You can enter the network namespace of any Pod to confirm that it has 9 network cards.