From 58a4f0651786df6b5504095e0099c4acf6533343 Mon Sep 17 00:00:00 2001 From: Yecheng Fu Date: Mon, 15 Jun 2020 15:22:13 +0800 Subject: [PATCH] use the latest version object in deletion request --- .../tidbcluster/tidb_cluster_control.go | 16 +- pkg/manager/member/orphan_pods_cleaner.go | 20 ++- .../member/orphan_pods_cleaner_test.go | 145 ++++++++++++------ pkg/manager/member/pvc_cleaner.go | 2 - 4 files changed, 126 insertions(+), 57 deletions(-) diff --git a/pkg/controller/tidbcluster/tidb_cluster_control.go b/pkg/controller/tidbcluster/tidb_cluster_control.go index 4d565b0054e..22d91652759 100644 --- a/pkg/controller/tidbcluster/tidb_cluster_control.go +++ b/pkg/controller/tidbcluster/tidb_cluster_control.go @@ -143,8 +143,14 @@ func (tcc *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster } // cleaning all orphan pods(pd, tikv or tiflash which don't have a related PVC) managed by operator - if _, err := tcc.orphanPodsCleaner.Clean(tc); err != nil { + if skipReasons, err := tcc.orphanPodsCleaner.Clean(tc); err != nil { return err + } else { + if klog.V(10) { + for podName, reason := range skipReasons { + klog.Infof("pod %s of cluster %s/%s is skipped, reason %q", podName, tc.Namespace, tc.Name, reason) + } + } } // reconcile TiDB discovery service @@ -231,8 +237,14 @@ func (tcc *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster } // cleaning the pod scheduling annotation for pd and tikv - if _, err := tcc.pvcCleaner.Clean(tc); err != nil { + if skipReasons, err := tcc.pvcCleaner.Clean(tc); err != nil { return err + } else { + if klog.V(10) { + for pvcName, reason := range skipReasons { + klog.Infof("pvc %s of cluster %s/%s is skipped, reason %q", pvcName, tc.Namespace, tc.Name, reason) + } + } } // syncing the some tidbcluster status attributes diff --git a/pkg/manager/member/orphan_pods_cleaner.go b/pkg/manager/member/orphan_pods_cleaner.go index aeed02142f1..e93d80a079b 100644 --- a/pkg/manager/member/orphan_pods_cleaner.go +++ b/pkg/manager/member/orphan_pods_cleaner.go @@ -30,7 +30,7 @@ const ( skipReasonOrphanPodsCleanerPVCIsFound = "orphan pods cleaner: pvc is found" skipReasonOrphanPodsCleanerPodHasBeenScheduled = "orphan pods cleaner: pod has been scheduled" skipReasonOrphanPodsCleanerPodIsNotFound = "orphan pods cleaner: pod does not exist anymore" - skipReasonOrphanPodsCleanerPodChanged = "orphan pods cleaner: pod changed before deletion" + skipReasonOrphanPodsCleanerPodRecreated = "orphan pods cleaner: pod is recreated before deletion" ) // OrphanPodsCleaner implements the logic for cleaning the orphan pods(has no pvc) @@ -67,7 +67,6 @@ func NewOrphanPodsCleaner(podLister corelisters.PodLister, func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string, error) { ns := tc.GetNamespace() - // for unit test skipReason := map[string]string{} selector, err := label.New().Instance(tc.GetInstanceName()).Selector() @@ -141,17 +140,22 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string skipReason[podName] = skipReasonOrphanPodsCleanerPodIsNotFound continue } + if apiPod.UID != pod.UID { + skipReason[podName] = skipReasonOrphanPodsCleanerPodRecreated + } if err != nil { return skipReason, err } // In pre-1.14, kube-apiserver does not support - // deleteOption.Preconditions.ResourceVersion, we try our best to avoid - // deleting wrong object in apiserver. - if apiPod.UID != pod.UID || apiPod.ResourceVersion != pod.ResourceVersion { - skipReason[podName] = skipReasonOrphanPodsCleanerPodChanged - continue + // deleteOption.Preconditions.ResourceVersion, we fetch the latest + // version and check again before deletion. + if len(apiPod.Spec.NodeName) > 0 { + skipReason[podName] = skipReasonOrphanPodsCleanerPodHasBeenScheduled } - err = opc.podControl.DeletePod(tc, pod) + // As th the status of pod may be updated by kube-scheduler + // continuously, we should use the latest object here to avoid API + // conflict. + err = opc.podControl.DeletePod(tc, apiPod) if err != nil { klog.Errorf("orphan pods cleaner: failed to clean orphan pod: %s/%s, %v", ns, podName, err) return skipReason, err diff --git a/pkg/manager/member/orphan_pods_cleaner_test.go b/pkg/manager/member/orphan_pods_cleaner_test.go index 4249c48c98a..fecb4fd1e26 100644 --- a/pkg/manager/member/orphan_pods_cleaner_test.go +++ b/pkg/manager/member/orphan_pods_cleaner_test.go @@ -33,43 +33,15 @@ func TestOrphanPodsCleanerClean(t *testing.T) { g := NewGomegaWithT(t) tc := newTidbClusterForPD() - type testcase struct { + + tests := []struct { name string pods []*corev1.Pod apiPods []*corev1.Pod pvcs []*corev1.PersistentVolumeClaim deletePodFailed bool expectFn func(*GomegaWithT, map[string]string, *orphanPodsCleaner, error) - } - testFn := func(test *testcase, t *testing.T) { - t.Log(test.name) - - opc, podIndexer, pvcIndexer, client, podControl := newFakeOrphanPodsCleaner() - if test.pods != nil { - for _, pod := range test.pods { - client.CoreV1().Pods(pod.Namespace).Create(pod) - podIndexer.Add(pod) - } - } - if test.apiPods != nil { - for _, pod := range test.apiPods { - client.CoreV1().Pods(pod.Namespace).Update(pod) - } - } - if test.pvcs != nil { - for _, pvc := range test.pvcs { - client.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(pvc) - pvcIndexer.Add(pvc) - } - } - if test.deletePodFailed { - podControl.SetDeletePodError(fmt.Errorf("delete pod failed"), 0) - } - - skipReason, err := opc.Clean(tc) - test.expectFn(g, skipReason, opc, err) - } - tests := []testcase{ + }{ { name: "no pods", pods: []*corev1.Pod{}, @@ -310,15 +282,14 @@ func TestOrphanPodsCleanerClean(t *testing.T) { }, }, { - name: "pvc is not found but pod changed in apiserver", + name: "pvc is not found but pod is recreated in apiserver", pods: []*corev1.Pod{ { ObjectMeta: metav1.ObjectMeta{ - Name: "pod-1", - UID: "pod-1-uid", - ResourceVersion: "1", - Namespace: metav1.NamespaceDefault, - Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), + Name: "pod-1", + UID: "pod-1-uid", + Namespace: metav1.NamespaceDefault, + Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), }, Spec: corev1.PodSpec{ Volumes: []corev1.Volume{ @@ -340,11 +311,10 @@ func TestOrphanPodsCleanerClean(t *testing.T) { apiPods: []*corev1.Pod{ { ObjectMeta: metav1.ObjectMeta{ - Name: "pod-1", - UID: "pod-1-uid", - ResourceVersion: "2", - Namespace: metav1.NamespaceDefault, - Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), + Name: "pod-1", + UID: "pod-1-uid2", + Namespace: metav1.NamespaceDefault, + Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), }, Spec: corev1.PodSpec{ Volumes: []corev1.Volume{ @@ -367,7 +337,67 @@ func TestOrphanPodsCleanerClean(t *testing.T) { expectFn: func(g *GomegaWithT, skipReason map[string]string, opc *orphanPodsCleaner, err error) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(len(skipReason)).To(Equal(1)) - g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodChanged)) + g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodRecreated)) + }, + }, + { + name: "pvc is not found but pod is scheduled", + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + UID: "pod-1-uid", + Namespace: metav1.NamespaceDefault, + Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "pd", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc-1", + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + }, + }, + }, + apiPods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + UID: "pod-1-uid", + Namespace: metav1.NamespaceDefault, + Labels: label.New().Instance(tc.GetInstanceName()).PD().Labels(), + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "pd", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc-1", + }, + }, + }, + }, + NodeName: "foo", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + }, + }, + pvcs: []*corev1.PersistentVolumeClaim{}, + expectFn: func(g *GomegaWithT, skipReason map[string]string, opc *orphanPodsCleaner, err error) { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(skipReason)).To(Equal(1)) + g.Expect(skipReason["pod-1"]).To(Equal(skipReasonOrphanPodsCleanerPodHasBeenScheduled)) }, }, { @@ -529,8 +559,33 @@ func TestOrphanPodsCleanerClean(t *testing.T) { }, }, } - for i := range tests { - testFn(&tests[i], t) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + opc, podIndexer, pvcIndexer, client, podControl := newFakeOrphanPodsCleaner() + if tt.pods != nil { + for _, pod := range tt.pods { + client.CoreV1().Pods(pod.Namespace).Create(pod) + podIndexer.Add(pod) + } + } + if tt.apiPods != nil { + for _, pod := range tt.apiPods { + client.CoreV1().Pods(pod.Namespace).Update(pod) + } + } + if tt.pvcs != nil { + for _, pvc := range tt.pvcs { + client.CoreV1().PersistentVolumeClaims(pvc.Namespace).Create(pvc) + pvcIndexer.Add(pvc) + } + } + if tt.deletePodFailed { + podControl.SetDeletePodError(fmt.Errorf("delete pod failed"), 0) + } + + skipReason, err := opc.Clean(tc) + tt.expectFn(g, skipReason, opc, err) + }) } } diff --git a/pkg/manager/member/pvc_cleaner.go b/pkg/manager/member/pvc_cleaner.go index 654dd94812f..584e06a3a60 100644 --- a/pkg/manager/member/pvc_cleaner.go +++ b/pkg/manager/member/pvc_cleaner.go @@ -91,7 +91,6 @@ func (rpc *realPVCCleaner) reclaimPV(tc *v1alpha1.TidbCluster) (map[string]strin ns := tc.GetNamespace() tcName := tc.GetName() - // for unit test skipReason := map[string]string{} pvcs, err := rpc.listAllPVCs(tc) @@ -198,7 +197,6 @@ func (rpc *realPVCCleaner) reclaimPV(tc *v1alpha1.TidbCluster) (map[string]strin func (rpc *realPVCCleaner) cleanScheduleLock(tc *v1alpha1.TidbCluster) (map[string]string, error) { ns := tc.GetNamespace() tcName := tc.GetName() - // for unit test skipReason := map[string]string{} pvcs, err := rpc.listAllPVCs(tc)