Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a serial test for stable scheduling #1972

Merged
merged 1 commit into from
Mar 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 9 additions & 55 deletions tests/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,6 @@ type OperatorActions interface {
LabelNodesOrDie()
CheckDisasterTolerance(info *TidbClusterConfig) error
CheckDisasterToleranceOrDie(info *TidbClusterConfig)
GetTidbMemberAssignedNodes(info *TidbClusterConfig) (map[string]string, error)
GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string
CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error
CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string)
CheckUpgradeComplete(info *TidbClusterConfig) error
CheckUpgradeCompleteOrDie(info *TidbClusterConfig)
CheckInitSQL(info *TidbClusterConfig) error
Expand Down Expand Up @@ -410,10 +406,7 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string {
set := map[string]string{
"operatorImage": oi.Image,
"controllerManager.autoFailover": "true",
"scheduler.kubeSchedulerImageName": oi.SchedulerImage,
"controllerManager.logLevel": oi.LogLevel,
"scheduler.logLevel": "4",
"imagePullPolicy": string(oi.ImagePullPolicy),
"testMode": strconv.FormatBool(oi.TestMode),
"admissionWebhook.cabundle": oi.Cabundle,
"admissionWebhook.create": strconv.FormatBool(oi.WebhookEnabled),
Expand All @@ -422,6 +415,15 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string {
"admissionWebhook.mutation.pingcapResources": strconv.FormatBool(oi.DefaultingEnabled),
"admissionWebhook.validation.pingcapResources": strconv.FormatBool(oi.ValidatingEnabled),
}
if oi.LogLevel != "" {
set["controllerManager.logLevel"] = oi.LogLevel
}
if oi.SchedulerImage != "" {
set["scheduler.kubeSchedulerImageName"] = oi.SchedulerImage
}
if string(oi.ImagePullPolicy) != "" {
set["imagePullPolicy"] = string(oi.ImagePullPolicy)
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make these parameters optional: each one is added to the Helm set values only when it is non-empty.

if oi.ControllerManagerReplicas != nil {
set["controllerManager.replicas"] = strconv.Itoa(*oi.ControllerManagerReplicas)
}
Expand Down Expand Up @@ -893,54 +895,6 @@ func (oa *operatorActions) CleanTidbClusterOrDie(info *TidbClusterConfig) {
}
}

// GetTidbMemberAssignedNodes lists the pods selected by the instance and
// TiDB component labels of the cluster described by info, and returns a map
// from pod name to the node name recorded in each pod's spec.
//
// If listing the pods fails, the error is logged and returned together with
// a nil map.
func (oa *operatorActions) GetTidbMemberAssignedNodes(info *TidbClusterConfig) (map[string]string, error) {
	nodeByPod := make(map[string]string)
	namespace, clusterName := info.Namespace, info.ClusterName

	tidbLabels := label.New().Instance(clusterName).Component(label.TiDBLabelVal).Labels()
	selector := labels.SelectorFromSet(tidbLabels).String()

	pods, err := oa.kubeCli.CoreV1().Pods(namespace).List(metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		klog.Errorf("failed to get tidb pods: %s/%s, %v", namespace, clusterName, err)
		return nil, err
	}

	// Index the items instead of ranging by value to avoid copying each Pod.
	for i := range pods.Items {
		nodeByPod[pods.Items[i].Name] = pods.Items[i].Spec.NodeName
	}
	return nodeByPod, nil
}

// GetTidbMemberAssignedNodesOrDie wraps GetTidbMemberAssignedNodes and hands
// any error to slack.NotifyAndPanic instead of returning it to the caller.
func (oa *operatorActions) GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string {
	assigned, err := oa.GetTidbMemberAssignedNodes(info)
	if err == nil {
		return assigned
	}
	slack.NotifyAndPanic(err)
	return assigned
}

// CheckTidbMemberAssignedNodes verifies that every member recorded in
// oldAssignedNodes (pod name -> node name) is still present in the current
// assignment and mapped to the same node.
//
// It returns the lookup error if the current assignment cannot be fetched,
// or an error naming the first member that is missing or mapped to a
// different node. Members that exist now but are absent from
// oldAssignedNodes are not checked.
func (oa *operatorActions) CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error {
	klog.Infof("checking tidb member [%s/%s] assigned nodes", info.Namespace, info.ClusterName)

	current, err := oa.GetTidbMemberAssignedNodes(info)
	if err != nil {
		return err
	}

	for podName, wantNode := range oldAssignedNodes {
		if gotNode, found := current[podName]; found && gotNode == wantNode {
			continue
		}
		// A missing member reads back as the empty string, which is what
		// gets reported as its new node.
		return fmt.Errorf("tidb member %s is not scheduled to %s, new node: %s", podName, wantNode, current[podName])
	}
	return nil
}

// CheckTidbMemberAssignedNodesOrDie wraps CheckTidbMemberAssignedNodes and
// hands any error to slack.NotifyAndPanic instead of returning it.
func (oa *operatorActions) CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string) {
	err := oa.CheckTidbMemberAssignedNodes(info, oldAssignedNodes)
	if err == nil {
		return
	}
	slack.NotifyAndPanic(err)
}

func (oa *operatorActions) CheckTidbClusterStatus(info *TidbClusterConfig) error {
klog.Infof("checking tidb cluster [%s/%s] status", info.Namespace, info.ClusterName)

Expand Down
2 changes: 0 additions & 2 deletions tests/cmd/stability/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,10 @@ func run() {
oa.RegisterWebHookAndServiceOrDie(ocfg.WebhookConfigName, namespace, ocfg.WebhookServiceName, certCtx)
ctx, cancel := context.WithCancel(context.Background())
for _, cluster := range clusters {
assignedNodes := oa.GetTidbMemberAssignedNodesOrDie(cluster)
cluster.UpgradeAll(upgradeVersion)
oa.UpgradeTidbClusterOrDie(cluster)
oa.CheckUpgradeOrDie(ctx, cluster)
oa.CheckTidbClusterStatusOrDie(cluster)
oa.CheckTidbMemberAssignedNodesOrDie(cluster, assignedNodes)
}

// configuration change
Expand Down
104 changes: 104 additions & 0 deletions tests/e2e/tidbcluster/serial.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/pingcap/tidb-operator/tests/pkg/fixture"
v1 "k8s.io/api/core/v1"
apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -106,6 +107,109 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() {
}
})

ginkgo.Context("tidb-operator with default values", func() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A context for testing against tidb-operator deployed with default values.

var ocfg *tests.OperatorConfig
var oa tests.OperatorActions
var genericCli client.Client

// Per-spec setup: build the operator config, (re)install the CRDs and
// tidb-operator, then create the generic client used by the specs.
ginkgo.BeforeEach(func() {
// Only namespace, release name, image and tag are set here; every other
// OperatorConfig field is left at its zero value.
ocfg = &tests.OperatorConfig{
Namespace: "pingcap",
ReleaseName: "operator",
Image: cfg.OperatorImage,
Tag: cfg.OperatorTag,
}
oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f)
ginkgo.By("Installing CRDs")
// Clean first, then install.
oa.CleanCRDOrDie()
oa.InstallCRDOrDie(ocfg)
ginkgo.By("Installing tidb-operator")
// Same clean-then-deploy order for the operator itself.
oa.CleanOperatorOrDie(ocfg)
oa.DeployOperatorOrDie(ocfg)
// Declared separately so the assignment below targets the outer
// genericCli rather than shadowing it with :=.
var err error
genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme})
framework.ExpectNoError(err, "failed to create clientset")
})

// Per-spec teardown: remove the operator first, then the CRDs.
ginkgo.AfterEach(func() {
ginkgo.By("Uninstall tidb-operator")
oa.CleanOperatorOrDie(ocfg)
ginkgo.By("Uninstalling CRDs")
oa.CleanCRDOrDie()
})

// There is no guarantee, but tidb pods should be assigned back to their
// previous nodes if no other pods occupy the positions.
// See docs/design-proposals/tidb-stable-scheduling.md
//
// The spec creates a cluster with three TiDB replicas, records the node of
// each TiDB pod, triggers a rolling update by changing the TiDB
// configuration, and then polls until every pod has been recreated (new UID)
// and is bound to the node it ran on before.
ginkgo.It("[Feature: StableScheduling] TiDB pods should be scheduled to preivous nodes", func() {
	clusterName := "tidb-scheduling"
	tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version)
	tc.Spec.PD.Replicas = 1
	tc.Spec.TiKV.Replicas = 1
	tc.Spec.TiDB.Replicas = 3
	err := genericCli.Create(context.TODO(), tc)
	framework.ExpectNoError(err)
	err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second)
	framework.ExpectNoError(err)

	listOptions := metav1.ListOptions{
		LabelSelector: labels.SelectorFromSet(
			label.New().Instance(clusterName).Component(label.TiDBLabelVal).Labels()).String(),
	}
	oldPodList, err := c.CoreV1().Pods(ns).List(listOptions)
	framework.ExpectNoError(err)

	// Index the original pods by name. Pointers refer to the slice elements
	// themselves, not to a range loop variable, so nothing is aliased or
	// copied.
	oldPods := make(map[string]*v1.Pod, len(oldPodList.Items))
	for i := range oldPodList.Items {
		oldPods[oldPodList.Items[i].Name] = &oldPodList.Items[i]
	}

	ginkgo.By("Update tidb configuration")
	updateStrategy := v1alpha1.ConfigUpdateStrategyRollingUpdate
	err = controller.GuaranteedUpdate(genericCli, tc, func() error {
		tokenLimit := uint(2000)
		tc.Spec.TiDB.Config.TokenLimit = &tokenLimit
		tc.Spec.TiDB.ConfigUpdateStrategy = &updateStrategy
		return nil
	})
	framework.ExpectNoError(err)

	ginkgo.By("Waiting for all tidb pods are recreated and assigned to the same node")
	err = wait.PollImmediate(time.Second*5, time.Minute*15, func() (bool, error) {
		newPodList, err := c.CoreV1().Pods(ns).List(listOptions)
		if err != nil {
			if apierrors.IsNotFound(err) {
				return false, nil
			}
			return false, err
		}
		if len(newPodList.Items) != len(oldPodList.Items) {
			return false, nil
		}
		for i := range newPodList.Items {
			newPod := &newPodList.Items[i]
			oldPod, ok := oldPods[newPod.Name]
			if !ok {
				return false, fmt.Errorf("found an unexpected pod: %q", newPod.Name)
			}
			if oldPod.UID == newPod.UID {
				// not recreated yet
				return false, nil
			}
			if newPod.Spec.NodeName == "" {
				// Recreated but not bound to a node yet. Keep polling
				// rather than reporting the empty node name as a
				// scheduling mismatch.
				return false, nil
			}
			if oldPod.Spec.NodeName != newPod.Spec.NodeName {
				// recreated but assigned to another node
				return false, fmt.Errorf("pod %q recreated but not assigned to previous node %q, got %q", oldPod.Name, oldPod.Spec.NodeName, newPod.Spec.NodeName)
			}
		}
		return true, nil
	})
	framework.ExpectNoError(err)
})
})

// tidb-operator with AdvancedStatefulSet feature enabled
ginkgo.Context("[Feature: AdvancedStatefulSet][Feature: Webhook]", func() {
var ocfg *tests.OperatorConfig
Expand Down
2 changes: 0 additions & 2 deletions tests/e2e/tidbcluster/tidbcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,10 @@ var _ = ginkgo.Describe("[tidb-operator] TiDBCluster", func() {
upgradeVersions := cfg.GetUpgradeTidbVersionsOrDie()
ginkgo.By(fmt.Sprintf("Upgrading tidb cluster from %s to %s", cluster.ClusterVersion, upgradeVersions[0]))
ctx, cancel := context.WithCancel(context.Background())
assignedNodes := oa.GetTidbMemberAssignedNodesOrDie(&cluster)
cluster.UpgradeAll(upgradeVersions[0])
oa.UpgradeTidbClusterOrDie(&cluster)
oa.CheckUpgradeOrDie(ctx, &cluster)
oa.CheckTidbClusterStatusOrDie(&cluster)
oa.CheckTidbMemberAssignedNodesOrDie(&cluster, assignedNodes)
cancel()

ginkgo.By("Check webhook is still running")
Expand Down