From ccf518326801b4886e80e7856e7b7cc0022c88e9 Mon Sep 17 00:00:00 2001 From: Himanshu Sharma <79965161+himanshu-kun@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:20:29 +0530 Subject: [PATCH] Refactor metrics code (#823) * refactored var block and collector registration part * refactored Collect() method for MC * refactored Collect() method for MCM * correction to helper function in MC metric update part * made metric enumeration more intuitive * adds metric port for MC container * removed cloud_api subsystem from MCM, kept in MC, removed promauto dependency, renamed functions * resolved lint errors --- .../deployment/out-of-tree/deployment.yaml | 4 + pkg/controller/metrics.go | 372 ++++++++++-------- pkg/metrics/metrics.go | 86 ++-- .../provider/machinecontroller/metrics.go | 338 ++++------------ pkg/util/provider/metrics/metrics.go | 338 ++-------------- 5 files changed, 343 insertions(+), 795 deletions(-) diff --git a/kubernetes/deployment/out-of-tree/deployment.yaml b/kubernetes/deployment/out-of-tree/deployment.yaml index c7f5ebebf..49a28e548 100644 --- a/kubernetes/deployment/out-of-tree/deployment.yaml +++ b/kubernetes/deployment/out-of-tree/deployment.yaml @@ -37,6 +37,10 @@ spec: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 + ports: + - containerPort: 10258 + name: metrics + protocol: TCP - command: - ./machine-controller - --control-kubeconfig=$(TARGET_KUBECONFIG) # Mandatory Parameter - Filepath to the target cluster's kubeconfig where node objects are expected to join. 
diff --git a/pkg/controller/metrics.go b/pkg/controller/metrics.go index 1190e6a83..fd3fd0bb2 100644 --- a/pkg/controller/metrics.go +++ b/pkg/controller/metrics.go @@ -23,16 +23,22 @@ import ( v1alpha1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" "github.com/gardener/machine-controller-manager/pkg/metrics" "github.com/prometheus/client_golang/prometheus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) // Describe is method required to implement the prometheus.Collect interface. func (c *controller) Describe(ch chan<- *prometheus.Desc) { - ch <- metrics.MachineCountDesc ch <- metrics.MachineSetCountDesc ch <- metrics.MachineDeploymentCountDesc } +// Collect is a method required to implement the prometheus.Collector interface. +func (c *controller) Collect(ch chan<- prometheus.Metric) { + c.CollectMachineSetMetrics(ch) + c.CollectMachineDeploymentMetrics(ch) +} + // CollectMachineDeploymentMetrics is method to collect machineSet related metrics. 
func (c *controller) CollectMachineDeploymentMetrics(ch chan<- prometheus.Metric) { machineDeploymentList, err := c.machineDeploymentLister.MachineDeployments(c.namespace).List(labels.Everything()) @@ -40,204 +46,238 @@ func (c *controller) CollectMachineDeploymentMetrics(ch chan<- prometheus.Metric metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machinedeployment-count"}).Inc() return } - metric, err := prometheus.NewConstMetric(metrics.MachineDeploymentCountDesc, prometheus.GaugeValue, float64(len(machineDeploymentList))) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machinedeployment-count"}).Inc() - return - } - ch <- metric for _, machineDeployment := range machineDeploymentList { - mdMeta := machineDeployment.ObjectMeta mdSpec := machineDeployment.Spec - metrics.MachineDeploymentInfo.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "createdAt": strconv.FormatInt(mdMeta.GetCreationTimestamp().Time.Unix(), 10), - "spec_strategy_type": string(mdSpec.Strategy.Type), - }).Set(float64(1)) + updateMachineDeploymentInfoMetric(mdMeta, mdSpec) + updateMachineDeploymentSpecRelatedMetrics(mdSpec, mdMeta) + updateMachineDeploymentStatusRelatedMetrics(machineDeployment, mdMeta) - var paused float64 - if mdSpec.Paused { - paused = 1 - } - metrics.MachineDeploymentInfoSpecPaused.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(paused) + } + updateMachineDeploymentCountMetric(ch, machineDeploymentList) +} - metrics.MachineDeploymentInfoSpecReplicas.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Replicas)) +// CollectMachineSetMetrics is method to collect machineSet related metrics. 
+func (c *controller) CollectMachineSetMetrics(ch chan<- prometheus.Metric) { + machineSetList, err := c.machineSetLister.MachineSets(c.namespace).List(labels.Everything()) + if err != nil { + metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machineset-count"}).Inc() + return + } - metrics.MachineDeploymentInfoSpecMinReadySeconds.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.MinReadySeconds)) - - if mdSpec.Strategy.Type == v1alpha1.RollingUpdateMachineDeploymentStrategyType { - metrics.MachineDeploymentInfoSpecRollingUpdateMaxSurge.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxSurge.IntValue())) - metrics.MachineDeploymentInfoSpecRollingUpdateMaxUnavailable.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxUnavailable.IntValue())) - } - if mdSpec.RevisionHistoryLimit != nil { - metrics.MachineDeploymentInfoSpecRevisionHistoryLimit.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.RevisionHistoryLimit))) - } - if mdSpec.ProgressDeadlineSeconds != nil { - metrics.MachineDeploymentInfoSpecProgressDeadlineSeconds.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.ProgressDeadlineSeconds))) - } - if mdSpec.RollbackTo != nil { - metrics.MachineDeploymentInfoSpecRollbackToRevision.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.RollbackTo.Revision)) - } + updateMachineSetCountMetric(ch, machineSetList) - for _, condition := range machineDeployment.Status.Conditions { - var status float64 - switch condition.Status { - case v1alpha1.ConditionTrue: - status = 1 - case v1alpha1.ConditionFalse: - status = 0 - case v1alpha1.ConditionUnknown: - status = 2 - } - - 
metrics.MachineDeploymentStatusCondition.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "condition": string(condition.Type), - }).Set(status) - } + for _, machineSet := range machineSetList { + msMeta := machineSet.ObjectMeta + msSpec := machineSet.Spec - statusLabels := prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - } - metrics.MachineDeploymentStatusAvailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.AvailableReplicas)) - metrics.MachineDeploymentStatusUnavailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UnavailableReplicas)) - metrics.MachineDeploymentStatusReadyReplicas.With(statusLabels).Set(float64(machineDeployment.Status.ReadyReplicas)) - metrics.MachineDeploymentStatusUpdatedReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UpdatedReplicas)) - metrics.MachineDeploymentStatusReplicas.With(statusLabels).Set(float64(machineDeployment.Status.Replicas)) - - if machineDeployment.Status.CollisionCount != nil { - metrics.MachineDeploymentStatusCollisionCount.With(statusLabels).Set(float64(*machineDeployment.Status.CollisionCount)) + updateMachineSetInfoMetric(msMeta, msSpec) + updateMachineSetSpecRelatedMetrics(msMeta, msSpec) + updateMachineSetStatusRelatedMetric(machineSet, msMeta) + } +} + +func updateMachineSetStatusRelatedMetric(machineSet *v1alpha1.MachineSet, msMeta metav1.ObjectMeta) { + updateMachineSetStatusConditionMetric(machineSet, msMeta) + updateMachineSetStatusFailedMachinesMetric(machineSet, msMeta) + + metrics.MachineSetStatusAvailableReplicas.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + }).Set(float64(machineSet.Status.AvailableReplicas)) + + metrics.MachineSetStatusFullyLabelledReplicas.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + }).Set(float64(machineSet.Status.FullyLabeledReplicas)) + + 
metrics.MachineSetStatusReadyReplicas.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + }).Set(float64(machineSet.Status.ReadyReplicas)) + + metrics.MachineSetStatusReplicas.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + }).Set(float64(machineSet.Status.ReadyReplicas)) +} + +func updateMachineSetStatusFailedMachinesMetric(machineSet *v1alpha1.MachineSet, msMeta metav1.ObjectMeta) { + if machineSet.Status.FailedMachines != nil { + for _, failedMachine := range *machineSet.Status.FailedMachines { + metrics.MachineSetStatusFailedMachines.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + "failed_machine_name": failedMachine.Name, + "failed_machine_provider_id": failedMachine.ProviderID, + "failed_machine_last_operation_state": string(failedMachine.LastOperation.State), + "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), + "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) } + } +} - if machineDeployment.Status.FailedMachines != nil { - for _, failedMachine := range machineDeployment.Status.FailedMachines { - metrics.MachineDeploymentStatusFailedMachines.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "failed_machine_name": failedMachine.Name, - "failed_machine_provider_id": failedMachine.ProviderID, - "failed_machine_last_operation_state": string(failedMachine.LastOperation.State), - "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), - "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) - - } +func updateMachineSetStatusConditionMetric(machineSet *v1alpha1.MachineSet, msMeta metav1.ObjectMeta) { + for _, condition := range machineSet.Status.Conditions { + var status float64 + switch condition.Status { + case v1alpha1.ConditionTrue: + status = 1 + case v1alpha1.ConditionFalse: + status = 0 + case 
v1alpha1.ConditionUnknown: + status = 2 } + metrics.MachineSetStatusCondition.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + "condition": string(condition.Type), + }).Set(status) } } -// CollectMachineSetMetrics is method to collect machineSet related metrics. -func (c *controller) CollectMachineSetMetrics(ch chan<- prometheus.Metric) { - machineSetList, err := c.machineSetLister.MachineSets(c.namespace).List(labels.Everything()) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machineset-count"}).Inc() - return - } +func updateMachineSetSpecRelatedMetrics(msMeta metav1.ObjectMeta, msSpec v1alpha1.MachineSetSpec) { + metrics.MachineSetInfoSpecReplicas.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace}).Set(float64(msSpec.Replicas)) + metrics.MachineSetInfoSpecMinReadySeconds.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace}).Set(float64(msSpec.MinReadySeconds)) +} + +func updateMachineSetInfoMetric(msMeta metav1.ObjectMeta, msSpec v1alpha1.MachineSetSpec) { + metrics.MachineSetInfo.With(prometheus.Labels{ + "name": msMeta.Name, + "namespace": msMeta.Namespace, + "createdAt": strconv.FormatInt(msMeta.GetCreationTimestamp().Time.Unix(), 10), + "spec_machine_class_api_group": msSpec.MachineClass.APIGroup, + "spec_machine_class_kind": msSpec.MachineClass.Kind, + "spec_machine_class_name": msSpec.MachineClass.Name}).Set(float64(1)) +} + +func updateMachineSetCountMetric(ch chan<- prometheus.Metric, machineSetList []*v1alpha1.MachineSet) { metric, err := prometheus.NewConstMetric(metrics.MachineSetCountDesc, prometheus.GaugeValue, float64(len(machineSetList))) if err != nil { metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machineset-count"}).Inc() return } ch <- metric +} - for _, machineSet := range machineSetList { +func updateMachineDeploymentStatusRelatedMetrics(machineDeployment *v1alpha1.MachineDeployment, mdMeta 
metav1.ObjectMeta) { + updateMachineDeploymentStatusConditionMetric(machineDeployment, mdMeta) + updateMachineDeploymentStatusFailedMachinesMetric(machineDeployment, mdMeta) - msMeta := machineSet.ObjectMeta - msSpec := machineSet.Spec + statusLabels := prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace, + } + metrics.MachineDeploymentStatusAvailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.AvailableReplicas)) + metrics.MachineDeploymentStatusUnavailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UnavailableReplicas)) + metrics.MachineDeploymentStatusReadyReplicas.With(statusLabels).Set(float64(machineDeployment.Status.ReadyReplicas)) + metrics.MachineDeploymentStatusUpdatedReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UpdatedReplicas)) + metrics.MachineDeploymentStatusReplicas.With(statusLabels).Set(float64(machineDeployment.Status.Replicas)) + + if machineDeployment.Status.CollisionCount != nil { + metrics.MachineDeploymentStatusCollisionCount.With(statusLabels).Set(float64(*machineDeployment.Status.CollisionCount)) + } +} - metrics.MachineSetInfo.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "createdAt": strconv.FormatInt(msMeta.GetCreationTimestamp().Time.Unix(), 10), - "spec_machine_class_api_group": msSpec.MachineClass.APIGroup, - "spec_machine_class_kind": msSpec.MachineClass.Kind, - "spec_machine_class_name": msSpec.MachineClass.Name}).Set(float64(1)) +func updateMachineDeploymentStatusFailedMachinesMetric(machineDeployment *v1alpha1.MachineDeployment, mdMeta metav1.ObjectMeta) { + if machineDeployment.Status.FailedMachines != nil { + for _, failedMachine := range machineDeployment.Status.FailedMachines { + metrics.MachineDeploymentStatusFailedMachines.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace, + "failed_machine_name": failedMachine.Name, + "failed_machine_provider_id": failedMachine.ProviderID, + 
"failed_machine_last_operation_state": string(failedMachine.LastOperation.State), + "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), + "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) - metrics.MachineSetInfoSpecReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace}).Set(float64(msSpec.Replicas)) - metrics.MachineSetInfoSpecMinReadySeconds.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace}).Set(float64(msSpec.MinReadySeconds)) - - for _, condition := range machineSet.Status.Conditions { - var status float64 - switch condition.Status { - case v1alpha1.ConditionTrue: - status = 1 - case v1alpha1.ConditionFalse: - status = 0 - case v1alpha1.ConditionUnknown: - status = 2 - } - - metrics.MachineSetStatusCondition.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "condition": string(condition.Type), - }).Set(status) } + } +} - metrics.MachineSetStatusAvailableReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.AvailableReplicas)) +func updateMachineDeploymentStatusConditionMetric(machineDeployment *v1alpha1.MachineDeployment, mdMeta metav1.ObjectMeta) { + for _, condition := range machineDeployment.Status.Conditions { + var status float64 + switch condition.Status { + case v1alpha1.ConditionTrue: + status = 1 + case v1alpha1.ConditionFalse: + status = 0 + case v1alpha1.ConditionUnknown: + status = 2 + } - metrics.MachineSetStatusFullyLabelledReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.FullyLabeledReplicas)) + metrics.MachineDeploymentStatusCondition.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace, + "condition": string(condition.Type), + }).Set(status) + } +} - metrics.MachineSetStatusReadyReplicas.With(prometheus.Labels{ - 
"name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.ReadyReplicas)) +func updateMachineDeploymentSpecRelatedMetrics(mdSpec v1alpha1.MachineDeploymentSpec, mdMeta metav1.ObjectMeta) { + var paused float64 + if mdSpec.Paused { + paused = 1 + } + metrics.MachineDeploymentInfoSpecPaused.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(paused) - metrics.MachineSetStatusReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.ReadyReplicas)) - - if machineSet.Status.FailedMachines != nil { - - for _, failedMachine := range *machineSet.Status.FailedMachines { - metrics.MachineSetStatusFailedMachines.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "failed_machine_name": failedMachine.Name, - "failed_machine_provider_id": failedMachine.ProviderID, - "failed_machine_last_operation_state": string(failedMachine.LastOperation.State), - "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), - "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) - } - } + metrics.MachineDeploymentInfoSpecReplicas.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Replicas)) + + metrics.MachineDeploymentInfoSpecMinReadySeconds.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(mdSpec.MinReadySeconds)) + + if mdSpec.Strategy.Type == v1alpha1.RollingUpdateMachineDeploymentStrategyType { + metrics.MachineDeploymentInfoSpecRollingUpdateMaxSurge.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxSurge.IntValue())) + metrics.MachineDeploymentInfoSpecRollingUpdateMaxUnavailable.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": 
mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxUnavailable.IntValue())) + } + if mdSpec.RevisionHistoryLimit != nil { + metrics.MachineDeploymentInfoSpecRevisionHistoryLimit.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.RevisionHistoryLimit))) + } + if mdSpec.ProgressDeadlineSeconds != nil { + metrics.MachineDeploymentInfoSpecProgressDeadlineSeconds.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.ProgressDeadlineSeconds))) + } + if mdSpec.RollbackTo != nil { + metrics.MachineDeploymentInfoSpecRollbackToRevision.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace}).Set(float64(mdSpec.RollbackTo.Revision)) } } -// Collect is method required to implement the prometheus.Collect interface. -func (c *controller) Collect(ch chan<- prometheus.Metric) { - c.CollectMachineSetMetrics(ch) - c.CollectMachineDeploymentMetrics(ch) +func updateMachineDeploymentInfoMetric(mdMeta metav1.ObjectMeta, mdSpec v1alpha1.MachineDeploymentSpec) { + metrics.MachineDeploymentInfo.With(prometheus.Labels{ + "name": mdMeta.Name, + "namespace": mdMeta.Namespace, + "createdAt": strconv.FormatInt(mdMeta.GetCreationTimestamp().Time.Unix(), 10), + "spec_strategy_type": string(mdSpec.Strategy.Type), + }).Set(float64(1)) +} + +func updateMachineDeploymentCountMetric(ch chan<- prometheus.Metric, machineDeploymentList []*v1alpha1.MachineDeployment) { + metric, err := prometheus.NewConstMetric(metrics.MachineDeploymentCountDesc, prometheus.GaugeValue, float64(len(machineDeploymentList))) + if err != nil { + metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machinedeployment-count"}).Inc() + return + } + ch <- metric } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 29ffca485..5b89ddf72 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -22,42 +22,13 @@ import ( const ( namespace = "mcm" - 
machineSubsystem = "machine" machinesetSubsystem = "machine_set" machinedeploymentSubsystem = "machine_deployment" - cloudAPISubsystem = "cloud_api" + miscSubsystem = "misc" ) +// variables for subsystem: machine_set var ( - // MachineControllerFrozenDesc is a metric about MachineController's frozen status - MachineControllerFrozenDesc = prometheus.NewDesc("mcm_machine_controller_frozen", "Frozen status of the machine controller manager.", nil, nil) - // MachineCountDesc is a metric about machine count of the mcm manages - MachineCountDesc = prometheus.NewDesc("mcm_machine_items_total", "Count of machines currently managed by the mcm.", nil, nil) - - //MachineCSPhase Current status phase of the Machines currently managed by the mcm. - MachineCSPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "current_status_phase", - Help: "Current status phase of the Machines currently managed by the mcm.", - }, []string{"name", "namespace"}) - - //MachineInfo Information of the Machines currently managed by the mcm. 
- MachineInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "info", - Help: "Information of the Machines currently managed by the mcm.", - }, []string{"name", "namespace", "createdAt", - "spec_provider_id", "spec_class_api_group", "spec_class_kind", "spec_class_name"}) - - // MachineStatusCondition Information of the mcm managed Machines' status conditions - MachineStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "status_condition", - Help: "Information of the mcm managed Machines' status conditions.", - }, []string{"name", "namespace", "condition"}) // MachineSetCountDesc Count of machinesets currently managed by the mcm MachineSetCountDesc = prometheus.NewDesc("mcm_machine_set_items_total", "Count of machinesets currently managed by the mcm.", nil, nil) @@ -136,7 +107,10 @@ var ( Name: "status_replicas", Help: "Information of the mcm managed Machinesets' status for replicas.", }, []string{"name", "namespace"}) +) +// variables for subsystem: machine_deployment +var ( // MachineDeploymentCountDesc Count of machinedeployments currently managed by the mcm. MachineDeploymentCountDesc = prometheus.NewDesc("mcm_machine_deployment_items_total", "Count of machinedeployments currently managed by the mcm.", nil, nil) @@ -277,38 +251,21 @@ var ( }, []string{"name", "namespace", "failed_machine_name", "failed_machine_provider_id", "failed_machine_owner_ref", "failed_machine_last_operation_state", "failed_machine_last_operation_machine_operation_type"}) +) - // APIRequestCount Number of Cloud Service API requests, partitioned by provider, and service. 
- APIRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cloudAPISubsystem, - Name: "requests_total", - Help: "Number of Cloud Service API requests, partitioned by provider, and service.", - }, []string{"provider", "service"}, - ) - - // APIFailedRequestCount Number of Failed Cloud Service API requests, partitioned by provider, and service. - APIFailedRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cloudAPISubsystem, - Name: "requests_failed_total", - Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", - }, []string{"provider", "service"}, - ) - +// variables for subsystem: misc +var ( // ScrapeFailedCounter is a Prometheus metric, which counts errors during metrics collection. ScrapeFailedCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Name: "scrape_failure_total", - Help: "Total count of scrape failures.", + Namespace: namespace, + Subsystem: miscSubsystem, + Name: "scrape_failure_total", + Help: "Total count of scrape failures.", + ConstLabels: map[string]string{"binary": "machine-controller-manager"}, }, []string{"kind"}) ) -func init() { - prometheus.MustRegister(ScrapeFailedCounter) - prometheus.MustRegister(MachineInfo) - prometheus.MustRegister(MachineStatusCondition) - prometheus.MustRegister(MachineCSPhase) +func registerMachineSetSubsystemMetrics() { prometheus.MustRegister(MachineSetInfo) prometheus.MustRegister(MachineSetInfoSpecReplicas) prometheus.MustRegister(MachineSetInfoSpecMinReadySeconds) @@ -318,6 +275,9 @@ func init() { prometheus.MustRegister(MachineSetStatusReplicas) prometheus.MustRegister(MachineSetStatusCondition) prometheus.MustRegister(MachineSetStatusFailedMachines) +} + +func registerMachineDeploymentSubsystemMetrics() { prometheus.MustRegister(MachineDeploymentInfo) prometheus.MustRegister(MachineDeploymentInfoSpecPaused) 
prometheus.MustRegister(MachineDeploymentInfoSpecReplicas) @@ -335,6 +295,14 @@ func init() { prometheus.MustRegister(MachineDeploymentStatusCollisionCount) prometheus.MustRegister(MachineDeploymentStatusReplicas) prometheus.MustRegister(MachineDeploymentStatusFailedMachines) - prometheus.MustRegister(APIRequestCount) - prometheus.MustRegister(APIFailedRequestCount) +} + +func registerMiscellaneousMetrics() { + prometheus.MustRegister(ScrapeFailedCounter) +} + +func init() { + registerMachineSetSubsystemMetrics() + registerMachineDeploymentSubsystemMetrics() + registerMiscellaneousMetrics() } diff --git a/pkg/util/provider/machinecontroller/metrics.go b/pkg/util/provider/machinecontroller/metrics.go index 7dfcf29be..c181f5981 100644 --- a/pkg/util/provider/machinecontroller/metrics.go +++ b/pkg/util/provider/machinecontroller/metrics.go @@ -24,6 +24,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/provider/metrics" "github.com/prometheus/client_golang/prometheus" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) @@ -32,213 +33,13 @@ func (c *controller) Describe(ch chan<- *prometheus.Desc) { ch <- metrics.MachineCountDesc } -/* -TODO: Fix metric collection -// CollectMachineDeploymentMetrics is method to collect machineSet related metrics. 
-func (c *controller) CollectMachineDeploymentMetrics(ch chan<- prometheus.Metric) { - machineDeploymentList, err := c.machineDeploymentLister.MachineDeployments(c.namespace).List(labels.Everything()) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machinedeployment-count"}).Inc() - return - } - metric, err := prometheus.NewConstMetric(metrics.MachineDeploymentCountDesc, prometheus.GaugeValue, float64(len(machineDeploymentList))) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machinedeployment-count"}).Inc() - return - } - ch <- metric - - for _, machineDeployment := range machineDeploymentList { - - mdMeta := machineDeployment.ObjectMeta - mdSpec := machineDeployment.Spec - - metrics.MachineDeploymentInfo.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "createdAt": strconv.FormatInt(mdMeta.GetCreationTimestamp().Time.Unix(), 10), - "spec_strategy_type": string(mdSpec.Strategy.Type), - }).Set(float64(1)) - - var paused float64 - if mdSpec.Paused { - paused = 1 - } - metrics.MachineDeploymentInfoSpecPaused.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(paused) - - metrics.MachineDeploymentInfoSpecReplicas.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Replicas)) - - metrics.MachineDeploymentInfoSpecMinReadySeconds.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.MinReadySeconds)) - - if mdSpec.Strategy.Type == v1alpha1.RollingUpdateMachineDeploymentStrategyType { - metrics.MachineDeploymentInfoSpecRollingUpdateMaxSurge.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxSurge.IntValue())) - metrics.MachineDeploymentInfoSpecRollingUpdateMaxUnavailable.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": 
mdMeta.Namespace}).Set(float64(mdSpec.Strategy.RollingUpdate.MaxUnavailable.IntValue())) - } - if mdSpec.RevisionHistoryLimit != nil { - metrics.MachineDeploymentInfoSpecRevisionHistoryLimit.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.RevisionHistoryLimit))) - } - if mdSpec.ProgressDeadlineSeconds != nil { - metrics.MachineDeploymentInfoSpecProgressDeadlineSeconds.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(int64(*mdSpec.ProgressDeadlineSeconds))) - } - if mdSpec.RollbackTo != nil { - metrics.MachineDeploymentInfoSpecRollbackToRevision.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace}).Set(float64(mdSpec.RollbackTo.Revision)) - } - - for _, condition := range machineDeployment.Status.Conditions { - var status float64 - switch condition.Status { - case v1alpha1.ConditionTrue: - status = 1 - case v1alpha1.ConditionFalse: - status = 0 - case v1alpha1.ConditionUnknown: - status = 2 - } - - metrics.MachineDeploymentStatusCondition.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "condition": string(condition.Type), - }).Set(status) - } - - statusLabels := prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - } - metrics.MachineDeploymentStatusAvailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.AvailableReplicas)) - metrics.MachineDeploymentStatusUnavailableReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UnavailableReplicas)) - metrics.MachineDeploymentStatusReadyReplicas.With(statusLabels).Set(float64(machineDeployment.Status.ReadyReplicas)) - metrics.MachineDeploymentStatusUpdatedReplicas.With(statusLabels).Set(float64(machineDeployment.Status.UpdatedReplicas)) - metrics.MachineDeploymentStatusReplicas.With(statusLabels).Set(float64(machineDeployment.Status.Replicas)) - - if machineDeployment.Status.CollisionCount != nil { - 
metrics.MachineDeploymentStatusCollisionCount.With(statusLabels).Set(float64(*machineDeployment.Status.CollisionCount)) - } - - if machineDeployment.Status.FailedMachines != nil { - for _, failedMachine := range machineDeployment.Status.FailedMachines { - metrics.MachineDeploymentStatusFailedMachines.With(prometheus.Labels{ - "name": mdMeta.Name, - "namespace": mdMeta.Namespace, - "failed_machine_name": failedMachine.Name, - "failed_machine_provider_id": failedMachine.ProviderID, - "failed_machine_last_operation_state": string(failedMachine.LastOperation.State), - "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), - "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) - - } - } - - } -} - -// CollectMachineSetMetrics is method to collect machineSet related metrics. -func (c *controller) CollectMachineSetMetrics(ch chan<- prometheus.Metric) { - machineSetList, err := c.machineSetLister.MachineSets(c.namespace).List(labels.Everything()) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machineset-count"}).Inc() - return - } - metric, err := prometheus.NewConstMetric(metrics.MachineSetCountDesc, prometheus.GaugeValue, float64(len(machineSetList))) - if err != nil { - metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machineset-count"}).Inc() - return - } - ch <- metric - - for _, machineSet := range machineSetList { - - msMeta := machineSet.ObjectMeta - msSpec := machineSet.Spec - - metrics.MachineSetInfo.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "createdAt": strconv.FormatInt(msMeta.GetCreationTimestamp().Time.Unix(), 10), - "spec_machine_class_api_group": msSpec.MachineClass.APIGroup, - "spec_machine_class_kind": msSpec.MachineClass.Kind, - "spec_machine_class_name": msSpec.MachineClass.Name}).Set(float64(1)) - - metrics.MachineSetInfoSpecReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": 
msMeta.Namespace}).Set(float64(msSpec.Replicas)) - metrics.MachineSetInfoSpecMinReadySeconds.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace}).Set(float64(msSpec.MinReadySeconds)) - - for _, condition := range machineSet.Status.Conditions { - var status float64 - switch condition.Status { - case v1alpha1.ConditionTrue: - status = 1 - case v1alpha1.ConditionFalse: - status = 0 - case v1alpha1.ConditionUnknown: - status = 2 - } - - metrics.MachineSetStatusCondition.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "condition": string(condition.Type), - }).Set(status) - } - - metrics.MachineSetStatusAvailableReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.AvailableReplicas)) - - metrics.MachineSetStatusFullyLabelledReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.FullyLabeledReplicas)) - - metrics.MachineSetStatusReadyReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.ReadyReplicas)) - - metrics.MachineSetStatusReplicas.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - }).Set(float64(machineSet.Status.ReadyReplicas)) - - if machineSet.Status.FailedMachines != nil { - - for _, failedMachine := range *machineSet.Status.FailedMachines { - metrics.MachineSetStatusFailedMachines.With(prometheus.Labels{ - "name": msMeta.Name, - "namespace": msMeta.Namespace, - "failed_machine_name": failedMachine.Name, - "failed_machine_provider_id": failedMachine.ProviderID, - "failed_machine_last_operation_state": string(failedMachine.LastOperation.State), - "failed_machine_last_operation_machine_operation_type": string(failedMachine.LastOperation.Type), - "failed_machine_owner_ref": failedMachine.OwnerRef}).Set(float64(1)) - } - } - } +// Collect is method required to 
implement the prometheus.Collect interface. +func (c *controller) Collect(ch chan<- prometheus.Metric) { + c.CollectMachineMetrics(ch) + c.CollectMachineControllerFrozenStatusMetrics(ch) } -*/ -// CollectMachines is method to collect Machine related metrics. +// CollectMachineMetrics is method to collect Machine related metrics. func (c *controller) CollectMachineMetrics(ch chan<- prometheus.Metric) { // Collect the count of machines managed by the mcm. machineList, err := c.machineLister.Machines(c.namespace).List(labels.Everything()) @@ -250,72 +51,30 @@ func (c *controller) CollectMachineMetrics(ch chan<- prometheus.Metric) { for _, machine := range machineList { mMeta := machine.ObjectMeta mSpec := machine.Spec + updateMachineInfoMetric(mMeta, mSpec) + updateMachineStatusConditionMetric(machine, mMeta) + updateMachineCSPhaseMetric(machine, mMeta) + } - metrics.MachineInfo.With(prometheus.Labels{ - "name": mMeta.Name, - "namespace": mMeta.Namespace, - "createdAt": strconv.FormatInt(mMeta.GetCreationTimestamp().Time.Unix(), 10), - "spec_provider_id": mSpec.ProviderID, - "spec_class_api_group": mSpec.Class.APIGroup, - "spec_class_kind": mSpec.Class.Kind, - "spec_class_name": mSpec.Class.Name}).Set(float64(1)) - - for _, condition := range machine.Status.Conditions { - var status float64 - switch condition.Status { - case v1.ConditionTrue: - status = 1 - case v1.ConditionFalse: - status = 0 - case v1.ConditionUnknown: - status = 2 - } - - metrics.MachineStatusCondition.With(prometheus.Labels{ - "name": mMeta.Name, - "namespace": mMeta.Namespace, - "condition": string(condition.Type), - }).Set(status) - } - - var phase float64 - switch machine.Status.CurrentStatus.Phase { - case v1alpha1.MachinePending: - phase = -2 - case v1alpha1.MachineAvailable: - phase = -1 - case v1alpha1.MachineRunning: - phase = 0 - case v1alpha1.MachineTerminating: - phase = 1 - case v1alpha1.MachineUnknown: - phase = 2 - case v1alpha1.MachineFailed: - phase = 3 - } - 
metrics.MachineCSPhase.With(prometheus.Labels{ - "name": mMeta.Name, - "namespace": mMeta.Namespace, - }).Set(phase) + updateMachineCountMetric(ch, machineList) +} +// CollectMachineControllerFrozenStatusMetrics is method to collect Machine controller state related metrics. +func (c *controller) CollectMachineControllerFrozenStatusMetrics(ch chan<- prometheus.Metric) { + var frozenStatus float64 + if c.safetyOptions.MachineControllerFrozen { + frozenStatus = 1 } - - metric, err := prometheus.NewConstMetric(metrics.MachineCountDesc, prometheus.GaugeValue, float64(len(machineList))) + metric, err := prometheus.NewConstMetric(metrics.MachineControllerFrozenDesc, prometheus.GaugeValue, frozenStatus) if err != nil { metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machine-count"}).Inc() return } ch <- metric - } -// CollectMachines is method to collect Machine related metrics. -func (c *controller) CollectMachineControllerFrozenStatus(ch chan<- prometheus.Metric) { - var frozenStatus float64 - if c.safetyOptions.MachineControllerFrozen { - frozenStatus = 1 - } - metric, err := prometheus.NewConstMetric(metrics.MachineControllerFrozenDesc, prometheus.GaugeValue, frozenStatus) +func updateMachineCountMetric(ch chan<- prometheus.Metric, machineList []*v1alpha1.Machine) { + metric, err := prometheus.NewConstMetric(metrics.MachineCountDesc, prometheus.GaugeValue, float64(len(machineList))) if err != nil { metrics.ScrapeFailedCounter.With(prometheus.Labels{"kind": "Machine-count"}).Inc() return @@ -323,10 +82,55 @@ func (c *controller) CollectMachineControllerFrozenStatus(ch chan<- prometheus.M ch <- metric } -// Collect is method required to implement the prometheus.Collect interface. 
-func (c *controller) Collect(ch chan<- prometheus.Metric) { - c.CollectMachineMetrics(ch) - //c.CollectMachineSetMetrics(ch) - //c.CollectMachineDeploymentMetrics(ch) - c.CollectMachineControllerFrozenStatus(ch) +func updateMachineCSPhaseMetric(machine *v1alpha1.Machine, mMeta metav1.ObjectMeta) { + var phase float64 + switch machine.Status.CurrentStatus.Phase { + case v1alpha1.MachineTerminating: + phase = -4 + case v1alpha1.MachineFailed: + phase = -3 + case v1alpha1.MachineCrashLoopBackOff: + phase = -2 + case v1alpha1.MachineUnknown: + phase = -1 + case v1alpha1.MachinePending: + phase = 0 + case v1alpha1.MachineRunning: + phase = 1 + } + metrics.MachineCSPhase.With(prometheus.Labels{ + "name": mMeta.Name, + "namespace": mMeta.Namespace, + }).Set(phase) +} + +func updateMachineStatusConditionMetric(machine *v1alpha1.Machine, mMeta metav1.ObjectMeta) { + for _, condition := range machine.Status.Conditions { + var status float64 + switch condition.Status { + case v1.ConditionTrue: + status = 1 + case v1.ConditionFalse: + status = 0 + case v1.ConditionUnknown: + status = 2 + } + + metrics.MachineStatusCondition.With(prometheus.Labels{ + "name": mMeta.Name, + "namespace": mMeta.Namespace, + "condition": string(condition.Type), + }).Set(status) + } +} + +func updateMachineInfoMetric(mMeta metav1.ObjectMeta, mSpec v1alpha1.MachineSpec) { + metrics.MachineInfo.With(prometheus.Labels{ + "name": mMeta.Name, + "namespace": mMeta.Namespace, + "createdAt": strconv.FormatInt(mMeta.GetCreationTimestamp().Time.Unix(), 10), + "spec_provider_id": mSpec.ProviderID, + "spec_class_api_group": mSpec.Class.APIGroup, + "spec_class_kind": mSpec.Class.Kind, + "spec_class_name": mSpec.Class.Name}).Set(float64(1)) } diff --git a/pkg/util/provider/metrics/metrics.go b/pkg/util/provider/metrics/metrics.go index 5eaf36979..50033312a 100644 --- a/pkg/util/provider/metrics/metrics.go +++ b/pkg/util/provider/metrics/metrics.go @@ -18,24 +18,24 @@ package metrics import ( 
"github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) const ( - namespace = "mcm" - machineSubsystem = "machine" - machinesetSubsystem = "machine_set" - machinedeploymentSubsystem = "machine_deployment" - cloudAPISubsystem = "cloud_api" + namespace = "mcm" + machineSubsystem = "machine" + cloudAPISubsystem = "cloud_api" + miscSubsystem = "misc" ) +// variables for subsystem: machine var ( // MachineControllerFrozenDesc is a metric about MachineController's frozen status MachineControllerFrozenDesc = prometheus.NewDesc("mcm_machine_controller_frozen", "Frozen status of the machine controller manager.", nil, nil) + // MachineCountDesc is a metric about machine count of the mcm manages MachineCountDesc = prometheus.NewDesc("mcm_machine_items_total", "Count of machines currently managed by the mcm.", nil, nil) - //MachineCSPhase Current status phase of the Machines currently managed by the mcm. + // MachineCSPhase Current status phase of the Machines currently managed by the mcm. MachineCSPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: machineSubsystem, @@ -43,7 +43,7 @@ var ( Help: "Current status phase of the Machines currently managed by the mcm.", }, []string{"name", "namespace"}) - //MachineInfo Information of the Machines currently managed by the mcm. + // MachineInfo Information of the Machines currently managed by the mcm. 
MachineInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: machineSubsystem, @@ -59,228 +59,10 @@ var ( Name: "status_condition", Help: "Information of the mcm managed Machines' status conditions.", }, []string{"name", "namespace", "condition"}) +) - /* - // MachineSetCountDesc Count of machinesets currently managed by the mcm - MachineSetCountDesc = prometheus.NewDesc("mcm_machine_set_items_total", "Count of machinesets currently managed by the mcm.", nil, nil) - - // MachineSetInfo Information of the Machinesets currently managed by the mcm. - MachineSetInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "info", - Help: "Information of the Machinesets currently managed by the mcm.", - }, []string{"name", "namespace", "createdAt", - "spec_machine_class_api_group", "spec_machine_class_kind", "spec_machine_class_name"}) - - // MachineSetInfoSpecReplicas Count of the Machinesets Spec Replicas. - MachineSetInfoSpecReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "info_spec_replicas", - Help: "Count of the Machinesets Spec Replicas.", - }, []string{"name", "namespace"}) - - // MachineSetInfoSpecMinReadySeconds Information of the Machinesets currently managed by the mcm. - MachineSetInfoSpecMinReadySeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "info_spec_min_ready_seconds", - Help: "Information of the Machinesets currently managed by the mcm.", - }, []string{"name", "namespace"}) - - // MachineSetStatusCondition Information of the mcm managed Machinesets' status conditions. 
- MachineSetStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "status_condition", - Help: "Information of the mcm managed Machinesets' status conditions.", - }, []string{"name", "namespace", "condition"}) - - // MachineSetStatusFailedMachines Information of the mcm managed Machinesets' failed machines. - MachineSetStatusFailedMachines = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "failed_machines", - Help: "Information of the mcm managed Machinesets' failed machines.", - }, []string{"name", "namespace", "failed_machine_name", "failed_machine_provider_id", "failed_machine_owner_ref", - "failed_machine_last_operation_state", - "failed_machine_last_operation_machine_operation_type"}) - - // MachineSetStatusAvailableReplicas Information of the mcm managed Machinesets' status for available replicas. - MachineSetStatusAvailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "status_available_replicas", - Help: "Information of the mcm managed Machinesets' status for available replicas.", - }, []string{"name", "namespace"}) - - // MachineSetStatusFullyLabelledReplicas Information of the mcm managed Machinesets' status for fully labelled replicas. 
- MachineSetStatusFullyLabelledReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "status_fully_labelled_replicas", - Help: "Information of the mcm managed Machinesets' status for fully labelled replicas.", - }, []string{"name", "namespace"}) - - // MachineSetStatusReadyReplicas Information of the mcm managed Machinesets' status for ready replicas - MachineSetStatusReadyReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "status_ready_replicas", - Help: "Information of the mcm managed Machinesets' status for ready replicas.", - }, []string{"name", "namespace"}) - - // MachineSetStatusReplicas Information of the mcm managed Machinesets' status for replicas. - MachineSetStatusReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinesetSubsystem, - Name: "status_replicas", - Help: "Information of the mcm managed Machinesets' status for replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentCountDesc Count of machinedeployments currently managed by the mcm. - MachineDeploymentCountDesc = prometheus.NewDesc("mcm_machine_deployment_items_total", "Count of machinedeployments currently managed by the mcm.", nil, nil) - - // MachineDeploymentInfo Information of the Machinedeployments currently managed by the mcm. - MachineDeploymentInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info", - Help: "Information of the Machinedeployments currently managed by the mcm.", - }, []string{"name", "namespace", "createdAt", "spec_strategy_type"}) - - // MachineDeploymentInfoSpecPaused Information of the Machinedeployments paused status. 
- MachineDeploymentInfoSpecPaused = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_paused", - Help: "Information of the Machinedeployments paused status.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecReplicas Information of the Machinedeployments spec replicas. - MachineDeploymentInfoSpecReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_replicas", - Help: "Information of the Machinedeployments spec replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecMinReadySeconds Information of the Machinedeployments spec min ready seconds. - MachineDeploymentInfoSpecMinReadySeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_min_ready_seconds", - Help: "Information of the Machinedeployments spec min ready seconds.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecRollingUpdateMaxSurge Information of the Machinedeployments spec rolling update max surge. - MachineDeploymentInfoSpecRollingUpdateMaxSurge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_rolling_update_max_surge", - Help: "Information of the Machinedeployments spec rolling update max surge.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecRollingUpdateMaxUnavailable Information of the Machinedeployments spec rolling update max unavailable. 
- MachineDeploymentInfoSpecRollingUpdateMaxUnavailable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_rolling_update_max_unavailable", - Help: "Information of the Machinedeployments spec rolling update max unavailable.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecRevisionHistoryLimit Information of the Machinedeployments spec revision history limit. - MachineDeploymentInfoSpecRevisionHistoryLimit = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_revision_history_limit", - Help: "Information of the Machinedeployments spec revision history limit.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecProgressDeadlineSeconds Information of the Machinedeployments spec deadline seconds. - MachineDeploymentInfoSpecProgressDeadlineSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_progress_deadline_seconds", - Help: "Information of the Machinedeployments spec deadline seconds.", - }, []string{"name", "namespace"}) - - // MachineDeploymentInfoSpecRollbackToRevision Information of the Machinedeployments spec rollback to revision. - MachineDeploymentInfoSpecRollbackToRevision = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "info_spec_rollback_to_revision", - Help: "Information of the Machinedeployments spec rollback to revision.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusCondition Information of the mcm managed Machinedeployments' status conditions. 
- MachineDeploymentStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_condition", - Help: "Information of the mcm managed Machinedeployments' status conditions.", - }, []string{"name", "namespace", "condition"}) - - // MachineDeploymentStatusAvailableReplicas Count of the mcm managed Machinedeployments available replicas. - MachineDeploymentStatusAvailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_available_replicas", - Help: "Count of the mcm managed Machinedeployments available replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusUnavailableReplicas Count of the mcm managed Machinedeployments unavailable replicas. - MachineDeploymentStatusUnavailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_unavailable_replicas", - Help: "Count of the mcm managed Machinedeployments unavailable replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusReadyReplicas Count of the mcm managed Machinedeployments ready replicas. - MachineDeploymentStatusReadyReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_ready_replicas", - Help: "Count of the mcm managed Machinedeployments ready replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusUpdatedReplicas Count of the mcm managed Machinedeployments updated replicas. 
- MachineDeploymentStatusUpdatedReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_updated_replicas", - Help: "Count of the mcm managed Machinedeployments updated replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusCollisionCount Mcm managed Machinedeployments collision count. - MachineDeploymentStatusCollisionCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_collision_count", - Help: "Mcm managed Machinedeployments collision count.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusReplicas Count of the mcm managed Machinedeployments replicas. - MachineDeploymentStatusReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "status_replicas", - Help: "Count of the mcm managed Machinedeployments replicas.", - }, []string{"name", "namespace"}) - - // MachineDeploymentStatusFailedMachines Information of the mcm managed Machinedeployments' failed machines. - MachineDeploymentStatusFailedMachines = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: machinedeploymentSubsystem, - Name: "failed_machines", - Help: "Information of the mcm managed Machinedeployments' failed machines.", - }, []string{"name", "namespace", "failed_machine_name", "failed_machine_provider_id", "failed_machine_owner_ref", - "failed_machine_last_operation_state", - "failed_machine_last_operation_machine_operation_type"}) - */ - +// variables for subsystem: cloud_api +var ( // APIRequestCount Number of Cloud Service API requests, partitioned by provider, and service. 
APIRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, @@ -298,87 +80,37 @@ var ( Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", }, []string{"provider", "service"}, ) +) +// variables for subsystem: misc +var ( // ScrapeFailedCounter is a Prometheus metric, which counts errors during metrics collection. ScrapeFailedCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Name: "scrape_failure_total", - Help: "Total count of scrape failures.", + Namespace: namespace, + Subsystem: miscSubsystem, + Name: "scrape_failure_total", + Help: "Total count of scrape failures.", + ConstLabels: map[string]string{"binary": "machine-controller-manager-provider"}, }, []string{"kind"}) ) -func init() { - reg := prometheus.NewRegistry() - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Name: "scrape_failure_total", - Help: "Total count of scrape failures.", - }, []string{"kind"}) - - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "info", - Help: "Information of the Machines currently managed by the mcm.", - }, []string{"name", "namespace", "createdAt", - "spec_provider_id", "spec_class_api_group", "spec_class_kind", "spec_class_name"}) - - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "status_condition", - Help: "Information of the mcm managed Machines' status conditions.", - }, []string{"name", "namespace", "condition"}) +func registerMachineSubsystemMetrics() { + prometheus.MustRegister(MachineInfo) + prometheus.MustRegister(MachineStatusCondition) + prometheus.MustRegister(MachineCSPhase) +} - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: machineSubsystem, - Name: "current_status_phase", - Help: "Current status phase of the Machines currently 
managed by the mcm.", - }, []string{"name", "namespace"}) +func registerCloudAPISubsystemMetrics() { + prometheus.MustRegister(APIRequestCount) + prometheus.MustRegister(APIFailedRequestCount) +} - //register below commented collectors with promauto only not with MustRegister - /* - prometheus.MustRegister(MachineSetInfo) - prometheus.MustRegister(MachineSetInfoSpecReplicas) - prometheus.MustRegister(MachineSetInfoSpecMinReadySeconds) - prometheus.MustRegister(MachineSetStatusAvailableReplicas) - prometheus.MustRegister(MachineSetStatusFullyLabelledReplicas) - prometheus.MustRegister(MachineSetStatusReadyReplicas) - prometheus.MustRegister(MachineSetStatusReplicas) - prometheus.MustRegister(MachineSetStatusCondition) - prometheus.MustRegister(MachineSetStatusFailedMachines) - prometheus.MustRegister(MachineDeploymentInfo) - prometheus.MustRegister(MachineDeploymentInfoSpecPaused) - prometheus.MustRegister(MachineDeploymentInfoSpecReplicas) - prometheus.MustRegister(MachineDeploymentInfoSpecRevisionHistoryLimit) - prometheus.MustRegister(MachineDeploymentInfoSpecMinReadySeconds) - prometheus.MustRegister(MachineDeploymentInfoSpecRollingUpdateMaxSurge) - prometheus.MustRegister(MachineDeploymentInfoSpecRollingUpdateMaxUnavailable) - prometheus.MustRegister(MachineDeploymentInfoSpecProgressDeadlineSeconds) - prometheus.MustRegister(MachineDeploymentInfoSpecRollbackToRevision) - prometheus.MustRegister(MachineDeploymentStatusCondition) - prometheus.MustRegister(MachineDeploymentStatusAvailableReplicas) - prometheus.MustRegister(MachineDeploymentStatusUnavailableReplicas) - prometheus.MustRegister(MachineDeploymentStatusReadyReplicas) - prometheus.MustRegister(MachineDeploymentStatusUpdatedReplicas) - prometheus.MustRegister(MachineDeploymentStatusCollisionCount) - prometheus.MustRegister(MachineDeploymentStatusReplicas) - prometheus.MustRegister(MachineDeploymentStatusFailedMachines) - */ - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: 
namespace, - Subsystem: cloudAPISubsystem, - Name: "requests_total", - Help: "Number of Cloud Service API requests, partitioned by provider, and service.", - }, []string{"provider", "service"}, - ) +func registerMiscellaneousMetrics() { + prometheus.MustRegister(ScrapeFailedCounter) +} - promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cloudAPISubsystem, - Name: "requests_failed_total", - Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", - }, []string{"provider", "service"}, - ) +func init() { + registerMachineSubsystemMetrics() + registerCloudAPISubsystemMetrics() + registerMiscellaneousMetrics() }