Skip to content

Commit

Permalink
Merge pull request kubernetes#1787 from jupblb/module-measurements
Browse files (browse the repository at this point in the history)
Add measurements module
  • Loading branch information
k8s-ci-robot committed Apr 27, 2021
2 parents 7dbc89a + f7b2f8a commit c64d9ae
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 147 deletions.
154 changes: 7 additions & 147 deletions clusterloader2/testing/load/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,11 @@
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .MEDIUM_STATEFUL_SETS_PER_NAMESPACE 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$CHECK_IF_PODS_ARE_UPDATED := DefaultParam .CL2_CHECK_IF_PODS_ARE_UPDATED true}}
{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY true}}
{{$PROMETHEUS_SCRAPE_KUBE_STATE_METRICS := DefaultParam .PROMETHEUS_SCRAPE_KUBE_STATE_METRICS false}}
{{$ENABLE_PVS := DefaultParam .CL2_ENABLE_PVS true}}
{{$DISABLE_DAEMONSETS := DefaultParam .CL2_DISABLE_DAEMONSETS false}}
{{$ENABLE_NETWORKPOLICIES := DefaultParam .CL2_ENABLE_NETWORKPOLICIES false}}
{{$ENABLE_NODE_LOCAL_DNS_LATENCY := DefaultParam .CL2_ENABLE_NODE_LOCAL_DNS_LATENCY false}}
{{$NODE_LOCAL_DNS_LATENCY_THRESHOLD := DefaultParam .CL2_NODE_LOCAL_DNS_LATENCY_THRESHOLD "5s"}}
{{$ENABLE_DNSTESTS := DefaultParam .CL2_ENABLE_DNSTESTS false}}
{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$ENABLE_CLUSTER_OOMS_TRACKER := DefaultParam .CL2_ENABLE_CLUSTER_OOMS_TRACKER true}}
{{$CLUSTER_OOMS_IGNORED_PROCESSES := DefaultParam .CL2_CLUSTER_OOMS_IGNORED_PROCESSES ""}}
{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}
{{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK true}}
{{$RESTART_COUNT_THRESHOLD_OVERRIDES:= DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
{{$ALLOWED_SLOW_API_CALLS := DefaultParam .CL2_ALLOWED_SLOW_API_CALLS 0}}
{{$CUSTOM_API_CALL_THRESHOLDS := DefaultParam .CUSTOM_API_CALL_THRESHOLDS ""}}
{{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS false}}
{{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE true}}
{{$ENABLE_API_AVAILABILITY_MEASUREMENT := DefaultParam .CL2_ENABLE_API_AVAILABILITY_MEASUREMENT false}}
{{$API_AVAILABILITY_PERCENTAGE_THRESHOLD := DefaultParam .CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD 0.0}}
# Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
Expand Down Expand Up @@ -86,9 +71,6 @@
# in an unnecessary number of namespaces.
{{$schedulerThroughputNamespaces := IfThenElse $IS_SMALL_CLUSTER 1 $schedulerThroughputNamespaces}}

# Probe measurements shared parameter
{{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT := DefaultParam .CL2_PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT "15m"}}

# Command to be executed
{{$EXEC_COMMAND := DefaultParam .CL2_EXEC_COMMAND nil}}
{{$EXIT_AFTER_EXEC := DefaultParam .CL2_EXIT_AFTER_EXEC false}}
Expand Down Expand Up @@ -131,70 +113,10 @@ chaosMonkey:
simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
measurements:
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: start
- Identifier: APIResponsivenessPrometheusSimple
Method: APIResponsivenessPrometheus
Params:
action: start
- Identifier: CreatePhasePodStartupLatency
Method: PodStartupLatency
Params:
action: start
labelSelector: group = load
threshold: 1h # TODO(https://github.com/kubernetes/perf-tests/issues/1024): Ideally, this should be 5s
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: start
checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
{{if $ENABLE_NODE_LOCAL_DNS_LATENCY}}
- Identifier: NodeLocalDNSLatency
Method: NodeLocalDNSLatencyPrometheus
Params:
action: start
{{end}}
- Identifier: SLOMeasurement
Method: SLOMeasurement
Params:
action: start
checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}}
- Identifier: NetworkProgrammingLatency
Method: NetworkProgrammingLatency
Params:
action: start
{{end}}
{{if $PROMETHEUS_SCRAPE_KUBE_STATE_METRICS}}
- Identifier: KubeStateMetricsLatency
Method: KubeStateMetricsLatency
Params:
action: start
{{end}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- Identifier: APIAvailability
Method: APIAvailability
Params:
action: start
pollFrequency: "5s"
hostPollTimeoutSeconds: 5
threshold: {{$API_AVAILABILITY_PERCENTAGE_THRESHOLD}}
{{end}}
- Identifier: TestMetrics
Method: TestMetrics
Params:
- module:
path: /modules/measurements.yaml
params:
action: start
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
clusterOOMsIgnoredProcesses: {{$CLUSTER_OOMS_IGNORED_PROCESSES}}
restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}

- module:
path: modules/services.yaml
Expand Down Expand Up @@ -851,69 +773,7 @@ steps:
bigServicesPerNamespace: 0
{{end}} # not EXIT_AFTER_EXEC

- name: Collecting measurements
measurements:
- Identifier: APIResponsivenessPrometheusSimple
Method: APIResponsivenessPrometheus
Params:
action: gather
enableViolations: {{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE}}
useSimpleLatencyQuery: true
summaryName: APIResponsivenessPrometheus_simple
allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
customThresholds: {{YamlQuote $CUSTOM_API_CALL_THRESHOLDS 4}}
{{if not $USE_SIMPLE_LATENCY_QUERY}}
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: gather
enableViolations: {{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS}}
allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
customThresholds: {{YamlQuote $CUSTOM_API_CALL_THRESHOLDS 4}}
{{end}}
- Identifier: CreatePhasePodStartupLatency
Method: PodStartupLatency
Params:
action: gather
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: gather
{{if $ENABLE_NODE_LOCAL_DNS_LATENCY}}
- Identifier: NodeLocalDNSLatency
Method: NodeLocalDNSLatencyPrometheus
Params:
action: gather
enableViolations: true
threshold: {{$NODE_LOCAL_DNS_LATENCY_THRESHOLD}}
{{end}}
- Identifier: SLOMeasurement
Method: SLOMeasurement
Params:
action: gather
{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}}
- Identifier: NetworkProgrammingLatency
Method: NetworkProgrammingLatency
Params:
action: gather
{{end}}
{{if $PROMETHEUS_SCRAPE_KUBE_STATE_METRICS}}
- Identifier: KubeStateMetricsLatency
Method: KubeStateMetricsLatency
Params:
action: gather
{{end}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- Identifier: APIAvailability
Method: APIAvailability
Params:
action: gather
{{end}}
- Identifier: TestMetrics
Method: TestMetrics
Params:
action: gather
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
- module:
path: /modules/measurements.yaml
params:
action: gather
104 changes: 104 additions & 0 deletions clusterloader2/testing/load/modules/measurements.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
## Measurement module: defines the test-scoped measurements in a single place.
## The same list is rendered once per action, so the including config only has
## to pass the action it wants.

## Input params
# action: forwarded verbatim as the `action` param of every measurement below
# and used as the prefix of the step name.
# Valid actions: "start", "gather"
{{$action := .action}}

## Feature-gates and configs:
# Every value is resolved with DefaultParam: the named override is used when it
# is provided, otherwise the literal given as the last argument.
{{$ALLOWED_SLOW_API_CALLS := DefaultParam .CL2_ALLOWED_SLOW_API_CALLS 0}}
{{$API_AVAILABILITY_PERCENTAGE_THRESHOLD := DefaultParam .CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD 0.0}}
{{$CLUSTER_OOMS_IGNORED_PROCESSES := DefaultParam .CL2_CLUSTER_OOMS_IGNORED_PROCESSES ""}}
{{$CUSTOM_API_CALL_THRESHOLDS := DefaultParam .CUSTOM_API_CALL_THRESHOLDS ""}}
{{$ENABLE_API_AVAILABILITY_MEASUREMENT := DefaultParam .CL2_ENABLE_API_AVAILABILITY_MEASUREMENT false}}
{{$ENABLE_CLUSTER_OOMS_TRACKER := DefaultParam .CL2_ENABLE_CLUSTER_OOMS_TRACKER true}}
{{$ENABLE_NODE_LOCAL_DNS_LATENCY := DefaultParam .CL2_ENABLE_NODE_LOCAL_DNS_LATENCY false}}
{{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK true}}
{{$ENABLE_SYSTEM_POD_METRICS := DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS false}}
{{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE true}}
{{$NODE_LOCAL_DNS_LATENCY_THRESHOLD := DefaultParam .CL2_NODE_LOCAL_DNS_LATENCY_THRESHOLD "5s"}}
{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY true}}
{{$PROMETHEUS_SCRAPE_KUBE_STATE_METRICS := DefaultParam .PROMETHEUS_SCRAPE_KUBE_STATE_METRICS false}}
{{$RESTART_COUNT_THRESHOLD_OVERRIDES := DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}

# Probe measurements shared parameter
# (used by both InClusterNetworkLatency and SLOMeasurement below).
{{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT := DefaultParam .CL2_PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT "15m"}}

# Layout note: sequences are written flush with their parent key, which puts
# every Params key at six spaces. Keep that when editing; the second argument
# of YamlQuote presumably controls continuation-line indentation and was chosen
# for this layout (TODO confirm against the helper's implementation).
steps:
# Renders as "starting measurements" or "gathering measurements".
- name: "{{$action}}ing measurements"
  measurements:
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: {{$action}}
# NOTE(review): this guard only drops the violation-related params; the
# measurement itself is still emitted for every action even when the simple
# latency query is requested. Confirm that is intended.
{{if not $USE_SIMPLE_LATENCY_QUERY}}
      enableViolations: {{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS}}
      allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
      customThresholds: {{YamlQuote $CUSTOM_API_CALL_THRESHOLDS 4}}
{{end}}
  # Same method as above under a second identifier, always using the simple
  # latency query and reporting under its own summary name.
  - Identifier: APIResponsivenessPrometheusSimple
    Method: APIResponsivenessPrometheus
    Params:
      action: {{$action}}
      enableViolations: {{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE}}
      useSimpleLatencyQuery: true
      summaryName: APIResponsivenessPrometheus_simple
      allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
      customThresholds: {{YamlQuote $CUSTOM_API_CALL_THRESHOLDS 4}}
  - Identifier: CreatePhasePodStartupLatency
    Method: PodStartupLatency
    Params:
      action: {{$action}}
      labelSelector: group = load
      threshold: 1h  # TODO(https://github.com/kubernetes/perf-tests/issues/1024): Ideally, this should be 5s
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: {{$action}}
      checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
      # 2 replicas plus one more per 100 nodes.
      replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
# Opt-in: only emitted when node-local DNS latency measurement is enabled.
{{if $ENABLE_NODE_LOCAL_DNS_LATENCY}}
  - Identifier: NodeLocalDNSLatency
    Method: NodeLocalDNSLatencyPrometheus
    Params:
      action: {{$action}}
      enableViolations: true
      threshold: {{$NODE_LOCAL_DNS_LATENCY_THRESHOLD}}
{{end}}
  - Identifier: SLOMeasurement
    Method: SLOMeasurement
    Params:
      action: {{$action}}
      checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
      # 2 replicas plus one more per 100 nodes.
      replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
# Only emitted when PROMETHEUS_SCRAPE_KUBE_PROXY is true (the default here).
{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}}
  - Identifier: NetworkProgrammingLatency
    Method: NetworkProgrammingLatency
    Params:
      action: {{$action}}
{{end}}
# Only emitted when PROMETHEUS_SCRAPE_KUBE_STATE_METRICS is true (default false).
{{if $PROMETHEUS_SCRAPE_KUBE_STATE_METRICS}}
  - Identifier: KubeStateMetricsLatency
    Method: KubeStateMetricsLatency
    Params:
      action: {{$action}}
{{end}}
# Opt-in: only emitted when the API availability measurement is enabled.
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
  - Identifier: APIAvailability
    Method: APIAvailability
    Params:
      action: {{$action}}
      pollFrequency: "5s"
      hostPollTimeoutSeconds: 5
      threshold: {{$API_AVAILABILITY_PERCENTAGE_THRESHOLD}}
{{end}}
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: {{$action}}
      systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
      clusterOOMsIgnoredProcesses: {{YamlQuote $CLUSTER_OOMS_IGNORED_PROCESSES 4}}
      clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
      restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
      enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}

0 comments on commit c64d9ae

Please sign in to comment.