Skip to content

Commit

Permalink
Define resource tracking mode based on cluster size
Browse files Browse the repository at this point in the history
  • Loading branch information
wojtek-t committed Nov 12, 2020
1 parent 3a0d9a9 commit a700174
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 37 deletions.
23 changes: 9 additions & 14 deletions clusterloader2/pkg/measurement/common/resource_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import (

const (
resourceUsageMetricName = "ResourceUsageSummary"
// maxNodeCountForAllNodes defines the threshold for cluster size above which
// we no longer gather resource usage from all system components on all nodes.
maxNodeCountForAllNodes = 1000
)

func init() {
Expand Down Expand Up @@ -67,10 +70,6 @@ func (e *resourceUsageMetricMeasurement) Execute(config *measurement.Config) ([]
if err != nil {
return nil, err
}
nodeMode, err := util.GetStringOrDefault(config.Params, "nodeMode", "")
if err != nil {
return nil, err
}
namespace, err := util.GetStringOrDefault(config.Params, "namespace", "kube-system")
if err != nil {
return nil, err
Expand All @@ -94,19 +93,15 @@ func (e *resourceUsageMetricMeasurement) Execute(config *measurement.Config) ([]
}
}
}
var nodesSet gatherers.NodesSet
switch nodeMode {
case "master":
nodesSet = gatherers.MasterNodes
case "masteranddns":
nodesSet = gatherers.MasterAndDNSNodes
case "masterandnondaemons":

// Compute the nodes set based on the cluster size.
nodeCount := config.ClusterFramework.GetClusterConfig().Nodes
nodesSet := gatherers.AllNodes
if nodeCount > maxNodeCountForAllNodes {
nodesSet = gatherers.MasterAndNonDaemons
default:
nodesSet = gatherers.AllNodes
}

klog.V(2).Infof("%s: starting resource usage collecting...", e)
klog.V(2).Infof("%s: starting resource usage collecting (mode %#v)...", e, nodesSet)
e.gatherer, err = gatherers.NewResourceUsageGatherer(config.ClusterFramework.GetClientSets().GetClient(), host, config.ClusterFramework.GetClusterConfig().KubeletPort,
provider, gatherers.ResourceGathererOptions{
InKubemark: provider.Name() == "kubemark",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,8 @@ type NodesSet int
const (
// AllNodes - all containers on all nodes
AllNodes NodesSet = 0
// MasterNodes - all containers on Master nodes only
MasterNodes NodesSet = 1
// MasterAndDNSNodes - all containers on Master nodes and DNS containers on other nodes
MasterAndDNSNodes NodesSet = 2
// MasterAndNonDaemons - all containers on Master nodes and non-daemons on other nodes.
MasterAndNonDaemons NodesSet = 3
MasterAndNonDaemons NodesSet = 1
)

// ResourceUsageSummary represents summary of resource usage per container.
Expand Down Expand Up @@ -126,12 +122,6 @@ func NewResourceUsageGatherer(c clientset.Interface, host string, port int, prov

nodesToConsider := make(map[string]bool)
for _, pod := range pods.Items {
if (options.Nodes == MasterNodes) && !masterNodes.Has(pod.Spec.NodeName) {
continue
}
if (options.Nodes == MasterAndDNSNodes) && !masterNodes.Has(pod.Spec.NodeName) && pod.Labels["k8s-app"] != "kube-dns" {
continue
}
if (options.Nodes == MasterAndNonDaemons) && !masterNodes.Has(pod.Spec.NodeName) && isDaemonPod(&pod) {
continue
}
Expand All @@ -141,7 +131,7 @@ func NewResourceUsageGatherer(c clientset.Interface, host string, port int, prov
for _, container := range pod.Status.ContainerStatuses {
g.containerIDs = append(g.containerIDs, container.Name)
}
if options.Nodes == MasterAndDNSNodes || options.Nodes == MasterAndNonDaemons {
if options.Nodes == MasterAndNonDaemons {
nodesToConsider[pod.Spec.NodeName] = true
}
}
Expand Down
3 changes: 0 additions & 3 deletions clusterloader2/testing/access-tokens/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
{{$qpsPerWorker := DefaultParam .CL2_ACCESS_TOKENS_QPS (MultiplyFloat 2.5 (DivideFloat .Nodes 5000))}}

# TestMetrics measurement variables
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK false}}
{{$RESTART_COUNT_THRESHOLD_OVERRIDES:= DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
Expand All @@ -59,7 +58,6 @@ steps:
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
Expand Down Expand Up @@ -174,7 +172,6 @@ steps:
Method: TestMetrics
Params:
action: gather
nodeMode: {{$NODE_MODE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
2 changes: 0 additions & 2 deletions clusterloader2/testing/density/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

#Constants
{{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}
Expand Down Expand Up @@ -87,7 +86,6 @@ steps:
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
Expand Down
2 changes: 0 additions & 2 deletions clusterloader2/testing/density/high-density-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

#Constants
{{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}
Expand Down Expand Up @@ -79,7 +78,6 @@ steps:
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# - If using Persistent Volumes, the default storage class must have volumeBindingMode: Immediate

# Cluster Variables
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}

# Test Variables
Expand Down Expand Up @@ -51,7 +50,6 @@ steps:
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
- Identifier: PodWithVolumesStartupLatency
Method: PodStartupLatency
Params:
Expand Down
2 changes: 0 additions & 2 deletions clusterloader2/testing/load/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# - Only half of Deployments will be assigned 1-1 to existing SVCs.

#Constants
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}
Expand Down Expand Up @@ -153,7 +152,6 @@ steps:
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
clusterOOMsIgnoredProcesses: {{$CLUSTER_OOMS_IGNORED_PROCESSES}}
Expand Down

0 comments on commit a700174

Please sign in to comment.