From 740aa7f9c59b8032334663838980bfdb7f8ae7b1 Mon Sep 17 00:00:00 2001
From: Przemek Maciolek
Date: Wed, 11 Mar 2020 18:12:56 +0100
Subject: [PATCH 1/5] Use memory_limiter and better batch settings

---
 deploy/helm/sumologic/values.yaml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index e2933a57c9..72d968a9be 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -518,7 +518,7 @@ otelcol:
   memBallastSizeMib: "683"
   image:
     name: "sumologic/opentelemetry-collector"
-    tag: "0.2.6.5"
+    tag: "0.2.6.6"
     pullPolicy: IfNotPresent
   config:
     receivers:
@@ -533,6 +533,7 @@ otelcol:
       zipkin:
         endpoint: "0.0.0.0:9411"
     processors:
+      memory_limiter:
       k8s_tagger:
         passthrough: false
         extract:
@@ -579,7 +580,8 @@ otelcol:
         queue_size: 10000
         retry_on_failure: true
       batch:
-        send_batch_size: 1024
+        send_batch_size: 512
+        num_tickers: 10
         timeout: 5s
     extensions:
       health_check: {}
@@ -595,5 +597,5 @@ otelcol:
       pipelines:
         traces:
           receivers: [jaeger, zipkin, opencensus]
-          processors: [k8s_tagger, batch, queued_retry]
+          processors: [memory_limiter, k8s_tagger, batch, queued_retry]
           exporters: [zipkin]

From ac1d10406695ac0eb3a1a59156c20d8b428fd25f Mon Sep 17 00:00:00 2001
From: Przemek Maciolek
Date: Wed, 11 Mar 2020 18:41:52 +0100
Subject: [PATCH 2/5] Update memory limiter settings

---
 deploy/helm/sumologic/values.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index 72d968a9be..8399b9f6ec 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -534,6 +534,8 @@ otelcol:
         endpoint: "0.0.0.0:9411"
     processors:
       memory_limiter:
+        check_interval: 5s
+        limit_mib: 4000
       k8s_tagger:
         passthrough: false
         extract:

From 824cb43045691be70531b7a00e9a74ea253dc397 Mon Sep 17 00:00:00 2001
From: Przemek Maciolek
Date: Wed, 11 Mar 2020 19:10:58 +0100
Subject: [PATCH 3/5] Add in-config descriptions of the settings

---
 deploy/helm/sumologic/values.yaml | 34 +++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index 8399b9f6ec..977f13749a 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -533,11 +533,11 @@ otelcol:
       zipkin:
         endpoint: "0.0.0.0:9411"
     processors:
-      memory_limiter:
-        check_interval: 5s
-        limit_mib: 4000
+      # Tags spans with K8S metadata, based on the context IP
       k8s_tagger:
+        # When true, only IP is assigned and passed (so it could be tagged on another collector)
         passthrough: false
+        # Extracted fields and assigned names
         extract:
           metadata:
             # extract the following well-known metadata fields
@@ -577,13 +577,37 @@ otelcol:
           labels:
             - tag_name: pod_label_%s
               key: "*"
+
+      # The memory_limiter processor is used to prevent out-of-memory situations on the collector.
+      memory_limiter:
+        # check_interval is the time between measurements of memory usage for the
+        # purposes of avoiding going over the limits. Defaults to zero, so no
+        # checks will be performed. Values below 1 second are not recommended since
+        # it can result in unnecessary CPU consumption.
+        check_interval: 5s
+
+        # Maximum amount of memory, in MiB, targeted to be allocated by the process heap.
+        # Note that typically the total memory usage of the process will be about 50MiB higher
+        # than this value.
+        limit_mib: 4000
+
+      # The queued_retry processor uses a bounded queue to relay batches from the receiver or previous
+      # processor to the next processor.
       queued_retry:
+        # Number of workers that dequeue batches
         num_workers: 16
+        # Maximum number of batches kept in memory before data is dropped
         queue_size: 10000
+        # Whether to retry on failure or give up and drop
         retry_on_failure: true
+
+      # The batch processor accepts spans and places them into batches grouped by node and resource
       batch:
+        # Number of spans after which a batch will be sent regardless of time
         send_batch_size: 512
+        # Number of tickers that loop over batch buckets
         num_tickers: 10
+        # Time duration after which a batch will be sent regardless of size
         timeout: 5s
     extensions:
       health_check: {}
@@ -591,7 +615,9 @@ otelcol:
       zipkin:
         url: "exporters.zipkin.url_replace"
       # Following generates verbose logs with span content, useful to verify what
-      # metadata is being tagged. To enable, uncomment and add "logging" to exporters below
+      # metadata is being tagged. To enable, uncomment and add "logging" to exporters below.
+      # There are two levels that could be used: `debug` and `info`, with the former
+      # being much more verbose and including (sampled) span content
       # logging:
       #   loglevel: debug
     service:

From 31d5805809e0e6ebafe425d4ebb20c16d44dfab7 Mon Sep 17 00:00:00 2001
From: Przemek Maciolek
Date: Wed, 11 Mar 2020 19:17:11 +0100
Subject: [PATCH 4/5] Update the memory_limit to match pod limit

---
 deploy/helm/sumologic/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index 977f13749a..fa982802fd 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -589,7 +589,7 @@ otelcol:
         # Maximum amount of memory, in MiB, targeted to be allocated by the process heap.
         # Note that typically the total memory usage of the process will be about 50MiB higher
         # than this value.
-        limit_mib: 4000
+        limit_mib: 1900

       # The queued_retry processor uses a bounded queue to relay batches from the receiver or previous
       # processor to the next processor.

From a5b4636547bf72bc33667f40a19b52936299ab2b Mon Sep 17 00:00:00 2001
From: Przemek Maciolek
Date: Wed, 11 Mar 2020 19:37:13 +0100
Subject: [PATCH 5/5] Adjust the batch size further

---
 deploy/helm/sumologic/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index fa982802fd..f88a26b445 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -604,7 +604,7 @@ otelcol:
       # The batch processor accepts spans and places them into batches grouped by node and resource
       batch:
         # Number of spans after which a batch will be sent regardless of time
-        send_batch_size: 512
+        send_batch_size: 256
         # Number of tickers that loop over batch buckets
         num_tickers: 10
         # Time duration after which a batch will be sent regardless of size
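
Net effect of the series (reviewer note, not part of the patches): once all five commits are applied, the otelcol processor configuration should end up roughly as sketched below, with memory_limiter placed first in the traces pipeline. This is a reconstruction from the hunks above; indentation, omitted keys, and the full k8s_tagger extract block may differ from the actual values.yaml.

otelcol:
  memBallastSizeMib: "683"
  image:
    name: "sumologic/opentelemetry-collector"
    tag: "0.2.6.6"
  config:
    processors:
      memory_limiter:
        check_interval: 5s
        limit_mib: 1900
      k8s_tagger:
        passthrough: false
        # extract: ... (unchanged by this series, omitted here)
      queued_retry:
        num_workers: 16
        queue_size: 10000
        retry_on_failure: true
      batch:
        send_batch_size: 256
        num_tickers: 10
        timeout: 5s
    service:
      pipelines:
        traces:
          receivers: [jaeger, zipkin, opencensus]
          processors: [memory_limiter, k8s_tagger, batch, queued_retry]
          exporters: [zipkin]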
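
Rough sizing sketch for the memory settings touched here. Only memBallastSizeMib and limit_mib come from the chart; the 2Gi container limit is an assumption used for illustration, since resource limits are not part of these hunks.

# Illustrative arithmetic only; the container limit is assumed, not taken from these patches.
# assumed container memory limit:          2Gi = 2048 MiB
# memory_limiter limit_mib (PATCH 4/5):    1900 MiB, leaving ~148 MiB below the assumed limit
# expected process overhead (per comment): ~50 MiB above limit_mib
# memBallastSizeMib (from the chart):      683 MiB, roughly one third of limit_mib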