Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tracing: Fix sampler defaults #5887

Merged
merged 3 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ version: 2.1

orbs:
go: circleci/go@1.7.1
git-shallow-clone: guitarrapc/git-shallow-clone@2.4.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to add this btw? @matej-g

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CI was failing because of https://cloud-native.slack.com/archives/CL25937SP/p1666777294865379, it's just a cherry-pick from the main but should not affect the release itself.


executors:
golang:
Expand All @@ -19,7 +20,7 @@ jobs:
environment:
GO111MODULE: 'on'
steps:
- checkout
- git-shallow-clone/checkout
- go/mod-download-cached
- setup_remote_docker:
version: 20.10.12
Expand Down Expand Up @@ -58,7 +59,7 @@ jobs:
GOBIN: "/home/circleci/.go_workspace/go/bin"
PROMU_VERSION: "0.5.0"
steps:
- checkout
- git-shallow-clone/checkout
- run: mkdir -p ${GOBIN}
- run: curl -L "https://github.com/prometheus/promu/releases/download/v${PROMU_VERSION}/promu-${PROMU_VERSION}.$(go env GOOS)-$(go env GOARCH).tar.gz" | tar --strip-components=1 -xzf - -C ${GOBIN}
- run: mv -f ${GOBIN}/promu "${GOBIN}/promu-v${PROMU_VERSION}"
Expand All @@ -71,7 +72,7 @@ jobs:
publish_main:
executor: golang
steps:
- checkout
- git-shallow-clone/checkout
- go/mod-download-cached
- setup_remote_docker:
version: 20.10.12
Expand All @@ -93,7 +94,7 @@ jobs:
publish_release:
executor: golang
steps:
- checkout
- git-shallow-clone/checkout
- go/mod-download-cached
- setup_remote_docker:
version: 20.10.12
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ NOTE: As semantic versioning states all 0.y.z releases can contain breaking chan

We use *breaking :warning:* to mark changes that are not backward compatible (relates only to v0.y.z releases.)

## Unreleased

## [v0.29.1](https://github.com/thanos-io/thanos/tree/release-0.29) - In progress

### Fixed

- [#5887](https://github.com/thanos-io/thanos/pull/5887) Tracing: Make sure rate limiting sampler is the default, as was the case in version pre-0.29.0.

### Added

### Changed

## [v0.29.0](https://github.com/thanos-io/thanos/tree/release-0.29) - 2022.11.03

### Fixed
Expand Down
45 changes: 30 additions & 15 deletions pkg/tracing/jaeger/config_yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ import (
tracesdk "go.opentelemetry.io/otel/sdk/trace"
)

const (
SamplerTypeRemote = "remote"
SamplerTypeProbabilistic = "probabilistic"
SamplerTypeConstant = "const"
SamplerTypeRateLimiting = "ratelimiting"
)

type ParentBasedSamplerConfig struct {
LocalParentSampled bool `yaml:"local_parent_sampled"`
RemoteParentSampled bool `yaml:"remote_parent_sampled"`
Expand Down Expand Up @@ -114,40 +121,48 @@ func getSamplingFraction(samplerType string, samplingFactor float64) float64 {

func getSampler(config Config) tracesdk.Sampler {
samplerType := config.SamplerType
if samplerType == "" {
samplerType = SamplerTypeRateLimiting
}
samplingFraction := getSamplingFraction(samplerType, config.SamplerParam)

var sampler tracesdk.Sampler
switch samplerType {
case "probabilistic":
sampler = tracesdk.ParentBased(tracesdk.TraceIDRatioBased(samplingFraction))
case "const":
case SamplerTypeProbabilistic:
sampler = tracesdk.TraceIDRatioBased(samplingFraction)
case SamplerTypeConstant:
if samplingFraction == 1.0 {
sampler = tracesdk.AlwaysSample()
} else {
sampler = tracesdk.NeverSample()
}
case "remote":
case SamplerTypeRemote:
remoteOptions := getRemoteOptions(config)
sampler = jaegerremote.New(config.ServiceName, remoteOptions...)
case "ratelimiting":
// Fallback always to default (rate limiting).
case SamplerTypeRateLimiting:
default:
// The same config options are applicable to both remote and rate-limiting samplers.
remoteOptions := getRemoteOptions(config)
sampler = jaegerremote.New(config.ServiceName, remoteOptions...)
sampler, ok := sampler.(*rateLimitingSampler)
if ok {
sampler.Update(config.SamplerParam)
}
default:
var root tracesdk.Sampler
var parentOptions []tracesdk.ParentBasedSamplerOption
if config.SamplerParentConfig.LocalParentSampled {
parentOptions = append(parentOptions, tracesdk.WithLocalParentSampled(root))
}
if config.SamplerParentConfig.RemoteParentSampled {
parentOptions = append(parentOptions, tracesdk.WithRemoteParentSampled(root))
}
sampler = tracesdk.ParentBased(root, parentOptions...)
}

// Use parent-based to make sure we respect the span parent, if
// it is sampled. Optionally, allow user to specify the
// parent-based options.
var parentOptions []tracesdk.ParentBasedSamplerOption
if config.SamplerParentConfig.LocalParentSampled {
parentOptions = append(parentOptions, tracesdk.WithLocalParentSampled(sampler))
}
if config.SamplerParentConfig.RemoteParentSampled {
parentOptions = append(parentOptions, tracesdk.WithRemoteParentSampled(sampler))
}
sampler = tracesdk.ParentBased(sampler, parentOptions...)

return sampler
}

Expand Down
43 changes: 41 additions & 2 deletions pkg/tracing/jaeger/jaeger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var parentConfig = ParentBasedSamplerConfig{LocalParentSampled: true}

// This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server
// it will be still enabled for all spans within this span.
func TestContextTracing_ClientEnablesTracing(t *testing.T) {
func TestContextTracing_ClientEnablesProbabilisticTracing(t *testing.T) {
exp := tracetest.NewInMemoryExporter()
config := Config{
SamplerType: "probabilistic",
Expand Down Expand Up @@ -65,7 +65,7 @@ func TestContextTracing_ClientEnablesTracing(t *testing.T) {

// This test shows that if sample factor will disable tracing on client process, when it would be enabled on server
// it will be still disabled for all spans within this span.
func TestContextTracing_ClientDisablesTracing(t *testing.T) {
func TestContextTracing_ClientDisablesProbabilisticTracing(t *testing.T) {
exp := tracetest.NewInMemoryExporter()

config := Config{
Expand Down Expand Up @@ -105,6 +105,45 @@ func TestContextTracing_ClientDisablesTracing(t *testing.T) {
tracing.ContextTracing_ClientDisablesTracing(t, exp, clientRoot, srvRoot, srvChild)
}

func TestContextTracing_ClientDisablesAlwaysOnSampling(t *testing.T) {
exp := tracetest.NewInMemoryExporter()

config := Config{
SamplerType: SamplerTypeConstant,
SamplerParam: 0,
}
sampler := getSampler(config)
tracerOtel := newTraceProvider(
context.Background(),
"tracerOtel",
log.NewNopLogger(),
tracesdk.NewSimpleSpanProcessor(exp),
sampler, // never sample
[]attribute.KeyValue{},
)
tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger())

clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a")

config.SamplerParam = 1
sampler2 := getSampler(config)
// Simulate Server process with different tracer, but with client span in context.
srvTracerOtel := newTraceProvider(
context.Background(),
"srvTracerOtel",
log.NewNopLogger(),
tracesdk.NewSimpleSpanProcessor(exp),
sampler2, // never sample
[]attribute.KeyValue{},
)
srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger())

srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b")
srvChild, _ := tracing.StartSpan(srvCtx, "bb")

tracing.ContextTracing_ClientDisablesTracing(t, exp, clientRoot, srvRoot, srvChild)
}

// This test shows that if span will contain special baggage (for example from special HTTP header), even when sample
// factor will disable client & server tracing, it will be still enabled for all spans within this span.
func TestContextTracing_ForceTracing(t *testing.T) {
Expand Down