diff --git a/cache/cache.go b/cache/cache.go index c9d9314cd..997cedef4 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -90,10 +90,9 @@ const ( ) type BackendConfig struct { - Backend string `yaml:"backend"` - Memcached MemcachedClientConfig `yaml:"memcached"` - Redis RedisClientConfig `yaml:"redis"` - Opentelemetry bool `yaml:"opentelemetry"` + Backend string `yaml:"backend"` + Memcached MemcachedClientConfig `yaml:"memcached"` + Redis RedisClientConfig `yaml:"redis"` } // Validate the config. diff --git a/go.mod b/go.mod index edf5d5040..cc754642a 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,9 @@ require ( go.etcd.io/etcd/api/v3 v3.5.0 go.etcd.io/etcd/client/pkg/v3 v3.5.0 go.etcd.io/etcd/client/v3 v3.5.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0 go.opentelemetry.io/otel v1.16.0 + go.opentelemetry.io/otel/sdk v1.15.0 go.opentelemetry.io/otel/trace v1.16.0 go.uber.org/atomic v1.10.0 go.uber.org/goleak v1.2.0 @@ -97,12 +99,11 @@ require ( go.opentelemetry.io/otel/bridge/opentracing v1.15.0 // indirect go.opentelemetry.io/otel/exporters/jaeger v1.15.0 // indirect go.opentelemetry.io/otel/metric v1.16.0 // indirect - go.opentelemetry.io/otel/sdk v1.15.0 // indirect go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.17.0 // indirect golang.org/x/crypto v0.1.0 // indirect golang.org/x/mod v0.9.0 // indirect - golang.org/x/net v0.9.0 // indirect + golang.org/x/net v0.10.0 // indirect golang.org/x/oauth2 v0.7.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/text v0.9.0 // indirect diff --git a/go.sum b/go.sum index 6002cbe4d..1a63221fb 100644 --- a/go.sum +++ b/go.sum @@ -35,6 +35,7 @@ cloud.google.com/go v0.102.1/go.mod h1:XZ77E9qnTEnrgEOvr4xzfdX5TRo7fB4T2F4O6+34h cloud.google.com/go v0.104.0/go.mod h1:OO6xxXdJyvuJPcEPBLN9BJPD+jep5G1+2U5B5gkRYtA= cloud.google.com/go v0.105.0/go.mod h1:PrLgOJNe5nfE9UMxKxgXj4mD3voiP+YQ6gdt6KMFOKM= cloud.google.com/go v0.107.0/go.mod h1:wpc2eNrD7hXUTy8EKS10jkxpZBjASrORK7goS+3YX2I= +cloud.google.com/go v0.110.0 h1:Zc8gqp3+a9/Eyph2KDmcGaPtbKRIoqq4YTlL4NMD0Ys= cloud.google.com/go v0.110.0/go.mod h1:SJnCLqQ0FCFGSZMUNUf84MV3Aia54kn7pi8st7tMzaY= cloud.google.com/go/accessapproval v1.4.0/go.mod h1:zybIuC3KpDOvotz59lFe5qxRZx6C75OtwbisN56xYB4= cloud.google.com/go/accessapproval v1.5.0/go.mod h1:HFy3tuiGvMdcd/u+Cu5b9NkO1pEICJ46IR82PoUdplw= @@ -145,10 +146,12 @@ cloud.google.com/go/compute v1.12.1/go.mod h1:e8yNOBcBONZU1vJKCvCoDw/4JQsA0dpM4x cloud.google.com/go/compute v1.13.0/go.mod h1:5aPTS0cUNMIc1CE546K+Th6weJUNQErARyZtRXDJ8GE= cloud.google.com/go/compute v1.14.0/go.mod h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo= cloud.google.com/go/compute v1.15.1/go.mod h1:bjjoF/NtFUrkD/urWfdHaKuOPDR5nWIs63rR+SXhcpA= +cloud.google.com/go/compute v1.18.0 h1:FEigFqoDbys2cvFkZ9Fjq4gnHBP55anJ0yQyau2f9oY= cloud.google.com/go/compute v1.18.0/go.mod h1:1X7yHxec2Ga+Ss6jPyjxRxpu2uu7PLgsOVXvgU0yacs= cloud.google.com/go/compute/metadata v0.1.0/go.mod h1:Z1VN+bulIf6bt4P/C37K4DyZYZEXYonfTBHHFPO/4UU= cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= cloud.google.com/go/compute/metadata v0.2.1/go.mod h1:jgHgmJd2RKBGzXqF5LR2EZMGxBkeanZ9wwa75XHJgOM= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/contactcenterinsights v1.3.0/go.mod h1:Eu2oemoePuEFc/xKFPjbTuPSj0fYJcPls9TFlPNnHHY= cloud.google.com/go/contactcenterinsights v1.4.0/go.mod h1:L2YzkGbPsv+vMQMCADxJoT9YiTTnSEd6fEvCeHTYVck= @@ -1056,6 +1059,8 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0 h1:ZOLJc06r4CB42laIXg/7udr0pbZyuAihN10A/XuiQRY= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0/go.mod h1:5z+/ZWJQKXa9YT34fQNx5K8Hd1EoIhvtUygUQPqEOgQ= go.opentelemetry.io/contrib/samplers/jaegerremote v0.9.0 h1:zRi6a8uX+cJGTPLXRPjFEzN27a26k5R7LiLK87ntXgg= go.opentelemetry.io/contrib/samplers/jaegerremote v0.9.0/go.mod h1:pzJOLTppaPbiPjPZEwGRf0VWx6G07hhOqznjKXIMkEk= go.opentelemetry.io/otel v1.14.0/go.mod h1:o4buv+dJzx8rohcUeRmWUZhqupFvzWis188WlggnNeU= @@ -1229,8 +1234,8 @@ golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/grpcclient/instrumentation.go b/grpcclient/instrumentation.go index c8d352889..2d09f1f74 100644 --- a/grpcclient/instrumentation.go +++ b/grpcclient/instrumentation.go @@ -5,6 +5,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" "github.com/weaveworks/common/middleware" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" ) @@ -19,3 +20,15 @@ func Instrument(requestDuration *prometheus.HistogramVec) ([]grpc.UnaryClientInt middleware.StreamClientInstrumentInterceptor(requestDuration), } } + +func OtelInstrument(requestDuration *prometheus.HistogramVec) ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { + return []grpc.UnaryClientInterceptor{ + otelgrpc.UnaryClientInterceptor(), + middleware.ClientUserHeaderInterceptor, + middleware.UnaryClientInstrumentInterceptor(requestDuration), + }, []grpc.StreamClientInterceptor{ + otelgrpc.StreamClientInterceptor(), + middleware.StreamClientUserHeaderInterceptor, + middleware.StreamClientInstrumentInterceptor(requestDuration), + } +} diff --git a/spanlogger/otelspanlogger.go b/spanlogger/otelspanlogger.go new file mode 100644 index 000000000..04fd2da5f --- /dev/null +++ b/spanlogger/otelspanlogger.go @@ -0,0 +1,110 @@ +package spanlogger + +import ( + "context" + "fmt" + "reflect" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" +) + +type OtelSpanLogger struct { + log.Logger + trace.Span + sampled bool +} + +func OtelNew(ctx context.Context, tr trace.Tracer, logger log.Logger, method string, resolver TenantResolver, kvps ...interface{}) (*OtelSpanLogger, context.Context) { + ctx, sp := tr.Start(ctx, method) + defer sp.End() + if ids, err := resolver.TenantIDs(ctx); err == nil && len(ids) > 0 { + sp.SetAttributes(attribute.StringSlice(TenantIDsTagName, ids)) + } + lwc, sampled := withContext(ctx, logger, resolver, true) + l := &OtelSpanLogger{ + Logger: log.With(lwc, "method", method), + Span: sp, + sampled: sampled, + } + if len(kvps) > 0 { + level.Debug(l).Log(kvps...) + } + + ctx = context.WithValue(ctx, loggerCtxKey, logger) + return l, ctx +} + +func OtelFromContext(ctx context.Context, fallback log.Logger, resolver TenantResolver) *OtelSpanLogger { + logger, ok := ctx.Value(loggerCtxKey).(log.Logger) + if !ok { + logger = fallback + } + sp := trace.SpanFromContext(ctx) + lwc, sampled := withContext(ctx, logger, resolver, true) + return &OtelSpanLogger{ + Logger: lwc, + Span: sp, + sampled: sampled, + } +} + +func (s *OtelSpanLogger) Log(kvps ...interface{}) error { + s.Logger.Log(kvps...) + if !s.sampled { + return nil + } + fields, err := convertKVToAttributes(kvps...) + if err != nil { + return err + } + s.AddEvent("log", trace.WithAttributes(fields...)) + return nil +} + +func (s *OtelSpanLogger) Error(err error) error { + if err == nil || !s.sampled { + return err + } + s.Span.SetStatus(codes.Error, "") + s.Span.RecordError(err) + return err +} + +// convertKVToAttributes converts keyValues to a slice of attribute.KeyValue +func convertKVToAttributes(keyValues ...interface{}) ([]attribute.KeyValue, error) { + if len(keyValues)%2 != 0 { + return nil, fmt.Errorf("non-even keyValues len: %d", len(keyValues)) + } + fields := make([]attribute.KeyValue, len(keyValues)/2) + for i := 0; i*2 < len(keyValues); i++ { + key, ok := keyValues[i*2].(string) + if !ok { + return nil, fmt.Errorf("non-string key (pair #%d): %T", i, keyValues[i*2]) + } + value := keyValues[i*2+1] + typedVal := reflect.ValueOf(value) + + switch typedVal.Kind() { + case reflect.Bool: + fields[i] = attribute.Bool(key, typedVal.Bool()) + case reflect.String: + fields[i] = attribute.String(key, typedVal.String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + fields[i] = attribute.Int(key, int(typedVal.Int())) + case reflect.Float32, reflect.Float64: + fields[i] = attribute.Float64(key, typedVal.Float()) + default: + if typedVal.Kind() == reflect.Ptr && typedVal.IsNil() { + fields[i] = attribute.String(key, "nil") + continue + } + // When in doubt, coerce to a string + fields[i] = attribute.String(key, fmt.Sprintf("%v", value)) + } + } + return fields, nil +} diff --git a/spanlogger/otelspanlogger_test.go b/spanlogger/otelspanlogger_test.go new file mode 100644 index 000000000..200cc0a1b --- /dev/null +++ b/spanlogger/otelspanlogger_test.go @@ -0,0 +1,111 @@ +package spanlogger + +import ( + "context" + "testing" + + "github.com/go-kit/log" + "github.com/pkg/errors" + "github.com/stretchr/testify/require" + "github.com/weaveworks/common/user" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" +) + +func TestOtelSpanLogger_Log(t *testing.T) { + logger := log.NewNopLogger() + resolver := fakeResolver{} + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(tracetest.NewInMemoryExporter()), + ) + span, ctx := OtelNew(context.Background(), tp.Tracer("test"), logger, "test", resolver, "bar") + defer span.End() + _ = span.Log("foo") + newSpan := OtelFromContext(ctx, logger, resolver) + require.Equal(t, span.Span, newSpan.Span) + _ = newSpan.Log("bar") + noSpan := OtelFromContext(context.Background(), logger, resolver) + _ = noSpan.Log("foo") + require.Error(t, noSpan.Error(errors.New("err"))) + require.NoError(t, noSpan.Error(nil)) +} + +func TestOtelSpanLogger_CustomLogger(t *testing.T) { + var logged [][]interface{} + var logger funcLogger = func(keyvals ...interface{}) error { + logged = append(logged, keyvals) + return nil + } + resolver := fakeResolver{} + exp := tracetest.NewNoopExporter() + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(exp), + // Set the sampler to never sample so that traceID is not logged. + sdktrace.WithSampler(sdktrace.NeverSample()), + ) + span, ctx := OtelNew(context.Background(), tp.Tracer("test"), logger, "test", resolver) + _ = span.Log("msg", "original spanlogger") + + span = OtelFromContext(ctx, log.NewNopLogger(), resolver) + _ = span.Log("msg", "restored spanlogger") + + span = OtelFromContext(context.Background(), logger, resolver) + _ = span.Log("msg", "fallback spanlogger") + + expect := [][]interface{}{ + {"method", "test", "msg", "original spanlogger"}, + {"msg", "restored spanlogger"}, + {"msg", "fallback spanlogger"}, + } + + require.Equal(t, expect, logged) +} + +func TestOtelSpanCreatedWithTenantTag(t *testing.T) { + exp, sp := createOtelSpan(user.InjectOrgID(context.Background(), "team-a")) + defer sp.End() + + require.Equal(t, 1, len(exp.GetSpans().Snapshots())) + require.Equal(t, + []attribute.KeyValue{attribute.StringSlice(TenantIDsTagName, []string{"team-a"})}, + exp.GetSpans().Snapshots()[0].Attributes()) +} + +func TestOtelSpanCreatedWithoutTenantTag(t *testing.T) { + exp, sp := createOtelSpan(context.Background()) + defer sp.End() + require.Equal(t, 1, len(exp.GetSpans().Snapshots())) + + exist := false + for _, kv := range exp.GetSpans().Snapshots()[0].Attributes() { + if kv.Key == TenantIDsTagName { + exist = true + } + } + require.False(t, exist) +} + +// Using a no-op logger and no tracing provider, measure the overhead of a small log call. +func BenchmarkOtelSpanLogger(b *testing.B) { + _, sl := createOtelSpan(context.Background()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = sl.Log("msg", "foo", "more", "data") + } +} + +func createOtelSpan(ctx context.Context) (*tracetest.InMemoryExporter, *OtelSpanLogger) { + exp := tracetest.NewInMemoryExporter() + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(exp), + ) + otel.SetTracerProvider(tp) + tt := tp.Tracer("test") + + sl, _ := OtelNew(ctx, tt, log.NewNopLogger(), "get", fakeResolver{}) + // Force flush to ensure spans are reported before the test ends. + tp.ForceFlush(ctx) + return exp, sl +} diff --git a/spanlogger/spanlogger.go b/spanlogger/spanlogger.go index 75d363857..bf22229bd 100644 --- a/spanlogger/spanlogger.go +++ b/spanlogger/spanlogger.go @@ -9,8 +9,6 @@ import ( "github.com/opentracing/opentracing-go/ext" otlog "github.com/opentracing/opentracing-go/log" "github.com/weaveworks/common/tracing" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" ) type loggerCtxMarker struct{} @@ -39,12 +37,6 @@ type SpanLogger struct { sampled bool } -type SpanLoggerOpentelemetry struct { - log.Logger - trace.Span - sampled bool -} - // New makes a new SpanLogger with a log.Logger to send logs to. The provided context will have the logger attached // to it and can be retrieved with FromContext. func New(ctx context.Context, logger log.Logger, method string, resolver TenantResolver, kvps ...interface{}) (*SpanLogger, context.Context) { @@ -52,26 +44,7 @@ func New(ctx context.Context, logger log.Logger, method string, resolver TenantR if ids, err := resolver.TenantIDs(ctx); err == nil && len(ids) > 0 { span.SetTag(TenantIDsTagName, ids) } - lwc, sampled := withContext(ctx, logger, resolver) - l := &SpanLogger{ - Logger: log.With(lwc, "method", method), - Span: span, - sampled: sampled, - } - if len(kvps) > 0 { - level.Debug(l).Log(kvps...) - } - - ctx = context.WithValue(ctx, loggerCtxKey, logger) - return l, ctx -} - -func NewSpanLoger(tracer trace.Tracer, ctx context.Context, logger log.Logger, method string, resolver TenantResolver, kvps ...interface{}) (SpanLoggerInterface, context.Context) { - ctx, sp := tracer.Start(ctx, method) - if ids, err := resolver.TenantIDs(ctx); err == nil && len(ids) > 0 { - sp.SetAttributes(attribute.StringSlice(TenantIDsTagName, ids)) - } - lwc, sampled := withContext(ctx, logger, resolver) + lwc, sampled := withContext(ctx, logger, resolver, false) l := &SpanLogger{ Logger: log.With(lwc, "method", method), Span: span, @@ -98,7 +71,7 @@ func FromContext(ctx context.Context, fallback log.Logger, resolver TenantResolv if sp == nil { sp = opentracing.NoopTracer{}.StartSpan("noop") } - lwc, sampled := withContext(ctx, logger, resolver) + lwc, sampled := withContext(ctx, logger, resolver, false) return &SpanLogger{ Logger: lwc, Span: sp, @@ -131,13 +104,19 @@ func (s *SpanLogger) Error(err error) error { return err } -func withContext(ctx context.Context, logger log.Logger, resolver TenantResolver) (log.Logger, bool) { +func withContext(ctx context.Context, logger log.Logger, resolver TenantResolver, otel bool) (log.Logger, bool) { userID, err := resolver.TenantID(ctx) if err == nil && userID != "" { logger = log.With(logger, "user", userID) } - traceID, ok := tracing.ExtractSampledTraceID(ctx) + var traceID string + var ok bool + if otel { + traceID, ok = tracing.ExtractOpentelemetrySampledTraceID(ctx) + } else { + traceID, ok = tracing.ExtractSampledTraceID(ctx) + } if !ok { return logger, false }