Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hook scrubber as logrus formatter #17781

Merged
merged 1 commit into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions components/blobserve/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/docker/docker-credential-helpers v0.6.4 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gitpod-io/gitpod/components/scrubber v0.0.0-20230530092704-f7d81a8eef6f // indirect
github.com/gitpod-io/gitpod/registry-facade/api v0.0.0-00010101000000-000000000000 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand Down Expand Up @@ -80,6 +81,7 @@ require (
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/minio/sha256-simd v1.0.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
Expand Down
4 changes: 4 additions & 0 deletions components/blobserve/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions components/common-go/BUILD.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ packages:
type: go
srcs:
- "**"
deps:
- components/scrubber:lib
config:
packaging: library
5 changes: 5 additions & 0 deletions components/common-go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ require (
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/gitpod-io/gitpod/components/scrubber v0.0.0-20230530092704-f7d81a8eef6f
github.com/go-logr/logr v1.2.3 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
Expand Down Expand Up @@ -71,6 +72,8 @@ require (
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
)

require github.com/mitchellh/reflectwalk v1.0.2 // indirect

replace k8s.io/api => k8s.io/api v0.26.2

replace k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.26.2
Expand Down Expand Up @@ -120,3 +123,5 @@ replace k8s.io/kubectl => k8s.io/kubectl v0.26.2
replace k8s.io/mount-utils => k8s.io/mount-utils v0.26.2

replace k8s.io/pod-security-admission => k8s.io/pod-security-admission v0.26.2

replace github.com/gitpod-io/gitpod/components/scrubber => ../scrubber // leeway
2 changes: 2 additions & 0 deletions components/common-go/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 60 additions & 13 deletions components/common-go/log/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ import (
"fmt"
"os"
"path"
"reflect"
"runtime"
"strings"
"time"

"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"

"github.com/gitpod-io/gitpod/components/scrubber"
)

// Log is the application wide console logger
Expand Down Expand Up @@ -52,19 +55,7 @@ func Init(service, version string, json, verbose bool) {
log.SetReportCaller(true)

if json {
Log.Logger.SetFormatter(&gcpFormatter{
log.JSONFormatter{
FieldMap: log.FieldMap{
log.FieldKeyMsg: "message",
},
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
s := strings.Split(f.Function, ".")
funcName := s[len(s)-1]
return funcName, fmt.Sprintf("%s:%d", path.Base(f.File), f.Line)
},
TimestampFormat: time.RFC3339Nano,
},
})
Log.Logger.SetFormatter(newGcpFormatter(false))
} else {
Log.Logger.SetFormatter(&logrus.TextFormatter{
TimestampFormat: time.RFC3339Nano,
Expand All @@ -83,6 +74,24 @@ func Init(service, version string, json, verbose bool) {
// gcpFormatter formats errors according to GCP rules, see
type gcpFormatter struct {
log.JSONFormatter
skipScrub bool
}

func newGcpFormatter(skipScrub bool) *gcpFormatter {
return &gcpFormatter{
skipScrub: skipScrub,
JSONFormatter: log.JSONFormatter{
FieldMap: log.FieldMap{
log.FieldKeyMsg: "message",
},
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
s := strings.Split(f.Function, ".")
funcName := s[len(s)-1]
return funcName, fmt.Sprintf("%s:%d", path.Base(f.File), f.Line)
},
TimestampFormat: time.RFC3339Nano,
},
}
}

func (f *gcpFormatter) Format(entry *log.Entry) ([]byte, error) {
Expand Down Expand Up @@ -121,6 +130,44 @@ func (f *gcpFormatter) Format(entry *log.Entry) ([]byte, error) {
entry.Data["@type"] = "type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent"
}

if f.skipScrub {
return f.JSONFormatter.Format(entry)
}

for key, value := range entry.Data {
if key == "error" || key == "severity" || key == "message" || key == "time" || key == "serviceContext" || key == "context" {
continue
}
switch v := value.(type) {
case string:
entry.Data[key] = scrubber.Default.KeyValue(key, v)
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, complex64, complex128:
// no-op
case bool:
// no-op
case time.Time, time.Duration:
// no-op
case scrubber.TrustedValue:
// no-op
default:
// handling of named primitive types
rv := reflect.ValueOf(value)
switch rv.Kind() {
case reflect.String:
entry.Data[key] = scrubber.Default.KeyValue(key, rv.String())
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
// no-op
case reflect.Bool:
// no-op
default:
// implement TrustedValue for custom types
// make sure to use the scrubber.Default to scrub sensitive data
entry.Data[key] = "[redacted:nested]"
}
}
}
return f.JSONFormatter.Format(entry)
}

Expand Down
190 changes: 190 additions & 0 deletions components/common-go/log/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ package log

import (
"bytes"
"fmt"
"io"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -85,3 +89,189 @@ func TestFromBuffer(t *testing.T) {
}
}
}

type TestWorkspaceInfo struct {
WorkspaceId string
InstanceId string
WorkspaceContextUrl string
}

func TestScrubFormatter(t *testing.T) {
logger := logrus.New()
logger.SetFormatter(newGcpFormatter(false))

var buffer bytes.Buffer
logger.SetOutput(&buffer)

createInfo := func() *TestWorkspaceInfo {
return &TestWorkspaceInfo{
WorkspaceId: "1234567890",
InstanceId: "1234567890",
WorkspaceContextUrl: "https://github.com/gitpod-io/gitpod",
}
}
info := createInfo()
workspaceID := "1234567890"

logTime := time.Now()
logger.WithTime(logTime).WithField("info", info).WithField("workspaceID", workspaceID).
WithError(fmt.Errorf("some test error")).
WithFields(ServiceContext("test", "1.0.0")).
Info("email: anton@gitpod.io")

expectation := fmt.Sprintf(
`{"error":"some test error","info":"[redacted:nested]","level":"info","message":"email: anton@gitpod.io","serviceContext":{"service":"test","version":"1.0.0"},"severity":"INFO","time":"%s","workspaceID":"[redacted:md5:e807f1fcf82d132f9bb018ca6738a19f]"}`,
logTime.Format(time.RFC3339Nano),
)
actual := strings.TrimSpace(buffer.String())
if diff := cmp.Diff(expectation, actual); diff != "" {
t.Errorf("unexpected result (-want +got):\n%s", diff)
}
if diff := cmp.Diff(createInfo(), info); diff != "" {
t.Errorf("info: unexpected result (-want +got):\n%s", diff)
}
if diff := cmp.Diff("1234567890", workspaceID); diff != "" {
t.Errorf("workspaceID: unexpected result (-want +got):\n%s", diff)
}
}

/*
12/06/2023 - no cahce fields
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkSimpleScrubFormatter/json-32 312109 3617 ns/op 2035 B/op 31 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 244150 4753 ns/op 2242 B/op 37 allocs/op

09/06/2023 - with redacting nested fields
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkSimpleScrubFormatter/json-32 281763 3666 ns/op 2035 B/op 31 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 278829 4248 ns/op 2204 B/op 37 allocs/op

05/06/2023 - with json scrubbing
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkSimpleScrubFormatter/json-32 321686 3546 ns/op 2035 B/op 31 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 148384 8308 ns/op 3781 B/op 76 allocs/op

02/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - with cache lock and eviction
BenchmarkSimpleScrubFormatter/json-32 359746 3144 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 312946 3796 ns/op 2083 B/op 34 allocs/op

02/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - precomputed cache
BenchmarkSimpleScrubFormatter/json-32 367134 3215 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 302583 3869 ns/op 2083 B/op 34 allocs/op

01/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - with caching without lower case conversion
BenchmarkSimpleScrubFormatter/json-32 377443 3059 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 311752 3704 ns/op 2083 B/op 34 allocs/op

31/05/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - skip message, scrub with KeyValue for string type
BenchmarkSimpleScrubFormatter/json-32 376062 3019 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 313464 3798 ns/op 2097 B/op 35 allocs/op

31/05/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - skip message
BenchmarkSimpleScrubFormatter/json-32 380050 2991 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 270021 4420 ns/op 2233 B/op 43 allocs/op

30/05/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - scrubbing takes almost twice as long because of relection and regex replacements
BenchmarkSimpleScrubFormatter/json-32 368532 3095 ns/op 1914 B/op 28 allocs/op
BenchmarkSimpleScrubFormatter/scrub+json-32 200944 5858 ns/op 2666 B/op 53 allocs/op
0 1.21s (flat, cum) 38.29% of Total
. . 94:func (formatter *scrubFormatter) Format(entry *logrus.Entry) ([]byte, error) {
. 300ms 95: entry.Message = scrubber.Default.Value(entry.Message)
akosyakov marked this conversation as resolved.
Show resolved Hide resolved
. 260ms 96: err := scrubber.Default.Struct(entry.Data)
akosyakov marked this conversation as resolved.
Show resolved Hide resolved
. . 97: if err != nil {
. . 98: return nil, fmt.Errorf("cannot scrub log entry: %w", err)
. . 99: }
. 650ms 100: return formatter.delegate.Format(entry)
. . 101:}
*/
func BenchmarkSimpleScrubFormatter(b *testing.B) {
logger := logrus.New()
logger.SetOutput(io.Discard)

run := func() {
logger.WithField("workspaceID", "1234567890").Info("email: anton@gitpod.io")
}

logger.SetFormatter(newGcpFormatter(true))
b.Run("json", func(b *testing.B) {
for i := 0; i < b.N; i++ {
run()
}
})

logger.SetFormatter(newGcpFormatter(false))
b.Run("scrub+json", func(b *testing.B) {
for i := 0; i < b.N; i++ {
run()
}
})

}

/*
12/06/2023 - no cahce fields
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkStructScrubFormatter/json-32 304662 3860 ns/op 2091 B/op 32 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 320354 3698 ns/op 2091 B/op 32 allocs/op

09/06/2023 - with redacting nested fields
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkStructScrubFormatter/json-32 315826 3706 ns/op 2091 B/op 32 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 322910 3647 ns/op 2091 B/op 32 allocs/op

05/06/2023 - with json scrubbing
cpu: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
BenchmarkStructScrubFormatter/json-32 310472 3842 ns/op 2091 B/op 32 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 109390 10808 ns/op 5101 B/op 105 allocs/op

02/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz - with cache lock and eviction
BenchmarkStructScrubFormatter/json-32 354174 3314 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 242992 4896 ns/op 2365 B/op 44 allocs/op

02/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz - precomputed cache
BenchmarkStructScrubFormatter/json-32 344023 3465 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 240534 4850 ns/op 2365 B/op 44 allocs/op

01/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - with caching without lower case conversion
BenchmarkStructScrubFormatter/json-32 354729 3259 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 232270 4767 ns/op 2365 B/op 44 allocs/op

01/06/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - with substring mathing - 3 times slower
BenchmarkStructScrubFormatter/json-32 351439 3388 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 115599 9032 ns/op 2396 B/op 44 allocs/op

31/05/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - early exit with SkipEntry + no double traversing
BenchmarkStructScrubFormatter/json-32 369788 3885 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 232442 5092 ns/op 2421 B/op 47 allocs/op

31/05/2023 (Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz) - initial, still double
BenchmarkStructScrubFormatter/json-32 349736 3240 ns/op 1971 B/op 29 allocs/op
BenchmarkStructScrubFormatter/scrub+json-32 142266 8295 ns/op 2580 B/op 53 allocs/op
*/
func BenchmarkStructScrubFormatter(b *testing.B) {
logger := logrus.New()
logger.SetOutput(io.Discard)

run := func() {
logger.WithField("info", &TestWorkspaceInfo{
WorkspaceId: "1234567890",
InstanceId: "1234567890",
WorkspaceContextUrl: "https://github.com/gitpod-io/gitpod",
}).Info("resolve workspace info failed")
}

logger.SetFormatter(newGcpFormatter(true))
b.Run("json", func(b *testing.B) {
for i := 0; i < b.N; i++ {
run()
}
})

logger.SetFormatter(newGcpFormatter(false))
b.Run("scrub+json", func(b *testing.B) {
for i := 0; i < b.N; i++ {
run()
}
})

}
Loading