From 481d609de353b29888040ab4cfbd9f28cc362152 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Thu, 5 Sep 2024 12:55:27 +0100 Subject: [PATCH 1/3] always use derived context for audit logging, increase timeout to 10secs max --- audit/broker.go | 94 +++++++++++--------------------------------- audit/broker_test.go | 79 ++++++++++++++++++------------------- 2 files changed, 61 insertions(+), 112 deletions(-) diff --git a/audit/broker.go b/audit/broker.go index 601831eab067..13845267362e 100644 --- a/audit/broker.go +++ b/audit/broker.go @@ -22,7 +22,7 @@ import ( const ( // timeout is the duration which should be used for context related timeouts. - timeout = 5 * time.Second + timeout = 10 * time.Second ) var ( @@ -277,29 +277,12 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (retErr e e.Data = in - // Evaluate whether we need a new context for auditing. - var auditContext context.Context - if isContextViable(ctx) { - auditContext = ctx - } else { - // In cases where we are trying to audit the request, and the existing - // context is not viable due to a short deadline, we detach ourselves from - // the original context (keeping only the namespace). - // This is so that we get a fair run at writing audit entries if Vault - // has taken up a lot of time handling the request before audit (request) - // is triggered. Pipeline nodes and the eventlogger.Broker may check for a - // cancelled context and refuse to process the nodes further. - ns, err := nshelper.FromContext(ctx) - if err != nil { - return fmt.Errorf("namespace missing from context: %w", err) - } - - tempContext, auditCancel := context.WithTimeout(context.Background(), timeout) - defer auditCancel() - auditContext = nshelper.ContextWithNamespace(tempContext, ns) - - b.logger.Trace("log request requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout) + // Get a context to use for auditing. + auditContext, auditCancel, err := getAuditContext(ctx) + if err != nil { + return err } + defer auditCancel() var status eventlogger.Status if hasAuditPipelines(b.broker) { @@ -361,29 +344,12 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (retErr e.Data = in - // Evaluate whether we need a new context for auditing. - var auditContext context.Context - if isContextViable(ctx) { - auditContext = ctx - } else { - // In cases where we are trying to audit the response, and the existing - // context is not viable due to a short deadline, we detach ourselves from - // the original context (keeping only the namespace). - // This is so that we get a fair run at writing audit entries if Vault - // has taken up a lot of time handling the request before audit (response) - // is triggered. Pipeline nodes and the eventlogger.Broker may check for a - // cancelled context and refuse to process the nodes further. - ns, err := nshelper.FromContext(ctx) - if err != nil { - return fmt.Errorf("namespace missing from context: %w", err) - } - - tempContext, auditCancel := context.WithTimeout(context.Background(), timeout) - defer auditCancel() - auditContext = nshelper.ContextWithNamespace(tempContext, ns) - - b.logger.Trace("log response requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout) + // Get a context to use for auditing. + auditContext, auditCancel, err := getAuditContext(ctx) + if err != nil { + return err } + defer auditCancel() var status eventlogger.Status if hasAuditPipelines(b.broker) { @@ -463,33 +429,19 @@ func (b *Broker) IsRegistered(name string) bool { return b.isRegisteredByName(name) } -// isContextViable examines the supplied context to see if its own deadline would -// occur later than a newly created context with a specific timeout. -// Additionally, whether the supplied context is already cancelled, thus making it -// unviable. -// If the existing context is viable it can be used 'as-is', if not, the caller -// should consider creating a new context with the relevant deadline and associated -// context values (e.g. namespace) in order to reduce the likelihood that the -// audit system believes there is a failure in audit (and updating its metrics) -// when the root cause is elsewhere. -func isContextViable(ctx context.Context) bool { - if ctx == nil { - return false - } - - select { - case <-ctx.Done(): - return false - default: +// getAuditContext extracts the namespace from the specified context and returns +// a new context and cancelation function, completely detached from the original +// with a timeout. +// NOTE: The context.CancelFunc returned from this function should be deferred +// immediately by the caller to prevent resource leaks. +func getAuditContext(ctx context.Context) (context.Context, context.CancelFunc, error) { + ns, err := nshelper.FromContext(ctx) + if err != nil { + return nil, nil, fmt.Errorf("namespace missing from context: %w", err) } - deadline, hasDeadline := ctx.Deadline() - - // If there's no deadline on the context then we don't need to worry about - // it being cancelled due to a timeout. - if !hasDeadline { - return true - } + tempContext := nshelper.ContextWithNamespace(context.Background(), ns) + auditContext, auditCancel := context.WithTimeout(tempContext, timeout) - return deadline.After(time.Now().Add(timeout)) + return auditContext, auditCancel, nil } diff --git a/audit/broker_test.go b/audit/broker_test.go index 1fd6d545771c..a7bdc960d128 100644 --- a/audit/broker_test.go +++ b/audit/broker_test.go @@ -159,54 +159,51 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) { }) } -// TestBroker_isContextViable_basics checks the expected result of isContextViable -// for basic inputs such as nil, cancelled context and a never-ending context. -func TestBroker_isContextViable_basics(t *testing.T) { +// TestBroker_getAuditContext_NoNamespace checks that we get the right error when +// trying to get an audit context with no namespace. +func TestBroker_getAuditContext_NoNamespace(t *testing.T) { t.Parallel() - require.False(t, isContextViable(nil)) - ctx, cancel := context.WithCancel(context.Background()) - cancel() - require.False(t, isContextViable(ctx)) - require.True(t, isContextViable(context.Background())) + _, _, err := getAuditContext(context.Background()) + require.Error(t, err) + require.EqualError(t, err, "namespace missing from context: no namespace") } -// TestBroker_isContextViable_timeouts checks the expected result of isContextViable -// for various timeout durations. -func TestBroker_isContextViable_timeouts(t *testing.T) { +// TestBroker_getAuditContext checks that we get a context back which isn't linked +// to the original context, and contains our namespace. +func TestBroker_getAuditContext(t *testing.T) { t.Parallel() - tests := map[string]struct { - timeout time.Duration - expected bool - }{ - "2s-smaller-deadline": { - timeout: timeout - 2*time.Second, - expected: false, - }, - "same-deadline": { - timeout: timeout, - expected: false, // Expected as a near miss - }, - "same-deadline-plus": { - timeout: timeout + 100*time.Millisecond, - expected: true, - }, - "2x-longer-deadline": { - timeout: timeout * 2, - expected: true, - }, + // context with namespace + ns := &nshelper.Namespace{ + ID: "foo", + Path: "foo/", } - for name, tc := range tests { - name := name - tc := tc - t.Run(name, func(t *testing.T) { - t.Parallel() + // Create a context with a namespace. + originalContext, originalCancel := context.WithCancel(context.Background()) + t.Cleanup(originalCancel) + nsContext := nshelper.ContextWithNamespace(originalContext, ns) - ctx, cancel := context.WithTimeout(context.Background(), tc.timeout) - t.Cleanup(func() { cancel() }) - require.Equal(t, tc.expected, isContextViable(ctx)) - }) - } + // Get the audit context + auditContext, auditCancel, err := getAuditContext(nsContext) + t.Cleanup(auditCancel) + + require.NoError(t, err) + require.NotNil(t, auditContext) + require.NotNil(t, auditCancel) + + // Ensure the namespace is there too. + val, err := nshelper.FromContext(auditContext) + require.NoError(t, err) + require.Equal(t, ns, val) + + // Now cancel the original context and ensure it is done but audit context isn't. + originalCancel() + require.NotNil(t, originalContext.Err()) + require.Nil(t, auditContext.Err()) + + // Now cancel the audit context and ensure that it is done. + auditCancel() + require.NotNil(t, auditContext.Err()) } From 3bb65977cd6e596f3f543b9bf77aea7cd67af052 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Thu, 5 Sep 2024 12:56:42 +0100 Subject: [PATCH 2/3] improve go doc --- audit/broker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audit/broker.go b/audit/broker.go index 13845267362e..1f9a859d3881 100644 --- a/audit/broker.go +++ b/audit/broker.go @@ -432,8 +432,8 @@ func (b *Broker) IsRegistered(name string) bool { // getAuditContext extracts the namespace from the specified context and returns // a new context and cancelation function, completely detached from the original // with a timeout. -// NOTE: The context.CancelFunc returned from this function should be deferred -// immediately by the caller to prevent resource leaks. +// NOTE: When error is nil, the context.CancelFunc returned from this function +// should be deferred immediately by the caller to prevent resource leaks. func getAuditContext(ctx context.Context) (context.Context, context.CancelFunc, error) { ns, err := nshelper.FromContext(ctx) if err != nil { From 1c1b55eb2e241c76aa3aba9843b46bd0a0897317 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Thu, 5 Sep 2024 13:22:46 +0100 Subject: [PATCH 3/3] changelog --- changelog/28286.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog/28286.txt diff --git a/changelog/28286.txt b/changelog/28286.txt new file mode 100644 index 000000000000..c7010a199241 --- /dev/null +++ b/changelog/28286.txt @@ -0,0 +1,3 @@ +```release-note:improvement +audit: Internal implementation changes to the audit subsystem which improve relability. +``` \ No newline at end of file