Skip to content

Commit

Permalink
backport of commit 322cb27 (#28292)
Browse files Browse the repository at this point in the history
Co-authored-by: Peter Wilson <peter.wilson@hashicorp.com>
  • Loading branch information
hc-github-team-secure-vault-core and Peter Wilson authored Sep 5, 2024
1 parent 3deeb38 commit b97074b
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 112 deletions.
94 changes: 23 additions & 71 deletions audit/broker.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

const (
// timeout is the duration which should be used for context related timeouts.
timeout = 5 * time.Second
timeout = 10 * time.Second
)

var (
Expand Down Expand Up @@ -277,29 +277,12 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (retErr e

e.Data = in

// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the request, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (request)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := nshelper.FromContext(ctx)
if err != nil {
return fmt.Errorf("namespace missing from context: %w", err)
}

tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
defer auditCancel()
auditContext = nshelper.ContextWithNamespace(tempContext, ns)

b.logger.Trace("log request requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout)
// Get a context to use for auditing.
auditContext, auditCancel, err := getAuditContext(ctx)
if err != nil {
return err
}
defer auditCancel()

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
Expand Down Expand Up @@ -361,29 +344,12 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (retErr

e.Data = in

// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the response, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (response)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := nshelper.FromContext(ctx)
if err != nil {
return fmt.Errorf("namespace missing from context: %w", err)
}

tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
defer auditCancel()
auditContext = nshelper.ContextWithNamespace(tempContext, ns)

b.logger.Trace("log response requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout)
// Get a context to use for auditing.
auditContext, auditCancel, err := getAuditContext(ctx)
if err != nil {
return err
}
defer auditCancel()

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
Expand Down Expand Up @@ -463,33 +429,19 @@ func (b *Broker) IsRegistered(name string) bool {
return b.isRegisteredByName(name)
}

// isContextViable examines the supplied context to see if its own deadline would
// occur later than a newly created context with a specific timeout.
// Additionally, whether the supplied context is already cancelled, thus making it
// unviable.
// If the existing context is viable it can be used 'as-is', if not, the caller
// should consider creating a new context with the relevant deadline and associated
// context values (e.g. namespace) in order to reduce the likelihood that the
// audit system believes there is a failure in audit (and updating its metrics)
// when the root cause is elsewhere.
func isContextViable(ctx context.Context) bool {
if ctx == nil {
return false
}

select {
case <-ctx.Done():
return false
default:
// getAuditContext extracts the namespace from the specified context and returns
// a new context and cancelation function, completely detached from the original
// with a timeout.
// NOTE: When error is nil, the context.CancelFunc returned from this function
// should be deferred immediately by the caller to prevent resource leaks.
func getAuditContext(ctx context.Context) (context.Context, context.CancelFunc, error) {
ns, err := nshelper.FromContext(ctx)
if err != nil {
return nil, nil, fmt.Errorf("namespace missing from context: %w", err)
}

deadline, hasDeadline := ctx.Deadline()

// If there's no deadline on the context then we don't need to worry about
// it being cancelled due to a timeout.
if !hasDeadline {
return true
}
tempContext := nshelper.ContextWithNamespace(context.Background(), ns)
auditContext, auditCancel := context.WithTimeout(tempContext, timeout)

return deadline.After(time.Now().Add(timeout))
return auditContext, auditCancel, nil
}
79 changes: 38 additions & 41 deletions audit/broker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,54 +159,51 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) {
})
}

// TestBroker_isContextViable_basics checks the expected result of isContextViable
// for basic inputs such as nil, cancelled context and a never-ending context.
func TestBroker_isContextViable_basics(t *testing.T) {
// TestBroker_getAuditContext_NoNamespace checks that we get the right error when
// trying to get an audit context with no namespace.
func TestBroker_getAuditContext_NoNamespace(t *testing.T) {
t.Parallel()

require.False(t, isContextViable(nil))
ctx, cancel := context.WithCancel(context.Background())
cancel()
require.False(t, isContextViable(ctx))
require.True(t, isContextViable(context.Background()))
_, _, err := getAuditContext(context.Background())
require.Error(t, err)
require.EqualError(t, err, "namespace missing from context: no namespace")
}

// TestBroker_isContextViable_timeouts checks the expected result of isContextViable
// for various timeout durations.
func TestBroker_isContextViable_timeouts(t *testing.T) {
// TestBroker_getAuditContext checks that we get a context back which isn't linked
// to the original context, and contains our namespace.
func TestBroker_getAuditContext(t *testing.T) {
t.Parallel()

tests := map[string]struct {
timeout time.Duration
expected bool
}{
"2s-smaller-deadline": {
timeout: timeout - 2*time.Second,
expected: false,
},
"same-deadline": {
timeout: timeout,
expected: false, // Expected as a near miss
},
"same-deadline-plus": {
timeout: timeout + 100*time.Millisecond,
expected: true,
},
"2x-longer-deadline": {
timeout: timeout * 2,
expected: true,
},
// context with namespace
ns := &nshelper.Namespace{
ID: "foo",
Path: "foo/",
}

for name, tc := range tests {
name := name
tc := tc
t.Run(name, func(t *testing.T) {
t.Parallel()
// Create a context with a namespace.
originalContext, originalCancel := context.WithCancel(context.Background())
t.Cleanup(originalCancel)
nsContext := nshelper.ContextWithNamespace(originalContext, ns)

ctx, cancel := context.WithTimeout(context.Background(), tc.timeout)
t.Cleanup(func() { cancel() })
require.Equal(t, tc.expected, isContextViable(ctx))
})
}
// Get the audit context
auditContext, auditCancel, err := getAuditContext(nsContext)
t.Cleanup(auditCancel)

require.NoError(t, err)
require.NotNil(t, auditContext)
require.NotNil(t, auditCancel)

// Ensure the namespace is there too.
val, err := nshelper.FromContext(auditContext)
require.NoError(t, err)
require.Equal(t, ns, val)

// Now cancel the original context and ensure it is done but audit context isn't.
originalCancel()
require.NotNil(t, originalContext.Err())
require.Nil(t, auditContext.Err())

// Now cancel the audit context and ensure that it is done.
auditCancel()
require.NotNil(t, auditContext.Err())
}
3 changes: 3 additions & 0 deletions changelog/28286.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
audit: Internal implementation changes to the audit subsystem which improve relability.
```

0 comments on commit b97074b

Please sign in to comment.