Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[E2E] Retry Agent MSI download #29352

Merged
merged 6 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/new-e2e/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ require (
github.com/aws/smithy-go v1.20.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/blang/semver v3.5.1+incompatible // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/charmbracelet/bubbles v0.18.0 // indirect
github.com/charmbracelet/bubbletea v0.25.0 // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/tests/windows"
windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common"
windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent"
)

Expand Down Expand Up @@ -76,13 +75,7 @@ func (v *vmSuite) TestSystemProbeCWSSuite() {
// install the agent (just so we can get the driver(s) installed)
agentPackage, err := windowsAgent.GetPackageFromEnv()
require.NoError(t, err)
remoteMSIPath, err := windowsCommon.GetTemporaryFile(vm)
require.NoError(t, err)
t.Logf("Getting install package %s...", agentPackage.URL)
err = windowsCommon.PutOrDownloadFile(vm, agentPackage.URL, remoteMSIPath)
require.NoError(t, err)

err = windowsCommon.InstallMSI(vm, remoteMSIPath, "", "")
_, err = windowsAgent.InstallAgent(vm, windowsAgent.WithPackage(agentPackage))
t.Log("Install complete")
require.NoError(t, err)

Expand Down
9 changes: 1 addition & 8 deletions test/new-e2e/tests/sysprobe-functional/sysprobe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/tests/windows"
windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common"
windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent"
componentsos "github.com/DataDog/test-infra-definitions/components/os"
"github.com/DataDog/test-infra-definitions/scenarios/aws/ec2"
Expand Down Expand Up @@ -106,13 +105,7 @@ func (v *vmSuite) TestSystemProbeNPMSuite() {
// install the agent (just so we can get the driver(s) installed)
agentPackage, err := windowsAgent.GetPackageFromEnv()
require.NoError(t, err)
remoteMSIPath, err := windowsCommon.GetTemporaryFile(vm)
require.NoError(t, err)
t.Logf("Getting install package %s...", agentPackage.URL)
err = windowsCommon.PutOrDownloadFile(vm, agentPackage.URL, remoteMSIPath)
require.NoError(t, err)

err = windowsCommon.InstallMSI(vm, remoteMSIPath, "", "")
_, err = windowsAgent.InstallAgent(vm, windowsAgent.WithPackage(agentPackage))
t.Log("Install complete")
require.NoError(t, err)

Expand Down
14 changes: 13 additions & 1 deletion test/new-e2e/tests/windows/common/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"path/filepath"
"strings"
"testing"
"time"

"github.com/DataDog/datadog-agent/pkg/version"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"
Expand All @@ -20,6 +21,7 @@ import (
windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common"
infraCommon "github.com/DataDog/test-infra-definitions/common"

"github.com/cenkalti/backoff/v4"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -72,13 +74,23 @@ func InstallAgent(host *components.RemoteHost, options ...InstallAgentOption) (s
p.LocalInstallLogFile = filepath.Join(os.TempDir(), "install.log")
}

downloadBackOff := p.DownloadMSIBackOff
if downloadBackOff == nil {
// 5s, 7s, 11s, 17s, 25s, 38s, 60s, 60s...for up to 5 minutes
downloadBackOff = backoff.NewExponentialBackOff(
backoff.WithInitialInterval(5*time.Second),
backoff.WithMaxInterval(60*time.Second),
backoff.WithMaxElapsedTime(5*time.Minute),
)
}

args := p.toArgs()

remoteMSIPath, err := windowsCommon.GetTemporaryFile(host)
if err != nil {
return "", err
}
err = windowsCommon.PutOrDownloadFile(host, p.Package.URL, remoteMSIPath)
err = windowsCommon.PutOrDownloadFileWithRetry(host, p.Package.URL, remoteMSIPath, downloadBackOff)
if err != nil {
return "", err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ import (
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner/parameters"
"github.com/DataDog/test-infra-definitions/components/datadog/agentparams/msi"

"github.com/cenkalti/backoff/v4"
)

// InstallAgentParams are the parameters used for installing the Agent using msiexec.
type InstallAgentParams struct {
Package *Package
Package *Package
DownloadMSIBackOff backoff.BackOff

// Path on local test runner to save the MSI install log
LocalInstallLogFile string

Expand Down Expand Up @@ -146,6 +150,14 @@ func WithLastStablePackage() InstallAgentOption {
}
}

// WithDownloadMSIBackoff specifies the backoff strategy for downloading the MSI.
func WithDownloadMSIBackoff(backoff backoff.BackOff) InstallAgentOption {
return func(i *InstallAgentParams) error {
i.DownloadMSIBackOff = backoff
return nil
}
}

// WithFakeIntake configures the Agent to use a fake intake URL.
func WithFakeIntake(fakeIntake *components.FakeIntake) InstallAgentOption {
return func(i *InstallAgentParams) error {
Expand Down
23 changes: 22 additions & 1 deletion test/new-e2e/tests/windows/common/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"strings"

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"

"github.com/cenkalti/backoff/v4"
)

// BoundPort represents a port that is bound to a process
Expand Down Expand Up @@ -80,8 +82,27 @@ func ListBoundPorts(host *components.RemoteHost) ([]*BoundPort, error) {
// If the URL is a local file, it will be uploaded to the VM.
// If the URL is a remote file, it will be downloaded from the VM
func PutOrDownloadFile(host *components.RemoteHost, url string, destination string) error {
// no retry
return PutOrDownloadFileWithRetry(host, url, destination, &backoff.StopBackOff{})
}

// PutOrDownloadFileWithRetry is similar to PutOrDownloadFile but retries on download failure,
// local file copy is not retried.
func PutOrDownloadFileWithRetry(host *components.RemoteHost, url string, destination string, b backoff.BackOff) error {
if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
return DownloadFile(host, url, destination)
err := backoff.Retry(func() error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ question
Would it be possible to pass a testing.T context in the signature, so that we could use testify.Eventually instead of backoff? We had plans to remove backoff from test/new-e2e as it is all code uses in tests, makes sense when possible to use testing features.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to avoid testify.Eventually in code that isn't specifically a test due to the issues we've encountered with it. Have those been resolved, yet?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which issue did you encounter?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testify.Eventually waits for the delay interval once before calling the condition function.

Copy link
Contributor

@pducolin pducolin Sep 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR that checks the condition before running the delayed loop is still open. Is this a blocker in this case? In other words, do you expect for the condition to be possibly true at time 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the download succeeding the first time is the 99% case yes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about reducing the interval to 1 second, so worst case, while the feature is not integrated, you lose 1 second? I am trying to remove backoff from test/new-e2e to have one golden path for handling retries, if this is urgent we can revisit in the near future, but I'd rather lose 1 second at install time while testify fixes this than having 2 different ways to retry in the code, which can be misleading.

return DownloadFile(host, url, destination)
// TODO: it would be neat to only retry on web related errors but
// we don't have a way to distinguish them since DownloadFile
// throws a WebException for non web related errors such as
// filename is null or Empty.
// https://learn.microsoft.com/en-us/dotnet/api/system.net.webclient.downloadfile
// example error: Exception calling "DownloadFile" with "2" argument(s): "The remote server returned an error: (503)
}, b)
if err != nil {
return err
}
return nil
}

if strings.HasPrefix(url, "file://") {
Expand Down
3 changes: 0 additions & 3 deletions test/new-e2e/tests/windows/install-test/npm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"path/filepath"
"time"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"
windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common"
windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent"
Expand Down Expand Up @@ -111,8 +110,6 @@ func (s *testNPMInstallWithAddLocalSuite) TestNPMInstallWithAddLocal() {
//
// Old name: Scenario 10
func TestNPMUpgradeFromBeta(t *testing.T) {
// incident-30584
flake.Mark(t)
s := &testNPMUpgradeFromBeta{}
s.previousVersion = "7.23.2-beta1-1"
s.url = "https://ddagent-windows-unstable.s3.amazonaws.com/datadog-agent-7.23.2-beta1-1-x86_64.msi"
Expand Down
3 changes: 0 additions & 3 deletions test/new-e2e/tests/windows/install-test/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"slices"
"strings"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common"
windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent"
servicetest "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/install-test/service-test"
Expand All @@ -24,8 +23,6 @@ import (

// TestUpgrade tests upgrading the agent from LAST_STABLE_VERSION to WINDOWS_AGENT_VERSION
func TestUpgrade(t *testing.T) {
// incident-30584
flake.Mark(t)
s := &testUpgradeSuite{}
previousAgentPackage, err := windowsAgent.GetLastStablePackageFromEnv()
require.NoError(t, err, "should get last stable agent package from env")
Expand Down
Loading