From c5bcbc1eadd48de627484fb65795ec02c5fe63f9 Mon Sep 17 00:00:00 2001 From: Branden Clark Date: Wed, 25 Sep 2024 12:55:53 -0400 Subject: [PATCH] [E2E] Retry Agent MSI download (#29352) --- test/new-e2e/go.mod | 2 +- .../security_agent_test.go | 9 +------- .../sysprobe-functional/sysprobe_test.go | 9 +------- .../tests/windows/common/agent/agent.go | 14 ++++++++++- .../common/agent/agent_install_params.go | 14 ++++++++++- test/new-e2e/tests/windows/common/network.go | 23 ++++++++++++++++++- .../tests/windows/install-test/npm_test.go | 3 --- .../windows/install-test/upgrade_test.go | 3 --- 8 files changed, 51 insertions(+), 26 deletions(-) diff --git a/test/new-e2e/go.mod b/test/new-e2e/go.mod index 1f74f75f37a50..d8d8132c7a014 100644 --- a/test/new-e2e/go.mod +++ b/test/new-e2e/go.mod @@ -105,7 +105,7 @@ require ( github.com/aws/smithy-go v1.20.4 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/blang/semver v3.5.1+incompatible // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 github.com/chai2010/gettext-go v1.0.2 // indirect github.com/charmbracelet/bubbles v0.18.0 // indirect github.com/charmbracelet/bubbletea v0.25.0 // indirect diff --git a/test/new-e2e/tests/security-agent-functional/security_agent_test.go b/test/new-e2e/tests/security-agent-functional/security_agent_test.go index 6606c33a6d600..94da861eb7fa6 100644 --- a/test/new-e2e/tests/security-agent-functional/security_agent_test.go +++ b/test/new-e2e/tests/security-agent-functional/security_agent_test.go @@ -21,7 +21,6 @@ import ( "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments" awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/host" "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows" - windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common" windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent" ) @@ -76,13 +75,7 @@ func (v *vmSuite) TestSystemProbeCWSSuite() { // install the agent (just so we can get the driver(s) installed) agentPackage, err := windowsAgent.GetPackageFromEnv() require.NoError(t, err) - remoteMSIPath, err := windowsCommon.GetTemporaryFile(vm) - require.NoError(t, err) - t.Logf("Getting install package %s...", agentPackage.URL) - err = windowsCommon.PutOrDownloadFile(vm, agentPackage.URL, remoteMSIPath) - require.NoError(t, err) - - err = windowsCommon.InstallMSI(vm, remoteMSIPath, "", "") + _, err = windowsAgent.InstallAgent(vm, windowsAgent.WithPackage(agentPackage)) t.Log("Install complete") require.NoError(t, err) diff --git a/test/new-e2e/tests/sysprobe-functional/sysprobe_test.go b/test/new-e2e/tests/sysprobe-functional/sysprobe_test.go index 190e2cfffa937..4e41c685bbd7d 100644 --- a/test/new-e2e/tests/sysprobe-functional/sysprobe_test.go +++ b/test/new-e2e/tests/sysprobe-functional/sysprobe_test.go @@ -18,7 +18,6 @@ import ( "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments" awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/host" "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows" - windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common" windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent" componentsos "github.com/DataDog/test-infra-definitions/components/os" "github.com/DataDog/test-infra-definitions/scenarios/aws/ec2" @@ -106,13 +105,7 @@ func (v *vmSuite) TestSystemProbeNPMSuite() { // install the agent (just so we can get the driver(s) installed) agentPackage, err := windowsAgent.GetPackageFromEnv() require.NoError(t, err) - remoteMSIPath, err := windowsCommon.GetTemporaryFile(vm) - require.NoError(t, err) - t.Logf("Getting install package %s...", agentPackage.URL) - err = windowsCommon.PutOrDownloadFile(vm, agentPackage.URL, remoteMSIPath) - require.NoError(t, err) - - err = windowsCommon.InstallMSI(vm, remoteMSIPath, "", "") + _, err = windowsAgent.InstallAgent(vm, windowsAgent.WithPackage(agentPackage)) t.Log("Install complete") require.NoError(t, err) diff --git a/test/new-e2e/tests/windows/common/agent/agent.go b/test/new-e2e/tests/windows/common/agent/agent.go index 693fa93db26fd..8833fb0f3cb6e 100644 --- a/test/new-e2e/tests/windows/common/agent/agent.go +++ b/test/new-e2e/tests/windows/common/agent/agent.go @@ -12,6 +12,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/DataDog/datadog-agent/pkg/version" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/components" @@ -20,6 +21,7 @@ import ( windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common" infraCommon "github.com/DataDog/test-infra-definitions/common" + "github.com/cenkalti/backoff/v4" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -72,13 +74,23 @@ func InstallAgent(host *components.RemoteHost, options ...InstallAgentOption) (s p.LocalInstallLogFile = filepath.Join(os.TempDir(), "install.log") } + downloadBackOff := p.DownloadMSIBackOff + if downloadBackOff == nil { + // 5s, 7s, 11s, 17s, 25s, 38s, 60s, 60s...for up to 5 minutes + downloadBackOff = backoff.NewExponentialBackOff( + backoff.WithInitialInterval(5*time.Second), + backoff.WithMaxInterval(60*time.Second), + backoff.WithMaxElapsedTime(5*time.Minute), + ) + } + args := p.toArgs() remoteMSIPath, err := windowsCommon.GetTemporaryFile(host) if err != nil { return "", err } - err = windowsCommon.PutOrDownloadFile(host, p.Package.URL, remoteMSIPath) + err = windowsCommon.PutOrDownloadFileWithRetry(host, p.Package.URL, remoteMSIPath, downloadBackOff) if err != nil { return "", err } diff --git a/test/new-e2e/tests/windows/common/agent/agent_install_params.go b/test/new-e2e/tests/windows/common/agent/agent_install_params.go index 51f4a0b951057..a4f37403d27c4 100644 --- a/test/new-e2e/tests/windows/common/agent/agent_install_params.go +++ b/test/new-e2e/tests/windows/common/agent/agent_install_params.go @@ -13,11 +13,15 @@ import ( "github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner/parameters" "github.com/DataDog/test-infra-definitions/components/datadog/agentparams/msi" + + "github.com/cenkalti/backoff/v4" ) // InstallAgentParams are the parameters used for installing the Agent using msiexec. type InstallAgentParams struct { - Package *Package + Package *Package + DownloadMSIBackOff backoff.BackOff + // Path on local test runner to save the MSI install log LocalInstallLogFile string @@ -146,6 +150,14 @@ func WithLastStablePackage() InstallAgentOption { } } +// WithDownloadMSIBackoff specifies the backoff strategy for downloading the MSI. +func WithDownloadMSIBackoff(backoff backoff.BackOff) InstallAgentOption { + return func(i *InstallAgentParams) error { + i.DownloadMSIBackOff = backoff + return nil + } +} + // WithFakeIntake configures the Agent to use a fake intake URL. func WithFakeIntake(fakeIntake *components.FakeIntake) InstallAgentOption { return func(i *InstallAgentParams) error { diff --git a/test/new-e2e/tests/windows/common/network.go b/test/new-e2e/tests/windows/common/network.go index af4291c732008..2a8e98f0266ca 100644 --- a/test/new-e2e/tests/windows/common/network.go +++ b/test/new-e2e/tests/windows/common/network.go @@ -11,6 +11,8 @@ import ( "strings" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/components" + + "github.com/cenkalti/backoff/v4" ) // BoundPort represents a port that is bound to a process @@ -80,8 +82,27 @@ func ListBoundPorts(host *components.RemoteHost) ([]*BoundPort, error) { // If the URL is a local file, it will be uploaded to the VM. // If the URL is a remote file, it will be downloaded from the VM func PutOrDownloadFile(host *components.RemoteHost, url string, destination string) error { + // no retry + return PutOrDownloadFileWithRetry(host, url, destination, &backoff.StopBackOff{}) +} + +// PutOrDownloadFileWithRetry is similar to PutOrDownloadFile but retries on download failure, +// local file copy is not retried. +func PutOrDownloadFileWithRetry(host *components.RemoteHost, url string, destination string, b backoff.BackOff) error { if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") { - return DownloadFile(host, url, destination) + err := backoff.Retry(func() error { + return DownloadFile(host, url, destination) + // TODO: it would be neat to only retry on web related errors but + // we don't have a way to distinguish them since DownloadFile + // throws a WebException for non web related errors such as + // filename is null or Empty. + // https://learn.microsoft.com/en-us/dotnet/api/system.net.webclient.downloadfile + // example error: Exception calling "DownloadFile" with "2" argument(s): "The remote server returned an error: (503) + }, b) + if err != nil { + return err + } + return nil } if strings.HasPrefix(url, "file://") { diff --git a/test/new-e2e/tests/windows/install-test/npm_test.go b/test/new-e2e/tests/windows/install-test/npm_test.go index e3d5ff0b0d82e..cefec1f51b626 100644 --- a/test/new-e2e/tests/windows/install-test/npm_test.go +++ b/test/new-e2e/tests/windows/install-test/npm_test.go @@ -10,7 +10,6 @@ import ( "path/filepath" "time" - "github.com/DataDog/datadog-agent/pkg/util/testutil/flake" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/components" windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common" windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent" @@ -111,8 +110,6 @@ func (s *testNPMInstallWithAddLocalSuite) TestNPMInstallWithAddLocal() { // // Old name: Scenario 10 func TestNPMUpgradeFromBeta(t *testing.T) { - // incident-30584 - flake.Mark(t) s := &testNPMUpgradeFromBeta{} s.previousVersion = "7.23.2-beta1-1" s.url = "https://ddagent-windows-unstable.s3.amazonaws.com/datadog-agent-7.23.2-beta1-1-x86_64.msi" diff --git a/test/new-e2e/tests/windows/install-test/upgrade_test.go b/test/new-e2e/tests/windows/install-test/upgrade_test.go index d1f62be95aedc..e4c5677cf7198 100644 --- a/test/new-e2e/tests/windows/install-test/upgrade_test.go +++ b/test/new-e2e/tests/windows/install-test/upgrade_test.go @@ -11,7 +11,6 @@ import ( "slices" "strings" - "github.com/DataDog/datadog-agent/pkg/util/testutil/flake" windowsCommon "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common" windowsAgent "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/common/agent" servicetest "github.com/DataDog/datadog-agent/test/new-e2e/tests/windows/install-test/service-test" @@ -24,8 +23,6 @@ import ( // TestUpgrade tests upgrading the agent from LAST_STABLE_VERSION to WINDOWS_AGENT_VERSION func TestUpgrade(t *testing.T) { - // incident-30584 - flake.Mark(t) s := &testUpgradeSuite{} previousAgentPackage, err := windowsAgent.GetLastStablePackageFromEnv() require.NoError(t, err, "should get last stable agent package from env")