Skip to content

Commit

Permalink
testutil/prometheus: fix start/stop race (#6854)
Browse files Browse the repository at this point in the history
* testutil/prometheus: fix start/stop race

Fix the following race between start and stop:

```
WARNING: DATA RACE
Read at 0x00c0001940a0 by goroutine 35:
  github.com/thanos-io/thanos/pkg/testutil/e2eutil.(*Prometheus).Stop()
      /home/giedrius/dev/thanos/pkg/testutil/e2eutil/prometheus.go:298 +0x94
  github.com/thanos-io/thanos/pkg/metadata.TestPrometheus_Metadata_e2e.func1()
      /home/giedrius/dev/thanos/pkg/metadata/prometheus_test.go:32 +0x33
  runtime.deferreturn()
      /usr/lib/go-1.21/src/runtime/panic.go:477 +0x30
  testing.tRunner()
      /usr/lib/go-1.21/src/testing/testing.go:1595 +0x238
  testing.(*T).Run.func1()
      /usr/lib/go-1.21/src/testing/testing.go:1648 +0x44

Previous write at 0x00c0001940a0 by goroutine 73:
  os/exec.(*Cmd).Start()
      /usr/lib/go-1.21/src/os/exec/exec.go:693 +0x977
  os/exec.(*Cmd).Run()
      /usr/lib/go-1.21/src/os/exec/exec.go:587 +0x26
  os/exec.(*Cmd).CombinedOutput()
      /usr/lib/go-1.21/src/os/exec/exec.go:1005 +0x1f6
  github.com/thanos-io/thanos/pkg/testutil/e2eutil.(*Prometheus).start.func1()
      /home/giedrius/dev/thanos/pkg/testutil/e2eutil/prometheus.go:226 +0x52
```

While fixing this I have also noticed some pretty bad code like
hardcoding durations in random time.Sleep. Also, some Start() calls were
without waiting for Prometheus to be up. Calling start but without
waiting for Prometheus to be up doesn't make much sense so I have taken
the liberty to refactor this place. Also, this should make tests faster
because we won't have random sleeps too.

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>

* e2eutil: pass prefix too

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>

---------

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
  • Loading branch information
GiedriusS authored Oct 31, 2023
1 parent 429cfd6 commit 3e023b6
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 70 deletions.
20 changes: 9 additions & 11 deletions pkg/metadata/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ func TestPrometheus_Metadata_e2e(t *testing.T) {
testutil.Ok(t, err)
defer func() { testutil.Ok(t, p.Stop()) }()

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

upctx, upcancel := context.WithTimeout(ctx, 10*time.Second)
defer upcancel()

logger := log.NewNopLogger()

p.SetConfig(fmt.Sprintf(`
global:
external_labels:
Expand All @@ -43,17 +51,7 @@ scrape_configs:
static_configs:
- targets: ['%s']
`, e2eutil.PromAddrPlaceHolder))
testutil.Ok(t, p.Start())

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

upctx, upcancel := context.WithTimeout(ctx, 10*time.Second)
defer upcancel()

logger := log.NewNopLogger()
err = p.WaitPrometheusUp(upctx, logger)
testutil.Ok(t, err)
testutil.Ok(t, p.Start(upctx, logger))

u, err := url.Parse("http://" + p.Addr())
testutil.Ok(t, err)
Expand Down
13 changes: 7 additions & 6 deletions pkg/promclient/promclient_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"testing"
"time"

"github.com/go-kit/log"
"github.com/oklog/ulid"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
Expand All @@ -27,10 +28,10 @@ import (

func TestIsWALFileAccessible_e2e(t *testing.T) {
e2eutil.ForeachPrometheus(t, func(t testing.TB, p *e2eutil.Prometheus) {
testutil.Ok(t, p.Start())

ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
testutil.Ok(t, p.Start(ctx, log.NewNopLogger()))

testutil.Ok(t, runutil.Retry(time.Second, ctx.Done(), func() error { return IsWALDirAccessible(p.Dir()) }))

testutil.NotOk(t, IsWALDirAccessible(path.Join(p.Dir(), "/non-existing")))
Expand All @@ -49,7 +50,7 @@ func TestExternalLabels_e2e(t *testing.T) {
testutil.Ok(t, err)
p.SetConfig(string(cfgData))

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand All @@ -65,7 +66,7 @@ func TestExternalLabels_e2e(t *testing.T) {

func TestConfiguredFlags_e2e(t *testing.T) {
e2eutil.ForeachPrometheus(t, func(t testing.TB, p *e2eutil.Prometheus) {
testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -101,7 +102,7 @@ func TestSnapshot_e2e(t *testing.T) {
)
testutil.Ok(t, err)

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -172,7 +173,7 @@ func TestQueryRange_e2e(t *testing.T) {
)
testutil.Ok(t, err)

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down
4 changes: 3 additions & 1 deletion pkg/rules/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
package rules

import (
"context"
"fmt"
"net/url"
"os"
"path/filepath"
"testing"

"github.com/go-kit/log"
"github.com/prometheus/prometheus/model/labels"

"github.com/efficientgo/core/testutil"
Expand All @@ -35,7 +37,7 @@ rule_files:
- %s/examples/alerts/alerts.yaml
- %s/examples/alerts/rules.yaml
`, root, root))
testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down
24 changes: 4 additions & 20 deletions pkg/shipper/shipper_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,19 +205,11 @@ func TestShipper_SyncBlocksWithMigrating_e2e(t *testing.T) {

extLset := labels.FromStrings("prometheus", "prom-1")

testutil.Ok(t, p.Start())

logger := log.NewNopLogger()
upctx, upcancel := context.WithTimeout(ctx, 10*time.Second)
defer upcancel()
testutil.Ok(t, p.WaitPrometheusUp(upctx, logger))
testutil.Ok(t, p.Start(context.Background(), logger))

p.DisableCompaction()
testutil.Ok(t, p.Restart())

upctx2, upcancel2 := context.WithTimeout(ctx, 10*time.Second)
defer upcancel2()
testutil.Ok(t, p.WaitPrometheusUp(upctx2, logger))
testutil.Ok(t, p.Restart(context.Background(), logger))

uploadCompactedFunc := func() bool { return true }
shipper := New(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, uploadCompactedFunc, false, metadata.NoneFunc)
Expand Down Expand Up @@ -361,19 +353,11 @@ func TestShipper_SyncOverlapBlocks_e2e(t *testing.T) {

extLset := labels.FromStrings("prometheus", "prom-1")

testutil.Ok(t, p.Start())

logger := log.NewNopLogger()
upctx, upcancel := context.WithTimeout(ctx, 10*time.Second)
defer upcancel()
testutil.Ok(t, p.WaitPrometheusUp(upctx, logger))
testutil.Ok(t, p.Start(context.Background(), logger))

p.DisableCompaction()
testutil.Ok(t, p.Restart())

upctx2, upcancel2 := context.WithTimeout(ctx, 10*time.Second)
defer upcancel2()
testutil.Ok(t, p.WaitPrometheusUp(upctx2, logger))
testutil.Ok(t, p.Restart(context.Background(), logger))

uploadCompactedFunc := func() bool { return true }
// Here, the allowOutOfOrderUploads flag is set to true, which allows blocks with overlaps to be uploaded.
Expand Down
2 changes: 1 addition & 1 deletion pkg/store/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ func TestPrometheusStore_Acceptance(t *testing.T) {

appendFn(p.Appender())

testutil.Ok(tt, p.Start())
testutil.Ok(tt, p.Start(context.Background(), log.NewNopLogger()))
u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(tt, err)

Expand Down
12 changes: 7 additions & 5 deletions pkg/store/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ import (
"fmt"
"math"
"net/url"
"os"
"testing"
"time"

"github.com/cespare/xxhash"
"github.com/go-kit/log"

"github.com/pkg/errors"
"github.com/prometheus/prometheus/model/labels"
Expand Down Expand Up @@ -62,7 +64,7 @@ func testPrometheusStoreSeriesE2e(t *testing.T, prefix string) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(ctx, log.NewLogfmtLogger(os.Stderr)))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -224,7 +226,7 @@ func TestPrometheusStore_SeriesLabels_e2e(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(ctx, log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -406,7 +408,7 @@ func TestPrometheusStore_Series_MatchExternalLabel(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(ctx, log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -469,7 +471,7 @@ func TestPrometheusStore_Series_ChunkHashCalculation_Integration(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(ctx, log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down Expand Up @@ -578,7 +580,7 @@ func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testin
defer func() { testutil.Ok(t, p.Stop()) }()

testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t, p.Appender(), func() storepb.StoreServer {
testutil.Ok(t, p.Start())
testutil.Ok(t, p.Start(context.Background(), log.NewNopLogger()))

u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
testutil.Ok(t, err)
Expand Down
10 changes: 2 additions & 8 deletions pkg/targets/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,12 @@ scrape_configs:
regex: '^.+:80$'
action: drop
`, e2eutil.PromAddrPlaceHolder))
testutil.Ok(t, p.Start())
logger := log.NewNopLogger()
testutil.Ok(t, p.Start(context.Background(), logger))

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

upctx, upcancel := context.WithTimeout(ctx, 10*time.Second)
defer upcancel()

logger := log.NewNopLogger()
err = p.WaitPrometheusUp(upctx, logger)
testutil.Ok(t, err)

u, err := url.Parse("http://" + p.Addr())
testutil.Ok(t, err)

Expand Down
65 changes: 47 additions & 18 deletions pkg/testutil/e2eutil/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ type Prometheus struct {
addr string

config string

stdout, stderr bytes.Buffer
}

func NewTSDB() (*tsdb.DB, error) {
Expand Down Expand Up @@ -178,20 +180,24 @@ func newPrometheus(binPath, prefix string) (*Prometheus, error) {
}

// Start running the Prometheus instance and return.
func (p *Prometheus) Start() error {
func (p *Prometheus) Start(ctx context.Context, l log.Logger) error {
if p.running {
return errors.New("Already started")
}

if err := p.db.Close(); err != nil {
return err
}
return p.start()
if err := p.start(); err != nil {
return err
}
if err := p.waitPrometheusUp(ctx, l, p.prefix); err != nil {
return err
}
return nil
}

func (p *Prometheus) start() error {
p.running = true

port, err := FreePort()
if err != nil {
return err
Expand Down Expand Up @@ -222,23 +228,26 @@ func (p *Prometheus) start() error {
p.cmd = exec.Command(p.binPath, args...)
p.cmd.SysProcAttr = SysProcAttr()

go func() {
if b, err := p.cmd.CombinedOutput(); err != nil {
fmt.Fprintln(os.Stderr, "running Prometheus failed", err)
fmt.Fprintln(os.Stderr, string(b))
}
}()
time.Sleep(2 * time.Second)
p.stderr.Reset()
p.stdout.Reset()

p.cmd.Stdout = &p.stdout
p.cmd.Stderr = &p.stderr

if err := p.cmd.Start(); err != nil {
return fmt.Errorf("starting Prometheus failed: %w", err)
}

p.running = true
return nil
}

func (p *Prometheus) WaitPrometheusUp(ctx context.Context, logger log.Logger) error {
func (p *Prometheus) waitPrometheusUp(ctx context.Context, logger log.Logger, prefix string) error {
if !p.running {
return errors.New("method Start was not invoked.")
}
return runutil.Retry(time.Second, ctx.Done(), func() error {
r, err := http.Get(fmt.Sprintf("http://%s/-/ready", p.addr))
return runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error {
r, err := http.Get(fmt.Sprintf("http://%s%s/-/ready", p.addr, prefix))
if err != nil {
return err
}
Expand All @@ -251,12 +260,15 @@ func (p *Prometheus) WaitPrometheusUp(ctx context.Context, logger log.Logger) er
})
}

func (p *Prometheus) Restart() error {
func (p *Prometheus) Restart(ctx context.Context, l log.Logger) error {
if err := p.cmd.Process.Signal(syscall.SIGTERM); err != nil {
return errors.Wrap(err, "failed to kill Prometheus. Kill it manually")
}
_ = p.cmd.Wait()
return p.start()
if err := p.start(); err != nil {
return err
}
return p.waitPrometheusUp(ctx, l, p.prefix)
}

// Dir returns TSDB dir.
Expand Down Expand Up @@ -290,7 +302,7 @@ func (p *Prometheus) writeConfig(config string) (err error) {
}

// Stop terminates Prometheus and clean up its data directory.
func (p *Prometheus) Stop() error {
func (p *Prometheus) Stop() (rerr error) {
if !p.running {
return nil
}
Expand All @@ -299,8 +311,25 @@ func (p *Prometheus) Stop() error {
if err := p.cmd.Process.Signal(syscall.SIGTERM); err != nil {
return errors.Wrapf(err, "failed to Prometheus. Kill it manually and clean %s dir", p.db.Dir())
}

err := p.cmd.Wait()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
if exitErr.ExitCode() != -1 {
fmt.Fprintln(os.Stderr, "Prometheus exited with", exitErr.ExitCode())
fmt.Fprintln(os.Stderr, "stdout:\n", p.stdout.String(), "\nstderr:\n", p.stderr.String())
} else {
err = nil
}
}
}

if err != nil {
return fmt.Errorf("waiting for Prometheus to exit: %w", err)
}
}
time.Sleep(time.Second / 2)

return p.cleanup()
}

Expand Down

0 comments on commit 3e023b6

Please sign in to comment.