Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests/robustness: init with powerfailure case #622

Merged
merged 1 commit into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/failpoint_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ jobs:
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: |
sudo make root-test
make gofail-enable
make test-failpoint
18 changes: 18 additions & 0 deletions .github/workflows/robustness_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: Robustness Test
on: [push, pull_request]
permissions: read-all
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@v4
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: |
make gofail-enable
# build bbolt with failpoint
go install ./cmd/bbolt
sudo -E PATH=$PATH make test-robustness
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ test-failpoint:
@echo "[failpoint] array freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint

.PHONY: root-test # run tests that require root
root-test:
.PHONY: test-robustness # Running robustness tests requires root permission
test-robustness:
go test -v ${TESTFLAGS} ./tests/dmflakey -test.root
go test -v ${TESTFLAGS} ./tests/robustness -test.root
22 changes: 3 additions & 19 deletions tests/dmflakey/dmflakey_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,19 @@ import (
"testing"
"time"

testutils "go.etcd.io/bbolt/tests/utils"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)

var enableRoot bool

func init() {
flag.BoolVar(&enableRoot, "test.root", false, "enable tests that require root")
}

func TestMain(m *testing.M) {
flag.Parse()
requiresRoot()
testutils.RequiresRoot()
os.Exit(m.Run())
}

func requiresRoot() {
if !enableRoot {
fmt.Fprintln(os.Stderr, "Skip tests that require root")
os.Exit(0)
}

if os.Getuid() != 0 {
fmt.Fprintln(os.Stderr, "This test must be run as root.")
os.Exit(1)
}
}

func TestBasic(t *testing.T) {
tmpDir := t.TempDir()

Expand Down
17 changes: 17 additions & 0 deletions tests/robustness/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//go:build linux

package robustness

import (
"flag"
"os"
"testing"

testutils "go.etcd.io/bbolt/tests/utils"
)

// TestMain is the entry point of the robustness test binary. It parses the
// command-line flags first so that the -test.root flag is visible to
// testutils.RequiresRoot, which terminates the process early when the root
// requirements are not met. Otherwise the tests run and their result becomes
// the process exit code.
func TestMain(m *testing.M) {
	flag.Parse()
	testutils.RequiresRoot()

	exitCode := m.Run()
	os.Exit(exitCode)
}
194 changes: 194 additions & 0 deletions tests/robustness/powerfailure_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
//go:build linux

package robustness

import (
"bytes"
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"testing"
"time"

"go.etcd.io/bbolt/tests/dmflakey"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)

// TestRestartFromPowerFailure is to test data after unexpected power failure.
func TestRestartFromPowerFailure(t *testing.T) {
flakey := initFlakeyDevice(t, t.Name(), dmflakey.FSTypeEXT4, "")
root := flakey.RootFS()

dbPath := filepath.Join(root, "boltdb")

args := []string{"bbolt", "bench",
"-work", // keep the database
"-path", dbPath,
"-count=1000000000",
"-batch-size=5", // separate total count into multiple truncation
}

logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name()))
logFd, err := os.Create(logPath)
require.NoError(t, err)
defer logFd.Close()

fpURL := "127.0.0.1:12345"

cmd := exec.Command(args[0], args[1:]...)
cmd.Stdout = logFd
cmd.Stderr = logFd
cmd.Env = append(cmd.Env, "GOFAIL_HTTP="+fpURL)
t.Logf("start %s", strings.Join(args, " "))
require.NoError(t, cmd.Start(), "args: %v", args)

errCh := make(chan error, 1)
go func() {
errCh <- cmd.Wait()
}()

defer func() {
if t.Failed() {
logData, err := os.ReadFile(logPath)
assert.NoError(t, err)
t.Logf("dump log:\n: %s", string(logData))
}
}()

time.Sleep(time.Duration(time.Now().UnixNano()%5+1) * time.Second)
t.Logf("simulate power failure")

activeFailpoint(t, fpURL, "beforeSyncMetaPage", "panic")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am thinking we should also support forcibly killing the process so that the process can exit at a random point?

This can be resolved in a followup PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. I am thinking about introducing random panic including force-kill. Let me handle this in the follow-up. Thanks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this test, the failure is injected on the device (fs) after the process has already terminated. Should we inject the failure (dropWrite) before we terminate (panic) the process?

For the forcibly killing case (we will support it in a followup PR), we do need to inject the failure on device (fs) after the process already terminates.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed with @fuweid; let's support more cases in follow-up PRs.

Sync times:
t1       t2          t3               x
FS
   f1   f2     f3               f4

if f4 < x
     f4 ~ x

if f4 > x
     t3 ~ x

Use gofailpoint

  • Set a huge value for commit interval: make sure all data after the last sync is lost
  • Set proper value for commit interval: make sure part of the data since last sync is lost
  • Set very small value for commit interval: almost no data loss

forcibly killing the process

  • same as above to support different commit interval


select {
case <-time.After(10 * time.Second):
t.Error("bbolt should stop with panic in seconds")
assert.NoError(t, cmd.Process.Kill())
case err := <-errCh:
require.Error(t, err)
}
require.NoError(t, flakey.PowerFailure(""))

st, err := os.Stat(dbPath)
require.NoError(t, err)
t.Logf("db size: %d", st.Size())

t.Logf("verify data")
output, err := exec.Command("bbolt", "check", dbPath).CombinedOutput()
require.NoError(t, err, "bbolt check output: %s", string(output))
}

// activeFailpoint activates the failpoint fpName on the failpoint HTTP
// endpoint listening at targetUrl (a "host:port" string without scheme) by
// sending an HTTP PUT whose body is fpVal, e.g. "panic".
//
// The test fails immediately if the request cannot be built or sent, if the
// response body cannot be read, or if the endpoint answers with anything
// other than 204 No Content.
func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	rawURL := "http://" + path.Join(targetUrl, fpName)
	u, err := url.Parse(rawURL)
	// Print the full URL that was parsed, not just the host part.
	require.NoError(t, err, "parse url %s", rawURL)

	req, err := http.NewRequest(http.MethodPut, u.String(), bytes.NewBufferString(fpVal))
	require.NoError(t, err)

	// Bound the request: the target process may be wedged, and without a
	// timeout the test would hang until the global go test timeout fires.
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	require.NoError(t, err)
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	require.Equal(t, http.StatusNoContent, resp.StatusCode, "response body: %s", string(data))
}

// FlakeyDevice is a dmflakey.Flakey device that is additionally mounted as a
// filesystem and knows how to simulate a power failure on that mount.
type FlakeyDevice interface {
	dmflakey.Flakey

	// RootFS returns the directory on which the device's filesystem is
	// mounted.
	RootFS() string

	// PowerFailure simulates a power failure by dropping all the writes.
	// mntOpt carries the mount options used when the filesystem is mounted
	// again afterwards.
	PowerFailure(mntOpt string) error
}

// initFlakeyDevice creates a flakey device called name, formatted as fsType,
// mounts it on a fresh temporary directory with the mount options mntOpt and
// returns it wrapped as a FlakeyDevice.
//
// Two cleanups are registered on t: one unmounts the directory and one tears
// the device down. Cleanups run in reverse registration order, so the unmount
// happens before the teardown.
func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mntOpt string) FlakeyDevice {
	// Directory handed to dmflakey.InitFlakey for the device's backing files.
	imageDir := t.TempDir()

	dev, err := dmflakey.InitFlakey(name, imageDir, fsType)
	require.NoError(t, err, "init flakey %s", name)
	t.Cleanup(func() {
		assert.NoError(t, dev.Teardown())
	})

	mountPoint := t.TempDir()
	mountErr := unix.Mount(dev.DevicePath(), mountPoint, string(fsType), 0, mntOpt)
	require.NoError(t, mountErr, "init rootfs on %s", mountPoint)
	t.Cleanup(func() {
		assert.NoError(t, unmountAll(mountPoint))
	})

	return &flakeyT{
		Flakey:  dev,
		rootDir: mountPoint,
		mntOpt:  mntOpt,
	}
}

// flakeyT pairs a dmflakey.Flakey device with the place and the options it
// is mounted with.
type flakeyT struct {
	dmflakey.Flakey

	// rootDir is the directory the device's filesystem is mounted on.
	rootDir string
	// mntOpt holds the mount options used as the default when the
	// filesystem is mounted again after a simulated power failure.
	mntOpt string
}

// RootFS reports the directory on which the flakey device's filesystem is
// mounted.
func (f *flakeyT) RootFS() string {
	return f.rootDir
}

// PowerFailure simulates a power failure on the mounted device. It switches
// the device to drop writes, unmounts the root filesystem, re-enables writes
// and finally mounts the filesystem again on the same directory.
//
// mntOpt selects the mount options for the remount; when it is empty the
// options the device was created with are used. The first failing step is
// returned as a wrapped error and the remaining steps are skipped.
func (f *flakeyT) PowerFailure(mntOpt string) error {
	// Resolve the remount options up front; this touches no device state.
	remountOpt := mntOpt
	if remountOpt == "" {
		remountOpt = f.mntOpt
	}

	err := f.DropWrites()
	if err != nil {
		return fmt.Errorf("failed to drop_writes: %w", err)
	}

	err = unmountAll(f.rootDir)
	if err != nil {
		return fmt.Errorf("failed to unmount rootfs %s: %w", f.rootDir, err)
	}

	err = f.AllowWrites()
	if err != nil {
		return fmt.Errorf("failed to allow_writes: %w", err)
	}

	err = unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, remountOpt)
	if err != nil {
		return fmt.Errorf("failed to mount rootfs %s: %w", f.rootDir, err)
	}
	return nil
}

// unmountAll repeatedly unmounts target until the kernel reports EINVAL,
// which is treated as "nothing is mounted there any more" and returned as
// success.
//
// At most 50 unmount calls are made. A successful call does not end the
// loop; the next call is what observes EINVAL. EBUSY is retried after a
// 500ms pause, and any other error is returned wrapped. If all attempts are
// used up, an error wrapping EBUSY is returned.
func unmountAll(target string) error {
	const (
		maxAttempts = 50
		busyBackoff = 500 * time.Millisecond
	)

	for attempt := 0; attempt < maxAttempts; attempt++ {
		switch err := unix.Unmount(target, 0); err {
		case nil:
			// Unmounted once; go around again to see whether anything is
			// still mounted on target.
		case unix.EBUSY:
			time.Sleep(busyBackoff)
		case unix.EINVAL:
			return nil
		default:
			return fmt.Errorf("failed to umount %s: %w", target, err)
		}
	}
	return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY)
}
26 changes: 26 additions & 0 deletions tests/utils/helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package utils

import (
"flag"
"fmt"
"os"
)

// enableRoot records whether the -test.root flag was passed on the command
// line. It stays false until the flags have been parsed.
var enableRoot bool

// init registers the -test.root flag on the default flag set so that test
// binaries importing this package accept it.
func init() {
	flag.BoolVar(
		&enableRoot,
		"test.root",
		false,
		"enable tests that require root",
	)
}

// RequiresRoot gates a test binary on root privileges. It returns normally
// only when the -test.root flag is set and the process runs with uid 0.
//
// Without the flag it prints a skip notice to stderr and exits with status 0;
// with the flag but without uid 0 it prints an error to stderr and exits with
// status 1. Flags must already be parsed when it is called.
func RequiresRoot() {
	switch {
	case !enableRoot:
		fmt.Fprintln(os.Stderr, "Skip tests that require root")
		os.Exit(0)
	case os.Getuid() != 0:
		fmt.Fprintln(os.Stderr, "This test must be run as root.")
		os.Exit(1)
	}
}
Loading