Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

task: allow start pprof at runtime by unix signal #372

Merged
merged 19 commits into from
Jun 30, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ package cmd

import (
"context"
"net/http"
"net/http/pprof"
"os"
"path/filepath"
"sync"
Expand All @@ -16,7 +14,6 @@ import (
"github.com/pingcap/tidb/util/logutil"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"go.uber.org/zap"

"github.com/pingcap/br/pkg/gluetidb"
"github.com/pingcap/br/pkg/summary"
Expand Down Expand Up @@ -134,16 +131,11 @@ func Init(cmd *cobra.Command) (err error) {
err = e
return
}
go func() {
// Make sure pprof is registered.
_ = pprof.Handler
if len(statusAddr) != 0 {
log.Info("start pprof", zap.String("addr", statusAddr))
if e := http.ListenAndServe(statusAddr, nil); e != nil {
log.Warn("fail to start pprof", zap.String("addr", statusAddr), zap.Error(e))
}
}
}()
if statusAddr != "" {
utils.StartPProfListener(statusAddr)
} else {
utils.StartDynamicPProfListener()
}
})
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ func restoreTableStream(
errCh <- err
return
}
log.Debug("get rules", zap.Any("rules", rules))
log.Debug("get rules", zap.Any("rules", rules), zap.Strings("pd", pdAddr))
log.Debug("try to remove tiflash of table", zap.Stringer("table name", t.Table.Name))
tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules)
if err != nil {
Expand Down
9 changes: 9 additions & 0 deletions pkg/utils/dyn_pprof_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// +build !linux,!darwin,!freebsd,!unix
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

// StartDynamicPProfListener is the stub used on platforms excluded by the
// build constraint at the top of this file (anything that is not linux,
// darwin, freebsd or unix). It takes no arguments, returns nothing and has
// an empty body, so calling it has no effect.
func StartDynamicPProfListener() {
	// Nothing to do on systems that do not support POSIX signals.
}
40 changes: 40 additions & 0 deletions pkg/utils/dyn_pprof_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// +build linux darwin freebsd unix
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

import (
"fmt"
"os"
"os/signal"
"syscall"

"github.com/pingcap/log"
"go.uber.org/zap"
)

// startPProfSignal is the signal that asks the running process to start its
// pprof server.
const startPProfSignal = syscall.SIGUSR1

// signalChan receives the signals registered by StartDynamicPProfListener.
// It has a buffer of one element.
var signalChan = make(chan os.Signal, 1)

// StartDynamicPProfListener starts the listener that will enable pprof when received `startPProfSignal`.
func StartDynamicPProfListener() {
signal.Notify(signalChan, startPProfSignal)
go onSignalStartPProf(signalChan)
log.Info(fmt.Sprintf("dynamic pprof started, you can enable pprof by `kill -s %d %d`)",
startPProfSignal,
os.Getpid()))
YuJuncen marked this conversation as resolved.
Show resolved Hide resolved
}

// onSignalStartPProf consumes signals until the channel is closed. For every
// received `startPProfSignal` it logs the event and asks StartPProfListener to
// serve pprof on "0.0.0.0:0"; any other signal is ignored.
func onSignalStartPProf(signals <-chan os.Signal) {
	for {
		received, open := <-signals
		if !open {
			return
		}
		if received != startPProfSignal {
			continue
		}
		log.Info("signal received, starting pprof...", zap.Stringer("signal", received))
		StartPProfListener("0.0.0.0:0")
	}
}
57 changes: 57 additions & 0 deletions pkg/utils/pprof.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

import (
"fmt"
"net"
"net/http"
"net/http/pprof"
"sync"

"github.com/pingcap/failpoint"
"github.com/pingcap/log"
"go.uber.org/zap"
)

var (
	// startedPProf holds the address the pprof server is bound to, or the
	// empty string while no pprof server is running.
	startedPProf = ""
	// mu guards startedPProf.
	mu = new(sync.Mutex)
)

// StartPProfListener forks a new goroutine listening on specified port and provide pprof info.
func StartPProfListener(statusAddr string) {
mu.Lock()
defer mu.Unlock()

if startedPProf != "" {
log.Warn("Try to start pprof when it has been started, nothing will happen", zap.String("address", startedPProf))
return
}

go func() {
_ = pprof.Handler
YuJuncen marked this conversation as resolved.
Show resolved Hide resolved
if len(statusAddr) != 0 {
mu.Lock()
failpoint.Inject("determined-pprof-port", func(v failpoint.Value) {
port := v.(int)
statusAddr = fmt.Sprintf(":%d", port)
log.Info("injecting failpoint, pprof will start at determined port", zap.Int("port", port))
})
listener, err := net.Listen("tcp", statusAddr)
log.Info("bind pprof to addr", zap.String("addr", listener.Addr().String()))
YuJuncen marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Warn("failed to start pprof", zap.String("addr", statusAddr), zap.Error(err))
}
startedPProf = listener.Addr().String()
mu.Unlock()
YuJuncen marked this conversation as resolved.
Show resolved Hide resolved
if e := http.Serve(listener, nil); e != nil {
log.Warn("failed to serve pprof", zap.String("addr", startedPProf), zap.Error(e))
mu.Lock()
startedPProf = ""
mu.Unlock()
return
}
}
}()
}
18 changes: 15 additions & 3 deletions tests/br_other/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,24 @@ fi

# backup full with ratelimit = 1 to make sure this backup task won't finish quickly
echo "backup start to test lock file"
run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/lock" --ratelimit 1 --ratelimit-unit 1 --concurrency 4 > /dev/null 2>&1 &
BACKGROUND_LOG=$TEST_DIR/bg.log
PPROF_PORT=6080
rm -f $BACKGROUND_LOG
GO_FAILPOINTS="github.com/pingcap/br/pkg/utils/determined-pprof-port=return($PPROF_PORT)" \
run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/lock" --ratelimit 1 --ratelimit-unit 1 --concurrency 4 > $BACKGROUND_LOG 2>&1 &
# record last backup pid
_pid=$!

# give the former backup some time to write down lock file.
sleep 2
# give the former backup some time to write down lock file (and initialize signal listener).
sleep 1
# Extract the `kill` command that the background backup logged for enabling pprof.
start_pprof=$(grep 'dynamic pprof started, you can enable pprof by' "$BACKGROUND_LOG" | grep -oP 'kill -s 10 [0-9]+' | head -n1)
echo "executing $start_pprof"
$start_pprof

# give the former backup some time to write down lock file (and start pprof server).
sleep 1
# Redirect stdout first and then stderr, so that both streams are discarded.
curl "http://localhost:$PPROF_PORT/debug/pprof/trace?seconds=1" > /dev/null 2>&1


backup_fail=0
echo "another backup start expect to fail due to last backup add a lockfile"
Expand Down