Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

Commit

Permalink
task: allow start pprof at runtime by unix signal (#372)
Browse files Browse the repository at this point in the history
* task: allow start pprof at runtime by unix sig

* utils: fix CI, and test dynamic port allocate

* utils: fix a mis-used lock

* tests: use SimpleHTTPServer instead of nc

* tests: remove test dynamic port

* tests: give CI more time

* tests: add retry on bind pprof address

* utils: use OS assigned port instead of fixed port

* utils: start pprof at 0.0.0.0:0

* utils: remove unused variables

* *: apply some suggestions from code review

- start pprof by import for side-effect
- change the dynamic pprof hint to DEBUG level
- check whether pprof started outside the pprof goroutine

* tests: use pkill instead of kill to send signal

* utils: print log after success to listen TCP addr

* utils: don't log dynamic pprof enabled log

* Update pkg/utils/pprof.go

Co-authored-by: kennytm <kennytm@gmail.com>

Co-authored-by: 3pointer <luancheng@pingcap.com>
Co-authored-by: kennytm <kennytm@gmail.com>
  • Loading branch information
3 people authored Jun 30, 2020
1 parent 1a63149 commit aa4b7d5
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 17 deletions.
18 changes: 5 additions & 13 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ package cmd

import (
"context"
"net/http"
"net/http/pprof"
"os"
"path/filepath"
"sync"
Expand All @@ -16,7 +14,6 @@ import (
"github.com/pingcap/tidb/util/logutil"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"go.uber.org/zap"

"github.com/pingcap/br/pkg/gluetidb"
"github.com/pingcap/br/pkg/summary"
Expand Down Expand Up @@ -134,16 +131,11 @@ func Init(cmd *cobra.Command) (err error) {
err = e
return
}
go func() {
// Make sure pprof is registered.
_ = pprof.Handler
if len(statusAddr) != 0 {
log.Info("start pprof", zap.String("addr", statusAddr))
if e := http.ListenAndServe(statusAddr, nil); e != nil {
log.Warn("fail to start pprof", zap.String("addr", statusAddr), zap.Error(e))
}
}
}()
if statusAddr != "" {
utils.StartPProfListener(statusAddr)
} else {
utils.StartDynamicPProfListener()
}
})
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ func restoreTableStream(
errCh <- err
return
}
log.Debug("get rules", zap.Any("rules", rules))
log.Debug("get rules", zap.Any("rules", rules), zap.Strings("pd", pdAddr))
log.Debug("try to remove tiflash of table", zap.Stringer("table name", t.Table.Name))
tiFlashRep, err := client.RemoveTiFlashOfTable(t.CreatedTable, rules)
if err != nil {
Expand Down
9 changes: 9 additions & 0 deletions pkg/utils/dyn_pprof_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// +build !linux,!darwin,!freebsd,!unix
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

// StartDynamicPProfListener is a no-op in this file. The build constraint at
// the top of the file selects this definition for targets other than linux,
// darwin, freebsd and unix, so callers can invoke it unconditionally.
func StartDynamicPProfListener() {
// Nothing to do on systems without POSIX signal support.
}
36 changes: 36 additions & 0 deletions pkg/utils/dyn_pprof_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// +build linux darwin freebsd unix
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

import (
"os"
"os/signal"
"syscall"

"github.com/pingcap/log"
"go.uber.org/zap"
)

const (
// startPProfSignal is the signal that triggers a call to StartPProfListener
// at runtime (see onSignalStartPProf).
startPProfSignal = syscall.SIGUSR1
)

var (
// signalChan is the channel registered with signal.Notify in
// StartDynamicPProfListener. It is buffered (capacity 1) because the
// os/signal package does not block when delivering to the channel.
signalChan = make(chan os.Signal, 1)
)

// StartDynamicPProfListener registers signalChan to receive `startPProfSignal`
// and spawns a goroutine (onSignalStartPProf) that calls StartPProfListener
// whenever that signal arrives.
func StartDynamicPProfListener() {
signal.Notify(signalChan, startPProfSignal)
go onSignalStartPProf(signalChan)
}

// onSignalStartPProf drains signals until the channel is closed. Every
// `startPProfSignal` it receives is logged and followed by a call to
// StartPProfListener on "0.0.0.0:0"; any other signal is ignored.
func onSignalStartPProf(signals <-chan os.Signal) {
	for received := range signals {
		if received != startPProfSignal {
			continue
		}
		log.Info("signal received, starting pprof...", zap.Stringer("signal", received))
		StartPProfListener("0.0.0.0:0")
	}
}
57 changes: 57 additions & 0 deletions pkg/utils/pprof.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package utils

import (
"fmt"
"net"
"net/http"
"os"

// #nosec
// register HTTP handler for /debug/pprof
_ "net/http/pprof"
"sync"

"github.com/pingcap/failpoint"
"github.com/pingcap/log"
"go.uber.org/zap"
)

var (
// startedPProf is the address the pprof listener is currently bound to,
// or "" when no listener has been started (or serving has failed).
startedPProf = ""
// mu guards startedPProf.
mu sync.Mutex
)

// StartPProfListener binds a TCP listener on statusAddr and serves HTTP on it
// from a new goroutine using the default serve mux (handler nil), on which the
// blank import of net/http/pprof registers the /debug/pprof handlers.
//
// If a listener has already been started, or binding statusAddr fails, a
// warning is logged and nothing else happens. On success the address actually
// bound is logged and also printed to stderr. If serving later fails, the
// recorded address is cleared so that a subsequent call may start a new
// listener.
func StartPProfListener(statusAddr string) {
	mu.Lock()
	defer mu.Unlock()
	if startedPProf != "" {
		log.Warn("Try to start pprof when it has been started, nothing will happen", zap.String("address", startedPProf))
		return
	}
	// Failpoint marker: when enabled, overrides statusAddr with a fixed port.
	failpoint.Inject("determined-pprof-port", func(v failpoint.Value) {
		port := v.(int)
		statusAddr = fmt.Sprintf(":%d", port)
		log.Info("injecting failpoint, pprof will start at determined port", zap.Int("port", port))
	})
	listener, err := net.Listen("tcp", statusAddr)
	if err != nil {
		log.Warn("failed to start pprof", zap.String("addr", statusAddr), zap.Error(err))
		return
	}
	// Keep the bound address in a local so the serving goroutine below never
	// has to read the mutex-guarded startedPProf without holding mu.
	addr := listener.Addr().String()
	startedPProf = addr
	log.Info("bound pprof to addr", zap.String("addr", addr))
	_, _ = fmt.Fprintf(os.Stderr, "bound pprof to addr %s\n", addr)

	go func() {
		if e := http.Serve(listener, nil); e != nil {
			log.Warn("failed to serve pprof", zap.String("addr", addr), zap.Error(e))
			// Mark pprof as stopped so it can be started again later.
			mu.Lock()
			startedPProf = ""
			mu.Unlock()
		}
	}()
}
15 changes: 12 additions & 3 deletions tests/br_other/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,21 @@ fi

# backup full with ratelimit = 1 to make sure this backup task won't finish quickly
echo "backup start to test lock file"
run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/lock" --ratelimit 1 --ratelimit-unit 1 --concurrency 4 > /dev/null 2>&1 &
PPROF_PORT=6080
GO_FAILPOINTS="github.com/pingcap/br/pkg/utils/determined-pprof-port=return($PPROF_PORT)" \
run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/lock" --ratelimit 1 --ratelimit-unit 1 --concurrency 4 2>&1 >/dev/null &
# record last backup pid
_pid=$!

# give the former backup some time to write down lock file.
sleep 2
# give the former backup some time to write down lock file (and initialize signal listener).
sleep 1
pkill -10 -P $_pid
echo "starting pprof..."

# give the former backup some time to write down lock file (and start pprof server).
sleep 1
curl "http://localhost:$PPROF_PORT/debug/pprof/trace?seconds=1" 2>&1 > /dev/null
echo "pprof started..."

backup_fail=0
echo "another backup start expect to fail due to last backup add a lockfile"
Expand Down

0 comments on commit aa4b7d5

Please sign in to comment.