Skip to content

Commit

Permalink
Merge pull request #12214 from gyuho/fd
Browse files Browse the repository at this point in the history
*: optimize runtime.FDUsage + add OS level FD metrics
  • Loading branch information
gyuho authored Aug 13, 2020
2 parents ed27d9d + 5678779 commit 93cf449
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG-3.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Add [`etcd_server_client_requests_total` with `"type"` and `"client_api_version"` labels](https://github.com/etcd-io/etcd/pull/11687).
- Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738).
- Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3).
- Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214).

### etcd server

Expand Down Expand Up @@ -130,12 +131,16 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Add [`--unsafe-no-fsync`](https://github.com/etcd-io/etcd/pull/11946) flag.
- Setting the flag disables all uses of fsync, which is unsafe and will cause data loss. This flag makes it possible to run an etcd node for testing and development without placing lots of load on the file system.
- Add [etcd --auth-token-ttl](https://github.com/etcd-io/etcd/pull/11980) flag to customize `simpleTokenTTL` settings.
- Improve [runtime.FDUsage objects malloc of Memory Usage and CPU Usage](https://github.com/etcd-io/etcd/pull/11986).
- Improve [`runtime.FDUsage` call pattern to reduce object allocations, memory usage, and CPU usage](https://github.com/etcd-io/etcd/pull/11986).
- Improve [mvcc.watchResponse channel Memory Usage](https://github.com/etcd-io/etcd/pull/11987).
- Log [expensive request info in UnaryInterceptor](https://github.com/etcd-io/etcd/pull/12086).
- [Fix invalid Go type in etcdserverpb](https://github.com/etcd-io/etcd/pull/12000).
- [Improve healthcheck by using v3 range request and its corresponding timeout](https://github.com/etcd-io/etcd/pull/12195).

### Package `runtime`

- Optimize [`runtime.FDUsage` by removing unnecessary sorting](https://github.com/etcd-io/etcd/pull/12214).

### Package `embed`

- Remove [`embed.Config.Debug`](https://github.com/etcd-io/etcd/pull/10947).
Expand Down
18 changes: 17 additions & 1 deletion etcdserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@ var (
Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
},
[]string{"server_id"})

// fdUsed is a gauge holding the number of file descriptors currently in use.
// Its namespace, subsystem and name combine to the metric "os_fd_used".
// monitorFileDescriptor sets it from runtime.FDUsage.
fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "used",
Help: "The number of used file descriptors.",
})
// fdLimit is a gauge holding the file descriptor limit.
// Its namespace, subsystem and name combine to the metric "os_fd_limit".
// monitorFileDescriptor sets it from runtime.FDLimit.
fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "limit",
Help: "The file descriptor limit.",
})
)

func init() {
Expand All @@ -174,6 +187,8 @@ func init() {
prometheus.MustRegister(isLearner)
prometheus.MustRegister(learnerPromoteSucceed)
prometheus.MustRegister(learnerPromoteFailed)
prometheus.MustRegister(fdUsed)
prometheus.MustRegister(fdLimit)

currentVersion.With(prometheus.Labels{
"server_version": version.Version,
Expand All @@ -184,7 +199,6 @@ func init() {
}

func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) {

// This ticker checks the file descriptor requirements and counts all fds in use.
// It only logs a warning when used >= limit/5*4; no other action is taken.
// With more than 10K fds, the check itself is slow because of the work FDUsage() does.
Expand All @@ -198,11 +212,13 @@ func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) {
lg.Warn("failed to get file descriptor usage", zap.Error(err))
return
}
fdUsed.Set(float64(used))
limit, err := runtime.FDLimit()
if err != nil {
lg.Warn("failed to get file descriptor limit", zap.Error(err))
return
}
fdLimit.Set(float64(limit))
if used >= limit/5*4 {
lg.Warn("80% of file descriptors are used", zap.Uint64("used", used), zap.Uint64("limit", limit))
}
Expand Down
17 changes: 14 additions & 3 deletions pkg/runtime/fds_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
package runtime

import (
"io/ioutil"
"os"
"syscall"
)

Expand All @@ -29,9 +29,20 @@ func FDLimit() (uint64, error) {
}

func FDUsage() (uint64, error) {
fds, err := ioutil.ReadDir("/proc/self/fd")
return countFiles("/proc/self/fd")
}

// countFiles returns the number of entries in the directory named by dirname.
//
// Only the entry names are listed (Readdirnames). Compared with the stdlib
// "io/ioutil.ReadDir" this skips both the sorting and the per-entry lstat
// needed to build FileInfo values, neither of which matters for a count.
//
// If the directory cannot be opened or read, the error is returned together
// with a zero count.
func countFiles(dirname string) (uint64, error) {
	f, err := os.Open(dirname)
	if err != nil {
		return 0, err
	}
	// The handle is read-only, so a Close error carries no lost data and is
	// deliberately ignored.
	defer f.Close()

	names, err := f.Readdirnames(-1)
	if err != nil {
		return 0, err
	}
	return uint64(len(names)), nil
}

0 comments on commit 93cf449

Please sign in to comment.