Skip to content

Commit

Permalink
Merge pull request alibaba#265 from BSWANG/main
Browse files Browse the repository at this point in the history
feat: support rdma resource metrics
  • Loading branch information
jzwlqx committed Apr 25, 2024
2 parents 2e79815 + 031bb44 commit acb1590
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 5 deletions.
1 change: 1 addition & 0 deletions cmd/exporter/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/procsock"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/procsoftnet"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/proctcpsummary"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/rdma"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracebiolatency"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracekernel"
_ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracenetiftxlatency"
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ require (
github.com/vishvananda/netns v0.0.4
golang.org/x/exp v0.0.0-20221204150635-6dcec336b2bb
golang.org/x/sync v0.1.0
golang.org/x/sys v0.8.0
golang.org/x/sys v0.10.0
google.golang.org/grpc v1.56.2
google.golang.org/protobuf v1.33.0
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -203,3 +203,5 @@ require (
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

replace github.com/vishvananda/netlink v1.2.1-beta.2 => github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdn
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
github.com/bshuster-repo/logrus-logstash-hook v0.4.1/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk=
github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d h1:v/jMfwlqJxCqdVtcNIiHwKDnfTBWGjNWFbfS+HYuFfc=
github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d/go.mod h1:whJevzBpTrid75eZy99s3DqCmy05NfibNaF2Ol5Ox5A=
github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8=
Expand Down Expand Up @@ -1120,8 +1122,6 @@ github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs=
github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
Expand Down Expand Up @@ -1476,8 +1476,8 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
Expand Down
81 changes: 81 additions & 0 deletions pkg/exporter/probe/rdma/erdma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package rdma

import (
"strings"

"github.com/alibaba/kubeskoop/pkg/exporter/probe"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
)

// linkTypeERdma is the link type reported for erdma devices. It doubles as
// the prefix of every erdma statistic metric name.
const linkTypeERdma = "erdma"

var (
	// erdmaStatisticCounterEntries maps each erdma statistic counter name to
	// the help text published with the corresponding metric.
	erdmaStatisticCounterEntries = map[string]string{
		"accept_failed_cnt": "The total number of failed connection accept attempts.",
		"accept_success_cnt": "The total number of successful connection accept attempts.",
		"accept_total_cnt": "The total number of connection accept attempts, successful or not.",
		"cmdq_comp_cnt": "The total number of command queue completions processed.",
		"cmdq_cq_armed_cnt": "The total number of command queue completion events that have been armed.",
		"cmdq_eq_event_cnt": "The total number of command queue event queue events received.",
		"cmdq_eq_notify_cnt": "The total number of command queue event queue notifications triggered.",
		"cmdq_submitted_cnt": "The total number of command queue submissions.",
		"connect_failed_cnt": "The total number of failed connection attempts.",
		"connect_reset_cnt": "The total number of connection attempts that have been reset.",
		"connect_success_cnt": "The total number of successful connection attempts.",
		"connect_timeout_cnt": "The total number of connection attempts that timed out.",
		"connect_total_cnt": "The total number of connection attempts, successful or not.",
		"erdma_aeq_event_cnt": "The total number of ERDMA asynchronous event queue events received.",
		"erdma_aeq_notify_cnt": "The total number of ERDMA asynchronous event queue notifications triggered.",
		"hw_bps_limit_drop_cnt": "The total number of packets dropped due to hardware bandwidth limit.",
		"hw_disable_drop_cnt": "The total number of packets dropped due to hardware being disabled.",
		"hw_pps_limit_drop_cnt": "The total number of packets dropped due to hardware packets-per-second limit.",
		"hw_rx_bps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive bandwidth limit.",
		"hw_rx_bytes_cnt": "The total number of bytes received by the hardware.",
		"hw_rx_disable_drop_cnt": "The total number of received packets dropped due to receive hardware being disabled.",
		"hw_rx_packets_cnt": "The total number of packets received by the hardware.",
		"hw_rx_pps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive packets-per-second limit.",
		"hw_tx_bytes_cnt": "The total number of bytes transmitted by the hardware.",
		"hw_tx_packets_cnt": "The total number of packets transmitted by the hardware.",
		"hw_tx_reqs_cnt": "The total number of transmit requests processed by the hardware.",
		"listen_create_cnt": "The total number of successfully created listen sockets.",
		"listen_destroy_cnt": "The total number of destroyed listen sockets.",
		"listen_failed_cnt": "The total number of failed attempts to create listen sockets.",
		"listen_ipv6_cnt": "The total number of listen sockets created for IPv6 addresses.",
		"listen_success_cnt": "The total number of successful listen operations.",
		"reject_cnt": "The total number of received connection requests that were rejected.",
		"reject_failed_cnt": "The total number of failed attempts to reject connection requests.",
		"verbs_alloc_mr_cnt": "The total number of successful memory region allocations using verbs API.",
		"verbs_alloc_mr_failed_cnt": "The total number of failed memory region allocation attempts using verbs API.",
		"verbs_alloc_pd_cnt": "The total number of successful protection domain allocations using verbs API.",
		"verbs_alloc_pd_failed_cnt": "The total number of failed protection domain allocation attempts using verbs API.",
		"verbs_alloc_uctx_cnt": "The total number of successful user context allocations using verbs API.",
		"verbs_alloc_uctx_failed_cnt": "The total number of failed user context allocation attempts using verbs API.",
		"verbs_create_cq_cnt": "The total number of successful completion queue creations using verbs API.",
		"verbs_create_cq_failed_cnt": "The total number of failed completion queue creation attempts using verbs API.",
		"verbs_destroy_cq_failed_cnt": "The total number of failed completion queue deletion using verbs API.",
		"verbs_create_qp_cnt": "The total number of successful queue pair creations using verbs API.",
		"verbs_create_qp_failed_cnt": "The total number of failed queue pair creation attempts using verbs API.",
		// This is the success counter; failures are reported separately by
		// verbs_destroy_qp_failed_cnt, so the help text must not say "failed".
		"verbs_destroy_qp_cnt": "The total number of destroyed queue pairs using verbs API.",
		"verbs_dealloc_pd_cnt": "The total number of deallocated protection domains using verbs API.",
		"verbs_dealloc_uctx_cnt": "The total number of deallocated user contexts using verbs API.",
		"verbs_dereg_mr_cnt": "The total number of successful memory region deregistrations using verbs API.",
		"verbs_dereg_mr_failed_cnt": "The total number of failed memory region deregistration attempts using verbs API.",
		"verbs_destroy_cq_cnt": "The total number of destroyed completion queues using verbs API.",
		"verbs_destroy_qp_failed_cnt": "The total number of failed attempts to destroy queue pairs (QPs) using verbs API.",
		"verbs_get_dma_mr_cnt": "The total number of successful direct memory access (DMA) memory region acquisitions using verbs API.",
		"verbs_get_dma_mr_failed_cnt": "The total number of failed attempts to acquire direct memory access (DMA) memory regions using verbs API.",
		"verbs_reg_usr_mr_cnt": "The total number of user memory regions successfully registered with the verbs API.",
		"verbs_reg_usr_mr_failed_cnt": "The total number of failed attempts to register user memory regions with the verbs API.",
	}

	// erdmaMetrics holds one counter metric definition per entry of
	// erdmaStatisticCounterEntries. Each metric is named
	// "<linkTypeERdma>_<counter>" and carries the device/type/port labels.
	// The slice order follows map iteration order and is therefore unspecified.
	erdmaMetrics = lo.Map(lo.Keys(erdmaStatisticCounterEntries), func(k string, _ int) probe.SingleMetricsOpts {
		return probe.SingleMetricsOpts{
			Name:           strings.Join([]string{linkTypeERdma, k}, "_"),
			VariableLabels: rdmaDevPortLabels,
			Help:           erdmaStatisticCounterEntries[k],
			ValueType:      prometheus.CounterValue,
		}
	})
)
55 changes: 55 additions & 0 deletions pkg/exporter/probe/rdma/mellanox.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package rdma

import (
"strings"

"github.com/alibaba/kubeskoop/pkg/exporter/probe"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
)

// linkTypeMellanox is the link type reported for Mellanox mlx5 devices. It
// doubles as the prefix of every mlx5 statistic metric name.
const linkTypeMellanox = "mellanox_mlx5"

var (
	// mlx5 maps each mlx5 statistic counter name to the help text published
	// with the corresponding metric.
	mlx5 = map[string]string{
		"rx_write_requests": "The number of received WRITE requests for the associated QPs.",
		"rx_read_requests": "The number of received READ requests for the associated QPs.",
		"rx_atomic_requests": "The number of received ATOMIC request for the associated QPs.",
		"out_of_buffer": "The number of drops occurred due to lack of WQE for the associated QPs.",
		"out_of_sequence": "The number of out of sequence packets received.",
		"duplicate_request": "Number of duplicate request packets.",
		"rnr_nak_retry_err": "The number of received RNR NAK packets. The QP retry limit was not exceeded.",
		"packet_seq_err": "The number of received NAK sequence error packets. The QP retry limit was not exceeded.",
		"implied_nak_seq_err": "Number of time the requested decided an ACK with a PSN larger than the expected PSN for an RDMA read or response.",
		"local_ack_timeout_err": "The number of times QP's ack timer expired for RC, XRC, DCT QPs at the sender side.",
		"rx_dct_connect": "The number of received connection request for the associated DCTs.",
		"resp_local_length_error": "The number of times responder detected local length errors.",
		"resp_cqe_error": "The number of times responder detected CQEs completed with errors.",
		"req_cqe_error": "The number of times requester detected CQEs completed with errors.",
		"req_remote_invalid_request": "The number of times requester detected remote invalid request errors.",
		"req_remote_access_errors": "The number of times requester detected remote access errors.",
		"resp_remote_access_errors": "The number of times responder detected remote access errors.",
		"resp_cqe_flush_error": "The number of times responder detected CQEs completed with flushed errors.",
		"req_cqe_flush_error": "The number of times requester detected CQEs completed with flushed errors.",
		"roce_adp_retrans": "The number of adaptive retransmissions for RoCE traffic",
		"roce_adp_retrans_to": "The number of times RoCE traffic reached timeout due to adaptive retransmission",
		"roce_slow_restart": "The number of times RoCE slow restart was used",
		"roce_slow_restart_cnps": "The number of times RoCE slow restart generated CNP packets",
		"roce_slow_restart_trans": "The number of times RoCE slow restart changed state to slow restart",
		"rp_cnp_ignored": "The number of CNP packets received and ignored by the Reaction Point HCA.",
		"rp_cnp_handled": "The number of CNP packets handled by the Reaction Point HCA to throttle the transmission rate.",
		"np_ecn_marked_roce_packets": "The number of RoCEv2 packets received by the notification point which were marked for experiencing the congestion (ECN bits where '11' on the ingress RoCE traffic) .",
		"np_cnp_sent": "The number of CNP packets sent by the Notification Point when it noticed congestion experienced in the RoCEv2 IP header (ECN bits).",
		"rx_icrc_encapsulated": "The number of RoCE packets with ICRC errors.",
	}

	// mlx5Metrics holds one counter metric definition per entry of mlx5. Each
	// metric is named "<linkTypeMellanox>_<counter>" and carries the
	// device/type/port labels. The slice order follows map iteration order.
	mlx5Metrics = lo.Map(lo.Keys(mlx5), func(counter string, _ int) probe.SingleMetricsOpts {
		metricName := strings.Join([]string{linkTypeMellanox, counter}, "_")
		helpText := mlx5[counter]
		return probe.SingleMetricsOpts{
			ValueType:      prometheus.CounterValue,
			Help:           helpText,
			VariableLabels: rdmaDevPortLabels,
			Name:           metricName,
		}
	})
)
117 changes: 117 additions & 0 deletions pkg/exporter/probe/rdma/rdma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package rdma

import (
"context"
"fmt"
"strconv"
"strings"

"github.com/alibaba/kubeskoop/pkg/exporter/nettop"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
log "github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"

"github.com/alibaba/kubeskoop/pkg/exporter/probe"
)

// probeName is the name the probe is registered under; it is also used as
// the metrics subsystem.
const probeName = "rdma"

// linkTypeUnknown is the link type reported for devices whose name matches
// no known vendor prefix.
const linkTypeUnknown = "unknown"

var (
	// resourceSummaryEntries lists the resource summary names exported as
	// per-device gauges.
	resourceSummaryEntries = []string{"cm_id", "cq", "ctx", "mr", "pd", "qp"}

	// rdmaDevLabels are the extra labels attached to per-device metrics.
	rdmaDevLabels = []string{"device", "type"}

	// rdmaDevPortLabels are the extra labels attached to per-port metrics:
	// the device labels followed by "port".
	rdmaDevPortLabels = append(append(make([]string, 0, len(rdmaDevLabels)+1), rdmaDevLabels...), "port")
)

// init registers the rdma metrics probe creator under probeName.
func init() {
	probe.MustRegisterMetricsProbe(probeName, metricsProbeCreator)
}

// metricsProbeCreator builds the rdma metrics probe. The exported metric set
// is one gauge per resource summary entry (labelled by device and type)
// followed by the mlx5 and erdma statistic counters.
func metricsProbeCreator() (probe.MetricsProbe, error) {
	// Per-device resource summary gauges.
	metricOpts := lo.Map(resourceSummaryEntries, func(name string, _ int) probe.SingleMetricsOpts {
		return probe.SingleMetricsOpts{
			Name:           name,
			VariableLabels: rdmaDevLabels,
			Help:           fmt.Sprintf("rdma resource summary %s", name),
			ValueType:      prometheus.GaugeValue,
		}
	})
	// Vendor-specific per-port statistic counters.
	metricOpts = append(metricOpts, mlx5Metrics...)
	metricOpts = append(metricOpts, erdmaMetrics...)

	p := &metricsProbe{}
	batchOpts := probe.BatchMetricsOpts{
		Namespace:         probe.MetricsNamespace,
		Subsystem:         probeName,
		VariableLabels:    probe.StandardMetricsLabels,
		SingleMetricsOpts: metricOpts,
	}
	batchMetrics := probe.NewBatchMetrics(batchOpts, p.collectOnce)
	return probe.NewMetricsProbe(probeName, p, batchMetrics), nil
}

// metricsProbe is the rdma metrics probe. It holds no state; data is
// gathered on demand by collectOnce.
type metricsProbe struct{}

// Start is a no-op: the probe needs no setup.
func (p *metricsProbe) Start(_ context.Context) error { return nil }

// Stop is a no-op: the probe holds nothing to release.
func (p *metricsProbe) Stop(_ context.Context) error { return nil }

// collectOnce emits the RDMA metrics of the host network entity.
//
// For every RDMA resource returned by netlink it emits the resource summary
// values labelled with the device name and link type. For devices of a known
// link type it additionally emits every per-port statistic, named
// "<linkType>_<statistic>" and labelled with the port index.
//
// A device whose link or statistics cannot be read is logged and skipped; an
// error is returned only when the host entity or the resource list itself is
// unavailable.
func (p *metricsProbe) collectOnce(emit probe.Emit) error {
	// RDMA metrics are only collected for the host network.
	entity, err := nettop.GetHostNetworkEntity()
	if err != nil {
		return err
	}
	rdmaRes, err := netlink.RdmaResourceList()
	if err != nil {
		return err
	}
	if len(rdmaRes) == 0 {
		return nil
	}
	standardLabelValues := probe.BuildStandardMetricsLabelValues(entity)
	for _, res := range rdmaRes {
		link, err := netlink.RdmaLinkByName(res.Name)
		if err != nil {
			log.Errorf("failed get rdma link %v, error: %v", res.Name, err)
			continue
		}
		linkType := rdmaLinkType(link)

		// Build the label values in fresh storage. Appending directly to
		// standardLabelValues could reuse its backing array across
		// iterations and overwrite values already handed to emit.
		deviceLabelValues := make([]string, 0, len(standardLabelValues)+2)
		deviceLabelValues = append(deviceLabelValues, standardLabelValues...)
		deviceLabelValues = append(deviceLabelValues, res.Name, linkType)
		for resKey, resVal := range res.RdmaResourceSummaryEntries {
			emit(resKey, deviceLabelValues, float64(resVal))
		}

		// Statistics are only exported for link types with registered metrics.
		if linkType == linkTypeUnknown {
			continue
		}
		linkStatistics, err := netlink.RdmaStatistic(link)
		if err != nil {
			log.Errorf("failed get rdma statistics %v, error: %v", res.Name, err)
			continue
		}

		for _, port := range linkStatistics.RdmaPortStatistics {
			// Same reasoning as above: every port gets its own label slice.
			devicePortLabelValues := make([]string, 0, len(deviceLabelValues)+1)
			devicePortLabelValues = append(devicePortLabelValues, deviceLabelValues...)
			devicePortLabelValues = append(devicePortLabelValues, strconv.FormatUint(uint64(port.PortIndex), 10))
			for statKey, statVal := range port.Statistics {
				emit(strings.Join([]string{linkType, statKey}, "_"), devicePortLabelValues, float64(statVal))
			}
		}
	}
	return nil
}

// rdmaLinkType classifies an RDMA link by the prefix of its device name (the
// part before the first "_") and returns linkTypeMellanox, linkTypeERdma or
// linkTypeUnknown. A nil link is reported as linkTypeUnknown.
func rdmaLinkType(link *netlink.RdmaLink) string {
	if link == nil {
		return linkTypeUnknown
	}

	// Device-name prefix of Mellanox mlx5 devices.
	// NOTE(review): assumes these devices are named like "mlx5_0" — confirm
	// on target hosts. The prefix cannot be compared with linkTypeMellanox
	// ("mellanox_mlx5"): a name cut at the first "_" never contains an
	// underscore, so that comparison could never match.
	const mlx5DevicePrefix = "mlx5"

	switch strings.SplitN(link.Attrs.Name, "_", 2)[0] {
	case mlx5DevicePrefix:
		return linkTypeMellanox
	case linkTypeERdma:
		return linkTypeERdma
	default:
		return linkTypeUnknown
	}
}

0 comments on commit acb1590

Please sign in to comment.