From c2127aa931b0765421e2c4ec7653e50f2ee90ff8 Mon Sep 17 00:00:00 2001 From: Breezewish Date: Thu, 26 Nov 2020 15:24:22 +0800 Subject: [PATCH 1/3] Refine cluster info Signed-off-by: Breezewish --- .../_shared/Vagrantfile.partial.pubKey.rb | 24 +- etc/manualTestEnv/complexCase1/README.md | 36 ++ etc/manualTestEnv/complexCase1/Vagrantfile | 40 ++ etc/manualTestEnv/complexCase1/topology.yaml | 85 ++++ etc/manualTestEnv/multiHost/README.md | 2 +- etc/manualTestEnv/multiReplica/README.md | 2 +- etc/manualTestEnv/singleHost/README.md | 2 +- .../singleHostMultiDisk/README.md | 2 +- pkg/apiserver/clusterinfo/host.go | 402 +++--------------- .../clusterinfo/hostinfo/cluster_config.go | 82 ++++ .../clusterinfo/hostinfo/cluster_hardware.go | 149 +++++++ .../clusterinfo/hostinfo/cluster_load.go | 92 ++++ .../clusterinfo/hostinfo/hostinfo.go | 95 +++++ pkg/apiserver/clusterinfo/service.go | 87 ++-- pkg/apiserver/clusterinfo/statistics.go | 191 +++++++++ pkg/utils/host/host.go | 49 +++ pkg/utils/topology/pd.go | 5 +- pkg/utils/topology/store.go | 7 +- pkg/utils/topology/tidb.go | 6 +- pkg/utils/topology/topology.go | 31 -- .../apps/ClusterInfo/components/DiskTable.tsx | 212 +++++++++ .../apps/ClusterInfo/components/HostTable.tsx | 322 +++++++------- .../components/Statistics.module.less | 8 + .../ClusterInfo/components/Statistics.tsx | 102 +++++ .../components/StoreLocationTree/index.tsx | 13 +- ui/lib/apps/ClusterInfo/pages/List.tsx | 14 + ui/lib/apps/ClusterInfo/translations/en.yaml | 28 +- ui/lib/apps/ClusterInfo/translations/zh.yaml | 26 +- 28 files changed, 1471 insertions(+), 643 deletions(-) create mode 100644 etc/manualTestEnv/complexCase1/README.md create mode 100644 etc/manualTestEnv/complexCase1/Vagrantfile create mode 100644 etc/manualTestEnv/complexCase1/topology.yaml create mode 100644 pkg/apiserver/clusterinfo/hostinfo/cluster_config.go create mode 100644 pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go create mode 100644 
pkg/apiserver/clusterinfo/hostinfo/cluster_load.go create mode 100644 pkg/apiserver/clusterinfo/hostinfo/hostinfo.go create mode 100644 pkg/apiserver/clusterinfo/statistics.go create mode 100644 pkg/utils/host/host.go create mode 100644 ui/lib/apps/ClusterInfo/components/DiskTable.tsx create mode 100644 ui/lib/apps/ClusterInfo/components/Statistics.module.less create mode 100644 ui/lib/apps/ClusterInfo/components/Statistics.tsx diff --git a/etc/manualTestEnv/_shared/Vagrantfile.partial.pubKey.rb b/etc/manualTestEnv/_shared/Vagrantfile.partial.pubKey.rb index 8b7a381637..c319df4019 100644 --- a/etc/manualTestEnv/_shared/Vagrantfile.partial.pubKey.rb +++ b/etc/manualTestEnv/_shared/Vagrantfile.partial.pubKey.rb @@ -2,23 +2,25 @@ ssh_pub_key = File.readlines("#{File.dirname(__FILE__)}/vagrant_key.pub").first.strip config.vm.box = "hashicorp/bionic64" - config.vm.provision "shell", privileged: false, inline: <<-SHELL + config.vm.provision "zsh", type: "shell", privileged: false, inline: <<-SHELL + echo "Installing zsh" sudo apt install -y zsh sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" sudo chsh -s /usr/bin/zsh vagrant + SHELL + config.vm.provision "private_key", type: "shell", privileged: false, inline: <<-SHELL + echo "Inserting private key" echo #{ssh_pub_key} >> /home/vagrant/.ssh/authorized_keys SHELL - config.vm.provision "shell", privileged: true, inline: <<-SHELL - echo "setting ulimit" - sudo echo "fs.file-max = 65535" >> /etc/sysctl.conf - sudo sysctl -p - sudo echo "* hard nofile 65535" >> /etc/security/limits.conf - sudo echo "* soft nofile 65535" >> /etc/security/limits.conf - sudo echo "root hard nofile 65535" >> /etc/security/limits.conf - sudo echo "root hard nofile 65535" >> /etc/security/limits.conf + config.vm.provision "ulimit", type: "shell", privileged: true, inline: <<-SHELL + echo "Setting ulimit" + echo "fs.file-max = 65535" >> /etc/sysctl.conf + sysctl -p + echo "* hard nofile 65535" >> 
/etc/security/limits.conf + echo "* soft nofile 65535" >> /etc/security/limits.conf + echo "root hard nofile 65535" >> /etc/security/limits.conf + echo "root hard nofile 65535" >> /etc/security/limits.conf SHELL end - -# ulimit ref: https://my.oschina.net/u/914655/blog/3067520 diff --git a/etc/manualTestEnv/complexCase1/README.md b/etc/manualTestEnv/complexCase1/README.md new file mode 100644 index 0000000000..24ea4d0548 --- /dev/null +++ b/etc/manualTestEnv/complexCase1/README.md @@ -0,0 +1,36 @@ +# complexCase1 + +TiDB, PD, TiKV, TiFlash each in different hosts. + +## Usage + +1. Start the box: + + ```bash + VAGRANT_EXPERIMENTAL="disks" vagrant up + ``` + +1. Use [TiUP](https://tiup.io/) to deploy the cluster to the box (only need to do it once): + + ```bash + tiup cluster deploy complexCase1 v4.0.8 topology.yaml -i ../_shared/vagrant_key -y --user vagrant + ``` + +1. Start the cluster in the box: + + ```bash + tiup cluster start complexCase1 + ``` + +1. Start TiDB Dashboard server: + + ```bash + bin/tidb-dashboard --pd http://10.0.1.31:2379 + ``` + +## Cleanup + +```bash +tiup cluster destroy complexCase1 -y +vagrant destroy --force +``` diff --git a/etc/manualTestEnv/complexCase1/Vagrantfile b/etc/manualTestEnv/complexCase1/Vagrantfile new file mode 100644 index 0000000000..b941effb1d --- /dev/null +++ b/etc/manualTestEnv/complexCase1/Vagrantfile @@ -0,0 +1,40 @@ +load "#{File.dirname(__FILE__)}/../_shared/Vagrantfile.partial.pubKey.rb" + +Vagrant.configure("2") do |config| + config.vm.provider "virtualbox" do |v| + v.memory = 1024 + v.cpus = 1 + end + + (1..5).each do |i| + config.vm.define "node#{i}" do |node| + node.vm.network "private_network", ip: "10.0.1.#{i+30}" + (1..4).each do |j| + node.vm.disk :disk, size: "10GB", name: "disk-#{i}-#{j}" + end + end + end + + config.vm.provision "disk", type: "shell", privileged: false, inline: <<-SHELL + echo "Formatting disks" + sudo mkfs.ext4 -j -L hdd1 /dev/sdb + sudo mkfs.ext4 -j -L hdd2 /dev/sdc + sudo mkfs.ext4 
-j -L hdd3 /dev/sdd + sudo mkfs.ext4 -j -L hdd4 /dev/sde + + echo "Mounting directories" + sudo mkdir -p /pingcap/tidb-data + echo "/dev/sdb /pingcap/tidb-data ext4 defaults 0 0" | sudo tee -a /etc/fstab + sudo mount /pingcap/tidb-data + + sudo mkdir -p /pingcap/tidb-deploy + sudo mkdir -p /pingcap/tidb-data/tikv-1 + sudo mkdir -p /pingcap/tidb-data/tikv-2 + echo "/dev/sdc /pingcap/tidb-deploy ext4 defaults 0 0" | sudo tee -a /etc/fstab + echo "/dev/sdd /pingcap/tidb-data/tikv-1 ext4 defaults 0 0" | sudo tee -a /etc/fstab + echo "/dev/sde /pingcap/tidb-data/tikv-2 ext4 defaults 0 0" | sudo tee -a /etc/fstab + sudo mount /pingcap/tidb-deploy + sudo mount /pingcap/tidb-data/tikv-1 + sudo mount /pingcap/tidb-data/tikv-2 + SHELL +end diff --git a/etc/manualTestEnv/complexCase1/topology.yaml b/etc/manualTestEnv/complexCase1/topology.yaml new file mode 100644 index 0000000000..8bd2db0e9b --- /dev/null +++ b/etc/manualTestEnv/complexCase1/topology.yaml @@ -0,0 +1,85 @@ +global: + user: tidb + deploy_dir: /pingcap/tidb-deploy + data_dir: /pingcap/tidb-data + +server_configs: + tikv: + server.grpc-concurrency: 1 + raftstore.apply-pool-size: 1 + raftstore.store-pool-size: 1 + readpool.unified.max-thread-count: 1 + readpool.storage.use-unified-pool: false + readpool.coprocessor.use-unified-pool: true + storage.block-cache.capacity: 256MB + raftstore.capacity: 5GB + +# Overview: +# 31: 1 PD, 1 TiDB, 2 TiKV +# 32: 1 TiDB, 2 TiKV +# 33: 1 PD, 1 TiFlash +# 34: 2 TiKV, 1 TiFlash +# 35: 1 TiFlash + +pd_servers: + - host: 10.0.1.31 + - host: 10.0.1.33 + +tikv_servers: + - host: 10.0.1.31 + port: 20160 + status_port: 20180 + data_dir: /pingcap/tidb-data/tikv-1/tikv-20160 + config: + server.labels: { host: "tikv1" } + - host: 10.0.1.31 + port: 20161 + status_port: 20181 + data_dir: /pingcap/tidb-data/tikv-2/tikv-20161 + config: + server.labels: { host: "tikv2" } + - host: 10.0.1.32 + port: 20160 + status_port: 20180 + data_dir: /pingcap/tidb-data/tikv-1/tikv-20160 + config: + 
server.labels: { host: "tikv1" } + - host: 10.0.1.32 + port: 20161 + status_port: 20181 + data_dir: /pingcap/tidb-data/tikv-2/tikv-20161 + config: + server.labels: { host: "tikv2" } + - host: 10.0.1.34 + port: 20160 + status_port: 20180 + data_dir: /pingcap/tidb-data/tikv-1/tikv-20160 + config: + server.labels: { host: "tikv1" } + - host: 10.0.1.34 + port: 20161 + status_port: 20181 + data_dir: /pingcap/tidb-data/tikv-2/tikv-20161 + config: + server.labels: { host: "tikv2" } + +tiflash_servers: + - host: 10.0.1.33 + data_dir: /pingcap/tidb-data/tikv-1/tiflash + - host: 10.0.1.34 + data_dir: /pingcap/tidb-data/tikv-2/tiflash + - host: 10.0.1.35 + data_dir: /pingcap/tidb-data/tikv-1/tiflash + +tidb_servers: + - host: 10.0.1.31 + - host: 10.0.1.32 + +grafana_servers: + - host: 10.0.1.31 + +monitoring_servers: + - host: 10.0.1.31 + +alertmanager_servers: + - host: 10.0.1.31 diff --git a/etc/manualTestEnv/multiHost/README.md b/etc/manualTestEnv/multiHost/README.md index 60b8058c4f..dad3f81b6e 100644 --- a/etc/manualTestEnv/multiHost/README.md +++ b/etc/manualTestEnv/multiHost/README.md @@ -13,7 +13,7 @@ TiDB, PD, TiKV, TiFlash each in different hosts. 1. Use [TiUP](https://tiup.io/) to deploy the cluster to the box (only need to do it once): ```bash - tiup cluster deploy multiHost v4.0.4 topology.yaml -i ../_shared/vagrant_key -y --user vagrant + tiup cluster deploy multiHost v4.0.8 topology.yaml -i ../_shared/vagrant_key -y --user vagrant ``` 1. Start the cluster in the box: diff --git a/etc/manualTestEnv/multiReplica/README.md b/etc/manualTestEnv/multiReplica/README.md index fc31ff5941..32111caa53 100644 --- a/etc/manualTestEnv/multiReplica/README.md +++ b/etc/manualTestEnv/multiReplica/README.md @@ -13,7 +13,7 @@ Multiple TiKV nodes in different labels. 1. 
Use [TiUP](https://tiup.io/) to deploy the cluster to the box (only need to do it once): ```bash - tiup cluster deploy multiReplica v4.0.4 topology.yaml -i ../_shared/vagrant_key -y --user vagrant + tiup cluster deploy multiReplica v4.0.8 topology.yaml -i ../_shared/vagrant_key -y --user vagrant ``` 1. Start the cluster in the box: diff --git a/etc/manualTestEnv/singleHost/README.md b/etc/manualTestEnv/singleHost/README.md index 4d3f7413ab..d1d8c2b6ad 100644 --- a/etc/manualTestEnv/singleHost/README.md +++ b/etc/manualTestEnv/singleHost/README.md @@ -13,7 +13,7 @@ TiDB, PD, TiKV, TiFlash in the same host. 1. Use [TiUP](https://tiup.io/) to deploy the cluster to the box (only need to do it once): ```bash - tiup cluster deploy singleHost v4.0.4 topology.yaml -i ../_shared/vagrant_key -y --user vagrant + tiup cluster deploy singleHost v4.0.8 topology.yaml -i ../_shared/vagrant_key -y --user vagrant ``` 1. Start the cluster in the box: diff --git a/etc/manualTestEnv/singleHostMultiDisk/README.md b/etc/manualTestEnv/singleHostMultiDisk/README.md index 4cb2dd419e..cb9d6eabed 100644 --- a/etc/manualTestEnv/singleHostMultiDisk/README.md +++ b/etc/manualTestEnv/singleHostMultiDisk/README.md @@ -13,7 +13,7 @@ All instances in a single host, but on different disks. 1. Use [TiUP](https://tiup.io/) to deploy the cluster to the box (only need to do it once): ```bash - tiup cluster deploy singleHostMultiDisk v4.0.4 topology.yaml -i ../_shared/vagrant_key -y --user vagrant + tiup cluster deploy singleHostMultiDisk v4.0.8 topology.yaml -i ../_shared/vagrant_key -y --user vagrant ``` 1. 
Start the cluster in the box: diff --git a/pkg/apiserver/clusterinfo/host.go b/pkg/apiserver/clusterinfo/host.go index fb5ce32323..3532c3e075 100644 --- a/pkg/apiserver/clusterinfo/host.go +++ b/pkg/apiserver/clusterinfo/host.go @@ -14,385 +14,83 @@ package clusterinfo import ( - "math" - "path/filepath" - "strconv" - "strings" + "sort" "github.com/jinzhu/gorm" -) - -type CPUUsage struct { - Idle float64 `json:"idle"` - System float64 `json:"system"` -} + "github.com/pingcap/log" + "github.com/thoas/go-funk" + "go.uber.org/zap" -type Memory struct { - Used int `json:"used"` - Total int `json:"total"` -} - -type Partition struct { - Path string `json:"path"` - FSType string `json:"fstype"` - Free int `json:"free"` - Total int `json:"total"` - - ServerType string // identify TiFlash -} - -type HostInfo struct { - IP string `json:"ip"` - CPUCore int `json:"cpu_core,omitempty"` - *CPUUsage `json:"cpu_usage,omitempty"` - *Memory `json:"memory,omitempty"` - Partitions []PartitionInstance `json:"partitions,omitempty"` - Unavailable bool `json:"unavailable"` -} - -type Instance struct { - Address string `gorm:"column:INSTANCE" json:"address"` - ServerType string `gorm:"column:TYPE" json:"server_type"` -} - -type PartitionInstance struct { - Partition `json:"partition"` - Instance `json:"instance"` -} + "github.com/pingcap-incubator/tidb-dashboard/pkg/apiserver/clusterinfo/hostinfo" + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/topology" +) -func GetAllHostInfo(db *gorm.DB) ([]HostInfo, error) { - hostMap, err := loadHosts(db) +// fetchAllInstanceHosts fetches all hosts in the cluster and return in ascending order. 
+func (s *Service) fetchAllInstanceHosts() ([]string, error) { + allHostsMap := make(map[string]struct{}) + pdInfo, err := topology.FetchPDTopology(s.params.PDClient) if err != nil { return nil, err } - memory, usages, err := queryClusterLoad(db) - if err != nil { - return nil, err + for _, i := range pdInfo { + allHostsMap[i.IP] = struct{}{} } - cores, hostPartitionMap, err := queryClusterHardware(db) + + tikvInfo, tiFlashInfo, err := topology.FetchStoreTopology(s.params.PDClient) if err != nil { return nil, err } - dataDirMap, err := queryDeployInfo(db) - if err != nil { - return nil, err + for _, i := range tikvInfo { + allHostsMap[i.IP] = struct{}{} } - - infos := make([]HostInfo, 0) - for ip, instances := range hostMap { - var partitions = make([]PartitionInstance, 0) - for _, instance := range instances { - ip := parseIP(instance.Address) - - partitionMap, ok := hostPartitionMap[ip] - if !ok { - continue - } - - if instance.ServerType == "tiflash" { - // Since Multi-path is a common feature in TiFlash, a TiFlash instance may have multiple partitions. - // For now TiFlash will only return the disks it used, so we can just add all partitions. 
- for _, p := range partitionMap { - if p.ServerType == "tiflash" { - partitions = append(partitions, PartitionInstance{ - Partition: p, - Instance: instance, - }) - } - } - } else { - dataDir, ok := dataDirMap[instance.Address] - if !ok { - continue - } - - partition := inferPartition(dataDir, partitionMap) - partitions = append(partitions, PartitionInstance{ - Partition: partition, - Instance: instance, - }) - } - } - - info := HostInfo{ - IP: ip, - CPUCore: cores[ip], - CPUUsage: usages[ip], - Memory: memory[ip], - Partitions: partitions, - } - infos = append(infos, info) + for _, i := range tiFlashInfo { + allHostsMap[i.IP] = struct{}{} } - return infos, nil -} - -func splitPath(path string) []string { - if path == "" { - return []string{} - } - return strings.Split(path, string(filepath.Separator)) -} - -func inferPartition(dataDir string, diskMap PartitionMap) Partition { - var targetDisk Partition - var minRelativePathLength = math.MaxInt64 - - for _, disk := range diskMap { - rel, err := filepath.Rel(disk.Path, dataDir) - if err != nil { - continue - } - var relativePathLength int - for _, dir := range splitPath(rel) { - if dir == ".." { - relativePathLength = -1 - break - } else { - relativePathLength++ - } - } - if relativePathLength == -1 { - continue - } - if relativePathLength < minRelativePathLength { - minRelativePathLength = relativePathLength - targetDisk = disk - } - } - - return targetDisk -} - -// HostMap map host ip to all instance on it -// e.g. 
"127.0.0.1" => []Instance{...} -type HostMap map[string][]Instance - -func loadHosts(db *gorm.DB) (HostMap, error) { - hostMap := make(HostMap) - var rows []Instance - if err := db.Table("INFORMATION_SCHEMA.CLUSTER_INFO").Find(&rows).Error; err != nil { + tidbInfo, err := topology.FetchTiDBTopology(s.lifecycleCtx, s.params.EtcdClient) + if err != nil { return nil, err } - for _, row := range rows { - ip := parseIP(row.Address) - instances, ok := hostMap[ip] - if !ok { - instances = []Instance{} - } - - instances = append(instances, Instance{ - Address: row.Address, - ServerType: row.ServerType, - }) - hostMap[ip] = instances + for _, i := range tidbInfo { + allHostsMap[i.IP] = struct{}{} } - return hostMap, nil -} - -func parseIP(addr string) string { - return strings.Split(addr, ":")[0] -} - -// CPUCoreMap map host ip to its cpu logical cores number -// e.g. "127.0.0.1" => 8 -type CPUCoreMap map[string]int + allHosts := funk.Keys(allHostsMap).([]string) + sort.Strings(allHosts) -// Memory map host ip to its Memory detail -// e.g. "127.0.0.1" => &Memory{} -type MemoryMap map[string]*Memory - -// CPUUsageMap map host ip to its cpu usage -// e.g. "127.0.0.1" => &CPUUsage{ Idle: 0.1, System: 0.1 } -type CPUUsageMap map[string]*CPUUsage - -type ClusterTableModel struct { - Instance string `gorm:"column:INSTANCE"` - DeviceName string `gorm:"column:DEVICE_NAME"` - DeviceType string `gorm:"column:DEVICE_TYPE"` - Name string `gorm:"column:NAME"` - Value string `gorm:"column:VALUE"` - Type string `gorm:"column:TYPE"` + return allHosts, nil } -const ClusterLoadCondition = "(device_type = 'memory' and device_name = 'virtual') or (device_type = 'cpu' and device_name = 'usage')" - -func queryClusterLoad(db *gorm.DB) (MemoryMap, CPUUsageMap, error) { - memoryMap := make(MemoryMap) - cpuMap := make(CPUUsageMap) - var rows []ClusterTableModel - if err := db.Table("INFORMATION_SCHEMA.CLUSTER_LOAD"). 
- Where(ClusterLoadCondition).Find(&rows).Error; err != nil { - return nil, nil, err - } - - for _, row := range rows { - switch { - case row.DeviceType == "memory" && row.DeviceName == "virtual": - saveMemory(row, &memoryMap) - case row.DeviceType == "cpu" && row.DeviceName == "usage": - saveCPUUsageMap(row, &cpuMap) - default: - continue - } - } - return memoryMap, cpuMap, nil -} - -func saveMemory(row ClusterTableModel, m *MemoryMap) { - ip := parseIP(row.Instance) - - memory, ok := (*m)[ip] - if !ok { - memory = &Memory{} - (*m)[ip] = memory - } - - var err error - switch row.Name { - case "total": - memory.Total, err = strconv.Atoi(row.Value) - if err != nil { - return - } - case "used": - memory.Used, err = strconv.Atoi(row.Value) - if err != nil { - return - } - default: - return - } -} - -func saveCPUUsageMap(row ClusterTableModel, m *CPUUsageMap) { - ip := parseIP(row.Instance) - - var cpu *CPUUsage - var ok bool - if cpu, ok = (*m)[ip]; !ok { - cpu = &CPUUsage{} - (*m)[ip] = cpu - } - - var err error - switch row.Name { - case "system": - cpu.System, err = strconv.ParseFloat(row.Value, 64) - if err != nil { - return - } - case "idle": - cpu.Idle, err = strconv.ParseFloat(row.Value, 64) - if err != nil { - return - } - default: - return - } -} - -// PartitionMap map partition name to its detail -// e.g. "nvme0n1p1" => Partition{ Path: "/", FSType: "ext4", ... } -type PartitionMap map[string]Partition - -// HostPartition map host ip to all partitions on it -// e.g. "127.0.0.1" => { "nvme0n1p1" => Partition{ Path: "/", FSType: "ext4", ... }, ... 
} -type HostPartitionMap map[string]PartitionMap - -const ClusterHardWareCondition = "(device_type = 'cpu' and name = 'cpu-logical-cores') or (device_type = 'disk')" - -func queryClusterHardware(db *gorm.DB) (CPUCoreMap, HostPartitionMap, error) { - cpuMap := make(CPUCoreMap) - hostMap := make(HostPartitionMap) - var rows []ClusterTableModel - - if err := db.Table("INFORMATION_SCHEMA.CLUSTER_HARDWARE").Where(ClusterHardWareCondition).Find(&rows).Error; err != nil { - return nil, nil, err - } - - for _, row := range rows { - switch { - case row.DeviceType == "cpu" && row.Name == "cpu-logical-cores": - saveCPUCore(row, &cpuMap) - case row.DeviceType == "disk": - savePartition(row, &hostMap) - default: - continue - } - } - return cpuMap, hostMap, nil -} - -func saveCPUCore(row ClusterTableModel, m *CPUCoreMap) { - ip := parseIP(row.Instance) - cores, err := strconv.Atoi(row.Value) +// fetchAllHostsInfo fetches all hosts and their information. +// Note: The returned data and error may both exist. 
+func (s *Service) fetchAllHostsInfo(db *gorm.DB) ([]*hostinfo.HostInfo, error) { + allHosts, err := s.fetchAllInstanceHosts() if err != nil { - return + return nil, err } - (*m)[ip] = cores -} - -func savePartition(row ClusterTableModel, m *HostPartitionMap) { - ip := parseIP(row.Instance) - partitionMap, ok := (*m)[ip] - if !ok { - partitionMap = make(PartitionMap) + allHostsInfoMap := make(map[string]*hostinfo.HostInfo) + if e := hostinfo.FillFromClusterLoadTable(db, allHostsInfoMap); e != nil { + log.Warn("Failed to read cluster_load table", zap.Error(e)) + err = e } - - partition, ok := partitionMap[row.DeviceName] - if !ok { - partition = Partition{} + if e := hostinfo.FillFromClusterHardwareTable(db, allHostsInfoMap); e != nil && err == nil { + log.Warn("Failed to read cluster_hardware table", zap.Error(e)) + err = e } - - partition.ServerType = row.Type - - var err error - switch row.Name { - case "fstype": - partition.FSType = row.Value - case "path": - partition.Path = row.Value - case "total": - partition.Total, err = strconv.Atoi(row.Value) - if err != nil { - return - } - case "free": - partition.Free, err = strconv.Atoi(row.Value) - if err != nil { - return - } - default: - return - } - - partitionMap[row.DeviceName] = partition - (*m)[ip] = partitionMap -} - -type ClusterConfigModel struct { - Instance string `gorm:"column:INSTANCE"` - Value string `gorm:"column:VALUE"` -} - -// DataDirMap map instance address to its data directory -// e.g. 
"127.0.0.1:20160" => "/tikv/data-dir" -type DataDirMap map[string]string - -const ClusterConfigCondition = "(`type` = 'tidb' and `key` = 'log.file.filename') or (`type` = 'tikv' and `key` = 'storage.data-dir') or (`type` = 'pd' and `key` = 'data-dir')" - -func queryDeployInfo(db *gorm.DB) (DataDirMap, error) { - m := make(DataDirMap) - var rows []ClusterConfigModel - if err := db.Table("INFORMATION_SCHEMA.CLUSTER_CONFIG").Where(ClusterConfigCondition).Find(&rows).Error; err != nil { - return nil, err + if e := hostinfo.FillInstances(db, allHostsInfoMap); e != nil && err == nil { + log.Warn("Failed to fill instances for hosts", zap.Error(e)) + err = e } - for _, row := range rows { - m[row.Instance] = row.Value + r := make([]*hostinfo.HostInfo, 0, len(allHosts)) + for _, host := range allHosts { + if im, ok := allHostsInfoMap[host]; ok { + r = append(r, im) + } else { + // Missing item + r = append(r, hostinfo.NewHostInfo(host)) + } } - return m, nil + return r, err } diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go new file mode 100644 index 0000000000..900961cadd --- /dev/null +++ b/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go @@ -0,0 +1,82 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hostinfo + +import ( + "strings" + + "github.com/jinzhu/gorm" + + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/host" +) + +type clusterConfigModel struct { + Type string `gorm:"column:TYPE"` + Instance string `gorm:"column:INSTANCE"` + Key string `gorm:"column:KEY"` + Value string `gorm:"column:VALUE"` +} + +func FillInstances(db *gorm.DB, m HostInfoMap) error { + var rows []clusterConfigModel + if err := db. + Table("INFORMATION_SCHEMA.CLUSTER_CONFIG"). + Where("(`TYPE` = 'tidb' AND `KEY` = 'log.file.filename') " + + "OR (`TYPE` = 'tikv' AND `KEY` = 'storage.data-dir') " + + "OR (`TYPE` = 'pd' AND `KEY` = 'data-dir')"). + Find(&rows).Error; err != nil { + return err + } + + for _, row := range rows { + hostname, _, err := host.ParseHostAndPortFromAddress(row.Instance) + if err != nil { + continue + } + if _, ok := m[hostname]; !ok { + m[hostname] = NewHostInfo(hostname) + } + m[hostname].Instances[row.Instance] = &InstanceInfo{ + Type: row.Type, + PartitionPathL: strings.ToLower(locateInstanceMountPartition(row.Value, m[hostname].Partitions)), + } + } + return nil +} + +// Try to discover which partition this instance is running on. +// If discover failed, empty string will be returned. +func locateInstanceMountPartition(directoryOrFilePath string, partitions map[string]*PartitionInfo) string { + if len(directoryOrFilePath) == 0 { + return "" + } + + maxMatchLen := 0 + maxMatchPath := "" + + directoryOrFilePathL := strings.ToLower(directoryOrFilePath) + + for _, info := range partitions { + // FIXME: This may cause wrong result in case sensitive FS. 
+ if !strings.HasPrefix(directoryOrFilePathL, strings.ToLower(info.Path)) { + continue + } + if len(info.Path) > maxMatchLen { + maxMatchLen = len(info.Path) + maxMatchPath = info.Path + } + } + + return maxMatchPath +} diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go new file mode 100644 index 0000000000..f240f87491 --- /dev/null +++ b/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go @@ -0,0 +1,149 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package hostinfo + +import ( + "bytes" + "encoding/json" + "strings" + + "github.com/jinzhu/gorm" + + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/host" +) + +// Used to deserialize from JSON_VALUE +type clusterHardwareCpuInfoModel struct { + LogicalCores int `json:"cpu-logical-cores,string"` + PhysicalCores int `json:"cpu-physical-cores,string"` +} + +// Used to deserialize from JSON_VALUE +type clusterHardwareDiskModel struct { + Path string `json:"path"` + FSType string `json:"fstype"` + Free int `json:"free,string"` + Total int `json:"total,string"` +} + +func FillFromClusterHardwareTable(db *gorm.DB, m HostInfoMap) error { + var rows []clusterTableModel + + var sqlQuery bytes.Buffer + if err := clusterTableQueryTemplate.Execute(&sqlQuery, map[string]string{ + "tableName": "INFORMATION_SCHEMA.CLUSTER_HARDWARE", + }); err != nil { + panic(err) + } + + if err := db. + Raw(sqlQuery.String(), []string{"cpu", "disk"}). 
+ Scan(&rows).Error; err != nil { + return err + } + + tiFlashDisks := make([]clusterTableModel, 0) + + for _, row := range rows { + hostname, _, err := host.ParseHostAndPortFromAddress(row.Instance) + if err != nil { + continue + } + if _, ok := m[hostname]; !ok { + m[hostname] = NewHostInfo(hostname) + } + + switch { + case row.DeviceType == "cpu" && row.DeviceName == "cpu": + if m[hostname].CPUInfo != nil { + continue + } + var v clusterHardwareCpuInfoModel + err := json.Unmarshal([]byte(row.JsonValue), &v) + if err != nil { + continue + } + m[hostname].CPUInfo = &CPUInfo{ + LogicalCores: v.LogicalCores, + PhysicalCores: v.PhysicalCores, + } + case row.DeviceType == "disk": + if row.Type == "tiflash" { + // Collect TiFlash related information for later processing. + tiFlashDisks = append(tiFlashDisks, row) + } + if m[hostname].PartitionProviderType != "" && m[hostname].PartitionProviderType != row.Type { + // Another instance on the same host has already provided disk information, skip. + continue + } + var v clusterHardwareDiskModel + err := json.Unmarshal([]byte(row.JsonValue), &v) + if err != nil { + continue + } + if m[hostname].PartitionProviderType == "" { + m[hostname].PartitionProviderType = row.Type + } + m[hostname].Partitions[strings.ToLower(v.Path)] = &PartitionInfo{ + Path: v.Path, + FSType: v.FSType, + Free: v.Free, + Total: v.Total, + } + } + } + + // ========================================================================================== + // HACK: TiFlash special logic + // For now, we can only infer TiFlash instances from its reported disk information. + // Due to a bug, TiFlash will return all disks that has the prefix of actual deployed disk. 
+ type tiFlashDiskEntity struct { + maxLen int + maxLenPath string + } + tiFlashDiskInfo := make(map[string]tiFlashDiskEntity) // key is TiFlash instance address + for _, d := range tiFlashDisks { + var v clusterHardwareDiskModel + err := json.Unmarshal([]byte(d.JsonValue), &v) + if err != nil { + continue + } + // For each TiFlash instance, it may report multiple disks. We keep the disk that has longest path. + if _, ok := tiFlashDiskInfo[d.Instance]; !ok { + tiFlashDiskInfo[d.Instance] = tiFlashDiskEntity{ + maxLen: 0, + maxLenPath: "", + } + } + if len(v.Path) > tiFlashDiskInfo[d.Instance].maxLen { + tiFlashDiskInfo[d.Instance] = tiFlashDiskEntity{ + maxLen: len(v.Path), + maxLenPath: v.Path, + } + } + } + // Back fill TiFlash instances + for instance, de := range tiFlashDiskInfo { + hostname, _, err := host.ParseHostAndPortFromAddress(instance) + if err != nil { + panic(err) + } + m[hostname].Instances[instance] = &InstanceInfo{ + Type: "tiflash", + PartitionPathL: strings.ToLower(de.maxLenPath), + } + } + + return nil +} diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go new file mode 100644 index 0000000000..39d1d5fe86 --- /dev/null +++ b/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go @@ -0,0 +1,92 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hostinfo + +import ( + "bytes" + "encoding/json" + + "github.com/jinzhu/gorm" + + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/host" +) + +// Used to deserialize from JSON_VALUE +type clusterLoadCpuUsageModel struct { + Idle float64 `json:"idle,string"` + System float64 `json:"system,string"` +} + +// Used to deserialize from JSON_VALUE +type clusterLoadMemoryVirtualModel struct { + Used int `json:"used,string"` + Total int `json:"total,string"` +} + +func FillFromClusterLoadTable(db *gorm.DB, m HostInfoMap) error { + var rows []clusterTableModel + + var sqlQuery bytes.Buffer + if err := clusterTableQueryTemplate.Execute(&sqlQuery, map[string]string{ + "tableName": "INFORMATION_SCHEMA.CLUSTER_LOAD", + }); err != nil { + panic(err) + } + + if err := db. + Raw(sqlQuery.String(), []string{"memory", "cpu"}). + Scan(&rows).Error; err != nil { + return err + } + + for _, row := range rows { + hostname, _, err := host.ParseHostAndPortFromAddress(row.Instance) + if err != nil { + continue + } + if _, ok := m[hostname]; !ok { + m[hostname] = NewHostInfo(hostname) + } + + switch { + case row.DeviceType == "memory" && row.DeviceName == "virtual": + if m[hostname].MemoryUsage != nil { + continue + } + var v clusterLoadMemoryVirtualModel + err := json.Unmarshal([]byte(row.JsonValue), &v) + if err != nil { + continue + } + m[hostname].MemoryUsage = &MemoryUsageInfo{ + Used: v.Used, + Total: v.Total, + } + case row.DeviceType == "cpu" && row.DeviceName == "usage": + if m[hostname].CPUUsage != nil { + continue + } + var v clusterLoadCpuUsageModel + err := json.Unmarshal([]byte(row.JsonValue), &v) + if err != nil { + continue + } + m[hostname].CPUUsage = &CPUUsageInfo{ + Idle: v.Idle, + System: v.System, + } + } + } + return nil +} diff --git a/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go b/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go new file mode 100644 index 0000000000..aa5cc5821a --- /dev/null +++ b/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go @@ -0,0 
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hostinfo

import "text/template"

// CPUUsageInfo is the instantaneous CPU usage of a host.
// Idle and System are fractions in [0, 1] (the UI renders them with a
// percent-unit formatter and derives user time as 1 - system - idle).
type CPUUsageInfo struct {
	Idle   float64 `json:"idle"`
	System float64 `json:"system"`
}

// MemoryUsageInfo is the memory usage of a host, in bytes.
type MemoryUsageInfo struct {
	Used  int `json:"used"`
	Total int `json:"total"`
}

// CPUInfo describes the CPU hardware of a host.
type CPUInfo struct {
	LogicalCores  int `json:"logical_cores"`
	PhysicalCores int `json:"physical_cores"`
	// TODO: Support arch.
}

// PartitionInfo describes one disk partition of a host.
// Free and Total are in bytes.
type PartitionInfo struct {
	Path   string `json:"path"`
	FSType string `json:"fstype"`
	Free   int    `json:"free"`
	Total  int    `json:"total"`
}

// InstanceInfo binds one deployed instance to the partition it lives on.
type InstanceInfo struct {
	// Type is the instance kind, e.g. "tidb", "tikv".
	Type string `json:"type"`
	// PartitionPathL is the lower-cased partition path, matching the keys of
	// HostInfo.Partitions.
	PartitionPathL string `json:"partition_path_lower"`
}

// HostInfo aggregates all information collected for a single host.
// Pointer fields are nil when the corresponding probe did not succeed.
type HostInfo struct {
	Host        string           `json:"host"`
	CPUInfo     *CPUInfo         `json:"cpu_info"`
	CPUUsage    *CPUUsageInfo    `json:"cpu_usage"`
	MemoryUsage *MemoryUsageInfo `json:"memory_usage"`

	// Containing unused partitions. The key is path in lower case.
	// Note: deviceName is not used as the key, since TiDB and TiKV may return
	// different deviceName for the same device.
	Partitions map[string]*PartitionInfo `json:"partitions"`
	// The source instance type that provides the partition info.
	PartitionProviderType string `json:"-"`

	// Instances in the current host. The key is instance address.
	Instances map[string]*InstanceInfo `json:"instances"`
}

// HostInfoMap indexes HostInfo by hostname.
type HostInfoMap = map[string]*HostInfo

// clusterTableQueryTemplate collapses the NAME/VALUE rows of a CLUSTER_*
// system table into one row per (TYPE, INSTANCE, DEVICE_TYPE, DEVICE_NAME)
// with the pairs folded into a JSON object (JSON_VALUE). Rows are ordered by
// FIELD(...) DESC so that TiDB-reported rows sort first, letting consumers
// that keep only the first row per metric prefer TiDB's values.
var clusterTableQueryTemplate = template.Must(template.New("").Parse(`
SELECT
  *,
  FIELD(LOWER(A.TYPE), 'tiflash', 'tikv', 'pd', 'tidb') AS _ORDER
FROM (
  SELECT
    TYPE, INSTANCE, DEVICE_TYPE, DEVICE_NAME, JSON_OBJECTAGG(NAME, VALUE) AS JSON_VALUE
  FROM
    {{.tableName}}
  WHERE
    DEVICE_TYPE IN (?)
  GROUP BY TYPE, INSTANCE, DEVICE_TYPE, DEVICE_NAME
) AS A
ORDER BY
  _ORDER DESC, INSTANCE, DEVICE_TYPE, DEVICE_NAME
`))

// clusterTableModel is one aggregated row produced by clusterTableQueryTemplate.
type clusterTableModel struct {
	Type       string `gorm:"column:TYPE"`        // Example: tidb, tikv
	Instance   string `gorm:"column:INSTANCE"`    // Example: 127.0.0.1:4000
	DeviceType string `gorm:"column:DEVICE_TYPE"` // Example: cpu
	DeviceName string `gorm:"column:DEVICE_NAME"` // Example: usage
	JsonValue  string `gorm:"column:JSON_VALUE"`  // Only exists by using `clusterTableQueryTemplate`.
}

// NewHostInfo creates an empty HostInfo for the given hostname with
// initialized (non-nil) Partitions and Instances maps.
func NewHostInfo(hostname string) *HostInfo {
	return &HostInfo{
		Host:       hostname,
		Partitions: make(map[string]*PartitionInfo),
		Instances:  make(map[string]*InstanceInfo),
	}
}
// GetHostsInfoResponse is the payload of GET /host/all. When probing some
// hosts fails, Hosts holds the partial results and Warning carries the error
// instead of failing the whole request.
type GetHostsInfoResponse struct {
	Hosts   []*hostinfo.HostInfo `json:"hosts"`
	Warning *utils.APIError      `json:"warning"`
}

// @ID clusterInfoGetHostsInfo
// @Summary Get information of all hosts
// @Router /host/all [get]
// @Security JwtAuth
// @Success 200 {object} GetHostsInfoResponse
// @Failure 401 {object} utils.APIError "Unauthorized failure"
func (s *Service) getHostsInfo(c *gin.Context) {
	db := utils.GetTiDBConnection(c)

	info, err := s.fetchAllHostsInfo(db)
	// Only fail the request when there is no usable data at all; a non-nil
	// info together with a non-nil err means partial success.
	if err != nil && info == nil {
		_ = c.Error(err)
		return
	}

	// Surface a partial-failure error as a warning alongside the data.
	var warning *utils.APIError
	if err != nil {
		warning = utils.NewAPIError(err)
	}

	c.JSON(http.StatusOK, GetHostsInfoResponse{
		Hosts:   info,
		Warning: warning,
	})
}
// @ID clusterInfoGetStatistics
// @Summary Get cluster statistics
// @Router /host/statistics [get]
// @Security JwtAuth
// @Success 200 {object} ClusterStatistics
// @Failure 401 {object} utils.APIError "Unauthorized failure"
func (s *Service) getStatistics(c *gin.Context) {
	// Statistics are computed on demand from topology and system tables;
	// see calculateStatistics for the aggregation logic.
	db := utils.GetTiDBConnection(c)
	stats, err := s.calculateStatistics(db)
	if err != nil {
		_ = c.Error(err)
		return
	}
	c.JSON(http.StatusOK, stats)
}
+ +package clusterinfo + +import ( + "fmt" + "sort" + + "github.com/jinzhu/gorm" + "github.com/thoas/go-funk" + + "github.com/pingcap-incubator/tidb-dashboard/pkg/apiserver/clusterinfo/hostinfo" + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/topology" +) + +type ClusterStatisticsPartial struct { + NumberOfHosts int `json:"number_of_hosts"` + NumberOfInstances int `json:"number_of_instances"` + TotalMemoryCapacityBytes int `json:"total_memory_capacity_bytes"` + TotalPhysicalCores int `json:"total_physical_cores"` + TotalLogicalCores int `json:"total_logical_cores"` +} + +type ClusterStatistics struct { + ProbeFailureHosts int `json:"probe_failure_hosts"` + Versions []string `json:"versions"` + TotalStats *ClusterStatisticsPartial `json:"total_stats"` + StatsByInstanceKind map[string]*ClusterStatisticsPartial `json:"stats_by_instance_kind"` +} + +type instanceKindHostImmediateInfo struct { + memoryCapacity int + physicalCores int + logicalCores int +} + +type instanceKindImmediateInfo struct { + instances map[string]struct{} + hosts map[string]*instanceKindHostImmediateInfo +} + +func newInstanceKindImmediateInfo() *instanceKindImmediateInfo { + return &instanceKindImmediateInfo{ + instances: make(map[string]struct{}), + hosts: make(map[string]*instanceKindHostImmediateInfo), + } +} + +func sumInt(array []int) int { + result := 0 + for _, v := range array { + result += v + } + return result +} + +func (info *instanceKindImmediateInfo) ToResult() *ClusterStatisticsPartial { + return &ClusterStatisticsPartial{ + NumberOfHosts: len(funk.Keys(info.hosts).([]string)), + NumberOfInstances: len(funk.Keys(info.instances).([]string)), + TotalMemoryCapacityBytes: sumInt(funk.Map(funk.Values(info.hosts), func(x *instanceKindHostImmediateInfo) int { return x.memoryCapacity }).([]int)), + TotalPhysicalCores: sumInt(funk.Map(funk.Values(info.hosts), func(x *instanceKindHostImmediateInfo) int { return x.physicalCores }).([]int)), + TotalLogicalCores: 
sumInt(funk.Map(funk.Values(info.hosts), func(x *instanceKindHostImmediateInfo) int { return x.logicalCores }).([]int)), + } +} + +func (s *Service) calculateStatistics(db *gorm.DB) (*ClusterStatistics, error) { + globalHostsSet := make(map[string]struct{}) + globalFailureHostsSet := make(map[string]struct{}) + globalVersionsSet := make(map[string]struct{}) + globalInfo := newInstanceKindImmediateInfo() + infoByIk := make(map[string]*instanceKindImmediateInfo) + infoByIk["pd"] = newInstanceKindImmediateInfo() + infoByIk["tidb"] = newInstanceKindImmediateInfo() + infoByIk["tikv"] = newInstanceKindImmediateInfo() + infoByIk["tiflash"] = newInstanceKindImmediateInfo() + + // Fill from topology info + pdInfo, err := topology.FetchPDTopology(s.params.PDClient) + if err != nil { + return nil, err + } + for _, i := range pdInfo { + globalHostsSet[i.IP] = struct{}{} + globalVersionsSet[i.Version] = struct{}{} + globalInfo.instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + infoByIk["pd"].instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + } + tikvInfo, tiFlashInfo, err := topology.FetchStoreTopology(s.params.PDClient) + if err != nil { + return nil, err + } + for _, i := range tikvInfo { + globalHostsSet[i.IP] = struct{}{} + globalVersionsSet[i.Version] = struct{}{} + globalInfo.instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + infoByIk["tikv"].instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + } + for _, i := range tiFlashInfo { + globalHostsSet[i.IP] = struct{}{} + globalVersionsSet[i.Version] = struct{}{} + globalInfo.instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + infoByIk["tiflash"].instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + } + tidbInfo, err := topology.FetchTiDBTopology(s.lifecycleCtx, s.params.EtcdClient) + if err != nil { + return nil, err + } + for _, i := range tidbInfo { + globalHostsSet[i.IP] = struct{}{} + globalVersionsSet[i.Version] = struct{}{} + globalInfo.instances[fmt.Sprintf("%s:%d", 
i.IP, i.Port)] = struct{}{} + infoByIk["tidb"].instances[fmt.Sprintf("%s:%d", i.IP, i.Port)] = struct{}{} + } + + // Fill from hardware info + allHostsInfoMap := make(map[string]*hostinfo.HostInfo) + if e := hostinfo.FillFromClusterLoadTable(db, allHostsInfoMap); e != nil { + return nil, err + } + if e := hostinfo.FillFromClusterHardwareTable(db, allHostsInfoMap); e != nil { + return nil, err + } + for host, hi := range allHostsInfoMap { + if hi.MemoryUsage.Total > 0 && hi.CPUInfo.PhysicalCores > 0 && hi.CPUInfo.LogicalCores > 0 { + // Put success host info into `globalInfo.hosts`. + globalInfo.hosts[host] = &instanceKindHostImmediateInfo{ + memoryCapacity: hi.MemoryUsage.Total, + physicalCores: hi.CPUInfo.PhysicalCores, + logicalCores: hi.CPUInfo.LogicalCores, + } + } + } + + // Fill hosts in each instance kind according to the global hosts info + for _, i := range pdInfo { + if v, ok := globalInfo.hosts[i.IP]; ok { + infoByIk["pd"].hosts[i.IP] = v + } else { + globalFailureHostsSet[i.IP] = struct{}{} + } + } + for _, i := range tikvInfo { + if v, ok := globalInfo.hosts[i.IP]; ok { + infoByIk["tikv"].hosts[i.IP] = v + } else { + globalFailureHostsSet[i.IP] = struct{}{} + } + } + for _, i := range tiFlashInfo { + if v, ok := globalInfo.hosts[i.IP]; ok { + infoByIk["tiflash"].hosts[i.IP] = v + } else { + globalFailureHostsSet[i.IP] = struct{}{} + } + } + for _, i := range tidbInfo { + if v, ok := globalInfo.hosts[i.IP]; ok { + infoByIk["tidb"].hosts[i.IP] = v + } else { + globalFailureHostsSet[i.IP] = struct{}{} + } + } + + // Generate result.. 
+ versions := funk.Keys(globalVersionsSet).([]string) + sort.Strings(versions) + + statsByIk := make(map[string]*ClusterStatisticsPartial) + for ik, info := range infoByIk { + statsByIk[ik] = info.ToResult() + } + + return &ClusterStatistics{ + ProbeFailureHosts: len(funk.Keys(globalFailureHostsSet).([]string)), + Versions: versions, + TotalStats: globalInfo.ToResult(), + StatsByInstanceKind: statsByIk, + }, nil +} diff --git a/pkg/utils/host/host.go b/pkg/utils/host/host.go new file mode 100644 index 0000000000..8d4ab16e4a --- /dev/null +++ b/pkg/utils/host/host.go @@ -0,0 +1,49 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "fmt" + "net" + "net/url" + "strconv" + "strings" +) + +// address should be like "ip:port" as "127.0.0.1:2379". +// return error if string is not like "ip:port". +func ParseHostAndPortFromAddress(address string) (string, uint, error) { + host, port, err := net.SplitHostPort(address) + if err != nil { + return "", 0, fmt.Errorf("invalid address: %v", err) + } + portNumeric, err := strconv.Atoi(port) + if err != nil || portNumeric == 0 { + return "", 0, fmt.Errorf("invalid address: invalid port") + } + return strings.ToLower(host), uint(portNumeric), nil +} + +// address should be like "protocol://ip:port" as "http://127.0.0.1:2379". 
// ParseHostAndPortFromAddressURL extracts the lower-cased hostname and the
// port from a URL-like address such as "http://127.0.0.1:2379".
// It returns an error when the string cannot be parsed as a URL, or when the
// port is missing, non-numeric, or zero.
func ParseHostAndPortFromAddressURL(urlString string) (string, uint, error) {
	parsed, parseErr := url.Parse(urlString)
	if parseErr != nil {
		return "", 0, fmt.Errorf("invalid address: %v", parseErr)
	}
	portNum, convErr := strconv.Atoi(parsed.Port())
	if convErr != nil || portNum == 0 {
		return "", 0, fmt.Errorf("invalid address: invalid port")
	}
	return strings.ToLower(parsed.Hostname()), uint(portNum), nil
}
@@ -84,12 +85,12 @@ func FetchStoreLocation(pdClient *pd.Client) (*StoreLocation, error) { func buildStoreTopology(stores []store) []StoreInfo { nodes := make([]StoreInfo, 0, len(stores)) for _, v := range stores { - host, port, err := parseHostAndPortFromAddress(v.Address) + hostname, port, err := host.ParseHostAndPortFromAddress(v.Address) if err != nil { log.Warn("Failed to parse store address", zap.Any("store", v)) continue } - _, statusPort, err := parseHostAndPortFromAddress(v.StatusAddress) + _, statusPort, err := host.ParseHostAndPortFromAddress(v.StatusAddress) if err != nil { log.Warn("Failed to parse store status address", zap.Any("store", v)) continue @@ -102,7 +103,7 @@ func buildStoreTopology(stores []store) []StoreInfo { } node := StoreInfo{ Version: version, - IP: host, + IP: hostname, Port: port, GitHash: v.GitHash, DeployPath: v.DeployPath, diff --git a/pkg/utils/topology/tidb.go b/pkg/utils/topology/tidb.go index ed8f8fc8d8..ebab743d8d 100644 --- a/pkg/utils/topology/tidb.go +++ b/pkg/utils/topology/tidb.go @@ -24,6 +24,8 @@ import ( "github.com/pingcap/log" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" + + "github.com/pingcap-incubator/tidb-dashboard/pkg/utils/host" ) const tidbTopologyKeyPrefix = "/topology/tidb/" @@ -114,7 +116,7 @@ func parseTiDBInfo(address string, value []byte) (*TiDBInfo, error) { if err != nil { return nil, ErrInvalidTopologyData.Wrap(err, "TiDB info unmarshal failed") } - host, port, err := parseHostAndPortFromAddress(address) + hostname, port, err := host.ParseHostAndPortFromAddress(address) if err != nil { return nil, ErrInvalidTopologyData.Wrap(err, "TiDB info address parse failed") } @@ -122,7 +124,7 @@ func parseTiDBInfo(address string, value []byte) (*TiDBInfo, error) { return &TiDBInfo{ GitHash: ds.GitHash, Version: ds.Version, - IP: host, + IP: hostname, Port: port, DeployPath: ds.DeployPath, Status: ComponentStatusUnreachable, diff --git a/pkg/utils/topology/topology.go b/pkg/utils/topology/topology.go index 
d3b904750a..60cee5669f 100644 --- a/pkg/utils/topology/topology.go +++ b/pkg/utils/topology/topology.go @@ -16,10 +16,6 @@ package topology import ( "context" "encoding/json" - "fmt" - "net/url" - "strconv" - "strings" "time" "github.com/joomcode/errorx" @@ -36,33 +32,6 @@ var ( const defaultFetchTimeout = 2 * time.Second -// address should be like "ip:port" as "127.0.0.1:2379". -// return error if string is not like "ip:port". -func parseHostAndPortFromAddress(address string) (string, uint, error) { - addresses := strings.Split(address, ":") - if len(addresses) != 2 { - return "", 0, fmt.Errorf("invalid address %s", address) - } - port, err := strconv.Atoi(addresses[1]) - if err != nil { - return "", 0, err - } - return addresses[0], uint(port), nil -} - -// address should be like "protocol://ip:port" as "http://127.0.0.1:2379". -func parseHostAndPortFromAddressURL(urlString string) (string, uint, error) { - u, err := url.Parse(urlString) - if err != nil { - return "", 0, err - } - port, err := strconv.Atoi(u.Port()) - if err != nil { - return "", 0, err - } - return u.Hostname(), uint(port), nil -} - func fetchStandardComponentTopology(ctx context.Context, componentName string, etcdClient *clientv3.Client) (*StandardComponentInfo, error) { ctx2, cancel := context.WithTimeout(ctx, defaultFetchTimeout) defer cancel() diff --git a/ui/lib/apps/ClusterInfo/components/DiskTable.tsx b/ui/lib/apps/ClusterInfo/components/DiskTable.tsx new file mode 100644 index 0000000000..9c66db1f96 --- /dev/null +++ b/ui/lib/apps/ClusterInfo/components/DiskTable.tsx @@ -0,0 +1,212 @@ +import { Tooltip, Typography } from 'antd' +import React, { useMemo } from 'react' +import { useTranslation } from 'react-i18next' +import { getValueFormat } from '@baurine/grafana-value-formats' +import client, { HostinfoHostInfo, HostinfoPartitionInfo } from '@lib/client' +import { Bar, CardTable } from '@lib/components' +import { useClientRequest } from '@lib/utils/useClientRequest' +import { IColumn } 
from 'office-ui-fabric-react/lib/DetailsList' +import { + InstanceKind, + InstanceKinds, + InstanceKindName, +} from '@lib/utils/instanceTable' +import { WarningOutlined } from '@ant-design/icons' + +interface IExpandedDiskItem extends HostinfoPartitionInfo { + key: string + host?: string + instancesCount: Record +} + +function expandDisksItems(rows: HostinfoHostInfo[]): IExpandedDiskItem[] { + const expanded: IExpandedDiskItem[] = [] + rows.forEach((row) => { + const instancesPerPartition: Record< + string, + Record + > = {} + + let partitions = 0 + + Object.values(row.instances ?? {}).forEach((i) => { + if (!i) { + return + } + if (!instancesPerPartition[i.partition_path_lower!]) { + instancesPerPartition[i.partition_path_lower!] = { + pd: 0, + tidb: 0, + tikv: 0, + tiflash: 0, + } + } + instancesPerPartition[i.partition_path_lower!][i.type!]++ + }) + + for (let pathL in row.partitions) { + const instancesCount = instancesPerPartition[pathL] + if (!instancesCount) { + // This partition does not have deployed instances, skip + continue + } + const partition = row.partitions[pathL] + expanded.push({ + key: `${row.host} ${pathL}`, + host: row.host, + instancesCount, + ...partition, + }) + partitions++ + } + + if (partitions === 0) { + // Supply dummy item.. + expanded.push({ + key: row.host ?? '', + host: row.host, + instancesCount: { + pd: 0, + tidb: 0, + tikv: 0, + tiflash: 0, + }, + }) + } + }) + return expanded +} + +export default function HostTable() { + const { t } = useTranslation() + + const { data, isLoading, error } = useClientRequest((reqConfig) => + client.getInstance().clusterInfoGetHostsInfo(reqConfig) + ) + + const diskData = useMemo(() => expandDisksItems(data?.hosts ?? 
[]), [data]) + + const columns: IColumn[] = useMemo( + () => [ + { + name: t('cluster_info.list.disk_table.columns.host'), + key: 'host', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedDiskItem) => { + if (!row.free) { + return ( + + + {row.host} + + + ) + } + return ( + + {row.host} + + ) + }, + }, + { + name: t('cluster_info.list.disk_table.columns.mount_dir'), + key: 'mount_dir', + minWidth: 150, + maxWidth: 200, + onRender: (row: IExpandedDiskItem) => { + if (!row.path) { + return + } + return ( + + {row.path} + + ) + }, + }, + { + name: t('cluster_info.list.disk_table.columns.fs'), + key: 'fs', + minWidth: 50, + maxWidth: 100, + onRender: (row: IExpandedDiskItem) => { + return row.fstype?.toUpperCase() ?? '' + }, + }, + { + name: t('cluster_info.list.disk_table.columns.disk_size'), + key: 'disk_size', + minWidth: 60, + maxWidth: 100, + onRender: (row: IExpandedDiskItem) => { + if (!row.total) { + return + } + return getValueFormat('bytes')(row.total, 1) + }, + }, + { + name: t('cluster_info.list.disk_table.columns.disk_usage'), + key: 'disk_usage', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedDiskItem) => { + if (!row.total || !row.free) { + return + } + const total = row.total + const free = row.free + const used = total - free + const usedPercent = (used / total).toFixed(3) + const tooltipContent = ( + + Used: {getValueFormat('bytes')(used, 1)} ( + {getValueFormat('percentunit')(+usedPercent, 1)}) + + ) + return ( + + + + ) + }, + }, + { + name: t('cluster_info.list.disk_table.columns.instances'), + key: 'instances', + minWidth: 100, + maxWidth: 200, + onRender: (row: IExpandedDiskItem) => { + const item = InstanceKinds.map((ik) => { + if (row.instancesCount[ik] > 0) { + return `${row.instancesCount[ik]} ${InstanceKindName[ik]}` + } else { + return '' + } + }) + const content = item.filter((v) => v.length > 0).join(', ') + return ( + + {content} + + ) + }, + }, + ], + [t] + ) + + return ( + + ) +} diff --git 
a/ui/lib/apps/ClusterInfo/components/HostTable.tsx b/ui/lib/apps/ClusterInfo/components/HostTable.tsx index 634dbd996a..af4fd8f9d3 100644 --- a/ui/lib/apps/ClusterInfo/components/HostTable.tsx +++ b/ui/lib/apps/ClusterInfo/components/HostTable.tsx @@ -1,212 +1,200 @@ import { Tooltip, Typography } from 'antd' -import React from 'react' +import React, { useMemo } from 'react' import { useTranslation } from 'react-i18next' import { red } from '@ant-design/colors' -import { WarningOutlined } from '@ant-design/icons' import { getValueFormat } from '@baurine/grafana-value-formats' - -import client from '@lib/client' +import client, { HostinfoHostInfo } from '@lib/client' import { Bar, CardTable, Pre } from '@lib/components' import { useClientRequest } from '@lib/utils/useClientRequest' +import { IColumn } from 'office-ui-fabric-react/lib/DetailsList' +import { + InstanceKind, + InstanceKinds, + InstanceKindName, +} from '@lib/utils/instanceTable' +import { WarningOutlined } from '@ant-design/icons' -const { Text } = Typography +interface IExpandedHostItem extends HostinfoHostInfo { + key: string + instancesCount: Record +} -function filterUniquePartitions(items) { - return items.filter( - (x, i, a) => a.findIndex((y) => y.partition.path === x.partition.path) === i - ) +function expandHostItems(rows: HostinfoHostInfo[]): IExpandedHostItem[] { + const expanded: IExpandedHostItem[] = [] + rows.forEach((row) => { + const instancesCount: Record = { + pd: 0, + tidb: 0, + tikv: 0, + tiflash: 0, + } + + Object.values(row.instances ?? {}).forEach((i) => { + if (!i) { + return + } + instancesCount[i.type!]++ + }) + + expanded.push({ + key: row.host ?? 
'', + instancesCount, + ...row, + }) + }) + return expanded } export default function HostTable() { const { t } = useTranslation() - const { data: tableData, isLoading, error } = useClientRequest((reqConfig) => - client.getInstance().getHostsInfo(reqConfig) + const { data, isLoading, error } = useClientRequest((reqConfig) => + client.getInstance().clusterInfoGetHostsInfo(reqConfig) ) - const columns = [ - { - name: t('cluster_info.list.host_table.columns.ip'), - key: 'ip', - minWidth: 100, - maxWidth: 150, - onRender: ({ ip, unavailable }) => { - if (unavailable) { + const hostData = useMemo(() => expandHostItems(data?.hosts ?? []), [data]) + + const columns: IColumn[] = useMemo( + () => [ + { + name: t('cluster_info.list.host_table.columns.host'), + key: 'host', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedHostItem) => { + if (!row.cpu_info) { + // We assume that CPU info must be successfully retrieved. + return ( + + + {row.host} + + + ) + } return ( - - - {ip} - + + {row.host} ) - } - return ip + }, }, - }, - { - name: t('cluster_info.list.host_table.columns.cpu'), - key: 'cpu_core', - minWidth: 60, - maxWidth: 100, - onRender: ({ cpu_core }) => - cpu_core !== undefined ? `${cpu_core} vCPU` : '', - }, - { - name: t('cluster_info.list.host_table.columns.cpu_usage'), - key: 'cpu_usage', - minWidth: 100, - maxWidth: 150, - onRender: ({ cpu_usage }) => { - if (cpu_usage === undefined) { - return - } - const { system, idle } = cpu_usage - const user = 1 - system - idle - const tooltipContent = ` -User: ${getValueFormat('percentunit')(user)} -System: ${getValueFormat('percentunit')(system)}` - return ( - {tooltipContent.trim()}}> - - - ) - }, - }, - { - name: t('cluster_info.list.host_table.columns.memory'), - key: 'memory', - minWidth: 60, - maxWidth: 100, - onRender: ({ memory }) => - memory !== undefined ? 
getValueFormat('bytes')(memory.total, 1) : '', - }, - { - name: t('cluster_info.list.host_table.columns.memory_usage'), - key: 'memory_usage', - minWidth: 100, - maxWidth: 150, - onRender: ({ memory }) => { - if (memory === undefined) { - return - } - const { total, used } = memory - const usedPercent = (used / total).toFixed(3) - const title = ( -
- Used: {getValueFormat('bytes')(used, 1)} ( - {getValueFormat('percentunit')(+usedPercent, 1)}) -
- ) - return ( - - - - ) - }, - }, - { - name: t('cluster_info.list.host_table.columns.deploy'), - key: 'deploy', - minWidth: 100, - maxWidth: 200, - onRender: ({ partitions }) => { - if (partitions === undefined || partitions.length === 0) { - return - } - const serverTotal = { - tidb: 0, - tikv: 0, - pd: 0, - tiflash: 0, - } - return filterUniquePartitions(partitions).map((partition, i) => { - const currentMountPoint = partition.partition.path - partitions.forEach((item) => { - if (item.partition.path !== currentMountPoint) { - return - } - serverTotal[item.instance.server_type]++ - }) - const serverInfos: string[] = [] - if (serverTotal.tidb > 0) { - serverInfos.push(`${serverTotal.tidb} TiDB`) - } - if (serverTotal.tikv > 0) { - serverInfos.push(`${serverTotal.tikv} TiKV`) - } - if (serverTotal.pd > 0) { - serverInfos.push(`${serverTotal.pd} PD`) + { + name: t('cluster_info.list.host_table.columns.cpu'), + key: 'cpu', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedHostItem) => { + const { cpu_info: c } = row + if (!c) { + return } - if (serverTotal.tiflash > 0) { - serverInfos.push(`${serverTotal.tiflash} TiFlash`) - } - const content = `${serverInfos.join( - ',' - )}: ${partition.partition.fstype.toUpperCase()} ${currentMountPoint}` + const tooltipContent = ` +Physical Cores: ${c.physical_cores} +Logical Cores: ${c.logical_cores}` return ( - -
{content}
+ {tooltipContent.trim()}}> + {`${c.physical_cores!} (${c.logical_cores!} vCore)`} ) - }) + }, }, - }, - { - name: t('cluster_info.list.host_table.columns.disk_size'), - key: 'disk_size', - minWidth: 80, - maxWidth: 100, - onRender: ({ partitions }) => { - if (partitions === undefined || partitions.length === 0) { - return - } - return filterUniquePartitions(partitions).map((partiton, i) => { + { + name: t('cluster_info.list.host_table.columns.cpu_usage'), + key: 'cpu_usage', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedHostItem) => { + if (!row.cpu_usage) { + return + } + const system = row.cpu_usage.system ?? 0 + const idle = row.cpu_usage.idle ?? 1 + const user = 1 - system - idle + const tooltipContent = ` +User: ${getValueFormat('percentunit')(user)} +System: ${getValueFormat('percentunit')(system)}` return ( -
- {getValueFormat('bytes')(partiton.partition.total, 1)} -
+ {tooltipContent.trim()}}> + + ) - }) + }, + }, + { + name: t('cluster_info.list.host_table.columns.memory'), + key: 'memory', + minWidth: 60, + maxWidth: 100, + onRender: (row: IExpandedHostItem) => { + if (!row.memory_usage) { + return + } + return getValueFormat('bytes')(row.memory_usage.total ?? 0, 1) + }, }, - }, - { - name: t('cluster_info.list.host_table.columns.disk_usage'), - key: 'disk_usage', - minWidth: 100, - maxWidth: 150, - onRender: ({ partitions }) => { - if (partitions === undefined || partitions.length === 0) { - return - } - return filterUniquePartitions(partitions).map((partiton, i) => { - const { total, free } = partiton.partition - const used = total - free - const usedPercent = (used / total).toFixed(3) + { + name: t('cluster_info.list.host_table.columns.memory_usage'), + key: 'memory_usage', + minWidth: 100, + maxWidth: 150, + onRender: (row: IExpandedHostItem) => { + if (!row.memory_usage) { + return + } + const { total, used } = row.memory_usage + const usedPercent = (used! / total!).toFixed(3) const title = (
- Used: {getValueFormat('bytes')(used, 1)} ( + Used: {getValueFormat('bytes')(used!, 1)} ( {getValueFormat('percentunit')(+usedPercent, 1)})
) return ( - - + + + + ) + }, + }, + { + name: t('cluster_info.list.host_table.columns.instances'), + key: 'instances', + minWidth: 100, + maxWidth: 200, + onRender: (row: IExpandedHostItem) => { + const item = InstanceKinds.map((ik) => { + if (row.instancesCount[ik] > 0) { + return `${row.instancesCount[ik]} ${InstanceKindName[ik]}` + } else { + return '' + } + }) + const content = item.filter((v) => v.length > 0).join(', ') + return ( + + {content} ) - }) + }, }, - }, - ] + ], + [t] + ) return ( ) } diff --git a/ui/lib/apps/ClusterInfo/components/Statistics.module.less b/ui/lib/apps/ClusterInfo/components/Statistics.module.less new file mode 100644 index 0000000000..6372349792 --- /dev/null +++ b/ui/lib/apps/ClusterInfo/components/Statistics.module.less @@ -0,0 +1,8 @@ +@import '~antd/es/style/themes/default.less'; + +// FIXME: We should not provide padding for CardTab content, so that user +// can control whether a padding is needed. For example, to a . +.content { + margin-left: -@padding-page; + margin-right: -@padding-page; +} diff --git a/ui/lib/apps/ClusterInfo/components/Statistics.tsx b/ui/lib/apps/ClusterInfo/components/Statistics.tsx new file mode 100644 index 0000000000..0d8c99693a --- /dev/null +++ b/ui/lib/apps/ClusterInfo/components/Statistics.tsx @@ -0,0 +1,102 @@ +import React from 'react' +import { useClientRequest } from '@lib/utils/useClientRequest' +import client, { ClusterinfoClusterStatisticsPartial } from '@lib/client' +import { AnimatedSkeleton, ErrorBar, Descriptions, Card } from '@lib/components' +import { useTranslation } from 'react-i18next' +import { getValueFormat } from '@baurine/grafana-value-formats' +import { Alert } from 'antd' + +import styles from './Statistics.module.less' +import { InstanceKinds, InstanceKindName } from '@lib/utils/instanceTable' + +function PartialInfo({ data }: { data?: ClusterinfoClusterStatisticsPartial }) { + const { t } = useTranslation() + return ( + + + {data?.number_of_instances ?? 
'Unknown'} + + + {data?.number_of_hosts ?? 'Unknown'} + + + {getValueFormat('bytes')(data?.total_memory_capacity_bytes ?? 0, 1)} + + + {data?.total_physical_cores ?? 'Unknown'} + + + {data?.total_logical_cores ?? 'Unknown'} + + + ) +} + +export default function Statistics() { + const { data, isLoading, error } = useClientRequest((reqConfig) => + client.getInstance().clusterInfoGetStatistics(reqConfig) + ) + const { t } = useTranslation() + + return ( + + {error && } + {data && ( +
+ {(data.probe_failure_hosts ?? 0) > 0 && ( + + + + )} + + + + {(data.versions ?? []).join(', ')} + + + + + + + + {InstanceKinds.map((ik) => { + return ( + + + + ) + })} +
+ )} +
+ ) +} diff --git a/ui/lib/apps/ClusterInfo/components/StoreLocationTree/index.tsx b/ui/lib/apps/ClusterInfo/components/StoreLocationTree/index.tsx index c4d80205a2..61b949b337 100644 --- a/ui/lib/apps/ClusterInfo/components/StoreLocationTree/index.tsx +++ b/ui/lib/apps/ClusterInfo/components/StoreLocationTree/index.tsx @@ -7,6 +7,7 @@ import { QuestionCircleOutlined, } from '@ant-design/icons' import { Space, Tooltip } from 'antd' +import { cyan } from '@ant-design/colors' import { useTranslation } from 'react-i18next' export interface IStoreLocationProps { @@ -67,8 +68,7 @@ export default function StoreLocationTree({ const gLink = bound .append('g') .attr('fill', 'none') - .attr('stroke', '#555') - .attr('stroke-opacity', 0.4) + .attr('stroke', cyan[3]) .attr('stroke-width', 2) const gNode = bound .append('g') @@ -160,14 +160,15 @@ export default function StoreLocationTree({ nodeEnter .append('circle') - .attr('r', 6) - .attr('fill', (d: any) => (d._children ? '#ff4d4f' : '#3351ff')) - .attr('stroke-width', 10) + .attr('r', 8) + .attr('fill', '#fff') + .attr('stroke', (d: any) => (d._children ? cyan[5] : '#ddd')) + .attr('stroke-width', 3) nodeEnter .append('text') .attr('dy', '0.31em') - .attr('x', (d: any) => (d._children ? -8 : 8)) + .attr('x', (d: any) => (d._children ? -15 : 15)) .attr('text-anchor', (d: any) => (d._children ? 
'end' : 'start')) .text(({ data: { name, value } }: any) => { if (value) { diff --git a/ui/lib/apps/ClusterInfo/pages/List.tsx b/ui/lib/apps/ClusterInfo/pages/List.tsx index b4ead3a335..4abb964c97 100644 --- a/ui/lib/apps/ClusterInfo/pages/List.tsx +++ b/ui/lib/apps/ClusterInfo/pages/List.tsx @@ -8,8 +8,10 @@ import { Card } from '@lib/components' import CardTabs from '@lib/components/CardTabs' import HostTable from '../components/HostTable' +import DiskTable from '../components/DiskTable' import InstanceTable from '../components/InstanceTable' import StoreLocation from '../components/StoreLocation' +import Statistics from '../components/Statistics' function renderTabBar(props, DefaultTabBar) { return ( @@ -47,12 +49,24 @@ export default function ListPage() { > + + + + + +
diff --git a/ui/lib/apps/ClusterInfo/translations/en.yaml b/ui/lib/apps/ClusterInfo/translations/en.yaml index fbd363c8ad..3465bf87b4 100644 --- a/ui/lib/apps/ClusterInfo/translations/en.yaml +++ b/ui/lib/apps/ClusterInfo/translations/en.yaml @@ -17,17 +17,37 @@ cluster_info: host_table: title: Hosts columns: - ip: Address + host: Host Address cpu: CPU cpu_usage: CPU Usage memory: Memory memory_usage: Memory Usage - deploy: Disk - disk_size: Disk Size + instances: Instances + instanceUnavailable: Host information is unavailable because all instances on the host are down + disk_table: + title: Disks + columns: + host: Host Address + mount_dir: Mount Directory + fs: File System + disk_size: Disk Capacity disk_usage: Disk Usage - instanceUnavailable: Host information is unknow due to instance unreachable + instances: Instances store_topology: title: Store Topology tooltip: You can also zoom in or out by pressing CTRL and scrolling mouse + statistics: + title: Statistics + summary_title: Cluster Summary + field: + version: Version + instances: '# Instances' + hosts: '# Hosts where instances are deployed' + memory_capacity: Σ Memory capacity (of all hosts) + physical_cores: Σ CPU physical cores (of all hosts) + logical_cores: Σ CPU logical cores (of all hosts) + message: + instance_down: 'Some instances are down in {{n}} host(s) so host-related information may be inaccurate.' + sub_statistics: Sub-statistics below are counted by instance kind. The sum of host metrics in sub-statistics can be larger than "Cluster Summary" when different instances are deployed on the same host.
error: load: 'Load component {{comp}} error: {{cause}}' diff --git a/ui/lib/apps/ClusterInfo/translations/zh.yaml b/ui/lib/apps/ClusterInfo/translations/zh.yaml index e3e916b646..8a7deefa1c 100644 --- a/ui/lib/apps/ClusterInfo/translations/zh.yaml +++ b/ui/lib/apps/ClusterInfo/translations/zh.yaml @@ -17,17 +17,37 @@ cluster_info: host_table: title: 主机 columns: - ip: 主机地址 + host: 主机地址 cpu: CPU cpu_usage: CPU 使用率 memory: 物理内存 memory_usage: 内存使用率 - deploy: 部署磁盘 + instances: 实例 + instanceUnavailable: 由于该主机上没有实例存活,因此无法获取主机信息 + disk_table: + title: 磁盘 + columns: + host: 主机地址 + mount_dir: 磁盘挂载点 + fs: 文件系统 disk_size: 磁盘容量 disk_usage: 磁盘使用率 - instanceUnavailable: 获取该主机信息失败:无法访问实例 + instances: 实例 store_topology: title: 存储拓扑 tooltip: 按住 Ctrl 键并滑动鼠标滚轮可以缩放 + statistics: + title: 统计 + summary_title: 集群总计 + field: + version: 版本 + instances: 总实例数量 + hosts: 实例部署的总机器数量 + memory_capacity: 内存总量总和 (按实例部署的机器计算) + physical_cores: CPU 物理核心数总和 (按实例部署的机器计算) + logical_cores: CPU 逻辑核心数总和 (按实例部署的机器计算) + message: + instance_down: '由于有 {{n}} 台机器上的所有实例都未启动或无法访问,因此统计中关于机器的指标可能会不准确。' + sub_statistics: 子统计按不同实例类型分别计算。当一个机器上部署了不同类型实例时,以下子统计的机器指标累加起来会超过“集群总计”数量。 error: load: '加载组件 {{comp}} 失败: {{cause}}' From 85698f7db0e095dc974fc81d9ef586626f7ff1ba Mon Sep 17 00:00:00 2001 From: Breezewish Date: Thu, 26 Nov 2020 15:59:22 +0800 Subject: [PATCH 2/3] Fix lints Signed-off-by: Breezewish --- pkg/apiserver/clusterinfo/host.go | 6 +++--- pkg/apiserver/clusterinfo/hostinfo/cluster_config.go | 2 +- .../clusterinfo/hostinfo/cluster_hardware.go | 12 ++++++------ pkg/apiserver/clusterinfo/hostinfo/cluster_load.go | 10 +++++----- pkg/apiserver/clusterinfo/hostinfo/hostinfo.go | 10 +++++----- pkg/apiserver/clusterinfo/service.go | 4 ++-- pkg/apiserver/clusterinfo/statistics.go | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/apiserver/clusterinfo/host.go b/pkg/apiserver/clusterinfo/host.go index 3532c3e075..414982dd34 100644 --- a/pkg/apiserver/clusterinfo/host.go +++ 
b/pkg/apiserver/clusterinfo/host.go @@ -63,13 +63,13 @@ func (s *Service) fetchAllInstanceHosts() ([]string, error) { // fetchAllHostsInfo fetches all hosts and their information. // Note: The returned data and error may both exist. -func (s *Service) fetchAllHostsInfo(db *gorm.DB) ([]*hostinfo.HostInfo, error) { +func (s *Service) fetchAllHostsInfo(db *gorm.DB) ([]*hostinfo.Info, error) { allHosts, err := s.fetchAllInstanceHosts() if err != nil { return nil, err } - allHostsInfoMap := make(map[string]*hostinfo.HostInfo) + allHostsInfoMap := make(map[string]*hostinfo.Info) if e := hostinfo.FillFromClusterLoadTable(db, allHostsInfoMap); e != nil { log.Warn("Failed to read cluster_load table", zap.Error(e)) err = e @@ -83,7 +83,7 @@ func (s *Service) fetchAllHostsInfo(db *gorm.DB) ([]*hostinfo.HostInfo, error) { err = e } - r := make([]*hostinfo.HostInfo, 0, len(allHosts)) + r := make([]*hostinfo.Info, 0, len(allHosts)) for _, host := range allHosts { if im, ok := allHostsInfoMap[host]; ok { r = append(r, im) diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go index 900961cadd..fb2655de29 100644 --- a/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go +++ b/pkg/apiserver/clusterinfo/hostinfo/cluster_config.go @@ -28,7 +28,7 @@ type clusterConfigModel struct { Value string `gorm:"column:VALUE"` } -func FillInstances(db *gorm.DB, m HostInfoMap) error { +func FillInstances(db *gorm.DB, m InfoMap) error { var rows []clusterConfigModel if err := db. Table("INFORMATION_SCHEMA.CLUSTER_CONFIG"). 
diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go index f240f87491..3657a02b28 100644 --- a/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go +++ b/pkg/apiserver/clusterinfo/hostinfo/cluster_hardware.go @@ -24,7 +24,7 @@ import ( ) // Used to deserialize from JSON_VALUE -type clusterHardwareCpuInfoModel struct { +type clusterHardwareCPUInfoModel struct { LogicalCores int `json:"cpu-logical-cores,string"` PhysicalCores int `json:"cpu-physical-cores,string"` } @@ -37,7 +37,7 @@ type clusterHardwareDiskModel struct { Total int `json:"total,string"` } -func FillFromClusterHardwareTable(db *gorm.DB, m HostInfoMap) error { +func FillFromClusterHardwareTable(db *gorm.DB, m InfoMap) error { var rows []clusterTableModel var sqlQuery bytes.Buffer @@ -69,8 +69,8 @@ func FillFromClusterHardwareTable(db *gorm.DB, m HostInfoMap) error { if m[hostname].CPUInfo != nil { continue } - var v clusterHardwareCpuInfoModel - err := json.Unmarshal([]byte(row.JsonValue), &v) + var v clusterHardwareCPUInfoModel + err := json.Unmarshal([]byte(row.JSONValue), &v) if err != nil { continue } @@ -88,7 +88,7 @@ func FillFromClusterHardwareTable(db *gorm.DB, m HostInfoMap) error { continue } var v clusterHardwareDiskModel - err := json.Unmarshal([]byte(row.JsonValue), &v) + err := json.Unmarshal([]byte(row.JSONValue), &v) if err != nil { continue } @@ -115,7 +115,7 @@ func FillFromClusterHardwareTable(db *gorm.DB, m HostInfoMap) error { tiFlashDiskInfo := make(map[string]tiFlashDiskEntity) // key is TiFlash instance address for _, d := range tiFlashDisks { var v clusterHardwareDiskModel - err := json.Unmarshal([]byte(d.JsonValue), &v) + err := json.Unmarshal([]byte(d.JSONValue), &v) if err != nil { continue } diff --git a/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go b/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go index 39d1d5fe86..e30582f91a 100644 --- a/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go +++ 
b/pkg/apiserver/clusterinfo/hostinfo/cluster_load.go @@ -23,7 +23,7 @@ import ( ) // Used to deserialize from JSON_VALUE -type clusterLoadCpuUsageModel struct { +type clusterLoadCPUUsageModel struct { Idle float64 `json:"idle,string"` System float64 `json:"system,string"` } @@ -34,7 +34,7 @@ type clusterLoadMemoryVirtualModel struct { Total int `json:"total,string"` } -func FillFromClusterLoadTable(db *gorm.DB, m HostInfoMap) error { +func FillFromClusterLoadTable(db *gorm.DB, m InfoMap) error { var rows []clusterTableModel var sqlQuery bytes.Buffer @@ -65,7 +65,7 @@ func FillFromClusterLoadTable(db *gorm.DB, m HostInfoMap) error { continue } var v clusterLoadMemoryVirtualModel - err := json.Unmarshal([]byte(row.JsonValue), &v) + err := json.Unmarshal([]byte(row.JSONValue), &v) if err != nil { continue } @@ -77,8 +77,8 @@ func FillFromClusterLoadTable(db *gorm.DB, m HostInfoMap) error { if m[hostname].CPUUsage != nil { continue } - var v clusterLoadCpuUsageModel - err := json.Unmarshal([]byte(row.JsonValue), &v) + var v clusterLoadCPUUsageModel + err := json.Unmarshal([]byte(row.JSONValue), &v) if err != nil { continue } diff --git a/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go b/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go index aa5cc5821a..97a2d81dd7 100644 --- a/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go +++ b/pkg/apiserver/clusterinfo/hostinfo/hostinfo.go @@ -43,7 +43,7 @@ type InstanceInfo struct { PartitionPathL string `json:"partition_path_lower"` } -type HostInfo struct { +type Info struct { Host string `json:"host"` CPUInfo *CPUInfo `json:"cpu_info"` CPUUsage *CPUUsageInfo `json:"cpu_usage"` @@ -59,7 +59,7 @@ type HostInfo struct { Instances map[string]*InstanceInfo `json:"instances"` } -type HostInfoMap = map[string]*HostInfo +type InfoMap = map[string]*Info var clusterTableQueryTemplate = template.Must(template.New("").Parse(` SELECT @@ -83,11 +83,11 @@ type clusterTableModel struct { Instance string `gorm:"column:INSTANCE"` // Example: 
127.0.0.1:4000 DeviceType string `gorm:"column:DEVICE_TYPE"` // Example: cpu DeviceName string `gorm:"column:DEVICE_NAME"` // Example: usage - JsonValue string `gorm:"column:JSON_VALUE"` // Only exists by using `clusterTableQueryTemplate`. + JSONValue string `gorm:"column:JSON_VALUE"` // Only exists by using `clusterTableQueryTemplate`. } -func NewHostInfo(hostname string) *HostInfo { - return &HostInfo{ +func NewHostInfo(hostname string) *Info { + return &Info{ Host: hostname, Partitions: make(map[string]*PartitionInfo), Instances: make(map[string]*InstanceInfo), diff --git a/pkg/apiserver/clusterinfo/service.go b/pkg/apiserver/clusterinfo/service.go index f74fe5d55a..475d2049ee 100644 --- a/pkg/apiserver/clusterinfo/service.go +++ b/pkg/apiserver/clusterinfo/service.go @@ -235,8 +235,8 @@ func (s *Service) getAlertManagerCounts(c *gin.Context) { } type GetHostsInfoResponse struct { - Hosts []*hostinfo.HostInfo `json:"hosts"` - Warning *utils.APIError `json:"warning"` + Hosts []*hostinfo.Info `json:"hosts"` + Warning *utils.APIError `json:"warning"` } // @ID clusterInfoGetHostsInfo diff --git a/pkg/apiserver/clusterinfo/statistics.go b/pkg/apiserver/clusterinfo/statistics.go index ceb6e2293c..a57fd40fb4 100644 --- a/pkg/apiserver/clusterinfo/statistics.go +++ b/pkg/apiserver/clusterinfo/statistics.go @@ -125,7 +125,7 @@ func (s *Service) calculateStatistics(db *gorm.DB) (*ClusterStatistics, error) { } // Fill from hardware info - allHostsInfoMap := make(map[string]*hostinfo.HostInfo) + allHostsInfoMap := make(map[string]*hostinfo.Info) if e := hostinfo.FillFromClusterLoadTable(db, allHostsInfoMap); e != nil { return nil, err } From e7169454352c22d2ce83746e4c70fba932210571 Mon Sep 17 00:00:00 2001 From: Breezewish Date: Thu, 26 Nov 2020 16:01:18 +0800 Subject: [PATCH 3/3] Fix frontend Signed-off-by: Breezewish --- ui/lib/apps/ClusterInfo/components/DiskTable.tsx | 4 ++-- ui/lib/apps/ClusterInfo/components/HostTable.tsx | 6 +++--- 2 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/ui/lib/apps/ClusterInfo/components/DiskTable.tsx b/ui/lib/apps/ClusterInfo/components/DiskTable.tsx index 9c66db1f96..e7a9be879b 100644 --- a/ui/lib/apps/ClusterInfo/components/DiskTable.tsx +++ b/ui/lib/apps/ClusterInfo/components/DiskTable.tsx @@ -2,7 +2,7 @@ import { Tooltip, Typography } from 'antd' import React, { useMemo } from 'react' import { useTranslation } from 'react-i18next' import { getValueFormat } from '@baurine/grafana-value-formats' -import client, { HostinfoHostInfo, HostinfoPartitionInfo } from '@lib/client' +import client, { HostinfoInfo, HostinfoPartitionInfo } from '@lib/client' import { Bar, CardTable } from '@lib/components' import { useClientRequest } from '@lib/utils/useClientRequest' import { IColumn } from 'office-ui-fabric-react/lib/DetailsList' @@ -19,7 +19,7 @@ interface IExpandedDiskItem extends HostinfoPartitionInfo { instancesCount: Record } -function expandDisksItems(rows: HostinfoHostInfo[]): IExpandedDiskItem[] { +function expandDisksItems(rows: HostinfoInfo[]): IExpandedDiskItem[] { const expanded: IExpandedDiskItem[] = [] rows.forEach((row) => { const instancesPerPartition: Record< diff --git a/ui/lib/apps/ClusterInfo/components/HostTable.tsx b/ui/lib/apps/ClusterInfo/components/HostTable.tsx index af4fd8f9d3..49cbfc9d5f 100644 --- a/ui/lib/apps/ClusterInfo/components/HostTable.tsx +++ b/ui/lib/apps/ClusterInfo/components/HostTable.tsx @@ -3,7 +3,7 @@ import React, { useMemo } from 'react' import { useTranslation } from 'react-i18next' import { red } from '@ant-design/colors' import { getValueFormat } from '@baurine/grafana-value-formats' -import client, { HostinfoHostInfo } from '@lib/client' +import client, { HostinfoInfo } from '@lib/client' import { Bar, CardTable, Pre } from '@lib/components' import { useClientRequest } from '@lib/utils/useClientRequest' import { IColumn } from 'office-ui-fabric-react/lib/DetailsList' @@ -14,12 +14,12 @@ import { } from '@lib/utils/instanceTable' import { 
WarningOutlined } from '@ant-design/icons' -interface IExpandedHostItem extends HostinfoHostInfo { +interface IExpandedHostItem extends HostinfoInfo { key: string instancesCount: Record } -function expandHostItems(rows: HostinfoHostInfo[]): IExpandedHostItem[] { +function expandHostItems(rows: HostinfoInfo[]): IExpandedHostItem[] { const expanded: IExpandedHostItem[] = [] rows.forEach((row) => { const instancesCount: Record = {