From c0bfea768b5e4e2a913d65ac9e5b1eafd55546d9 Mon Sep 17 00:00:00 2001 From: Lonng Date: Sun, 16 Feb 2020 10:01:14 +0800 Subject: [PATCH 1/4] executor: add a inspection_summary table to summary metrics by module/link Signed-off-by: Lonng --- executor/builder.go | 10 +- executor/executor_pkg_test.go | 3 + .../{diagnostics.go => inspection_result.go} | 4 +- ...tics_test.go => inspection_result_test.go} | 12 +- executor/inspection_summary.go | 508 ++++++++++++++++++ executor/inspection_summary_test.go | 106 ++++ infoschema/metric_schema_test.go | 5 + infoschema/metric_table_def.go | 81 +-- infoschema/tables.go | 15 + planner/core/logical_plan_builder.go | 2 + planner/core/memtable_predicate_extractor.go | 64 ++- .../core/memtable_predicate_extractor_test.go | 104 +++- util/set/float64_set.go | 8 +- 13 files changed, 851 insertions(+), 71 deletions(-) rename executor/{diagnostics.go => inspection_result.go} (98%) rename executor/{diagnostics_test.go => inspection_result_test.go} (98%) create mode 100644 executor/inspection_summary.go create mode 100644 executor/inspection_summary_test.go diff --git a/executor/builder.go b/executor/builder.go index a12affc62f1e7..032710947785d 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -1317,10 +1317,18 @@ func (b *executorBuilder) buildMemTable(v *plannercore.PhysicalMemTable) Executo case strings.ToLower(infoschema.TableInspectionResult): return &MemTableReaderExec{ baseExecutor: newBaseExecutor(b.ctx, v.Schema(), v.ExplainID()), - retriever: &inspectionRetriever{ + retriever: &inspectionResultRetriever{ extractor: v.Extractor.(*plannercore.InspectionResultTableExtractor), }, } + case strings.ToLower(infoschema.TableInspectionSummary): + return &MemTableReaderExec{ + baseExecutor: newBaseExecutor(b.ctx, v.Schema(), v.ExplainID()), + retriever: &inspectionSummaryRetriever{ + table: v.Table, + extractor: v.Extractor.(*plannercore.InspectionSummaryTableExtractor), + }, + } case 
strings.ToLower(infoschema.TableMetricSummary): return &MemTableReaderExec{ baseExecutor: newBaseExecutor(b.ctx, v.Schema(), v.ExplainID()), diff --git a/executor/executor_pkg_test.go b/executor/executor_pkg_test.go index 8417d9e7818e9..6794bba00467f 100644 --- a/executor/executor_pkg_test.go +++ b/executor/executor_pkg_test.go @@ -36,6 +36,9 @@ import ( var _ = Suite(&testExecSuite{}) var _ = SerialSuites(&testExecSerialSuite{}) +// Note: it's a tricky way to export the `inspectionSummaryRules` for unit test but invisible for normal code +var InspectionSummaryRules = inspectionSummaryRules + type testExecSuite struct { } diff --git a/executor/diagnostics.go b/executor/inspection_result.go similarity index 98% rename from executor/diagnostics.go rename to executor/inspection_result.go index 11adbeb60fb06..64e6a54790525 100644 --- a/executor/diagnostics.go +++ b/executor/inspection_result.go @@ -88,13 +88,13 @@ var inspectionRules = []inspectionRule{ &criticalErrorInspection{inspectionName: "critical-error"}, } -type inspectionRetriever struct { +type inspectionResultRetriever struct { dummyCloser retrieved bool extractor *plannercore.InspectionResultTableExtractor } -func (e *inspectionRetriever) retrieve(ctx context.Context, sctx sessionctx.Context) ([][]types.Datum, error) { +func (e *inspectionResultRetriever) retrieve(ctx context.Context, sctx sessionctx.Context) ([][]types.Datum, error) { if e.retrieved || e.extractor.SkipInspection { return nil, nil } diff --git a/executor/diagnostics_test.go b/executor/inspection_result_test.go similarity index 98% rename from executor/diagnostics_test.go rename to executor/inspection_result_test.go index e0e75808df7ef..167368081871b 100644 --- a/executor/diagnostics_test.go +++ b/executor/inspection_result_test.go @@ -27,26 +27,26 @@ import ( "github.com/pingcap/tidb/util/testkit" ) -var _ = Suite(&diagnosticsSuite{}) +var _ = Suite(&inspectionResultSuite{}) -type diagnosticsSuite struct { +type inspectionResultSuite struct 
{ store kv.Storage dom *domain.Domain } -func (s *diagnosticsSuite) SetUpSuite(c *C) { +func (s *inspectionResultSuite) SetUpSuite(c *C) { store, dom, err := newStoreWithBootstrap() c.Assert(err, IsNil) s.store = store s.dom = dom } -func (s *diagnosticsSuite) TearDownSuite(c *C) { +func (s *inspectionResultSuite) TearDownSuite(c *C) { s.dom.Close() s.store.Close() } -func (s *diagnosticsSuite) TestInspectionResult(c *C) { +func (s *inspectionResultSuite) TestInspectionResult(c *C) { tk := testkit.NewTestKitWithInit(c, s.store) mockData := map[string]variable.TableSnapshot{} @@ -166,7 +166,7 @@ func (s *diagnosticsSuite) TestInspectionResult(c *C) { } } -func (s *diagnosticsSuite) TestCriticalErrorInspection(c *C) { +func (s *inspectionResultSuite) TestCriticalErrorInspection(c *C) { tk := testkit.NewTestKitWithInit(c, s.store) fpName := "github.com/pingcap/tidb/executor/mockMetricsTableData" diff --git a/executor/inspection_summary.go b/executor/inspection_summary.go new file mode 100644 index 0000000000000..efbcecb1ab48d --- /dev/null +++ b/executor/inspection_summary.go @@ -0,0 +1,508 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
// inspectionSummaryRetriever is the retriever plugged into MemTableReaderExec
// for the inspection_summary table. Its retrieve method summarizes the metric
// tables listed in inspectionSummaryRules.
type inspectionSummaryRetriever struct {
	dummyCloser
	// retrieved is set by the first call to retrieve; later calls return no rows.
	retrieved bool
	// table is the table info taken from the physical plan (v.Table in
	// buildMemTable). retrieve does not read it.
	table *model.TableInfo
	// extractor carries what the planner extracted from the query: the
	// SkipInspection flag and the Rules, MetricNames and Quantiles filters
	// read by retrieve.
	extractor *plannercore.InspectionSummaryTableExtractor
}
"tikv_commit_log_avg_duration", + "tikv_commit_log_duration", + "tikv_commit_log_duration_per_server", + "tikv_process_duration_per_server", + "tikv_propose_wait_duration", + "tikv_propose_avg_wait_duration", + "tikv_apply_wait_duration", + "tikv_apply_avg_wait_duration", + "tikv_check_split_duration", + "tikv_storage_async_request_duration", + "tikv_storage_async_request_avg_duration", + "tikv_scheduler_command_duration", + "tikv_scheduler_command_avg_duration", + "tikv_scheduler_latch_wait_duration", + "tikv_scheduler_latch_wait_avg_duration", + "tikv_send_snapshot_duration", + "tikv_handle_snapshot_duration", + "tikv_cop_request_durations", + "tikv_cop_request_duration", + "tikv_cop_handle_duration", + "tikv_cop_wait_duration", + "tikv_engine_max_get_duration", + "tikv_engine_avg_get_duration", + "tikv_engine_avg_seek_duration", + "tikv_engine_write_duration", + "tikv_wal_sync_max_duration", + "tikv_wal_sync_duration", + "tikv_compaction_max_duration", + "tikv_compaction_duration", + "tikv_sst_read_max_duration", + "tikv_sst_read_duration", + "tikv_write_stall_max_duration", + "tikv_write_stall_avg_duration", + "tikv_oldest_snapshots_duration", + "tikv_ingest_sst_duration", + "tikv_ingest_sst_avg_duration", + "tikv_engine_blob_seek_duration", + "tikv_engine_blob_get_duration", + "tikv_engine_blob_file_read_duration", + "tikv_engine_blob_file_write_duration", + "tikv_engine_blob_file_sync_duration", + "tikv_lock_manager_waiter_lifetime_avg_duration", + "tikv_lock_manager_deadlock_detect_duration", + "tikv_lock_manager_deadlock_detect_avg_duration", + }, + "read-link": { + "tidb_get_token_duration", + "tidb_parse_duration", + "tidb_compile_duration", + "pd_tso_rpc_duration", + "pd_tso_wait_duration", + "tidb_execute_duration", + "tidb_expensive_executors_ops", + "tidb_query_using_plan_cache_ops", + "tidb_distsql_execution_duration", + "tidb_distsql_partial_num", + "tidb_distsql_partial_qps", + "tidb_distsql_partial_scan_key_num", + "tidb_distsql_qps", + 
"tidb_distsql_scan_key_num", + "tidb_region_cache_ops", + "tidb_batch_client_pending_req_count", + "tidb_batch_client_unavailable_duration", + "tidb_batch_client_wait_duration", + "tidb_kv_backoff_duration", + "tidb_kv_backoff_ops", + "tidb_kv_region_error_ops", + "tidb_kv_request_duration", + "tidb_kv_request_ops", + "tidb_kv_snapshot_ops", + "tidb_kv_txn_ops", + "tikv_average_grpc_messge_duration", + "tikv_grpc_avg_req_batch_size", + "tikv_grpc_avg_resp_batch_size", + "tikv_grpc_errors", + "tikv_grpc_messge_duration", + "tikv_grpc_qps", + "tikv_grpc_req_batch_size", + "tikv_grpc_resp_batch_size", + "tidb_cop_duration", + "tikv_cop_wait_duration", + "tikv_coprocessor_is_busy", + "tikv_coprocessor_request_error", + "tikv_cop_handle_duration", + "tikv_cop_kv_cursor_operations", + "tikv_cop_request_duration", + "tikv_cop_request_durations", + "tikv_cop_scan_details", + "tikv_cop_total_dag_executors", + "tikv_cop_total_dag_requests", + "tikv_cop_total_kv_cursor_operations", + "tikv_cop_total_request_errors", + "tikv_cop_total_requests", + "tikv_cop_total_response_size", + "tikv_cop_total_rocksdb_perf_statistics", + "tikv_channel_full_total", + "tikv_engine_avg_get_duration", + "tikv_engine_avg_seek_duration", + "tikv_handle_snapshot_duration", + "tikv_block_all_cache_hit", + "tikv_block_bloom_prefix_cache_hit", + "tikv_block_cache_size", + "tikv_block_data_cache_hit", + "tikv_block_filter_cache_hit", + "tikv_block_index_cache_hit", + "tikv_engine_get_block_cache_operations", + "tikv_engine_get_cpu_cache_operations", + "tikv_engine_get_memtable_operations", + "tikv_per_read_avg_bytes", + "tikv_per_read_max_bytes", + }, + "write-link": { + "tidb_get_token_duration", + "tidb_parse_duration", + "tidb_compile_duration", + "pd_tso_rpc_duration", + "pd_tso_wait_duration", + "tidb_execute_duration", + "tidb_transaction_duration", + "tidb_transaction_local_latch_wait_duration", + "tidb_transaction_ops", + "tidb_transaction_retry_error_ops", + "tidb_transaction_retry_num", + 
"tidb_transaction_statement_num", + "tidb_auto_id_qps", + "tidb_auto_id_request_duration", + "tidb_region_cache_ops", + "tidb_kv_backoff_duration", + "tidb_kv_backoff_ops", + "tidb_kv_region_error_ops", + "tidb_kv_request_duration", + "tidb_kv_request_ops", + "tidb_kv_snapshot_ops", + "tidb_kv_txn_ops", + "tidb_kv_write_num", + "tidb_kv_write_size", + "tikv_average_grpc_messge_duration", + "tikv_grpc_avg_req_batch_size", + "tikv_grpc_avg_resp_batch_size", + "tikv_grpc_errors", + "tikv_grpc_messge_duration", + "tikv_grpc_qps", + "tikv_grpc_req_batch_size", + "tikv_grpc_resp_batch_size", + "tikv_scheduler_command_avg_duration", + "tikv_scheduler_command_duration", + "tikv_scheduler_is_busy", + "tikv_scheduler_keys_read_avg", + "tikv_scheduler_keys_read", + "tikv_scheduler_keys_written_avg", + "tikv_scheduler_keys_written", + "tikv_scheduler_latch_wait_avg_duration", + "tikv_scheduler_latch_wait_duration", + "tikv_scheduler_pending_commands", + "tikv_scheduler_priority_commands", + "tikv_scheduler_scan_details", + "tikv_scheduler_stage_total", + "tikv_scheduler_writing_bytes", + "tikv_propose_avg_wait_duration", + "tikv_propose_wait_duration", + "tikv_append_log_avg_duration", + "tikv_append_log_duration_per_server", + "tikv_append_log_duration", + "tikv_commit_log_avg_duration", + "tikv_commit_log_duration_per_server", + "tikv_commit_log_duration", + "tikv_apply_avg_wait_duration", + "tikv_apply_log_avg_duration", + "tikv_apply_log_duration_per_server", + "tikv_apply_log_duration", + "tikv_apply_wait_duration", + "tikv_engine_wal_sync_operations", + "tikv_engine_write_duration", + "tikv_engine_write_operations", + "tikv_engine_write_stall", + "tikv_write_stall_avg_duration", + "tikv_write_stall_max_duration", + "tikv_write_stall_reason", + }, + "ddl": { + "tidb_ddl_add_index_speed", + "tidb_ddl_batch_add_index_duration", + "tidb_ddl_deploy_syncer_duration", + "tidb_ddl_duration", + "tidb_ddl_meta_opm", + "tidb_ddl_opm", + "tidb_ddl_update_self_version_duration", + 
"tidb_ddl_waiting_jobs_num", + "tidb_ddl_worker_duration", + }, + "stats": { + "tidb_statistics_auto_analyze_duration", + "tidb_statistics_auto_analyze_ops", + "tidb_statistics_dump_feedback_ops", + "tidb_statistics_fast_analyze_status", + "tidb_statistics_pseudo_estimation_ops", + "tidb_statistics_significant_feedback", + "tidb_statistics_stats_inaccuracy_rate", + "tidb_statistics_store_query_feedback_qps", + "tidb_statistics_update_stats_ops", + }, + "gc": { + "tidb_gc_action_result_opm", + "tidb_gc_config", + "tidb_gc_delete_range_fail_opm", + "tidb_gc_delete_range_task_status", + "tidb_gc_duration", + "tidb_gc_fail_opm", + "tidb_gc_interval", + "tidb_gc_lifetime", + "tidb_gc_push_task_duration", + "tidb_gc_too_many_locks_opm", + "tidb_gc_worker_action_opm", + "tikv_engine_blob_gc_duration", + "tikv_auto_gc_progress", + "tikv_auto_gc_safepoint", + "tikv_auto_gc_working", + "tikv_gc_fail_tasks", + "tikv_gc_keys", + "tikv_gc_skipped_tasks", + "tikv_gc_speed", + "tikv_gc_tasks_avg_duration", + "tikv_gc_tasks_duration", + "tikv_gc_too_busy", + "tikv_gc_total_tasks", + }, + "rocksdb": { + "tikv_compaction_duration", + "tikv_compaction_max_duration", + "tikv_compaction_operations", + "tikv_compaction_pending_bytes", + "tikv_compaction_reason", + "tikv_write_stall_avg_duration", + "tikv_write_stall_max_duration", + "tikv_write_stall_reason", + "store_available_ratio", + "store_size_amplification", + "tikv_engine_avg_get_duration", + "tikv_engine_avg_seek_duration", + "tikv_engine_blob_bytes_flow", + "tikv_engine_blob_file_count", + "tikv_engine_blob_file_read_duration", + "tikv_engine_blob_file_size", + "tikv_engine_blob_file_sync_duration", + "tikv_engine_blob_file_sync_operations", + "tikv_engine_blob_file_write_duration", + "tikv_engine_blob_gc_bytes_flow", + "tikv_engine_blob_gc_duration", + "tikv_engine_blob_gc_file", + "tikv_engine_blob_gc_keys_flow", + "tikv_engine_blob_get_duration", + "tikv_engine_blob_key_avg_size", + "tikv_engine_blob_key_max_size", + 
"tikv_engine_blob_seek_duration", + "tikv_engine_blob_seek_operations", + "tikv_engine_blob_value_avg_size", + "tikv_engine_blob_value_max_size", + "tikv_engine_compaction_flow_bytes", + "tikv_engine_get_block_cache_operations", + "tikv_engine_get_cpu_cache_operations", + "tikv_engine_get_memtable_operations", + "tikv_engine_live_blob_size", + "tikv_engine_max_get_duration", + "tikv_engine_max_seek_duration", + "tikv_engine_seek_operations", + "tikv_engine_size", + "tikv_engine_wal_sync_operations", + "tikv_engine_write_duration", + "tikv_engine_write_operations", + "tikv_engine_write_stall", + }, + "pd": { + "pd_balance_region_movement", + "pd_balance_scheduler_status", + "pd_checker_event_count", + "pd_client_cmd_duration", + "pd_client_cmd_ops", + "pd_cluster_metadata", + "pd_cluster_status", + "pd_grpc_completed_commands_duration", + "pd_grpc_completed_commands_rate", + "pd_handle_request_duration", + "pd_handle_request_ops", + "pd_handle_requests_duration_avg", + "pd_handle_requests_duration", + "pd_handle_transactions_duration", + "pd_handle_transactions_rate", + "pd_hotspot_status", + "pd_label_distribution", + "pd_operator_finish_duration", + "pd_operator_step_finish_duration", + "pd_peer_round_trip_time_seconds", + "pd_region_health", + "pd_region_heartbeat_latency", + "pd_region_label_isolation_level", + "pd_region_syncer_status", + "pd_role", + "pd_schedule_filter", + "pd_schedule_operator", + "pd_schedule_store_limit", + "pd_scheduler_balance_direction", + "pd_scheduler_balance_leader", + "pd_scheduler_config", + "pd_scheduler_op_influence", + "pd_scheduler_region_heartbeat", + "pd_scheduler_status", + "pd_scheduler_store_status", + "pd_scheduler_tolerant_resource", + "pd_server_etcd_state", + "pd_start_tso_wait_duration", + }, + "raftstore": { + "tikv_approximate_avg_region_size", + "tikv_approximate_region_size_histogram", + "tikv_approximate_region_size", + "tikv_append_log_avg_duration", + "tikv_append_log_duration_per_server", + 
"tikv_append_log_duration", + "tikv_commit_log_avg_duration", + "tikv_commit_log_duration_per_server", + "tikv_commit_log_duration", + "tikv_apply_avg_wait_duration", + "tikv_apply_log_avg_duration", + "tikv_apply_log_duration_per_server", + "tikv_apply_log_duration", + "tikv_apply_wait_duration", + "tikv_process_duration_per_server", + "tikv_process_handled", + "tikv_propose_avg_wait_duration", + "tikv_propose_wait_duration", + "tikv_raft_dropped_messages", + "tikv_raft_log_speed", + "tikv_raft_message_avg_batch_size", + "tikv_raft_message_batch_size", + "tikv_raft_proposals_per_ready", + "tikv_raft_proposals", + "tikv_raft_sent_messages_per_server", + }, +} + +func (e *inspectionSummaryRetriever) retrieve(ctx context.Context, sctx sessionctx.Context) ([][]types.Datum, error) { + if e.retrieved || e.extractor.SkipInspection { + return nil, nil + } + e.retrieved = true + + rules := inspectionFilter{e.extractor.Rules} + names := inspectionFilter{e.extractor.MetricNames} + var finalRows [][]types.Datum + // TODO: support specify time range via SQL hint + for rule, tables := range inspectionSummaryRules { + if !rules.Exist(rule) { + continue + } + for _, name := range tables { + if !names.enable(name) { + continue + } + def, ok := infoschema.MetricTableMap[name] + if !ok { + return nil, meta.ErrTableNotExists + } + cols := def.Labels + cond := "" + if def.Quantile > 0 { + cols = append(cols, "quantile") + if len(e.extractor.Quantiles) > 0 { + qs := make([]string, len(e.extractor.Quantiles)) + for i, q := range e.extractor.Quantiles { + qs[i] = fmt.Sprintf("%f", q) + } + cond = "where quantile in (" + strings.Join(qs, ",") + ")" + } else { + cond = "where quantile=0.99" + } + } + var sql string + if len(cols) > 0 { + sql = fmt.Sprintf("select avg(value),min(value),max(value), `%s` from `%s`.`%s` %s group by `%[1]s` order by `%[1]s`", + strings.Join(cols, "`,`"), util.MetricSchemaName.L, name, cond) + } else { + sql = fmt.Sprintf("select avg(value),min(value),max(value) 
from `%s`.`%s` %s", + util.MetricSchemaName.L, name, cond) + } + rows, _, err := sctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQLWithContext(ctx, sql) + if err != nil { + return nil, errors.Errorf("execute '%s' failed: %v", sql, err) + } + nonInstanceLabelIndex := 0 + if len(def.Labels) > 0 && def.Labels[0] == "instance" { + nonInstanceLabelIndex = 1 + } + // skip min/max/avg + const skipCols = 3 + for _, row := range rows { + instance := "" + if nonInstanceLabelIndex > 0 { + instance = row.GetString(skipCols) // skip min/max/avg + } + var labels []string + for i := range def.Labels[nonInstanceLabelIndex:] { + labels = append(labels, row.GetString(skipCols+nonInstanceLabelIndex+i)) // skip min/max/avg/instance + } + quantile := 0.0 + if def.Quantile > 0 { + quantile = row.GetFloat64(row.Len() - 1) // quantile will be the last column + } + finalRows = append(finalRows, types.MakeDatums( + rule, + instance, + name, + strings.Join(labels, ", "), + quantile, + row.GetFloat64(0), // avg + row.GetFloat64(1), // min + row.GetFloat64(2), // max + + )) + } + } + } + return finalRows, nil +} diff --git a/executor/inspection_summary_test.go b/executor/inspection_summary_test.go new file mode 100644 index 0000000000000..d93345630aae9 --- /dev/null +++ b/executor/inspection_summary_test.go @@ -0,0 +1,106 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package executor_test + +import ( + "context" + + . 
"github.com/pingcap/check" + "github.com/pingcap/failpoint" + "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/executor" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/set" + "github.com/pingcap/tidb/util/testkit" +) + +var _ = Suite(&inspectionSummarySuite{}) + +type inspectionSummarySuite struct { + store kv.Storage + dom *domain.Domain +} + +func (s *inspectionSummarySuite) SetUpSuite(c *C) { + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + s.store = store + s.dom = dom +} + +func (s *inspectionSummarySuite) TearDownSuite(c *C) { + s.dom.Close() + s.store.Close() +} + +func (s *inspectionSummarySuite) TestValidInspectionSummaryRules(c *C) { + for rule, tbls := range executor.InspectionSummaryRules { + tables := set.StringSet{} + for _, t := range tbls { + c.Assert(tables.Exist(t), IsFalse, Commentf("duplicate table name: %v in rule: %v", t, rule)) + } + } +} + +func (s *inspectionSummarySuite) TestInspectionSummary(c *C) { + tk := testkit.NewTestKitWithInit(c, s.store) + + fpName := "github.com/pingcap/tidb/executor/mockMetricsTableData" + c.Assert(failpoint.Enable(fpName, "return"), IsNil) + defer func() { c.Assert(failpoint.Disable(fpName), IsNil) }() + + datetime := func(s string) types.Time { + t, err := types.ParseTime(tk.Se.GetSessionVars().StmtCtx, s, mysql.TypeDatetime, types.MaxFsp) + c.Assert(err, IsNil) + return t + } + + // construct some mock data + mockData := map[string][][]types.Datum{ + // columns: time, instance, type, result, value + "tidb_qps": { + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "Query", "OK", 0.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "Query", "Error", 1.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1", "Quit", "Error", 5.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1", "Quit", "Error", 9.0), + }, + // columns: time, instance, sql_type, 
quantile, value + "tidb_query_duration": { + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "Select", 0.99, 0.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "Update", 0.99, 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "Update", 0.99, 3.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-2", "Delete", 0.99, 5.0), + }, + } + + ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData) + ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool { + return fpName == fpname + }) + + rs, err := tk.Se.Execute(ctx, "select * from information_schema.inspection_summary where rule='query-summary' and metric_name in ('tidb_qps', 'tidb_query_duration')") + c.Assert(err, IsNil) + result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) + c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) + result.Check(testkit.Rows( + "query-summary tikv-0 tidb_query_duration Select 0.99 0 0 0", + "query-summary tikv-1 tidb_query_duration Update 0.99 2 1 3", + "query-summary tikv-2 tidb_query_duration Delete 0.99 5 5 5", + "query-summary tidb-0 tidb_qps Query, Error 0 1 1 1", + "query-summary tidb-0 tidb_qps Query, OK 0 0 0 0", + "query-summary tidb-1 tidb_qps Quit, Error 0 7 5 9", + )) +} diff --git a/infoschema/metric_schema_test.go b/infoschema/metric_schema_test.go index 931e12adb2735..4f6c2fea03885 100644 --- a/infoschema/metric_schema_test.go +++ b/infoschema/metric_schema_test.go @@ -18,6 +18,7 @@ import ( . 
"github.com/pingcap/check" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/util/set" ) type metricSchemaSuite struct{} @@ -56,5 +57,9 @@ func (s *inspectionSuite) TestMetricSchemaDef(c *C) { if name != strings.ToLower(name) { c.Assert(name, Equals, strings.ToLower(name), Commentf("metric table name %v should be lower case", name)) } + // INSTANCE must be the first label + if set.NewStringSet(def.Labels...).Exist("instance") { + c.Assert(def.Labels[0], Equals, "instance", Commentf("metrics table %v: expect `instance`is the first label but got %v", name, def.Labels)) + } } } diff --git a/infoschema/metric_table_def.go b/infoschema/metric_table_def.go index 0cc7786de048a..68c908e6dac5c 100644 --- a/infoschema/metric_table_def.go +++ b/infoschema/metric_table_def.go @@ -56,7 +56,8 @@ var MetricTableMap = map[string]MetricTableDef{ Comment: "The quantile of TiDB slow query statistics with slow query total cop wait time(second)", }, "tidb_ops_internal": { - PromQL: "sum(rate(tidb_session_restricted_sql_total[$RANGE_DURATION]))", + PromQL: "sum(rate(tidb_session_restricted_sql_total[$RANGE_DURATION])) by (instance)", + Labels: []string{"instance"}, Comment: "TiDB internal SQL is used by TiDB itself.", }, "tidb_process_mem_usage": { @@ -656,7 +657,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "pd_scheduler_store_status": { PromQL: `pd_scheduler_store_status{$LABEL_CONDITIONS}`, - Labels: []string{"address", "instance", "store", "type"}, + Labels: []string{"instance", "address", "store", "type"}, }, "store_available_ratio": { PromQL: `sum(pd_scheduler_store_status{type="store_available"}) by (address, store) / sum(pd_scheduler_store_status{type="store_capacity"}) by (address, store)`, @@ -678,7 +679,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "pd_hotspot_status": { PromQL: `pd_hotspot_status{$LABEL_CONDITIONS}`, - Labels: []string{"address", "instance", "store", "type"}, + Labels: []string{"instance", "address", "store", "type"}, }, 
"pd_scheduler_status": { PromQL: `pd_scheduler_status{$LABEL_CONDITIONS}`, @@ -719,12 +720,12 @@ var MetricTableMap = map[string]MetricTableDef{ "pd_grpc_completed_commands_rate": { PromQL: `sum(rate(grpc_server_handling_seconds_count{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (grpc_method,instance)`, - Labels: []string{"grpc_method", "instance"}, + Labels: []string{"instance", "grpc_method"}, Comment: "The rate of completing each kind of gRPC commands", }, "pd_grpc_completed_commands_duration": { PromQL: `histogram_quantile($QUANTILE, sum(rate(grpc_server_handling_seconds_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,grpc_method,instance))`, - Labels: []string{"grpc_method", "instance"}, + Labels: []string{"instance", "grpc_method"}, Quantile: 0.99, Comment: "The quantile time consumed of completing each kind of gRPC commands", }, @@ -747,7 +748,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "pd_peer_round_trip_time_seconds": { PromQL: `histogram_quantile($QUANTILE, sum(rate(etcd_network_peer_round_trip_time_seconds_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,instance,To))`, - Labels: []string{"To", "instance"}, + Labels: []string{"instance", "To"}, Quantile: 0.99, Comment: "The quantile latency of the network in .99", }, @@ -778,7 +779,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "pd_scheduler_region_heartbeat": { PromQL: `sum(rate(pd_scheduler_region_heartbeat{$LABEL_CONDITIONS}[$RANGE_DURATION])*60) by (address,instance, store, status,type)`, - Labels: []string{"address", "instance", "status", "store", "type"}, + Labels: []string{"instance", "address", "status", "store", "type"}, }, "pd_region_syncer_status": { PromQL: `pd_region_syncer_status{$LABEL_CONDITIONS}`, @@ -806,7 +807,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_io_utilization": { PromQL: `rate(node_disk_io_time_seconds_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, 
Comment: "The I/O utilization per TiKV instance", }, "tikv_flow_mbps": { @@ -856,7 +857,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_engine_write_stall": { PromQL: `avg(tikv_engine_write_stall{type="write_stall_percentile99"}) by (instance, db)`, - Labels: []string{"db", "instance"}, + Labels: []string{"instance", "db"}, Comment: "Indicates occurrences of Write Stall events that make the TiKV instance unavailable temporarily", }, "tikv_server_report_failures": { @@ -986,7 +987,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_pd_requests": { PromQL: `sum(rate(tikv_pd_request_duration_seconds_count{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (type,instance)`, - Labels: []string{"type", "instance"}, + Labels: []string{"instance", "type"}, Comment: "The count of requests that TiKV sends to PD", }, "tikv_pd_request_avg_duration": { @@ -1552,7 +1553,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_memtable_size": { PromQL: `avg(tikv_engine_memory_bytes{$LABEL_CONDITIONS}) by (type,instance,db,cf)`, - Labels: []string{"cf", "instance", "type", "db"}, + Labels: []string{"instance", "cf", "type", "db"}, Comment: "The memtable size of each column family", }, "tikv_memtable_hit": { @@ -1562,7 +1563,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_block_cache_size": { PromQL: `topk(20, avg(tikv_engine_block_cache_size_bytes{$LABEL_CONDITIONS}) by(cf, instance, db))`, - Labels: []string{"cf", "instance", "db"}, + Labels: []string{"instance", "cf", "db"}, Comment: "The block cache size. 
Broken down by column family if shared block cache is disabled.", }, "tikv_block_all_cache_hit": { @@ -1597,7 +1598,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_total_keys": { PromQL: `sum(tikv_engine_estimate_num_keys{$LABEL_CONDITIONS}) by (db,cf,instance)`, - Labels: []string{"db", "cf", "instance"}, + Labels: []string{"instance", "db", "cf"}, Comment: "The count of keys in each column family", }, "tikv_per_read_max_bytes": { @@ -1627,7 +1628,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_compaction_pending_bytes": { PromQL: `sum(rate(tikv_engine_pending_compaction_bytes{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (cf,instance,db)`, - Labels: []string{"cf", "instance", "db"}, + Labels: []string{"instance", "cf", "db"}, Comment: "The pending bytes to be compacted", }, "tikv_read_amplication": { @@ -1652,23 +1653,23 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_number_files_at_each_level": { PromQL: `avg(tikv_engine_num_files_at_level{$LABEL_CONDITIONS}) by (cf, level,db,instance)`, - Labels: []string{"cf", "instance", "level", "db"}, + Labels: []string{"instance", "cf", "level", "db"}, Comment: "The number of SST files for different column families in each level", }, - "tikv_ingest_sst_duration_seconds": { + "tikv_ingest_sst_duration": { PromQL: `histogram_quantile($QUANTILE, sum(rate(tikv_snapshot_ingest_sst_duration_seconds_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,instance,db))`, Labels: []string{"instance", "db"}, Quantile: 0.99, Comment: "The quantile of time consumed when ingesting SST files", }, - "tikv_ingest_sst_avg_duration_seconds": { + "tikv_ingest_sst_avg_duration": { PromQL: `sum(rate(tikv_snapshot_ingest_sst_duration_seconds_sum{$LABEL_CONDITIONS}[$RANGE_DURATION])) / sum(rate(tikv_snapshot_ingest_sst_duration_seconds_count{$LABEL_CONDITIONS}[$RANGE_DURATION]))`, Labels: []string{"instance"}, Comment: "The average time consumed when ingesting SST files", }, 
"tikv_stall_conditions_changed_of_each_cf": { PromQL: `tikv_engine_stall_conditions_changed{$LABEL_CONDITIONS}`, - Labels: []string{"cf", "instance", "type", "db"}, + Labels: []string{"instance", "cf", "type", "db"}, Comment: "Stall conditions changed of each column family", }, "tikv_write_stall_reason": { @@ -1677,7 +1678,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_compaction_reason": { PromQL: `sum(rate(tikv_engine_compaction_reason{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (db,cf,reason,instance)`, - Labels: []string{"cf", "instance", "reason", "db"}, + Labels: []string{"instance", "cf", "reason", "db"}, }, "tikv_engine_blob_key_max_size": { PromQL: `max(tikv_engine_blob_key_size{$LABEL_CONDITIONS}) by (db,instance,type)`, @@ -1796,7 +1797,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_backup_range_size": { PromQL: `histogram_quantile($QUANTILE, sum(rate(tikv_backup_range_size_bytes_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,cf,instance))`, - Labels: []string{"cf", "instance"}, + Labels: []string{"instance", "cf"}, Quantile: 0.99, }, "tikv_backup_duration": { @@ -1815,11 +1816,11 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_disk_read_bytes": { PromQL: `sum(irate(node_disk_read_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (instance,device)`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "tikv_disk_write_bytes": { PromQL: `sum(irate(node_disk_written_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (instance,device)`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "tikv_backup_range_duration": { PromQL: `histogram_quantile($QUANTILE, sum(rate(tikv_backup_range_duration_seconds_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,type,instance))`, @@ -1832,7 +1833,7 @@ var MetricTableMap = map[string]MetricTableDef{ }, "tikv_backup_errors": { PromQL: `rate(tikv_backup_error_counter{$LABEL_CONDITIONS}[$RANGE_DURATION])`, 
- Labels: []string{"error", "instance"}, + Labels: []string{"instance", "error"}, }, "node_virtual_cpus": { PromQL: `count(node_cpu_seconds_total{mode="user"}) by (instance)`, @@ -1943,30 +1944,30 @@ var MetricTableMap = map[string]MetricTableDef{ }, "node_disk_io_util": { PromQL: `rate(node_disk_io_time_seconds_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) or irate(node_disk_io_time_seconds_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_disk_iops": { PromQL: `sum(rate(node_disk_reads_completed_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) + rate(node_disk_writes_completed_total{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (instance,device)`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_disk_write_latency": { PromQL: `(rate(node_disk_write_time_seconds_total{$LABEL_CONDITIONS}[$RANGE_DURATION])/ rate(node_disk_writes_completed_total{$LABEL_CONDITIONS}[$RANGE_DURATION]))`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, Comment: "node disk write latency(ms)", }, "node_disk_read_latency": { PromQL: `(rate(node_disk_read_time_seconds_total{$LABEL_CONDITIONS}[$RANGE_DURATION])/ rate(node_disk_reads_completed_total{$LABEL_CONDITIONS}[$RANGE_DURATION]))`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, Comment: "node disk read latency(ms)", }, "node_disk_throughput": { PromQL: `irate(node_disk_read_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) + irate(node_disk_written_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, Comment: "Units is byte", }, "node_filesystem_space_used": { PromQL: `((node_filesystem_size_bytes{$LABEL_CONDITIONS} - node_filesystem_avail_bytes{$LABEL_CONDITIONS}) / node_filesystem_size_bytes{$LABEL_CONDITIONS}) * 100`, - Labels: []string{"device", "instance"}, + 
Labels: []string{"instance", "device"}, Comment: "Filesystem used space. If is > 80% then is Critical.", }, "node_file_descriptor_allocated": { @@ -1975,48 +1976,48 @@ var MetricTableMap = map[string]MetricTableDef{ }, "node_network_in_drops": { PromQL: `rate(node_network_receive_drop_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) `, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_out_drops": { PromQL: `rate(node_network_transmit_drop_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_in_errors": { PromQL: `rate(node_network_receive_errs_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_out_errors": { PromQL: `rate(node_network_transmit_errs_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_in_traffic": { PromQL: `rate(node_network_receive_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) or irate(node_network_receive_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_out_traffic": { PromQL: `rate(node_network_transmit_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) or irate(node_network_transmit_bytes_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_in_packets": { PromQL: `rate(node_network_receive_packets_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) or irate(node_network_receive_packets_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_out_packets": { PromQL: `rate(node_network_transmit_packets_total{$LABEL_CONDITIONS}[$RANGE_DURATION]) or 
irate(node_network_transmit_packets_total{$LABEL_CONDITIONS}[$RANGE_DURATION])`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_interface_speed": { PromQL: `node_network_transmit_queue_length{$LABEL_CONDITIONS}`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, Comment: "node_network_transmit_queue_length = transmit_queue_length value of /sys/class/net/.", }, "node_network_utilization_in_hourly": { PromQL: `sum(increase(node_network_receive_bytes_total{$LABEL_CONDITIONS}[1h]))`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_network_utilization_out_hourly": { PromQL: `sum(increase(node_network_transmit_bytes_total{$LABEL_CONDITIONS}[1h]))`, - Labels: []string{"device", "instance"}, + Labels: []string{"instance", "device"}, }, "node_tcp_in_use": { PromQL: `node_sockstat_TCP_inuse{$LABEL_CONDITIONS}`, diff --git a/infoschema/tables.go b/infoschema/tables.go index aba2432e66b2f..6b471c87d9558 100755 --- a/infoschema/tables.go +++ b/infoschema/tables.go @@ -115,6 +115,8 @@ const ( TableMetricSummary = "METRICS_SUMMARY" // TableMetricSummaryByLabel is a metric table that contains all metrics that group by label info. 
TableMetricSummaryByLabel = "METRICS_SUMMARY_BY_LABEL" + // TableInspectionSummary is the string constant of inspection summary table + TableInspectionSummary = "INSPECTION_SUMMARY" ) var tableIDMap = map[string]int64{ @@ -172,6 +174,7 @@ var tableIDMap = map[string]int64{ TableMetricSummary: autoid.InformationSchemaDBID + 52, TableMetricSummaryByLabel: autoid.InformationSchemaDBID + 53, TableMetricTables: autoid.InformationSchemaDBID + 54, + TableInspectionSummary: autoid.InformationSchemaDBID + 55, } type columnInfo struct { @@ -1140,6 +1143,17 @@ var tableInspectionResultCols = []columnInfo{ {"DETAILS", mysql.TypeVarchar, 256, 0, nil, nil}, } +var tableInspectionSummaryCols = []columnInfo{ + {"RULE", mysql.TypeVarchar, 64, 0, nil, nil}, + {"INSTANCE", mysql.TypeVarchar, 64, 0, nil, nil}, + {"METRIC_NAME", mysql.TypeVarchar, 64, 0, nil, nil}, + {"LABEL", mysql.TypeVarchar, 64, 0, nil, nil}, + {"QUANTILE", mysql.TypeDouble, 22, 0, nil, nil}, + {"AVG_VALUE", mysql.TypeDouble, 22, 0, nil, nil}, + {"MIN_VALUE", mysql.TypeDouble, 22, 0, nil, nil}, + {"MAX_VALUE", mysql.TypeDouble, 22, 0, nil, nil}, +} + var tableMetricTablesCols = []columnInfo{ {"TABLE_NAME", mysql.TypeVarchar, 64, 0, nil, nil}, {"PROMQL", mysql.TypeVarchar, 64, 0, nil, nil}, @@ -2363,6 +2377,7 @@ var tableNameToColumns = map[string][]columnInfo{ TableMetricSummary: tableMetricSummaryCols, TableMetricSummaryByLabel: tableMetricSummaryByLabelCols, TableMetricTables: tableMetricTablesCols, + TableInspectionSummary: tableInspectionSummaryCols, } func createInfoSchemaTable(_ autoid.Allocators, meta *model.TableInfo) (table.Table, error) { diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 0e15289700aec..3e86a8ef64143 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -2793,6 +2793,8 @@ func (b *PlanBuilder) buildMemTable(ctx context.Context, dbName model.CIStr, tab p.Extractor = &ClusterLogTableExtractor{} case 
infoschema.TableInspectionResult: p.Extractor = &InspectionResultTableExtractor{} + case infoschema.TableInspectionSummary: + p.Extractor = &InspectionSummaryTableExtractor{} case infoschema.TableMetricSummary, infoschema.TableMetricSummaryByLabel: p.Extractor = newMetricTableExtractor() } diff --git a/planner/core/memtable_predicate_extractor.go b/planner/core/memtable_predicate_extractor.go index 3d2828546e8c0..b27d09204f257 100644 --- a/planner/core/memtable_predicate_extractor.go +++ b/planner/core/memtable_predicate_extractor.go @@ -368,6 +368,20 @@ func (helper extractHelper) extractTimeRange( return } +func (helper extractHelper) parseQuantiles(quantileSet set.StringSet) []float64 { + quantiles := make([]float64, 0, len(quantileSet)) + for k := range quantileSet { + v, err := strconv.ParseFloat(k, 64) + if err != nil { + // ignore the parse error won't affect result. + continue + } + quantiles = append(quantiles, v) + } + sort.Float64s(quantiles) + return quantiles +} + // ClusterTableExtractor is used to extract some predicates of cluster table. type ClusterTableExtractor struct { extractHelper @@ -559,20 +573,6 @@ func (e *MetricTableExtractor) getTimeRange(start, end int64) (time.Time, time.T return startTime, endTime } -func (e *MetricTableExtractor) parseQuantiles(quantileSet set.StringSet) []float64 { - quantiles := make([]float64, 0, len(quantileSet)) - for k := range quantileSet { - v, err := strconv.ParseFloat(k, 64) - if err != nil { - // ignore the parse error won't affect result. 
- continue - } - quantiles = append(quantiles, v) - } - sort.Float64s(quantiles) - return quantiles -} - func (e *MetricTableExtractor) convertToTime(t int64) time.Time { if t == 0 || t == math.MaxInt64 { return time.Now() @@ -595,12 +595,12 @@ type InspectionResultTableExtractor struct { // Extract implements the MemTablePredicateExtractor Extract interface func (e *InspectionResultTableExtractor) Extract( - ctx sessionctx.Context, + _ sessionctx.Context, schema *expression.Schema, names []*types.FieldName, predicates []expression.Expression, ) (remained []expression.Expression) { - // Extract the `type/instance` columns + // Extract the `rule/item` columns remained, ruleSkip, rules := e.extractCol(schema, names, predicates, "rule", true) remained, itemSkip, items := e.extractCol(schema, names, remained, "item", true) e.SkipInspection = ruleSkip || itemSkip @@ -608,3 +608,35 @@ func (e *InspectionResultTableExtractor) Extract( e.Items = items return remained } + +// InspectionSummaryTableExtractor is used to extract some predicates of `inspection_summary` +type InspectionSummaryTableExtractor struct { + extractHelper + // SkipInspection means the where clause always false, we don't need to request any component + SkipInspection bool + // Rules represents rules applied to, and we should apply all inspection rules if there is no rules specified + // e.g: SELECT * FROM inspection_summary WHERE rule in ('ddl', 'config') + Rules set.StringSet + MetricNames set.StringSet + Quantiles []float64 +} + +// Extract implements the MemTablePredicateExtractor Extract interface +func (e *InspectionSummaryTableExtractor) Extract( + _ sessionctx.Context, + schema *expression.Schema, + names []*types.FieldName, + predicates []expression.Expression, +) (remained []expression.Expression) { + // Extract the `rule` columns + remained, ruleSkip, rules := e.extractCol(schema, names, predicates, "rule", true) + // Extract the `metric_name` columns + remained, metricNameSkip, metricNames := 
e.extractCol(schema, names, predicates, "metric_name", true) + // Extract the `quantile` columns + remained, quantileSkip, quantileSet := e.extractCol(schema, names, predicates, "quantile", false) + e.SkipInspection = ruleSkip || quantileSkip || metricNameSkip + e.Rules = rules + e.Quantiles = e.parseQuantiles(quantileSet) + e.MetricNames = metricNames + return remained +} diff --git a/planner/core/memtable_predicate_extractor_test.go b/planner/core/memtable_predicate_extractor_test.go index 8d7b6d4e03a13..3caf277ba7e51 100644 --- a/planner/core/memtable_predicate_extractor_test.go +++ b/planner/core/memtable_predicate_extractor_test.go @@ -740,10 +740,7 @@ func (s *extractorSuite) TestInspectionResultTableExtractor(c *C) { sql: "select * from information_schema.inspection_result where rule='ddl' and rule in ('slow_query', 'ddl')", rules: set.NewStringSet("ddl"), }, - { - sql: "select * from information_schema.inspection_result where rule='ddl' and rule in ('slow_query', 'config')", - skipInspection: true, - }, + { sql: "select * from information_schema.inspection_result where rule in ('ddl', 'config') and rule in ('slow_query', 'config')", rules: set.NewStringSet("config"), @@ -797,3 +794,102 @@ func (s *extractorSuite) TestInspectionResultTableExtractor(c *C) { c.Assert(clusterConfigExtractor.SkipInspection, Equals, ca.skipInspection, Commentf("SQL: %v", ca.sql)) } } + +func (s *extractorSuite) TestInspectionSummaryTableExtractor(c *C) { + se, err := session.CreateSession4Test(s.store) + c.Assert(err, IsNil) + + var cases = []struct { + sql string + rules set.StringSet + names set.StringSet + quantiles set.Float64Set + skipInspection bool + }{ + { + sql: "select * from information_schema.inspection_summary", + }, + { + sql: "select * from information_schema.inspection_summary where rule='ddl'", + rules: set.NewStringSet("ddl"), + }, + { + sql: "select * from information_schema.inspection_summary where 'ddl'=rule or rule='config'", + rules: set.NewStringSet("ddl", 
"config"), + }, + { + sql: "select * from information_schema.inspection_summary where 'ddl'=rule or rule='config' or rule='slow_query'", + rules: set.NewStringSet("ddl", "config", "slow_query"), + }, + { + sql: "select * from information_schema.inspection_summary where (rule='config' or rule='slow_query') and (metric_name='metric_name3' or metric_name='metric_name1')", + rules: set.NewStringSet("config", "slow_query"), + names: set.NewStringSet("metric_name3", "metric_name1"), + }, + { + sql: "select * from information_schema.inspection_summary where rule in ('ddl', 'slow_query')", + rules: set.NewStringSet("ddl", "slow_query"), + }, + { + sql: "select * from information_schema.inspection_summary where rule in ('ddl', 'slow_query') and metric_name='metric_name1'", + rules: set.NewStringSet("ddl", "slow_query"), + names: set.NewStringSet("metric_name1"), + }, + { + sql: "select * from information_schema.inspection_summary where rule in ('ddl', 'slow_query') and metric_name in ('metric_name1', 'metric_name2')", + rules: set.NewStringSet("ddl", "slow_query"), + names: set.NewStringSet("metric_name1", "metric_name2"), + }, + { + sql: "select * from information_schema.inspection_summary where rule='ddl' and metric_name in ('metric_name1', 'metric_name2')", + rules: set.NewStringSet("ddl"), + names: set.NewStringSet("metric_name1", "metric_name2"), + }, + { + sql: "select * from information_schema.inspection_summary where rule='ddl' and metric_name='metric_NAME3'", + rules: set.NewStringSet("ddl"), + names: set.NewStringSet("metric_name3"), + }, + { + sql: "select * from information_schema.inspection_summary where rule='ddl' and rule in ('slow_query', 'ddl')", + rules: set.NewStringSet("ddl"), + }, + { + sql: "select * from information_schema.inspection_summary where rule in ('ddl', 'config') and rule in ('slow_query', 'config')", + rules: set.NewStringSet("config"), + }, + { + sql: `select * from information_schema.inspection_summary + where metric_name in 
('metric_name1', 'metric_name4') + and metric_name in ('metric_name5', 'metric_name4') + and rule in ('ddl', 'config') + and rule in ('slow_query', 'config') + and quantile in (0.80, 0.90)`, + rules: set.NewStringSet("config"), + names: set.NewStringSet("metric_name4"), + quantiles: set.NewFloat64Set(0.80, 0.90), + }, + { + sql: `select * from information_schema.inspection_summary + where metric_name in ('metric_name1', 'metric_name4') + and metric_name in ('metric_name5', 'metric_name4') + and metric_name in ('metric_name5', 'metric_name1') + and metric_name in ('metric_name1', 'metric_name3')`, + skipInspection: true, + }, + } + parser := parser.New() + for _, ca := range cases { + logicalMemTable := s.getLogicalMemTable(c, se, parser, ca.sql) + c.Assert(logicalMemTable.Extractor, NotNil) + + clusterConfigExtractor := logicalMemTable.Extractor.(*plannercore.InspectionSummaryTableExtractor) + if len(ca.rules) > 0 { + c.Assert(clusterConfigExtractor.Rules, DeepEquals, ca.rules, Commentf("SQL: %v", ca.sql)) + } + if len(ca.names) > 0 { + c.Assert(clusterConfigExtractor.MetricNames, DeepEquals, ca.names, Commentf("SQL: %v", ca.sql)) + } + c.Assert(clusterConfigExtractor.SkipInspection, Equals, ca.skipInspection, Commentf("SQL: %v", ca.sql)) + } +} diff --git a/util/set/float64_set.go b/util/set/float64_set.go index e4711ccd76612..296451ff18426 100644 --- a/util/set/float64_set.go +++ b/util/set/float64_set.go @@ -17,8 +17,12 @@ package set type Float64Set map[float64]struct{} // NewFloat64Set builds a float64 set. -func NewFloat64Set() Float64Set { - return make(map[float64]struct{}) +func NewFloat64Set(fs ...float64) Float64Set { + x := make(map[float64]struct{}) + for _, f := range fs { + x[f] = struct{}{} + } + return x } // Exist checks whether `val` exists in `s`. 
From 6b7808e5fadb30d3d256ca410c53e1472c6bfa37 Mon Sep 17 00:00:00 2001 From: Lonng Date: Tue, 18 Feb 2020 12:49:14 +0800 Subject: [PATCH 2/4] disable inspection test case parallel Signed-off-by: Lonng --- executor/inspection_result_test.go | 2 +- executor/inspection_summary_test.go | 2 +- infoschema/metric_table_def.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/executor/inspection_result_test.go b/executor/inspection_result_test.go index 167368081871b..7db6ea045a89c 100644 --- a/executor/inspection_result_test.go +++ b/executor/inspection_result_test.go @@ -27,7 +27,7 @@ import ( "github.com/pingcap/tidb/util/testkit" ) -var _ = Suite(&inspectionResultSuite{}) +var _ = SerialSuites(&inspectionResultSuite{}) type inspectionResultSuite struct { store kv.Storage diff --git a/executor/inspection_summary_test.go b/executor/inspection_summary_test.go index d93345630aae9..f35fbd3a24f9d 100644 --- a/executor/inspection_summary_test.go +++ b/executor/inspection_summary_test.go @@ -27,7 +27,7 @@ import ( "github.com/pingcap/tidb/util/testkit" ) -var _ = Suite(&inspectionSummarySuite{}) +var _ = SerialSuites(&inspectionSummarySuite{}) type inspectionSummarySuite struct { store kv.Storage diff --git a/infoschema/metric_table_def.go b/infoschema/metric_table_def.go index 68c908e6dac5c..189c943fd1ed1 100644 --- a/infoschema/metric_table_def.go +++ b/infoschema/metric_table_def.go @@ -56,7 +56,7 @@ var MetricTableMap = map[string]MetricTableDef{ Comment: "The quantile of TiDB slow query statistics with slow query total cop wait time(second)", }, "tidb_ops_internal": { - PromQL: "sum(rate(tidb_session_restricted_sql_total[$RANGE_DURATION])) by (instance)", + PromQL: "sum(rate(tidb_session_restricted_sql_total{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (instance)", Labels: []string{"instance"}, Comment: "TiDB internal SQL is used by TiDB itself.", }, From 264ca09fe459e80efdb1d23ed5973fa36414bd74 Mon Sep 17 00:00:00 2001 From: Lonng Date: Tue, 18 Feb 2020 
20:07:12 +0800 Subject: [PATCH 3/4] address comment Signed-off-by: Lonng --- executor/inspection_summary.go | 3 +-- executor/inspection_summary_test.go | 5 +++++ planner/core/memtable_predicate_extractor_test.go | 1 - util/set/float64_set.go | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/executor/inspection_summary.go b/executor/inspection_summary.go index efbcecb1ab48d..d4f4927ef33ad 100644 --- a/executor/inspection_summary.go +++ b/executor/inspection_summary.go @@ -486,7 +486,7 @@ func (e *inspectionSummaryRetriever) retrieve(ctx context.Context, sctx sessionc for i := range def.Labels[nonInstanceLabelIndex:] { labels = append(labels, row.GetString(skipCols+nonInstanceLabelIndex+i)) // skip min/max/avg/instance } - quantile := 0.0 + var quantile float64 if def.Quantile > 0 { quantile = row.GetFloat64(row.Len() - 1) // quantile will be the last column } @@ -499,7 +499,6 @@ func (e *inspectionSummaryRetriever) retrieve(ctx context.Context, sctx sessionc row.GetFloat64(0), // avg row.GetFloat64(1), // min row.GetFloat64(2), // max - )) } } diff --git a/executor/inspection_summary_test.go b/executor/inspection_summary_test.go index f35fbd3a24f9d..062536c170ec7 100644 --- a/executor/inspection_summary_test.go +++ b/executor/inspection_summary_test.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/domain" "github.com/pingcap/tidb/executor" + "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/set" @@ -51,6 +52,10 @@ func (s *inspectionSummarySuite) TestValidInspectionSummaryRules(c *C) { tables := set.StringSet{} for _, t := range tbls { c.Assert(tables.Exist(t), IsFalse, Commentf("duplicate table name: %v in rule: %v", t, rule)) + tables.Insert(t) + + _, found := infoschema.MetricTableMap[t] + c.Assert(found, IsTrue, Commentf("metric table %v not define", t)) } } } diff --git a/planner/core/memtable_predicate_extractor_test.go 
b/planner/core/memtable_predicate_extractor_test.go index 3caf277ba7e51..3f4bebb4cf577 100644 --- a/planner/core/memtable_predicate_extractor_test.go +++ b/planner/core/memtable_predicate_extractor_test.go @@ -740,7 +740,6 @@ func (s *extractorSuite) TestInspectionResultTableExtractor(c *C) { sql: "select * from information_schema.inspection_result where rule='ddl' and rule in ('slow_query', 'ddl')", rules: set.NewStringSet("ddl"), }, - { sql: "select * from information_schema.inspection_result where rule in ('ddl', 'config') and rule in ('slow_query', 'config')", rules: set.NewStringSet("config"), diff --git a/util/set/float64_set.go b/util/set/float64_set.go index 296451ff18426..efbb61d69b0f9 100644 --- a/util/set/float64_set.go +++ b/util/set/float64_set.go @@ -18,7 +18,7 @@ type Float64Set map[float64]struct{} // NewFloat64Set builds a float64 set. func NewFloat64Set(fs ...float64) Float64Set { - x := make(map[float64]struct{}) + x := make(map[float64]struct{}, len(fs)) for _, f := range fs { x[f] = struct{}{} } From 021d65d2d41b1a258ae113a41776c4effcbc9254 Mon Sep 17 00:00:00 2001 From: Lonng Date: Tue, 18 Feb 2020 20:13:05 +0800 Subject: [PATCH 4/4] address comment Signed-off-by: Lonng --- executor/inspection_summary.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/executor/inspection_summary.go b/executor/inspection_summary.go index d4f4927ef33ad..50db38d860794 100644 --- a/executor/inspection_summary.go +++ b/executor/inspection_summary.go @@ -483,8 +483,13 @@ func (e *inspectionSummaryRetriever) retrieve(ctx context.Context, sctx sessionc instance = row.GetString(skipCols) // skip min/max/avg } var labels []string - for i := range def.Labels[nonInstanceLabelIndex:] { - labels = append(labels, row.GetString(skipCols+nonInstanceLabelIndex+i)) // skip min/max/avg/instance + for i, label := range def.Labels[nonInstanceLabelIndex:] { + // skip min/max/avg/instance + val := row.GetString(skipCols + nonInstanceLabelIndex + i) + if 
label == "store" || label == "store_id" { + val = fmt.Sprintf("store_id:%s", val) + } + labels = append(labels, val) } var quantile float64 if def.Quantile > 0 {