From baeecc76f338b20b50d69fcd3253ea0a920f30b6 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Tue, 6 Jul 2021 00:58:19 +0530 Subject: [PATCH 01/11] Create endpoint flow Signed-off-by: Hitanshu Mehta --- pkg/component/component.go | 19 + pkg/info/infopb/rpc.pb.go | 1570 ++++++++++++++++++++++++++++++++++++ pkg/info/infopb/rpc.proto | 2 +- pkg/query/endpointset.go | 686 ++++++++++++++++ pkg/query/storeset.go | 17 - 5 files changed, 2276 insertions(+), 18 deletions(-) create mode 100644 pkg/info/infopb/rpc.pb.go create mode 100644 pkg/query/endpointset.go diff --git a/pkg/component/component.go b/pkg/component/component.go index 91ec2f9ec5..71fcd7b77e 100644 --- a/pkg/component/component.go +++ b/pkg/component/component.go @@ -88,6 +88,25 @@ func FromProto(storeType storepb.StoreType) StoreAPI { } } +func FromString(storeType string) StoreAPI { + switch storeType { + case "query": + return Query + case "rule": + return Rule + case "sidecar": + return Sidecar + case "store": + return Store + case "receive": + return Receive + case "debug": + return Debug + default: + return UnknownStoreAPI + } +} + var ( Bucket = source{component: component{name: "bucket"}} Cleanup = source{component: component{name: "cleanup"}} diff --git a/pkg/info/infopb/rpc.pb.go b/pkg/info/infopb/rpc.pb.go new file mode 100644 index 0000000000..452ef93b22 --- /dev/null +++ b/pkg/info/infopb/rpc.pb.go @@ -0,0 +1,1570 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: info/infopb/rpc.proto + +package infopb + +import ( + context "context" + fmt "fmt" + io "io" + math "math" + math_bits "math/bits" + + _ "github.com/gogo/protobuf/gogoproto" + proto "github.com/gogo/protobuf/proto" + labelpb "github.com/thanos-io/thanos/pkg/store/labelpb" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package + +type InfoRequest struct { +} + +func (m *InfoRequest) Reset() { *m = InfoRequest{} } +func (m *InfoRequest) String() string { return proto.CompactTextString(m) } +func (*InfoRequest) ProtoMessage() {} +func (*InfoRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{0} +} +func (m *InfoRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *InfoRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_InfoRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *InfoRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_InfoRequest.Merge(m, src) +} +func (m *InfoRequest) XXX_Size() int { + return m.Size() +} +func (m *InfoRequest) XXX_DiscardUnknown() { + xxx_messageInfo_InfoRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_InfoRequest proto.InternalMessageInfo + +type InfoResponse struct { + LabelSets []labelpb.ZLabelSet `protobuf:"bytes,1,rep,name=label_sets,json=labelSets,proto3" json:"label_sets"` + ComponentType string `protobuf:"bytes,2,opt,name=ComponentType,proto3" json:"ComponentType,omitempty"` + /// StoreInfo holds the metadata related to Store API if exposed by the component otherwise it will be null. + Store *StoreInfo `protobuf:"bytes,3,opt,name=store,proto3" json:"store,omitempty"` + /// RulesInfo holds the metadata related to Rules API if exposed by the component otherwise it will be null. + Rules *RulesInfo `protobuf:"bytes,4,opt,name=rules,proto3" json:"rules,omitempty"` + /// MetricMetadataInfo holds the metadata related to Metadata API if exposed by the component otherwise it will be null. + MetricMetadata *MetricMetadataInfo `protobuf:"bytes,5,opt,name=metric_metadata,json=metricMetadata,proto3" json:"metric_metadata,omitempty"` + /// TargetsInfo holds the metadata related to Targets API if exposed by the component otherwise it will be null. + Targets *TargetsInfo `protobuf:"bytes,6,opt,name=targets,proto3" json:"targets,omitempty"` + /// ExemplarsInfo holds the metadata related to Exemplars API if exposed by the component otherwise it will be null. + Exemplars *ExemplarsInfo `protobuf:"bytes,7,opt,name=exemplars,proto3" json:"exemplars,omitempty"` +} + +func (m *InfoResponse) Reset() { *m = InfoResponse{} } +func (m *InfoResponse) String() string { return proto.CompactTextString(m) } +func (*InfoResponse) ProtoMessage() {} +func (*InfoResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{1} +} +func (m *InfoResponse) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *InfoResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_InfoResponse.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *InfoResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_InfoResponse.Merge(m, src) +} +func (m *InfoResponse) XXX_Size() int { + return m.Size() +} +func (m *InfoResponse) XXX_DiscardUnknown() { + xxx_messageInfo_InfoResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_InfoResponse proto.InternalMessageInfo + +/// StoreInfo holds the metadata related to Store API exposed by the component. 
+type StoreInfo struct { + MinTime int64 `protobuf:"varint,1,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` + MaxTime int64 `protobuf:"varint,2,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` +} + +func (m *StoreInfo) Reset() { *m = StoreInfo{} } +func (m *StoreInfo) String() string { return proto.CompactTextString(m) } +func (*StoreInfo) ProtoMessage() {} +func (*StoreInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{2} +} +func (m *StoreInfo) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *StoreInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_StoreInfo.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *StoreInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_StoreInfo.Merge(m, src) +} +func (m *StoreInfo) XXX_Size() int { + return m.Size() +} +func (m *StoreInfo) XXX_DiscardUnknown() { + xxx_messageInfo_StoreInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_StoreInfo proto.InternalMessageInfo + +/// RulesInfo holds the metadata related to Rules API exposed by the component. +type RulesInfo struct { +} + +func (m *RulesInfo) Reset() { *m = RulesInfo{} } +func (m *RulesInfo) String() string { return proto.CompactTextString(m) } +func (*RulesInfo) ProtoMessage() {} +func (*RulesInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{3} +} +func (m *RulesInfo) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *RulesInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_RulesInfo.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *RulesInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_RulesInfo.Merge(m, src) +} +func (m *RulesInfo) XXX_Size() int { + return m.Size() +} +func (m *RulesInfo) XXX_DiscardUnknown() { + xxx_messageInfo_RulesInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_RulesInfo proto.InternalMessageInfo + +/// MetricMetadataInfo holds the metadata related to Metadata API exposed by the component. +type MetricMetadataInfo struct { +} + +func (m *MetricMetadataInfo) Reset() { *m = MetricMetadataInfo{} } +func (m *MetricMetadataInfo) String() string { return proto.CompactTextString(m) } +func (*MetricMetadataInfo) ProtoMessage() {} +func (*MetricMetadataInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{4} +} +func (m *MetricMetadataInfo) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *MetricMetadataInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_MetricMetadataInfo.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *MetricMetadataInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_MetricMetadataInfo.Merge(m, src) +} +func (m *MetricMetadataInfo) XXX_Size() int { + return m.Size() +} +func (m *MetricMetadataInfo) XXX_DiscardUnknown() { + xxx_messageInfo_MetricMetadataInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_MetricMetadataInfo proto.InternalMessageInfo + +/// TargetsInfo holds the metadata related to Targets API exposed by the component. 
+type TargetsInfo struct { +} + +func (m *TargetsInfo) Reset() { *m = TargetsInfo{} } +func (m *TargetsInfo) String() string { return proto.CompactTextString(m) } +func (*TargetsInfo) ProtoMessage() {} +func (*TargetsInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{5} +} +func (m *TargetsInfo) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *TargetsInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_TargetsInfo.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *TargetsInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_TargetsInfo.Merge(m, src) +} +func (m *TargetsInfo) XXX_Size() int { + return m.Size() +} +func (m *TargetsInfo) XXX_DiscardUnknown() { + xxx_messageInfo_TargetsInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_TargetsInfo proto.InternalMessageInfo + +/// EXemplarsInfo holds the metadata related to Exemplars API exposed by the component. +type ExemplarsInfo struct { + MinTime int64 `protobuf:"varint,1,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` + MaxTime int64 `protobuf:"varint,2,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` +} + +func (m *ExemplarsInfo) Reset() { *m = ExemplarsInfo{} } +func (m *ExemplarsInfo) String() string { return proto.CompactTextString(m) } +func (*ExemplarsInfo) ProtoMessage() {} +func (*ExemplarsInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_a1214ec45d2bf952, []int{6} +} +func (m *ExemplarsInfo) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ExemplarsInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ExemplarsInfo.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ExemplarsInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_ExemplarsInfo.Merge(m, src) +} +func (m *ExemplarsInfo) XXX_Size() int { + return m.Size() +} +func (m *ExemplarsInfo) XXX_DiscardUnknown() { + xxx_messageInfo_ExemplarsInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_ExemplarsInfo proto.InternalMessageInfo + +func init() { + proto.RegisterType((*InfoRequest)(nil), "thanos.info.InfoRequest") + proto.RegisterType((*InfoResponse)(nil), "thanos.info.InfoResponse") + proto.RegisterType((*StoreInfo)(nil), "thanos.info.StoreInfo") + proto.RegisterType((*RulesInfo)(nil), "thanos.info.RulesInfo") + proto.RegisterType((*MetricMetadataInfo)(nil), "thanos.info.MetricMetadataInfo") + proto.RegisterType((*TargetsInfo)(nil), "thanos.info.TargetsInfo") + proto.RegisterType((*ExemplarsInfo)(nil), "thanos.info.ExemplarsInfo") +} + +func init() { proto.RegisterFile("info/infopb/rpc.proto", fileDescriptor_a1214ec45d2bf952) } + +var fileDescriptor_a1214ec45d2bf952 = []byte{ + // 437 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x93, 0xcf, 0x6e, 0xd3, 0x40, + 0x10, 0xc6, 0xed, 0x26, 0x4d, 0xf0, 0x98, 0x80, 0x58, 0x15, 0xb4, 0xc9, 0xc1, 0x8d, 0xac, 0x1e, + 0x72, 0x40, 0xb6, 0x14, 0x24, 0x84, 0xc4, 0x89, 0x56, 0x95, 0x40, 0xa2, 0x17, 0x37, 0xa7, 0x5e, + 0xa2, 0x4d, 0x99, 0x06, 0x4b, 0xde, 0x3f, 0x78, 0xb7, 0x52, 0x7a, 0xe3, 0x11, 0x78, 0xac, 0x1c, + 0x7b, 0xe4, 0x84, 0x20, 0x79, 0x11, 0xb4, 0xbb, 0x6e, 0x89, 0x45, 0x4f, 0xbd, 0xd8, 0xbb, 0xfb, + 
0xfb, 0xbe, 0xd9, 0x99, 0xf1, 0x18, 0x5e, 0x96, 0xe2, 0x4a, 0xe6, 0xf6, 0xa1, 0x16, 0x79, 0xad, + 0x2e, 0x33, 0x55, 0x4b, 0x23, 0x49, 0x6c, 0xbe, 0x32, 0x21, 0x75, 0x66, 0xc1, 0x68, 0xa8, 0x8d, + 0xac, 0x31, 0xaf, 0xd8, 0x02, 0x2b, 0xb5, 0xc8, 0xcd, 0x8d, 0x42, 0xed, 0x75, 0xa3, 0x83, 0xa5, + 0x5c, 0x4a, 0xb7, 0xcc, 0xed, 0xca, 0x9f, 0xa6, 0x03, 0x88, 0x3f, 0x89, 0x2b, 0x59, 0xe0, 0xb7, + 0x6b, 0xd4, 0x26, 0xfd, 0xde, 0x81, 0xa7, 0x7e, 0xaf, 0x95, 0x14, 0x1a, 0xc9, 0x5b, 0x00, 0x17, + 0x6c, 0xae, 0xd1, 0x68, 0x1a, 0x8e, 0x3b, 0x93, 0x78, 0xfa, 0x22, 0x6b, 0xae, 0xbc, 0xf8, 0x6c, + 0xd1, 0x39, 0x9a, 0xe3, 0xee, 0xfa, 0xd7, 0x61, 0x50, 0x44, 0x55, 0xb3, 0xd7, 0xe4, 0x08, 0x06, + 0x27, 0x92, 0x2b, 0x29, 0x50, 0x98, 0xd9, 0x8d, 0x42, 0xba, 0x37, 0x0e, 0x27, 0x51, 0xd1, 0x3e, + 0x24, 0xaf, 0x61, 0xdf, 0x25, 0x4c, 0x3b, 0xe3, 0x70, 0x12, 0x4f, 0x5f, 0x65, 0x3b, 0xb5, 0x64, + 0xe7, 0x96, 0xb8, 0x64, 0xbc, 0xc8, 0xaa, 0xeb, 0xeb, 0x0a, 0x35, 0xed, 0x3e, 0xa0, 0x2e, 0x2c, + 0xf1, 0x6a, 0x27, 0x22, 0x1f, 0xe1, 0x39, 0x47, 0x53, 0x97, 0x97, 0x73, 0x8e, 0x86, 0x7d, 0x61, + 0x86, 0xd1, 0x7d, 0xe7, 0x3b, 0x6c, 0xf9, 0xce, 0x9c, 0xe6, 0xac, 0x91, 0xb8, 0x00, 0xcf, 0x78, + 0xeb, 0x8c, 0x4c, 0xa1, 0x6f, 0x58, 0xbd, 0xb4, 0x0d, 0xe8, 0xb9, 0x08, 0xb4, 0x15, 0x61, 0xe6, + 0x99, 0xb3, 0xde, 0x09, 0xc9, 0x3b, 0x88, 0x70, 0x85, 0x5c, 0x55, 0xac, 0xd6, 0xb4, 0xef, 0x5c, + 0xa3, 0x96, 0xeb, 0xf4, 0x8e, 0x3a, 0xdf, 0x3f, 0x71, 0xfa, 0x01, 0xa2, 0xfb, 0xca, 0xc9, 0x10, + 0x9e, 0xf0, 0x52, 0xcc, 0x4d, 0xc9, 0x91, 0x86, 0xe3, 0x70, 0xd2, 0x29, 0xfa, 0xbc, 0x14, 0xb3, + 0x92, 0xa3, 0x43, 0x6c, 0xe5, 0xd1, 0x5e, 0x83, 0xd8, 0xca, 0xa2, 0x34, 0x86, 0xe8, 0xbe, 0x1d, + 0xe9, 0x01, 0x90, 0xff, 0x6b, 0xb4, 0xdf, 0x7d, 0x27, 0xef, 0xf4, 0x14, 0x06, 0xad, 0x84, 0x1e, + 0x77, 0xf1, 0xf4, 0x04, 0xba, 0xce, 0xfd, 0xbe, 0x79, 0xb7, 0x1b, 0xb5, 0x33, 0x68, 0xa3, 0xe1, + 0x03, 0xc4, 0x8f, 0xdc, 0xf1, 0xd1, 0xfa, 0x4f, 0x12, 0xac, 0x37, 0x49, 0x78, 0xbb, 0x49, 0xc2, + 0xdf, 0x9b, 0x24, 0xfc, 0xb1, 0x4d, 0x82, 0xdb, 0x6d, 0x12, 0xfc, 0xdc, 0x26, 0xc1, 0x45, 0xcf, + 0xff, 0x00, 0x8b, 0x9e, 0x9b, 0xdf, 0x37, 0x7f, 0x03, 0x00, 0x00, 0xff, 0xff, 0xc9, 0x9c, 0xd8, + 0x20, 0x16, 0x03, 0x00, 0x00, +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion4 + +// InfoClient is the client API for Info service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type InfoClient interface { + /// Info returns the metadata (Eg. LabelSets, Min/Max time) about all the APIs the component supports. + Info(ctx context.Context, in *InfoRequest, opts ...grpc.CallOption) (*InfoResponse, error) +} + +type infoClient struct { + cc *grpc.ClientConn +} + +func NewInfoClient(cc *grpc.ClientConn) InfoClient { + return &infoClient{cc} +} + +func (c *infoClient) Info(ctx context.Context, in *InfoRequest, opts ...grpc.CallOption) (*InfoResponse, error) { + out := new(InfoResponse) + err := c.cc.Invoke(ctx, "/thanos.info.Info/Info", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// InfoServer is the server API for Info service. +type InfoServer interface { + /// Info returns the metadata (Eg. LabelSets, Min/Max time) about all the APIs the component supports. 
+ Info(context.Context, *InfoRequest) (*InfoResponse, error) +} + +// UnimplementedInfoServer can be embedded to have forward compatible implementations. +type UnimplementedInfoServer struct { +} + +func (*UnimplementedInfoServer) Info(ctx context.Context, req *InfoRequest) (*InfoResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Info not implemented") +} + +func RegisterInfoServer(s *grpc.Server, srv InfoServer) { + s.RegisterService(&_Info_serviceDesc, srv) +} + +func _Info_Info_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(InfoRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(InfoServer).Info(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/thanos.info.Info/Info", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(InfoServer).Info(ctx, req.(*InfoRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var _Info_serviceDesc = grpc.ServiceDesc{ + ServiceName: "thanos.info.Info", + HandlerType: (*InfoServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Info", + Handler: _Info_Info_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "info/infopb/rpc.proto", +} + +func (m *InfoRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *InfoRequest) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *InfoRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + return len(dAtA) - i, nil +} + +func (m *InfoResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *InfoResponse) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *InfoResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.Exemplars != nil { + { + size, err := m.Exemplars.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x3a + } + if m.Targets != nil { + { + size, err := m.Targets.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x32 + } + if m.MetricMetadata != nil { + { + size, err := m.MetricMetadata.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x2a + } + if m.Rules != nil { + { + size, err := m.Rules.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x22 + } + if m.Store != nil { + { + size, err := m.Store.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x1a + } + if len(m.ComponentType) > 0 { + i -= len(m.ComponentType) + copy(dAtA[i:], m.ComponentType) + i = encodeVarintRpc(dAtA, i, 
uint64(len(m.ComponentType))) + i-- + dAtA[i] = 0x12 + } + if len(m.LabelSets) > 0 { + for iNdEx := len(m.LabelSets) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.LabelSets[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *StoreInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StoreInfo) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *StoreInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.MaxTime != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.MaxTime)) + i-- + dAtA[i] = 0x10 + } + if m.MinTime != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.MinTime)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *RulesInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *RulesInfo) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *RulesInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + return len(dAtA) - i, nil +} + +func (m *MetricMetadataInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *MetricMetadataInfo) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *MetricMetadataInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + return len(dAtA) - i, nil +} + +func (m *TargetsInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *TargetsInfo) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *TargetsInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + return len(dAtA) - i, nil +} + +func (m *ExemplarsInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExemplarsInfo) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarsInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.MaxTime != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.MaxTime)) + i-- + dAtA[i] = 0x10 + } + if m.MinTime != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.MinTime)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func encodeVarintRpc(dAtA []byte, offset int, v uint64) int { + offset -= sovRpc(v) + base := offset + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return base +} +func (m *InfoRequest) Size() 
(n int) { + if m == nil { + return 0 + } + var l int + _ = l + return n +} + +func (m *InfoResponse) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.LabelSets) > 0 { + for _, e := range m.LabelSets { + l = e.Size() + n += 1 + l + sovRpc(uint64(l)) + } + } + l = len(m.ComponentType) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + if m.Store != nil { + l = m.Store.Size() + n += 1 + l + sovRpc(uint64(l)) + } + if m.Rules != nil { + l = m.Rules.Size() + n += 1 + l + sovRpc(uint64(l)) + } + if m.MetricMetadata != nil { + l = m.MetricMetadata.Size() + n += 1 + l + sovRpc(uint64(l)) + } + if m.Targets != nil { + l = m.Targets.Size() + n += 1 + l + sovRpc(uint64(l)) + } + if m.Exemplars != nil { + l = m.Exemplars.Size() + n += 1 + l + sovRpc(uint64(l)) + } + return n +} + +func (m *StoreInfo) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.MinTime != 0 { + n += 1 + sovRpc(uint64(m.MinTime)) + } + if m.MaxTime != 0 { + n += 1 + sovRpc(uint64(m.MaxTime)) + } + return n +} + +func (m *RulesInfo) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + return n +} + +func (m *MetricMetadataInfo) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + return n +} + +func (m *TargetsInfo) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + return n +} + +func (m *ExemplarsInfo) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.MinTime != 0 { + n += 1 + sovRpc(uint64(m.MinTime)) + } + if m.MaxTime != 0 { + n += 1 + sovRpc(uint64(m.MaxTime)) + } + return n +} + +func sovRpc(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} +func sozRpc(x uint64) (n int) { + return sovRpc(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *InfoRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: InfoRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: InfoRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *InfoResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: InfoResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: InfoResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field LabelSets", 
wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.LabelSets = append(m.LabelSets, labelpb.ZLabelSet{}) + if err := m.LabelSets[len(m.LabelSets)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ComponentType", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ComponentType = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Store", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Store == nil { + m.Store = &StoreInfo{} + } + if err := m.Store.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Rules", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Rules == nil { + m.Rules = &RulesInfo{} + } + if err := m.Rules.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MetricMetadata", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.MetricMetadata == nil { + m.MetricMetadata = &MetricMetadataInfo{} + } + if err := m.MetricMetadata.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong 
wireType = %d for field Targets", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Targets == nil { + m.Targets = &TargetsInfo{} + } + if err := m.Targets.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Exemplars", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Exemplars == nil { + m.Exemplars = &ExemplarsInfo{} + } + if err := m.Exemplars.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StoreInfo) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StoreInfo: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: StoreInfo: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MinTime", wireType) + } + m.MinTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MinTime |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxTime", wireType) + } + m.MaxTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MaxTime |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *RulesInfo) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + 
preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: RulesInfo: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: RulesInfo: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *MetricMetadataInfo) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: MetricMetadataInfo: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: MetricMetadataInfo: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *TargetsInfo) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: TargetsInfo: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: TargetsInfo: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExemplarsInfo) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExemplarsInfo: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExemplarsInfo: illegal tag %d (wire 
type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MinTime", wireType) + } + m.MinTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MinTime |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxTime", wireType) + } + m.MaxTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MaxTime |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipRpc(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + depth := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + case 1: + iNdEx += 8 + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthRpc + } + iNdEx += length + case 3: + depth++ + case 4: + if depth == 0 { + return 0, ErrUnexpectedEndOfGroupRpc + } + depth-- + case 5: + iNdEx += 4 + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + if iNdEx < 0 { + return 0, ErrInvalidLengthRpc + } + if depth == 0 { + return iNdEx, nil + } + } + return 0, io.ErrUnexpectedEOF +} + +var ( + ErrInvalidLengthRpc = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowRpc = fmt.Errorf("proto: integer overflow") + ErrUnexpectedEndOfGroupRpc = fmt.Errorf("proto: unexpected end of group") +) diff --git a/pkg/info/infopb/rpc.proto b/pkg/info/infopb/rpc.proto index c87ba01dbd..7d19c5c12d 100644 --- a/pkg/info/infopb/rpc.proto +++ b/pkg/info/infopb/rpc.proto @@ -2,7 +2,7 @@ // Licensed under the Apache License 2.0. syntax = "proto3"; -package thanos; +package thanos.info; import "store/labelpb/types.proto"; import "gogoproto/gogo.proto"; diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go new file mode 100644 index 0000000000..7b8520fb7e --- /dev/null +++ b/pkg/query/endpointset.go @@ -0,0 +1,686 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. 
+ +package query + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "sync" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" + "github.com/thanos-io/thanos/pkg/info/infopb" + "google.golang.org/grpc" + + "github.com/thanos-io/thanos/pkg/component" + "github.com/thanos-io/thanos/pkg/metadata/metadatapb" + "github.com/thanos-io/thanos/pkg/rules/rulespb" + "github.com/thanos-io/thanos/pkg/runutil" + "github.com/thanos-io/thanos/pkg/store" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb" + "github.com/thanos-io/thanos/pkg/targets/targetspb" +) + +const ( + unhealthyEndpointMessage = "removing endpoint because it's unhealthy or does not exist" +) + +type EndpointSpec interface { + // Addr returns Thanos API Address for the endpoint spec. It is used as ID for endpoint. + Addr() string + // Metadata returns current labels, component type and min, max ranges for store. + // It can change for every call for this method. + // If metadata call fails we assume that store is no longer accessible and we should not use it. + // NOTE: It is implementation responsibility to retry until context timeout, but a caller responsibility to manage + // given store connection. + Metadata(ctx context.Context, client infopb.InfoClient) (*endpointMetadata, error) + + // StrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. + StrictStatic() bool +} + +type grpcEndpointSpec struct { + addr string + strictstatic bool +} + +// NewGRPCEndpointSpec creates gRPC endpoint spec. +// It uses InfoAPI to get Metadata. +func NewGRPCEndpointSpec(addr string, strictstatic bool) StoreSpec { + return &grpcStoreSpec{addr: addr, strictstatic: strictstatic} +} + +// StrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. +func (es *grpcEndpointSpec) StrictStatic() bool { + return es.strictstatic +} + +func (es *grpcEndpointSpec) Addr() string { + // API address should not change between state changes. + return es.addr +} + +// Metadata method for gRPC endpoint tries to call InfoAPI exposed by Thanos components until context timeout. If we are unable to get metadata after +// that time, we assume that the host is unhealthy and return error. +func (es *grpcEndpointSpec) Metadata(ctx context.Context, client infopb.InfoClient) (metadata *endpointMetadata, err error) { + resp, err := client.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) + if err != nil { + return nil, errors.Wrapf(err, "fetching info from %s", es.addr) + } + + return &endpointMetadata{*resp}, nil +} + +// stringError forces the error to be a string +// when marshaled into a JSON. +type stringError struct { + originalErr error +} + +// MarshalJSON marshals the error into a string form. +func (e *stringError) MarshalJSON() ([]byte, error) { + return json.Marshal(e.originalErr.Error()) +} + +// Error returns the original underlying error. 
+func (e *stringError) Error() string { + return e.originalErr.Error() +} + +type EndpointStatus struct { + Name string `json:"name"` + LastCheck time.Time `json:"lastCheck"` + LastError *stringError `json:"lastError"` + LabelSets []labels.Labels `json:"labelSets"` + ComponentType component.Component `json:"-"` + MinTime int64 `json:"minTime"` + MaxTime int64 `json:"maxTime"` +} + +// storeSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. +// A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series. +// TODO(hitanshu-mehta) Currently,only collecting metrices of storeAPI. Make this struct generic. +type endpointSetNodeCollector struct { + mtx sync.Mutex + storeNodes map[component.Component]map[string]int + storePerExtLset map[string]int + + connectionsDesc *prometheus.Desc +} + +func newEndpointSetNodeCollector() *endpointSetNodeCollector { + return &endpointSetNodeCollector{ + storeNodes: map[component.Component]map[string]int{}, + connectionsDesc: prometheus.NewDesc( + "thanos_store_nodes_grpc_connections", + "Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier.", + []string{"external_labels", "store_type"}, nil, + ), + } +} + +func (c *endpointSetNodeCollector) Update(nodes map[component.Component]map[string]int) { + storeNodes := make(map[component.Component]map[string]int, len(nodes)) + storePerExtLset := map[string]int{} + + for k, v := range nodes { + storeNodes[k] = make(map[string]int, len(v)) + for kk, vv := range v { + storePerExtLset[kk] += vv + storeNodes[k][kk] = vv + } + } + + c.mtx.Lock() + defer c.mtx.Unlock() + c.storeNodes = storeNodes + c.storePerExtLset = storePerExtLset +} + +func (c *endpointSetNodeCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.connectionsDesc +} + +func (c *endpointSetNodeCollector) Collect(ch chan<- prometheus.Metric) { + c.mtx.Lock() + defer c.mtx.Unlock() + + for storeType, occurrencesPerExtLset := range c.storeNodes { + for externalLabels, occurrences := range occurrencesPerExtLset { + var storeTypeStr string + if storeType != nil { + storeTypeStr = storeType.String() + } + ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences), externalLabels, storeTypeStr) + } + } +} + +// EndpointSet maintains a set of active Thanos endpoints. It is backed up by Endpoint Specifications that are dynamically fetched on +// every Update() call. +type EndpointSet struct { + logger log.Logger + + // Endpoint specifications can change dynamically. If some store is missing from the list, we assuming it is no longer + // accessible and we close gRPC client for it. + endpointSpec func() []EndpointSpec + dialOpts []grpc.DialOption + gRPCInfoCallTimeout time.Duration + + updateMtx sync.Mutex + endpointsMtx sync.RWMutex + endpointsStatusesMtx sync.RWMutex + + // Main map of stores currently used for fanout. + endpoints map[string]*endpointRef + endpointsMetric *endpointSetNodeCollector + + // Map of statuses used only by UI. + endpointStatuses map[string]*EndpointStatus + unhealthyEndpointTimeout time.Duration +} + +// NewEndpointSet returns a new set of Thanos APIs. 
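+// The set starts empty and is populated by Update; both reg and endpointSpecs
+// may be nil. Illustrative usage only (names other than the constructor and
+// Update are placeholders):
+//
+//	es := NewEndpointSet(logger, reg, specFn, dialOpts, 5*time.Minute)
+//	es.Update(ctx) // typically invoked periodically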
+func NewEndpointSet( + logger log.Logger, + reg *prometheus.Registry, + endpointSpecs func() []EndpointSpec, + dialOpts []grpc.DialOption, + unhealthyStoreTimeout time.Duration, +) *EndpointSet { + endpointsMetric := newEndpointSetNodeCollector() + if reg != nil { + reg.MustRegister(endpointsMetric) + } + + if logger == nil { + logger = log.NewNopLogger() + } + + if endpointSpecs == nil { + endpointSpecs = func() []EndpointSpec { return nil } + } + + es := &EndpointSet{ + logger: log.With(logger, "component", "endpointset"), + dialOpts: dialOpts, + endpointsMetric: endpointsMetric, + gRPCInfoCallTimeout: 5 * time.Second, + endpoints: make(map[string]*endpointRef), + endpointStatuses: make(map[string]*EndpointStatus), + unhealthyEndpointTimeout: unhealthyStoreTimeout, + } + return es +} + +// Update updates the endpoint set. It fetches current list of endpoint specs from function and updates the fresh metadata +// from all endpoints. Keeps around statically defined nodes that were defined with the strict mode. +func (e *EndpointSet) Update(ctx context.Context) { + e.updateMtx.Lock() + defer e.updateMtx.Unlock() + + e.endpointsMtx.RLock() + endpoints := make(map[string]*endpointRef, len(e.endpoints)) + for addr, er := range e.endpoints { + endpoints[addr] = er + } + e.endpointsMtx.RUnlock() + + level.Debug(e.logger).Log("msg", "starting to update API endpoints", "cachedEndpoints", len(endpoints)) + + activeEndpoints := e.getActiveEndpoints(ctx, endpoints) + level.Debug(e.logger).Log("msg", "checked requested endpoints", "activeEndpoints", len(activeEndpoints), "cachedEndpoints", len(endpoints)) + + stats := newEndpointAPIStats() + + // Close endpoints which are not active this time (are not in active endpoints map). + for addr, er := range endpoints { + if _, ok := activeEndpoints[addr]; ok { + stats[er.ComponentType()][labelpb.PromLabelSetsToString(er.LabelSets())]++ + continue + } + + er.Close() + delete(endpoints, addr) + e.updateEndpointStatus(er, errors.New(unhealthyStoreMessage)) + level.Info(er.logger).Log("msg", unhealthyEndpointMessage, "address", addr, "extLset", labelpb.PromLabelSetsToString(er.LabelSets())) + } + + // Add stores that are not yet in stores. + for addr, er := range activeEndpoints { + if _, ok := endpoints[addr]; ok { + continue + } + + extLset := labelpb.PromLabelSetsToString(er.LabelSets()) + + // All producers should have unique external labels. While this does not check only StoreAPIs connected to + // this querier this allows to notify early user about misconfiguration. Warn only. This is also detectable from metric. + if er.ComponentType() != nil && + (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && + stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { + + level.Warn(e.logger).Log("msg", "found duplicate storeAPI producer (sidecar or ruler). 
This is not advices as it will malform data in in the same bucket", + "address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", stats[component.Sidecar][extLset]+stats[component.Rule][extLset]+1)) + } + stats[er.ComponentType()][extLset]++ + + endpoints[addr] = er + e.updateEndpointStatus(er, nil) + + if er.HasStoreAPI() { + level.Info(e.logger).Log("msg", "adding new storeAPI to query endpointset", "address", addr, "extLset", extLset) + } + + if er.HasRulesAPI() { + level.Info(e.logger).Log("msg", "adding new rulesAPI to query endpointset", "address", addr) + } + + if er.HasExemplarsAPI() { + level.Info(e.logger).Log("msg", "adding new exemplarsAPI to query endpointset", "address", addr) + } + + if er.HasTargetsAPI() { + level.Info(e.logger).Log("msg", "adding new targetsAPI to query endpointset", "address", addr) + } + + if er.HasMetricMetadataAPI() { + level.Info(e.logger).Log("msg", "adding new MetricMetadataAPI to query endpointset", "address", addr) + } + } + + e.endpointsMetric.Update(stats) + e.endpointsMtx.Lock() + e.endpoints = endpoints + e.endpointsMtx.Unlock() + + e.cleanUpStoreStatuses(endpoints) +} + +// Get returns a list of all active stores. +func (e *EndpointSet) GetStoreClients() []storepb.StoreClient { + e.endpointsMtx.RLock() + defer e.endpointsMtx.RUnlock() + + stores := make([]storepb.StoreClient, 0, len(e.endpoints)) + for _, er := range e.endpoints { + if er.HasStoreAPI() { + stores = append(stores, er.clients.store) + } + } + return stores +} + +// GetRulesClients returns a list of all active rules clients. +func (e *EndpointSet) GetRulesClients() []rulespb.RulesClient { + e.endpointsMtx.RLock() + defer e.endpointsMtx.RUnlock() + + rules := make([]rulespb.RulesClient, 0, len(e.endpoints)) + for _, er := range e.endpoints { + if er.HasRulesAPI() { + rules = append(rules, er.clients.rule) + } + } + return rules +} + +// GetTargetsClients returns a list of all active targets clients. +func (e *EndpointSet) GetTargetsClients() []targetspb.TargetsClient { + e.endpointsMtx.RLock() + defer e.endpointsMtx.RUnlock() + + targets := make([]targetspb.TargetsClient, 0, len(e.endpoints)) + for _, er := range e.endpoints { + if er.HasTargetsAPI() { + targets = append(targets, er.clients.target) + } + } + return targets +} + +// GetMetricMetadataClients returns a list of all active metadata clients. +func (e *EndpointSet) GetMetricMetadataClients() []metadatapb.MetadataClient { + e.endpointsMtx.RLock() + defer e.endpointsMtx.RUnlock() + + metadataClients := make([]metadatapb.MetadataClient, 0, len(e.endpoints)) + for _, er := range e.endpoints { + if er.HasMetricMetadataAPI() { + metadataClients = append(metadataClients, er.clients.metricMetadata) + } + } + return metadataClients +} + +// GetExemplarsStores returns a list of all active exemplars stores. 
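+// Each returned entry pairs an endpoint's exemplars client with its external
+// label sets.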
+func (e *EndpointSet) GetExemplarsStores() []*exemplarspb.ExemplarStore { + e.endpointsMtx.RLock() + defer e.endpointsMtx.RUnlock() + + exemplarStores := make([]*exemplarspb.ExemplarStore, 0, len(e.endpoints)) + for _, er := range e.endpoints { + if er.HasExemplarsAPI() { + exemplarStores = append(exemplarStores, &exemplarspb.ExemplarStore{ + ExemplarsClient: er.clients.exemplar, + LabelSets: labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...), + }) + } + } + return exemplarStores +} + +func (e *EndpointSet) Close() { + e.endpointsMtx.Lock() + defer e.endpointsMtx.Unlock() + + for _, ef := range e.endpoints { + ef.Close() + } + e.endpoints = map[string]*endpointRef{} +} + +func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[string]*endpointRef) map[string]*endpointRef { + var ( + activeEndpoints = make(map[string]*endpointRef, len(endpoints)) + mtx sync.Mutex + wg sync.WaitGroup + + endpointAddrSet = make(map[string]struct{}) + ) + + // Gather healthy endpoints map concurrently using info API. Build new clients if does not exist already. + for _, es := range e.endpointSpec() { + if _, ok := endpointAddrSet[es.Addr()]; ok { + level.Warn(e.logger).Log("msg", "duplicated address in nodes", "address", es.Addr()) + continue + } + endpointAddrSet[es.Addr()] = struct{}{} + + wg.Add(1) + go func(spec EndpointSpec) { + defer wg.Done() + + addr := spec.Addr() + + ctx, cancel := context.WithTimeout(ctx, e.gRPCInfoCallTimeout) + defer cancel() + + er, seenAlready := endpoints[addr] + if !seenAlready { + // New store or was unactive and was removed in the past - create the new one. + conn, err := grpc.DialContext(ctx, addr, e.dialOpts...) + if err != nil { + e.updateEndpointStatus(&endpointRef{addr: addr}, err) + level.Warn(e.logger).Log("msg", "update of node failed", "err", errors.Wrap(err, "dialing connection"), "address", addr) + return + } + + er = &endpointRef{ + cc: conn, + addr: addr, + logger: e.logger, + clients: NewEndpointClients(InfoClient(infopb.NewInfoClient(conn))), + } + } + + // info, err := er.info.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) + metadata, err := spec.Metadata(ctx, er.clients.info) + if err != nil { + if !seenAlready { + // Close only if new + // Unactive `s.stores` will be closed later on. 
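+					// (Endpoints that are already tracked are closed by Update
+					// once they drop out of the active set.)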
+ er.Close() + } + + e.updateEndpointStatus(er, err) + level.Warn(e.logger).Log("msg", "update of node failed", "err", errors.Wrap(err, "getting metadata"), "address", addr) + + return + } + + er.Update(metadata) + e.updateEndpointStatus(er, nil) + + mtx.Lock() + defer mtx.Unlock() + + activeEndpoints[addr] = er + }(es) + } + wg.Wait() + + return activeEndpoints +} + +func (e *EndpointSet) updateEndpointStatus(er *endpointRef, err error) { + e.endpointsStatusesMtx.Lock() + defer e.endpointsStatusesMtx.Unlock() + + status := EndpointStatus{Name: er.addr} + prev, ok := e.endpointStatuses[er.addr] + if ok { + status = *prev + } else { + mint, maxt := er.TimeRange() + status.MinTime = mint + status.MaxTime = maxt + } + + if err == nil { + status.LastCheck = time.Now() + mint, maxt := er.TimeRange() + status.LabelSets = er.LabelSets() + status.ComponentType = er.ComponentType() + status.MinTime = mint + status.MaxTime = maxt + status.LastError = nil + } else { + status.LastError = &stringError{originalErr: err} + } + + e.endpointStatuses[er.addr] = &status +} + +func (e *EndpointSet) GetStoreStatus() []EndpointStatus { + e.endpointsStatusesMtx.RLock() + defer e.endpointsStatusesMtx.RUnlock() + + statuses := make([]EndpointStatus, 0, len(e.endpointStatuses)) + for _, v := range e.endpointStatuses { + statuses = append(statuses, *v) + } + + sort.Slice(statuses, func(i, j int) bool { + return statuses[i].Name < statuses[j].Name + }) + return statuses +} + +func (e *EndpointSet) cleanUpStoreStatuses(endpoints map[string]*endpointRef) { + e.endpointsStatusesMtx.Lock() + defer e.endpointsStatusesMtx.Unlock() + + now := time.Now() + for addr, status := range e.endpointStatuses { + if _, ok := endpoints[addr]; ok { + continue + } + + if now.Sub(status.LastCheck) >= e.unhealthyEndpointTimeout { + delete(e.endpointStatuses, addr) + } + } +} + +// TODO(bwplotka): Consider moving storeRef out of this package and renaming it, as it also supports rules API. +type endpointRef struct { + mtx sync.RWMutex + cc *grpc.ClientConn + addr string + + clients *endpointClients + + // Metadata can change during runtime. + metadata *endpointMetadata + + logger log.Logger +} + +func (er *endpointRef) Update(metadata *endpointMetadata) { + er.mtx.Lock() + defer er.mtx.Unlock() + + clients := &endpointClients{} + + if metadata.Store != nil { + clients.store = storepb.NewStoreClient(er.cc) + } + + if metadata.Rules != nil { + clients.rule = rulespb.NewRulesClient(er.cc) + } + + if metadata.Targets != nil { + clients.target = targetspb.NewTargetsClient(er.cc) + } + + if metadata.MetricMetadata != nil { + clients.metricMetadata = metadatapb.NewMetadataClient(er.cc) + } + + if metadata.Exemplars != nil { + // min/max range is also provided by in the response of Info rpc call + // but we are not using this metadata anywhere right now so ignoring. 
+ clients.exemplar = exemplarspb.NewExemplarsClient(er.cc) + } + + er.metadata = metadata +} + +func (er *endpointRef) ComponentType() component.Component { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return component.FromString(er.metadata.ComponentType) +} + +func (er *endpointRef) HasStoreAPI() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients.store != nil +} + +func (er *endpointRef) HasRulesAPI() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients.rule != nil +} + +func (er *endpointRef) HasTargetsAPI() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients.target != nil +} + +func (er *endpointRef) HasMetricMetadataAPI() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients.metricMetadata != nil +} + +func (er *endpointRef) HasExemplarsAPI() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients.exemplar != nil +} + +func (er *endpointRef) LabelSets() []labels.Labels { + er.mtx.RLock() + defer er.mtx.RUnlock() + + labelSet := make([]labels.Labels, 0, len(er.metadata.LabelSets)) + for _, ls := range labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...) { + if len(ls) == 0 { + continue + } + // Compatibility label for Queriers pre 0.8.1. Filter it out now. + if ls[0].Name == store.CompatibilityTypeLabelName { + continue + } + labelSet = append(labelSet, ls.Copy()) + } + return labelSet +} + +func (er *endpointRef) TimeRange() (mint int64, maxt int64) { + er.mtx.RLock() + defer er.mtx.RUnlock() + + // Currently, min/max time of only StoreAPI is being updated by all components. + return er.metadata.Store.MinTime, er.metadata.Store.MaxTime +} + +func (er *endpointRef) String() string { + mint, maxt := er.TimeRange() + return fmt.Sprintf("Addr: %s LabelSets: %v Mint: %d Maxt: %d", er.addr, labelpb.PromLabelSetsToString(er.LabelSets()), mint, maxt) +} + +func (er *endpointRef) Addr() string { + return er.addr +} + +func (er *endpointRef) Close() { + runutil.CloseWithLogOnErr(er.logger, er.cc, fmt.Sprintf("endpoint %v connection closed", er.addr)) +} + +type endpointClients struct { + store storepb.StoreClient + rule rulespb.RulesClient + metricMetadata metadatapb.MetadataClient + exemplar exemplarspb.ExemplarsClient + target targetspb.TargetsClient + info infopb.InfoClient +} + +func NewEndpointClients(clients ...func(*endpointClients)) *endpointClients { + ec := &endpointClients{} + + for _, c := range clients { + c(ec) + } + + return ec +} + +func InfoClient(info infopb.InfoClient) func(*endpointClients) { + return func(ec *endpointClients) { + ec.info = info + } +} + +type endpointMetadata struct { + infopb.InfoResponse +} + +func newEndpointAPIStats() map[component.Component]map[string]int { + nodes := make(map[component.Component]map[string]int, len(storepb.StoreType_name)) + for i := range storepb.StoreType_name { + nodes[component.FromProto(storepb.StoreType(i))] = map[string]int{} + } + return nodes +} diff --git a/pkg/query/storeset.go b/pkg/query/storeset.go index 171848b06d..7da536ef0a 100644 --- a/pkg/query/storeset.go +++ b/pkg/query/storeset.go @@ -5,7 +5,6 @@ package query import ( "context" - "encoding/json" "fmt" "math" "sort" @@ -74,22 +73,6 @@ type ExemplarSpec interface { StoreAddrSpec } -// stringError forces the error to be a string -// when marshaled into a JSON. -type stringError struct { - originalErr error -} - -// MarshalJSON marshals the error into a string form. 
-func (e *stringError) MarshalJSON() ([]byte, error) { - return json.Marshal(e.originalErr.Error()) -} - -// Error returns the original underlying error. -func (e *stringError) Error() string { - return e.originalErr.Error() -} - type StoreStatus struct { Name string `json:"name"` LastCheck time.Time `json:"lastCheck"` From e5ba86cc6ec7709b219389da462edc46dd966b69 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Wed, 7 Jul 2021 02:28:42 +0530 Subject: [PATCH 02/11] add unit test for endpointSet Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 91 ++- pkg/query/endpointset_test.go | 1090 +++++++++++++++++++++++++++++++++ pkg/query/storeset_test.go | 19 - 3 files changed, 1147 insertions(+), 53 deletions(-) create mode 100644 pkg/query/endpointset_test.go diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index 7b8520fb7e..1742e8f166 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -32,6 +32,10 @@ import ( const ( unhealthyEndpointMessage = "removing endpoint because it's unhealthy or does not exist" + + // Default minimum and maximum time values used by Prometheus when they are not passed as query parameter. + MinTime = -9223309901257974 + MaxTime = 9223309901257974 ) type EndpointSpec interface { @@ -55,8 +59,8 @@ type grpcEndpointSpec struct { // NewGRPCEndpointSpec creates gRPC endpoint spec. // It uses InfoAPI to get Metadata. -func NewGRPCEndpointSpec(addr string, strictstatic bool) StoreSpec { - return &grpcStoreSpec{addr: addr, strictstatic: strictstatic} +func NewGRPCEndpointSpec(addr string, strictstatic bool) EndpointSpec { + return &grpcEndpointSpec{addr: addr, strictstatic: strictstatic} } // StrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. @@ -74,10 +78,12 @@ func (es *grpcEndpointSpec) Addr() string { func (es *grpcEndpointSpec) Metadata(ctx context.Context, client infopb.InfoClient) (metadata *endpointMetadata, err error) { resp, err := client.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) if err != nil { - return nil, errors.Wrapf(err, "fetching info from %s", es.addr) + return &endpointMetadata{&infopb.InfoResponse{ + ComponentType: component.UnknownStoreAPI.String(), + }}, errors.Wrapf(err, "fetching info from %s", es.addr) } - return &endpointMetadata{*resp}, nil + return &endpointMetadata{resp}, nil } // stringError forces the error to be a string @@ -218,6 +224,7 @@ func NewEndpointSet( endpoints: make(map[string]*endpointRef), endpointStatuses: make(map[string]*EndpointStatus), unhealthyEndpointTimeout: unhealthyStoreTimeout, + endpointSpec: endpointSpecs, } return es } @@ -251,7 +258,7 @@ func (e *EndpointSet) Update(ctx context.Context) { er.Close() delete(endpoints, addr) - e.updateEndpointStatus(er, errors.New(unhealthyStoreMessage)) + e.updateEndpointStatus(er, errors.New(unhealthyEndpointMessage)) level.Info(er.logger).Log("msg", unhealthyEndpointMessage, "address", addr, "extLset", labelpb.PromLabelSetsToString(er.LabelSets())) } @@ -265,8 +272,8 @@ func (e *EndpointSet) Update(ctx context.Context) { // All producers should have unique external labels. While this does not check only StoreAPIs connected to // this querier this allows to notify early user about misconfiguration. Warn only. This is also detectable from metric. 
- if er.ComponentType() != nil && - (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && + if (er.ComponentType() != nil && + (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule)) && stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { level.Warn(e.logger).Log("msg", "found duplicate storeAPI producer (sidecar or ruler). This is not advices as it will malform data in in the same bucket", @@ -417,7 +424,7 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri er, seenAlready := endpoints[addr] if !seenAlready { - // New store or was unactive and was removed in the past - create the new one. + // New endpoint or was unactive and was removed in the past - create the new one. conn, err := grpc.DialContext(ctx, addr, e.dialOpts...) if err != nil { e.updateEndpointStatus(&endpointRef{addr: addr}, err) @@ -426,25 +433,44 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri } er = &endpointRef{ - cc: conn, - addr: addr, - logger: e.logger, - clients: NewEndpointClients(InfoClient(infopb.NewInfoClient(conn))), + cc: conn, + addr: addr, + logger: e.logger, + clients: &endpointClients{ + info: infopb.NewInfoClient(conn), + }, } } - // info, err := er.info.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) metadata, err := spec.Metadata(ctx, er.clients.info) if err != nil { - if !seenAlready { - // Close only if new - // Unactive `s.stores` will be closed later on. + if !seenAlready && !spec.StrictStatic() { + // Close only if new and not a strict static node. + // Unactive `e.endpoints` will be closed later on. er.Close() } e.updateEndpointStatus(er, err) level.Warn(e.logger).Log("msg", "update of node failed", "err", errors.Wrap(err, "getting metadata"), "address", addr) + if !spec.StrictStatic() { + return + } + + // Still keep it around if static & strict mode enabled. + // Assume that it expose storeAPI and cover all complete possible time range. + if !seenAlready { + metadata.Store = &infopb.StoreInfo{ + MinTime: MinTime, + MaxTime: MaxTime, + } + er.Update(metadata) + } + + mtx.Lock() + defer mtx.Unlock() + + activeEndpoints[addr] = er return } @@ -540,7 +566,7 @@ func (er *endpointRef) Update(metadata *endpointMetadata) { er.mtx.Lock() defer er.mtx.Unlock() - clients := &endpointClients{} + clients := er.clients if metadata.Store != nil { clients.store = storepb.NewStoreClient(er.cc) @@ -564,6 +590,7 @@ func (er *endpointRef) Update(metadata *endpointMetadata) { clients.exemplar = exemplarspb.NewExemplarsClient(er.cc) } + er.clients = clients er.metadata = metadata } @@ -571,6 +598,10 @@ func (er *endpointRef) ComponentType() component.Component { er.mtx.RLock() defer er.mtx.RUnlock() + if er.metadata == nil { + return component.UnknownStoreAPI + } + return component.FromString(er.metadata.ComponentType) } @@ -613,6 +644,10 @@ func (er *endpointRef) LabelSets() []labels.Labels { er.mtx.RLock() defer er.mtx.RUnlock() + if er.metadata == nil { + return make([]labels.Labels, 0) + } + labelSet := make([]labels.Labels, 0, len(er.metadata.LabelSets)) for _, ls := range labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...) { if len(ls) == 0 { @@ -631,6 +666,10 @@ func (er *endpointRef) TimeRange() (mint int64, maxt int64) { er.mtx.RLock() defer er.mtx.RUnlock() + if er.metadata == nil || er.metadata.Store == nil { + return MinTime, MaxTime + } + // Currently, min/max time of only StoreAPI is being updated by all components. 
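+	// The MinTime/MaxTime fallback above applies while no metadata has been
+	// fetched yet (e.g. a strict static endpoint whose Info call keeps failing);
+	// such an endpoint is assumed to cover the widest possible time range.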
return er.metadata.Store.MinTime, er.metadata.Store.MaxTime } @@ -657,24 +696,8 @@ type endpointClients struct { info infopb.InfoClient } -func NewEndpointClients(clients ...func(*endpointClients)) *endpointClients { - ec := &endpointClients{} - - for _, c := range clients { - c(ec) - } - - return ec -} - -func InfoClient(info infopb.InfoClient) func(*endpointClients) { - return func(ec *endpointClients) { - ec.info = info - } -} - type endpointMetadata struct { - infopb.InfoResponse + *infopb.InfoResponse } func newEndpointAPIStats() map[component.Component]map[string]int { diff --git a/pkg/query/endpointset_test.go b/pkg/query/endpointset_test.go new file mode 100644 index 0000000000..6509cf6255 --- /dev/null +++ b/pkg/query/endpointset_test.go @@ -0,0 +1,1090 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package query + +import ( + "context" + "encoding/json" + "fmt" + "math" + "net" + "testing" + "time" + + "google.golang.org/grpc" + + "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/component" + "github.com/thanos-io/thanos/pkg/info/infopb" + "github.com/thanos-io/thanos/pkg/store" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/testutil" +) + +var testGRPCOpts = []grpc.DialOption{ + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), + grpc.WithInsecure(), +} + +var ( + sidecarInfo = &infopb.InfoResponse{ + ComponentType: component.Sidecar.String(), + Store: &infopb.StoreInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Rules: &infopb.RulesInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Targets: &infopb.TargetsInfo{}, + } + queryInfo = &infopb.InfoResponse{ + ComponentType: component.Query.String(), + Store: &infopb.StoreInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Rules: &infopb.RulesInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Targets: &infopb.TargetsInfo{}, + } + ruleInfo = &infopb.InfoResponse{ + ComponentType: component.Rule.String(), + Rules: &infopb.RulesInfo{}, + } + storeGWInfo = &infopb.InfoResponse{ + ComponentType: component.Store.String(), + Store: &infopb.StoreInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + } + receiveInfo = &infopb.InfoResponse{ + ComponentType: component.Receive.String(), + Store: &infopb.StoreInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + } +) + +type mockedEndpoint struct { + infoDelay time.Duration + info infopb.InfoResponse +} + +func (c *mockedEndpoint) Info(ctx context.Context, r *infopb.InfoRequest) (*infopb.InfoResponse, error) { + if c.infoDelay > 0 { + time.Sleep(c.infoDelay) + } + + return &c.info, nil +} + +type APIs struct { + store bool + metricMetadata bool + rules bool + target bool + exemplars bool +} + +type testEndpointMeta struct { + *infopb.InfoResponse + extlsetFn func(addr string) []labelpb.ZLabelSet + infoDelay time.Duration +} + +type testEndpoints struct { + srvs map[string]*grpc.Server + orderAddrs []string + exposedAPIs map[string]*APIs +} + +func startTestEndpoints(testEndpointMeta []testEndpointMeta) (*testEndpoints, error) { + e := &testEndpoints{ + srvs: map[string]*grpc.Server{}, + exposedAPIs: map[string]*APIs{}, + } + + for _, meta := range 
testEndpointMeta { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + // Close so far started servers. + e.Close() + return nil, err + } + + srv := grpc.NewServer() + addr := listener.Addr().String() + + endpointSrv := &mockedEndpoint{ + info: infopb.InfoResponse{ + LabelSets: meta.extlsetFn(listener.Addr().String()), + Store: meta.Store, + MetricMetadata: meta.MetricMetadata, + Rules: meta.Rules, + Targets: meta.Targets, + Exemplars: meta.Exemplars, + ComponentType: meta.ComponentType, + }, + infoDelay: meta.infoDelay, + } + infopb.RegisterInfoServer(srv, endpointSrv) + go func() { + _ = srv.Serve(listener) + }() + + e.exposedAPIs[addr] = exposedAPIs(meta.ComponentType) + e.srvs[addr] = srv + e.orderAddrs = append(e.orderAddrs, listener.Addr().String()) + } + + return e, nil +} + +func (e *testEndpoints) EndpointAddresses() []string { + var endpoints []string + endpoints = append(endpoints, e.orderAddrs...) + return endpoints +} + +func (e *testEndpoints) Close() { + for _, srv := range e.srvs { + srv.Stop() + } + e.srvs = nil +} + +func (e *testEndpoints) CloseOne(addr string) { + srv, ok := e.srvs[addr] + if !ok { + return + } + + srv.Stop() + delete(e.srvs, addr) +} + +func TestEndpointSet_Update(t *testing.T) { + + endpoints, err := startTestEndpoints([]testEndpointMeta{ + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "addr", Value: addr}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "a", Value: "b"}, + }, + }, + } + }, + }, + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "addr", Value: addr}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "a", Value: "b"}, + }, + }, + } + }, + }, + { + InfoResponse: queryInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "addr", Value: addr}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "a", Value: "b"}, + }, + }, + } + }, + }, + }) + testutil.Ok(t, err) + defer endpoints.Close() + + discoveredEndpointAddr := endpoints.EndpointAddresses() + + // Testing if duplicates can cause weird results. + discoveredEndpointAddr = append(discoveredEndpointAddr, discoveredEndpointAddr[0]) + endpointSet := NewEndpointSet(nil, nil, + func() (specs []EndpointSpec) { + for _, addr := range discoveredEndpointAddr { + specs = append(specs, NewGRPCEndpointSpec(addr, false)) + } + return specs + }, + testGRPCOpts, time.Minute) + endpointSet.gRPCInfoCallTimeout = 2 * time.Second + defer endpointSet.Close() + + // Initial update. + endpointSet.Update(context.Background()) + + // Start with one not available. + endpoints.CloseOne(discoveredEndpointAddr[2]) + + // Should not matter how many of these we run. + endpointSet.Update(context.Background()) + endpointSet.Update(context.Background()) + testutil.Equals(t, 2, len(endpointSet.endpoints)) + testutil.Equals(t, 3, len(endpointSet.endpointStatuses)) + + for addr, e := range endpointSet.endpoints { + testutil.Equals(t, addr, e.addr) + + lset := e.LabelSets() + testutil.Equals(t, 2, len(lset)) + testutil.Equals(t, "addr", lset[0][0].Name) + testutil.Equals(t, addr, lset[0][0].Value) + testutil.Equals(t, "a", lset[1][0].Name) + testutil.Equals(t, "b", lset[1][0].Value) + assertRegisteredAPIs(t, endpoints.exposedAPIs[addr], e) + } + + // Check stats. 
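+	// newEndpointAPIStats returns a nested map keyed by component type and then by
+	// the endpoint's external label sets rendered as a string; the value is how
+	// many endpoints advertise that label set.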
+ expected := newEndpointAPIStats() + expected[component.Sidecar] = map[string]int{ + fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0]): 1, + fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1, + } + testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) + + // Remove address from discovered and reset last check, which should ensure cleanup of status on next update. + endpointSet.endpointStatuses[discoveredEndpointAddr[2]].LastCheck = time.Now().Add(-4 * time.Minute) + discoveredEndpointAddr = discoveredEndpointAddr[:len(discoveredEndpointAddr)-2] + endpointSet.Update(context.Background()) + testutil.Equals(t, 2, len(endpointSet.endpointStatuses)) + + endpoints.CloseOne(discoveredEndpointAddr[0]) + delete(expected[component.Sidecar], fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0])) + + // We expect Update to tear down store client for closed store server. + endpointSet.Update(context.Background()) + testutil.Equals(t, 1, len(endpointSet.endpoints), "only one service should respond just fine, so we expect one client to be ready.") + testutil.Equals(t, 2, len(endpointSet.endpointStatuses)) + + addr := discoveredEndpointAddr[1] + st, ok := endpointSet.endpoints[addr] + testutil.Assert(t, ok, "addr exist") + testutil.Equals(t, addr, st.addr) + + lset := st.LabelSets() + testutil.Equals(t, 2, len(lset)) + testutil.Equals(t, "addr", lset[0][0].Name) + testutil.Equals(t, addr, lset[0][0].Value) + testutil.Equals(t, "a", lset[1][0].Name) + testutil.Equals(t, "b", lset[1][0].Value) + testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) + + // New big batch of endpoints. + endpoint2, err := startTestEndpoints([]testEndpointMeta{ + { + InfoResponse: queryInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + } + }, + }, + { + // Duplicated Querier, in previous versions it would be deduplicated. Now it should be not. + InfoResponse: queryInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + } + }, + }, + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + } + }, + }, + { + // Duplicated Sidecar, in previous versions it would be deduplicated. Now it should be not. + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + } + }, + }, + { + // Querier that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. + InfoResponse: queryInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + } + }, + }, + { + // Ruler that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. + // Warning should be produced. 
+ InfoResponse: ruleInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + } + }, + }, + { + // Duplicated Rule, in previous versions it would be deduplicated. Now it should be not. Warning should be produced. + InfoResponse: ruleInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + } + }, + }, + // Two pre v0.8.0 store gateway nodes, they don't have ext labels set. + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + // Regression tests against https://github.com/thanos-io/thanos/issues/1632: From v0.8.0 stores advertise labels. + // If the object storage handled by store gateway has only one sidecar we used to hitting issue. + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + } + }, + }, + // Stores v0.8.1 has compatibility labels. Check if they are correctly removed. + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: store.CompatibilityTypeLabelName, Value: "store"}, + }, + }, + } + }, + }, + // Duplicated store, in previous versions it would be deduplicated. Now it should be not. + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: store.CompatibilityTypeLabelName, Value: "store"}, + }, + }, + } + }, + }, + { + InfoResponse: receiveInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + } + }, + }, + // Duplicate receiver + { + InfoResponse: receiveInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + {Name: "l1", Value: "v2"}, + {Name: "l2", Value: "v3"}, + }, + }, + { + Labels: []labelpb.ZLabel{ + {Name: "l3", Value: "v4"}, + }, + }, + } + }, + }, + }) + testutil.Ok(t, err) + defer endpoint2.Close() + + discoveredEndpointAddr = append(discoveredEndpointAddr, endpoint2.EndpointAddresses()...) + + // New stores should be loaded. + endpointSet.Update(context.Background()) + testutil.Equals(t, 1+len(endpoint2.srvs), len(endpointSet.endpoints)) + + // Check stats. 
+ expected = newEndpointAPIStats() + expected[component.Query] = map[string]int{ + "{l1=\"v2\", l2=\"v3\"}": 1, + "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2, + } + expected[component.Rule] = map[string]int{ + "{l1=\"v2\", l2=\"v3\"}": 2, + } + expected[component.Sidecar] = map[string]int{ + fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1, + "{l1=\"v2\", l2=\"v3\"}": 2, + } + expected[component.Store] = map[string]int{ + "": 2, + "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 3, + } + expected[component.Receive] = map[string]int{ + "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2, + } + testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) + + // Close remaining endpoint from previous batch + endpoints.CloseOne(discoveredEndpointAddr[1]) + endpointSet.Update(context.Background()) + + for addr, e := range endpointSet.endpoints { + testutil.Equals(t, addr, e.addr) + assertRegisteredAPIs(t, endpoint2.exposedAPIs[addr], e) + } + + // Check statuses. + testutil.Equals(t, 2+len(endpoint2.srvs), len(endpointSet.endpointStatuses)) +} + +func TestEndpointSet_Update_NoneAvailable(t *testing.T) { + endpoints, err := startTestEndpoints([]testEndpointMeta{ + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + { + Name: "addr", + Value: addr, + }, + }, + }, + } + }, + }, + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + { + Name: "addr", + Value: addr, + }, + }, + }, + } + }, + }, + }) + testutil.Ok(t, err) + defer endpoints.Close() + + initialEndpointAddr := endpoints.EndpointAddresses() + endpoints.CloseOne(initialEndpointAddr[0]) + endpoints.CloseOne(initialEndpointAddr[1]) + + endpointSet := NewEndpointSet(nil, nil, + func() (specs []EndpointSpec) { + for _, addr := range initialEndpointAddr { + specs = append(specs, NewGRPCEndpointSpec(addr, false)) + } + return specs + }, + testGRPCOpts, time.Minute) + endpointSet.gRPCInfoCallTimeout = 2 * time.Second + + // Should not matter how many of these we run. + endpointSet.Update(context.Background()) + endpointSet.Update(context.Background()) + testutil.Equals(t, 0, len(endpointSet.endpoints), "none of services should respond just fine, so we expect no client to be ready.") + + // Leak test will ensure that we don't keep client connection around. + expected := newEndpointAPIStats() + testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) +} + +// TestEndpoint_Update_QuerierStrict tests what happens when the strict mode is enabled/disabled. 
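+// Statically defined endpoints under strict mode are expected to stay in the set even
+// when unreachable: their last known metadata is retained, and an endpoint that never
+// answered the Info call is assumed to cover the full MinTime/MaxTime range.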
+func TestEndpoint_Update_QuerierStrict(t *testing.T) { + endpoints, err := startTestEndpoints([]testEndpointMeta{ + { + InfoResponse: &infopb.InfoResponse{ + ComponentType: component.Sidecar.String(), + Store: &infopb.StoreInfo{ + MinTime: 12345, + MaxTime: 54321, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Rules: &infopb.RulesInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Targets: &infopb.TargetsInfo{}, + }, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + { + Name: "addr", + Value: addr, + }, + }, + }, + } + }, + }, + { + InfoResponse: &infopb.InfoResponse{ + ComponentType: component.Sidecar.String(), + Store: &infopb.StoreInfo{ + MinTime: 66666, + MaxTime: 77777, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Rules: &infopb.RulesInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Targets: &infopb.TargetsInfo{}, + }, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + { + Name: "addr", + Value: addr, + }, + }, + }, + } + }, + }, + // Slow store. + { + InfoResponse: &infopb.InfoResponse{ + ComponentType: component.Sidecar.String(), + Store: &infopb.StoreInfo{ + MinTime: 65644, + MaxTime: 77777, + }, + Exemplars: &infopb.ExemplarsInfo{ + MinTime: math.MinInt64, + MaxTime: math.MaxInt64, + }, + Rules: &infopb.RulesInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Targets: &infopb.TargetsInfo{}, + }, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{ + { + Labels: []labelpb.ZLabel{ + { + Name: "addr", + Value: addr, + }, + }, + }, + } + }, + infoDelay: 2 * time.Second, + }, + }) + + testutil.Ok(t, err) + defer endpoints.Close() + + discoveredEndpointAddr := endpoints.EndpointAddresses() + + staticEndpointAddr := discoveredEndpointAddr[0] + slowStaticEndpointAddr := discoveredEndpointAddr[2] + endpointSet := NewEndpointSet(nil, nil, func() (specs []EndpointSpec) { + return []EndpointSpec{ + NewGRPCEndpointSpec(discoveredEndpointAddr[0], true), + NewGRPCEndpointSpec(discoveredEndpointAddr[1], false), + NewGRPCEndpointSpec(discoveredEndpointAddr[2], true), + } + }, testGRPCOpts, time.Minute) + defer endpointSet.Close() + endpointSet.gRPCInfoCallTimeout = 1 * time.Second + + // Initial update. + endpointSet.Update(context.Background()) + testutil.Equals(t, 3, len(endpointSet.endpoints), "three clients must be available for running nodes") + + // The endpoint has not responded to the info call and is assumed to cover everything. + curMin, curMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime + testutil.Assert(t, endpointSet.endpoints[slowStaticEndpointAddr].cc.GetState().String() != "SHUTDOWN", "slow store's connection should not be closed") + testutil.Equals(t, int64(MinTime), curMin) + testutil.Equals(t, int64(MaxTime), curMax) + + // The endpoint is statically defined + strict mode is enabled + // so its client + information must be retained. 
+ curMin, curMax = endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime + testutil.Equals(t, int64(12345), curMin, "got incorrect minimum time") + testutil.Equals(t, int64(54321), curMax, "got incorrect minimum time") + + // Successfully retrieve the information and observe minTime/maxTime updating. + endpointSet.gRPCInfoCallTimeout = 3 * time.Second + endpointSet.Update(context.Background()) + updatedCurMin, updatedCurMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime + testutil.Equals(t, int64(65644), updatedCurMin) + testutil.Equals(t, int64(77777), updatedCurMax) + endpointSet.gRPCInfoCallTimeout = 1 * time.Second + + // Turn off the endpoints. + endpoints.Close() + + // Update again many times. Should not matter WRT the static one. + endpointSet.Update(context.Background()) + endpointSet.Update(context.Background()) + endpointSet.Update(context.Background()) + + // Check that the information is the same. + testutil.Equals(t, 2, len(endpointSet.endpoints), "two static clients must remain available") + testutil.Equals(t, curMin, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different") + testutil.Equals(t, curMax, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different") + testutil.NotOk(t, endpointSet.endpointStatuses[staticEndpointAddr].LastError.originalErr) + + testutil.Equals(t, updatedCurMin, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different") + testutil.Equals(t, updatedCurMax, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different") +} + +func TestEndpointSet_APIs_Discovery(t *testing.T) { + endpoints, err := startTestEndpoints([]testEndpointMeta{ + { + InfoResponse: sidecarInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + { + InfoResponse: ruleInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + { + InfoResponse: receiveInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + { + InfoResponse: storeGWInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + { + InfoResponse: queryInfo, + extlsetFn: func(addr string) []labelpb.ZLabelSet { + return []labelpb.ZLabelSet{} + }, + }, + }) + testutil.Ok(t, err) + defer endpoints.Close() + + type discoveryState struct { + name string + endpointSpec func() []EndpointSpec + expectedStores int + expectedRules int + expectedTarget int + expectedMetricMetadata int + expectedExemplars int + } + + for _, tc := range []struct { + states []discoveryState + name string + }{ + { + name: "All endpoints discovered concurrently", + states: []discoveryState{ + { + name: "no endpoints", + endpointSpec: nil, + }, + { + name: "Sidecar, Ruler, Querier, Receiver and StoreGW discovered", + endpointSpec: func() []EndpointSpec { + endpointSpec := make([]EndpointSpec, 0, len(endpoints.orderAddrs)) + for _, addr := range endpoints.orderAddrs { + endpointSpec = append(endpointSpec, NewGRPCEndpointSpec(addr, false)) + } + return endpointSpec + }, + expectedStores: 4, // sidecar + querier + receiver + storeGW + expectedRules: 3, // sidecar + 
querier + ruler + expectedTarget: 2, // sidecar + querier + expectedMetricMetadata: 2, // sidecar + querier + expectedExemplars: 3, // sidecar + querier + receiver + }, + }, + }, + { + name: "Sidecar discovery first, eventually Ruler discovered and then Sidecar removed", + states: []discoveryState{ + { + name: "no stores", + endpointSpec: nil, + }, + { + name: "Sidecar discovered, no Ruler discovered", + endpointSpec: func() []EndpointSpec { + return []EndpointSpec{ + NewGRPCEndpointSpec(endpoints.orderAddrs[0], false), + } + }, + expectedStores: 1, // sidecar + expectedRules: 1, // sidecar + expectedTarget: 1, // sidecar + expectedMetricMetadata: 1, // sidecar + expectedExemplars: 1, // sidecar + }, + { + name: "Ruler discovered", + endpointSpec: func() []EndpointSpec { + return []EndpointSpec{ + NewGRPCEndpointSpec(endpoints.orderAddrs[0], false), + NewGRPCEndpointSpec(endpoints.orderAddrs[1], false), + } + }, + expectedStores: 1, // sidecar + expectedRules: 2, // sidecar + ruler + expectedTarget: 1, // sidecar + expectedMetricMetadata: 1, // sidecar + expectedExemplars: 1, // sidecar + }, + { + name: "Sidecar removed", + endpointSpec: func() []EndpointSpec { + return []EndpointSpec{ + NewGRPCEndpointSpec(endpoints.orderAddrs[1], false), + } + }, + expectedRules: 1, // ruler + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + currentState := 0 + + endpointSet := NewEndpointSet(nil, nil, + func() []EndpointSpec { + if tc.states[currentState].endpointSpec == nil { + return nil + } + + return tc.states[currentState].endpointSpec() + }, + testGRPCOpts, time.Minute) + + defer endpointSet.Close() + + for { + endpointSet.Update(context.Background()) + + gotStores := 0 + gotRules := 0 + gotTarget := 0 + gotExemplars := 0 + gotMetricMetadata := 0 + + for _, er := range endpointSet.endpoints { + if er.HasStoreAPI() { + gotStores += 1 + } + if er.HasRulesAPI() { + gotRules += 1 + } + if er.HasTargetsAPI() { + gotTarget += 1 + } + if er.HasExemplarsAPI() { + gotExemplars += 1 + } + if er.HasMetricMetadataAPI() { + gotMetricMetadata += 1 + } + } + testutil.Equals(t, tc.states[currentState].expectedStores, gotStores) + testutil.Equals(t, tc.states[currentState].expectedRules, gotRules) + testutil.Equals(t, tc.states[currentState].expectedTarget, gotTarget) + testutil.Equals(t, tc.states[currentState].expectedMetricMetadata, gotMetricMetadata) + testutil.Equals(t, tc.states[currentState].expectedExemplars, gotExemplars) + + currentState = currentState + 1 + if len(tc.states) == currentState { + break + } + } + }) + } +} + +type errThatMarshalsToEmptyDict struct { + msg string +} + +// MarshalJSON marshals the error and returns and empty dict, not the error string. +func (e *errThatMarshalsToEmptyDict) MarshalJSON() ([]byte, error) { + return json.Marshal(map[string]string{}) +} + +// Error returns the original, underlying string. +func (e *errThatMarshalsToEmptyDict) Error() string { + return e.msg +} + +// Test highlights that without wrapping the error, it is marshaled to empty dict {}, not its message. 
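+// stringError implements json.Marshaler and serializes the wrapped error's message,
+// so the status API reports a readable string instead of an empty object.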
+func TestEndpointStringError(t *testing.T) { + dictErr := &errThatMarshalsToEmptyDict{msg: "Error message"} + stringErr := &stringError{originalErr: dictErr} + + endpointstatusMock := map[string]error{} + endpointstatusMock["dictErr"] = dictErr + endpointstatusMock["stringErr"] = stringErr + + b, err := json.Marshal(endpointstatusMock) + + testutil.Ok(t, err) + testutil.Equals(t, []byte(`{"dictErr":{},"stringErr":"Error message"}`), b, "expected to get proper results") +} + +// Errors that usually marshal to empty dict should return the original error string. +func TestUpdateEndpointStateLastError(t *testing.T) { + tcs := []struct { + InputError error + ExpectedLastErr string + }{ + {errors.New("normal_err"), `"normal_err"`}, + {nil, `null`}, + {&errThatMarshalsToEmptyDict{"the error message"}, `"the error message"`}, + } + + for _, tc := range tcs { + mockedEndpointSet := &EndpointSet{ + endpointStatuses: map[string]*EndpointStatus{}, + } + mockEndpointRef := &endpointRef{ + addr: "mockedStore", + } + + mockedEndpointSet.updateEndpointStatus(mockEndpointRef, tc.InputError) + + b, err := json.Marshal(mockedEndpointSet.endpointStatuses["mockedStore"].LastError) + testutil.Ok(t, err) + testutil.Equals(t, tc.ExpectedLastErr, string(b)) + } +} + +func TestUpdateEndpointStateForgetsPreviousErrors(t *testing.T) { + mockEndpointSet := &EndpointSet{ + endpointStatuses: map[string]*EndpointStatus{}, + } + mockEndpointRef := &endpointRef{ + addr: "mockedStore", + } + + mockEndpointSet.updateEndpointStatus(mockEndpointRef, errors.New("test err")) + + b, err := json.Marshal(mockEndpointSet.endpointStatuses["mockedStore"].LastError) + testutil.Ok(t, err) + testutil.Equals(t, `"test err"`, string(b)) + + // updating status without and error should clear the previous one. 
+ mockEndpointSet.updateEndpointStatus(mockEndpointRef, nil) + + b, err = json.Marshal(mockEndpointSet.endpointStatuses["mockedStore"].LastError) + testutil.Ok(t, err) + testutil.Equals(t, `null`, string(b)) +} + +func exposedAPIs(c string) *APIs { + switch c { + case component.Sidecar.String(): + return &APIs{ + store: true, + target: true, + rules: true, + metricMetadata: true, + exemplars: true, + } + case component.Query.String(): + return &APIs{ + store: true, + target: true, + rules: true, + metricMetadata: true, + exemplars: true, + } + case component.Receive.String(): + return &APIs{ + store: true, + exemplars: true, + } + case component.Rule.String(): + return &APIs{ + rules: true, + } + case component.Store.String(): + return &APIs{ + store: true, + } + } + return &APIs{} +} + +func assertRegisteredAPIs(t *testing.T, expectedAPIs *APIs, er *endpointRef) { + testutil.Equals(t, expectedAPIs.store, er.HasStoreAPI()) + testutil.Equals(t, expectedAPIs.rules, er.HasRulesAPI()) + testutil.Equals(t, expectedAPIs.target, er.HasTargetsAPI()) + testutil.Equals(t, expectedAPIs.metricMetadata, er.HasMetricMetadataAPI()) + testutil.Equals(t, expectedAPIs.exemplars, er.HasExemplarsAPI()) +} diff --git a/pkg/query/storeset_test.go b/pkg/query/storeset_test.go index d7bb2c7ee8..bf3e2fe8ea 100644 --- a/pkg/query/storeset_test.go +++ b/pkg/query/storeset_test.go @@ -24,11 +24,6 @@ import ( "github.com/thanos-io/thanos/pkg/testutil" ) -var testGRPCOpts = []grpc.DialOption{ - grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), - grpc.WithInsecure(), -} - type mockedStore struct { infoDelay time.Duration info storepb.InfoResponse @@ -1016,20 +1011,6 @@ func TestStoreSet_Rules_Discovery(t *testing.T) { } } -type errThatMarshalsToEmptyDict struct { - msg string -} - -// MarshalJSON marshals the error and returns and empty dict, not the error string. -func (e *errThatMarshalsToEmptyDict) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]string{}) -} - -// Error returns the original, underlying string. -func (e *errThatMarshalsToEmptyDict) Error() string { - return e.msg -} - // Test highlights that without wrapping the error, it is marshaled to empty dict {}, not its message. func TestStringError(t *testing.T) { dictErr := &errThatMarshalsToEmptyDict{msg: "Error message"} From 9019977b63d2c077fcd454c43c1bdec370a19fb7 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Wed, 7 Jul 2021 03:14:40 +0530 Subject: [PATCH 03/11] lint fixes Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 11 +++------ pkg/query/endpointset_test.go | 44 +++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index 1742e8f166..98952907de 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -114,7 +114,7 @@ type EndpointStatus struct { // storeSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. // A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series. -// TODO(hitanshu-mehta) Currently,only collecting metrices of storeAPI. Make this struct generic. +// TODO(hitanshu-mehta) Currently,only collecting metrics of storeAPI. Make this struct generic. type endpointSetNodeCollector struct { mtx sync.Mutex storeNodes map[component.Component]map[string]int @@ -272,8 +272,7 @@ func (e *EndpointSet) Update(ctx context.Context) { // All producers should have unique external labels. 
While this does not check only StoreAPIs connected to // this querier this allows to notify early user about misconfiguration. Warn only. This is also detectable from metric. - if (er.ComponentType() != nil && - (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule)) && + if (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { level.Warn(e.logger).Log("msg", "found duplicate storeAPI producer (sidecar or ruler). This is not advices as it will malform data in in the same bucket", @@ -598,10 +597,6 @@ func (er *endpointRef) ComponentType() component.Component { er.mtx.RLock() defer er.mtx.RUnlock() - if er.metadata == nil { - return component.UnknownStoreAPI - } - return component.FromString(er.metadata.ComponentType) } @@ -662,7 +657,7 @@ func (er *endpointRef) LabelSets() []labels.Labels { return labelSet } -func (er *endpointRef) TimeRange() (mint int64, maxt int64) { +func (er *endpointRef) TimeRange() (mint, maxt int64) { er.mtx.RLock() defer er.mtx.RUnlock() diff --git a/pkg/query/endpointset_test.go b/pkg/query/endpointset_test.go index 6509cf6255..dd6be069b6 100644 --- a/pkg/query/endpointset_test.go +++ b/pkg/query/endpointset_test.go @@ -953,11 +953,39 @@ func TestEndpointSet_APIs_Discovery(t *testing.T) { gotMetricMetadata += 1 } } - testutil.Equals(t, tc.states[currentState].expectedStores, gotStores) - testutil.Equals(t, tc.states[currentState].expectedRules, gotRules) - testutil.Equals(t, tc.states[currentState].expectedTarget, gotTarget) - testutil.Equals(t, tc.states[currentState].expectedMetricMetadata, gotMetricMetadata) - testutil.Equals(t, tc.states[currentState].expectedExemplars, gotExemplars) + testutil.Equals( + t, + tc.states[currentState].expectedStores, + gotStores, + "unexepected discovered storeAPIs in state %q", + tc.states[currentState].name) + testutil.Equals( + t, + tc.states[currentState].expectedRules, + gotRules, + "unexepected discovered rulesAPIs in state %q", + tc.states[currentState].name) + testutil.Equals( + t, + tc.states[currentState].expectedTarget, + gotTarget, + "unexepected discovered targetAPIs in state %q", + tc.states[currentState].name, + ) + testutil.Equals( + t, + tc.states[currentState].expectedMetricMetadata, + gotMetricMetadata, + "unexepected discovered metricMetadataAPIs in state %q", + tc.states[currentState].name, + ) + testutil.Equals( + t, + tc.states[currentState].expectedExemplars, + gotExemplars, + "unexepected discovered ExemplarsAPIs in state %q", + tc.states[currentState].name, + ) currentState = currentState + 1 if len(tc.states) == currentState { @@ -1014,6 +1042,9 @@ func TestUpdateEndpointStateLastError(t *testing.T) { } mockEndpointRef := &endpointRef{ addr: "mockedStore", + metadata: &endpointMetadata{ + &infopb.InfoResponse{}, + }, } mockedEndpointSet.updateEndpointStatus(mockEndpointRef, tc.InputError) @@ -1030,6 +1061,9 @@ func TestUpdateEndpointStateForgetsPreviousErrors(t *testing.T) { } mockEndpointRef := &endpointRef{ addr: "mockedStore", + metadata: &endpointMetadata{ + &infopb.InfoResponse{}, + }, } mockEndpointSet.updateEndpointStatus(mockEndpointRef, errors.New("test err")) From 219f4f94a53bcd78a7dc09157cf0260a1bf39ec0 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Thu, 22 Jul 2021 15:46:34 +0530 Subject: [PATCH 04/11] fix typo Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/query/endpointset.go 
b/pkg/query/endpointset.go index 98952907de..e23dbf9cd7 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -112,7 +112,7 @@ type EndpointStatus struct { MaxTime int64 `json:"maxTime"` } -// storeSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. +// endpointSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. // A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series. // TODO(hitanshu-mehta) Currently,only collecting metrics of storeAPI. Make this struct generic. type endpointSetNodeCollector struct { From 171231a3ea5b75ca6dcf428faf0e37a37d48cdba Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Wed, 18 Aug 2021 11:20:50 +0530 Subject: [PATCH 05/11] remove code smells Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 94 +++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index e23dbf9cd7..bd145f8eca 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -48,24 +48,24 @@ type EndpointSpec interface { // given store connection. Metadata(ctx context.Context, client infopb.InfoClient) (*endpointMetadata, error) - // StrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. - StrictStatic() bool + // IsStrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. + IsStrictStatic() bool } type grpcEndpointSpec struct { - addr string - strictstatic bool + addr string + isStrictStatic bool } // NewGRPCEndpointSpec creates gRPC endpoint spec. // It uses InfoAPI to get Metadata. -func NewGRPCEndpointSpec(addr string, strictstatic bool) EndpointSpec { - return &grpcEndpointSpec{addr: addr, strictstatic: strictstatic} +func NewGRPCEndpointSpec(addr string, isStrictStatic bool) EndpointSpec { + return &grpcEndpointSpec{addr: addr, isStrictStatic: isStrictStatic} } -// StrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. -func (es *grpcEndpointSpec) StrictStatic() bool { - return es.strictstatic +// IsStrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. +func (es *grpcEndpointSpec) IsStrictStatic() bool { + return es.isStrictStatic } func (es *grpcEndpointSpec) Addr() string { @@ -262,7 +262,7 @@ func (e *EndpointSet) Update(ctx context.Context) { level.Info(er.logger).Log("msg", unhealthyEndpointMessage, "address", addr, "extLset", labelpb.PromLabelSetsToString(er.LabelSets())) } - // Add stores that are not yet in stores. + // Add endpoints that are not yet in activeEndpoints map. for addr, er := range activeEndpoints { if _, ok := endpoints[addr]; ok { continue @@ -270,9 +270,8 @@ func (e *EndpointSet) Update(ctx context.Context) { extLset := labelpb.PromLabelSetsToString(er.LabelSets()) - // All producers should have unique external labels. While this does not check only StoreAPIs connected to - // this querier this allows to notify early user about misconfiguration. Warn only. This is also detectable from metric. - if (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && + // All producers that expose StoreAPI should have unique external labels. Check all which connect to our Querier. 
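+	// Warn only; duplicates are also detectable through the
+	// 'thanos_store_nodes_grpc_connections' series exposed by endpointSetNodeCollector.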
+ if er.HasStoreAPI() && (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { level.Warn(e.logger).Log("msg", "found duplicate storeAPI producer (sidecar or ruler). This is not advices as it will malform data in in the same bucket", @@ -283,25 +282,7 @@ func (e *EndpointSet) Update(ctx context.Context) { endpoints[addr] = er e.updateEndpointStatus(er, nil) - if er.HasStoreAPI() { - level.Info(e.logger).Log("msg", "adding new storeAPI to query endpointset", "address", addr, "extLset", extLset) - } - - if er.HasRulesAPI() { - level.Info(e.logger).Log("msg", "adding new rulesAPI to query endpointset", "address", addr) - } - - if er.HasExemplarsAPI() { - level.Info(e.logger).Log("msg", "adding new exemplarsAPI to query endpointset", "address", addr) - } - - if er.HasTargetsAPI() { - level.Info(e.logger).Log("msg", "adding new targetsAPI to query endpointset", "address", addr) - } - - if er.HasMetricMetadataAPI() { - level.Info(e.logger).Log("msg", "adding new MetricMetadataAPI to query endpointset", "address", addr) - } + level.Info(e.logger).Log("msg", fmt.Sprintf("adding new %v with %+v", er.ComponentType(), er.apisPresent()), "address", addr, "extLset", extLset) } e.endpointsMetric.Update(stats) @@ -312,7 +293,7 @@ func (e *EndpointSet) Update(ctx context.Context) { e.cleanUpStoreStatuses(endpoints) } -// Get returns a list of all active stores. +// GetStoreClients returns a list of all active stores. func (e *EndpointSet) GetStoreClients() []storepb.StoreClient { e.endpointsMtx.RLock() defer e.endpointsMtx.RUnlock() @@ -443,7 +424,7 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri metadata, err := spec.Metadata(ctx, er.clients.info) if err != nil { - if !seenAlready && !spec.StrictStatic() { + if !seenAlready && !spec.IsStrictStatic() { // Close only if new and not a strict static node. // Unactive `e.endpoints` will be closed later on. 
er.Close() @@ -452,7 +433,7 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri e.updateEndpointStatus(er, err) level.Warn(e.logger).Log("msg", "update of node failed", "err", errors.Wrap(err, "getting metadata"), "address", addr) - if !spec.StrictStatic() { + if !spec.IsStrictStatic() { return } @@ -600,39 +581,46 @@ func (er *endpointRef) ComponentType() component.Component { return component.FromString(er.metadata.ComponentType) } +func (er *endpointRef) HasClients() bool { + er.mtx.RLock() + defer er.mtx.RUnlock() + + return er.clients != nil +} + func (er *endpointRef) HasStoreAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.clients.store != nil + return er.HasClients() && er.clients.store != nil } func (er *endpointRef) HasRulesAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.clients.rule != nil + return er.HasClients() && er.clients.rule != nil } func (er *endpointRef) HasTargetsAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.clients.target != nil + return er.HasClients() && er.clients.target != nil } func (er *endpointRef) HasMetricMetadataAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.clients.metricMetadata != nil + return er.HasClients() && er.clients.metricMetadata != nil } func (er *endpointRef) HasExemplarsAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.clients.exemplar != nil + return er.HasClients() && er.clients.exemplar != nil } func (er *endpointRef) LabelSets() []labels.Labels { @@ -682,6 +670,32 @@ func (er *endpointRef) Close() { runutil.CloseWithLogOnErr(er.logger, er.cc, fmt.Sprintf("endpoint %v connection closed", er.addr)) } +func (er *endpointRef) apisPresent() []string { + var apisPresent []string + + if er.HasStoreAPI() { + apisPresent = append(apisPresent, "storeAPI") + } + + if er.HasRulesAPI() { + apisPresent = append(apisPresent, "rulesAPI") + } + + if er.HasExemplarsAPI() { + apisPresent = append(apisPresent, "exemplarsAPI") + } + + if er.HasTargetsAPI() { + apisPresent = append(apisPresent, "targetsAPI") + } + + if er.HasMetricMetadataAPI() { + apisPresent = append(apisPresent, "MetricMetadataAPI") + } + + return apisPresent +} + type endpointClients struct { store storepb.StoreClient rule rulespb.RulesClient From 7a33e85405c6b784d614eb30454ea65d13fccf2f Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Fri, 20 Aug 2021 23:21:43 +0530 Subject: [PATCH 06/11] start using endpointset instead of storeset Signed-off-by: Hitanshu Mehta --- cmd/thanos/query.go | 112 ++++++++++++++++++++++------------ pkg/api/query/v1.go | 12 ++-- pkg/query/endpointset.go | 75 +++++++++++++++++------ pkg/query/endpointset_test.go | 1 + pkg/ui/query.go | 18 +++--- 5 files changed, 144 insertions(+), 74 deletions(-) diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 2cd738f99c..4eaa06474f 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -375,60 +375,94 @@ func runQuery( ) var ( - stores = query.NewStoreSet( + endpoints = query.NewEndpointSet( logger, reg, - func() (specs []query.StoreSpec) { - + func() (specs []query.EndpointSpec) { // Add strict & static nodes. for _, addr := range strictStores { - specs = append(specs, query.NewGRPCStoreSpec(addr, true)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, true)) } - // Add DNS resolved addresses from static flags and file SD. 
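+			// Every discovered address, regardless of which provider found it, becomes a
+			// single EndpointSpec; the endpoint's Info response then decides which APIs
+			// (store, rules, targets, metric metadata, exemplars) are registered for it.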
+ for _, addr := range dnsStoreProvider.Addresses() { - specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) } - return removeDuplicateStoreSpecs(logger, duplicatedStores, specs) - }, - func() (specs []query.RuleSpec) { + for _, addr := range dnsRuleProvider.Addresses() { - specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) } - // NOTE(s-urbaniak): No need to remove duplicates, as rule apis are a subset of store apis. - // hence, any duplicates will be tracked in the store api set. - - return specs - }, - func() (specs []query.TargetSpec) { for _, addr := range dnsTargetProvider.Addresses() { - specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) } - return specs - }, - func() (specs []query.MetadataSpec) { for _, addr := range dnsMetadataProvider.Addresses() { - specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) } - return specs - }, - func() (specs []query.ExemplarSpec) { for _, addr := range dnsExemplarProvider.Addresses() { - specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) } - return specs + return removeDuplicateStoreSpecs(logger, duplicatedStores, specs) }, dialOpts, unhealthyStoreTimeout, ) - proxy = store.NewProxyStore(logger, reg, stores.Get, component.Query, selectorLset, storeResponseTimeout) - rulesProxy = rules.NewProxy(logger, stores.GetRulesClients) - targetsProxy = targets.NewProxy(logger, stores.GetTargetsClients) - metadataProxy = metadata.NewProxy(logger, stores.GetMetadataClients) - exemplarsProxy = exemplars.NewProxy(logger, stores.GetExemplarsStores, selectorLset) + // stores = query.NewStoreSet( + // logger, + // reg, + // func() (specs []query.StoreSpec) { + + // // Add strict & static nodes. + // for _, addr := range strictStores { + // specs = append(specs, query.NewGRPCStoreSpec(addr, true)) + // } + // // Add DNS resolved addresses from static flags and file SD. + // for _, addr := range dnsStoreProvider.Addresses() { + // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + // } + // return removeDuplicateStoreSpecs(logger, duplicatedStores, specs) + // }, + // func() (specs []query.RuleSpec) { + // for _, addr := range dnsRuleProvider.Addresses() { + // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + // } + + // // NOTE(s-urbaniak): No need to remove duplicates, as rule apis are a subset of store apis. + // // hence, any duplicates will be tracked in the store api set. 
+ + // return specs + // }, + // func() (specs []query.TargetSpec) { + // for _, addr := range dnsTargetProvider.Addresses() { + // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + // } + + // return specs + // }, + // func() (specs []query.MetadataSpec) { + // for _, addr := range dnsMetadataProvider.Addresses() { + // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + // } + + // return specs + // }, + // func() (specs []query.ExemplarSpec) { + // for _, addr := range dnsExemplarProvider.Addresses() { + // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + // } + + // return specs + // }, + // dialOpts, + // unhealthyStoreTimeout, + // ) + proxy = store.NewProxyStore(logger, reg, endpoints.GetStoreClients, component.Query, selectorLset, storeResponseTimeout) + rulesProxy = rules.NewProxy(logger, endpoints.GetRulesClients) + targetsProxy = targets.NewProxy(logger, endpoints.GetTargetsClients) + metadataProxy = metadata.NewProxy(logger, endpoints.GetMetricMetadataClients) + exemplarsProxy = exemplars.NewProxy(logger, endpoints.GetExemplarsStores, selectorLset) queryableCreator = query.NewQueryableCreator( logger, extprom.WrapRegistererWithPrefix("thanos_query_", reg), @@ -454,12 +488,12 @@ func runQuery( ctx, cancel := context.WithCancel(context.Background()) g.Add(func() error { return runutil.Repeat(5*time.Second, ctx.Done(), func() error { - stores.Update(ctx) + endpoints.Update(ctx) return nil }) }, func(error) { cancel() - stores.Close() + endpoints.Close() }) } // Run File Service Discovery and update the store set when the files are modified. @@ -486,7 +520,7 @@ func runQuery( continue } fileSDCache.Update(update) - stores.Update(ctxUpdate) + endpoints.Update(ctxUpdate) if err := dnsStoreProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...)); err != nil { level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err) @@ -562,11 +596,11 @@ func runQuery( ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) // TODO(bplotka in PR #513 review): pass all flags, not only the flags needed by prefix rewriting. - ui.NewQueryUI(logger, stores, webExternalPrefix, webPrefixHeaderName).Register(router, ins) + ui.NewQueryUI(logger, endpoints, webExternalPrefix, webPrefixHeaderName).Register(router, ins) api := v1.NewQueryAPI( logger, - stores, + endpoints, engineFactory(promql.NewEngine, engineOpts, dynamicLookbackDelta), queryableCreator, // NOTE: Will share the same replica label as the query for now. 
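
The block above collapses the five per-API spec providers that NewStoreSet took into a single EndpointSpec provider feeding one EndpointSet, which then backs every fanout proxy. A minimal sketch of that wiring, hedged: it assumes the constructors and accessors introduced in this series (query.NewGRPCEndpointSpec, query.NewEndpointSet, Update, Close, GetStoreClients, GetRulesClients), uses placeholder addresses and dial options, and assumes a nil logger and registry are tolerated as they were by NewStoreSet.

package main

import (
	"context"
	"time"

	"google.golang.org/grpc"

	"github.com/thanos-io/thanos/pkg/query"
)

func main() {
	// Placeholder dial options; runQuery builds the real ones from its TLS/tracing flags.
	dialOpts := []grpc.DialOption{grpc.WithInsecure()}

	endpoints := query.NewEndpointSet(nil, nil, // nil logger/registry assumed tolerated, as in NewStoreSet
		func() (specs []query.EndpointSpec) {
			// Strict & static endpoints keep their place in the set even while unreachable.
			specs = append(specs, query.NewGRPCEndpointSpec("sidecar.example:10901", true))
			// Discovered endpoints are dropped once they stay unhealthy past the timeout.
			specs = append(specs, query.NewGRPCEndpointSpec("store.example:10901", false))
			return specs
		},
		dialOpts,
		2*time.Minute, // unhealthy endpoint timeout
	)
	defer endpoints.Close()

	// Refresh metadata once; runQuery repeats this every 5 seconds via runutil.Repeat.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	endpoints.Update(ctx)

	// The single set now feeds every fanout proxy: store, rules, targets, metadata, exemplars.
	_ = endpoints.GetStoreClients()
	_ = endpoints.GetRulesClients()
}
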
@@ -644,8 +678,8 @@ func runQuery( return nil } -func removeDuplicateStoreSpecs(logger log.Logger, duplicatedStores prometheus.Counter, specs []query.StoreSpec) []query.StoreSpec { - set := make(map[string]query.StoreSpec) +func removeDuplicateStoreSpecs(logger log.Logger, duplicatedStores prometheus.Counter, specs []query.EndpointSpec) []query.EndpointSpec { + set := make(map[string]query.EndpointSpec) for _, spec := range specs { addr := spec.Addr() if _, ok := set[addr]; ok { @@ -654,7 +688,7 @@ func removeDuplicateStoreSpecs(logger log.Logger, duplicatedStores prometheus.Co } set[addr] = spec } - deduplicated := make([]query.StoreSpec, 0, len(set)) + deduplicated := make([]query.EndpointSpec, 0, len(set)) for _, value := range set { deduplicated = append(deduplicated, value) } diff --git a/pkg/api/query/v1.go b/pkg/api/query/v1.go index b74dcf864a..6892df380f 100644 --- a/pkg/api/query/v1.go +++ b/pkg/api/query/v1.go @@ -94,7 +94,7 @@ type QueryAPI struct { disableCORS bool replicaLabels []string - storeSet *query.StoreSet + endpointSet *query.EndpointSet defaultRangeQueryStep time.Duration defaultInstantQueryMaxSourceResolution time.Duration @@ -106,7 +106,7 @@ type QueryAPI struct { // NewQueryAPI returns an initialized QueryAPI type. func NewQueryAPI( logger log.Logger, - storeSet *query.StoreSet, + endpointSet *query.EndpointSet, qe func(int64) *promql.Engine, c query.QueryableCreator, ruleGroups rules.UnaryClient, @@ -144,7 +144,7 @@ func NewQueryAPI( enableTargetPartialResponse: enableTargetPartialResponse, enableMetricMetadataPartialResponse: enableMetricMetadataPartialResponse, replicaLabels: replicaLabels, - storeSet: storeSet, + endpointSet: endpointSet, defaultRangeQueryStep: defaultRangeQueryStep, defaultInstantQueryMaxSourceResolution: defaultInstantQueryMaxSourceResolution, defaultMetadataTimeRange: defaultMetadataTimeRange, @@ -701,9 +701,9 @@ func (qapi *QueryAPI) labelNames(r *http.Request) (interface{}, []error, *api.Ap } func (qapi *QueryAPI) stores(_ *http.Request) (interface{}, []error, *api.ApiError) { - statuses := make(map[string][]query.StoreStatus) - for _, status := range qapi.storeSet.GetStoreStatus() { - statuses[status.StoreType.String()] = append(statuses[status.StoreType.String()], status) + statuses := make(map[string][]query.EndpointStatus) + for _, status := range qapi.endpointSet.GetEndpointStatus() { + statuses[status.ComponentType.String()] = append(statuses[status.ComponentType.String()], status) } return statuses, nil, nil } diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index bd145f8eca..b3eaf12412 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -46,7 +46,7 @@ type EndpointSpec interface { // If metadata call fails we assume that store is no longer accessible and we should not use it. // NOTE: It is implementation responsibility to retry until context timeout, but a caller responsibility to manage // given store connection. - Metadata(ctx context.Context, client infopb.InfoClient) (*endpointMetadata, error) + Metadata(ctx context.Context, client *endpointClients) (*endpointMetadata, error) // IsStrictStatic returns true if the endpoint has been statically defined and it is under a strict mode. IsStrictStatic() bool @@ -75,17 +75,38 @@ func (es *grpcEndpointSpec) Addr() string { // Metadata method for gRPC endpoint tries to call InfoAPI exposed by Thanos components until context timeout. If we are unable to get metadata after // that time, we assume that the host is unhealthy and return error. 
-func (es *grpcEndpointSpec) Metadata(ctx context.Context, client infopb.InfoClient) (metadata *endpointMetadata, err error) { - resp, err := client.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) +func (es *grpcEndpointSpec) Metadata(ctx context.Context, client *endpointClients) (*endpointMetadata, error) { + resp, err := client.info.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) if err != nil { - return &endpointMetadata{&infopb.InfoResponse{ - ComponentType: component.UnknownStoreAPI.String(), - }}, errors.Wrapf(err, "fetching info from %s", es.addr) + // Call Info method of StoreAPI, this way querier will be able to discovery old components not exposing InfoAPI. + metadata, err := es.getMetadataUsingStoreAPI(ctx, client.store) + if err != nil { + return nil, errors.Wrapf(err, "fetching info from %s", es.addr) + } + return metadata, nil } return &endpointMetadata{resp}, nil } +func (es *grpcEndpointSpec) getMetadataUsingStoreAPI(ctx context.Context, client storepb.StoreClient) (*endpointMetadata, error) { + resp, err := client.Info(ctx, &storepb.InfoRequest{}) + if err != nil { + return nil, err + } + + return &endpointMetadata{ + &infopb.InfoResponse{ + LabelSets: resp.LabelSets, + ComponentType: component.FromProto(resp.StoreType).String(), + Store: &infopb.StoreInfo{ + MinTime: resp.MinTime, + MaxTime: resp.MaxTime, + }, + }, + }, nil +} + // stringError forces the error to be a string // when marshaled into a JSON. type stringError struct { @@ -290,18 +311,18 @@ func (e *EndpointSet) Update(ctx context.Context) { e.endpoints = endpoints e.endpointsMtx.Unlock() - e.cleanUpStoreStatuses(endpoints) + e.cleanUpEndpointStatuses(endpoints) } // GetStoreClients returns a list of all active stores. -func (e *EndpointSet) GetStoreClients() []storepb.StoreClient { +func (e *EndpointSet) GetStoreClients() []store.Client { e.endpointsMtx.RLock() defer e.endpointsMtx.RUnlock() - stores := make([]storepb.StoreClient, 0, len(e.endpoints)) + stores := make([]store.Client, 0, len(e.endpoints)) for _, er := range e.endpoints { if er.HasStoreAPI() { - stores = append(stores, er.clients.store) + stores = append(stores, er) } } return stores @@ -412,17 +433,21 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri return } + // Assume that StoreAPI is also exposed because if call to info service fails we will call info method of storeAPI. + // It will be overwritten to null if not present. er = &endpointRef{ - cc: conn, - addr: addr, - logger: e.logger, + cc: conn, + addr: addr, + logger: e.logger, + StoreClient: storepb.NewStoreClient(conn), clients: &endpointClients{ - info: infopb.NewInfoClient(conn), + info: infopb.NewInfoClient(conn), + store: storepb.NewStoreClient(conn), }, } } - metadata, err := spec.Metadata(ctx, er.clients.info) + metadata, err := spec.Metadata(ctx, er.clients) if err != nil { if !seenAlready && !spec.IsStrictStatic() { // Close only if new and not a strict static node. @@ -440,9 +465,13 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri // Still keep it around if static & strict mode enabled. // Assume that it expose storeAPI and cover all complete possible time range. 
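
The Metadata rewrite above is the compatibility core of the change: the querier asks the new InfoAPI first and, when that call fails, falls back to the StoreAPI Info call and synthesizes the response, so components that predate the InfoAPI are still discovered. A self-contained sketch of that fallback shape, using hypothetical stand-in interfaces rather than the real infopb.InfoClient and storepb.StoreClient:

package main

import (
	"context"
	"errors"
	"fmt"
)

// Stand-ins for the two clients: only the shape of the fallback matters here,
// not the real gRPC signatures.
type infoAPI interface {
	Info(ctx context.Context) (component string, err error)
}
type storeAPI interface {
	Info(ctx context.Context) (storeType string, err error)
}

// metadata loosely mirrors endpointMetadata: whatever the endpoint reports about itself.
type metadata struct{ component string }

// fetchMetadata prefers the new InfoAPI and falls back to the StoreAPI Info call,
// so endpoints built before the InfoAPI existed can still be discovered.
func fetchMetadata(ctx context.Context, info infoAPI, store storeAPI) (*metadata, error) {
	if c, err := info.Info(ctx); err == nil {
		return &metadata{component: c}, nil
	}
	c, err := store.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("fetching info: %w", err)
	}
	return &metadata{component: c}, nil
}

// oldSidecar only implements the StoreAPI-style Info call.
type oldSidecar struct{}

func (oldSidecar) Info(ctx context.Context) (string, error) { return "sidecar", nil }

// noInfoAPI simulates an endpoint that has no InfoAPI yet.
type noInfoAPI struct{}

func (noInfoAPI) Info(ctx context.Context) (string, error) { return "", errors.New("unimplemented") }

func main() {
	m, err := fetchMetadata(context.Background(), noInfoAPI{}, oldSidecar{})
	fmt.Println(m, err) // falls back to the StoreAPI answer: &{sidecar} <nil>
}
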
if !seenAlready { - metadata.Store = &infopb.StoreInfo{ - MinTime: MinTime, - MaxTime: MaxTime, + metadata = &endpointMetadata{ + &infopb.InfoResponse{ + Store: &infopb.StoreInfo{ + MinTime: MinTime, + MaxTime: MaxTime, + }, + }, } er.Update(metadata) } @@ -497,7 +526,7 @@ func (e *EndpointSet) updateEndpointStatus(er *endpointRef, err error) { e.endpointStatuses[er.addr] = &status } -func (e *EndpointSet) GetStoreStatus() []EndpointStatus { +func (e *EndpointSet) GetEndpointStatus() []EndpointStatus { e.endpointsStatusesMtx.RLock() defer e.endpointsStatusesMtx.RUnlock() @@ -512,7 +541,7 @@ func (e *EndpointSet) GetStoreStatus() []EndpointStatus { return statuses } -func (e *EndpointSet) cleanUpStoreStatuses(endpoints map[string]*endpointRef) { +func (e *EndpointSet) cleanUpEndpointStatuses(endpoints map[string]*endpointRef) { e.endpointsStatusesMtx.Lock() defer e.endpointsStatusesMtx.Unlock() @@ -530,6 +559,8 @@ func (e *EndpointSet) cleanUpStoreStatuses(endpoints map[string]*endpointRef) { // TODO(bwplotka): Consider moving storeRef out of this package and renaming it, as it also supports rules API. type endpointRef struct { + storepb.StoreClient + mtx sync.RWMutex cc *grpc.ClientConn addr string @@ -550,6 +581,10 @@ func (er *endpointRef) Update(metadata *endpointMetadata) { if metadata.Store != nil { clients.store = storepb.NewStoreClient(er.cc) + er.StoreClient = clients.store + } else { + er.clients.store = nil + er.StoreClient = nil } if metadata.Rules != nil { diff --git a/pkg/query/endpointset_test.go b/pkg/query/endpointset_test.go index dd6be069b6..3e6b89f38a 100644 --- a/pkg/query/endpointset_test.go +++ b/pkg/query/endpointset_test.go @@ -253,6 +253,7 @@ func TestEndpointSet_Update(t *testing.T) { // Initial update. endpointSet.Update(context.Background()) + testutil.Equals(t, 3, len(endpointSet.endpoints)) // Start with one not available. 
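
For strict & static endpoints whose Info call fails, the branch above keeps the endpoint in the set and substitutes metadata that claims a StoreAPI covering the whole possible time range, so the querier keeps fanning out to it until it recovers. A small sketch of that default, assuming math.MinInt64 and math.MaxInt64 as stand-ins for the package-level MinTime and MaxTime constants used there:

package main

import (
	"fmt"
	"math"

	"github.com/thanos-io/thanos/pkg/info/infopb"
)

// strictFallback builds the metadata used for a strict & static endpoint whose
// Info call failed: pretend it is a StoreAPI covering every possible timestamp,
// so it is never filtered out of query fanout while it is unreachable.
func strictFallback() *infopb.InfoResponse {
	return &infopb.InfoResponse{
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64, // stand-in for query.MinTime
			MaxTime: math.MaxInt64, // stand-in for query.MaxTime
		},
	}
}

func main() {
	m := strictFallback()
	fmt.Println(m.Store.MinTime, m.Store.MaxTime)
}
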
endpoints.CloseOne(discoveredEndpointAddr[2]) diff --git a/pkg/ui/query.go b/pkg/ui/query.go index 7389dfbd54..1778dc5557 100644 --- a/pkg/ui/query.go +++ b/pkg/ui/query.go @@ -22,7 +22,7 @@ import ( type Query struct { *BaseUI - storeSet *query.StoreSet + endpointSet *query.EndpointSet externalPrefix, prefixHeader string @@ -32,7 +32,7 @@ type Query struct { now func() model.Time } -func NewQueryUI(logger log.Logger, storeSet *query.StoreSet, externalPrefix, prefixHeader string) *Query { +func NewQueryUI(logger log.Logger, endpointSet *query.EndpointSet, externalPrefix, prefixHeader string) *Query { tmplVariables := map[string]string{ "Component": component.Query.String(), } @@ -43,7 +43,7 @@ func NewQueryUI(logger log.Logger, storeSet *query.StoreSet, externalPrefix, pre return &Query{ BaseUI: NewBaseUI(logger, "query_menu.html", tmplFuncs, tmplVariables, externalPrefix, prefixHeader, component.Query), - storeSet: storeSet, + endpointSet: endpointSet, externalPrefix: externalPrefix, prefixHeader: prefixHeader, cwd: runtimeInfo().CWD, @@ -111,12 +111,12 @@ func (q *Query) status(w http.ResponseWriter, r *http.Request) { func (q *Query) stores(w http.ResponseWriter, r *http.Request) { prefix := GetWebPrefix(q.logger, q.externalPrefix, q.prefixHeader, r) - statuses := make(map[component.StoreAPI][]query.StoreStatus) - for _, status := range q.storeSet.GetStoreStatus() { - statuses[status.StoreType] = append(statuses[status.StoreType], status) + statuses := make(map[component.Component][]query.EndpointStatus) + for _, status := range q.endpointSet.GetEndpointStatus() { + statuses[status.ComponentType] = append(statuses[status.ComponentType], status) } - sources := make([]component.StoreAPI, 0, len(statuses)) + sources := make([]component.Component, 0, len(statuses)) for k := range statuses { sources = append(sources, k) } @@ -131,8 +131,8 @@ func (q *Query) stores(w http.ResponseWriter, r *http.Request) { }) q.executeTemplate(w, "stores.html", prefix, struct { - Stores map[component.StoreAPI][]query.StoreStatus - Sources []component.StoreAPI + Stores map[component.Component][]query.EndpointStatus + Sources []component.Component }{ Stores: statuses, Sources: sources, From 291bcdd7a89b9c2c4ff0b9cbd63175888898bb3e Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Sat, 21 Aug 2021 14:28:10 +0530 Subject: [PATCH 07/11] remove storeset Signed-off-by: Hitanshu Mehta --- cmd/thanos/query.go | 49 -- pkg/query/endpointset.go | 1 + pkg/query/storeset.go | 731 ------------------------ pkg/query/storeset_test.go | 1076 ------------------------------------ 4 files changed, 1 insertion(+), 1856 deletions(-) delete mode 100644 pkg/query/storeset.go delete mode 100644 pkg/query/storeset_test.go diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 4eaa06474f..2c83f3eb84 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -409,55 +409,6 @@ func runQuery( dialOpts, unhealthyStoreTimeout, ) - // stores = query.NewStoreSet( - // logger, - // reg, - // func() (specs []query.StoreSpec) { - - // // Add strict & static nodes. - // for _, addr := range strictStores { - // specs = append(specs, query.NewGRPCStoreSpec(addr, true)) - // } - // // Add DNS resolved addresses from static flags and file SD. 
- // for _, addr := range dnsStoreProvider.Addresses() { - // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) - // } - // return removeDuplicateStoreSpecs(logger, duplicatedStores, specs) - // }, - // func() (specs []query.RuleSpec) { - // for _, addr := range dnsRuleProvider.Addresses() { - // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) - // } - - // // NOTE(s-urbaniak): No need to remove duplicates, as rule apis are a subset of store apis. - // // hence, any duplicates will be tracked in the store api set. - - // return specs - // }, - // func() (specs []query.TargetSpec) { - // for _, addr := range dnsTargetProvider.Addresses() { - // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) - // } - - // return specs - // }, - // func() (specs []query.MetadataSpec) { - // for _, addr := range dnsMetadataProvider.Addresses() { - // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) - // } - - // return specs - // }, - // func() (specs []query.ExemplarSpec) { - // for _, addr := range dnsExemplarProvider.Addresses() { - // specs = append(specs, query.NewGRPCStoreSpec(addr, false)) - // } - - // return specs - // }, - // dialOpts, - // unhealthyStoreTimeout, - // ) proxy = store.NewProxyStore(logger, reg, endpoints.GetStoreClients, component.Query, selectorLset, storeResponseTimeout) rulesProxy = rules.NewProxy(logger, endpoints.GetRulesClients) targetsProxy = targets.NewProxy(logger, endpoints.GetTargetsClients) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index b3eaf12412..402f7ecdbf 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -103,6 +103,7 @@ func (es *grpcEndpointSpec) getMetadataUsingStoreAPI(ctx context.Context, client MinTime: resp.MinTime, MaxTime: resp.MaxTime, }, + Rules: &infopb.RulesInfo{}, }, }, nil } diff --git a/pkg/query/storeset.go b/pkg/query/storeset.go deleted file mode 100644 index 7da536ef0a..0000000000 --- a/pkg/query/storeset.go +++ /dev/null @@ -1,731 +0,0 @@ -// Copyright (c) The Thanos Authors. -// Licensed under the Apache License 2.0. - -package query - -import ( - "context" - "fmt" - "math" - "sort" - "sync" - "time" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/prometheus/pkg/labels" - "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" - "google.golang.org/grpc" - - "github.com/thanos-io/thanos/pkg/component" - "github.com/thanos-io/thanos/pkg/metadata/metadatapb" - "github.com/thanos-io/thanos/pkg/rules/rulespb" - "github.com/thanos-io/thanos/pkg/runutil" - "github.com/thanos-io/thanos/pkg/store" - "github.com/thanos-io/thanos/pkg/store/labelpb" - "github.com/thanos-io/thanos/pkg/store/storepb" - "github.com/thanos-io/thanos/pkg/targets/targetspb" -) - -const ( - unhealthyStoreMessage = "removing store because it's unhealthy or does not exist" -) - -type StoreSpec interface { - // StoreAddrSpec Addr returns StoreAPI Address for the store spec. It is used as ID for store. - StoreAddrSpec - - // Metadata returns current labels, store type and min, max ranges for store. - // It can change for every call for this method. - // If metadata call fails we assume that store is no longer accessible and we should not use it. - // NOTE: It is implementation responsibility to retry until context timeout, but a caller responsibility to manage - // given store connection. 
- Metadata(ctx context.Context, client storepb.StoreClient) (labelSets []labels.Labels, mint int64, maxt int64, storeType component.StoreAPI, err error) - - // StrictStatic returns true if the StoreAPI has been statically defined and it is under a strict mode. - StrictStatic() bool -} - -type StoreAddrSpec interface { - // Addr returns Store address for the rules spec. It is used as its ID. - Addr() string -} - -type RuleSpec interface { - // StoreAddrSpec Addr returns StoreAPI Address for the store spec. It is used as ID for store. - StoreAddrSpec -} - -type TargetSpec interface { - // StoreAddrSpec Addr returns StoreAPI Address for the store spec. It is used as ID for store. - StoreAddrSpec -} - -type MetadataSpec interface { - // StoreAddrSpec Addr returns StoreAPI Address for the store spec. It is used as ID for store. - StoreAddrSpec -} - -type ExemplarSpec interface { - // StoreAddrSpec Addr returns StoreAPI Address for the store spec. It is used as ID for store. - StoreAddrSpec -} - -type StoreStatus struct { - Name string `json:"name"` - LastCheck time.Time `json:"lastCheck"` - LastError *stringError `json:"lastError"` - LabelSets []labels.Labels `json:"labelSets"` - StoreType component.StoreAPI `json:"-"` - MinTime int64 `json:"minTime"` - MaxTime int64 `json:"maxTime"` -} - -type grpcStoreSpec struct { - addr string - strictstatic bool -} - -// NewGRPCStoreSpec creates store pure gRPC spec. -// It uses Info gRPC call to get Metadata. -func NewGRPCStoreSpec(addr string, strictstatic bool) StoreSpec { - return &grpcStoreSpec{addr: addr, strictstatic: strictstatic} -} - -// StrictStatic returns true if the StoreAPI has been statically defined and it is under a strict mode. -func (s *grpcStoreSpec) StrictStatic() bool { - return s.strictstatic -} - -func (s *grpcStoreSpec) Addr() string { - // API addr should not change between state changes. - return s.addr -} - -// Metadata method for gRPC store API tries to reach host Info method until context timeout. If we are unable to get metadata after -// that time, we assume that the host is unhealthy and return error. -func (s *grpcStoreSpec) Metadata(ctx context.Context, client storepb.StoreClient) (labelSets []labels.Labels, mint, maxt int64, Type component.StoreAPI, err error) { - resp, err := client.Info(ctx, &storepb.InfoRequest{}, grpc.WaitForReady(true)) - if err != nil { - return nil, 0, 0, nil, errors.Wrapf(err, "fetching store info from %s", s.addr) - } - if len(resp.LabelSets) == 0 && len(resp.Labels) > 0 { - resp.LabelSets = []labelpb.ZLabelSet{{Labels: resp.Labels}} - } - - labelSets = make([]labels.Labels, 0, len(resp.LabelSets)) - for _, ls := range resp.LabelSets { - labelSets = append(labelSets, ls.PromLabels()) - } - return labelSets, resp.MinTime, resp.MaxTime, component.FromProto(resp.StoreType), nil -} - -// storeSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. -// A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series. -type storeSetNodeCollector struct { - mtx sync.Mutex - storeNodes map[component.StoreAPI]map[string]int - storePerExtLset map[string]int - - connectionsDesc *prometheus.Desc -} - -func newStoreSetNodeCollector() *storeSetNodeCollector { - return &storeSetNodeCollector{ - storeNodes: map[component.StoreAPI]map[string]int{}, - connectionsDesc: prometheus.NewDesc( - "thanos_store_nodes_grpc_connections", - "Number of gRPC connection to Store APIs. 
Opened connection means healthy store APIs available for Querier.", - []string{"external_labels", "store_type"}, nil, - ), - } -} - -func (c *storeSetNodeCollector) Update(nodes map[component.StoreAPI]map[string]int) { - storeNodes := make(map[component.StoreAPI]map[string]int, len(nodes)) - storePerExtLset := map[string]int{} - - for k, v := range nodes { - storeNodes[k] = make(map[string]int, len(v)) - for kk, vv := range v { - storePerExtLset[kk] += vv - storeNodes[k][kk] = vv - } - } - - c.mtx.Lock() - defer c.mtx.Unlock() - c.storeNodes = storeNodes - c.storePerExtLset = storePerExtLset -} - -func (c *storeSetNodeCollector) Describe(ch chan<- *prometheus.Desc) { - ch <- c.connectionsDesc -} - -func (c *storeSetNodeCollector) Collect(ch chan<- prometheus.Metric) { - c.mtx.Lock() - defer c.mtx.Unlock() - - for storeType, occurrencesPerExtLset := range c.storeNodes { - for externalLabels, occurrences := range occurrencesPerExtLset { - var storeTypeStr string - if storeType != nil { - storeTypeStr = storeType.String() - } - ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences), externalLabels, storeTypeStr) - } - } -} - -// StoreSet maintains a set of active stores. It is backed up by Store Specifications that are dynamically fetched on -// every Update() call. -type StoreSet struct { - logger log.Logger - - // Store specifications can change dynamically. If some store is missing from the list, we assuming it is no longer - // accessible and we close gRPC client for it. - storeSpecs func() []StoreSpec - ruleSpecs func() []RuleSpec - targetSpecs func() []TargetSpec - metadataSpecs func() []MetadataSpec - exemplarSpecs func() []ExemplarSpec - dialOpts []grpc.DialOption - gRPCInfoCallTimeout time.Duration - - updateMtx sync.Mutex - storesMtx sync.RWMutex - storesStatusesMtx sync.RWMutex - - // Main map of stores currently used for fanout. - stores map[string]*storeRef - storesMetric *storeSetNodeCollector - - // Map of statuses used only by UI. - storeStatuses map[string]*StoreStatus - unhealthyStoreTimeout time.Duration -} - -// NewStoreSet returns a new set of store APIs and potentially Rules APIs from given specs. 
-func NewStoreSet( - logger log.Logger, - reg *prometheus.Registry, - storeSpecs func() []StoreSpec, - ruleSpecs func() []RuleSpec, - targetSpecs func() []TargetSpec, - metadataSpecs func() []MetadataSpec, - exemplarSpecs func() []ExemplarSpec, - dialOpts []grpc.DialOption, - unhealthyStoreTimeout time.Duration, -) *StoreSet { - storesMetric := newStoreSetNodeCollector() - if reg != nil { - reg.MustRegister(storesMetric) - } - - if logger == nil { - logger = log.NewNopLogger() - } - if storeSpecs == nil { - storeSpecs = func() []StoreSpec { return nil } - } - if ruleSpecs == nil { - ruleSpecs = func() []RuleSpec { return nil } - } - if targetSpecs == nil { - targetSpecs = func() []TargetSpec { return nil } - } - if metadataSpecs == nil { - metadataSpecs = func() []MetadataSpec { return nil } - } - if exemplarSpecs == nil { - exemplarSpecs = func() []ExemplarSpec { return nil } - } - - ss := &StoreSet{ - logger: log.With(logger, "component", "storeset"), - storeSpecs: storeSpecs, - ruleSpecs: ruleSpecs, - targetSpecs: targetSpecs, - metadataSpecs: metadataSpecs, - exemplarSpecs: exemplarSpecs, - dialOpts: dialOpts, - storesMetric: storesMetric, - gRPCInfoCallTimeout: 5 * time.Second, - stores: make(map[string]*storeRef), - storeStatuses: make(map[string]*StoreStatus), - unhealthyStoreTimeout: unhealthyStoreTimeout, - } - return ss -} - -// TODO(bwplotka): Consider moving storeRef out of this package and renaming it, as it also supports rules API. -type storeRef struct { - storepb.StoreClient - - mtx sync.RWMutex - cc *grpc.ClientConn - addr string - // If rule is not nil, then this store also supports rules API. - rule rulespb.RulesClient - metadata metadatapb.MetadataClient - - // If exemplar is not nil, then this store also support exemplars API. - exemplar exemplarspb.ExemplarsClient - - // If target is not nil, then this store also supports targets API. - target targetspb.TargetsClient - - // Meta (can change during runtime). - labelSets []labels.Labels - storeType component.StoreAPI - minTime int64 - maxTime int64 - - logger log.Logger -} - -func (s *storeRef) Update(labelSets []labels.Labels, minTime, maxTime int64, storeType component.StoreAPI, rule rulespb.RulesClient, target targetspb.TargetsClient, metadata metadatapb.MetadataClient, exemplar exemplarspb.ExemplarsClient) { - s.mtx.Lock() - defer s.mtx.Unlock() - - s.storeType = storeType - s.labelSets = labelSets - s.minTime = minTime - s.maxTime = maxTime - s.rule = rule - s.target = target - s.metadata = metadata - s.exemplar = exemplar -} - -func (s *storeRef) StoreType() component.StoreAPI { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.storeType -} - -func (s *storeRef) HasRulesAPI() bool { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.rule != nil -} - -func (s *storeRef) HasTargetsAPI() bool { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.target != nil -} - -func (s *storeRef) HasMetadataAPI() bool { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.metadata != nil -} - -func (s *storeRef) HasExemplarsAPI() bool { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.exemplar != nil -} - -func (s *storeRef) LabelSets() []labels.Labels { - s.mtx.RLock() - defer s.mtx.RUnlock() - - labelSet := make([]labels.Labels, 0, len(s.labelSets)) - for _, ls := range s.labelSets { - if len(ls) == 0 { - continue - } - // Compatibility label for Queriers pre 0.8.1. Filter it out now. 
- if ls[0].Name == store.CompatibilityTypeLabelName { - continue - } - labelSet = append(labelSet, ls.Copy()) - } - return labelSet -} - -func (s *storeRef) TimeRange() (mint, maxt int64) { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.minTime, s.maxTime -} - -func (s *storeRef) String() string { - mint, maxt := s.TimeRange() - return fmt.Sprintf("Addr: %s LabelSets: %v Mint: %d Maxt: %d", s.addr, labelpb.PromLabelSetsToString(s.LabelSets()), mint, maxt) -} - -func (s *storeRef) Addr() string { - return s.addr -} - -func (s *storeRef) Close() { - runutil.CloseWithLogOnErr(s.logger, s.cc, fmt.Sprintf("store %v connection close", s.addr)) -} - -func newStoreAPIStats() map[component.StoreAPI]map[string]int { - nodes := make(map[component.StoreAPI]map[string]int, len(storepb.StoreType_name)) - for i := range storepb.StoreType_name { - nodes[component.FromProto(storepb.StoreType(i))] = map[string]int{} - } - return nodes -} - -// Update updates the store set. It fetches current list of store specs from function and updates the fresh metadata -// from all stores. Keeps around statically defined nodes that were defined with the strict mode. -func (s *StoreSet) Update(ctx context.Context) { - s.updateMtx.Lock() - defer s.updateMtx.Unlock() - - s.storesMtx.RLock() - stores := make(map[string]*storeRef, len(s.stores)) - for addr, st := range s.stores { - stores[addr] = st - } - s.storesMtx.RUnlock() - - level.Debug(s.logger).Log("msg", "starting updating storeAPIs", "cachedStores", len(stores)) - - activeStores := s.getActiveStores(ctx, stores) - level.Debug(s.logger).Log("msg", "checked requested storeAPIs", "activeStores", len(activeStores), "cachedStores", len(stores)) - - stats := newStoreAPIStats() - - // Close stores that where not active this time (are not in active stores map). - for addr, st := range stores { - if _, ok := activeStores[addr]; ok { - stats[st.StoreType()][labelpb.PromLabelSetsToString(st.LabelSets())]++ - continue - } - - st.Close() - delete(stores, addr) - s.updateStoreStatus(st, errors.New(unhealthyStoreMessage)) - level.Info(s.logger).Log("msg", unhealthyStoreMessage, "address", addr, "extLset", labelpb.PromLabelSetsToString(st.LabelSets())) - } - - // Add stores that are not yet in stores. - for addr, st := range activeStores { - if _, ok := stores[addr]; ok { - continue - } - - extLset := labelpb.PromLabelSetsToString(st.LabelSets()) - - // All producers should have unique external labels. While this does not check only StoreAPIs connected to - // this querier this allows to notify early user about misconfiguration. Warn only. This is also detectable from metric. - if st.StoreType() != nil && - (st.StoreType() == component.Sidecar || st.StoreType() == component.Rule) && - stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { - - level.Warn(s.logger).Log("msg", "found duplicate storeAPI producer (sidecar or ruler). 
This is not advices as it will malform data in in the same bucket", - "address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", stats[component.Sidecar][extLset]+stats[component.Rule][extLset]+1)) - } - stats[st.StoreType()][extLset]++ - - stores[addr] = st - s.updateStoreStatus(st, nil) - - if st.HasRulesAPI() { - level.Info(s.logger).Log("msg", "adding new rulesAPI to query storeset", "address", addr) - } - - if st.HasExemplarsAPI() { - level.Info(s.logger).Log("msg", "adding new exemplarsAPI to query storeset", "address", addr) - } - - if st.HasTargetsAPI() { - level.Info(s.logger).Log("msg", "adding new targetsAPI to query storeset", "address", addr) - } - - level.Info(s.logger).Log("msg", "adding new storeAPI to query storeset", "address", addr, "extLset", extLset) - } - - s.storesMetric.Update(stats) - s.storesMtx.Lock() - s.stores = stores - s.storesMtx.Unlock() - - s.cleanUpStoreStatuses(stores) -} - -func (s *StoreSet) getActiveStores(ctx context.Context, stores map[string]*storeRef) map[string]*storeRef { - var ( - // UNIQUE? - activeStores = make(map[string]*storeRef, len(stores)) - mtx sync.Mutex - wg sync.WaitGroup - - storeAddrSet = make(map[string]struct{}) - ruleAddrSet = make(map[string]struct{}) - targetAddrSet = make(map[string]struct{}) - metadataAddrSet = make(map[string]struct{}) - exemplarAddrSet = make(map[string]struct{}) - ) - - // Gather active stores map concurrently. Build new store if does not exist already. - for _, ruleSpec := range s.ruleSpecs() { - ruleAddrSet[ruleSpec.Addr()] = struct{}{} - } - - // Gather active targets map concurrently. Add a new target if it does not exist already. - for _, targetSpec := range s.targetSpecs() { - targetAddrSet[targetSpec.Addr()] = struct{}{} - } - - // Gather active stores map concurrently. Build new store if does not exist already. - for _, metadataSpec := range s.metadataSpecs() { - metadataAddrSet[metadataSpec.Addr()] = struct{}{} - } - - // Gather active stores map concurrently. Build new store if does not exist already. - for _, exemplarSpec := range s.exemplarSpecs() { - exemplarAddrSet[exemplarSpec.Addr()] = struct{}{} - } - - // Gather healthy stores map concurrently. Build new store if does not exist already. - for _, storeSpec := range s.storeSpecs() { - if _, ok := storeAddrSet[storeSpec.Addr()]; ok { - level.Warn(s.logger).Log("msg", "duplicated address in store nodes", "address", storeSpec.Addr()) - continue - } - storeAddrSet[storeSpec.Addr()] = struct{}{} - - wg.Add(1) - go func(spec StoreSpec) { - defer wg.Done() - - addr := spec.Addr() - - ctx, cancel := context.WithTimeout(ctx, s.gRPCInfoCallTimeout) - defer cancel() - - st, seenAlready := stores[addr] - if !seenAlready { - // New store or was unactive and was removed in the past - create new one. - conn, err := grpc.DialContext(ctx, addr, s.dialOpts...) 
- if err != nil { - s.updateStoreStatus(&storeRef{addr: addr}, err) - level.Warn(s.logger).Log("msg", "update of store node failed", "err", errors.Wrap(err, "dialing connection"), "address", addr) - return - } - - st = &storeRef{StoreClient: storepb.NewStoreClient(conn), storeType: component.UnknownStoreAPI, cc: conn, addr: addr, logger: s.logger} - if spec.StrictStatic() { - st.maxTime = math.MaxInt64 - } - } - - var rule rulespb.RulesClient - if _, ok := ruleAddrSet[addr]; ok { - rule = rulespb.NewRulesClient(st.cc) - } - - var target targetspb.TargetsClient - if _, ok := targetAddrSet[addr]; ok { - target = targetspb.NewTargetsClient(st.cc) - } - - var metadata metadatapb.MetadataClient - if _, ok := metadataAddrSet[addr]; ok { - metadata = metadatapb.NewMetadataClient(st.cc) - } - - var exemplar exemplarspb.ExemplarsClient - if _, ok := exemplarAddrSet[addr]; ok { - exemplar = exemplarspb.NewExemplarsClient(st.cc) - } - - // Check existing or new store. Is it healthy? What are current metadata? - labelSets, minTime, maxTime, storeType, err := spec.Metadata(ctx, st.StoreClient) - if err != nil { - if !seenAlready && !spec.StrictStatic() { - // Close only if new and not a strict static node. - // Unactive `s.stores` will be closed later on. - st.Close() - } - s.updateStoreStatus(st, err) - level.Warn(s.logger).Log("msg", "update of store node failed", "err", errors.Wrap(err, "getting metadata"), "address", addr) - - if !spec.StrictStatic() { - return - } - - // Still keep it around if static & strict mode enabled. - mtx.Lock() - defer mtx.Unlock() - - activeStores[addr] = st - return - } - - s.updateStoreStatus(st, nil) - st.Update(labelSets, minTime, maxTime, storeType, rule, target, metadata, exemplar) - - mtx.Lock() - defer mtx.Unlock() - - activeStores[addr] = st - }(storeSpec) - } - wg.Wait() - - for ruleAddr := range ruleAddrSet { - if _, ok := storeAddrSet[ruleAddr]; !ok { - level.Warn(s.logger).Log("msg", "ignored rule store", "address", ruleAddr) - } - } - return activeStores -} - -func (s *StoreSet) updateStoreStatus(store *storeRef, err error) { - s.storesStatusesMtx.Lock() - defer s.storesStatusesMtx.Unlock() - - status := StoreStatus{Name: store.addr} - prev, ok := s.storeStatuses[store.addr] - if ok { - status = *prev - } else { - mint, maxt := store.TimeRange() - status.MinTime = mint - status.MaxTime = maxt - } - - if err == nil { - status.LastCheck = time.Now() - mint, maxt := store.TimeRange() - status.LabelSets = store.LabelSets() - status.StoreType = store.StoreType() - status.MinTime = mint - status.MaxTime = maxt - status.LastError = nil - } else { - status.LastError = &stringError{originalErr: err} - } - - s.storeStatuses[store.addr] = &status -} - -func (s *StoreSet) GetStoreStatus() []StoreStatus { - s.storesStatusesMtx.RLock() - defer s.storesStatusesMtx.RUnlock() - - statuses := make([]StoreStatus, 0, len(s.storeStatuses)) - for _, v := range s.storeStatuses { - statuses = append(statuses, *v) - } - - sort.Slice(statuses, func(i, j int) bool { - return statuses[i].Name < statuses[j].Name - }) - return statuses -} - -// Get returns a list of all active stores. -func (s *StoreSet) Get() []store.Client { - s.storesMtx.RLock() - defer s.storesMtx.RUnlock() - - stores := make([]store.Client, 0, len(s.stores)) - for _, st := range s.stores { - stores = append(stores, st) - } - return stores -} - -// GetRulesClients returns a list of all active rules clients. 
-func (s *StoreSet) GetRulesClients() []rulespb.RulesClient { - s.storesMtx.RLock() - defer s.storesMtx.RUnlock() - - rules := make([]rulespb.RulesClient, 0, len(s.stores)) - for _, st := range s.stores { - if st.HasRulesAPI() { - rules = append(rules, st.rule) - } - } - return rules -} - -// GetTargetsClients returns a list of all active targets clients. -func (s *StoreSet) GetTargetsClients() []targetspb.TargetsClient { - s.storesMtx.RLock() - defer s.storesMtx.RUnlock() - - targets := make([]targetspb.TargetsClient, 0, len(s.stores)) - for _, st := range s.stores { - if st.HasTargetsAPI() { - targets = append(targets, st.target) - } - } - return targets -} - -// GetMetadataClients returns a list of all active metadata clients. -func (s *StoreSet) GetMetadataClients() []metadatapb.MetadataClient { - s.storesMtx.RLock() - defer s.storesMtx.RUnlock() - - metadataClients := make([]metadatapb.MetadataClient, 0, len(s.stores)) - for _, st := range s.stores { - if st.HasMetadataAPI() { - metadataClients = append(metadataClients, st.metadata) - } - } - return metadataClients -} - -// GetExemplarsStores returns a list of all active exemplars stores. -func (s *StoreSet) GetExemplarsStores() []*exemplarspb.ExemplarStore { - s.storesMtx.RLock() - defer s.storesMtx.RUnlock() - - exemplarStores := make([]*exemplarspb.ExemplarStore, 0, len(s.stores)) - for _, st := range s.stores { - if st.HasExemplarsAPI() { - exemplarStores = append(exemplarStores, &exemplarspb.ExemplarStore{ - ExemplarsClient: st.exemplar, - LabelSets: st.labelSets, - }) - } - } - return exemplarStores -} - -func (s *StoreSet) Close() { - s.storesMtx.Lock() - defer s.storesMtx.Unlock() - - for _, st := range s.stores { - st.Close() - } - s.stores = map[string]*storeRef{} -} - -func (s *StoreSet) cleanUpStoreStatuses(stores map[string]*storeRef) { - s.storesStatusesMtx.Lock() - defer s.storesStatusesMtx.Unlock() - - now := time.Now() - for addr, status := range s.storeStatuses { - if _, ok := stores[addr]; ok { - continue - } - - if now.Sub(status.LastCheck) >= s.unhealthyStoreTimeout { - delete(s.storeStatuses, addr) - } - } -} diff --git a/pkg/query/storeset_test.go b/pkg/query/storeset_test.go deleted file mode 100644 index bf3e2fe8ea..0000000000 --- a/pkg/query/storeset_test.go +++ /dev/null @@ -1,1076 +0,0 @@ -// Copyright (c) The Thanos Authors. -// Licensed under the Apache License 2.0. 
- -package query - -import ( - "context" - "encoding/json" - "fmt" - "math" - "net" - "testing" - "time" - - "github.com/pkg/errors" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - "github.com/thanos-io/thanos/pkg/component" - "github.com/thanos-io/thanos/pkg/store" - "github.com/thanos-io/thanos/pkg/store/labelpb" - "github.com/thanos-io/thanos/pkg/store/storepb" - "github.com/thanos-io/thanos/pkg/testutil" -) - -type mockedStore struct { - infoDelay time.Duration - info storepb.InfoResponse -} - -func (s *mockedStore) Info(ctx context.Context, r *storepb.InfoRequest) (*storepb.InfoResponse, error) { - if s.infoDelay > 0 { - time.Sleep(s.infoDelay) - } - return &s.info, nil -} - -func (s *mockedStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error { - return status.Error(codes.Unimplemented, "not implemented") -} - -func (s *mockedStore) LabelNames(ctx context.Context, r *storepb.LabelNamesRequest) ( - *storepb.LabelNamesResponse, error, -) { - return nil, status.Error(codes.Unimplemented, "not implemented") -} - -func (s *mockedStore) LabelValues(ctx context.Context, r *storepb.LabelValuesRequest) ( - *storepb.LabelValuesResponse, error, -) { - return nil, status.Error(codes.Unimplemented, "not implemented") -} - -type testStoreMeta struct { - extlsetFn func(addr string) []labelpb.ZLabelSet - storeType component.StoreAPI - minTime, maxTime int64 - infoDelay time.Duration -} - -type testStores struct { - srvs map[string]*grpc.Server - orderAddrs []string -} - -func startTestStores(storeMetas []testStoreMeta) (*testStores, error) { - st := &testStores{ - srvs: map[string]*grpc.Server{}, - } - - for _, meta := range storeMetas { - listener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - // Close so far started servers. - st.Close() - return nil, err - } - - srv := grpc.NewServer() - - storeSrv := &mockedStore{ - info: storepb.InfoResponse{ - LabelSets: meta.extlsetFn(listener.Addr().String()), - MaxTime: meta.maxTime, - MinTime: meta.minTime, - }, - infoDelay: meta.infoDelay, - } - if meta.storeType != nil { - storeSrv.info.StoreType = meta.storeType.ToProto() - } - storepb.RegisterStoreServer(srv, storeSrv) - go func() { - _ = srv.Serve(listener) - }() - - st.srvs[listener.Addr().String()] = srv - st.orderAddrs = append(st.orderAddrs, listener.Addr().String()) - } - - return st, nil -} - -func (s *testStores) StoreAddresses() []string { - var stores []string - stores = append(stores, s.orderAddrs...) 
- return stores -} - -func (s *testStores) Close() { - for _, srv := range s.srvs { - srv.Stop() - } - s.srvs = nil -} - -func (s *testStores) CloseOne(addr string) { - srv, ok := s.srvs[addr] - if !ok { - return - } - - srv.Stop() - delete(s.srvs, addr) -} - -func TestStoreSet_Update(t *testing.T) { - stores, err := startTestStores([]testStoreMeta{ - { - storeType: component.Sidecar, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "addr", Value: addr}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "a", Value: "b"}, - }, - }, - } - }, - }, - { - storeType: component.Sidecar, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "addr", Value: addr}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "a", Value: "b"}, - }, - }, - } - }, - }, - { - storeType: component.Query, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "a", Value: "broken"}, - }, - }, - } - }, - }, - }) - testutil.Ok(t, err) - defer stores.Close() - - discoveredStoreAddr := stores.StoreAddresses() - - // Testing if duplicates can cause weird results. - discoveredStoreAddr = append(discoveredStoreAddr, discoveredStoreAddr[0]) - storeSet := NewStoreSet(nil, nil, - func() (specs []StoreSpec) { - for _, addr := range discoveredStoreAddr { - specs = append(specs, NewGRPCStoreSpec(addr, false)) - } - return specs - }, - func() (specs []RuleSpec) { - return nil - }, - func() (specs []TargetSpec) { - return nil - }, - func() (specs []MetadataSpec) { - return nil - }, - func() (specs []ExemplarSpec) { - return nil - }, - testGRPCOpts, time.Minute) - storeSet.gRPCInfoCallTimeout = 2 * time.Second - defer storeSet.Close() - - // Initial update. - storeSet.Update(context.Background()) - - // Start with one not available. - stores.CloseOne(discoveredStoreAddr[2]) - - // Should not matter how many of these we run. - storeSet.Update(context.Background()) - storeSet.Update(context.Background()) - testutil.Equals(t, 2, len(storeSet.stores)) - testutil.Equals(t, 3, len(storeSet.storeStatuses)) - - for addr, st := range storeSet.stores { - testutil.Equals(t, addr, st.addr) - - lset := st.LabelSets() - testutil.Equals(t, 2, len(lset)) - testutil.Equals(t, "addr", lset[0][0].Name) - testutil.Equals(t, addr, lset[0][0].Value) - testutil.Equals(t, "a", lset[1][0].Name) - testutil.Equals(t, "b", lset[1][0].Value) - } - - // Check stats. - expected := newStoreAPIStats() - expected[component.Sidecar] = map[string]int{ - fmt.Sprintf(`{a="b"},{addr="%s"}`, discoveredStoreAddr[0]): 1, - fmt.Sprintf(`{a="b"},{addr="%s"}`, discoveredStoreAddr[1]): 1, - } - testutil.Equals(t, expected, storeSet.storesMetric.storeNodes) - - // Remove address from discovered and reset last check, which should ensure cleanup of status on next update. - storeSet.storeStatuses[discoveredStoreAddr[2]].LastCheck = time.Now().Add(-4 * time.Minute) - discoveredStoreAddr = discoveredStoreAddr[:len(discoveredStoreAddr)-2] - storeSet.Update(context.Background()) - testutil.Equals(t, 2, len(storeSet.storeStatuses)) - - stores.CloseOne(discoveredStoreAddr[0]) - delete(expected[component.Sidecar], fmt.Sprintf(`{a="b"},{addr="%s"}`, discoveredStoreAddr[0])) - - // We expect Update to tear down store client for closed store server. 
- storeSet.Update(context.Background()) - testutil.Equals(t, 1, len(storeSet.stores), "only one service should respond just fine, so we expect one client to be ready.") - testutil.Equals(t, 2, len(storeSet.storeStatuses)) - - addr := discoveredStoreAddr[1] - st, ok := storeSet.stores[addr] - testutil.Assert(t, ok, "addr exist") - testutil.Equals(t, addr, st.addr) - - lset := st.LabelSets() - testutil.Equals(t, 2, len(lset)) - testutil.Equals(t, "addr", lset[0][0].Name) - testutil.Equals(t, addr, lset[0][0].Value) - testutil.Equals(t, "a", lset[1][0].Name) - testutil.Equals(t, "b", lset[1][0].Value) - testutil.Equals(t, expected, storeSet.storesMetric.storeNodes) - - // New big batch of storeAPIs. - stores2, err := startTestStores([]testStoreMeta{ - { - storeType: component.Query, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "l3", Value: "v4"}, - }, - }, - } - }, - }, - { - // Duplicated Querier, in previous versions it would be deduplicated. Now it should be not. - storeType: component.Query, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "l3", Value: "v4"}, - }, - }, - } - }, - }, - { - storeType: component.Sidecar, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - { - // Duplicated Sidecar, in previous versions it would be deduplicated. Now it should be not. - storeType: component.Sidecar, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - { - // Querier that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. - storeType: component.Query, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - { - // Ruler that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. - // Warning should be produced. - storeType: component.Rule, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - { - // Duplicated Rule, in previous versions it would be deduplicated. Now it should be not. Warning should be produced. - storeType: component.Rule, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - { - // No storeType. - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "no-store-type"}, - {Name: "l2", Value: "v3"}, - }, - }, - } - }, - }, - // Two pre v0.8.0 store gateway nodes, they don't have ext labels set. 
- { - storeType: component.Store, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - }, - { - storeType: component.Store, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - }, - // Regression tests against https://github.com/thanos-io/thanos/issues/1632: From v0.8.0 stores advertise labels. - // If the object storage handled by store gateway has only one sidecar we used to hitting issue. - { - storeType: component.Store, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "l3", Value: "v4"}, - }, - }, - } - }, - }, - // Stores v0.8.1 has compatibility labels. Check if they are correctly removed. - { - storeType: component.Store, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "l3", Value: "v4"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: store.CompatibilityTypeLabelName, Value: "store"}, - }, - }, - } - }, - }, - // Duplicated store, in previous versions it would be deduplicated. Now it should be not. - { - storeType: component.Store, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - {Name: "l1", Value: "v2"}, - {Name: "l2", Value: "v3"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: "l3", Value: "v4"}, - }, - }, - { - Labels: []labelpb.ZLabel{ - {Name: store.CompatibilityTypeLabelName, Value: "store"}, - }, - }, - } - }, - }, - }) - testutil.Ok(t, err) - defer stores2.Close() - - discoveredStoreAddr = append(discoveredStoreAddr, stores2.StoreAddresses()...) - - // New stores should be loaded. - storeSet.Update(context.Background()) - testutil.Equals(t, 1+len(stores2.srvs), len(storeSet.stores)) - - // Check stats. - expected = newStoreAPIStats() - expected[component.UnknownStoreAPI] = map[string]int{ - `{l1="no-store-type", l2="v3"}`: 1, - } - expected[component.Query] = map[string]int{ - `{l1="v2", l2="v3"}`: 1, - `{l1="v2", l2="v3"},{l3="v4"}`: 2, - } - expected[component.Rule] = map[string]int{ - `{l1="v2", l2="v3"}`: 2, - } - expected[component.Sidecar] = map[string]int{ - fmt.Sprintf(`{a="b"},{addr="%s"}`, discoveredStoreAddr[1]): 1, - `{l1="v2", l2="v3"}`: 2, - } - expected[component.Store] = map[string]int{ - ``: 2, - `{l1="v2", l2="v3"},{l3="v4"}`: 3, - } - testutil.Equals(t, expected, storeSet.storesMetric.storeNodes) - - // Check statuses. 
- testutil.Equals(t, 2+len(stores2.srvs), len(storeSet.storeStatuses)) -} - -func TestStoreSet_Update_NoneAvailable(t *testing.T) { - st, err := startTestStores([]testStoreMeta{ - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - { - Name: "addr", - Value: addr, - }, - }, - }, - } - }, - storeType: component.Sidecar, - }, - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - { - Name: "addr", - Value: addr, - }, - }, - }, - } - }, - storeType: component.Sidecar, - }, - }) - testutil.Ok(t, err) - defer st.Close() - - initialStoreAddr := st.StoreAddresses() - st.CloseOne(initialStoreAddr[0]) - st.CloseOne(initialStoreAddr[1]) - - storeSet := NewStoreSet(nil, nil, - func() (specs []StoreSpec) { - for _, addr := range initialStoreAddr { - specs = append(specs, NewGRPCStoreSpec(addr, false)) - } - return specs - }, - func() (specs []RuleSpec) { return nil }, - func() (specs []TargetSpec) { return nil }, - func() (specs []MetadataSpec) { return nil }, - func() (specs []ExemplarSpec) { return nil }, - testGRPCOpts, time.Minute) - storeSet.gRPCInfoCallTimeout = 2 * time.Second - - // Should not matter how many of these we run. - storeSet.Update(context.Background()) - storeSet.Update(context.Background()) - testutil.Equals(t, 0, len(storeSet.stores), "none of services should respond just fine, so we expect no client to be ready.") - - // Leak test will ensure that we don't keep client connection around. - - expected := newStoreAPIStats() - testutil.Equals(t, expected, storeSet.storesMetric.storeNodes) -} - -// TestQuerierStrict tests what happens when the strict mode is enabled/disabled. -func TestQuerierStrict(t *testing.T) { - st, err := startTestStores([]testStoreMeta{ - { - minTime: 12345, - maxTime: 54321, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - { - Name: "addr", - Value: addr, - }, - }, - }, - } - }, - storeType: component.Sidecar, - }, - { - minTime: 66666, - maxTime: 77777, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - { - Name: "addr", - Value: addr, - }, - }, - }, - } - }, - storeType: component.Sidecar, - }, - // Slow store. - { - minTime: 65644, - maxTime: 77777, - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{ - { - Labels: []labelpb.ZLabel{ - { - Name: "addr", - Value: addr, - }, - }, - }, - } - }, - storeType: component.Sidecar, - infoDelay: 2 * time.Second, - }, - }) - - testutil.Ok(t, err) - defer st.Close() - - staticStoreAddr := st.StoreAddresses()[0] - slowStaticStoreAddr := st.StoreAddresses()[2] - storeSet := NewStoreSet(nil, nil, func() (specs []StoreSpec) { - return []StoreSpec{ - NewGRPCStoreSpec(st.StoreAddresses()[0], true), - NewGRPCStoreSpec(st.StoreAddresses()[1], false), - NewGRPCStoreSpec(st.StoreAddresses()[2], true), - } - }, func() []RuleSpec { - return nil - }, func() []TargetSpec { - return nil - }, func() (specs []MetadataSpec) { - return nil - }, func() []ExemplarSpec { - return nil - }, testGRPCOpts, time.Minute) - defer storeSet.Close() - storeSet.gRPCInfoCallTimeout = 1 * time.Second - - // Initial update. - storeSet.Update(context.Background()) - testutil.Equals(t, 3, len(storeSet.stores), "three clients must be available for running store nodes") - - // The store has not responded to the info call and is assumed to cover everything. 
- curMin, curMax := storeSet.stores[slowStaticStoreAddr].minTime, storeSet.stores[slowStaticStoreAddr].maxTime - testutil.Assert(t, storeSet.stores[slowStaticStoreAddr].cc.GetState().String() != "SHUTDOWN", "slow store's connection should not be closed") - testutil.Equals(t, int64(0), curMin) - testutil.Equals(t, int64(math.MaxInt64), curMax) - - // The store is statically defined + strict mode is enabled - // so its client + information must be retained. - curMin, curMax = storeSet.stores[staticStoreAddr].minTime, storeSet.stores[staticStoreAddr].maxTime - testutil.Equals(t, int64(12345), curMin, "got incorrect minimum time") - testutil.Equals(t, int64(54321), curMax, "got incorrect minimum time") - - // Successfully retrieve the information and observe minTime/maxTime updating. - storeSet.gRPCInfoCallTimeout = 3 * time.Second - storeSet.Update(context.Background()) - updatedCurMin, updatedCurMax := storeSet.stores[slowStaticStoreAddr].minTime, storeSet.stores[slowStaticStoreAddr].maxTime - testutil.Equals(t, int64(65644), updatedCurMin) - testutil.Equals(t, int64(77777), updatedCurMax) - storeSet.gRPCInfoCallTimeout = 1 * time.Second - - // Turn off the stores. - st.Close() - - // Update again many times. Should not matter WRT the static one. - storeSet.Update(context.Background()) - storeSet.Update(context.Background()) - storeSet.Update(context.Background()) - - // Check that the information is the same. - testutil.Equals(t, 2, len(storeSet.stores), "two static clients must remain available") - testutil.Equals(t, curMin, storeSet.stores[staticStoreAddr].minTime, "minimum time reported by the store node is different") - testutil.Equals(t, curMax, storeSet.stores[staticStoreAddr].maxTime, "minimum time reported by the store node is different") - testutil.NotOk(t, storeSet.storeStatuses[staticStoreAddr].LastError.originalErr) - - testutil.Equals(t, updatedCurMin, storeSet.stores[slowStaticStoreAddr].minTime, "minimum time reported by the store node is different") - testutil.Equals(t, updatedCurMax, storeSet.stores[slowStaticStoreAddr].maxTime, "minimum time reported by the store node is different") -} - -func TestStoreSet_Update_Rules(t *testing.T) { - stores, err := startTestStores([]testStoreMeta{ - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - storeType: component.Sidecar, - }, - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - storeType: component.Rule, - }, - }) - testutil.Ok(t, err) - defer stores.Close() - - for _, tc := range []struct { - name string - storeSpecs func() []StoreSpec - ruleSpecs func() []RuleSpec - exemplarSpecs func() []ExemplarSpec - expectedStores int - expectedRules int - }{ - { - name: "stores, no rules", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - expectedStores: 2, - expectedRules: 0, - }, - { - name: "rules, no stores", - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 0, - expectedRules: 0, - }, - { - name: "one store, different rule", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - expectedStores: 1, - expectedRules: 0, - }, - { - name: "two stores, one rule", - storeSpecs: func() 
[]StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 2, - expectedRules: 1, - }, - { - name: "two stores, two rules", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - exemplarSpecs: func() []ExemplarSpec { - return []ExemplarSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - NewGRPCStoreSpec(stores.orderAddrs[1], false), - } - }, - expectedStores: 2, - expectedRules: 2, - }, - } { - storeSet := NewStoreSet(nil, nil, - tc.storeSpecs, - tc.ruleSpecs, - func() []TargetSpec { return nil }, - func() []MetadataSpec { return nil }, - tc.exemplarSpecs, - testGRPCOpts, time.Minute) - - t.Run(tc.name, func(t *testing.T) { - defer storeSet.Close() - storeSet.Update(context.Background()) - testutil.Equals(t, tc.expectedStores, len(storeSet.stores)) - - gotRules := 0 - for _, ref := range storeSet.stores { - if ref.HasRulesAPI() { - gotRules += 1 - } - } - - testutil.Equals(t, tc.expectedRules, gotRules) - }) - } -} - -func TestStoreSet_Rules_Discovery(t *testing.T) { - stores, err := startTestStores([]testStoreMeta{ - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - storeType: component.Sidecar, - }, - { - extlsetFn: func(addr string) []labelpb.ZLabelSet { - return []labelpb.ZLabelSet{} - }, - storeType: component.Rule, - }, - }) - testutil.Ok(t, err) - defer stores.Close() - - type discoveryState struct { - name string - storeSpecs func() []StoreSpec - ruleSpecs func() []RuleSpec - expectedStores int - expectedRules int - } - - for _, tc := range []struct { - states []discoveryState - name string - }{ - { - name: "StoreAPI and RulesAPI concurrent discovery", - states: []discoveryState{ - { - name: "no stores", - storeSpecs: nil, - ruleSpecs: nil, - expectedRules: 0, - expectedStores: 0, - }, - { - name: "RulesAPI discovered", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedRules: 1, - expectedStores: 1, - }, - }, - }, - - { - name: "StoreAPI discovery first, eventually discovered RulesAPI", - states: []discoveryState{ - { - name: "no stores", - storeSpecs: nil, - ruleSpecs: nil, - expectedRules: 0, - expectedStores: 0, - }, - { - name: "StoreAPI discovered, no RulesAPI discovered", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 1, - expectedRules: 0, - }, - { - name: "RulesAPI discovered", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 1, - expectedRules: 1, - }, - }, - }, - - { - name: "RulesAPI discovery first, eventually discovered StoreAPI", - states: []discoveryState{ - { - name: "no stores", - storeSpecs: nil, - ruleSpecs: nil, - expectedRules: 0, - expectedStores: 0, - }, - { - name: 
"RulesAPI discovered, no StoreAPI discovered", - storeSpecs: nil, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 0, - expectedRules: 0, - }, - { - name: "StoreAPI discovered", - storeSpecs: func() []StoreSpec { - return []StoreSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - ruleSpecs: func() []RuleSpec { - return []RuleSpec{ - NewGRPCStoreSpec(stores.orderAddrs[0], false), - } - }, - expectedStores: 1, - expectedRules: 1, - }, - }, - }, - } { - t.Run(tc.name, func(t *testing.T) { - currentState := 0 - - storeSet := NewStoreSet(nil, nil, - func() []StoreSpec { - if tc.states[currentState].storeSpecs == nil { - return nil - } - - return tc.states[currentState].storeSpecs() - }, - func() []RuleSpec { - if tc.states[currentState].ruleSpecs == nil { - return nil - } - - return tc.states[currentState].ruleSpecs() - }, - func() []TargetSpec { return nil }, - func() []MetadataSpec { - return nil - }, - func() []ExemplarSpec { return nil }, - testGRPCOpts, time.Minute) - - defer storeSet.Close() - - for { - storeSet.Update(context.Background()) - testutil.Equals( - t, - tc.states[currentState].expectedStores, - len(storeSet.stores), - "unexepected discovered stores in state %q", - tc.states[currentState].name, - ) - - gotRules := 0 - for _, ref := range storeSet.stores { - if ref.HasRulesAPI() { - gotRules += 1 - } - } - testutil.Equals( - t, - tc.states[currentState].expectedRules, - gotRules, - "unexpected discovered rules in state %q", - tc.states[currentState].name, - ) - - currentState = currentState + 1 - if len(tc.states) == currentState { - break - } - } - }) - } -} - -// Test highlights that without wrapping the error, it is marshaled to empty dict {}, not its message. -func TestStringError(t *testing.T) { - dictErr := &errThatMarshalsToEmptyDict{msg: "Error message"} - stringErr := &stringError{originalErr: dictErr} - - storestatusMock := map[string]error{} - storestatusMock["dictErr"] = dictErr - storestatusMock["stringErr"] = stringErr - - b, err := json.Marshal(storestatusMock) - - testutil.Ok(t, err) - testutil.Equals(t, []byte(`{"dictErr":{},"stringErr":"Error message"}`), b, "expected to get proper results") -} - -// Errors that usually marshal to empty dict should return the original error string. -func TestUpdateStoreStateLastError(t *testing.T) { - tcs := []struct { - InputError error - ExpectedLastErr string - }{ - {errors.New("normal_err"), `"normal_err"`}, - {nil, `null`}, - {&errThatMarshalsToEmptyDict{"the error message"}, `"the error message"`}, - } - - for _, tc := range tcs { - mockStoreSet := &StoreSet{ - storeStatuses: map[string]*StoreStatus{}, - } - mockStoreRef := &storeRef{ - addr: "mockedStore", - } - - mockStoreSet.updateStoreStatus(mockStoreRef, tc.InputError) - - b, err := json.Marshal(mockStoreSet.storeStatuses["mockedStore"].LastError) - testutil.Ok(t, err) - testutil.Equals(t, tc.ExpectedLastErr, string(b)) - } -} - -func TestUpdateStoreStateForgetsPreviousErrors(t *testing.T) { - mockStoreSet := &StoreSet{ - storeStatuses: map[string]*StoreStatus{}, - } - mockStoreRef := &storeRef{ - addr: "mockedStore", - } - - mockStoreSet.updateStoreStatus(mockStoreRef, errors.New("test err")) - - b, err := json.Marshal(mockStoreSet.storeStatuses["mockedStore"].LastError) - testutil.Ok(t, err) - testutil.Equals(t, `"test err"`, string(b)) - - // updating status without and error should clear the previous one. 
- mockStoreSet.updateStoreStatus(mockStoreRef, nil) - - b, err = json.Marshal(mockStoreSet.storeStatuses["mockedStore"].LastError) - testutil.Ok(t, err) - testutil.Equals(t, `null`, string(b)) -} From 9696234e61192f697c3eb5e2055fa1055f3c248f Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Thu, 26 Aug 2021 00:40:12 +0530 Subject: [PATCH 08/11] minor nits Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index 402f7ecdbf..79ed471b13 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -79,9 +79,9 @@ func (es *grpcEndpointSpec) Metadata(ctx context.Context, client *endpointClient resp, err := client.info.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) if err != nil { // Call Info method of StoreAPI, this way querier will be able to discovery old components not exposing InfoAPI. - metadata, err := es.getMetadataUsingStoreAPI(ctx, client.store) - if err != nil { - return nil, errors.Wrapf(err, "fetching info from %s", es.addr) + metadata, merr := es.getMetadataUsingStoreAPI(ctx, client.store) + if merr != nil { + return nil, errors.Wrapf(merr, "fallback fetching info from %s after err: %v", es.addr, err) } return metadata, nil } @@ -198,8 +198,8 @@ func (c *endpointSetNodeCollector) Collect(ch chan<- prometheus.Metric) { type EndpointSet struct { logger log.Logger - // Endpoint specifications can change dynamically. If some store is missing from the list, we assuming it is no longer - // accessible and we close gRPC client for it. + // Endpoint specifications can change dynamically. If some component is missing from the list, we assume it is no longer + // accessible and we close gRPC client for it, unless it is strict. endpointSpec func() []EndpointSpec dialOpts []grpc.DialOption gRPCInfoCallTimeout time.Duration @@ -223,7 +223,7 @@ func NewEndpointSet( reg *prometheus.Registry, endpointSpecs func() []EndpointSpec, dialOpts []grpc.DialOption, - unhealthyStoreTimeout time.Duration, + unhealthyEndpointTimeout time.Duration, ) *EndpointSet { endpointsMetric := newEndpointSetNodeCollector() if reg != nil { @@ -245,7 +245,7 @@ func NewEndpointSet( gRPCInfoCallTimeout: 5 * time.Second, endpoints: make(map[string]*endpointRef), endpointStatuses: make(map[string]*EndpointStatus), - unhealthyEndpointTimeout: unhealthyStoreTimeout, + unhealthyEndpointTimeout: unhealthyEndpointTimeout, endpointSpec: endpointSpecs, } return es @@ -558,7 +558,6 @@ func (e *EndpointSet) cleanUpEndpointStatuses(endpoints map[string]*endpointRef) } } -// TODO(bwplotka): Consider moving storeRef out of this package and renaming it, as it also supports rules API. 
type endpointRef struct { storepb.StoreClient From 873f39d3bb221b504aa0528eb873ebe2245ef819 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Thu, 26 Aug 2021 17:37:00 +0530 Subject: [PATCH 09/11] Fix failing e2e tests Signed-off-by: Hitanshu Mehta --- pkg/query/endpointset.go | 70 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index 79ed471b13..a8a1e5a354 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -95,17 +95,70 @@ func (es *grpcEndpointSpec) getMetadataUsingStoreAPI(ctx context.Context, client return nil, err } + infoResp := es.fillExpectedAPIs(component.FromProto(resp.StoreType), resp.MinTime, resp.MaxTime) + infoResp.LabelSets = resp.LabelSets + infoResp.ComponentType = component.FromProto(resp.StoreType).String() + return &endpointMetadata{ - &infopb.InfoResponse{ - LabelSets: resp.LabelSets, - ComponentType: component.FromProto(resp.StoreType).String(), + &infoResp, + }, nil +} + +func (es *grpcEndpointSpec) fillExpectedAPIs(componentType component.Component, mintime, maxTime int64) infopb.InfoResponse { + + switch componentType { + case component.Sidecar: + return infopb.InfoResponse{ + Store: &infopb.StoreInfo{ + MinTime: mintime, + MaxTime: maxTime, + }, + Rules: &infopb.RulesInfo{}, + Targets: &infopb.TargetsInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Exemplars: &infopb.ExemplarsInfo{}, + } + case component.Query: + { + return infopb.InfoResponse{ + Store: &infopb.StoreInfo{ + MinTime: mintime, + MaxTime: maxTime, + }, + Rules: &infopb.RulesInfo{}, + Targets: &infopb.TargetsInfo{}, + MetricMetadata: &infopb.MetricMetadataInfo{}, + Exemplars: &infopb.ExemplarsInfo{}, + } + } + case component.Receive: + { + return infopb.InfoResponse{ + Store: &infopb.StoreInfo{ + MinTime: mintime, + MaxTime: maxTime, + }, + Exemplars: &infopb.ExemplarsInfo{}, + } + } + case component.Store: + return infopb.InfoResponse{ Store: &infopb.StoreInfo{ - MinTime: resp.MinTime, - MaxTime: resp.MaxTime, + MinTime: mintime, + MaxTime: maxTime, + }, + } + case component.Rule: + return infopb.InfoResponse{ + Store: &infopb.StoreInfo{ + MinTime: mintime, + MaxTime: maxTime, }, Rules: &infopb.RulesInfo{}, - }, - }, nil + } + default: + return infopb.InfoResponse{} + } } // stringError forces the error to be a string @@ -583,6 +636,9 @@ func (er *endpointRef) Update(metadata *endpointMetadata) { clients.store = storepb.NewStoreClient(er.cc) er.StoreClient = clients.store } else { + // When we see the endpoint for the first time we assume the StoreAPI is exposed by that endpoint (which may not be true for some ruler) + // and we create a store API client because as a fallback we might have to call info method of storeAPI. + // In this step,we are setting it to null when we find out that the store API is not exposed. 
er.clients.store = nil er.StoreClient = nil } From 8d0f0ff51f9136db5cc1dff06fec17882976f4d9 Mon Sep 17 00:00:00 2001 From: Hitanshu Mehta Date: Tue, 31 Aug 2021 19:37:41 +0530 Subject: [PATCH 10/11] improve logging Signed-off-by: Hitanshu Mehta --- cmd/thanos/query.go | 28 +++++++++------------------- pkg/query/endpointset.go | 1 - 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 2c83f3eb84..797ff8c019 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -384,27 +384,17 @@ func runQuery( specs = append(specs, query.NewGRPCEndpointSpec(addr, true)) } - for _, addr := range dnsStoreProvider.Addresses() { - specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) - } - - for _, addr := range dnsRuleProvider.Addresses() { - specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) - } - - for _, addr := range dnsTargetProvider.Addresses() { - specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) - } - - for _, addr := range dnsMetadataProvider.Addresses() { - specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) - } + for _, dnsProvider := range []*dns.Provider{dnsStoreProvider, dnsRuleProvider, dnsExemplarProvider, dnsMetadataProvider, dnsTargetProvider} { + var tmpSpecs []query.EndpointSpec - for _, addr := range dnsExemplarProvider.Addresses() { - specs = append(specs, query.NewGRPCEndpointSpec(addr, false)) + for _, addr := range dnsProvider.Addresses() { + tmpSpecs = append(tmpSpecs, query.NewGRPCEndpointSpec(addr, false)) + } + tmpSpecs = removeDuplicateEndpointSpecs(logger, duplicatedStores, tmpSpecs) + specs = append(specs, tmpSpecs...) } - return removeDuplicateStoreSpecs(logger, duplicatedStores, specs) + return specs }, dialOpts, unhealthyStoreTimeout, @@ -629,7 +619,7 @@ func runQuery( return nil } -func removeDuplicateStoreSpecs(logger log.Logger, duplicatedStores prometheus.Counter, specs []query.EndpointSpec) []query.EndpointSpec { +func removeDuplicateEndpointSpecs(logger log.Logger, duplicatedStores prometheus.Counter, specs []query.EndpointSpec) []query.EndpointSpec { set := make(map[string]query.EndpointSpec) for _, spec := range specs { addr := spec.Addr() diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index a8a1e5a354..1a022160e3 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -463,7 +463,6 @@ func (e *EndpointSet) getActiveEndpoints(ctx context.Context, endpoints map[stri // Gather healthy endpoints map concurrently using info API. Build new clients if does not exist already. 
 	for _, es := range e.endpointSpec() {
 		if _, ok := endpointAddrSet[es.Addr()]; ok {
-			level.Warn(e.logger).Log("msg", "duplicated address in nodes", "address", es.Addr())
 			continue
 		}
 		endpointAddrSet[es.Addr()] = struct{}{}

From 2f8ae5ee1b1e7a162355fdef68e7ec60f47452ee Mon Sep 17 00:00:00 2001
From: Hitanshu Mehta
Date: Tue, 31 Aug 2021 20:50:43 +0530
Subject: [PATCH 11/11] fix comment

Signed-off-by: Hitanshu Mehta

---
 pkg/query/endpointset.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go
index 1a022160e3..5b6f04532a 100644
--- a/pkg/query/endpointset.go
+++ b/pkg/query/endpointset.go
@@ -635,9 +635,9 @@ func (er *endpointRef) Update(metadata *endpointMetadata) {
 		clients.store = storepb.NewStoreClient(er.cc)
 		er.StoreClient = clients.store
 	} else {
-		// When we see the endpoint for the first time we assume the StoreAPI is exposed by that endpoint (which may not be true for some ruler)
+		// When we see the endpoint for the first time, we assume the StoreAPI is exposed by that endpoint (which may not be true for some components, e.g. Ruler)
 		// and we create a store API client because as a fallback we might have to call info method of storeAPI.
-		// In this step,we are setting it to null when we find out that the store API is not exposed.
+		// In this step, we set it to nil once we find out that the StoreAPI is not exposed.
 		er.clients.store = nil
 		er.StoreClient = nil
 	}
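
Taken together, PATCH 08 and PATCH 09 make the querier prefer the new Info API and, when a component does not implement it, fall back to the legacy StoreAPI Info call and assume a fixed set of APIs per component type. The standalone Go sketch below illustrates that fallback flow; all names here (apiSet, endpoint, fetchMetadata) are hypothetical, simplified stand-ins and not the actual Thanos types.

package main

import (
	"errors"
	"fmt"
)

// apiSet is a simplified stand-in for infopb.InfoResponse: it only records
// which APIs an endpoint is believed to expose.
type apiSet struct {
	Store, Rules, Targets, MetricMetadata, Exemplars bool
}

// endpoint is a hypothetical, simplified view of an endpointRef: either the
// new Info API answered (reported != nil) or only the legacy StoreAPI did.
type endpoint struct {
	addr      string
	component string  // component type reported by the legacy StoreAPI Info call
	reported  *apiSet // result of the new Info API, nil if unimplemented
}

// fetchMetadata mirrors the flow in this series: use the Info API result when
// available, otherwise fall back to the set of APIs each component type is
// expected to expose (the fillExpectedAPIs idea from PATCH 09).
func fetchMetadata(e endpoint) (apiSet, error) {
	if e.reported != nil {
		return *e.reported, nil
	}
	switch e.component {
	case "sidecar", "query":
		return apiSet{Store: true, Rules: true, Targets: true, MetricMetadata: true, Exemplars: true}, nil
	case "receive":
		return apiSet{Store: true, Exemplars: true}, nil
	case "store":
		return apiSet{Store: true}, nil
	case "rule":
		return apiSet{Store: true, Rules: true}, nil
	default:
		return apiSet{}, errors.New("unknown component type: " + e.component)
	}
}

func main() {
	endpoints := []endpoint{
		{addr: "sidecar:10901", component: "sidecar"},        // old component, Info API not implemented
		{addr: "store:10901", reported: &apiSet{Store: true}}, // new component, Info API answered
	}
	for _, e := range endpoints {
		apis, err := fetchMetadata(e)
		fmt.Printf("%s -> %+v (err: %v)\n", e.addr, apis, err)
	}
}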