diff --git a/x-pack/plugins/infra/public/alerting/metric_threshold/components/expression.tsx b/x-pack/plugins/infra/public/alerting/metric_threshold/components/expression.tsx index 7b332a2a4ce8a6..bfdf1d7eeefa61 100644 --- a/x-pack/plugins/infra/public/alerting/metric_threshold/components/expression.tsx +++ b/x-pack/plugins/infra/public/alerting/metric_threshold/components/expression.tsx @@ -252,11 +252,6 @@ export const Expressions: React.FC = (props) => { [onFilterChange] ); - const areAllAggsRate = useMemo( - () => ruleParams.criteria?.every((c) => c.aggType === Aggregators.RATE), - [ruleParams.criteria] - ); - const hasGroupBy = useMemo( () => ruleParams.groupBy && ruleParams.groupBy.length > 0, [ruleParams.groupBy] @@ -386,31 +381,6 @@ export const Expressions: React.FC = (props) => { checked={ruleParams.alertOnNoData} onChange={(e) => setRuleParams('alertOnNoData', e.target.checked)} /> - - {i18n.translate('xpack.infra.metrics.alertFlyout.shouldDropPartialBuckets', { - defaultMessage: 'Drop partial buckets when evaluating data', - })}{' '} - - - - - } - checked={areAllAggsRate || ruleParams.shouldDropPartialBuckets} - disabled={areAllAggsRate} - onChange={(e) => setRuleParams('shouldDropPartialBuckets', e.target.checked)} - /> diff --git a/x-pack/plugins/infra/server/lib/alerting/inventory_metric_threshold/lib/calculate_rate_timeranges.ts b/x-pack/plugins/infra/server/lib/alerting/inventory_metric_threshold/lib/calculate_rate_timeranges.ts index 67a56065289ed4..dca49410de0d5e 100644 --- a/x-pack/plugins/infra/server/lib/alerting/inventory_metric_threshold/lib/calculate_rate_timeranges.ts +++ b/x-pack/plugins/infra/server/lib/alerting/inventory_metric_threshold/lib/calculate_rate_timeranges.ts @@ -4,15 +4,14 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ -import { InfraTimerangeInput } from '../../../../../common/http_api'; -export const calculateRateTimeranges = (timerange: InfraTimerangeInput) => { +export const calculateRateTimeranges = (timerange: { to: number; from: number }) => { // This is the total number of milliseconds for the entire timerange const totalTime = timerange.to - timerange.from; // Halfway is the to minus half the total time; - const halfway = timerange.to - totalTime / 2; + const halfway = Math.round(timerange.to - totalTime / 2); // The interval is half the total time (divided by 1000 to convert to seconds) - const intervalInSeconds = totalTime / 2000; + const intervalInSeconds = Math.round(totalTime / (2 * 1000)); // The first bucket is from the beginning of the time range to the halfway point const firstBucketRange = { diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_bucket_selector.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_bucket_selector.ts new file mode 100644 index 00000000000000..d078cee95e45eb --- /dev/null +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_bucket_selector.ts @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { + Aggregators, + Comparator, + MetricExpressionParams, +} from '../../../../../common/alerting/metrics'; +import { createConditionScript } from './create_condition_script'; +import { createLastPeriod } from './wrap_in_period'; + +const EMPTY_SHOULD_WARN = { + bucket_script: { + buckets_path: {}, + script: '0', + }, +}; + +export const createBucketSelector = ( + condition: MetricExpressionParams, + alertOnGroupDisappear: boolean = false, + groupBy?: string | string[], + lastPeriodEnd?: number +) => { + const hasGroupBy = groupBy != null; + const hasWarn = condition.warningThreshold != null && condition.warningComparator != null; + const isPercentile = [Aggregators.P95, Aggregators.P99].includes(condition.aggType); + const isCount = condition.aggType === Aggregators.COUNT; + const isRate = condition.aggType === Aggregators.RATE; + const bucketPath = isCount + ? 'currentPeriod>_count' + : isRate + ? `aggregatedValue` + : isPercentile + ? `currentPeriod>aggregatedValue[${condition.aggType === Aggregators.P95 ? '95' : '99'}]` + : 'currentPeriod>aggregatedValue'; + + const shouldWarn = hasWarn + ? { + bucket_script: { + buckets_path: { + value: bucketPath, + }, + script: createConditionScript( + condition.warningThreshold as number[], + condition.warningComparator as Comparator + ), + }, + } + : EMPTY_SHOULD_WARN; + + const shouldTrigger = { + bucket_script: { + buckets_path: { + value: bucketPath, + }, + script: createConditionScript(condition.threshold, condition.comparator), + }, + }; + + const aggs: any = { + shouldWarn, + shouldTrigger, + }; + + if (hasGroupBy && alertOnGroupDisappear && lastPeriodEnd) { + const wrappedPeriod = createLastPeriod(lastPeriodEnd, condition); + aggs.lastPeriod = wrappedPeriod.lastPeriod; + aggs.missingGroup = { + bucket_script: { + buckets_path: { + lastPeriod: 'lastPeriod>_count', + currentPeriod: 'currentPeriod>_count', + }, + script: 'params.lastPeriod > 0 && params.currentPeriod < 1 ? 1 : 0', + }, + }; + aggs.newOrRecoveredGroup = { + bucket_script: { + buckets_path: { + lastPeriod: 'lastPeriod>_count', + currentPeriod: 'currentPeriod>_count', + }, + script: 'params.lastPeriod < 1 && params.currentPeriod > 0 ? 1 : 0', + }, + }; + } + + if (hasGroupBy) { + const evalutionBucketPath = + alertOnGroupDisappear && lastPeriodEnd + ? { + shouldWarn: 'shouldWarn', + shouldTrigger: 'shouldTrigger', + missingGroup: 'missingGroup', + newOrRecoveredGroup: 'newOrRecoveredGroup', + } + : { shouldWarn: 'shouldWarn', shouldTrigger: 'shouldTrigger' }; + + const evaluationScript = + alertOnGroupDisappear && lastPeriodEnd + ? '(params.missingGroup != null && params.missingGroup > 0) || (params.shouldWarn != null && params.shouldWarn > 0) || (params.shouldTrigger != null && params.shouldTrigger > 0) || (params.newOrRecoveredGroup != null && params.newOrRecoveredGroup > 0)' + : '(params.shouldWarn != null && params.shouldWarn > 0) || (params.shouldTrigger != null && params.shouldTrigger > 0)'; + + aggs.evaluation = { + bucket_selector: { + buckets_path: evalutionBucketPath, + script: evaluationScript, + }, + }; + } + + return aggs; +}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_condition_script.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_condition_script.ts new file mode 100644 index 00000000000000..843a1a79eaf62b --- /dev/null +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_condition_script.ts @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { Comparator } from '../../../../../common/alerting/metrics'; + +export const createConditionScript = (threshold: number[], comparator: Comparator) => { + if (comparator === Comparator.BETWEEN && threshold.length === 2) { + return `params.value > ${threshold[0]} && params.value < ${threshold[1]} ? 1 : 0`; + } + if (comparator === Comparator.OUTSIDE_RANGE && threshold.length === 2) { + return `params.value < ${threshold[0]} && params.value > ${threshold[1]} ? 1 : 0`; + } + return `params.value ${comparator} ${threshold[0]} ? 1 : 0`; +}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_percentile_aggregation.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_percentile_aggregation.ts index 35111c1a69b2f1..bd714b2f6e2a11 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_percentile_aggregation.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_percentile_aggregation.ts @@ -17,7 +17,7 @@ export const createPercentileAggregation = ( percentiles: { field, percents: [value], - keyed: false, + keyed: true, }, }, }; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_rate_aggregation.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_rate_aggregation.ts new file mode 100644 index 00000000000000..2fdb8f5c6b834f --- /dev/null +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_rate_aggregation.ts @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; +import { calculateRateTimeranges } from '../../inventory_metric_threshold/lib/calculate_rate_timeranges'; +import { TIMESTAMP_FIELD } from '../../../../../common/constants'; + +export const createRateAggsBucketScript = ( + timeframe: { start: number; end: number }, + id: string +) => { + const { intervalInSeconds } = calculateRateTimeranges({ + to: timeframe.end, + from: timeframe.start, + }); + return { + [id]: { + bucket_script: { + buckets_path: { + first: `currentPeriod>${id}_first_bucket.maxValue`, + second: `currentPeriod>${id}_second_bucket.maxValue`, + }, + script: `params.second > 0.0 && params.first > 0.0 && params.second > params.first ? (params.second - params.first) / ${intervalInSeconds}: null`, + }, + }, + }; +}; + +export const createRateAggsBuckets = ( + timeframe: { start: number; end: number }, + id: string, + field: string +) => { + const { firstBucketRange, secondBucketRange } = calculateRateTimeranges({ + to: timeframe.end, + from: timeframe.start, + }); + + return { + [`${id}_first_bucket`]: { + filter: { + range: { + [TIMESTAMP_FIELD]: { + gte: moment(firstBucketRange.from).toISOString(), + lt: moment(firstBucketRange.to).toISOString(), + }, + }, + }, + aggs: { maxValue: { max: { field } } }, + }, + [`${id}_second_bucket`]: { + filter: { + range: { + [TIMESTAMP_FIELD]: { + gte: moment(secondBucketRange.from).toISOString(), + lt: moment(secondBucketRange.to).toISOString(), + }, + }, + }, + aggs: { maxValue: { max: { field } } }, + }, + }; +}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.test.ts index bf365d7e89bcac..6906800bf4c9d0 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.test.ts @@ -36,23 +36,23 @@ describe('createTimerange(interval, aggType, timeframe)', () => { describe('Rate Aggs', () => { it('should return a 20 second range for last 1 second', () => { const subject = createTimerange(1000, Aggregators.RATE); - expect(subject.end - subject.start).toEqual(1000 * 5); + expect(subject.end - subject.start).toEqual(1000 * 2); }); it('should return a 5 minute range for last 1 minute', () => { const subject = createTimerange(60000, Aggregators.RATE); - expect(subject.end - subject.start).toEqual(60000 * 5); + expect(subject.end - subject.start).toEqual(60000 * 2); }); it('should return 25 minute range for last 5 minutes', () => { const subject = createTimerange(300000, Aggregators.RATE); - expect(subject.end - subject.start).toEqual(300000 * 5); + expect(subject.end - subject.start).toEqual(300000 * 2); }); it('should return 5 hour range for last hour', () => { const subject = createTimerange(3600000, Aggregators.RATE); - expect(subject.end - subject.start).toEqual(3600000 * 5); + expect(subject.end - subject.start).toEqual(3600000 * 2); }); it('should return a 5 day range for last day', () => { const subject = createTimerange(86400000, Aggregators.RATE); - expect(subject.end - subject.start).toEqual(86400000 * 5); + expect(subject.end - subject.start).toEqual(86400000 * 2); }); }); }); @@ -78,23 +78,23 @@ describe('createTimerange(interval, aggType, timeframe)', () => { }); }); describe('Rate Aggs', () => { - it('should return 25 minute range when given 4 minute timeframe', () => { + it('should return 8 minute range when given 4 minute timeframe', () => { const end = moment(); const timeframe = { start: end.clone().subtract(4, 'minutes').valueOf(), end: end.valueOf(), }; const subject = createTimerange(300000, Aggregators.RATE, timeframe); - expect(subject.end - subject.start).toEqual(300000 * 5); + expect(subject.end - subject.start).toEqual(300000 * 2); }); - it('should return 25 minute range when given 6 minute timeframe', () => { + it('should return 12 minute range when given 6 minute timeframe', () => { const end = moment(); const timeframe = { start: end.clone().subtract(6, 'minutes').valueOf(), end: end.valueOf(), }; const subject = createTimerange(300000, Aggregators.RATE, timeframe); - expect(subject.end - subject.start).toEqual(300000 * 5); + expect(subject.end - subject.start).toEqual(300000 * 2); }); }); }); @@ -113,7 +113,7 @@ describe('createTimerange(interval, aggType, timeframe)', () => { }); }); describe('Rate Aggs', () => { - it('should return 25 minute range for last 5 minutes', () => { + it('should return 10 minute range for last 5 minutes', () => { const end = moment(); const timeframe = { end: end.valueOf(), @@ -122,7 +122,7 @@ describe('createTimerange(interval, aggType, timeframe)', () => { expect(subject).toEqual({ start: end .clone() - .subtract(300 * 5, 'seconds') + .subtract(300 * 2, 'seconds') .valueOf(), end: end.valueOf(), }); diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.ts index 03c407e8afe377..035f8dcdb88a95 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/create_timerange.ts @@ -11,14 +11,14 @@ import { Aggregators } from '../../../../../common/alerting/metrics'; export const createTimerange = ( interval: number, aggType: Aggregators, - timeframe?: { end: number; start?: number } + timeframe?: { end: number; start?: number }, + lastPeriodEnd?: number ) => { const to = moment(timeframe ? timeframe.end : Date.now()).valueOf(); // Rate aggregations need 5 buckets worth of data - const minimumBuckets = aggType === Aggregators.RATE ? 5 : 1; - - const calculatedFrom = to - interval * minimumBuckets; + const minimumBuckets = aggType === Aggregators.RATE ? 2 : 1; + const calculatedFrom = lastPeriodEnd ? lastPeriodEnd - interval : to - interval * minimumBuckets; // Use either the timeframe.start when the start is less then calculatedFrom // OR use the calculatedFrom diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts index 05168edcac45cf..9103645f7e46c3 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_rule.ts @@ -7,352 +7,91 @@ import { ElasticsearchClient } from '@kbn/core/server'; import moment from 'moment'; -import { difference, first, has, isNaN, isNumber, isObject, last, mapValues } from 'lodash'; -import { - Aggregators, - Comparator, - isTooManyBucketsPreviewException, - MetricExpressionParams, - TOO_MANY_BUCKETS_PREVIEW_EXCEPTION, -} from '../../../../../common/alerting/metrics'; +import { MetricExpressionParams } from '../../../../../common/alerting/metrics'; import { InfraSource } from '../../../../../common/source_configuration/source_configuration'; -import { createAfterKeyHandler } from '../../../../utils/create_afterkey_handler'; -import { getAllCompositeData } from '../../../../utils/get_all_composite_data'; import { getIntervalInSeconds } from '../../../../utils/get_interval_in_seconds'; -import { InfraDatabaseSearchResponse } from '../../../adapters/framework/adapter_types'; import { DOCUMENT_COUNT_I18N } from '../../common/messages'; -import { UNGROUPED_FACTORY_KEY } from '../../common/utils'; import { createTimerange } from './create_timerange'; -import { getElasticsearchMetricQuery } from './metric_query'; - -interface AggregationWithoutIntervals { - aggregatedValue: { value: number; values?: Array<{ key: number; value: number }> }; -} - -interface AggregationWithIntervals { - aggregatedIntervals: { - buckets: Array<{ - aggregatedValue: { value: number; values?: Array<{ key: number; value: number }> }; - doc_count: number; - to_as_string: string; - from_as_string: string; - key_as_string: string; - }>; - }; -} - -type Aggregation = AggregationWithIntervals | AggregationWithoutIntervals; - -function isAggregationWithIntervals( - subject: Aggregation | undefined -): subject is AggregationWithIntervals { - return isObject(subject) && has(subject, 'aggregatedIntervals'); -} - -interface CompositeAggregationsResponse { - groupings: { - buckets: Aggregation[]; - }; -} +import { getData } from './get_data'; export interface EvaluatedRuleParams { criteria: MetricExpressionParams[]; groupBy: string | undefined | string[]; filterQuery?: string; filterQueryText?: string; - shouldDropPartialBuckets?: boolean; } -export const evaluateRule = ( +export type Evaluation = Omit & { + metric: string; + currentValue: number | null; + timestamp: string; + shouldFire: boolean; + shouldWarn: boolean; + isNoData: boolean; +}; + +export const evaluateRule = async ( esClient: ElasticsearchClient, params: Params, config: InfraSource['configuration'], - prevGroups: string[], compositeSize: number, - timeframe?: { start?: number; end: number } -) => { - const { criteria, groupBy, filterQuery, shouldDropPartialBuckets } = params; + alertOnGroupDisappear: boolean, + lastPeriodEnd?: number, + timeframe?: { start?: number; end: number }, + missingGroups: string[] = [] +): Promise>> => { + const { criteria, groupBy, filterQuery } = params; return Promise.all( criteria.map(async (criterion) => { - const currentValues = await getMetric( + const interval = `${criterion.timeSize}${criterion.timeUnit}`; + const intervalAsSeconds = getIntervalInSeconds(interval); + const intervalAsMS = intervalAsSeconds * 1000; + const calculatedTimerange = createTimerange( + intervalAsMS, + criterion.aggType, + timeframe, + lastPeriodEnd + ); + + const currentValues = await getData( esClient, criterion, config.metricAlias, groupBy, filterQuery, compositeSize, - timeframe, - shouldDropPartialBuckets + alertOnGroupDisappear, + calculatedTimerange, + lastPeriodEnd ); - const { threshold, warningThreshold, comparator, warningComparator } = criterion; - const pointsEvaluator = (points: any[] | typeof NaN | null, t?: number[], c?: Comparator) => { - if (!t || !c) return [false]; - const comparisonFunction = comparatorMap[c]; - return Array.isArray(points) - ? points.map( - (point) => t && typeof point.value === 'number' && comparisonFunction(point.value, t) - ) - : [false]; - }; - - // If any previous groups are no longer being reported, backfill them with null values - const currentGroups = Object.keys(currentValues); - - const missingGroups = difference(prevGroups, currentGroups); - - if (currentGroups.length === 0 && missingGroups.length === 0) { - missingGroups.push(UNGROUPED_FACTORY_KEY); - } - const backfillTimestamp = - last(last(Object.values(currentValues)))?.key ?? new Date().toISOString(); - const backfilledPrevGroups: Record> = {}; - for (const group of missingGroups) { - backfilledPrevGroups[group] = [ - { - key: backfillTimestamp, - value: criterion.aggType === Aggregators.COUNT ? 0 : null, - }, - ]; + for (const missingGroup of missingGroups) { + if (currentValues[missingGroup] == null) { + currentValues[missingGroup] = { + value: null, + trigger: false, + warn: false, + }; + } } - const currentValuesWithBackfilledPrevGroups = { - ...currentValues, - ...backfilledPrevGroups, - }; - return mapValues( - currentValuesWithBackfilledPrevGroups, - (points: any[] | typeof NaN | null) => { - if (isTooManyBucketsPreviewException(points)) throw points; - return { + const evaluations: Record = {}; + for (const key of Object.keys(currentValues)) { + const result = currentValues[key]; + if (result.trigger || result.warn || result.value === null) { + evaluations[key] = { ...criterion, metric: criterion.metric ?? DOCUMENT_COUNT_I18N, - currentValue: Array.isArray(points) ? last(points)?.value : NaN, - timestamp: Array.isArray(points) ? last(points)?.key : NaN, - shouldFire: pointsEvaluator(points, threshold, comparator), - shouldWarn: pointsEvaluator(points, warningThreshold, warningComparator), - isNoData: Array.isArray(points) - ? points.map((point) => point?.value === null || point === null) - : [points === null], - isError: isNaN(Array.isArray(points) ? last(points)?.value : points), + currentValue: result.value, + timestamp: moment(calculatedTimerange.end).toISOString(), + shouldFire: result.trigger, + shouldWarn: result.warn, + isNoData: result.value === null, }; } - ); + } + return evaluations; }) ); }; - -const getMetric: ( - esClient: ElasticsearchClient, - params: MetricExpressionParams, - index: string, - groupBy: string | undefined | string[], - filterQuery: string | undefined, - compositeSize: number, - timeframe?: { start?: number; end: number }, - shouldDropPartialBuckets?: boolean -) => Promise>> = async function ( - esClient, - params, - index, - groupBy, - filterQuery, - compositeSize, - timeframe, - shouldDropPartialBuckets -) { - const { aggType, timeSize, timeUnit } = params; - const hasGroupBy = groupBy && groupBy.length; - - const interval = `${timeSize}${timeUnit}`; - const intervalAsSeconds = getIntervalInSeconds(interval); - const intervalAsMS = intervalAsSeconds * 1000; - const calculatedTimerange = createTimerange(intervalAsMS, aggType, timeframe); - - const searchBody = getElasticsearchMetricQuery( - params, - calculatedTimerange, - compositeSize, - hasGroupBy ? groupBy : undefined, - filterQuery - ); - - const dropPartialBucketsOptions = - // Rate aggs always drop partial buckets; guard against this boolean being passed as false - shouldDropPartialBuckets || aggType === Aggregators.RATE - ? { - from: calculatedTimerange.start, - to: calculatedTimerange.end, - bucketSizeInMillis: intervalAsMS, - } - : null; - - try { - if (hasGroupBy) { - const bucketSelector = ( - response: InfraDatabaseSearchResponse<{}, CompositeAggregationsResponse> - ) => response.aggregations?.groupings?.buckets || []; - const afterKeyHandler = createAfterKeyHandler( - 'aggs.groupings.composite.after', - (response) => response.aggregations?.groupings?.after_key - ); - const compositeBuckets = (await getAllCompositeData( - // @ts-expect-error @elastic/elasticsearch SearchResponse.body.timeout is not required - (body) => esClient.search({ body, index }, { meta: true }), - searchBody, - bucketSelector, - afterKeyHandler - )) as Array; doc_count: number }>; - const groupedResults: Record = {}; - for (const bucket of compositeBuckets) { - const key = Object.values(bucket.key).join(', '); - const value = getValuesFromAggregations( - bucket, - aggType, - dropPartialBucketsOptions, - calculatedTimerange, - bucket.doc_count - ); - groupedResults[key] = value; - } - return groupedResults; - } - const result = await esClient.search({ - body: searchBody, - index, - }); - - return { - [UNGROUPED_FACTORY_KEY]: getValuesFromAggregations( - result.aggregations! as unknown as Aggregation, - aggType, - dropPartialBucketsOptions, - calculatedTimerange, - result.hits - ? isNumber(result.hits.total) - ? result.hits.total - : result.hits.total?.value ?? 0 - : 0 - ), - }; - } catch (e: any) { - if (timeframe) { - // This code should only ever be reached when previewing the alert, not executing it - const causedByType = e.body?.error?.caused_by?.type; - if (causedByType === 'too_many_buckets_exception') { - return { - [UNGROUPED_FACTORY_KEY]: { - [TOO_MANY_BUCKETS_PREVIEW_EXCEPTION]: true, - maxBuckets: e.body.error.caused_by.max_buckets, - }, - }; - } - } - return { [UNGROUPED_FACTORY_KEY]: NaN }; // Trigger an Error state - } -}; - -interface DropPartialBucketOptions { - from: number; - to: number; - bucketSizeInMillis: number; -} - -const dropPartialBuckets = - ({ from, to, bucketSizeInMillis }: DropPartialBucketOptions) => - ( - row: { - key: string; - value: number | null; - } | null - ) => { - if (row == null) return null; - const timestamp = new Date(row.key).valueOf(); - return timestamp >= from && timestamp + bucketSizeInMillis <= to; - }; - -const getValuesFromAggregations = ( - aggregations: Aggregation | undefined, - aggType: MetricExpressionParams['aggType'], - dropPartialBucketsOptions: DropPartialBucketOptions | null, - timeFrame: { start: number; end: number }, - docCount?: number -) => { - try { - let buckets; - if (aggType === Aggregators.COUNT) { - buckets = [ - { - doc_count: docCount, - to_as_string: moment(timeFrame.end).toISOString(), - from_as_string: moment(timeFrame.start).toISOString(), - key_as_string: moment(timeFrame.start).toISOString(), - }, - ]; - } else if (isAggregationWithIntervals(aggregations)) { - buckets = aggregations.aggregatedIntervals.buckets; - } else { - buckets = [ - { - ...aggregations, - doc_count: docCount, - to_as_string: moment(timeFrame.end).toISOString(), - from_as_string: moment(timeFrame.start).toISOString(), - key_as_string: moment(timeFrame.start).toISOString(), - }, - ]; - } - - if (!buckets.length) return null; // No Data state - - let mappedBuckets: Array<{ key: string; value: number | null } | null>; - - if (aggType === Aggregators.COUNT) { - mappedBuckets = buckets.map((bucket) => ({ - key: bucket.from_as_string, - value: bucket.doc_count || null, - })); - } else if (aggType === Aggregators.P95 || aggType === Aggregators.P99) { - mappedBuckets = buckets.map((bucket) => { - const values = bucket.aggregatedValue?.values || []; - const firstValue = first(values); - if (!firstValue) return null; - return { key: bucket.from_as_string, value: firstValue.value }; - }); - } else if (aggType === Aggregators.AVERAGE) { - mappedBuckets = buckets.map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })); - } else if (aggType === Aggregators.RATE) { - mappedBuckets = buckets.map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })); - } else { - mappedBuckets = buckets.map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })); - } - if (dropPartialBucketsOptions) { - return mappedBuckets.filter(dropPartialBuckets(dropPartialBucketsOptions)); - } - return mappedBuckets; - } catch (e) { - return NaN; // Error state - } -}; - -const comparatorMap = { - [Comparator.BETWEEN]: (value: number, [a, b]: number[]) => - value >= Math.min(a, b) && value <= Math.max(a, b), - [Comparator.OUTSIDE_RANGE]: (value: number, [a, b]: number[]) => value < a || value > b, - // `threshold` is always an array of numbers in case the BETWEEN/OUTSIDE_RANGE comparator is - // used; all other compartors will just destructure the first value in the array - [Comparator.GT]: (a: number, [b]: number[]) => a > b, - [Comparator.LT]: (a: number, [b]: number[]) => a < b, - [Comparator.GT_OR_EQ]: (a: number, [b]: number[]) => a >= b, - [Comparator.LT_OR_EQ]: (a: number, [b]: number[]) => a <= b, -}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/get_data.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/get_data.ts new file mode 100644 index 00000000000000..0d5aa6863ee9c4 --- /dev/null +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/get_data.ts @@ -0,0 +1,225 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { ElasticsearchClient } from '@kbn/core/server'; +import { + Aggregators, + Comparator, + MetricExpressionParams, +} from '../../../../../common/alerting/metrics'; +import { UNGROUPED_FACTORY_KEY } from '../../common/utils'; +import { getElasticsearchMetricQuery } from './metric_query'; + +export type GetDataResponse = Record< + string, + { warn: boolean; trigger: boolean; value: number | null } +>; + +type BucketKey = Record; +interface AggregatedValue { + value: number | null; + values?: Record; +} +interface Aggs { + currentPeriod: { + doc_count: number; + aggregatedValue?: AggregatedValue; + }; + aggregatedValue?: AggregatedValue; + shouldWarn?: { + value: number; + }; + shouldTrigger?: { + value: number; + }; + missingGroup?: { + value: number; + }; +} +interface Bucket extends Aggs { + key: BucketKey; + doc_count: number; +} +interface ResponseAggregations extends Partial { + groupings?: { + after_key: Record; + buckets: Bucket[]; + }; + all?: { + buckets: { + all?: { + doc_count: number; + } & Aggs; + }; + }; +} + +const getValue = (aggregatedValue: AggregatedValue, params: MetricExpressionParams) => + [Aggregators.P95, Aggregators.P99].includes(params.aggType) && aggregatedValue.values != null + ? aggregatedValue.values[params.aggType === Aggregators.P95 ? '95.0' : '99.0'] + : aggregatedValue.value; + +const NO_DATA_RESPONSE = { + [UNGROUPED_FACTORY_KEY]: { + value: null, + warn: false, + trigger: false, + }, +}; + +export const getData = async ( + esClient: ElasticsearchClient, + params: MetricExpressionParams, + index: string, + groupBy: string | undefined | string[], + filterQuery: string | undefined, + compositeSize: number, + alertOnGroupDisappear: boolean, + timeframe: { start: number; end: number }, + lastPeriodEnd?: number, + previousResults: GetDataResponse = {}, + afterKey?: Record +): Promise => { + const handleResponse = ( + aggs: ResponseAggregations, + previous: GetDataResponse, + successfulShards: number + ) => { + // This is absolutely NO DATA + if (successfulShards === 0) { + return NO_DATA_RESPONSE; + } + if (aggs.groupings) { + const { groupings } = aggs; + const nextAfterKey = groupings.after_key; + for (const bucket of groupings.buckets) { + const key = Object.values(bucket.key).join(','); + const { + shouldWarn, + shouldTrigger, + missingGroup, + currentPeriod: { aggregatedValue, doc_count: docCount }, + aggregatedValue: aggregatedValueForRate, + } = bucket; + if (missingGroup && missingGroup.value > 0) { + previous[key] = { + trigger: false, + warn: false, + value: null, + }; + } else { + const value = + params.aggType === Aggregators.COUNT + ? docCount + : params.aggType === Aggregators.RATE && aggregatedValueForRate != null + ? aggregatedValueForRate.value + : aggregatedValue != null + ? getValue(aggregatedValue, params) + : null; + previous[key] = { + trigger: (shouldTrigger && shouldTrigger.value > 0) || false, + warn: (shouldWarn && shouldWarn.value > 0) || false, + value, + }; + } + } + if (nextAfterKey) { + return getData( + esClient, + params, + index, + groupBy, + filterQuery, + compositeSize, + alertOnGroupDisappear, + timeframe, + lastPeriodEnd, + previous, + nextAfterKey + ); + } + return previous; + } + if (aggs.all?.buckets.all) { + const { + currentPeriod: { aggregatedValue, doc_count: docCount }, + aggregatedValue: aggregatedValueForRate, + shouldWarn, + shouldTrigger, + } = aggs.all.buckets.all; + const value = + params.aggType === Aggregators.COUNT + ? docCount + : params.aggType === Aggregators.RATE && aggregatedValueForRate != null + ? aggregatedValueForRate.value + : aggregatedValue != null + ? getValue(aggregatedValue, params) + : null; + // There is an edge case where there is no results and the shouldWarn/shouldTrigger + // bucket scripts will be missing. This is only an issue for document count because + // the value will end up being ZERO, for other metrics it will be null. In this case + // we need to do the evaluation in Node.js + if (aggs.all && params.aggType === Aggregators.COUNT && value === 0) { + const trigger = comparatorMap[params.comparator](value, params.threshold); + const warn = + params.warningThreshold && params.warningComparator + ? comparatorMap[params.warningComparator](value, params.warningThreshold) + : false; + return { + [UNGROUPED_FACTORY_KEY]: { + value, + warn, + trigger, + }, + }; + } + return { + [UNGROUPED_FACTORY_KEY]: { + value, + warn: (shouldWarn && shouldWarn.value > 0) || false, + trigger: (shouldTrigger && shouldTrigger.value > 0) || false, + }, + }; + } else { + return NO_DATA_RESPONSE; + } + }; + const request = { + index, + allow_no_indices: true, + ignore_unavailable: true, + body: getElasticsearchMetricQuery( + params, + timeframe, + compositeSize, + alertOnGroupDisappear, + lastPeriodEnd, + groupBy, + filterQuery, + afterKey + ), + }; + const { aggregations, _shards } = await esClient.search(request); + if (aggregations) { + return handleResponse(aggregations, previousResults, _shards.successful); + } else if (_shards.successful) { + return previousResults; + } + return NO_DATA_RESPONSE; +}; + +const comparatorMap = { + [Comparator.BETWEEN]: (value: number, [a, b]: number[]) => + value >= Math.min(a, b) && value <= Math.max(a, b), + // `threshold` is always an array of numbers in case the BETWEEN comparator is + // used; all other compartors will just destructure the first value in the array + [Comparator.GT]: (a: number, [b]: number[]) => a > b, + [Comparator.LT]: (a: number, [b]: number[]) => a < b, + [Comparator.OUTSIDE_RANGE]: (value: number, [a, b]: number[]) => value < a || value > b, + [Comparator.GT_OR_EQ]: (a: number, [b]: number[]) => a >= b, + [Comparator.LT_OR_EQ]: (a: number, [b]: number[]) => a <= b, +}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts index 7e26bc2ba6be67..966ac767bf458f 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.test.ts @@ -6,7 +6,7 @@ */ import moment from 'moment'; -import { MetricExpressionParams } from '../../../../../common/alerting/metrics'; +import { Comparator, MetricExpressionParams } from '../../../../../common/alerting/metrics'; import { getElasticsearchMetricQuery } from './metric_query'; describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { @@ -15,6 +15,8 @@ describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { aggType: 'avg', timeUnit: 'm', timeSize: 1, + threshold: [1], + comparator: Comparator.GT, } as MetricExpressionParams; const groupBy = 'host.doggoname'; @@ -24,7 +26,14 @@ describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { }; describe('when passed no filterQuery', () => { - const searchBody = getElasticsearchMetricQuery(expressionParams, timeframe, 100, groupBy); + const searchBody = getElasticsearchMetricQuery( + expressionParams, + timeframe, + 100, + true, + void 0, + groupBy + ); test('includes a range filter', () => { expect( searchBody.query.bool.filter.find((filter) => filter.hasOwnProperty('range')) @@ -48,6 +57,8 @@ describe("The Metric Threshold Alert's getElasticsearchMetricQuery", () => { expressionParams, timeframe, 100, + true, + void 0, groupBy, filterQuery ); diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 5f7d643ec22eb3..06da4c09a8be61 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -5,11 +5,12 @@ * 2.0. */ +import moment from 'moment'; import { Aggregators, MetricExpressionParams } from '../../../../../common/alerting/metrics'; -import { TIMESTAMP_FIELD } from '../../../../../common/constants'; -import { networkTraffic } from '../../../../../common/inventory_models/shared/metrics/snapshot/network_traffic'; -import { calculateDateHistogramOffset } from '../../../metrics/lib/calculate_date_histogram_offset'; +import { createBucketSelector } from './create_bucket_selector'; import { createPercentileAggregation } from './create_percentile_aggregation'; +import { createRateAggsBuckets, createRateAggsBucketScript } from './create_rate_aggregation'; +import { wrapInCurrentPeriod } from './wrap_in_period'; const getParsedFilterQuery: (filterQuery: string | undefined) => Record | null = ( filterQuery @@ -19,27 +20,42 @@ const getParsedFilterQuery: (filterQuery: string | undefined) => Record ) => { + const { metric, aggType } = metricParams; if (aggType === Aggregators.COUNT && metric) { throw new Error('Cannot aggregate document count with a metric'); } if (aggType !== Aggregators.COUNT && !metric) { throw new Error('Can only aggregate without a metric if using the document count aggregator'); } - const interval = `${timeSize}${timeUnit}`; - const to = timeframe.end; - const from = timeframe.start; - const aggregations = + // We need to make a timeframe that represents the current timeframe as oppose + // to the total timeframe (which includes the last period). + const currentTimeframe = { + ...timeframe, + start: moment(timeframe.end) + .subtract( + metricParams.aggType === Aggregators.RATE + ? metricParams.timeSize * 2 + : metricParams.timeSize, + metricParams.timeUnit + ) + .valueOf(), + }; + + const metricAggregations = aggType === Aggregators.COUNT ? {} : aggType === Aggregators.RATE - ? networkTraffic('aggregatedValue', metric) + ? createRateAggsBuckets(currentTimeframe, 'aggregatedValue', metric) : aggType === Aggregators.P95 || aggType === Aggregators.P99 ? createPercentileAggregation(aggType, metric) : { @@ -50,25 +66,21 @@ export const getElasticsearchMetricQuery = ( }, }; - const baseAggs = - aggType === Aggregators.RATE - ? { - aggregatedIntervals: { - date_histogram: { - field: TIMESTAMP_FIELD, - fixed_interval: interval, - offset: calculateDateHistogramOffset({ from, to, interval }), - extended_bounds: { - min: from, - max: to, - }, - }, - aggregations, - }, - } - : aggregations; + const bucketSelectorAggregations = createBucketSelector( + metricParams, + alertOnGroupDisappear, + groupBy, + lastPeriodEnd + ); + + const rateAggBucketScript = + metricParams.aggType === Aggregators.RATE + ? createRateAggsBucketScript(currentTimeframe, 'aggregatedValue') + : {}; - const aggs = groupBy + const currentPeriod = wrapInCurrentPeriod(currentTimeframe, metricAggregations); + + const aggs: any = groupBy ? { groupings: { composite: { @@ -89,18 +101,40 @@ export const getElasticsearchMetricQuery = ( }, ], }, - aggs: baseAggs, + aggs: { + ...currentPeriod, + ...rateAggBucketScript, + ...bucketSelectorAggregations, + }, }, } - : baseAggs; + : { + all: { + filters: { + filters: { + all: { + match_all: {}, + }, + }, + }, + aggs: { + ...currentPeriod, + ...rateAggBucketScript, + ...bucketSelectorAggregations, + }, + }, + }; + + if (aggs.groupings && afterKey) { + aggs.groupings.composite.after = afterKey; + } const rangeFilters = [ { range: { '@timestamp': { - gte: from, - lte: to, - format: 'epoch_millis', + gte: moment(timeframe.start).toISOString(), + lte: moment(timeframe.end).toISOString(), }, }, }, diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/wrap_in_period.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/wrap_in_period.ts new file mode 100644 index 00000000000000..391ade29cfb9db --- /dev/null +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/wrap_in_period.ts @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; +import { MetricExpressionParams } from '../../../../../common/alerting/metrics'; +import { TIMESTAMP_FIELD } from '../../../../../common/constants'; + +export const createLastPeriod = ( + lastPeriodEnd: number, + { timeUnit, timeSize }: MetricExpressionParams +) => { + const start = moment(lastPeriodEnd).subtract(timeSize, timeUnit).toISOString(); + return { + lastPeriod: { + filter: { + range: { + [TIMESTAMP_FIELD]: { + gte: start, + lte: moment(lastPeriodEnd).toISOString(), + }, + }, + }, + }, + }; +}; + +export const wrapInCurrentPeriod = ( + timeframe: { start: number; end: number }, + aggs: Aggs +) => { + return { + currentPeriod: { + filter: { + range: { + [TIMESTAMP_FIELD]: { + gte: moment(timeframe.start).toISOString(), + lte: moment(timeframe.end).toISOString(), + }, + }, + }, + aggs, + }, + }; +}; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts index c215470a6d74e7..6bd421caf561bb 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts @@ -25,8 +25,14 @@ import { NonCountMetricExpressionParams, } from '../../../../common/alerting/metrics'; import { InfraSources } from '../../sources'; -import { createMetricThresholdExecutor, FIRED_ACTIONS } from './metric_threshold_executor'; -import * as mocks from './test_mocks'; +import { + createMetricThresholdExecutor, + FIRED_ACTIONS, + NO_DATA_ACTIONS, +} from './metric_threshold_executor'; +import { Evaluation } from './lib/evaluate_rule'; + +jest.mock('./lib/evaluate_rule', () => ({ evaluateRule: jest.fn() })); interface AlertTestInstance { instance: AlertInstanceMock; @@ -93,6 +99,10 @@ const mockOptions = { }, }; +const setEvaluationResults = (response: Array>) => { + jest.requireMock('./lib/evaluate_rule').evaluateRule.mockImplementation(() => response); +}; + describe('The metric threshold alert type', () => { describe('querying the entire infrastructure', () => { afterAll(() => clearInstances()); @@ -112,47 +122,84 @@ describe('The metric threshold alert type', () => { ], }, }); + const setResults = ( + comparator: Comparator, + threshold: number[], + shouldFire: boolean = false, + shouldWarn: boolean = false, + isNoData: boolean = false + ) => + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator, + threshold, + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire, + shouldWarn, + isNoData, + }, + }, + ]); test('alerts as expected with the > comparator', async () => { + setResults(Comparator.GT, [0.75], true); await execute(Comparator.GT, [0.75]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.GT, [1.5], false); await execute(Comparator.GT, [1.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts as expected with the < comparator', async () => { + setResults(Comparator.LT, [1.5], true); await execute(Comparator.LT, [1.5]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.LT, [0.75], false); await execute(Comparator.LT, [0.75]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts as expected with the >= comparator', async () => { + setResults(Comparator.GT_OR_EQ, [0.75], true); await execute(Comparator.GT_OR_EQ, [0.75]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.GT_OR_EQ, [1.0], true); await execute(Comparator.GT_OR_EQ, [1.0]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.GT_OR_EQ, [1.5], false); await execute(Comparator.GT_OR_EQ, [1.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts as expected with the <= comparator', async () => { + setResults(Comparator.LT_OR_EQ, [1.5], true); await execute(Comparator.LT_OR_EQ, [1.5]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.LT_OR_EQ, [1.0], true); await execute(Comparator.LT_OR_EQ, [1.0]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.LT_OR_EQ, [0.75], false); await execute(Comparator.LT_OR_EQ, [0.75]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts as expected with the between comparator', async () => { + setResults(Comparator.BETWEEN, [0, 1.5], true); await execute(Comparator.BETWEEN, [0, 1.5]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.BETWEEN, [0, 0.75], false); await execute(Comparator.BETWEEN, [0, 0.75]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts as expected with the outside range comparator', async () => { + setResults(Comparator.OUTSIDE_RANGE, [0, 0.75], true); await execute(Comparator.OUTSIDE_RANGE, [0, 0.75]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setResults(Comparator.OUTSIDE_RANGE, [0, 1.5], false); await execute(Comparator.OUTSIDE_RANGE, [0, 1.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('reports expected values to the action context', async () => { + setResults(Comparator.GT, [0.75], true); await execute(Comparator.GT, [0.75]); const { action } = mostRecentAction(instanceID); expect(action.group).toBe('*'); @@ -191,33 +238,206 @@ describe('The metric threshold alert type', () => { const instanceIdA = 'a'; const instanceIdB = 'b'; test('sends an alert when all groups pass the threshold', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [0.75]); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); expect(mostRecentAction(instanceIdB)).toBeAlertAction(); }); test('sends an alert when only some groups pass the threshold', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [1.5], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [1.5], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.LT, [1.5]); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); expect(mostRecentAction(instanceIdB)).toBe(undefined); }); test('sends no alert when no groups pass the threshold', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [5], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [5], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [5]); expect(mostRecentAction(instanceIdA)).toBe(undefined); expect(mostRecentAction(instanceIdB)).toBe(undefined); }); test('reports group values to the action context', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [0.75]); expect(mostRecentAction(instanceIdA).action.group).toBe('a'); expect(mostRecentAction(instanceIdB).action.group).toBe('b'); }); - test('reports previous groups and the groupBy parameter in its state', async () => { - const stateResult = await execute(Comparator.GT, [0.75]); - expect(stateResult.groups).toEqual(expect.arrayContaining(['a', 'b'])); - expect(stateResult.groupBy).toEqual(['something']); - }); test('persists previous groups that go missing, until the groupBy param changes', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + c: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const stateResult1 = await execute(Comparator.GT, [0.75], ['something'], 'test.metric.2'); - expect(stateResult1.groups).toEqual(expect.arrayContaining(['a', 'b', 'c'])); + expect(stateResult1.missingGroups).toEqual(expect.arrayContaining([])); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + c: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); const stateResult2 = await execute( Comparator.GT, [0.75], @@ -225,7 +445,33 @@ describe('The metric threshold alert type', () => { 'test.metric.1', stateResult1 ); - expect(stateResult2.groups).toEqual(expect.arrayContaining(['a', 'b', 'c'])); + expect(stateResult2.missingGroups).toEqual(expect.arrayContaining(['c'])); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const stateResult3 = await execute( Comparator.GT, [0.75], @@ -233,7 +479,7 @@ describe('The metric threshold alert type', () => { 'test.metric.1', stateResult2 ); - expect(stateResult3.groups).toEqual(expect.arrayContaining(['a', 'b'])); + expect(stateResult3.missingGroups).toEqual(expect.arrayContaining([])); }); const executeWithFilter = ( @@ -261,13 +507,87 @@ describe('The metric threshold alert type', () => { state: state ?? mockOptions.state.wrapped, }); test('persists previous groups that go missing, until the filterQuery param changes', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + c: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const stateResult1 = await executeWithFilter( Comparator.GT, [0.75], JSON.stringify({ query: 'q' }), 'test.metric.2' ); - expect(stateResult1.groups).toEqual(expect.arrayContaining(['a', 'b', 'c'])); + expect(stateResult1.missingGroups).toEqual(expect.arrayContaining([])); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + c: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); const stateResult2 = await executeWithFilter( Comparator.GT, [0.75], @@ -275,7 +595,33 @@ describe('The metric threshold alert type', () => { 'test.metric.1', stateResult1 ); - expect(stateResult2.groups).toEqual(expect.arrayContaining(['a', 'b', 'c'])); + expect(stateResult2.missingGroups).toEqual(expect.arrayContaining(['c'])); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.75], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const stateResult3 = await executeWithFilter( Comparator.GT, [0.75], @@ -283,7 +629,7 @@ describe('The metric threshold alert type', () => { 'test.metric.1', stateResult2 ); - expect(stateResult3.groups).toEqual(expect.arrayContaining(['a', 'b'])); + expect(stateResult3.groups).toEqual(expect.arrayContaining([])); }); }); @@ -318,16 +664,110 @@ describe('The metric threshold alert type', () => { }, }); test('sends an alert when all criteria cross the threshold', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [1.0], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [3.0], + metric: 'test.metric.2', + currentValue: 3.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const instanceID = '*'; await execute(Comparator.GT_OR_EQ, [1.0], [3.0]); expect(mostRecentAction(instanceID)).toBeAlertAction(); }); test('sends no alert when some, but not all, criteria cross the threshold', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.LT_OR_EQ, + threshold: [1.0], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + {}, + ]); const instanceID = '*'; await execute(Comparator.LT_OR_EQ, [1.0], [2.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts only on groups that meet all criteria when querying with a groupBy parameter', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [1.0], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [1.0], + metric: 'test.metric.1', + currentValue: 3.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [3.0], + metric: 'test.metric.2', + currentValue: 3.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [3.0], + metric: 'test.metric.2', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const instanceIdA = 'a'; const instanceIdB = 'b'; await execute(Comparator.GT_OR_EQ, [1.0], [3.0], 'something'); @@ -335,6 +775,34 @@ describe('The metric threshold alert type', () => { expect(mostRecentAction(instanceIdB)).toBe(undefined); }); test('sends all criteria to the action context', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [1.0], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT_OR_EQ, + threshold: [3.0], + metric: 'test.metric.2', + currentValue: 3.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const instanceID = '*'; await execute(Comparator.GT_OR_EQ, [1.0], [3.0]); const { action } = mostRecentAction(instanceID); @@ -371,8 +839,38 @@ describe('The metric threshold alert type', () => { }, }); test('alerts based on the doc_count value instead of the aggregatedValue', async () => { + setEvaluationResults([ + { + '*': { + ...baseCountCriterion, + comparator: Comparator.GT, + threshold: [0.9], + metric: 'count', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [0.9]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setEvaluationResults([ + { + '*': { + ...baseCountCriterion, + comparator: Comparator.LT, + threshold: [0.5], + metric: 'count', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.LT, [0.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); @@ -403,15 +901,64 @@ describe('The metric threshold alert type', () => { const instanceIdB = 'b'; test('successfully detects and alerts on a document count of 0', async () => { + setEvaluationResults([ + { + a: { + ...baseCountCriterion, + comparator: Comparator.LT_OR_EQ, + threshold: [0], + metric: 'count', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseCountCriterion, + comparator: Comparator.LT_OR_EQ, + threshold: [0], + metric: 'count', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const resultState = await executeGroupBy(Comparator.LT_OR_EQ, [0]); expect(mostRecentAction(instanceIdA)).toBe(undefined); expect(mostRecentAction(instanceIdB)).toBe(undefined); + setEvaluationResults([ + { + a: { + ...baseCountCriterion, + comparator: Comparator.LT_OR_EQ, + threshold: [0], + metric: 'count', + currentValue: 0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseCountCriterion, + comparator: Comparator.LT_OR_EQ, + threshold: [0], + metric: 'count', + currentValue: 0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await executeGroupBy(Comparator.LT_OR_EQ, [0], 'empty-response', resultState); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); expect(mostRecentAction(instanceIdB)).toBeAlertAction(); - await executeGroupBy(Comparator.LT_OR_EQ, [0]); - expect(mostRecentAction(instanceIdA)).toBe(undefined); - expect(mostRecentAction(instanceIdB)).toBe(undefined); }); }); }); @@ -435,8 +982,38 @@ describe('The metric threshold alert type', () => { }, }); test('alerts based on the p99 values', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [1], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [1]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [1], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.LT, [1]); expect(mostRecentAction(instanceID)).toBe(undefined); }); @@ -462,8 +1039,38 @@ describe('The metric threshold alert type', () => { }, }); test('alerts based on the p95 values', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0.25], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.GT, [0.25]); expect(mostRecentAction(instanceID)).toBeAlertAction(); + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [0.95], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute(Comparator.LT, [0.95]); expect(mostRecentAction(instanceID)).toBe(undefined); }); @@ -489,10 +1096,40 @@ describe('The metric threshold alert type', () => { }, }); test('sends a No Data alert when configured to do so', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [1], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); await execute(true); expect(mostRecentAction(instanceID)).toBeNoDataAction(); }); test('does not send a No Data alert when not configured to do so', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.LT, + threshold: [1], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); await execute(false); expect(mostRecentAction(instanceID)).toBe(undefined); }); @@ -535,10 +1172,66 @@ describe('The metric threshold alert type', () => { const interTestStateStorage: any[] = []; test('first sends a No Data alert with the * group, but then reports groups when data is available', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); let resultState = await executeEmptyResponse(); expect(mostRecentAction(instanceID)).toBeNoDataAction(); + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); resultState = await executeEmptyResponse(true, resultState); expect(mostRecentAction(instanceID)).toBeNoDataAction(); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 1.0, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); resultState = await execute2GroupsABResponse(true, resultState); expect(mostRecentAction(instanceID)).toBe(undefined); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); @@ -549,17 +1242,106 @@ describe('The metric threshold alert type', () => { // Pop a previous execution result instead of defining it manually // The type signature of alert executor states are complex const resultState = interTestStateStorage.pop(); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); await executeEmptyResponse(true, resultState); expect(mostRecentAction(instanceID)).toBe(undefined); expect(mostRecentAction(instanceIdA)).toBeNoDataAction(); expect(mostRecentAction(instanceIdB)).toBeNoDataAction(); }); test('does not send individual No Data alerts when groups disappear if alertOnGroupDisappear is disabled', async () => { + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.2', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + c: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.2', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); const resultState = await execute3GroupsABCResponse(false); expect(mostRecentAction(instanceID)).toBe(undefined); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); expect(mostRecentAction(instanceIdB)).toBeAlertAction(); expect(mostRecentAction(instanceIdC)).toBeAlertAction(); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); await execute2GroupsABResponse(false, resultState); expect(mostRecentAction(instanceID)).toBe(undefined); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); @@ -595,10 +1377,66 @@ describe('The metric threshold alert type', () => { executeWeirdNoDataConfig('test.metric.1', ...args); test('does not send a No Data alert with the * group, but then reports groups when data is available', async () => { + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); let resultState = await executeWeirdEmptyResponse(); expect(mostRecentAction(instanceID)).toBe(undefined); + setEvaluationResults([ + { + '*': { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); resultState = await executeWeirdEmptyResponse(resultState); expect(mostRecentAction(instanceID)).toBe(undefined); + setEvaluationResults([ + { + a: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 1, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + b: { + ...baseNonCountCriterion, + comparator: Comparator.GT, + threshold: [0], + metric: 'test.metric.1', + currentValue: 3, + timestamp: new Date().toISOString(), + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); resultState = await executeWeird2GroupsABResponse(resultState); expect(mostRecentAction(instanceID)).toBe(undefined); expect(mostRecentAction(instanceIdA)).toBeAlertAction(); @@ -607,120 +1445,37 @@ describe('The metric threshold alert type', () => { }); test('sends No Data alerts for the previously detected groups when they stop reporting data, but not the * group', async () => { const resultState = interTestStateStorage.pop(); // Import the resultState from the previous test - await executeWeirdEmptyResponse(resultState); - expect(mostRecentAction(instanceID)).toBe(undefined); - expect(mostRecentAction(instanceIdA)).toBeNoDataAction(); - expect(mostRecentAction(instanceIdB)).toBeNoDataAction(); - }); - }); - }); - - describe("querying a rate-aggregated metric that hasn't reported data", () => { - afterAll(() => clearInstances()); - const instanceID = '*'; - const execute = (sourceId: string = 'default') => - executor({ - ...mockOptions, - services, - params: { - criteria: [ - { + setEvaluationResults([ + { + a: { ...baseNonCountCriterion, comparator: Comparator.GT, - threshold: [1], + threshold: [0], metric: 'test.metric.3', - aggType: Aggregators.RATE, - }, - ], - alertOnNoData: true, - }, - }); - test('sends a No Data alert', async () => { - await execute(); - expect(mostRecentAction(instanceID)).toBeNoDataAction(); - }); - }); - - /* - * Custom recovery actions aren't yet available in the alerting framework - * Uncomment the code below once they've been implemented - * Reference: https://github.com/elastic/kibana/issues/87048 - */ - - /* - describe('querying a metric that later recovers', () => { - const instanceID = '*'; - const execute = (threshold: number[]) => - executor({ - ...mockOptions, - services, - params: { - criteria: [ - { - ...baseNonCountCriterion, - comparator: Comparator.GT, - threshold, + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, }, - ], - }, - }); - beforeAll(() => (persistAlertInstances = true)); - afterAll(() => (persistAlertInstances = false)); - - test('sends a recovery alert as soon as the metric recovers', async () => { - await execute([0.5]); - expect(mostRecentAction(instanceID)).toBeAlertAction(); - expect(getState(instanceID).alertState).toBe(AlertStates.ALERT); - await execute([2]); - expect(mostRecentAction(instanceID).id).toBe(RecoveredActionGroup.id); - expect(getState(instanceID).alertState).toBe(AlertStates.OK); - }); - test('does not continue to send a recovery alert if the metric is still OK', async () => { - await execute([2]); - expect(mostRecentAction(instanceID)).toBe(undefined); - expect(getState(instanceID).alertState).toBe(AlertStates.OK); - await execute([2]); - expect(mostRecentAction(instanceID)).toBe(undefined); - expect(getState(instanceID).alertState).toBe(AlertStates.OK); - }); - test('sends a recovery alert again once the metric alerts and recovers again', async () => { - await execute([0.5]); - expect(mostRecentAction(instanceID)).toBeAlertAction(); - expect(getState(instanceID).alertState).toBe(AlertStates.ALERT); - await execute([2]); - expect(mostRecentAction(instanceID).id).toBe(RecoveredActionGroup.id); - expect(getState(instanceID).alertState).toBe(AlertStates.OK); - }); - }); - */ - - describe('querying a metric with a percentage metric', () => { - afterAll(() => clearInstances()); - const instanceID = '*'; - const execute = () => - executor({ - ...mockOptions, - services, - params: { - sourceId: 'default', - criteria: [ - { + b: { ...baseNonCountCriterion, - metric: 'test.metric.pct', comparator: Comparator.GT, - threshold: [0.75], + threshold: [0], + metric: 'test.metric.3', + currentValue: null, + timestamp: new Date().toISOString(), + shouldFire: false, + shouldWarn: false, + isNoData: true, }, - ], - }, + }, + ]); + await executeWeirdEmptyResponse(resultState); + expect(mostRecentAction(instanceID)).toBe(undefined); + expect(mostRecentAction(instanceIdA)).toBeNoDataAction(); + expect(mostRecentAction(instanceIdB)).toBeNoDataAction(); }); - test('reports values converted from decimals to percentages to the action context', async () => { - await execute(); - const { action } = mostRecentAction(instanceID); - expect(action.group).toBe('*'); - expect(action.reason).toContain('is 100%'); - expect(action.reason).toContain('Alert when > 75%'); - expect(action.threshold.condition0[0]).toBe('75%'); - expect(action.value.condition0).toBe('100%'); }); }); @@ -773,6 +1528,10 @@ const mockLibs: any = { metricsRules: { createLifecycleRuleExecutor: createLifecycleRuleExecutorMock, }, + basePath: { + publicBaseUrl: 'http://localhost:5601', + prepend: (path: string) => path, + }, }; const executor = createMetricThresholdExecutor(mockLibs); @@ -783,39 +1542,6 @@ const services: RuleExecutorServicesMock & ...alertsServices, ...ruleRegistryMocks.createLifecycleAlertServices(alertsServices), }; - -services.scopedClusterClient.asCurrentUser.search.mockResponseImplementation( - (params?: any): any => { - const from = params?.body.query.bool.filter[0]?.range['@timestamp'].gte; - - if (params.index === 'alternatebeat-*') return { body: mocks.changedSourceIdResponse(from) }; - - if (params.index === 'empty-response') return { body: mocks.emptyMetricResponse }; - - const metric = params?.body.query.bool.filter[1]?.exists.field; - if (metric === 'test.metric.3') { - return { - body: params?.body.aggs.aggregatedIntervals?.aggregations.aggregatedValueMax - ? mocks.emptyRateResponse - : mocks.emptyMetricResponse, - }; - } - if (params?.body.aggs.groupings) { - if (params?.body.aggs.groupings.composite.after) { - return { body: mocks.compositeEndResponse }; - } - if (metric === 'test.metric.2') { - return { body: mocks.alternateCompositeResponse(from) }; - } - return { body: mocks.basicCompositeResponse(from) }; - } - if (metric === 'test.metric.2') { - return { body: mocks.alternateMetricResponse() }; - } - return { body: mocks.basicMetricResponse() }; - } -); - services.savedObjectsClient.get.mockImplementation(async (type: string, sourceId: string) => { if (sourceId === 'alternate') return { @@ -882,7 +1608,7 @@ expect.extend({ }; }, toBeNoDataAction(action?: Action) { - const pass = action?.id === FIRED_ACTIONS.id && action?.action.alertState === 'NO DATA'; + const pass = action?.id === NO_DATA_ACTIONS.id && action?.action.alertState === 'NO DATA'; const message = () => `expected ${action} to be a NO DATA action`; return { message, diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts index c69595ee652072..64d85ea582368d 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.ts @@ -7,7 +7,7 @@ import { i18n } from '@kbn/i18n'; import { ALERT_REASON } from '@kbn/rule-data-utils'; -import { first, isEqual, last } from 'lodash'; +import { isEqual } from 'lodash'; import { ActionGroupIdsOf, AlertInstanceContext as AlertContext, @@ -20,7 +20,6 @@ import { AlertStates, Comparator } from '../../../../common/alerting/metrics'; import { createFormatter } from '../../../../common/formatters'; import { InfraBackendLibs } from '../../infra_types'; import { - buildErrorAlertReason, buildFiredAlertReason, buildInvalidQueryAlertReason, buildNoDataAlertReason, @@ -34,7 +33,8 @@ import { EvaluatedRuleParams, evaluateRule } from './lib/evaluate_rule'; export type MetricThresholdRuleParams = Record; export type MetricThresholdRuleTypeState = RuleTypeState & { - groups: string[]; + lastRunTimestamp?: number; + missingGroups?: string[]; groupBy?: string | string[]; filterQuery?: string; }; @@ -106,7 +106,12 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => value: null, metric: mapToConditionsLookup(criteria, (c) => c.metric), }); - return { groups: [], groupBy: params.groupBy, filterQuery: params.filterQuery }; + return { + lastRunTimestamp: startedAt.valueOf(), + missingGroups: [], + groupBy: params.groupBy, + filterQuery: params.filterQuery, + }; } } @@ -120,49 +125,46 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => const config = source.configuration; const compositeSize = libs.configuration.alerting.metric_threshold.group_by_page_size; - const previousGroupBy = state.groupBy; - const previousFilterQuery = state.filterQuery; - const prevGroups = - alertOnGroupDisappear && - isEqual(previousGroupBy, params.groupBy) && - isEqual(previousFilterQuery, params.filterQuery) - ? // Filter out the * key from the previous groups, only include it if it's one of - // the current groups. In case of a groupBy alert that starts out with no data and no - // groups, we don't want to persist the existence of the * alert instance - state.groups?.filter((g) => g !== UNGROUPED_FACTORY_KEY) ?? [] - : []; + const filterQueryIsSame = isEqual(state.filterQuery, params.filterQuery); + const groupByIsSame = isEqual(state.groupBy, params.groupBy); + const previousMissingGroups = + alertOnGroupDisappear && filterQueryIsSame && groupByIsSame ? state.missingGroups : []; const alertResults = await evaluateRule( services.scopedClusterClient.asCurrentUser, params as EvaluatedRuleParams, config, - prevGroups, compositeSize, - { end: startedAt.valueOf() } + alertOnGroupDisappear, + state.lastRunTimestamp, + { end: startedAt.valueOf() }, + previousMissingGroups ); - // Because each alert result has the same group definitions, just grab the groups from the first one. - const resultGroups = Object.keys(first(alertResults)!); - // Merge the list of currently fetched groups and previous groups, and uniquify them. This is necessary for reporting - // no data results on groups that get removed - const groups = [...new Set([...prevGroups, ...resultGroups])]; + const resultGroupSet = new Set(); + for (const resultSet of alertResults) { + for (const group of Object.keys(resultSet)) { + resultGroupSet.add(group); + } + } + const groups = [...resultGroupSet]; + const nextMissingGroups = new Set(); const hasGroups = !isEqual(groups, [UNGROUPED_FACTORY_KEY]); + for (const group of groups) { // AND logic; all criteria must be across the threshold - const shouldAlertFire = alertResults.every((result) => - // Grab the result of the most recent bucket - last(result[group].shouldFire) - ); - const shouldAlertWarn = alertResults.every((result) => last(result[group].shouldWarn)); + const shouldAlertFire = alertResults.every((result) => result[group]?.shouldFire); + const shouldAlertWarn = alertResults.every((result) => result[group]?.shouldWarn); // AND logic; because we need to evaluate all criteria, if one of them reports no data then the // whole alert is in a No Data/Error state - const isNoData = alertResults.some((result) => last(result[group].isNoData)); - const isError = alertResults.some((result) => result[group].isError); + const isNoData = alertResults.some((result) => result[group]?.isNoData); - const nextState = isError - ? AlertStates.ERROR - : isNoData + if (isNoData && group !== UNGROUPED_FACTORY_KEY) { + nextMissingGroups.add(group); + } + + const nextState = isNoData ? AlertStates.NO_DATA : shouldAlertFire ? AlertStates.ALERT @@ -180,15 +182,6 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => }) ) .join('\n'); - /* - * Custom recovery actions aren't yet available in the alerting framework - * Uncomment the code below once they've been implemented - * Reference: https://github.com/elastic/kibana/issues/87048 - */ - // } else if (nextState === AlertStates.OK && prevState?.alertState === AlertStates.ALERT) { - // reason = alertResults - // .map((result) => buildRecoveredAlertReason(formatAlertResult(result[group]))) - // .join('\n'); } /* NO DATA STATE HANDLING @@ -215,20 +208,16 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => .filter((result) => result[group].isNoData) .map((result) => buildNoDataAlertReason({ ...result[group], group })) .join('\n'); - } else if (nextState === AlertStates.ERROR) { - reason = alertResults - .filter((result) => result[group].isError) - .map((result) => buildErrorAlertReason(result[group].metric)) - .join('\n'); } } if (reason) { - const firstResult = first(alertResults); - const timestamp = (firstResult && firstResult[group].timestamp) ?? startedAt.toISOString(); + const timestamp = startedAt.toISOString(); const actionGroupId = nextState === AlertStates.OK ? RecoveredActionGroup.id + : nextState === AlertStates.NO_DATA + ? NO_DATA_ACTIONS.id : nextState === AlertStates.WARNING ? WARNING_ACTIONS.id : FIRED_ACTIONS.id; @@ -251,7 +240,12 @@ export const createMetricThresholdExecutor = (libs: InfraBackendLibs) => }); } } - return { groups, groupBy: params.groupBy, filterQuery: params.filterQuery }; + return { + lastRunTimestamp: startedAt.valueOf(), + missingGroups: [...nextMissingGroups], + groupBy: params.groupBy, + filterQuery: params.filterQuery, + }; }); export const FIRED_ACTIONS = { @@ -268,6 +262,13 @@ export const WARNING_ACTIONS = { }), }; +export const NO_DATA_ACTIONS = { + id: 'metrics.threshold.nodata', + name: i18n.translate('xpack.infra.metrics.alerting.threshold.nodata', { + defaultMessage: 'No Data', + }), +}; + const mapToConditionsLookup = ( list: any[], mapFn: (value: any, index: number, array: any[]) => unknown @@ -282,7 +283,7 @@ const mapToConditionsLookup = ( const formatAlertResult = ( alertResult: { metric: string; - currentValue: number; + currentValue: number | null; threshold: number[]; comparator: Comparator; warningThreshold?: number[]; diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/register_metric_threshold_rule_type.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/register_metric_threshold_rule_type.ts index e414214c601041..be5793845bf439 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/register_metric_threshold_rule_type.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/register_metric_threshold_rule_type.ts @@ -27,10 +27,11 @@ import { createMetricThresholdExecutor, FIRED_ACTIONS, WARNING_ACTIONS, + NO_DATA_ACTIONS, } from './metric_threshold_executor'; type MetricThresholdAllowedActionGroups = ActionGroupIdsOf< - typeof FIRED_ACTIONS | typeof WARNING_ACTIONS + typeof FIRED_ACTIONS | typeof WARNING_ACTIONS | typeof NO_DATA_ACTIONS >; export type MetricThresholdAlertType = Omit & { ActionGroupIdsOf: MetricThresholdAllowedActionGroups; @@ -79,13 +80,12 @@ export async function registerMetricThresholdRuleType( sourceId: schema.string(), alertOnNoData: schema.maybe(schema.boolean()), alertOnGroupDisappear: schema.maybe(schema.boolean()), - shouldDropPartialBuckets: schema.maybe(schema.boolean()), }, { unknowns: 'allow' } ), }, defaultActionGroupId: FIRED_ACTIONS.id, - actionGroups: [FIRED_ACTIONS, WARNING_ACTIONS], + actionGroups: [FIRED_ACTIONS, WARNING_ACTIONS, NO_DATA_ACTIONS], minimumLicenseRequired: 'basic', isExportable: true, executor: createMetricThresholdExecutor(libs), diff --git a/x-pack/plugins/translations/translations/fr-FR.json b/x-pack/plugins/translations/translations/fr-FR.json index 7232ee8c63f424..1a56be4797a4f4 100644 --- a/x-pack/plugins/translations/translations/fr-FR.json +++ b/x-pack/plugins/translations/translations/fr-FR.json @@ -15331,8 +15331,6 @@ "xpack.infra.metrics.alertFlyout.createAlertPerHelpText": "Créer une alerte pour chaque valeur unique. Par exemple : \"host.id\" ou \"cloud.region\".", "xpack.infra.metrics.alertFlyout.createAlertPerText": "Regrouper les alertes par (facultatif)", "xpack.infra.metrics.alertFlyout.criticalThreshold": "Alerte", - "xpack.infra.metrics.alertFlyout.docCountNoDataDisabledHelpText": "[Ce paramètre n’est pas applicable à l’agrégateur du nombre de documents.]", - "xpack.infra.metrics.alertFlyout.dropPartialBucketsHelpText": "Activez cette option pour abandonner le groupe de données d'évaluation le plus récent s'il fait moins de {timeSize}{timeUnit}.", "xpack.infra.metrics.alertFlyout.error.aggregationRequired": "L'agrégation est requise.", "xpack.infra.metrics.alertFlyout.error.customMetricFieldRequired": "Champ obligatoire.", "xpack.infra.metrics.alertFlyout.error.invalidFilterQuery": "La requête de filtre n'est pas valide.", @@ -15363,7 +15361,6 @@ "xpack.infra.metrics.alertFlyout.outsideRangeLabel": "N'est pas entre", "xpack.infra.metrics.alertFlyout.removeCondition": "Retirer la condition", "xpack.infra.metrics.alertFlyout.removeWarningThreshold": "Retirer le seuil d'avertissement", - "xpack.infra.metrics.alertFlyout.shouldDropPartialBuckets": "Abandonner les groupes partiels lors de l'évaluation des données", "xpack.infra.metrics.alertFlyout.warningThreshold": "Avertissement", "xpack.infra.metrics.alerting.alertStateActionVariableDescription": "État actuel de l'alerte", "xpack.infra.metrics.alerting.anomaly.defaultActionMessage": "\\{\\{alertName\\}\\} est à l'état \\{\\{context.alertState\\}\\}\n\n\\{\\{context.metric\\}\\} était \\{\\{context.summary\\}\\} que la normale à \\{\\{context.timestamp\\}\\}\n\nValeur typique : \\{\\{context.typical\\}\\}\nValeur réelle : \\{\\{context.actual\\}\\}\n", diff --git a/x-pack/plugins/translations/translations/ja-JP.json b/x-pack/plugins/translations/translations/ja-JP.json index 003a20771c1cb4..18a776b87acb2f 100644 --- a/x-pack/plugins/translations/translations/ja-JP.json +++ b/x-pack/plugins/translations/translations/ja-JP.json @@ -15451,7 +15451,6 @@ "xpack.infra.metrics.alertFlyout.createAlertPerText": "アラートのグループ化条件(オプション)", "xpack.infra.metrics.alertFlyout.criticalThreshold": "アラート", "xpack.infra.metrics.alertFlyout.docCountNoDataDisabledHelpText": "[この設定は、ドキュメントカウントアグリゲーターには適用されません。]", - "xpack.infra.metrics.alertFlyout.dropPartialBucketsHelpText": "これを有効にすると、{timeSize}{timeUnit}未満の場合は、評価データの最新のバケットを破棄します。", "xpack.infra.metrics.alertFlyout.error.aggregationRequired": "集約が必要です。", "xpack.infra.metrics.alertFlyout.error.customMetricFieldRequired": "フィールドが必要です。", "xpack.infra.metrics.alertFlyout.error.invalidFilterQuery": "フィルタークエリは無効です。", @@ -15482,7 +15481,6 @@ "xpack.infra.metrics.alertFlyout.outsideRangeLabel": "is not between", "xpack.infra.metrics.alertFlyout.removeCondition": "条件を削除", "xpack.infra.metrics.alertFlyout.removeWarningThreshold": "warningThresholdを削除", - "xpack.infra.metrics.alertFlyout.shouldDropPartialBuckets": "データを評価するときに部分バケットを破棄", "xpack.infra.metrics.alertFlyout.warningThreshold": "警告", "xpack.infra.metrics.alerting.alertStateActionVariableDescription": "現在のアラートの状態", "xpack.infra.metrics.alerting.anomaly.defaultActionMessage": "\\{\\{alertName\\}\\}は\\{\\{context.alertState\\}\\}の状態です\n\n\\{\\{context.metric\\}\\}は\\{\\{context.timestamp\\}\\}で標準を超える\\{\\{context.summary\\}\\}でした\n\n標準の値:\\{\\{context.typical\\}\\}\n実際の値:\\{\\{context.actual\\}\\}\n", diff --git a/x-pack/plugins/translations/translations/zh-CN.json b/x-pack/plugins/translations/translations/zh-CN.json index e86c12888a520c..8e2254d1cf80ab 100644 --- a/x-pack/plugins/translations/translations/zh-CN.json +++ b/x-pack/plugins/translations/translations/zh-CN.json @@ -15477,7 +15477,6 @@ "xpack.infra.metrics.alertFlyout.createAlertPerText": "告警分组依据(可选)", "xpack.infra.metrics.alertFlyout.criticalThreshold": "告警", "xpack.infra.metrics.alertFlyout.docCountNoDataDisabledHelpText": "[此设置不适用于文档计数聚合器。]", - "xpack.infra.metrics.alertFlyout.dropPartialBucketsHelpText": "启用此选项后,最近的评估数据存储桶小于 {timeSize}{timeUnit} 时将会被丢弃。", "xpack.infra.metrics.alertFlyout.error.aggregationRequired": "“聚合”必填。", "xpack.infra.metrics.alertFlyout.error.customMetricFieldRequired": "“字段”必填。", "xpack.infra.metrics.alertFlyout.error.invalidFilterQuery": "筛选查询无效。", @@ -15508,7 +15507,6 @@ "xpack.infra.metrics.alertFlyout.outsideRangeLabel": "不介于", "xpack.infra.metrics.alertFlyout.removeCondition": "删除条件", "xpack.infra.metrics.alertFlyout.removeWarningThreshold": "移除警告阈值", - "xpack.infra.metrics.alertFlyout.shouldDropPartialBuckets": "评估数据时丢弃部分存储桶", "xpack.infra.metrics.alertFlyout.warningThreshold": "警告", "xpack.infra.metrics.alerting.alertStateActionVariableDescription": "告警的当前状态", "xpack.infra.metrics.alerting.anomaly.defaultActionMessage": "\\{\\{alertName\\}\\} 处于 \\{\\{context.alertState\\}\\} 状态\n\n\\{\\{context.metric\\}\\} 在 \\{\\{context.timestamp\\}\\}比正常\\{\\{context.summary\\}\\}\n\n典型值:\\{\\{context.typical\\}\\}\n实际值:\\{\\{context.actual\\}\\}\n", diff --git a/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts b/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts index 9c9d4bd096a639..2e83e533b5c8f7 100644 --- a/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts +++ b/x-pack/test/api_integration/apis/metrics_ui/metric_threshold_alert.ts @@ -6,6 +6,7 @@ */ import expect from '@kbn/expect'; +import moment from 'moment'; import { Aggregators, Comparator, @@ -21,7 +22,6 @@ import { FtrProviderContext } from '../../ftr_provider_context'; import { DATES } from './constants'; const { gauge, rate } = DATES['alert-test-data']; - export default function ({ getService }: FtrProviderContext) { const esArchiver = getService('esArchiver'); const esClient = getService('es'); @@ -88,7 +88,7 @@ export default function ({ getService }: FtrProviderContext) { timeSize: 5, timeUnit: 'm', threshold: [10000], - comparator: Comparator.LT_OR_EQ, + comparator: Comparator.GT_OR_EQ, aggType: Aggregators.COUNT, } as CountMetricExpressionParams, ], @@ -98,29 +98,36 @@ export default function ({ getService }: FtrProviderContext) { metricAlias: 'filebeat-*', }; const timeFrame = { end: DATES.ten_thousand_plus.max }; - const results = await evaluateRule(esClient, params, config, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + config, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { '*': { timeSize: 5, timeUnit: 'm', threshold: [10000], - comparator: '<=', + comparator: '>=', aggType: 'count', metric: 'Document count', currentValue: 20895, - timestamp: '2021-10-19T00:48:59.997Z', - shouldFire: [false], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-10-19T00:53:59.997Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, }, ]); }); }); describe('with group by', () => { - it('should alert on document count', async () => { + it('should trigger on document count', async () => { const params = { ...baseParams, groupBy: ['event.category'], @@ -128,8 +135,8 @@ export default function ({ getService }: FtrProviderContext) { { timeSize: 5, timeUnit: 'm', - threshold: [10000], - comparator: Comparator.LT_OR_EQ, + threshold: [20000], + comparator: Comparator.GT_OR_EQ, aggType: Aggregators.COUNT, } as CountMetricExpressionParams, ], @@ -139,28 +146,287 @@ export default function ({ getService }: FtrProviderContext) { metricAlias: 'filebeat-*', }; const timeFrame = { end: DATES.ten_thousand_plus.max }; - const results = await evaluateRule(esClient, params, config, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + config, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { web: { timeSize: 5, timeUnit: 'm', - threshold: [10000], - comparator: '<=', + threshold: [20000], + comparator: '>=', aggType: 'count', metric: 'Document count', currentValue: 20895, - timestamp: '2021-10-19T00:48:59.997Z', - shouldFire: [false], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-10-19T00:53:59.997Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); + }); + it('shouldFire on document count and isNoData for missing group ', async () => { + const params = { + ...baseParams, + groupBy: ['event.category'], + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [20000], + comparator: Comparator.GT_OR_EQ, + aggType: Aggregators.COUNT, + } as CountMetricExpressionParams, + ], + }; + const config = { + ...configuration, + metricAlias: 'filebeat-*', + }; + const timeFrame = { end: DATES.ten_thousand_plus.max }; + const results = await evaluateRule( + esClient, + params, + config, + 10000, + true, + void 0, + timeFrame, + ['middleware'] + ); + expect(results).to.eql([ + { + web: { + timeSize: 5, + timeUnit: 'm', + threshold: [20000], + comparator: '>=', + aggType: 'count', + metric: 'Document count', + currentValue: 20895, + timestamp: '2021-10-19T00:53:59.997Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + middleware: { + timeSize: 5, + timeUnit: 'm', + threshold: [20000], + comparator: '>=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-10-19T00:53:59.997Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, }, }, ]); }); }); }); + describe('without ANY data', () => { + describe('without groupBy', () => { + it('should trigger NO_DATA for document count queries', async () => { + const params = { + ...baseParams, + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: Comparator.LT_OR_EQ, + aggType: Aggregators.COUNT, + } as CountMetricExpressionParams, + ], + }; + const timeFrame = { end: gauge.max }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); + expect(results).to.eql([ + { + '*': { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); + }); + it('should trigger NO_DATA for basic metric', async () => { + const params = { ...baseParams }; + const timeFrame = { end: gauge.max }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); + expect(results).to.eql([ + { + '*': { + timeSize: 5, + timeUnit: 'm', + threshold: [1], + comparator: '>=', + aggType: 'sum', + metric: 'value', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); + }); + }); + describe('with groupBy', () => { + describe('without pre-existing groups', () => { + it('should trigger NO_DATA for document count queries', async () => { + const params = { + ...baseParams, + groupBy: ['event.category'], + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: Comparator.LT_OR_EQ, + aggType: Aggregators.COUNT, + } as CountMetricExpressionParams, + ], + }; + const timeFrame = { end: gauge.max }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); + expect(results).to.eql([ + { + '*': { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); + }); + }); + describe('with pre-existing groups', () => { + it('should trigger NO_DATA for document count queries', async () => { + const params = { + ...baseParams, + groupBy: ['event.category'], + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: Comparator.LT_OR_EQ, + aggType: Aggregators.COUNT, + } as CountMetricExpressionParams, + ], + }; + const timeFrame = { end: gauge.max }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame, + ['web', 'prod'] + ); + expect(results).to.eql([ + { + '*': { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + web: { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + prod: { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: null, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, + }, + }, + ]); + }); + }); + }); + }); describe('with gauge data', () => { before(() => esArchiver.load('x-pack/test/functional/es_archives/infra/alerts_test_data')); after(() => esArchiver.unload('x-pack/test/functional/es_archives/infra/alerts_test_data')); @@ -180,7 +446,15 @@ export default function ({ getService }: FtrProviderContext) { ], }; const timeFrame = { end: gauge.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -191,11 +465,51 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'count', metric: 'Document count', currentValue: 4, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [true], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, + }, + }, + ]); + }); + it('should alert on ZERO document count outside the time frame', async () => { + const params = { + ...baseParams, + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: Comparator.LT_OR_EQ, + aggType: Aggregators.COUNT, + } as CountMetricExpressionParams, + ], + }; + const timeFrame = { end: gauge.max + 600_000 }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); + expect(results).to.eql([ + { + '*': { + timeSize: 5, + timeUnit: 'm', + threshold: [0], + comparator: '<=', + aggType: 'count', + metric: 'Document count', + currentValue: 0, + timestamp: '2021-01-01T01:10:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, }, ]); @@ -203,7 +517,15 @@ export default function ({ getService }: FtrProviderContext) { it('should alert on the last value when the end date is the same as the last event', async () => { const params = { ...baseParams }; const timeFrame = { end: gauge.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -214,11 +536,10 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'sum', metric: 'value', currentValue: 1, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [true], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, }, ]); @@ -240,7 +561,15 @@ export default function ({ getService }: FtrProviderContext) { ], }; const timeFrame = { end: gauge.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { dev: { @@ -251,11 +580,10 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'count', metric: 'Document count', currentValue: 2, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [true], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, prod: { timeSize: 5, @@ -265,11 +593,10 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'count', metric: 'Document count', currentValue: 2, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [true], - shouldWarn: [false], - isNoData: [false], - isError: false, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, }, ]); @@ -280,58 +607,116 @@ export default function ({ getService }: FtrProviderContext) { groupBy: ['env'], }; const timeFrame = { end: gauge.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { - dev: { + prod: { timeSize: 5, timeUnit: 'm', threshold: [1], comparator: '>=', aggType: 'sum', metric: 'value', - currentValue: 0, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [false], - shouldWarn: [false], - isNoData: [false], - isError: false, + currentValue: 1, + timestamp: '2021-01-01T01:00:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, - prod: { + }, + ]); + }); + + it('should report no data when one of the groups has a data gap', async () => { + const params = { + ...baseParams, + groupBy: ['env'], + }; + const timeFrame = { end: gauge.midpoint }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame, + ['dev'] + ); + expect(results).to.eql([ + { + dev: { timeSize: 5, timeUnit: 'm', threshold: [1], comparator: '>=', aggType: 'sum', metric: 'value', - currentValue: 1, - timestamp: '2021-01-01T00:55:00.000Z', - shouldFire: [true], - shouldWarn: [false], - isNoData: [false], - isError: false, + currentValue: null, + timestamp: '2021-01-01T00:30:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, }, }, ]); }); - it('should report no data when one of the groups has a data gap', async () => { + it('should NOT resport any alerts when missing group recovers', async () => { const params = { ...baseParams, + criteria: [ + { + timeSize: 5, + timeUnit: 'm', + threshold: [100], + comparator: Comparator.GT, + aggType: Aggregators.SUM, + metric: 'value', + } as NonCountMetricExpressionParams, + ], groupBy: ['env'], }; - const timeFrame = { end: gauge.midpoint }; + const timeFrame = { end: moment(gauge.midpoint).add(10, 'm').valueOf() }; + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + moment(gauge.midpoint).subtract(1, 'm').valueOf(), + timeFrame, + ['dev'] + ); + expect(results).to.eql([{}]); + }); + + it('should report no data when both groups stop reporting', async () => { + const params = { + ...baseParams, + groupBy: ['env'], + }; + const timeFrame = { end: moment(gauge.max).add(6, 'm').valueOf() }; const results = await evaluateRule( esClient, params, configuration, - ['dev', 'prod'], 10000, + true, + gauge.max, timeFrame ); expect(results).to.eql([ { - dev: { + prod: { timeSize: 5, timeUnit: 'm', threshold: [1], @@ -339,32 +724,29 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'sum', metric: 'value', currentValue: null, - timestamp: '2021-01-01T00:25:00.000Z', - shouldFire: [false], - shouldWarn: [false], - isNoData: [true], - isError: false, + timestamp: '2021-01-01T01:06:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, }, - prod: { + dev: { timeSize: 5, timeUnit: 'm', threshold: [1], comparator: '>=', aggType: 'sum', metric: 'value', - currentValue: 0, - timestamp: '2021-01-01T00:25:00.000Z', - shouldFire: [false], - shouldWarn: [false], - isNoData: [false], - isError: false, + currentValue: null, + timestamp: '2021-01-01T01:06:00.000Z', + shouldFire: false, + shouldWarn: false, + isNoData: true, }, }, ]); }); }); }); - describe('with rate data', () => { before(() => esArchiver.load('x-pack/test/functional/es_archives/infra/alerts_test_data')); after(() => esArchiver.unload('x-pack/test/functional/es_archives/infra/alerts_test_data')); @@ -384,7 +766,15 @@ export default function ({ getService }: FtrProviderContext) { ], }; const timeFrame = { end: rate.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { '*': { @@ -395,11 +785,10 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'rate', metric: 'value', currentValue: 0.6666666666666666, - timestamp: '2021-01-02T00:04:00.000Z', - shouldFire: [false, false, false, false, true], - shouldWarn: [false], - isNoData: [true, false, false, false, false], - isError: false, + timestamp: '2021-01-02T00:05:00.000Z', + shouldFire: true, + shouldWarn: false, + isNoData: false, }, }, ]); @@ -424,7 +813,15 @@ export default function ({ getService }: FtrProviderContext) { ], }; const timeFrame = { end: rate.max }; - const results = await evaluateRule(esClient, params, configuration, [], 10000, timeFrame); + const results = await evaluateRule( + esClient, + params, + configuration, + 10000, + true, + void 0, + timeFrame + ); expect(results).to.eql([ { dev: { @@ -437,11 +834,10 @@ export default function ({ getService }: FtrProviderContext) { aggType: 'rate', metric: 'value', currentValue: 0.6666666666666666, - timestamp: '2021-01-02T00:04:00.000Z', - shouldFire: [false, false, false, false, false], - shouldWarn: [false, false, false, false, true], - isNoData: [true, false, false, false, false], - isError: false, + timestamp: '2021-01-02T00:05:00.000Z', + shouldFire: false, + shouldWarn: true, + isNoData: false, }, }, ]); diff --git a/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts b/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts index 7df64d9057a4db..2e7a900e0fe877 100644 --- a/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts +++ b/x-pack/test/api_integration/apis/metrics_ui/metrics_alerting.ts @@ -7,7 +7,7 @@ import expect from '@kbn/expect'; import moment from 'moment'; -import { MetricExpressionParams } from '@kbn/infra-plugin/common/alerting/metrics'; +import { Comparator, MetricExpressionParams } from '@kbn/infra-plugin/common/alerting/metrics'; import { getElasticsearchMetricQuery } from '@kbn/infra-plugin/server/lib/alerting/metric_threshold/lib/metric_query'; import { FtrProviderContext } from '../../ftr_provider_context'; @@ -18,6 +18,8 @@ export default function ({ getService }: FtrProviderContext) { ({ aggType, timeUnit: 'm', + threshold: [0], + comparator: Comparator.GT_OR_EQ, timeSize: 5, ...(aggType !== 'count' ? { metric: 'test.metric' } : {}), } as MetricExpressionParams); @@ -37,7 +39,12 @@ export default function ({ getService }: FtrProviderContext) { start: moment().subtract(25, 'minutes').valueOf(), end: moment().valueOf(), }; - const searchBody = getElasticsearchMetricQuery(getSearchParams(aggType), timeframe, 100); + const searchBody = getElasticsearchMetricQuery( + getSearchParams(aggType), + timeframe, + 100, + true + ); const result = await client.search({ index, body: searchBody, @@ -58,7 +65,8 @@ export default function ({ getService }: FtrProviderContext) { getSearchParams('avg'), timeframe, 100, - undefined, + true, + void 0, '{"bool":{"should":[{"match_phrase":{"agent.hostname":"foo"}}],"minimum_should_match":1}}' ); const result = await client.search({ @@ -81,6 +89,8 @@ export default function ({ getService }: FtrProviderContext) { getSearchParams(aggType), timeframe, 100, + true, + void 0, 'agent.id' ); const result = await client.search({ @@ -101,6 +111,8 @@ export default function ({ getService }: FtrProviderContext) { getSearchParams('avg'), timeframe, 100, + true, + void 0, 'agent.id', '{"bool":{"should":[{"match_phrase":{"agent.hostname":"foo"}}],"minimum_should_match":1}}' );