Skip to content

Commit

Permalink
[Obs AI Assistant] register alert details context in observability pl…
Browse files Browse the repository at this point in the history
…ugin (#181501)

## Summary
Closes #181286

Create an AlertDetailsContextService in the `observability` plugin.
Solutions can use the service to register data handlers that fetch
information relevant to an alert's context; that information is then used
as input to the AI assistant when it is asked to investigate an alert.
Although only one handler is currently registered (from the apm plugin),
the benefits are: 1. making this information available to the AI assistant
connector, which can't call the apm API directly, and 2. allowing the
context to be extended with additional data in the future, for example logs.

#### Follow up
- Move apm route and associated tests to observability plugin
  • Loading branch information
klacabane authored May 1, 2024
1 parent 593d391 commit d883196
Show file tree
Hide file tree
Showing 25 changed files with 711 additions and 340 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import type { MinimalAPMRouteHandlerResources } from '../../routes/apm_routes/re

export type ApmAlertsClient = Awaited<ReturnType<typeof getApmAlertsClient>>;

export async function getApmAlertsClient({ plugins, request }: MinimalAPMRouteHandlerResources) {
export async function getApmAlertsClient({
plugins,
request,
}: Pick<MinimalAPMRouteHandlerResources, 'plugins' | 'request'>) {
const ruleRegistryPluginStart = await plugins.ruleRegistry.start();
const alertsClient = await ruleRegistryPluginStart.getRacClientWithRequest(request);
const apmAlertsIndices = await alertsClient.getAuthorizedAlertsIndices(['apm']);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ import { MinimalAPMRouteHandlerResources } from '../../routes/apm_routes/registe
export async function getApmEventClient({
context,
params,
config,
getApmIndices,
request,
}: Pick<
MinimalAPMRouteHandlerResources,
'context' | 'params' | 'config' | 'getApmIndices' | 'request'
'context' | 'params' | 'getApmIndices' | 'request'
>): Promise<APMEventClient> {
return withApmSpan('get_apm_event_client', async () => {
const coreContext = await context.core;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ export interface MlClient {
modules: MlModules;
}

export async function getMlClient({ plugins, context, request }: MinimalAPMRouteHandlerResources) {
export async function getMlClient({
plugins,
context,
request,
}: Pick<MinimalAPMRouteHandlerResources, 'plugins' | 'context' | 'request'>) {
const [coreContext, licensingContext] = await Promise.all([context.core, context.licensing]);

const mlplugin = plugins.ml;
Expand Down
7 changes: 6 additions & 1 deletion x-pack/plugins/observability_solution/apm/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import { createApmSourceMapIndexTemplate } from './routes/source_maps/create_apm
import { addApiKeysToEveryPackagePolicyIfMissing } from './routes/fleet/api_keys/add_api_keys_to_policies_if_missing';
import { apmTutorialCustomIntegration } from '../common/tutorial/tutorials';
import { registerAssistantFunctions } from './assistant_functions';
import { getAlertDetailsContextHandler } from './routes/assistant_functions/get_observability_alert_details_context/get_alert_details_context_handler';

export class APMPlugin
implements Plugin<APMPluginSetup, void, APMPluginSetupDependencies, APMPluginStartDependencies>
Expand All @@ -52,7 +53,7 @@ export class APMPlugin
}

public setup(core: CoreSetup<APMPluginStartDependencies>, plugins: APMPluginSetupDependencies) {
this.logger = this.initContext.logger.get();
const logger = (this.logger = this.initContext.logger.get());
const config$ = this.initContext.config.create<APMConfig>();

core.savedObjects.registerType(apmTelemetry);
Expand Down Expand Up @@ -221,6 +222,10 @@ export class APMPlugin
})
);

plugins.observability.alertDetailsContextualInsightsService.registerHandler(
getAlertDetailsContextHandler(resourcePlugins, logger)
);

return { config$ };
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import moment from 'moment';
import { LatencyAggregationType } from '../../../../common/latency_aggregation_types';
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import { ApmTimeseriesType, getApmTimeseries, TimeseriesChangePoint } from '../get_apm_timeseries';

/**
 * Change points detected for one APM timeseries, as returned by
 * `getServiceChangePoints` and `getExitSpanChangePoints`.
 */
export interface ChangePointGrouping {
// Title of the stat the timeseries was requested for (e.g. 'Latency').
title: string;
// Identifier of the timeseries the change points belong to (its `id`).
grouping: string;
// Change points found in that timeseries; the producers in this file only
// emit groupings where this list is non-empty.
changes: TimeseriesChangePoint[];
}

/**
 * Looks for change points in a service's own signals during the 12 hours
 * leading up to the alert: transaction latency (p95), transaction throughput,
 * transaction failure rate and error event rate.
 *
 * @param apmEventClient - client forwarded to `getApmTimeseries`.
 * @param alertStartedAt - alert start time; used as the end of the window.
 * @param serviceName - service to inspect; when missing no query is made.
 * @param serviceEnvironment - optional environment filter.
 * @param transactionType - optional filter applied to the transaction stats.
 * @param transactionName - optional filter applied to the transaction stats.
 * @returns one grouping per timeseries that has at least one change point,
 * or an empty array when `serviceName` is not provided.
 */
export async function getServiceChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
  transactionType,
  transactionName,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
  transactionType: string | undefined;
  transactionName: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  const windowStart = moment(alertStartedAt).subtract(12, 'hours').toISOString();

  // Filters shared by every stat requested below.
  const serviceScope = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };
  // Filters shared by the transaction-based stats only.
  const transactionScope = {
    'transaction.type': transactionType,
    'transaction.name': transactionName,
  };

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: windowStart,
      end: alertStartedAt,
      stats: [
        {
          title: 'Latency',
          ...serviceScope,
          timeseries: {
            name: ApmTimeseriesType.transactionLatency,
            function: LatencyAggregationType.p95,
            ...transactionScope,
          },
        },
        {
          title: 'Throughput',
          ...serviceScope,
          timeseries: {
            name: ApmTimeseriesType.transactionThroughput,
            ...transactionScope,
          },
        },
        {
          title: 'Failure rate',
          ...serviceScope,
          timeseries: {
            name: ApmTimeseriesType.transactionFailureRate,
            ...transactionScope,
          },
        },
        {
          title: 'Error events',
          ...serviceScope,
          timeseries: {
            name: ApmTimeseriesType.errorEventRate,
          },
        },
      ],
    },
  });

  // Keep only the timeseries in which a change point was actually detected.
  const groupings: ChangePointGrouping[] = [];
  for (const { stat, id, changes } of timeseriesList) {
    if (changes.length > 0) {
      groupings.push({ title: stat.title, grouping: id, changes });
    }
  }
  return groupings;
}

/**
 * Looks for change points in a service's exit spans (latency and failure
 * rate) during the 30 minutes leading up to the alert.
 *
 * @param apmEventClient - client forwarded to `getApmTimeseries`.
 * @param alertStartedAt - alert start time; used as the end of the window.
 * @param serviceName - service to inspect; when missing no query is made.
 * @param serviceEnvironment - optional environment filter.
 * @returns one grouping per timeseries that has at least one change point,
 * or an empty array when `serviceName` is not provided.
 */
export async function getExitSpanChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  const windowStart = moment(alertStartedAt).subtract(30, 'minute').toISOString();

  // Filters shared by both exit span stats.
  const serviceScope = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: windowStart,
      end: alertStartedAt,
      stats: [
        {
          title: 'Exit span latency',
          ...serviceScope,
          timeseries: { name: ApmTimeseriesType.exitSpanLatency },
        },
        {
          title: 'Exit span failure rate',
          ...serviceScope,
          timeseries: { name: ApmTimeseriesType.exitSpanFailureRate },
        },
      ],
    },
  });

  // Drop timeseries without change points, then reshape the remainder.
  return timeseriesList
    .filter(({ changes }) => changes.length > 0)
    .map(({ stat, id, changes }) => ({
      title: stat.title,
      grouping: id,
      changes,
    }));
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export async function getLogCategories({
arguments: args,
}: {
esClient: ElasticsearchClient;
coreContext: CoreRequestHandlerContext;
coreContext: Pick<CoreRequestHandlerContext, 'uiSettings'>;
arguments: {
start: string;
end: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { Logger } from '@kbn/core/server';
import {
AlertDetailsContextualInsightsHandlerQuery,
AlertDetailsContextualInsightsRequestContext,
} from '@kbn/observability-plugin/server/services';
import { getApmAlertsClient } from '../../../lib/helpers/get_apm_alerts_client';
import { getApmEventClient } from '../../../lib/helpers/get_apm_event_client';
import { getMlClient } from '../../../lib/helpers/get_ml_client';
import { getRandomSampler } from '../../../lib/helpers/get_random_sampler';
import { getObservabilityAlertDetailsContext } from '.';
import { APMRouteHandlerResources } from '../../apm_routes/register_apm_server_routes';

/**
 * Builds the APM handler for alert-details contextual insights.
 *
 * The returned handler assembles, from the incoming request context, an object
 * with the `request`, `params`, `plugins`, `context` and `getApmIndices` fields
 * that the APM client helpers consume, creates the clients in parallel and
 * delegates to `getObservabilityAlertDetailsContext`.
 *
 * @param resourcePlugins - APM plugin dependencies used to create the clients.
 * @param logger - logger forwarded to `getObservabilityAlertDetailsContext`.
 * @returns an async handler taking the request context and the handler query.
 */
export const getAlertDetailsContextHandler = (
  resourcePlugins: APMRouteHandlerResources['plugins'],
  logger: Logger
) => {
  return async (
    requestContext: AlertDetailsContextualInsightsRequestContext,
    query: AlertDetailsContextualInsightsHandlerQuery
  ) => {
    const { request } = requestContext;

    // Request-handler-context stand-in: `alerting` and `rac` are promises that
    // resolve to request-scoped client factories once the plugins have started.
    const context = {
      core: requestContext.core,
      licensing: requestContext.licensing,
      alerting: resourcePlugins.alerting!.start().then((alertingStart) => ({
        getRulesClient: () => alertingStart.getRulesClientWithRequest(request),
      })),
      rac: resourcePlugins.ruleRegistry.start().then((ruleRegistryStart) => ({
        getAlertsClient: () => ruleRegistryStart.getRacClientWithRequest(request),
      })),
    };

    const handlerResources = {
      getApmIndices: async () => {
        const { savedObjects } = await requestContext.core;
        return resourcePlugins.apmDataAccess.setup.getApmIndices(savedObjects.client);
      },
      request,
      params: { query: { _inspect: false } },
      plugins: resourcePlugins,
      context,
    };

    // NOTE(review): the last entry (`getRandomSampler`) is awaited but its
    // result is never read — confirm whether the call is still needed.
    const [eventClient, scopedAnnotationsClient, alertsClient, core, ml] = await Promise.all([
      getApmEventClient(handlerResources),
      resourcePlugins.observability.setup.getScopedAnnotationsClient(context, request),
      getApmAlertsClient(handlerResources),
      requestContext.core,
      getMlClient(handlerResources),
      getRandomSampler({
        security: resourcePlugins.security,
        probability: 1,
        request,
      }),
    ]);

    return getObservabilityAlertDetailsContext({
      coreContext: core,
      apmEventClient: eventClient,
      annotationsClient: scopedAnnotationsClient,
      apmAlertsClient: alertsClient,
      mlClient: ml,
      esClient: core.elasticsearch.client.asCurrentUser,
      query,
      logger,
    });
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { isEmpty } from 'lodash';
import { AlertDetailsContextualInsight } from '@kbn/observability-plugin/server/services';
import { APMDownstreamDependency } from '../get_apm_downstream_dependencies';
import { ServiceSummary } from '../get_apm_service_summary';
import { LogCategories } from '../get_log_categories';
import { ApmAnomalies } from '../get_apm_service_summary/get_anomalies';
import { ChangePointGrouping } from '../get_changepoints';

/**
 * Turns the APM data gathered for an alert into the list of contextual
 * insights handed to the assistant.
 *
 * Each input that is non-empty (per lodash `isEmpty`) contributes one entry
 * made of a `key`, a human-readable `description` and the raw `data`; empty or
 * missing inputs are left out. Entries are emitted in a fixed order: service
 * summary, downstream dependencies, service change points, exit span change
 * points, log categories, anomalies.
 *
 * @returns the insights for the non-empty inputs; an empty array when every
 * input is empty.
 */
export function getApmAlertDetailsContextPrompt({
  serviceName,
  serviceEnvironment,
  serviceSummary,
  downstreamDependencies,
  logCategories,
  serviceChangePoints,
  exitSpanChangePoints,
  anomalies,
}: {
  serviceName?: string;
  serviceEnvironment?: string;
  serviceSummary?: ServiceSummary;
  downstreamDependencies?: APMDownstreamDependency[];
  logCategories: LogCategories;
  serviceChangePoints?: ChangePointGrouping[];
  exitSpanChangePoints?: ChangePointGrouping[];
  anomalies?: ApmAnomalies;
}): AlertDetailsContextualInsight[] {
  // Every section spreads either a single insight or nothing, so the
  // resulting order is simply the order of the sections below.
  const insights: AlertDetailsContextualInsight[] = [
    ...(isEmpty(serviceSummary)
      ? []
      : [
          {
            key: 'serviceSummary',
            description: 'Metadata for the service where the alert occurred',
            data: serviceSummary,
          },
        ]),
    ...(isEmpty(downstreamDependencies)
      ? []
      : [
          {
            key: 'downstreamDependencies',
            description: `Downstream dependencies from the service "${serviceName}". Problems in these services can negatively affect the performance of "${serviceName}"`,
            data: downstreamDependencies,
          },
        ]),
    ...(isEmpty(serviceChangePoints)
      ? []
      : [
          {
            key: 'serviceChangePoints',
            description: `Significant change points for "${serviceName}". Use this to spot dips and spikes in throughput, latency and failure rate`,
            data: serviceChangePoints,
          },
        ]),
    ...(isEmpty(exitSpanChangePoints)
      ? []
      : [
          {
            key: 'exitSpanChangePoints',
            description: `Significant change points for the dependencies of "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate for downstream dependencies`,
            data: exitSpanChangePoints,
          },
        ]),
    ...(isEmpty(logCategories)
      ? []
      : [
          {
            key: 'logCategories',
            description: `Log events occurring around the time of the alert`,
            data: logCategories,
          },
        ]),
    ...(isEmpty(anomalies)
      ? []
      : [
          {
            key: 'anomalies',
            description: `Anomalies for services running in the environment "${serviceEnvironment}"`,
            data: anomalies,
          },
        ]),
  ];

  return insights;
}
Loading

0 comments on commit d883196

Please sign in to comment.