Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Sampler & Diversified Sampler aggs to AggConfigs #120135

Merged
merged 12 commits into from
Dec 8, 2021
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/agg_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ export const getAggTypes = () => ({
{ name: BUCKET_TYPES.SIGNIFICANT_TERMS, fn: buckets.getSignificantTermsBucketAgg },
{ name: BUCKET_TYPES.GEOHASH_GRID, fn: buckets.getGeoHashBucketAgg },
{ name: BUCKET_TYPES.GEOTILE_GRID, fn: buckets.getGeoTitleBucketAgg },
{ name: BUCKET_TYPES.SAMPLER, fn: buckets.getSamplerBucketAgg },
{ name: BUCKET_TYPES.DIVERSIFIED_SAMPLER, fn: buckets.getDiversifiedSamplerBucketAgg },
],
});

Expand All @@ -79,6 +81,8 @@ export const getAggTypesFunctions = () => [
buckets.aggDateHistogram,
buckets.aggTerms,
buckets.aggMultiTerms,
buckets.aggSampler,
buckets.aggDiversifiedSampler,
metrics.aggAvg,
metrics.aggBucketAvg,
metrics.aggBucketMax,
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/aggs_service.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ describe('Aggs service', () => {
"significant_terms",
"geohash_grid",
"geotile_grid",
"sampler",
"diversified_sampler",
"foo",
]
`);
Expand Down Expand Up @@ -122,6 +124,8 @@ describe('Aggs service', () => {
"significant_terms",
"geohash_grid",
"geotile_grid",
"sampler",
"diversified_sampler",
]
`);
expect(bStart.types.getAll().metrics.map((t) => t(aggTypesDependencies).name))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ export enum BUCKET_TYPES {
GEOHASH_GRID = 'geohash_grid',
GEOTILE_GRID = 'geotile_grid',
DATE_HISTOGRAM = 'date_histogram',
SAMPLER = 'sampler',
DIVERSIFIED_SAMPLER = 'diversified_sampler',
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { BucketAggType } from './bucket_agg_type';
import { BaseAggParams } from '../types';
import { aggDiversifiedSamplerFnName } from './diversified_sampler_fn';

/**
 * Identifier of the diversified sampler aggregation. It is used as the `name`
 * of the bucket agg type built in this file.
 */
export const DIVERSIFIED_SAMPLER_AGG_NAME = 'diversified_sampler';

// Translated, human-readable title shown for the aggregation type. The
// `description` entry gives translators context for the message.
const title = i18n.translate('data.search.aggs.buckets.diversifiedSamplerTitle', {
  defaultMessage: 'Diversified sampler',
  description: 'Diversified sampler aggregation title',
});

/**
 * Parameters of the diversified sampler aggregation, on top of the base agg params.
 */
export interface AggParamsDiversifiedSampler extends BaseAggParams {
  /**
   * Field that provides the values used for de-duplication.
   */
  field: string;

  /**
   * Limits how many top-scoring documents are collected in the sample processed on each shard.
   */
  shard_size?: number;

  /**
   * Limits how many documents are permitted per choice of de-duplicating value.
   */
  max_docs_per_value?: number;
}

/**
 * Creates the bucket agg type describing the `diversified_sampler` aggregation.
 *
 * Like the sampler aggregation, this is a filtering aggregation used to limit
 * any sub aggregations' processing to a sample of the top-scoring documents.
 * On top of that, the diversified sampler can limit the number of matches that
 * share a common value.
 *
 * @returns a new `BucketAggType` exposing the `shard_size`,
 * `max_docs_per_value` and `field` params, in that order.
 */
export const getDiversifiedSamplerBucketAgg = () => {
  return new BucketAggType({
    name: DIVERSIFIED_SAMPLER_AGG_NAME,
    title,
    customLabels: false,
    expressionName: aggDiversifiedSamplerFnName,
    params: [
      // Per-shard cap on the number of sampled documents.
      { name: 'shard_size', type: 'number' },
      // Cap on documents sharing the same de-duplicating value.
      { name: 'max_docs_per_value', type: 'number' },
      // Field supplying the de-duplicating values.
      { name: 'field', type: 'field' },
    ],
  });
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { functionWrapper } from '../test_helpers';
import { aggDiversifiedSampler } from './diversified_sampler_fn';

// Unit tests for the `aggDiversifiedSampler` expression function: they check the
// serialized agg config it returns for minimal and fully specified arguments.
describe('aggDiversifiedSampler', () => {
// NOTE(review): `functionWrapper` comes from ../test_helpers; presumably it lets the
// function definition be invoked directly with an args object — confirm there.
const fn = functionWrapper(aggDiversifiedSampler());

// With only id/schema/field given, the snapshot expects `enabled` to be true and
// the optional numeric params to be present but undefined.
test('fills in defaults when only required args are provided', () => {
const actual = fn({ id: 'sampler', schema: 'bucket', field: 'author' });
expect(actual).toMatchInlineSnapshot(`
Object {
"type": "agg_type",
"value": Object {
"enabled": true,
"id": "sampler",
"params": Object {
"field": "author",
"max_docs_per_value": undefined,
"shard_size": undefined,
},
"schema": "bucket",
"type": "diversified_sampler",
},
}
`);
});

// Explicit shard_size / max_docs_per_value values must be passed through into `params`.
test('includes optional params when they are provided', () => {
const actual = fn({
id: 'sampler',
schema: 'bucket',
shard_size: 300,
field: 'author',
max_docs_per_value: 3,
});

// Only the `value` part of the result is snapshotted here.
expect(actual.value).toMatchInlineSnapshot(`
Object {
"enabled": true,
"id": "sampler",
"params": Object {
"field": "author",
"max_docs_per_value": 3,
"shard_size": 300,
},
"schema": "bucket",
"type": "diversified_sampler",
}
`);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { ExpressionFunctionDefinition } from 'src/plugins/expressions/common';
import { AggExpressionFunctionArgs, AggExpressionType, BUCKET_TYPES } from '../';
import { DIVERSIFIED_SAMPLER_AGG_NAME } from './diversified_sampler';

/** Name of the expression function that builds a diversified sampler agg config. */
export const aggDiversifiedSamplerFnName = 'aggDiversifiedSampler';

// The function's input is left untyped; the implementation in this file never reads it.
type Input = any;
// Argument shape, derived from the DIVERSIFIED_SAMPLER entry of BUCKET_TYPES.
type Arguments = AggExpressionFunctionArgs<typeof BUCKET_TYPES.DIVERSIFIED_SAMPLER>;

// Result type produced by the expression function.
type Output = AggExpressionType;
// Full expression function definition: name, input, arguments and output.
type FunctionDefinition = ExpressionFunctionDefinition<
  typeof aggDiversifiedSamplerFnName,
  Input,
  Arguments,
  Output
>;

/**
 * Expression function definition for the diversified sampler aggregation.
 *
 * The function accepts the common agg arguments (`id`, `enabled`, `schema`)
 * plus the aggregation's own arguments (`shard_size`, `max_docs_per_value`,
 * `field`) and returns an `agg_type` value. Every argument other than the three
 * common ones is placed under `params`.
 *
 * @returns the function definition named `aggDiversifiedSampler`.
 */
export const aggDiversifiedSampler = (): FunctionDefinition => ({
  name: aggDiversifiedSamplerFnName,
  help: i18n.translate('data.search.aggs.function.buckets.diversifiedSampler.help', {
    defaultMessage: 'Generates a serialized agg config for a Diversified sampler agg',
  }),
  type: 'agg_type',
  args: {
    // --- arguments shared by agg expression functions ---
    id: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.id.help', {
        defaultMessage: 'ID for this aggregation',
      }),
    },
    enabled: {
      types: ['boolean'],
      default: true,
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.enabled.help', {
        defaultMessage: 'Specifies whether this aggregation should be enabled',
      }),
    },
    schema: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.schema.help', {
        defaultMessage: 'Schema to use for this aggregation',
      }),
    },
    // --- arguments specific to the diversified sampler ---
    shard_size: {
      types: ['number'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.shardSize.help', {
        defaultMessage:
          'The shard_size parameter limits how many top-scoring documents are collected in the sample processed on each shard.',
      }),
    },
    max_docs_per_value: {
      types: ['number'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.maxDocsPerValue.help', {
        defaultMessage:
          'Limits how many documents are permitted per choice of de-duplicating value.',
      }),
    },
    field: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.field.help', {
        defaultMessage: 'Used to provide values used for de-duplication.',
      }),
    },
  },
  // The input is ignored. The common arguments are lifted to the top level of
  // the config and everything left over becomes the aggregation's params.
  fn: (_input, { id, enabled, schema, ...params }) => ({
    type: 'agg_type',
    value: {
      id,
      enabled,
      schema,
      type: DIVERSIFIED_SAMPLER_AGG_NAME,
      params,
    },
  }),
});
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,7 @@ export * from './terms_fn';
export * from './terms';
export * from './multi_terms_fn';
export * from './multi_terms';
export * from './sampler_fn';
export * from './sampler';
export * from './diversified_sampler_fn';
export * from './diversified_sampler';
43 changes: 43 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/sampler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { BucketAggType } from './bucket_agg_type';
import { BaseAggParams } from '../types';
import { aggSamplerFnName } from './sampler_fn';

/**
 * Identifier of the sampler aggregation. It is used as the `name` of the
 * bucket agg type built in this file.
 */
export const SAMPLER_AGG_NAME = 'sampler';

// Translated, human-readable title shown for the aggregation type. The
// `description` entry gives translators context for the message.
const title = i18n.translate('data.search.aggs.buckets.samplerTitle', {
  defaultMessage: 'Sampler',
  description: 'Sampler aggregation title',
});

/**
 * Parameters of the sampler aggregation, on top of the base agg params.
 */
export interface AggParamsSampler extends BaseAggParams {
  /**
   * Limits how many top-scoring documents are collected in the sample processed on each shard.
   */
  shard_size?: number;
}

/**
 * Creates the bucket agg type describing the `sampler` aggregation: a filtering
 * aggregation used to limit any sub aggregations' processing to a sample of the
 * top-scoring documents.
 *
 * @returns a new `BucketAggType` whose only param is `shard_size`.
 */
export const getSamplerBucketAgg = () => {
  return new BucketAggType({
    name: SAMPLER_AGG_NAME,
    title,
    customLabels: false,
    expressionName: aggSamplerFnName,
    params: [
      // Per-shard cap on the number of sampled documents.
      { name: 'shard_size', type: 'number' },
    ],
  });
};
52 changes: 52 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/sampler_fn.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { functionWrapper } from '../test_helpers';
import { aggSampler } from './sampler_fn';

// Unit tests for the `aggSampler` expression function: they check the serialized
// agg config it returns for minimal and fully specified arguments.
describe('aggSampler', () => {
// NOTE(review): `functionWrapper` comes from ../test_helpers; presumably it lets the
// function definition be invoked directly with an args object — confirm there.
const fn = functionWrapper(aggSampler());

// With only id/schema given, the snapshot expects `enabled` to be true and
// `shard_size` to be present but undefined.
test('fills in defaults when only required args are provided', () => {
const actual = fn({ id: 'sampler', schema: 'bucket' });
expect(actual).toMatchInlineSnapshot(`
Object {
"type": "agg_type",
"value": Object {
"enabled": true,
"id": "sampler",
"params": Object {
"shard_size": undefined,
},
"schema": "bucket",
"type": "sampler",
},
}
`);
});

// An explicit shard_size must be passed through into `params`.
test('includes optional params when they are provided', () => {
const actual = fn({
id: 'sampler',
schema: 'bucket',
shard_size: 300,
});

// Only the `value` part of the result is snapshotted here.
expect(actual.value).toMatchInlineSnapshot(`
Object {
"enabled": true,
"id": "sampler",
"params": Object {
"shard_size": 300,
},
"schema": "bucket",
"type": "sampler",
}
`);
});
});
Loading