Skip to content

Commit

Permalink
[ML] Transforms: Fix handling of fields with keyword mapping available (
Browse files Browse the repository at this point in the history
#98882) (#99210)

- For groupby/agg configs, removes the .keyword postfix for the agg name and field name being displayed. The config itself will still use the field name including .keyword.
- For histogram charts, if available, query data using the .keyword field. This enables support for charts for terms when there's both a text and keyword variant.
- Fixes isKeywordDuplicate check for field names with multiple dots in them.

Co-authored-by: Walter Rafelsberger <walter@elastic.co>
  • Loading branch information
kibanamachine and walterra authored May 4, 2021
1 parent 9ab674b commit 87824da
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 34 deletions.
59 changes: 59 additions & 0 deletions x-pack/plugins/transform/common/utils/field_utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { hasKeywordDuplicate, isKeywordDuplicate, removeKeywordPostfix } from './field_utils';

const allFields = new Set([
'field1',
'field2',
'field2.keyword',
'field3.keyword',
'field3.keyword.keyword',
'field4.keyword.b',
'field4.keyword.b.keyword',
]);

describe('field_utils: hasKeywordDuplicate()', () => {
it('returns true when a corresponding keyword field is available', () => {
expect(hasKeywordDuplicate('field2', allFields)).toBe(true);
expect(hasKeywordDuplicate('field3.keyword', allFields)).toBe(true);
expect(hasKeywordDuplicate('field4.keyword.b', allFields)).toBe(true);
});
it('returns false when a corresponding keyword field is not available', () => {
expect(hasKeywordDuplicate('field1', allFields)).toBe(false);
expect(hasKeywordDuplicate('field2.keyword', allFields)).toBe(false);
expect(hasKeywordDuplicate('field3.keyword.keyword', allFields)).toBe(false);
expect(hasKeywordDuplicate('field4.keyword.b.keyword', allFields)).toBe(false);
});
});

describe('field_utils: isKeywordDuplicate()', () => {
it('returns true when a corresponding field without keyword postfix is available', () => {
expect(isKeywordDuplicate('field2.keyword', allFields)).toBe(true);
expect(isKeywordDuplicate('field3.keyword.keyword', allFields)).toBe(true);
expect(isKeywordDuplicate('field4.keyword.b.keyword', allFields)).toBe(true);
});
it('returns false when a corresponding field without keyword postfix is not available', () => {
expect(isKeywordDuplicate('field1', allFields)).toBe(false);
expect(isKeywordDuplicate('field2', allFields)).toBe(false);
expect(isKeywordDuplicate('field3.keyword', allFields)).toBe(false);
expect(isKeywordDuplicate('field4.keyword.b', allFields)).toBe(false);
});
});

describe('field_utils: removeKeywordPostfix()', () => {
it('removes the keyword postfix', () => {
expect(removeKeywordPostfix('field2.keyword')).toBe('field2');
expect(removeKeywordPostfix('field3.keyword.keyword')).toBe('field3.keyword');
expect(removeKeywordPostfix('field4.keyword.b.keyword')).toBe('field4.keyword.b');
});
it("returns the field name as is when there's no keyword postfix", () => {
expect(removeKeywordPostfix('field1')).toBe('field1');
expect(removeKeywordPostfix('field2')).toBe('field2');
expect(removeKeywordPostfix('field4.keyword.b')).toBe('field4.keyword.b');
});
});
20 changes: 20 additions & 0 deletions x-pack/plugins/transform/common/utils/field_utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export const KEYWORD_POSTFIX = '.keyword';

// checks if fieldName has a `fieldName.keyword` equivalent in the set of all field names.
export const hasKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
fieldNamesSet.has(`${fieldName}${KEYWORD_POSTFIX}`);

// checks if a fieldName ends with `.keyword` and has a field name equivalent without the postfix in the set of all field names.
export const isKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
fieldName.endsWith(KEYWORD_POSTFIX) && fieldNamesSet.has(removeKeywordPostfix(fieldName));

// removes the `.keyword` postfix form a field name if applicable
export const removeKeywordPostfix = (fieldName: string): string =>
fieldName.replace(new RegExp(`${KEYWORD_POSTFIX}$`), '');
34 changes: 29 additions & 5 deletions x-pack/plugins/transform/public/app/hooks/use_index_data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ import {
isEsSearchResponse,
isFieldHistogramsResponseSchema,
} from '../../../common/api_schemas/type_guards';
import {
hasKeywordDuplicate,
isKeywordDuplicate,
removeKeywordPostfix,
} from '../../../common/utils/field_utils';
import type { EsSorting, UseIndexDataReturnType } from '../../shared_imports';

import { getErrorMessage } from '../../../common/utils/errors';
Expand Down Expand Up @@ -209,14 +214,25 @@ export const useIndexData = (
};

const fetchColumnChartsData = async function () {
const allIndexPatternFieldNames = new Set(indexPattern.fields.map((f) => f.name));
const columnChartsData = await api.getHistogramsForFields(
indexPattern.title,
columns
.filter((cT) => dataGrid.visibleColumns.includes(cT.id))
.map((cT) => ({
fieldName: cT.id,
type: getFieldType(cT.schema),
})),
.map((cT) => {
// If a column field name has a corresponding keyword field,
// fetch the keyword field instead to be able to do aggregations.
const fieldName = cT.id;
return hasKeywordDuplicate(fieldName, allIndexPatternFieldNames)
? {
fieldName: `${fieldName}.keyword`,
type: getFieldType(undefined),
}
: {
fieldName,
type: getFieldType(cT.schema),
};
}),
isDefaultQuery(query) ? matchAllQuery : query,
combinedRuntimeMappings
);
Expand All @@ -226,7 +242,15 @@ export const useIndexData = (
return;
}

setColumnCharts(columnChartsData);
setColumnCharts(
// revert field names with `.keyword` used to do aggregations to their original column name
columnChartsData.map((d) => ({
...d,
...(isKeywordDuplicate(d.id, allIndexPatternFieldNames)
? { id: removeKeywordPostfix(d.id) }
: {}),
}))
);
};

useEffect(() => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import {
} from '../../../../../../../../../../src/plugins/data/public';

import { getNestedProperty } from '../../../../../../../common/utils/object_utils';
import { removeKeywordPostfix } from '../../../../../../../common/utils/field_utils';

import { isRuntimeMappings } from '../../../../../../../common/shared_imports';

import {
Expand Down Expand Up @@ -93,41 +95,44 @@ export function getPivotDropdownOptions(

const combinedFields = [...indexPatternFields, ...runtimeFields].sort(sortByLabel);
combinedFields.forEach((field) => {
const rawFieldName = field.name;
const displayFieldName = removeKeywordPostfix(rawFieldName);

// Group by
const availableGroupByAggs: [] = getNestedProperty(pivotGroupByFieldSupport, field.type);

if (availableGroupByAggs !== undefined) {
availableGroupByAggs.forEach((groupByAgg) => {
// Aggregation name for the group-by is the plain field name. Illegal characters will be removed.
const aggName = field.name.replace(illegalEsAggNameChars, '').trim();
const aggName = displayFieldName.replace(illegalEsAggNameChars, '').trim();
// Option name in the dropdown for the group-by is in the form of `sum(fieldname)`.
const dropDownName = `${groupByAgg}(${field.name})`;
const dropDownName = `${groupByAgg}(${displayFieldName})`;
const groupByOption: DropDownLabel = { label: dropDownName };
groupByOptions.push(groupByOption);
groupByOptionsData[dropDownName] = getDefaultGroupByConfig(
aggName,
dropDownName,
field.name,
rawFieldName,
groupByAgg
);
});
}

// Aggregations
const aggOption: DropDownOption = { label: field.name, options: [] };
const aggOption: DropDownOption = { label: displayFieldName, options: [] };
const availableAggs: [] = getNestedProperty(pivotAggsFieldSupport, field.type);

if (availableAggs !== undefined) {
availableAggs.forEach((agg) => {
// Aggregation name is formatted like `fieldname.sum`. Illegal characters will be removed.
const aggName = `${field.name.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
const aggName = `${displayFieldName.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
// Option name in the dropdown for the aggregation is in the form of `sum(fieldname)`.
const dropDownName = `${agg}(${field.name})`;
const dropDownName = `${agg}(${displayFieldName})`;
aggOption.options.push({ label: dropDownName });
aggOptionsData[dropDownName] = getDefaultAggregationConfig(
aggName,
dropDownName,
field.name,
rawFieldName,
agg
);
});
Expand Down
5 changes: 2 additions & 3 deletions x-pack/plugins/transform/server/routes/api/transforms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import { registerTransformsAuditMessagesRoutes } from './transforms_audit_messag
import { registerTransformNodesRoutes } from './transforms_nodes';
import { IIndexPattern } from '../../../../../../src/plugins/data/common/index_patterns';
import { isLatestTransform } from '../../../common/types/transform';
import { isKeywordDuplicate } from '../../../common/utils/field_utils';

enum TRANSFORM_ACTIONS {
STOP = 'stop',
Expand Down Expand Up @@ -562,9 +563,7 @@ const previewTransformHandler: RequestHandler<
).reduce((acc, [fieldName, fieldCaps]) => {
const fieldDefinition = Object.values(fieldCaps)[0];
const isMetaField = fieldDefinition.type.startsWith('_') || fieldName === '_doc_count';
const isKeywordDuplicate =
fieldName.endsWith('.keyword') && fieldNamesSet.has(fieldName.split('.keyword')[0]);
if (isMetaField || isKeywordDuplicate) {
if (isMetaField || isKeywordDuplicate(fieldName, fieldNamesSet)) {
return acc;
}
acc[fieldName] = { ...fieldDefinition };
Expand Down
4 changes: 2 additions & 2 deletions x-pack/test/accessibility/apps/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ export default function ({ getService }: FtrProviderContext) {

const pivotGroupByEntries = [
{
identifier: 'terms(category.keyword)',
label: 'category.keyword',
identifier: 'terms(category)',
label: 'category',
},
{
identifier: 'date_histogram(order_date)',
Expand Down
2 changes: 1 addition & 1 deletion x-pack/test/functional/apps/transform/cloning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ function getTransformConfig(): TransformPivotConfig {
aggregations: { 'products.base_price.avg': { avg: { field: 'products.base_price' } } },
},
description:
'ecommerce batch transform with avg(products.base_price) grouped by terms(category.keyword)',
'ecommerce batch transform with avg(products.base_price) grouped by terms(category)',
frequency: '3s',
settings: {
max_page_search_size: 250,
Expand Down
56 changes: 47 additions & 9 deletions x-pack/test/functional/apps/transform/creation_index_pattern.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
} from './index';

export default function ({ getService }: FtrProviderContext) {
const canvasElement = getService('canvasElement');
const esArchiver = getService('esArchiver');
const transform = getService('transform');

Expand All @@ -40,8 +41,8 @@ export default function ({ getService }: FtrProviderContext) {
source: 'ft_ecommerce',
groupByEntries: [
{
identifier: 'terms(category.keyword)',
label: 'category.keyword',
identifier: 'terms(category)',
label: 'category',
} as GroupByEntry,
{
identifier: 'date_histogram(order_date)',
Expand Down Expand Up @@ -85,16 +86,16 @@ export default function ({ getService }: FtrProviderContext) {
],
transformId: `ec_1_${Date.now()}`,
transformDescription:
'ecommerce batch transform with groups terms(category.keyword) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
'ecommerce batch transform with groups terms(category) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
get destinationIndex(): string {
return `user-${this.transformId}`;
},
discoverAdjustSuperDatePicker: true,
expected: {
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category.keyword": {'],
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category": {'],
pivotAdvancedEditorValue: {
group_by: {
'category.keyword': {
category: {
terms: {
field: 'category.keyword',
},
Expand Down Expand Up @@ -156,7 +157,15 @@ export default function ({ getService }: FtrProviderContext) {
rows: 5,
},
histogramCharts: [
{ chartAvailable: false, id: 'category', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'category',
legend: '6 categories',
colorStats: [
{ color: '#000000', percentage: 45 },
{ color: '#54B399', percentage: 55 },
],
},
{
chartAvailable: true,
id: 'currency',
Expand All @@ -166,8 +175,24 @@ export default function ({ getService }: FtrProviderContext) {
{ color: '#54B399', percentage: 90 },
],
},
{ chartAvailable: false, id: 'customer_first_name', legend: 'Chart not supported.' },
{ chartAvailable: false, id: 'customer_full_name', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'customer_first_name',
legend: 'top 20 of 46 categories',
colorStats: [
{ color: '#000000', percentage: 60 },
{ color: '#54B399', percentage: 35 },
],
},
{
chartAvailable: true,
id: 'customer_full_name',
legend: 'top 20 of 3321 categories',
colorStats: [
{ color: '#000000', percentage: 25 },
{ color: '#54B399', percentage: 67 },
],
},
{
chartAvailable: true,
id: 'customer_gender',
Expand All @@ -186,7 +211,15 @@ export default function ({ getService }: FtrProviderContext) {
{ color: '#000000', percentage: 60 },
],
},
{ chartAvailable: false, id: 'customer_last_name', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'customer_last_name',
legend: 'top 20 of 183 categories',
colorStats: [
{ color: '#000000', percentage: 25 },
{ color: '#54B399', percentage: 70 },
],
},
{
chartAvailable: true,
id: 'customer_phone',
Expand Down Expand Up @@ -403,6 +436,9 @@ export default function ({ getService }: FtrProviderContext) {
await transform.wizard.assertAdvancedQueryEditorSwitchExists();
await transform.wizard.assertAdvancedQueryEditorSwitchCheckState(false);

// Disable anti-aliasing to stabilize canvas image rendering assertions
await canvasElement.disableAntiAliasing();

await transform.testExecution.logTestStep('enables the index preview histogram charts');
await transform.wizard.enableIndexPreviewHistogramCharts(true);

Expand All @@ -415,6 +451,8 @@ export default function ({ getService }: FtrProviderContext) {
);
}

await canvasElement.resetAntiAliasing();

if (isPivotTransformTestData(testData)) {
await transform.testExecution.logTestStep('adds the group by entries');
for (const [index, entry] of testData.groupByEntries.entries()) {
Expand Down
Loading

0 comments on commit 87824da

Please sign in to comment.