diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx index a4f68c84ba81f1..740d127e1b08d7 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx @@ -8,17 +8,21 @@ import React, { useMemo, useEffect, useState, FC } from 'react'; import { + EuiCallOut, EuiComboBox, EuiComboBoxOptionOption, EuiFlexGroup, EuiFlexItem, EuiFormRow, EuiSelect, + EuiSpacer, EuiSwitch, } from '@elastic/eui'; import { i18n } from '@kbn/i18n'; +import { extractErrorMessage } from '../../../../common'; +import { stringHash } from '../../../../common/util/string_utils'; import type { SearchResponse7 } from '../../../../common/types/es_client'; import type { ResultsSearchQuery } from '../../data_frame_analytics/common/analytics'; @@ -95,7 +99,9 @@ export const ScatterplotMatrix: FC = ({ const [isLoading, setIsLoading] = useState(false); // contains the fetched documents and columns to be passed on to the Vega spec. - const [splom, setSplom] = useState<{ items: any[]; columns: string[] } | undefined>(); + const [splom, setSplom] = useState< + { items: any[]; columns: string[]; messages: string[] } | undefined + >(); // formats the array of field names for EuiComboBox const fieldOptions = useMemo( @@ -138,22 +144,26 @@ export const ScatterplotMatrix: FC = ({ async function fetchSplom(options: { didCancel: boolean }) { setIsLoading(true); + const messages: string[] = []; + try { + const outlierScoreField = `${resultsField}.${OUTLIER_SCORE_FIELD}`; + const includeOutlierScoreField = resultsField !== undefined; + const queryFields = [ ...fields, ...(color !== undefined ? [color] : []), - ...(legendType !== undefined ? [] : [`${resultsField}.${OUTLIER_SCORE_FIELD}`]), + ...(includeOutlierScoreField ? [outlierScoreField] : []), ]; - const queryFallback = searchQuery !== undefined ? searchQuery : { match_all: {} }; const query = randomizeQuery ? { function_score: { - query: queryFallback, + query: searchQuery, random_score: { seed: 10, field: '_seq_no' }, }, } - : queryFallback; + : searchQuery; const resp: SearchResponse7 = await esSearch({ index, @@ -167,18 +177,43 @@ export const ScatterplotMatrix: FC = ({ }); if (!options.didCancel) { - const items = resp.hits.hits.map((d) => - getProcessedFields(d.fields, (key: string) => - key.startsWith(`${resultsField}.feature_importance`) + const items = resp.hits.hits + .map((d) => + getProcessedFields(d.fields, (key: string) => + key.startsWith(`${resultsField}.feature_importance`) + ) ) - ); - - setSplom({ columns: fields, items }); + .filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field]))); + + const originalDocsCount = resp.hits.hits.length; + const filteredDocsCount = originalDocsCount - items.length; + + if (originalDocsCount === filteredDocsCount) { + messages.push( + i18n.translate('xpack.ml.splom.allDocsFilteredWarningMessage', { + defaultMessage: + 'All fetched documents included fields with arrays of values and cannot be visualized.', + }) + ); + } else if (resp.hits.hits.length !== items.length) { + messages.push( + i18n.translate('xpack.ml.splom.arrayFieldsWarningMessage', { + defaultMessage: + '{filteredDocsCount} out of {originalDocsCount} fetched documents include fields with arrays of values and cannot be visualized.', + values: { + originalDocsCount, + filteredDocsCount, + }, + }) + ); + } + + setSplom({ columns: fields, items, messages }); setIsLoading(false); } } catch (e) { - // TODO error handling setIsLoading(false); + setSplom({ columns: [], items: [], messages: [extractErrorMessage(e)] }); } } @@ -197,16 +232,8 @@ export const ScatterplotMatrix: FC = ({ const { items, columns } = splom; - const values = - resultsField !== undefined - ? items - : items.map((d) => { - d[`${resultsField}.${OUTLIER_SCORE_FIELD}`] = 0; - return d; - }); - return getScatterplotMatrixVegaLiteSpec( - values, + items, columns, euiTheme, resultsField, @@ -301,7 +328,21 @@ export const ScatterplotMatrix: FC = ({ )} - + {splom.messages.length > 0 && ( + <> + + + {splom.messages.map((m) => ( + + {m} +
+
+ ))} +
+ + )} + + {splom.items.length > 0 && } )} diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts index c963b7509139b8..1ce03119c161d7 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts @@ -22,13 +22,13 @@ import { describe('getColorSpec()', () => { it('should return the default color for non-outlier specs', () => { - const colorSpec = getColorSpec(euiThemeLight, false); + const colorSpec = getColorSpec(euiThemeLight); expect(colorSpec).toEqual({ value: DEFAULT_COLOR }); }); it('should return a conditional spec for outliers', () => { - const colorSpec = getColorSpec(euiThemeLight, true); + const colorSpec = getColorSpec(euiThemeLight, 'outlier_score'); expect(colorSpec).toEqual({ condition: { @@ -42,7 +42,7 @@ describe('getColorSpec()', () => { it('should return a field based spec for non-outlier specs with legendType supplied', () => { const colorName = 'the-color-field'; - const colorSpec = getColorSpec(euiThemeLight, false, colorName, LEGEND_TYPES.NOMINAL); + const colorSpec = getColorSpec(euiThemeLight, undefined, colorName, LEGEND_TYPES.NOMINAL); expect(colorSpec).toEqual({ field: colorName, @@ -67,10 +67,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.transform).toEqual([ - { as: 'x', calculate: "datum['x']" }, - { as: 'y', calculate: "datum['y']" }, - ]); expect(vegaLiteSpec.spec.data.values).toEqual(data); expect(vegaLiteSpec.spec.mark).toEqual({ opacity: 0.75, @@ -96,14 +92,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.transform).toEqual([ - { as: 'x', calculate: "datum['x']" }, - { as: 'y', calculate: "datum['y']" }, - { - as: 'outlier_score', - calculate: "datum['ml.outlier_score']", - }, - ]); expect(vegaLiteSpec.spec.data.values).toEqual(data); expect(vegaLiteSpec.spec.mark).toEqual({ opacity: 0.75, @@ -112,7 +100,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { }); expect(vegaLiteSpec.spec.encoding.color).toEqual({ condition: { - test: "(datum['outlier_score'] >= mlOutlierScoreThreshold.cutoff)", + // Note the alternative UTF-8 dot character + test: "(datum['ml․outlier_score'] >= mlOutlierScoreThreshold.cutoff)", value: COLOR_OUTLIER, }, value: euiThemeLight.euiColorMediumShade, @@ -121,7 +110,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { { field: 'x', type: 'quantitative' }, { field: 'y', type: 'quantitative' }, { - field: 'outlier_score', + // Note the alternative UTF-8 dot character + field: 'ml․outlier_score', format: '.3f', type: 'quantitative', }, @@ -147,10 +137,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.transform).toEqual([ - { as: 'x', calculate: "datum['x']" }, - { as: 'y', calculate: "datum['y']" }, - ]); expect(vegaLiteSpec.spec.data.values).toEqual(data); expect(vegaLiteSpec.spec.mark).toEqual({ opacity: 0.75, diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts index f99aa7c5c3de86..9d8e3b6546327e 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts @@ -28,17 +28,17 @@ export const COLOR_RANGE_QUANTITATIVE = euiPalettePositive(5); export const getColorSpec = ( euiTheme: typeof euiThemeLight, - outliers = true, + escapedOutlierScoreField?: string, color?: string, legendType?: LegendType ) => { // For outlier detection result pages coloring is done based on a threshold. // This returns a Vega spec using a conditional to return the color. - if (outliers) { + if (typeof escapedOutlierScoreField === 'string') { return { condition: { value: COLOR_OUTLIER, - test: `(datum['${OUTLIER_SCORE_FIELD}'] >= mlOutlierScoreThreshold.cutoff)`, + test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, }, value: euiTheme.euiColorMediumShade, }; @@ -48,7 +48,7 @@ export const getColorSpec = ( // this returns either a continuous or categorical color spec. if (color !== undefined && legendType !== undefined) { return { - field: color, + field: getEscapedVegaFieldName(color), type: legendType, scale: { range: legendType === LEGEND_TYPES.NOMINAL ? COLOR_RANGE_NOMINAL : COLOR_RANGE_QUANTITATIVE, @@ -59,8 +59,32 @@ export const getColorSpec = ( return { value: DEFAULT_COLOR }; }; +// Replace dots in field names with an alternative UTF-8 character +// since VEGA treats dots in field names as nested values and escaping +// in columns/rows for repeated charts isn't working as expected. +function getEscapedVegaFieldName(fieldName: string) { + return fieldName.replace(/\./g, '․'); +} + +// Replace dots for all keys of all data items with an alternative UTF-8 character +// since VEGA treats dots in field names as nested values and escaping +// in columns/rows for repeated charts isn't working as expected. +function getEscapedVegaValues(values: VegaValue[]): VegaValue[] { + return values.map((d) => + Object.keys(d).reduce( + (p, c) => ({ + ...p, + [getEscapedVegaFieldName(c)]: d[c], + }), + {} as VegaValue + ) + ); +} + +type VegaValue = Record; + export const getScatterplotMatrixVegaLiteSpec = ( - values: any[], + values: VegaValue[], columns: string[], euiTheme: typeof euiThemeLight, resultsField?: string, @@ -68,20 +92,20 @@ export const getScatterplotMatrixVegaLiteSpec = ( legendType?: LegendType, dynamicSize?: boolean ): TopLevelSpec => { + const vegaValues = getEscapedVegaValues(values); + const vegaColumns = columns.map(getEscapedVegaFieldName); const outliers = resultsField !== undefined; - const transform = columns.map((column) => ({ - calculate: `datum['${column}']`, - as: column, - })); - - if (resultsField !== undefined) { - transform.push({ - calculate: `datum['${resultsField}.${OUTLIER_SCORE_FIELD}']`, - as: OUTLIER_SCORE_FIELD, - }); - } - const colorSpec = getColorSpec(euiTheme, outliers, color, legendType); + // Use an alternative UTF-8 character for the dot + // since VEGA treats dots in field names as nested values. + const escapedOutlierScoreField = `${resultsField}․${OUTLIER_SCORE_FIELD}`; + + const colorSpec = getColorSpec( + euiTheme, + resultsField && escapedOutlierScoreField, + color, + legendType + ); return { $schema: 'https://vega.github.io/schema/vega-lite/v4.17.0.json', @@ -102,11 +126,11 @@ export const getScatterplotMatrixVegaLiteSpec = ( }, }, repeat: { - column: columns, - row: columns.slice().reverse(), + column: vegaColumns, + row: vegaColumns.slice().reverse(), }, spec: { - data: { values }, + data: { values: [...vegaValues] }, mark: { ...(outliers && dynamicSize ? { @@ -125,7 +149,7 @@ export const getScatterplotMatrixVegaLiteSpec = ( opacity: { condition: { value: 1, - test: `(datum['${OUTLIER_SCORE_FIELD}'] >= mlOutlierScoreThreshold.cutoff)`, + test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, }, value: 0.5, }, @@ -133,19 +157,19 @@ export const getScatterplotMatrixVegaLiteSpec = ( : {}), ...(outliers ? { - order: { field: OUTLIER_SCORE_FIELD }, + order: { field: escapedOutlierScoreField }, size: { ...(!dynamicSize ? { condition: { value: 40, - test: `(datum['${OUTLIER_SCORE_FIELD}'] >= mlOutlierScoreThreshold.cutoff)`, + test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, }, value: 8, } : { type: LEGEND_TYPES.QUANTITATIVE, - field: OUTLIER_SCORE_FIELD, + field: escapedOutlierScoreField, scale: { type: 'linear', range: [8, 200], @@ -166,10 +190,12 @@ export const getScatterplotMatrixVegaLiteSpec = ( scale: { zero: false }, }, tooltip: [ - ...(color !== undefined ? [{ type: colorSpec.type, field: color }] : []), - ...columns.map((d) => ({ type: LEGEND_TYPES.QUANTITATIVE, field: d })), + ...(color !== undefined + ? [{ type: colorSpec.type, field: getEscapedVegaFieldName(color) }] + : []), + ...vegaColumns.map((d) => ({ type: LEGEND_TYPES.QUANTITATIVE, field: d })), ...(outliers - ? [{ type: LEGEND_TYPES.QUANTITATIVE, field: OUTLIER_SCORE_FIELD, format: '.3f' }] + ? [{ type: LEGEND_TYPES.QUANTITATIVE, field: escapedOutlierScoreField, format: '.3f' }] : []), ], }, @@ -193,7 +219,6 @@ export const getScatterplotMatrixVegaLiteSpec = ( }, } : {}), - transform, width: SCATTERPLOT_SIZE, height: SCATTERPLOT_SIZE, }, diff --git a/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/components/configuration_step/configuration_step_form.tsx b/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/components/configuration_step/configuration_step_form.tsx index 0432094c30c500..390204888b5007 100644 --- a/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/components/configuration_step/configuration_step_form.tsx +++ b/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/components/configuration_step/configuration_step_form.tsx @@ -327,6 +327,17 @@ export const ConfigurationStepForm: FC = ({ [currentIndexPattern.fields] ); + // Show the Scatterplot Matrix only if + // - There's more than one suitable field available + // - The job type is outlier detection, or + // - The job type is regression or classification and the dependent variable has been set + const showScatterplotMatrix = + (jobType === ANALYSIS_CONFIG_TYPE.OUTLIER_DETECTION || + ((jobType === ANALYSIS_CONFIG_TYPE.REGRESSION || + jobType === ANALYSIS_CONFIG_TYPE.CLASSIFICATION) && + !dependentVariableEmpty)) && + scatterplotFieldOptions.length > 1; + return ( @@ -499,7 +510,7 @@ export const ConfigurationStepForm: FC = ({ loadingItems={loadingFieldOptions} setFormState={setFormState} /> - {scatterplotFieldOptions.length > 1 && ( + {showScatterplotMatrix && ( <>