Skip to content

Commit

Permalink
[ML] Data Frame Analytics: Improved error handling for scatterplot matrix. (elastic#91993)
Browse files Browse the repository at this point in the history

Improves error handling for the scatterplot matrix. Documents that contain fields with arrays of values cannot be visualized in the scatterplot matrix, so this change adds a warning callout when the fetched data includes such documents.
  • Loading branch information
walterra authored and kibanamachine committed Feb 22, 2021
1 parent ec92c8e commit ad903db
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@
import React, { useMemo, useEffect, useState, FC } from 'react';

import {
EuiCallOut,
EuiComboBox,
EuiComboBoxOptionOption,
EuiFlexGroup,
EuiFlexItem,
EuiFormRow,
EuiSelect,
EuiSpacer,
EuiSwitch,
} from '@elastic/eui';

import { i18n } from '@kbn/i18n';

import { extractErrorMessage } from '../../../../common';
import { stringHash } from '../../../../common/util/string_utils';
import type { SearchResponse7 } from '../../../../common/types/es_client';
import type { ResultsSearchQuery } from '../../data_frame_analytics/common/analytics';

Expand Down Expand Up @@ -95,7 +99,9 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
const [isLoading, setIsLoading] = useState<boolean>(false);

// contains the fetched documents and columns to be passed on to the Vega spec.
const [splom, setSplom] = useState<{ items: any[]; columns: string[] } | undefined>();
const [splom, setSplom] = useState<
{ items: any[]; columns: string[]; messages: string[] } | undefined
>();

// formats the array of field names for EuiComboBox
const fieldOptions = useMemo(
Expand Down Expand Up @@ -138,22 +144,26 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({

async function fetchSplom(options: { didCancel: boolean }) {
setIsLoading(true);
const messages: string[] = [];

try {
const outlierScoreField = `${resultsField}.${OUTLIER_SCORE_FIELD}`;
const includeOutlierScoreField = resultsField !== undefined;

const queryFields = [
...fields,
...(color !== undefined ? [color] : []),
...(legendType !== undefined ? [] : [`${resultsField}.${OUTLIER_SCORE_FIELD}`]),
...(includeOutlierScoreField ? [outlierScoreField] : []),
];

const queryFallback = searchQuery !== undefined ? searchQuery : { match_all: {} };
const query = randomizeQuery
? {
function_score: {
query: queryFallback,
query: searchQuery,
random_score: { seed: 10, field: '_seq_no' },
},
}
: queryFallback;
: searchQuery;

const resp: SearchResponse7 = await esSearch({
index,
Expand All @@ -167,18 +177,43 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
});

if (!options.didCancel) {
const items = resp.hits.hits.map((d) =>
getProcessedFields(d.fields, (key: string) =>
key.startsWith(`${resultsField}.feature_importance`)
const items = resp.hits.hits
.map((d) =>
getProcessedFields(d.fields, (key: string) =>
key.startsWith(`${resultsField}.feature_importance`)
)
)
);

setSplom({ columns: fields, items });
.filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field])));

const originalDocsCount = resp.hits.hits.length;
const filteredDocsCount = originalDocsCount - items.length;

if (originalDocsCount === filteredDocsCount) {
messages.push(
i18n.translate('xpack.ml.splom.allDocsFilteredWarningMessage', {
defaultMessage:
'All fetched documents included fields with arrays of values and cannot be visualized.',
})
);
} else if (resp.hits.hits.length !== items.length) {
messages.push(
i18n.translate('xpack.ml.splom.arrayFieldsWarningMessage', {
defaultMessage:
'{filteredDocsCount} out of {originalDocsCount} fetched documents include fields with arrays of values and cannot be visualized.',
values: {
originalDocsCount,
filteredDocsCount,
},
})
);
}

setSplom({ columns: fields, items, messages });
setIsLoading(false);
}
} catch (e) {
// TODO error handling
setIsLoading(false);
setSplom({ columns: [], items: [], messages: [extractErrorMessage(e)] });
}
}

Expand All @@ -197,16 +232,8 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({

const { items, columns } = splom;

const values =
resultsField !== undefined
? items
: items.map((d) => {
d[`${resultsField}.${OUTLIER_SCORE_FIELD}`] = 0;
return d;
});

return getScatterplotMatrixVegaLiteSpec(
values,
items,
columns,
euiTheme,
resultsField,
Expand Down Expand Up @@ -301,7 +328,21 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
)}
</EuiFlexGroup>

<VegaChart vegaSpec={vegaSpec} />
{splom.messages.length > 0 && (
<>
<EuiSpacer size="m" />
<EuiCallOut color="warning">
{splom.messages.map((m) => (
<span key={stringHash(m)}>
{m}
<br />
</span>
))}
</EuiCallOut>
</>
)}

{splom.items.length > 0 && <VegaChart vegaSpec={vegaSpec} />}
</div>
)}
</>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ import {

describe('getColorSpec()', () => {
it('should return the default color for non-outlier specs', () => {
const colorSpec = getColorSpec(euiThemeLight, false);
const colorSpec = getColorSpec(euiThemeLight);

expect(colorSpec).toEqual({ value: DEFAULT_COLOR });
});

it('should return a conditional spec for outliers', () => {
const colorSpec = getColorSpec(euiThemeLight, true);
const colorSpec = getColorSpec(euiThemeLight, 'outlier_score');

expect(colorSpec).toEqual({
condition: {
Expand All @@ -42,7 +42,7 @@ describe('getColorSpec()', () => {
it('should return a field based spec for non-outlier specs with legendType supplied', () => {
const colorName = 'the-color-field';

const colorSpec = getColorSpec(euiThemeLight, false, colorName, LEGEND_TYPES.NOMINAL);
const colorSpec = getColorSpec(euiThemeLight, undefined, colorName, LEGEND_TYPES.NOMINAL);

expect(colorSpec).toEqual({
field: colorName,
Expand All @@ -67,10 +67,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.transform).toEqual([
{ as: 'x', calculate: "datum['x']" },
{ as: 'y', calculate: "datum['y']" },
]);
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
opacity: 0.75,
Expand All @@ -96,14 +92,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.transform).toEqual([
{ as: 'x', calculate: "datum['x']" },
{ as: 'y', calculate: "datum['y']" },
{
as: 'outlier_score',
calculate: "datum['ml.outlier_score']",
},
]);
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
opacity: 0.75,
Expand All @@ -112,7 +100,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
condition: {
test: "(datum['outlier_score'] >= mlOutlierScoreThreshold.cutoff)",
// Note the alternative UTF-8 dot character
test: "(datum['ml․outlier_score'] >= mlOutlierScoreThreshold.cutoff)",
value: COLOR_OUTLIER,
},
value: euiThemeLight.euiColorMediumShade,
Expand All @@ -121,7 +110,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
{
field: 'outlier_score',
// Note the alternative UTF-8 dot character
field: 'ml․outlier_score',
format: '.3f',
type: 'quantitative',
},
Expand All @@ -147,10 +137,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.transform).toEqual([
{ as: 'x', calculate: "datum['x']" },
{ as: 'y', calculate: "datum['y']" },
]);
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
opacity: 0.75,
Expand Down
Loading

0 comments on commit ad903db

Please sign in to comment.