From 0f56482b9243a847187ed28804a3ace7c9350dfe Mon Sep 17 00:00:00 2001 From: goodov <5928869+goodov@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:59:13 +0700 Subject: [PATCH] Add more study validation from processed_study.cc. (#1183) Implement additional validation that exists in [processed_study.cc](https://source.chromium.org/chromium/chromium/src/+/main:components/variations/processed_study.cc;l=38;drc=f9d386e085b36b0118ab74f9a3d6dc18934de430). Important ones: * study, experiment, feature name validation * default_experiment_name validation + experiment existence validation * experiment param name conflict and emptiness Other was added just to have the full coverage (forcing_flag, google_web_experiment_id). --- src/seed_tools/README.md | 14 + src/seed_tools/commands/validate_seed.test.ts | 55 +++ src/seed_tools/commands/validate_seed.ts | 44 +++ src/seed_tools/seed_tools.ts | 2 + src/seed_tools/utils/study_validation.test.ts | 313 +++++++++++++++++- src/seed_tools/utils/study_validation.ts | 164 ++++++++- src/test/data/invalid_seed.bin | Bin 0 -> 124 bytes 7 files changed, 585 insertions(+), 7 deletions(-) create mode 100644 src/seed_tools/commands/validate_seed.test.ts create mode 100644 src/seed_tools/commands/validate_seed.ts create mode 100644 src/test/data/invalid_seed.bin diff --git a/src/seed_tools/README.md b/src/seed_tools/README.md index e08b87fb..340fd600 100644 --- a/src/seed_tools/README.md +++ b/src/seed_tools/README.md @@ -75,3 +75,17 @@ npm run seed_tools -- split_seed_json - ``: The path to the `seed.json` file to be split. - ``: The directory where the individual study files will be outputted. + +### `validate_seed` + +Validates a seed protobuf. + +##### Syntax + +```bash +npm run seed_tools -- validate_seed +``` + +##### Arguments + +- ``: The path to the binary-serialized `seed` protobuf. diff --git a/src/seed_tools/commands/validate_seed.test.ts b/src/seed_tools/commands/validate_seed.test.ts new file mode 100644 index 00000000..1514119d --- /dev/null +++ b/src/seed_tools/commands/validate_seed.test.ts @@ -0,0 +1,55 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +import * as fs_sync from 'fs'; +import * as path from 'path'; +import { wsPath } from '../../base/path_utils'; +import validate_seed from './validate_seed'; + +describe('validate_seed command', () => { + const testDataDir = wsPath('//src/test/data'); + + let errorMock: jest.SpyInstance; + let exitMock: jest.SpyInstance; + + beforeEach(async () => { + errorMock = jest.spyOn(console, 'error').mockImplementation(); + exitMock = jest.spyOn(process, 'exit').mockImplementation(); + }); + + afterEach(async () => { + jest.restoreAllMocks(); + }); + + describe('valid seeds', () => { + const validSeedsDir = path.join(testDataDir, 'valid_seeds'); + it.each(fs_sync.readdirSync(validSeedsDir))( + 'correctly validates %s', + async (testCase) => { + const testCaseDir = path.join(validSeedsDir, testCase); + const seedBin = path.join(testCaseDir, 'expected_seed.bin'); + + await validate_seed.parseAsync(['node', 'validate_seed', seedBin]); + + expect(errorMock).toHaveBeenCalledTimes(0); + expect(exitMock).toHaveBeenCalledWith(0); + }, + ); + }); + + test('invalid seed', async () => { + const seedBin = path.join(testDataDir, 'invalid_seed.bin'); + expect(fs_sync.existsSync(seedBin)).toBe(true); + + await validate_seed.parseAsync(['node', 'validate_seed', seedBin]); + + expect(errorMock).toHaveBeenCalledWith( + expect.stringContaining( + 'Total probability is not 100 for study AllowCertainClientHintsStudy', + ), + ); + expect(exitMock).toHaveBeenCalledWith(1); + }); +}); diff --git a/src/seed_tools/commands/validate_seed.ts b/src/seed_tools/commands/validate_seed.ts new file mode 100644 index 00000000..f8d28de8 --- /dev/null +++ b/src/seed_tools/commands/validate_seed.ts @@ -0,0 +1,44 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +import { Command } from '@commander-js/extra-typings'; +import { promises as fs } from 'fs'; +import { VariationsSeed } from '../../proto/generated/variations_seed'; +import * as seed_validation from '../utils/seed_validation'; +import * as study_validation from '../utils/study_validation'; + +export default new Command('validate_seed') + .description('Validates seed.bin') + .argument('', 'path to a seed protobuf') + .action(main); + +async function main(seedFilePath: string) { + const variationsSeed = VariationsSeed.fromBinary( + await fs.readFile(seedFilePath, { encoding: null }), + ); + const errors = []; + for (const study of variationsSeed.study) { + const filePath = `${study.name}.json`; + for (const error of study_validation.validateStudyReturnErrors( + study, + filePath, + )) { + errors.push(error); + } + } + + for (const error of seed_validation.validateSeedReturnErrors( + variationsSeed, + )) { + errors.push(error); + } + + console.log('Seed studies count:', variationsSeed.study.length); + if (errors.length > 0) { + console.error(`Seed validation errors:\n${errors.join('\n---\n')}`); + } + + process.exit(errors.length > 0 ? 1 : 0); +} diff --git a/src/seed_tools/seed_tools.ts b/src/seed_tools/seed_tools.ts index 252a053f..3a43331e 100644 --- a/src/seed_tools/seed_tools.ts +++ b/src/seed_tools/seed_tools.ts @@ -9,6 +9,7 @@ import check_study from './commands/check_study'; import compare_seeds from './commands/compare_seeds'; import create_seed from './commands/create_seed'; import split_seed_json from './commands/split_seed_json'; +import validate_seed from './commands/validate_seed'; program .name('seed_tools') @@ -17,4 +18,5 @@ program .addCommand(compare_seeds) .addCommand(create_seed) .addCommand(split_seed_json) + .addCommand(validate_seed) .parse(); diff --git a/src/seed_tools/utils/study_validation.test.ts b/src/seed_tools/utils/study_validation.test.ts index 27c021c7..c0c42205 100644 --- a/src/seed_tools/utils/study_validation.test.ts +++ b/src/seed_tools/utils/study_validation.test.ts @@ -56,20 +56,81 @@ describe('validateStudy', () => { }).toThrowError('Study name study1 does not match file name'); }); + test.each(['study_😀', 'study_,', 'study_<', 'study_*'])( + 'should throw an error if study name has invalid char %s', + (studyName) => { + const study = Study.fromJson({ + name: studyName, + experiment: [], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError(`Invalid study name: ${studyName}`); + }, + ); + + test('should throw an error if layer is set', () => { + const study = Study.fromJson({ + name: 'study', + layer: { + layer_id: 1, + layer_member_id: 1, + }, + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Layers are currently not supported'); + }); + test('should throw an error if experiment name is not defined', () => { const study = Study.fromJson({ name: 'study', experiment: [ { name: '', - probabilityWeight: 50, + probabilityWeight: 100, }, ], }); expect(() => { study_validation.validateStudy(study, studyFilePath); - }).toThrowError('Experiment name is not defined for study study'); + }).toThrowError('Experiment name is not defined for study: study'); + }); + + test.each(['exp😀', 'exp<', 'exp*'])( + 'should throw an error if experiment name has invalid char %s', + (experimentName) => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: experimentName, + probabilityWeight: 100, + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError(`Invalid experiment name: ${experimentName}`); + }, + ); + + test('should not throw if experiment name has comma', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1,', + probability_weight: 100, + }, + ], + }); + + study_validation.validateStudy(study, studyFilePath); }); test('should throw an error if duplicate experiment names are found', () => { @@ -96,6 +157,254 @@ describe('validateStudy', () => { }).toThrowError('Duplicate experiment name: experiment1'); }); + test('should throw an error if feature name is not defined', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment', + feature_association: { + enable_feature: [''], + }, + probabilityWeight: 100, + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Feature name is not defined for experiment: experiment'); + }); + + test.each(['feature😀', 'feature,', 'feature<', 'feature*'])( + 'should throw an error if feature name has invalid char %s', + (featureName) => { + const featureAssociations = [ + { enable_feature: [featureName] }, + { disable_feature: [featureName] }, + { forcing_feature_on: featureName }, + { forcing_feature_off: featureName }, + ]; + for (const featureAssociation of featureAssociations) { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment', + probabilityWeight: 100, + feature_association: featureAssociation as any, + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError( + `Invalid feature name for experiment experiment: ${featureName}`, + ); + } + }, + ); + + test('should not throw if forcing flag is correct', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + forcing_flag: 'hello', + }, + ], + }); + + study_validation.validateStudy(study, studyFilePath); + }); + + test.each(['Hello', ''])( + 'should throw an error if forcing flag is incorrect %s', + (forcingFlag) => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + forcing_flag: forcingFlag, + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Invalid forcing flag for experiment experiment1'); + }, + ); + + test.each([ + [true, true, false], + [true, false, true], + [false, true, true], + [true, true, true], + ])( + 'should throw on mixed forcing options', + (forcingFeatureOn, forcingFeatureOff, forcingFlag) => { + const studyJson = { + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + feature_association: {}, + }, + ], + }; + if (forcingFeatureOn) { + ( + studyJson.experiment[0].feature_association as any + ).forcing_feature_on = 'feature1'; + } + if (forcingFeatureOff) { + ( + studyJson.experiment[0].feature_association as any + ).forcing_feature_off = 'feature1'; + } + if (forcingFlag) { + (studyJson.experiment[0] as any).forcing_flag = 'feature1'; + } + + const study = Study.fromJson(studyJson); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError( + 'Forcing feature_on, feature_off and flag are mutually exclusive', + ); + }, + ); + + test.each([ + [true, false, false], + [false, true, false], + [false, false, true], + ])( + 'should not throw on correct forcing options use', + (forcingFeatureOn, forcingFeatureOff, forcingFlag) => { + const studyJson = { + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + feature_association: {}, + }, + ], + }; + if (forcingFeatureOn) { + ( + studyJson.experiment[0].feature_association as any + ).forcing_feature_on = 'feature1'; + } + if (forcingFeatureOff) { + ( + studyJson.experiment[0].feature_association as any + ).forcing_feature_off = 'feature1'; + } + if (forcingFlag) { + (studyJson.experiment[0] as any).forcing_flag = 'feature1'; + } + + const study = Study.fromJson(studyJson); + + study_validation.validateStudy(study, studyFilePath); + }, + ); + + test('should throw an error if google_web_experiment/trigger_id conflict', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + google_web_experiment_id: 1, + google_web_trigger_experiment_id: 2, + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError( + 'Experiment experiment1 has both google_web_experiment_id and web_trigger_experiment_id', + ); + }); + + test('should throw an error if param name is empty', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + param: [ + { + name: '', + value: '1', + }, + ], + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Empty param name in experiment experiment1'); + }); + + test('should throw an error if params conflict', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + param: [ + { + name: 'test', + value: '1', + }, + { + name: 'test', + value: '2', + }, + ], + }, + ], + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Duplicate param name: test in experiment experiment1'); + }); + + test('should throw an error if default_experiment_name not found', () => { + const study = Study.fromJson({ + name: 'study', + experiment: [ + { + name: 'experiment1', + probability_weight: 100, + }, + ], + default_experiment_name: 'DefaultExp', + }); + + expect(() => { + study_validation.validateStudy(study, studyFilePath); + }).toThrowError('Missing default experiment: DefaultExp in study study'); + }); + test('should throw an error if total probability is not 100', () => { const study = Study.fromJson({ name: 'study', diff --git a/src/seed_tools/utils/study_validation.ts b/src/seed_tools/utils/study_validation.ts index 117ed6cc..d95d17e4 100644 --- a/src/seed_tools/utils/study_validation.ts +++ b/src/seed_tools/utils/study_validation.ts @@ -4,9 +4,12 @@ // You can obtain one at https://mozilla.org/MPL/2.0/. import path from 'path'; -import { type Study } from '../../proto/generated/study'; +import { type Study, type Study_Experiment } from '../../proto/generated/study'; import * as study_filter_utils from './study_filter_utils'; +const invalidFeatureOrFieldTrialNameChars = ',<*'; +const invalidExperimentNameChars = '<*'; + // Validate a study for common errors. Throws an error if any is found. export function validateStudy(study: Study, studyFilePath: string) { const errors = validateStudyReturnErrors(study, studyFilePath); @@ -23,6 +26,7 @@ export function validateStudyReturnErrors( const errors: string[] = []; const validators = [ checkName, + checkLayers, checkExperiments, checkFilterExcludeFields, checkDateRange, @@ -53,22 +57,39 @@ function checkName(study: Study, studyFilePath: string): string[] { !study.name.startsWith(`${fileBaseName}_`) ) { errors.push( - `Study name ${study.name} does not match file name: ${fileBaseName}`, + `Study name ${study.name} does not match file name (expected ${fileBaseName} or ${fileBaseName}_)`, + ); + } + if ( + !isStringASCIIWithoutChars(study.name, invalidFeatureOrFieldTrialNameChars) + ) { + errors.push( + `Invalid study name: ${study.name} (expected ASCII without "${invalidFeatureOrFieldTrialNameChars}" chars)`, ); } return errors; } +function checkLayers(study: Study): string[] { + const errors: string[] = []; + if (study.layer !== undefined) { + errors.push('Layers are currently not supported'); + } + return errors; +} + // Check that experiment names are unique and total probability is 100. function checkExperiments(study: Study): string[] { const errors: string[] = []; const experimentNames = new Set(); let totalProbability = 0; for (const experiment of study.experiment) { + let hasForcingFeatureOn = false; + let hasForcingFeatureOff = false; + let hasForcingFlag = false; + // Validate experiment name. - if (experiment.name === '') { - errors.push(`Experiment name is not defined for study ${study.name}`); - } + checkExperimentName(study, experiment.name, errors); if (experimentNames.has(experiment.name)) { errors.push(`Duplicate experiment name: ${experiment.name}`); } @@ -80,11 +101,94 @@ function checkExperiments(study: Study): string[] { } experimentNames.add(experiment.name); totalProbability += experiment.probability_weight ?? 0; + + // Validate features. + const featureAssociations = experiment.feature_association; + if (featureAssociations !== undefined) { + const featureNamesToCheck = [ + ...featureAssociations.enable_feature, + ...featureAssociations.disable_feature, + ]; + if (featureAssociations.forcing_feature_on !== undefined) { + featureNamesToCheck.push(featureAssociations.forcing_feature_on); + hasForcingFeatureOn = true; + } + if (featureAssociations.forcing_feature_off !== undefined) { + featureNamesToCheck.push(featureAssociations.forcing_feature_off); + hasForcingFeatureOff = true; + } + for (const featureName of featureNamesToCheck) { + checkFeatureName(experiment, featureName, errors); + } + } + + // Validate forcing flag. + if (experiment.forcing_flag !== undefined) { + if ( + experiment.forcing_flag === '' || + !isStringASCIIWithoutChars(experiment.forcing_flag, '') || + experiment.forcing_flag !== experiment.forcing_flag.toLowerCase() + ) { + errors.push( + `Invalid forcing flag for experiment ${experiment.name}: ${experiment.forcing_flag} (expected lowercase ASCII)`, + ); + } + hasForcingFlag = true; + } + + // Check either all forcing options are not set or only one of them is set. + if ( + [hasForcingFeatureOn, hasForcingFeatureOff, hasForcingFlag].filter( + Boolean, + ).length > 1 + ) { + errors.push( + `Forcing feature_on, feature_off and flag are mutually exclusive, cannot mix them in experiment: ${experiment.name}`, + ); + } + + // Validate google_web_experiment_id and google_web_trigger_experiment_id. + if ( + experiment.google_web_experiment_id !== undefined && + experiment.google_web_trigger_experiment_id !== undefined + ) { + errors.push( + `Experiment ${experiment.name} has both google_web_experiment_id and web_trigger_experiment_id`, + ); + } + + // Valiate params. + const paramNames = new Set(); + for (const param of experiment.param) { + if (param.name === undefined || param.name === '') { + errors.push(`Empty param name in experiment ${experiment.name}`); + continue; + } + if (paramNames.has(param.name)) { + errors.push( + `Duplicate param name: ${param.name} in experiment ${experiment.name}`, + ); + } + paramNames.add(param.name); + } + } + + // Validate default_experiment_name. + if ( + study.default_experiment_name !== undefined && + study.default_experiment_name !== '' && + !experimentNames.has(study.default_experiment_name) + ) { + errors.push( + `Missing default experiment: ${study.default_experiment_name} in ${study.name} study`, + ); } + // Validate total probability. if (totalProbability !== 100) { errors.push(`Total probability is not 100 for study ${study.name}`); } + return errors; } @@ -161,3 +265,53 @@ function checkOsVersionRange(study: Study): string[] { } return errors; } + +function checkExperimentName( + study: Study, + experimentName: string, + errors: string[], +) { + if (experimentName === '') { + errors.push(`Experiment name is not defined for study: ${study.name}`); + } + if (!isStringASCIIWithoutChars(experimentName, invalidExperimentNameChars)) { + errors.push( + `Invalid experiment name: ${experimentName} (expected ASCII without "${invalidExperimentNameChars}" chars)`, + ); + } +} + +function checkFeatureName( + experiment: Study_Experiment, + featureName: string, + errors: string[], +) { + if (featureName === '') { + errors.push( + `Feature name is not defined for experiment: ${experiment.name}`, + ); + } + if ( + !isStringASCIIWithoutChars(featureName, invalidFeatureOrFieldTrialNameChars) + ) { + errors.push( + `Invalid feature name for experiment ${experiment.name}: ${featureName} (expected ASCII without "${invalidFeatureOrFieldTrialNameChars}" chars)`, + ); + } +} + +function isStringASCIIWithoutChars( + str: string, + charsToExclude: string, +): boolean { + // Check if every character in the string is within the ASCII range (0-127). + for (let i = 0; i < str.length; i++) { + if (str.charCodeAt(i) > 127) { + return false; + } + if (charsToExclude !== '' && charsToExclude.includes(str[i])) { + return false; + } + } + return true; +} diff --git a/src/test/data/invalid_seed.bin b/src/test/data/invalid_seed.bin new file mode 100644 index 0000000000000000000000000000000000000000..988b4dd582c29ef5ba98590208d3eaeaef187c3f GIT binary patch literal 124 zcmd;LG!!b~l5xz*$uD