From 1c3eed8a54ff88b6541ca525ffd91b93acc078a8 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 24 Jan 2023 16:12:29 -0800 Subject: [PATCH 1/3] validate: Remove never-used code --- augur/validate.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/augur/validate.py b/augur/validate.py index 8b3e06439..cb9f9526f 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -17,21 +17,6 @@ from augur.io.json import shorten_as_json from .validate_export import verifyMainJSONIsInternallyConsistent, verifyMetaAndOrTreeJSONsAreInternallyConsistent -class ValidationWarnings: - def __init__(self): - self.seen = defaultdict(set) - def add(self, warningType, message): - self.seen[warningType].add(message) - def show(self): - print("WARNINGS") - print(self.seen) - -class ValidationErrors(ValidationWarnings): - def show(self): - print("ERRORS") - print(self.seen) - sys.exit(2) - def fatal(message): print("FATAL ERROR: {}".format(message)) sys.exit(2) @@ -178,8 +163,6 @@ def auspice_config_v2(config_json, **kwargs): validate(config, schema, config_json) def export_v2(main_json, **kwargs): - # validationWarnings = ValidationWarnings() - # validationErrors = ValidationErrors() main_schema = load_json_schema("schema-export-v2.json") if main_json.endswith("frequencies.json") or main_json.endswith("entropy.json") or main_json.endswith("sequences.json"): From 3003592577e1fe3996aea4eb43ffb567e2ef5833 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 24 Jan 2023 16:23:18 -0800 Subject: [PATCH 2/3] export v2: Add --validation-mode={error,warn,skip} option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the existing --skip-validation flag to be more nuanced by the addition of a "warn" mode. Our validation is improving, but not always correct. The "warn" mode allows a workflow to emit messages about validation failures, but keep going in the face of them instead of aborting the whole workflow due to a non-zero exit code. We may want to change the default mode to "warn"—at least until our validation failures have few "false positives" or "low-consequence positives" where the output files still load ok in Auspice—but I'll leave this for separate consideration. The --skip-validation flag is now an alias for --validation-mode=skip. Related-to: --- augur/export_v2.py | 53 ++++++++++++++++++++++---- augur/types.py | 21 ++++++++++ augur/util_support/node_data_file.py | 16 ++++++-- augur/util_support/node_data_reader.py | 9 +++-- augur/utils.py | 6 ++- tests/functional/export_v2.t | 1 + 6 files changed, 89 insertions(+), 17 deletions(-) diff --git a/augur/export_v2.py b/augur/export_v2.py index 48009f296..19bb6933c 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -12,6 +12,7 @@ from .argparse_ import ExtendAction from .io.metadata import read_metadata +from .types import ValidationMode from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors from .validate import export_v2 as validate_v2, auspice_config_v2 as validate_auspice_config_v2, ValidateError @@ -522,7 +523,11 @@ def set_filters(data_json, config): if coloring["type"] != "continuous" and coloring["key"] != 'gt'} data_json['meta']['filters'] = list(potentials) -def validate_data_json(filename): +def validate_data_json(filename, validation_mode=ValidationMode.ERROR): + if validation_mode is ValidationMode.SKIP: + print(f"Skipping validation of produced JSON due to --validation-mode={validation_mode.value} or --skip-validation.") + return + print("Validating produced JSON") try: validate_v2(main_json=filename) @@ -531,7 +536,18 @@ def validate_data_json(filename): print("\n------------------------") print("Validation of {} failed. Please check this in a local instance of `auspice`, as it is not expected to display correctly. ".format(filename)) print("------------------------") + validation_failure(validation_mode) + +def validation_failure(mode: ValidationMode): + if mode is ValidationMode.ERROR: sys.exit(2) + elif mode is ValidationMode.WARN: + print(f"Continuing due to --validation-mode={mode.value} even though there were validation errors.") + elif mode is ValidationMode.SKIP: + # Shouldn't be doing validation under skip, but if we're called anyway just do nothing. + return + else: + raise ValueError(f"unknown validation mode: {mode!r}") def set_panels(data_json, config, cmd_line_panels): @@ -857,7 +873,31 @@ def register_parser(parent_subparsers): ) optional_settings.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns") optional_settings.add_argument('--include-root-sequence', action="store_true", help="Export an additional JSON containing the root sequence (reference sequence for vcf) used to identify mutations. The filename will follow the pattern of _root-sequence.json for a main auspice JSON of .json") - optional_settings.add_argument('--skip-validation', action="store_true", help="skip validation of input/output files. Use at your own risk!") + optional_settings.add_argument( + '--validation-mode', + dest="validation_mode", + type=ValidationMode, + choices=[mode for mode in ValidationMode], + default=ValidationMode.ERROR, + help=""" + Control if optional validation checks are performed and what + happens if they fail. + + 'error' and 'warn' modes perform validation and emit messages about + failed validation checks. 'error' mode causes a non-zero exit + status if any validation checks failed, while 'warn' does not. + + 'skip' mode performs no validation. + + Note that some validation checks are non-optional and as such are + not affected by this setting. + """) + optional_settings.add_argument( + '--skip-validation', + dest="validation_mode", + action="store_const", + const=ValidationMode.SKIP, + help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!") return parser @@ -969,13 +1009,13 @@ def get_config(args): if not args.auspice_config: return {} config = read_config(args.auspice_config) - if not args.skip_validation: + if args.validation_mode is not ValidationMode.SKIP: try: print("Validating config file {} against the JSON schema".format(args.auspice_config)) validate_auspice_config_v2(args.auspice_config) except ValidateError: print("Validation of {} failed. Please check the formatting of this file & refer to the augur documentation for further help. ".format(args.auspice_config)) - sys.exit(2) + validation_failure(args.validation_mode) # Print a warning about the inclusion of "vaccine_choices" which are _unused_ by `export v2` # (They are in the schema as this allows v1-compat configs to be used) if config.get("vaccine_choices"): @@ -989,7 +1029,7 @@ def run(args): #load input files try: - node_data_file = read_node_data(args.node_data, skip_validation=args.skip_validation) # node_data_files is an array of multiple files (or a single file) + node_data_file = read_node_data(args.node_data, validation_mode=args.validation_mode) # node_data_files is an array of multiple files (or a single file) except FileNotFoundError: print(f"ERROR: node data file ({args.node_data}) does not exist") sys.exit(2) @@ -1066,8 +1106,7 @@ def run(args): fatal("Root sequence output was requested, but the node data provided is missing a 'reference' key.") # validate outputs - if not args.skip_validation: - validate_data_json(args.output) + validate_data_json(args.output, args.validation_mode) if deprecationWarningsEmitted: print("\n------------------------") diff --git a/augur/types.py b/augur/types.py index a61d24096..be21b33dc 100644 --- a/augur/types.py +++ b/augur/types.py @@ -11,3 +11,24 @@ class DataErrorMethod(enum.Enum): ERROR_ALL = 'error_all' WARN = 'warn' SILENT = 'silent' + + +@enum.unique +class ValidationMode(enum.Enum): + """ + Enum representation of string values that represent how validation should + be handled. + """ + ERROR = 'error' + WARN = 'warn' + SKIP = 'skip' + + def __str__(self) -> str: + """ + Stringify to the enum member's :py:attr:`.value` instead of the default. + + This let us use the enum's constructor and members with argparse's + ``type`` and ``choices`` parameters, respectively, without exposing the + enum class name to users. + """ + return self.value diff --git a/augur/util_support/node_data_file.py b/augur/util_support/node_data_file.py index 472bf7b5a..5d389771e 100644 --- a/augur/util_support/node_data_file.py +++ b/augur/util_support/node_data_file.py @@ -3,6 +3,8 @@ from augur.__version__ import __version__ from augur.__version__ import is_augur_version_compatible from augur.errors import AugurError +from augur.io.print import print_err +from augur.types import ValidationMode from augur.validate import validate_json, ValidateError, load_json_schema @@ -10,9 +12,9 @@ class NodeDataFile: - def __init__(self, fname, skip_validation=False): + def __init__(self, fname, validation_mode=ValidationMode.ERROR): self.fname = fname - self.skip_validation = skip_validation + self.validation_mode = validation_mode with open(fname, encoding="utf-8") as jfile: self.attrs = json.load(jfile) @@ -71,10 +73,16 @@ def validate(self): f"`nodes` value in {self.fname} is not a dictionary. Please check the formatting of this JSON!" ) - if not self.skip_validation and self.is_generated_by_incompatible_augur: - raise AugurError( + if self.validation_mode is not ValidationMode.SKIP and self.is_generated_by_incompatible_augur: + msg = ( f"Augur version incompatibility detected: the JSON {self.fname} was generated by " f"{self.generated_by}, which is incompatible with the current augur version " f"({__version__}). We suggest you rerun the pipeline using the current version of " "augur." ) + if self.validation_mode is ValidationMode.ERROR: + raise AugurError(msg) + elif self.validation_mode is ValidationMode.WARN: + print_err(f"WARNING: {msg}") + else: + raise ValueError(f"unknown validation mode: {self.validation_mode!r}") diff --git a/augur/util_support/node_data_reader.py b/augur/util_support/node_data_reader.py index d6bcefd99..191414318 100644 --- a/augur/util_support/node_data_reader.py +++ b/augur/util_support/node_data_reader.py @@ -1,6 +1,7 @@ import Bio.Phylo import sys +from augur.types import ValidationMode from augur.util_support.node_data import DuplicatedNonDictAttributeError from augur.util_support.node_data import NodeData from augur.util_support.node_data_file import NodeDataFile @@ -15,15 +16,15 @@ class NodeDataReader: If a tree file is specified, it is used to verify the node names. - If skip_validation is set to true, Augur version of node data files is not checked. + If validation_mode is set to :py:attr:`ValidationMode.SKIP`, Augur version of node data files is not checked. """ - def __init__(self, filenames, tree_file=None, skip_validation=False): + def __init__(self, filenames, tree_file=None, validation_mode=ValidationMode.ERROR): if not isinstance(filenames, list): filenames = [filenames] self.filenames = filenames self.tree_file = tree_file - self.skip_validation = skip_validation + self.validation_mode = validation_mode def read(self): node_data = self.build_node_data() @@ -51,7 +52,7 @@ def build_node_data(self): @property def node_data_files(self): - return (NodeDataFile(fname, skip_validation = self.skip_validation) for fname in self.filenames) + return (NodeDataFile(fname, validation_mode = self.validation_mode) for fname in self.filenames) def check_against_tree_file(self, node_data): if not self.tree_file: diff --git a/augur/utils.py b/augur/utils.py index 3f5a169e2..33152d3b6 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -15,6 +15,8 @@ from augur.io.file import open_file +from augur.types import ValidationMode + from augur.util_support.color_parser import ColorParser from augur.util_support.node_data_reader import NodeDataReader @@ -87,8 +89,8 @@ def read_tree(fname, min_terminals=3): return T -def read_node_data(fnames, tree=None, skip_validation=False): - return NodeDataReader(fnames, tree, skip_validation).read() +def read_node_data(fnames, tree=None, validation_mode=ValidationMode.ERROR): + return NodeDataReader(fnames, tree, validation_mode).read() def write_json(data, file_name, indent=(None if os.environ.get("AUGUR_MINIFY_JSON") else 2), include_version=True): diff --git a/tests/functional/export_v2.t b/tests/functional/export_v2.t index 5efafc641..b9eb06e94 100644 --- a/tests/functional/export_v2.t +++ b/tests/functional/export_v2.t @@ -144,6 +144,7 @@ Skipping validation allows mismatched augur versions to be used without error. > --skip-validation WARNING: You didn't provide information on who is maintaining this analysis. \s{0} (re) + Skipping validation of produced JSON due to --validation-mode=skip or --skip-validation. \s{0} (re) Check the output from the above command against its expected contents From 816d285154adea71251231bae3d1b6d5d1713a87 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 27 Jan 2023 15:42:25 -0800 Subject: [PATCH 3/3] Update changelog for #1135 --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b4078565e..4e48e1c54 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,12 +2,17 @@ ## __NEXT__ +### Features + +* export v2: Add `--validation-mode={error,warn,skip}` option for more nuanced control of validation. The new "warn" mode performs validation and emits messages about potential problems, but it does not cause the export command to fail even if there are problems. [#1135][] (@tsibley) + ### Bug Fixes * filter, frequencies, refine, parse: Properly handle invalid date errors and output the bad date. [#1140][] (@victorlin) * export, validate: Validation errors are now much more human-readable and actually pinpoint the problems. [#1134][] (@tsibley) [#1134]: https://github.com/nextstrain/augur/pull/1134 +[#1135]: https://github.com/nextstrain/augur/pull/1135 [#1140]: https://github.com/nextstrain/augur/pull/1140 ## 20.0.0 (20 January 2023)