Skip to content

Commit

Permalink
Merge branch 'trs/export-v2/validation-mode'
Browse files Browse the repository at this point in the history
  • Loading branch information
tsibley committed Jan 27, 2023
2 parents 5400626 + 816d285 commit 94671e7
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 34 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@

## __NEXT__

### Features

* export v2: Add `--validation-mode={error,warn,skip}` option for more nuanced control of validation. The new "warn" mode performs validation and emits messages about potential problems, but it does not cause the export command to fail even if there are problems. [#1135][] (@tsibley)

### Bug Fixes

* filter, frequencies, refine, parse: Properly handle invalid date errors and output the bad date. [#1140][] (@victorlin)
* export, validate: Validation errors are now much more human-readable and actually pinpoint the problems. [#1134][] (@tsibley)

[#1134]: https://github.com/nextstrain/augur/pull/1134
[#1135]: https://github.com/nextstrain/augur/pull/1135
[#1140]: https://github.com/nextstrain/augur/pull/1140

## 20.0.0 (20 January 2023)
Expand Down
53 changes: 46 additions & 7 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from .argparse_ import ExtendAction
from .io.metadata import read_metadata
from .types import ValidationMode
from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors
from .validate import export_v2 as validate_v2, auspice_config_v2 as validate_auspice_config_v2, ValidateError

Expand Down Expand Up @@ -522,7 +523,11 @@ def set_filters(data_json, config):
if coloring["type"] != "continuous" and coloring["key"] != 'gt'}
data_json['meta']['filters'] = list(potentials)

def validate_data_json(filename):
def validate_data_json(filename, validation_mode=ValidationMode.ERROR):
if validation_mode is ValidationMode.SKIP:
print(f"Skipping validation of produced JSON due to --validation-mode={validation_mode.value} or --skip-validation.")
return

print("Validating produced JSON")
try:
validate_v2(main_json=filename)
Expand All @@ -531,7 +536,18 @@ def validate_data_json(filename):
print("\n------------------------")
print("Validation of {} failed. Please check this in a local instance of `auspice`, as it is not expected to display correctly. ".format(filename))
print("------------------------")
validation_failure(validation_mode)

def validation_failure(mode: ValidationMode):
    """
    React to a failed validation check according to *mode*.

    ``ValidationMode.ERROR`` terminates the process with exit status 2.
    ``ValidationMode.WARN`` prints a notice and returns so the caller can
    carry on.  ``ValidationMode.SKIP`` returns silently.

    Raises
    ------
    ValueError
        If *mode* is not one of the known :class:`ValidationMode` members.
    """
    # Validation should not have run at all under skip, but tolerate being
    # called anyway by doing nothing.
    if mode is ValidationMode.SKIP:
        return

    if mode is ValidationMode.WARN:
        print(f"Continuing due to --validation-mode={mode.value} even though there were validation errors.")
        return

    if mode is ValidationMode.ERROR:
        sys.exit(2)

    raise ValueError(f"unknown validation mode: {mode!r}")


def set_panels(data_json, config, cmd_line_panels):
Expand Down Expand Up @@ -857,7 +873,31 @@ def register_parser(parent_subparsers):
)
optional_settings.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns")
optional_settings.add_argument('--include-root-sequence', action="store_true", help="Export an additional JSON containing the root sequence (reference sequence for vcf) used to identify mutations. The filename will follow the pattern of <OUTPUT>_root-sequence.json for a main auspice JSON of <OUTPUT>.json")
optional_settings.add_argument('--skip-validation', action="store_true", help="skip validation of input/output files. Use at your own risk!")
optional_settings.add_argument(
'--validation-mode',
dest="validation_mode",
type=ValidationMode,
choices=[mode for mode in ValidationMode],
default=ValidationMode.ERROR,
help="""
Control if optional validation checks are performed and what
happens if they fail.
'error' and 'warn' modes perform validation and emit messages about
failed validation checks. 'error' mode causes a non-zero exit
status if any validation checks failed, while 'warn' does not.
'skip' mode performs no validation.
Note that some validation checks are non-optional and as such are
not affected by this setting.
""")
optional_settings.add_argument(
'--skip-validation',
dest="validation_mode",
action="store_const",
const=ValidationMode.SKIP,
help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!")

return parser

Expand Down Expand Up @@ -969,13 +1009,13 @@ def get_config(args):
if not args.auspice_config:
return {}
config = read_config(args.auspice_config)
if not args.skip_validation:
if args.validation_mode is not ValidationMode.SKIP:
try:
print("Validating config file {} against the JSON schema".format(args.auspice_config))
validate_auspice_config_v2(args.auspice_config)
except ValidateError:
print("Validation of {} failed. Please check the formatting of this file & refer to the augur documentation for further help. ".format(args.auspice_config))
sys.exit(2)
validation_failure(args.validation_mode)
# Print a warning about the inclusion of "vaccine_choices" which are _unused_ by `export v2`
# (They are in the schema as this allows v1-compat configs to be used)
if config.get("vaccine_choices"):
Expand All @@ -989,7 +1029,7 @@ def run(args):

#load input files
try:
node_data_file = read_node_data(args.node_data, skip_validation=args.skip_validation) # node_data_files is an array of multiple files (or a single file)
node_data_file = read_node_data(args.node_data, validation_mode=args.validation_mode) # node_data_files is an array of multiple files (or a single file)
except FileNotFoundError:
print(f"ERROR: node data file ({args.node_data}) does not exist")
sys.exit(2)
Expand Down Expand Up @@ -1066,8 +1106,7 @@ def run(args):
fatal("Root sequence output was requested, but the node data provided is missing a 'reference' key.")

# validate outputs
if not args.skip_validation:
validate_data_json(args.output)
validate_data_json(args.output, args.validation_mode)

if deprecationWarningsEmitted:
print("\n------------------------")
Expand Down
21 changes: 21 additions & 0 deletions augur/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,24 @@ class DataErrorMethod(enum.Enum):
ERROR_ALL = 'error_all'
WARN = 'warn'
SILENT = 'silent'


@enum.unique
class ValidationMode(enum.Enum):
    """
    The ways validation can be handled, keyed by the string value that
    selects each one.
    """
    ERROR = 'error'
    WARN = 'warn'
    SKIP = 'skip'

    def __str__(self) -> str:
        """
        Return the member's :py:attr:`.value` rather than the default
        ``ClassName.MEMBER`` form.

        This lets the enum's constructor and members be used with argparse's
        ``type`` and ``choices`` parameters, respectively, without exposing
        the enum class name to users.
        """
        text = self.value
        return text
16 changes: 12 additions & 4 deletions augur/util_support/node_data_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
from augur.__version__ import __version__
from augur.__version__ import is_augur_version_compatible
from augur.errors import AugurError
from augur.io.print import print_err
from augur.types import ValidationMode
from augur.validate import validate_json, ValidateError, load_json_schema


FILTERED_ATTRS = ["generated_by"]


class NodeDataFile:
def __init__(self, fname, skip_validation=False):
def __init__(self, fname, validation_mode=ValidationMode.ERROR):
self.fname = fname
self.skip_validation = skip_validation
self.validation_mode = validation_mode

with open(fname, encoding="utf-8") as jfile:
self.attrs = json.load(jfile)
Expand Down Expand Up @@ -71,10 +73,16 @@ def validate(self):
f"`nodes` value in {self.fname} is not a dictionary. Please check the formatting of this JSON!"
)

if not self.skip_validation and self.is_generated_by_incompatible_augur:
raise AugurError(
if self.validation_mode is not ValidationMode.SKIP and self.is_generated_by_incompatible_augur:
msg = (
f"Augur version incompatibility detected: the JSON {self.fname} was generated by "
f"{self.generated_by}, which is incompatible with the current augur version "
f"({__version__}). We suggest you rerun the pipeline using the current version of "
"augur."
)
if self.validation_mode is ValidationMode.ERROR:
raise AugurError(msg)
elif self.validation_mode is ValidationMode.WARN:
print_err(f"WARNING: {msg}")
else:
raise ValueError(f"unknown validation mode: {self.validation_mode!r}")
9 changes: 5 additions & 4 deletions augur/util_support/node_data_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import Bio.Phylo
import sys

from augur.types import ValidationMode
from augur.util_support.node_data import DuplicatedNonDictAttributeError
from augur.util_support.node_data import NodeData
from augur.util_support.node_data_file import NodeDataFile
Expand All @@ -15,15 +16,15 @@ class NodeDataReader:
If a tree file is specified, it is used to verify the node names.
If skip_validation is set to true, Augur version of node data files is not checked.
If validation_mode is set to :py:attr:`ValidationMode.SKIP`, Augur version of node data files is not checked.
"""

def __init__(self, filenames, tree_file=None, skip_validation=False):
def __init__(self, filenames, tree_file=None, validation_mode=ValidationMode.ERROR):
if not isinstance(filenames, list):
filenames = [filenames]
self.filenames = filenames
self.tree_file = tree_file
self.skip_validation = skip_validation
self.validation_mode = validation_mode

def read(self):
node_data = self.build_node_data()
Expand Down Expand Up @@ -51,7 +52,7 @@ def build_node_data(self):

@property
def node_data_files(self):
return (NodeDataFile(fname, skip_validation = self.skip_validation) for fname in self.filenames)
return (NodeDataFile(fname, validation_mode = self.validation_mode) for fname in self.filenames)

def check_against_tree_file(self, node_data):
if not self.tree_file:
Expand Down
6 changes: 4 additions & 2 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from augur.io.file import open_file

from augur.types import ValidationMode

from augur.util_support.color_parser import ColorParser
from augur.util_support.node_data_reader import NodeDataReader

Expand Down Expand Up @@ -87,8 +89,8 @@ def read_tree(fname, min_terminals=3):
return T


def read_node_data(fnames, tree=None, skip_validation=False):
return NodeDataReader(fnames, tree, skip_validation).read()
def read_node_data(fnames, tree=None, validation_mode=ValidationMode.ERROR):
    """
    Read node data through a :class:`NodeDataReader` and return its result.

    Parameters
    ----------
    fnames
        A node data filename or a list of them; the reader wraps a non-list
        value in a list.
    tree
        Optional tree file handed to the reader as ``tree_file``; per the
        reader's documentation it is used to verify the node names.
    validation_mode
        Handed to the reader unchanged.  Per the reader's documentation,
        :py:attr:`ValidationMode.SKIP` disables the Augur version check of
        node data files.
    """
    reader = NodeDataReader(fnames, tree_file=tree, validation_mode=validation_mode)
    return reader.read()


def write_json(data, file_name, indent=(None if os.environ.get("AUGUR_MINIFY_JSON") else 2), include_version=True):
Expand Down
17 changes: 0 additions & 17 deletions augur/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,6 @@
from augur.io.json import shorten_as_json
from .validate_export import verifyMainJSONIsInternallyConsistent, verifyMetaAndOrTreeJSONsAreInternallyConsistent

class ValidationWarnings:
def __init__(self):
self.seen = defaultdict(set)
def add(self, warningType, message):
self.seen[warningType].add(message)
def show(self):
print("WARNINGS")
print(self.seen)

class ValidationErrors(ValidationWarnings):
def show(self):
print("ERRORS")
print(self.seen)
sys.exit(2)

def fatal(message):
    """
    Print *message* prefixed with ``FATAL ERROR: `` and exit with status 2.
    """
    print(f"FATAL ERROR: {message}")
    sys.exit(2)
Expand Down Expand Up @@ -178,8 +163,6 @@ def auspice_config_v2(config_json, **kwargs):
validate(config, schema, config_json)

def export_v2(main_json, **kwargs):
# validationWarnings = ValidationWarnings()
# validationErrors = ValidationErrors()
main_schema = load_json_schema("schema-export-v2.json")

if main_json.endswith("frequencies.json") or main_json.endswith("entropy.json") or main_json.endswith("sequences.json"):
Expand Down
1 change: 1 addition & 0 deletions tests/functional/export_v2.t
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Skipping validation allows mismatched augur versions to be used without error.
> --skip-validation
WARNING: You didn't provide information on who is maintaining this analysis.
\s{0} (re)
Skipping validation of produced JSON due to --validation-mode=skip or --skip-validation.
\s{0} (re)

Check the output from the above command against its expected contents
Expand Down

0 comments on commit 94671e7

Please sign in to comment.