From b014d60f3ae3220770d145323eb3db19bc7dbf07 Mon Sep 17 00:00:00 2001 From: April Shen Date: Wed, 4 Sep 2024 15:47:37 +0100 Subject: [PATCH 1/3] filtering - WIP --- cmat/clinvar_xml_io/clinvar_dataset.py | 7 ++++++- .../clinvar_to_evidence_strings.py | 1 + cmat/trait_mapping/trait_names_parsing.py | 19 +++++++++++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/cmat/clinvar_xml_io/clinvar_dataset.py b/cmat/clinvar_xml_io/clinvar_dataset.py index a896cbdd..f82722a0 100644 --- a/cmat/clinvar_xml_io/clinvar_dataset.py +++ b/cmat/clinvar_xml_io/clinvar_dataset.py @@ -4,7 +4,8 @@ from datetime import date from cmat.clinvar_xml_io.clinvar_reference_record import ClinVarReferenceRecord -from cmat.clinvar_xml_io.xml_parsing import iterate_rcv_from_xml, parse_header_attributes +from cmat.clinvar_xml_io.clinvar_set import ClinVarSet +from cmat.clinvar_xml_io.xml_parsing import iterate_rcv_from_xml, parse_header_attributes, iterate_cvs_from_xml logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -22,6 +23,10 @@ def __iter__(self): for rcv in iterate_rcv_from_xml(self.clinvar_xml): yield ClinVarReferenceRecord(rcv, self.xsd_version) + def iter_cvs(self): + for cvs in iterate_cvs_from_xml(self.clinvar_xml): + yield ClinVarSet(cvs, self.xsd_version) + def get_xsd_version(self): # For format, see https://github.com/ncbi/clinvar/blob/master/FTPSiteXsdChanges.md if 'xsi:noNamespaceSchemaLocation' in self.header_attr: diff --git a/cmat/output_generation/clinvar_to_evidence_strings.py b/cmat/output_generation/clinvar_to_evidence_strings.py index 653341ab..85572a44 100644 --- a/cmat/output_generation/clinvar_to_evidence_strings.py +++ b/cmat/output_generation/clinvar_to_evidence_strings.py @@ -64,6 +64,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings logger.info('Processing ClinVar records') i = -1 + # TODO filter here for clinvar_record in ClinVarDataset(clinvar_xml): # If start & end provided, only process records in the range [start, end) i += 1 diff --git a/cmat/trait_mapping/trait_names_parsing.py b/cmat/trait_mapping/trait_names_parsing.py index 55f4bd97..39497e21 100644 --- a/cmat/trait_mapping/trait_names_parsing.py +++ b/cmat/trait_mapping/trait_names_parsing.py @@ -1,6 +1,8 @@ from collections import Counter +from typing import Iterable -from cmat import clinvar_xml_io +from cmat.clinvar_xml_io import ClinVarDataset +from cmat.clinvar_xml_io.clinvar_set import ClinVarSet from cmat.trait_mapping.trait import Trait @@ -27,7 +29,12 @@ def parse_trait_names(filepath: str) -> list: # Their curation is of highest importance regardless of how many records they are actually associated with. nt_expansion_traits = set() - for clinvar_record in clinvar_xml_io.ClinVarDataset(filepath): + dataset = ClinVarDataset(filepath) + for clinvar_set in dataset.iter_cvs(): + # TODO where to put this logic (both the method & the exclusion list)? + if should_exclude_record(clinvar_set, ['SUB14299258']): + continue + clinvar_record = clinvar_set.rcv trait_names_and_ids = set((trait.preferred_or_other_valid_name.lower(), trait.identifier) for trait in clinvar_record.traits_with_valid_names) for trait_tuple in trait_names_and_ids: @@ -46,3 +53,11 @@ def parse_trait_names(filepath: str) -> list: associated_with_nt_expansion=associated_with_nt_expansion)) return traits + + +def should_exclude_record(clinvar_set: ClinVarSet, names_to_exclude: Iterable) -> bool: + """Return True if every submitted record in the set has submission_name in the exclusion list.""" + for submitted_record in clinvar_set.scvs: + if submitted_record.submission_name not in names_to_exclude: + return False + return True From 4fb0d280362923188a4b8875ec35065f79c6dea1 Mon Sep 17 00:00:00 2001 From: April Shen Date: Thu, 5 Sep 2024 12:00:16 +0100 Subject: [PATCH 2/3] add filtering to trait name parsing and evidence string generation --- bin/trait_mapping/parse_traits.py | 3 +-- cmat/clinvar_xml_io/filtering.py | 12 +++++++++ .../clinvar_to_evidence_strings.py | 25 ++++++++++++------- cmat/output_generation/report.py | 5 +++- cmat/trait_mapping/trait_names_parsing.py | 14 ++--------- 5 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 cmat/clinvar_xml_io/filtering.py diff --git a/bin/trait_mapping/parse_traits.py b/bin/trait_mapping/parse_traits.py index e49a7ca5..d1ab119f 100644 --- a/bin/trait_mapping/parse_traits.py +++ b/bin/trait_mapping/parse_traits.py @@ -5,8 +5,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description="Parse traits from ClinVar XML") - parser.add_argument("-i", dest="input_filepath", required=True, - help="ClinVar XML dump file. One record per line.") + parser.add_argument("-i", dest="input_filepath", required=True, help="ClinVar XML dump file.") parser.add_argument("-o", dest="output_traits_filepath", required=True, help="path to output file for all traits for downstream processing") parser.add_argument("-u", dest="output_for_platform", required=False, diff --git a/cmat/clinvar_xml_io/filtering.py b/cmat/clinvar_xml_io/filtering.py new file mode 100644 index 00000000..d16714dc --- /dev/null +++ b/cmat/clinvar_xml_io/filtering.py @@ -0,0 +1,12 @@ +# Filtering functions that can be used in multiple pipelines. + +# Identified as problematic submissions, e.g. too many unmappable trait names. +submission_names_to_exclude = ['SUB14299258'] + + +def filter_by_submission_name(clinvar_set): + """Return False (i.e. filter out) if every submitted record in the set has submission_name in the exclusion list.""" + for submitted_record in clinvar_set.scvs: + if submitted_record.submission_name not in submission_names_to_exclude: + return True + return False diff --git a/cmat/output_generation/clinvar_to_evidence_strings.py b/cmat/output_generation/clinvar_to_evidence_strings.py index 85572a44..36d2c991 100644 --- a/cmat/output_generation/clinvar_to_evidence_strings.py +++ b/cmat/output_generation/clinvar_to_evidence_strings.py @@ -10,6 +10,7 @@ from cmat.clinvar_xml_io import ClinVarDataset from cmat.clinvar_xml_io.clinical_classification import MultipleClinicalClassificationsError +from cmat.clinvar_xml_io.filtering import filter_by_submission_name from cmat.output_generation import consequence_type as CT from cmat.output_generation.report import Report @@ -64,8 +65,8 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings logger.info('Processing ClinVar records') i = -1 - # TODO filter here - for clinvar_record in ClinVarDataset(clinvar_xml): + dataset = ClinVarDataset(clinvar_xml) + for clinvar_set in dataset.iter_cvs(): # If start & end provided, only process records in the range [start, end) i += 1 if start and i < start: @@ -79,7 +80,13 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings # Catch any exceptions for this record so we can continue processing. try: - # Failure mode 0 (skip). Contains multiple clinical classification annotations. + # Failure mode 1 (fatal). Record is only supported by submissions deemed to be unusable. + if not filter_by_submission_name(clinvar_set): + report.clinvar_fatal_excluded_submission += 1 + continue + clinvar_record = clinvar_set.rcv + + # Failure mode 2 (skip). Contains multiple clinical classification annotations. # This is new as of V2 of the ClinVar XSD and should definitely be supported at some point, # but as it can cause parsing complications we catch these cases first. # See GH issue for context: https://github.com/EBIvariation/CMAT/issues/396 @@ -88,18 +95,18 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings report.clinvar_skip_multiple_clinical_classifications += 1 continue - # Failure mode 1 (fatal). A ClinVar record contains no valid traits (traits which have at least one valid, + # Failure mode 3 (fatal). A ClinVar record contains no valid traits (traits which have at least one valid, # potentially mappable name). if not clinvar_record.traits_with_valid_names: report.clinvar_fatal_no_valid_traits += 1 continue - # Failure mode 2 (fatal). A ClinVar record contains no valid clinical significance terms, likely due to + # Failure mode 4 (fatal). A ClinVar record contains no valid clinical significance terms, likely due to # submissions being flagged. if not clinvar_record.valid_clinical_significances: report.clinvar_fatal_no_clinical_significance += 1 continue - # Failure mode 3 (skip). A ClinVar record contains an unsupported variation type. + # Failure mode 5 (skip). A ClinVar record contains an unsupported variation type. if clinvar_record.measure is None: report.clinvar_skip_unsupported_variation += 1 continue @@ -111,7 +118,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings grouped_diseases = group_diseases_by_efo_mapping(clinvar_record.traits_with_valid_names, string_to_efo_mappings) - # Failure mode 4 (skip). No functional consequences are available. + # Failure mode 6 (skip). No functional consequences are available. if not consequence_types: report.clinvar_skip_no_functional_consequences += 1 continue @@ -122,7 +129,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings if is_structural_variant(clinvar_record.measure): report.structural_variants += len(consequence_types) - # Failure mode 5 (skip). A ClinVar record has at least one trait with at least one valid name, but no + # Failure mode 7 (skip). A ClinVar record has at least one trait with at least one valid name, but no # suitable EFO mappings were found in the database. This will still generate an evidence string, but is # tracked as a failure so we can continue to measure mapping coverage. if not contains_mapping(grouped_diseases): @@ -176,7 +183,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings except Exception as e: # We catch exceptions but record when one is thrown, so that the pipeline will crash after processing all # records and printing the report. - logger.error(f'Problem generating evidence for {clinvar_record.accession}') + logger.error(f'Problem generating evidence for {clinvar_set.rcv.accession}') logger.error(f'Error: {e}') exception_raised = True continue diff --git a/cmat/output_generation/report.py b/cmat/output_generation/report.py index 714a502e..c9b45005 100644 --- a/cmat/output_generation/report.py +++ b/cmat/output_generation/report.py @@ -27,6 +27,7 @@ def __init__(self, trait_mappings=None, consequence_mappings=None): self.clinvar_total = 0 self.clinvar_fatal_no_valid_traits = 0 self.clinvar_fatal_no_clinical_significance = 0 + self.clinvar_fatal_excluded_submission = 0 self.clinvar_skip_unsupported_variation = 0 self.clinvar_skip_no_functional_consequences = 0 self.clinvar_skip_missing_efo_mapping = 0 @@ -88,7 +89,8 @@ def load_from_file(self, filename): def compute_record_tallies(self): """Compute tallies of records fatal/skipped/done based on the more granular counts.""" - self.clinvar_fatal = self.clinvar_fatal_no_valid_traits + self.clinvar_fatal_no_clinical_significance + self.clinvar_fatal = (self.clinvar_fatal_no_valid_traits + self.clinvar_fatal_no_clinical_significance + + self.clinvar_fatal_excluded_submission) self.clinvar_skipped = (self.clinvar_skip_unsupported_variation + self.clinvar_skip_no_functional_consequences + self.clinvar_skip_missing_efo_mapping + self.clinvar_skip_invalid_evidence_string + self.clinvar_skip_multiple_clinical_classifications) @@ -115,6 +117,7 @@ def print_report(self): Fatal: Cannot produce evidence\t{self.clinvar_fatal} No traits with valid names\t{self.clinvar_fatal_no_valid_traits} No clinical significance\t{self.clinvar_fatal_no_clinical_significance} + Excluded submissions\t{self.clinvar_fatal_excluded_submission} Skipped: Can be rescued by future improvements\t{self.clinvar_skipped} Unsupported variation type\t{self.clinvar_skip_unsupported_variation} No functional consequences\t{self.clinvar_skip_no_functional_consequences} diff --git a/cmat/trait_mapping/trait_names_parsing.py b/cmat/trait_mapping/trait_names_parsing.py index 39497e21..682dfa56 100644 --- a/cmat/trait_mapping/trait_names_parsing.py +++ b/cmat/trait_mapping/trait_names_parsing.py @@ -1,8 +1,7 @@ from collections import Counter -from typing import Iterable from cmat.clinvar_xml_io import ClinVarDataset -from cmat.clinvar_xml_io.clinvar_set import ClinVarSet +from cmat.clinvar_xml_io.filtering import filter_by_submission_name from cmat.trait_mapping.trait import Trait @@ -31,8 +30,7 @@ def parse_trait_names(filepath: str) -> list: dataset = ClinVarDataset(filepath) for clinvar_set in dataset.iter_cvs(): - # TODO where to put this logic (both the method & the exclusion list)? - if should_exclude_record(clinvar_set, ['SUB14299258']): + if not filter_by_submission_name(clinvar_set): continue clinvar_record = clinvar_set.rcv trait_names_and_ids = set((trait.preferred_or_other_valid_name.lower(), trait.identifier) @@ -53,11 +51,3 @@ def parse_trait_names(filepath: str) -> list: associated_with_nt_expansion=associated_with_nt_expansion)) return traits - - -def should_exclude_record(clinvar_set: ClinVarSet, names_to_exclude: Iterable) -> bool: - """Return True if every submitted record in the set has submission_name in the exclusion list.""" - for submitted_record in clinvar_set.scvs: - if submitted_record.submission_name not in names_to_exclude: - return False - return True From facb45db2a6bdde9b17ae15af705aec6adb6db5d Mon Sep 17 00:00:00 2001 From: April Shen Date: Thu, 5 Sep 2024 12:52:01 +0100 Subject: [PATCH 3/3] fix tests --- cmat/clinvar_xml_io/clinvar_reference_record.py | 1 - cmat/clinvar_xml_io/clinvar_set.py | 4 ++-- cmat/clinvar_xml_io/clinvar_submitted_record.py | 2 +- cmat/output_generation/clinvar_to_evidence_strings.py | 2 +- .../resources/expected/automated_trait_mappings.tsv | 8 ++++---- .../resources/expected/google_sheets_table.tsv | 8 ++++---- .../expected/trait_names_to_ontology_mappings.tsv | 10 +++++----- 7 files changed, 17 insertions(+), 18 deletions(-) diff --git a/cmat/clinvar_xml_io/clinvar_reference_record.py b/cmat/clinvar_xml_io/clinvar_reference_record.py index 90bd4f4e..a45c574f 100644 --- a/cmat/clinvar_xml_io/clinvar_reference_record.py +++ b/cmat/clinvar_xml_io/clinvar_reference_record.py @@ -2,7 +2,6 @@ from functools import cached_property from cmat.clinvar_xml_io.clinical_classification import ClinicalClassification - from cmat.clinvar_xml_io.clinvar_record import ClinVarRecord from cmat.clinvar_xml_io.xml_parsing import find_mandatory_unique_element, find_elements diff --git a/cmat/clinvar_xml_io/clinvar_set.py b/cmat/clinvar_xml_io/clinvar_set.py index 7f490b13..25b98267 100644 --- a/cmat/clinvar_xml_io/clinvar_set.py +++ b/cmat/clinvar_xml_io/clinvar_set.py @@ -1,4 +1,4 @@ -from cmat.clinvar_xml_io import ClinVarRecord +from cmat.clinvar_xml_io.clinvar_reference_record import ClinVarReferenceRecord from cmat.clinvar_xml_io.clinvar_submitted_record import ClinVarSubmittedRecord from cmat.clinvar_xml_io.xml_parsing import find_mandatory_unique_element, find_elements @@ -12,7 +12,7 @@ def __init__(self, cvs_xml, xsd_version): self.cvs_xml = cvs_xml rcv_elem = find_mandatory_unique_element(self.cvs_xml, 'ReferenceClinVarAssertion') - self.rcv = ClinVarRecord(rcv_elem, xsd_version) + self.rcv = ClinVarReferenceRecord(rcv_elem, xsd_version) scv_elems = find_elements(self.cvs_xml, 'ClinVarAssertion', allow_zero=False, allow_multiple=True) self.scvs = [ClinVarSubmittedRecord(elem, xsd_version, self.rcv) for elem in scv_elems] diff --git a/cmat/clinvar_xml_io/clinvar_submitted_record.py b/cmat/clinvar_xml_io/clinvar_submitted_record.py index 7934cea3..ed247e27 100644 --- a/cmat/clinvar_xml_io/clinvar_submitted_record.py +++ b/cmat/clinvar_xml_io/clinvar_submitted_record.py @@ -1,7 +1,7 @@ import logging from functools import cached_property -from cmat.clinvar_xml_io import ClinVarRecord +from cmat.clinvar_xml_io.clinvar_record import ClinVarRecord from cmat.clinvar_xml_io.xml_parsing import find_mandatory_unique_element logger = logging.getLogger(__name__) diff --git a/cmat/output_generation/clinvar_to_evidence_strings.py b/cmat/output_generation/clinvar_to_evidence_strings.py index 36d2c991..5c377d54 100644 --- a/cmat/output_generation/clinvar_to_evidence_strings.py +++ b/cmat/output_generation/clinvar_to_evidence_strings.py @@ -184,7 +184,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings # We catch exceptions but record when one is thrown, so that the pipeline will crash after processing all # records and printing the report. logger.error(f'Problem generating evidence for {clinvar_set.rcv.accession}') - logger.error(f'Error: {e}') + logger.error(f'Error: {repr(e)}') exception_raised = True continue diff --git a/tests/pipelines/resources/expected/automated_trait_mappings.tsv b/tests/pipelines/resources/expected/automated_trait_mappings.tsv index 4274e9a9..6950f88c 100644 --- a/tests/pipelines/resources/expected/automated_trait_mappings.tsv +++ b/tests/pipelines/resources/expected/automated_trait_mappings.tsv @@ -128,7 +128,7 @@ chédiak-higashi syndrome http://www.orpha.net/ORDO/Orphanet_167 chédiak-higash cobalamin c disease http://purl.obolibrary.org/obo/MONDO_0010184 methylmalonic aciduria and homocystinuria type cblC cobalamin c disease http://www.orpha.net/ORDO/Orphanet_26 Methylmalonic acidemia with homocystinuria cobalamin c disease http://www.orpha.net/ORDO/Orphanet_79282 Methylmalonic acidemia with homocystinuria, type cblC -coffin-siris syndrome 1 http://purl.obolibrary.org/obo/MONDO_0015452 Coffin-Siris syndrome +coffin-siris syndrome 1 http://purl.obolibrary.org/obo/MONDO_0007617 coffin-siris syndrome 1 cog1 congenital disorder of glycosylation http://purl.obolibrary.org/obo/MONDO_0012637 COG1-congenital disorder of glycosylation cog7 congenital disorder of glycosylation http://purl.obolibrary.org/obo/MONDO_0012118 COG7-congenital disorder of glycosylation cohen syndrome http://purl.obolibrary.org/obo/MONDO_0008999 cohen syndrome @@ -278,7 +278,7 @@ hepatoencephalopathy due to combined oxidative phosphorylation defect type 1 htt hereditary breast ovarian cancer syndrome http://purl.obolibrary.org/obo/MONDO_0003582 hereditary breast ovarian cancer syndrome hereditary cancer-predisposing syndrome http://purl.obolibrary.org/obo/MONDO_0015356 hereditary neoplastic syndrome hereditary diffuse gastric adenocarcinoma http://purl.obolibrary.org/obo/MONDO_0007648 hereditary diffuse gastric adenocarcinoma -hereditary diffuse leukoencephalopathy with spheroids http://www.orpha.net/ORDO/Orphanet_313808 Hereditary diffuse leukoencephalopathy with axonal spheroids and pigmented glia +hereditary diffuse leukoencephalopathy with spheroids http://www.orpha.net/ORDO/Orphanet_313808 Adult-onset leukoencephalopathy with axonal spheroids and pigmented glia hereditary hemorrhagic telangiectasia http://purl.obolibrary.org/obo/MONDO_0019180 hereditary hemorrhagic telangiectasia hereditary insensitivity to pain with anhidrosis http://purl.obolibrary.org/obo/MONDO_0009746 hereditary sensory and autonomic neuropathy type 4 hereditary nonpolyposis colorectal neoplasms http://www.ebi.ac.uk/efo/EFO_0009911 hereditary nonpolyposis colorectal carcinoma @@ -338,7 +338,7 @@ inflammatory skin and bowel disease, neonatal, 1 http://purl.obolibrary.org/obo/ intellectual developmental disorder, autosomal dominant 64 http://purl.obolibrary.org/obo/MONDO_0030934 intellectual developmental disorder, autosomal dominant 64 intellectual disability http://purl.obolibrary.org/obo/HP_0001249 intellectual disability intellectual disability, autosomal dominant 1 http://purl.obolibrary.org/obo/MONDO_0016459 2q23.1 microdeletion syndrome -intellectual disability, autosomal dominant 20 http://purl.obolibrary.org/obo/MONDO_0016456 5q14.3 microdeletion syndrome +intellectual disability, autosomal dominant 20 http://purl.obolibrary.org/obo/MONDO_0013266 intellectual disability, autosomal dominant 20 intellectual disability, autosomal dominant 5 http://purl.obolibrary.org/obo/MONDO_0012960 intellectual disability, autosomal dominant 5 intellectual disability, autosomal dominant 6 http://purl.obolibrary.org/obo/MONDO_0100172 intellectual disability, autosomal dominant intellectual disability, autosomal dominant 9 http://purl.obolibrary.org/obo/MONDO_0013656 intellectual disability, autosomal dominant 9 @@ -508,7 +508,7 @@ retinitis pigmentosa-deafness syndrome http://purl.obolibrary.org/obo/MONDO_0019 retinoblastoma http://purl.obolibrary.org/obo/MONDO_0008380 retinoblastoma rett syndrome http://purl.obolibrary.org/obo/MONDO_0010726 rett syndrome rett syndrome, congenital variant http://purl.obolibrary.org/obo/MONDO_0010726 Rett syndrome -rhabdoid tumor predisposition syndrome 2 http://purl.obolibrary.org/obo/MONDO_0016473 familial rhabdoid tumor +rhabdoid tumor predisposition syndrome 2 http://purl.obolibrary.org/obo/MONDO_0013224 rhabdoid tumor predisposition syndrome 2 rod-cone dystrophy http://www.orpha.net/ORDO/Orphanet_1872 Cone rod dystrophy rubinstein-taybi syndrome http://purl.obolibrary.org/obo/MONDO_0019188 rubinstein-taybi syndrome ryr1-related disorders http://www.ebi.ac.uk/efo/EFO_0009143 ryr1-related disorders diff --git a/tests/pipelines/resources/expected/google_sheets_table.tsv b/tests/pipelines/resources/expected/google_sheets_table.tsv index e2b46667..1b204d57 100644 --- a/tests/pipelines/resources/expected/google_sheets_table.tsv +++ b/tests/pipelines/resources/expected/google_sheets_table.tsv @@ -1,9 +1,9 @@ -gc1/gc2 polymorphism 1 NT expansion http://purl.obolibrary.org/obo/MONDO_0010739|Taqi polymorphism|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_118803|solute carrier family 25 member 22|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0009826|PA polymorphism of alpha-2-globulin|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_122340|guanylate cyclase 2D, retinal|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED +gc1/gc2 polymorphism 1 NT expansion http://purl.obolibrary.org/obo/MONDO_0010739|Taqi polymorphism|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0009826|PA polymorphism of alpha-2-globulin|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_118803|solute carrier family 25 member 22|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_122340|guanylate cyclase 2D, retinal|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED inherited immunodeficiency diseases 2 http://identifiers.org/medgen/C5197805|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/C5197805|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/mesh/D000081207|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED congenital muscular dystrophy, alpha-dystroglycan related 1 http://identifiers.org/medgen/CN239202|Congenital Muscular Dystrophy, alpha-dystroglycan related|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239202|Congenital Muscular Dystrophy, alpha-dystroglycan related|HIGH|clinvar-xrefs|NOT_CONTAINED corneal dystrophy, recessive 1 http://identifiers.org/medgen/CN239343|Corneal Dystrophy, Recessive|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239343|Corneal Dystrophy, Recessive|HIGH|clinvar-xrefs|NOT_CONTAINED -elfn1-related condition 1 http://purl.obolibrary.org/obo/MONDO_0045054|cancer-related condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0021074|precancerous condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0025256|Ameliorated by heat|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0032522|Ameliorated by immunosuppresion|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_568065|EPHB4-related lymphatic-related hydrops fetalis|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_471012|RAS related|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED +elfn1-related condition 1 http://purl.obolibrary.org/obo/MONDO_0045054|cancer-related condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_1040010|IRF6-related condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0025256|Ameliorated by heat|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0032522|Ameliorated by immunosuppresion|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_568065|EPHB4-related lymphatic-related hydrops fetalis|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_471012|RAS related|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED blepharophimosis, ptosis, and epicanthus inversus, type i 1 http://identifiers.org/medgen/C2931135|BLEPHAROPHIMOSIS, PTOSIS, AND EPICANTHUS INVERSUS, TYPE I|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/C2931135|BLEPHAROPHIMOSIS, PTOSIS, AND EPICANTHUS INVERSUS, TYPE I|HIGH|clinvar-xrefs|NOT_CONTAINED -autosomal dominant kcnq1-related disease 1 http://purl.obolibrary.org/obo/MONDO_0018832|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0000426|autosomal dominant disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0000006|Autosomal dominant inheritance|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_482077|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_497757|MME-related autosomal dominant Charcot Marie Tooth disease type 2|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0003743|Genetic anticipation|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED +autosomal dominant kcnq1-related disease 1 http://purl.obolibrary.org/obo/MONDO_0018832|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0000426|autosomal dominant disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0000006|Autosomal dominant inheritance|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_482077|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_497757|MME-related autosomal dominant Charcot Marie Tooth disease type 2|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0003743|Genetic anticipation|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED isolated nonsyndromic congenital heart disease 1 http://identifiers.org/medgen/CN239319|Isolated Nonsyndromic Congenital Heart Disease|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239319|Isolated Nonsyndromic Congenital Heart Disease|HIGH|clinvar-xrefs|NOT_CONTAINED -simvastatin response - toxicity 1 http://www.ebi.ac.uk/efo/EFO_0011048|dermatological toxicity|MEDIUM|http://www.ebi.ac.uk/efo/efo.owl|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_529831|Letrozole toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0010784|chloramphenicol toxicity|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_565782|Methotrexate toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.4.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0023176|formaldehyde poisoning|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED +simvastatin response - toxicity 1 http://www.ebi.ac.uk/efo/EFO_0011048|dermatological toxicity|MEDIUM|http://www.ebi.ac.uk/efo/efo.owl|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_529831|Letrozole toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_565782|Methotrexate toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.5.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0027653|abacavir toxicity|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0023176|formaldehyde poisoning|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED diff --git a/tests/pipelines/resources/expected/trait_names_to_ontology_mappings.tsv b/tests/pipelines/resources/expected/trait_names_to_ontology_mappings.tsv index 7b7339dd..6bde0459 100644 --- a/tests/pipelines/resources/expected/trait_names_to_ontology_mappings.tsv +++ b/tests/pipelines/resources/expected/trait_names_to_ontology_mappings.tsv @@ -1,4 +1,4 @@ -#generated-date=2024-05-24 +#generated-date=2024-09-05 #ontology=EFO #clinvar_trait_name uri label 2-aminoadipic 2-oxoadipic aciduria http://purl.obolibrary.org/obo/MONDO_0008774 2-aminoadipic 2-oxoadipic aciduria @@ -132,7 +132,7 @@ chédiak-higashi syndrome http://www.orpha.net/ORDO/Orphanet_167 chédiak-higash cobalamin c disease http://purl.obolibrary.org/obo/MONDO_0010184 methylmalonic aciduria and homocystinuria type cblC cobalamin c disease http://www.orpha.net/ORDO/Orphanet_26 Methylmalonic acidemia with homocystinuria cobalamin c disease http://www.orpha.net/ORDO/Orphanet_79282 Methylmalonic acidemia with homocystinuria, type cblC -coffin-siris syndrome 1 http://purl.obolibrary.org/obo/MONDO_0015452 Coffin-Siris syndrome +coffin-siris syndrome 1 http://purl.obolibrary.org/obo/MONDO_0007617 coffin-siris syndrome 1 cog1 congenital disorder of glycosylation http://purl.obolibrary.org/obo/MONDO_0012637 COG1-congenital disorder of glycosylation cog7 congenital disorder of glycosylation http://purl.obolibrary.org/obo/MONDO_0012118 COG7-congenital disorder of glycosylation cohen syndrome http://purl.obolibrary.org/obo/MONDO_0008999 cohen syndrome @@ -283,7 +283,7 @@ hepatoencephalopathy due to combined oxidative phosphorylation defect type 1 htt hereditary breast ovarian cancer syndrome http://purl.obolibrary.org/obo/MONDO_0003582 hereditary breast ovarian cancer syndrome hereditary cancer-predisposing syndrome http://purl.obolibrary.org/obo/MONDO_0015356 hereditary neoplastic syndrome hereditary diffuse gastric adenocarcinoma http://purl.obolibrary.org/obo/MONDO_0007648 hereditary diffuse gastric adenocarcinoma -hereditary diffuse leukoencephalopathy with spheroids http://www.orpha.net/ORDO/Orphanet_313808 Hereditary diffuse leukoencephalopathy with axonal spheroids and pigmented glia +hereditary diffuse leukoencephalopathy with spheroids http://www.orpha.net/ORDO/Orphanet_313808 Adult-onset leukoencephalopathy with axonal spheroids and pigmented glia hereditary hemorrhagic telangiectasia http://purl.obolibrary.org/obo/MONDO_0019180 hereditary hemorrhagic telangiectasia hereditary insensitivity to pain with anhidrosis http://purl.obolibrary.org/obo/MONDO_0009746 hereditary sensory and autonomic neuropathy type 4 hereditary nonpolyposis colorectal neoplasms http://www.ebi.ac.uk/efo/EFO_0009911 hereditary nonpolyposis colorectal carcinoma @@ -344,7 +344,7 @@ inflammatory skin and bowel disease, neonatal, 1 http://purl.obolibrary.org/obo/ intellectual developmental disorder, autosomal dominant 64 http://purl.obolibrary.org/obo/MONDO_0030934 intellectual developmental disorder, autosomal dominant 64 intellectual disability http://purl.obolibrary.org/obo/HP_0001249 intellectual disability intellectual disability, autosomal dominant 1 http://purl.obolibrary.org/obo/MONDO_0016459 2q23.1 microdeletion syndrome -intellectual disability, autosomal dominant 20 http://purl.obolibrary.org/obo/MONDO_0016456 5q14.3 microdeletion syndrome +intellectual disability, autosomal dominant 20 http://purl.obolibrary.org/obo/MONDO_0013266 intellectual disability, autosomal dominant 20 intellectual disability, autosomal dominant 5 http://purl.obolibrary.org/obo/MONDO_0012960 intellectual disability, autosomal dominant 5 intellectual disability, autosomal dominant 6 http://purl.obolibrary.org/obo/MONDO_0100172 intellectual disability, autosomal dominant intellectual disability, autosomal dominant 9 http://purl.obolibrary.org/obo/MONDO_0013656 intellectual disability, autosomal dominant 9 @@ -517,7 +517,7 @@ retinitis pigmentosa-deafness syndrome http://purl.obolibrary.org/obo/MONDO_0019 retinoblastoma http://purl.obolibrary.org/obo/MONDO_0008380 retinoblastoma rett syndrome http://purl.obolibrary.org/obo/MONDO_0010726 rett syndrome rett syndrome, congenital variant http://purl.obolibrary.org/obo/MONDO_0010726 Rett syndrome -rhabdoid tumor predisposition syndrome 2 http://purl.obolibrary.org/obo/MONDO_0016473 familial rhabdoid tumor +rhabdoid tumor predisposition syndrome 2 http://purl.obolibrary.org/obo/MONDO_0013224 rhabdoid tumor predisposition syndrome 2 rod-cone dystrophy http://www.orpha.net/ORDO/Orphanet_1872 Cone rod dystrophy rubinstein-taybi syndrome http://purl.obolibrary.org/obo/MONDO_0019188 rubinstein-taybi syndrome ryr1-related disorders http://www.ebi.ac.uk/efo/EFO_0009143 ryr1-related disorders