Skip to content

Commit

Permalink
Merge pull request #382 from apriltuesday/issue-379
Browse files Browse the repository at this point in the history
Issue 379 - Add check for obsolete terms to manual curation process
  • Loading branch information
apriltuesday authored Jun 2, 2023
2 parents bde4fa4 + c1d1d44 commit fd9dae8
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 18 deletions.
45 changes: 33 additions & 12 deletions bin/trait_mapping/create_table_for_manual_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,28 @@
import pandas as pd

from cmat.trait_mapping.ols import (
get_ontology_label_from_ols, is_current_and_in_efo, is_in_efo,
get_ontology_label_from_ols, is_current_and_in_efo, is_in_efo, get_replacement_term,
)


def find_previous_mapping(trait_name, previous_mappings):
def find_previous_mapping_and_replacement(trait_name, previous_mappings):
if trait_name not in previous_mappings:
return ''
return '', ''
uri = previous_mappings[trait_name]
label = get_ontology_label_from_ols(uri)
uri_is_current_and_in_efo = is_current_and_in_efo(uri)
uri_in_efo = is_in_efo(uri)
if uri_in_efo:
trait_status = 'EFO_CURRENT' if uri_is_current_and_in_efo else 'EFO_OBSOLETE'
else:
trait_status = 'NOT_CONTAINED'
label = get_ontology_label(uri)
trait_status = get_trait_status(uri)
trait_string = '|'.join([uri, label, 'NOT_SPECIFIED', 'previously-used', trait_status])
replacement_string = find_replacement_mapping(uri)
return trait_string, replacement_string


def find_replacement_mapping(previous_uri):
    """
    Build the pipe-delimited mapping string for the replacement of a previously used term.

    :param previous_uri: Ontology uri of the previously used mapping
    :return: String of the form 'uri|label|NOT_SPECIFIED|replacement|status' for the replacement term, or an empty
             string when no replacement term is found
    """
    new_uri = get_replacement_term(previous_uri)
    if new_uri:
        # Tuple elements are evaluated left to right: the label is looked up before the status.
        parts = (
            new_uri,
            get_ontology_label(new_uri),
            'NOT_SPECIFIED',
            'replacement',
            get_trait_status(new_uri),
        )
        return '|'.join(parts)
    return ''


Expand All @@ -31,6 +37,21 @@ def find_exact_mapping(trait_name, mappings):
return ''


def get_ontology_label(uri):
    """
    Fetch the label for an ontology uri, substituting an empty string when none is returned.

    :param uri: Ontology uri to look up
    :return: Label returned by get_ontology_label_from_ols, or '' if that lookup returned None
    """
    ols_label = get_ontology_label_from_ols(uri)
    if ols_label is None:
        return ''
    return ols_label


def get_trait_status(uri):
    """
    Classify an ontology uri by its presence and currency in EFO.

    :param uri: Ontology uri to classify
    :return: 'NOT_CONTAINED' if the uri is not in EFO; otherwise 'EFO_CURRENT' if it is current and in EFO,
             else 'EFO_OBSOLETE'
    """
    # Both lookups are always performed, currency first, before any branching.
    current_in_efo = is_current_and_in_efo(uri)
    contained_in_efo = is_in_efo(uri)
    if not contained_in_efo:
        return 'NOT_CONTAINED'
    if current_in_efo:
        return 'EFO_CURRENT'
    return 'EFO_OBSOLETE'


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
Expand Down Expand Up @@ -66,9 +87,9 @@ def find_exact_mapping(trait_name, mappings):
notes = f'"{notes}\n{previous_comments[trait_name]}"'
# Use maximum of 50 mappings to improve Google Sheets performance
mappings = fields[3:53]
previous_mapping = find_previous_mapping(trait_name, previous_mappings)
previous_mapping, replacement_mapping = find_previous_mapping_and_replacement(trait_name, previous_mappings)
exact_mapping = find_exact_mapping(trait_name, mappings)
rows.append([trait_name, trait_freq, notes, previous_mapping, exact_mapping] + mappings)
rows.append([trait_name, trait_freq, notes, previous_mapping, exact_mapping, replacement_mapping] + mappings)

rows.sort(key=lambda x: (x[2], int(x[1])), reverse=True)
with open(args.output, 'w') as outfile:
Expand Down
28 changes: 25 additions & 3 deletions cmat/trait_mapping/ols.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import requests
import urllib

from cmat.trait_mapping.utils import json_request
from retry import retry

from cmat.trait_mapping.utils import json_request, ServerError

OLS_EFO_SERVER = 'https://www.ebi.ac.uk/ols'
# The setting for local OLS installation should be uncommented if necessary. Note that the link
Expand Down Expand Up @@ -59,6 +60,7 @@ def double_encode_uri(uri: str) -> str:
return urllib.parse.quote(urllib.parse.quote(uri, safe=""), safe="")


@retry(exceptions=(ConnectionError, ServerError), logger=logger, tries=8, delay=2, backoff=1.2, jitter=(1, 3))
def ols_efo_query(uri: str) -> requests.Response:
"""
Query EFO using OLS for a given ontology uri, returning the response from the request.
Expand All @@ -67,8 +69,11 @@ def ols_efo_query(uri: str) -> requests.Response:
:return: Response from OLS
"""
double_encoded_uri = double_encode_uri(uri)
return requests.get(
response = requests.get(
"{}/api/ontologies/efo/terms/{}".format(OLS_EFO_SERVER, double_encoded_uri))
if 500 <= response.status_code < 600:
raise ServerError
return response


@lru_cache(maxsize=16384)
Expand All @@ -92,7 +97,24 @@ def is_in_efo(uri: str) -> bool:
Checks whether given ontology uri is a valid term in EFO.
:param uri: Ontology uri to use in querying EFO using OLS
:return: Boolean value, true if ontology uri is valid and non-obsolete term in EFO
:return: Boolean value, true if ontology uri is valid term in EFO
"""
response = ols_efo_query(uri)
return response.status_code == 200


@lru_cache(maxsize=16384)
def get_replacement_term(uri: str) -> str:
    """
    Finds replacement term in EFO (if present) for the given ontology uri.

    :param uri: Ontology uri to use in querying EFO using OLS
    :return: Replacement term URI, or empty string if the query does not return status 200 or the response
             does not name a replacement term
    """
    response = ols_efo_query(uri)
    if response.status_code != 200:
        return ""
    # Use .get so a payload that omits "term_replaced_by" is handled like an explicit null
    # instead of raising KeyError.
    replacement = response.json().get("term_replaced_by")
    return replacement if replacement is not None else ""
6 changes: 6 additions & 0 deletions cmat/trait_mapping/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import logging
import requests
from requests import HTTPError
from retry import retry

logger = logging.getLogger(__package__)


class ServerError(HTTPError):
    """
    A server-side error occurred.

    Raised when a response carries a 5xx status code, so that retry logic can treat it as a retryable
    failure alongside connection errors.
    """


@retry(exceptions=(ConnectionError, requests.RequestException), logger=logger,
tries=8, delay=2, backoff=1.2, jitter=(1, 3))
def json_request(url: str, payload: dict = None, method=requests.get) -> dict:
Expand Down
6 changes: 3 additions & 3 deletions docs/manual-curation/step2-manual-curation.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
The goals of the manual curation:
* All traits which are linked to NT expansion (nucleotide repeat expansion) variants must be curated. Those are marked as "NT expansion" in the “Notes” column.
* All traits with occurrence ≥ **10** must be curated. Additionally, if there are fewer than **200** such terms, then the top 200 terms must be curated.
* _Suggested previous mapping_ traits should be checked for any terms that have become obsolete since the last iteration. This can be done by filtering then searching for the string EFO\_OBSOLETE
* _Suggested previous mapping_ traits should be checked for any terms that have become obsolete since the last iteration. These will be colored red and likely have a _suggested replacement mapping_ provided in the appropriate column. If no replacement is provided, curate as usual.
* For the rest of the traits, we curate as many as possible.

Good mappings must be eyeballed to ensure they are actually good. Alternative mappings for medium or low quality mappings can be searched for using OLS. If a mapping can't be found in EFO, look for a mapping to a HP, ORDO, or MONDO trait name. Most HP/ORDO/MONDO terms will also be in EFO but some are not. These can be imported to EFO using the Webulous submission service.
Expand All @@ -27,8 +27,8 @@ Curation should be done by subsequently applying filters to appropriate columns,

* 1\. **There is a previously assigned mapping for this trait.** All of these are the decisions that we made in the past, so we trust them (to an extent). Copy and paste previously used mappings into “Mapping to use”. Then review them according to the following steps.
* 1.1. **The previously assigned mapping is in EFO**
* 1.1.1. **The previously assigned mapping is in EFO and is exact.** Mark as finished immediately. (It's extremely unlikely that a better mapping could be found).
* 1.1.2. **The previously assigned mapping is in EFO and IS NOT exact.** Review the mappings to see if a better (more accurate/specific) mapping is available. Then mark as finished.
* 1.1.1. **The previously assigned mapping is in EFO and is exact and current.** Mark as finished immediately. (It's extremely unlikely that a better mapping could be found).
* 1.1.2. **The previously assigned mapping is in EFO and IS NOT exact or current.** Review the mappings to see if a better (more accurate/specific, non-obsolete) mapping is available. Then mark as finished.
* 1.2. **The previously assigned mapping is not contained in EFO.** We need to either find a mapping which is already in EFO, or import these terms into EFO.
* 1.2.1. **The previously used mapping IS NOT contained in EFO and is exact.** These are good candidates to mark as finished and then import into EFO afterwards. However, quickly check whether there are non-exact matches which are already in EFO and are as good as exact mappings.
* E. g. if the exact mapping is “erythrocytosis 6, familial” and not in EFO, but there is an inexact mapping “familial erythrocytosis 6” which *is* in EFO, we should use the inexact mapping.
Expand Down

0 comments on commit fd9dae8

Please sign in to comment.