Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework ontologyannotation localID field #170

Merged
merged 2 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 7 additions & 18 deletions src/ISA/ISA.Json/Ontology.fs
Original file line number Diff line number Diff line change
Expand Up @@ -109,27 +109,16 @@ module OntologyAnnotation =
|> GEncode.choose
|> Encode.object

let localIDDecoder : Decoder<string> =
fun s json ->
match Decode.string s json with
| Ok (Regex.ActivePatterns.TermAnnotation tan) ->
Ok (tan.TermSourceREF)
| _ -> Ok ""
//| Ok s -> Error (DecoderError(s,ErrorReason.FailMessage "Could not parse local ID from string"))
//| Error e -> Error e


let decoder (options : ConverterOptions) : Decoder<OntologyAnnotation> =
Decode.object (fun get ->
{
ID = get.Optional.Field "@id" GDecode.uri
Name = get.Optional.Field "annotationValue" (AnnotationValue.decoder options)
TermSourceREF = get.Optional.Field "termSource" Decode.string
//LocalID = try get.Optional.Field "termAccession" localIDDecoder with | _ -> None
LocalID = get.Optional.Field "termAccession" localIDDecoder |> Option.bind (fun s -> if s = "" then None else Some s)
TermAccessionNumber = get.Optional.Field "termAccession" Decode.string
Comments = get.Optional.Field "comments" (Decode.array (Comment.decoder options))
}
OntologyAnnotation.create(
?Id = get.Optional.Field "@id" GDecode.uri,
?Name = get.Optional.Field "annotationValue" (AnnotationValue.decoder options),
?TermSourceREF = get.Optional.Field "termSource" Decode.string,
?TermAccessionNumber = get.Optional.Field "termAccession" Decode.string,
?Comments = get.Optional.Field "comments" (Decode.array (Comment.decoder options))
)
)

let fromString (s:string) =
Expand Down
10 changes: 5 additions & 5 deletions src/ISA/ISA.Spreadsheet/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ module ActivePattern =

open ARCtrl.ISA.Regex.ActivePatterns

let mergeTerms tsr1 tan1 tsr2 tan2 =
if tsr1 <> tsr2 then failwithf "TermSourceRef %s and %s do not match" tsr1 tsr2
if tan1 <> tan2 then failwithf "TermAccessionNumber %s and %s do not match" tan1 tan2
{|TermSourceRef = tsr1; TermAccessionNumber = tan1|}
/// Merges the ID information taken from two term reference column headers
/// (used below with the values parsed by `TSRColumnHeader` and `TANColumnHeader`).
///
/// Fails via `failwithf` when the two id spaces differ or when the two local IDs differ.
/// Otherwise returns an anonymous record with `TermSourceRef` set to the id space and
/// `TermAccessionNumber` set to the short accession string "idspace:localid".
///
/// NOTE(review): when both inputs are empty strings the resulting `TermAccessionNumber`
/// is ":" rather than "" - confirm that callers expect this.
let mergeIDInfo idSpace1 localID1 idSpace2 localID2 =
if idSpace1 <> idSpace2 then failwithf "TermSourceRef %s and %s do not match" idSpace1 idSpace2
if localID1 <> localID2 then failwithf "LocalID %s and %s do not match" localID1 localID2
{|TermSourceRef = idSpace1; TermAccessionNumber = $"{idSpace1}:{localID1}"|}

let (|Term|_|) (categoryParser : string -> string option) (f : OntologyAnnotation -> CompositeHeader) (cells : FsCell list) =
let (|AC|_|) s =
Expand All @@ -25,7 +25,7 @@ module ActivePattern =
//| [AC name; TermAccessionNumber term1; TermSourceREF term2]
//| [AC name; Unit; TermAccessionNumber term1; TermSourceREF term2]
| [AC name; UnitColumnHeader; TSRColumnHeader term1; TANColumnHeader term2] ->
let term = mergeTerms term1.TermSourceREF term1.TermAccessionNumber term2.TermSourceREF term2.TermAccessionNumber
let term = mergeIDInfo term1.IDSpace term1.LocalID term2.IDSpace term2.LocalID
let ont = OntologyAnnotation.fromString(name, term.TermSourceRef, term.TermAccessionNumber)
f ont
|> Some
Expand Down
4 changes: 2 additions & 2 deletions src/ISA/ISA/ArcTypes/CompositeCell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ type CompositeCell =
// TODO: I would really love to have an overload here accepting string input
static member createTerm (oa:OntologyAnnotation) = Term oa
static member createTermFromString (?name: string, ?tsr: string, ?tan: string) =
Term <| OntologyAnnotation.fromString(?term = name, ?tsr = tsr, ?tan = tan)
Term <| OntologyAnnotation.fromString(?termName = name, ?tsr = tsr, ?tan = tan)
static member createUnitized (value: string, ?oa:OntologyAnnotation) = Unitized (value, Option.defaultValue (OntologyAnnotation.empty) oa)
static member createUnitizedFromString (value: string, ?name: string, ?tsr: string, ?tan: string) =
Unitized <| (value, OntologyAnnotation.fromString(?term = name, ?tsr = tsr, ?tan = tan))
Unitized <| (value, OntologyAnnotation.fromString(?termName = name, ?tsr = tsr, ?tan = tan))
static member createFreeText (value: string) = FreeText value

static member emptyTerm = Term OntologyAnnotation.empty
Expand Down
4 changes: 2 additions & 2 deletions src/ISA/ISA/ArcTypes/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ type CompositeHeader =
// Input/Output have similar naming to Term, but are more specific.
// So they have to be called first.
| Regex.ActivePatterns.Regex Regex.Pattern.InputPattern r ->
let iotype = r.Groups.["iotype"].Value
let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value
Input <| IOType.ofString (iotype)
| Regex.ActivePatterns.Regex Regex.Pattern.OutputPattern r ->
let iotype = r.Groups.["iotype"].Value
let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value
Output <| IOType.ofString (iotype)
// Is term column
| Regex.ActivePatterns.TermColumn r ->
Expand Down
2 changes: 1 addition & 1 deletion src/ISA/ISA/JsonTypes/Component.fs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ type Component =

/// Create an ISAJson Component from ISATab string entries
static member fromString (?name: string, ?term:string, ?source:string, ?accession:string, ?comments : Comment []) =
let cType = OntologyAnnotation.fromString (?term = term, ?tsr=source, ?tan=accession, ?comments = comments) |> Option.fromValueWithDefault OntologyAnnotation.empty
let cType = OntologyAnnotation.fromString (?termName = term, ?tsr=source, ?tan=accession, ?comments = comments) |> Option.fromValueWithDefault OntologyAnnotation.empty
match name with
| Some n ->
let v,u = Component.decomposeName n
Expand Down
41 changes: 18 additions & 23 deletions src/ISA/ISA/JsonTypes/OntologyAnnotation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,31 @@ type OntologyAnnotation =
ID : URI option
Name : AnnotationValue option
TermSourceREF : string option
LocalID : string option
TermAccessionNumber : URI option
Comments : Comment [] option
}

static member make id name termSourceREF localID termAccessionNumber comments=
static member make id name termSourceREF termAccessionNumber comments=
{
ID = id
Name = name
TermSourceREF = termSourceREF
LocalID = localID
TermAccessionNumber = termAccessionNumber
Comments = comments
}

/// This function creates the type exactly as given. If you want a more streamlined approach use `OntologyAnnotation.fromString`.
static member create(?Id,?Name,?TermSourceREF,?LocalID,?TermAccessionNumber,?Comments) : OntologyAnnotation =
OntologyAnnotation.make Id Name TermSourceREF LocalID TermAccessionNumber Comments
static member create(?Id,?Name,?TermSourceREF,?TermAccessionNumber,?Comments) : OntologyAnnotation =
OntologyAnnotation.make Id Name TermSourceREF TermAccessionNumber Comments

static member empty =
OntologyAnnotation.create()


member this.IDInfo =
this.TermAccessionNumber
|> Option.bind Regex.tryParseTermAnnotation

/// Returns the name of the ontology as string
// TODO: Why is this called Text, while everything else is called string?
member this.NameText =
Expand Down Expand Up @@ -76,20 +79,12 @@ type OntologyAnnotation =
///</summary>
///<param name="tsr">Term source reference</param>
///<param name="tan">Term accession number</param>
static member fromString (?term:string, ?tsr:string, ?tan:string, ?comments : Comment []) =

let tsr,localID =
match tan with
| Some (Regex.ActivePatterns.TermAnnotation tan) ->
(if tsr.IsSome then tsr else Some tan.TermSourceREF),
Some tan.LocalTAN
| _ -> tsr,None
static member fromString (?termName:string, ?tsr:string, ?tan:string, ?comments : Comment []) =

OntologyAnnotation.make
None
(term |> Option.map AnnotationValue.fromString)
(termName |> Option.map AnnotationValue.fromString)
tsr
localID
tan
comments

Expand All @@ -99,20 +94,20 @@ type OntologyAnnotation =
|> Regex.tryParseTermAnnotation
|> Option.get
|> fun r ->
let accession = r.TermSourceREF + ":" + r.LocalTAN
OntologyAnnotation.fromString ("", r.TermSourceREF, accession)
let accession = r.IDSpace + ":" + r.LocalID
OntologyAnnotation.fromString ("", r.IDSpace, accession)

/// Parses any value in `TermAccessionNumber` to the short term accession format "idspace:localid". Example: "MS:000001".
///
/// If `TermAccessionString` cannot be parsed to this format, returns empty string!
member this.TermAccessionShort =
match this.TermSourceREF, this.LocalID with
| Some tsr, Some id -> $"{tsr}:{id}"
match this.IDInfo with
| Some id -> $"{id.IDSpace}:{id.LocalID}"
| _ -> ""

member this.TermAccessionOntobeeUrl =
match this.TermSourceREF, this.LocalID with
| Some tsr, Some id -> OntologyAnnotation.createUriAnnotation tsr id
match this.IDInfo with
| Some id -> OntologyAnnotation.createUriAnnotation id.IDSpace id.LocalID
| _ -> ""

member this.TermAccessionAndOntobeeUrlIfShort =
Expand Down Expand Up @@ -234,4 +229,4 @@ type OntologyAnnotation =

member this.Copy() =
let nextComments = this.Comments |> Option.map (Array.map (fun c -> c.Copy()))
OntologyAnnotation.make this.ID this.Name this.TermSourceREF this.LocalID this.TermAccessionNumber nextComments
OntologyAnnotation.make this.ID this.Name this.TermSourceREF this.TermAccessionNumber nextComments
71 changes: 39 additions & 32 deletions src/ISA/ISA/Regex.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ open System

module Pattern =

/// Names of the named capture groups used by the regex patterns in this module.
/// The patterns embed these names and the parsing code looks groups up by the same constants.
module MatchGroups =

/// Group capturing the text between the quotes of an Excel number format.
[<Literal>]
let numberFormat = "numberFormat"

/// Group capturing the local ID part of a term accession (the part after the id space).
[<Literal>]
let localID = "localid"

/// Group capturing the id space part of a term accession (the part before the local ID).
[<Literal>]
let idspace = "idspace"

/// Group capturing the text inside the square brackets of an Input/Output column header.
[<Literal>]
let iotype = "iotype"

/// This pattern is only used to remove any leftover #id attributes from previous Swate version.
/// `"Parameter [biological replicate#2]"` This #id is deprecated but the pattern can still be used to remove any from files.
/// Was deprecated before 2023.
Expand All @@ -21,8 +35,7 @@ module Pattern =
let SquaredBracketsTermNamePattern = "\[.*\]" // @"(?<= \[)[^#\]]*(?=[\]#])" <- Cannot be used in IE11

/// Used to get unit name from Excel numberFormat: 0.00 "degree Celsius" --> degree Celsius
[<LiteralAttribute>]
let ExcelNumberFormat = "\"(?<numberFormat>(.*?))\""
let ExcelNumberFormat = $"\"(?<{MatchGroups.numberFormat}>(.*?))\""

/// Hits Unit column header
[<LiteralAttribute>]
Expand Down Expand Up @@ -55,29 +68,23 @@ module Pattern =
let TermAccessionNumberColumnPattern = @"Term Accession Number\s\((?<id>.*)\)"

/// Hits term accession, without id: ENVO:01001831
[<LiteralAttribute>]
let TermAnnotationShortPattern = @"(?<termsourceref>\w+?):(?<localtan>\w+)" //prev: @"[\w]+?:[\d]+"
let TermAnnotationShortPattern = $@"(?<{MatchGroups.idspace}>\w+?):(?<{MatchGroups.localID}>\w+)" //prev: @"[\w]+?:[\d]+"

// https://obofoundry.org/id-policy.html#mapping-of-owl-ids-to-obo-format-ids
/// <summary>Regex pattern is designed to hit only Foundry-compliant URIs.</summary>
[<LiteralAttribute>]
let TermAnnotationURIPattern = @"http://purl.obolibrary.org/obo/(?<termsourceref>\w+?)_(?<localtan>\w+)"
let TermAnnotationURIPattern = $@"http://purl.obolibrary.org/obo/(?<{MatchGroups.idspace}>\w+?)_(?<{MatchGroups.localID}>\w+)"

/// Watch this closely, this could hit some edge cases we do not want to cover.
[<LiteralAttribute>]
let TermAnnotationURIPattern_lessRestrictive = @".*\/(?<termsourceref>\w+?)[:_](?<localtan>\w+)"
let TermAnnotationURIPattern_lessRestrictive = $@".*\/(?<{MatchGroups.idspace}>\w+?)[:_](?<{MatchGroups.localID}>\w+)"

/// This pattern is used to match both Input and Output columns and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let IOTypePattern = @"(Input|Output)\s\[(?<iotype>.+)\]"
let IOTypePattern = $@"(Input|Output)\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern is used to match Input column and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let InputPattern = @"Input\s\[(?<iotype>.+)\]"
let InputPattern = $@"Input\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern is used to match Output column and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let OutputPattern = @"Output\s\[(?<iotype>.+)\]"
let OutputPattern = $@"Output\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern matches any column header starting with some text, followed by one whitespace and a term name inside squared brackets.
///
Expand Down Expand Up @@ -176,9 +183,9 @@ module ActivePatterns =
let (|TermAnnotationShort|_|) input =
match input with
| Regex Pattern.TermAnnotationShortPattern value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localID = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localID|}
|> Some
| _ ->
None
Expand All @@ -193,9 +200,9 @@ module ActivePatterns =
| Regex Pattern.TermAnnotationShortPattern value
| Regex Pattern.TermAnnotationURIPattern value
| Regex Pattern.TermAnnotationURIPattern_lessRestrictive value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan; TermAccessionNumber = input|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localID = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localID|}
|> Some
| _ ->
None
Expand All @@ -208,7 +215,7 @@ module ActivePatterns =
| Regex Pattern.TermSourceREFColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> Some {|IDSpace = ""; LocalID = ""|}
| _ -> None

/// Matches a "Term Accession Number (ShortTerm)" column header and returns the ShortTerm parsed into its IDSpace and LocalID.
Expand All @@ -219,21 +226,21 @@ module ActivePatterns =
| Regex Pattern.TermAccessionNumberColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> Some {|IDSpace = ""; LocalID = ""|}
| _ -> None

/// Matches a "Input [InputType]" column header and returns the InputType as string.
let (|InputColumnHeader|_|) input =
match input with
| Regex Pattern.InputPattern r ->
Some r.Groups.["iotype"].Value
Some r.Groups.[Pattern.MatchGroups.iotype].Value
| _ -> None

/// Matches a "Output [OutputType]" column header and returns the OutputType as string.
let (|OutputColumnHeader|_|) input =
match input with
| Regex Pattern.OutputPattern r ->
Some r.Groups.["iotype"].Value
Some r.Groups.[Pattern.MatchGroups.iotype].Value
| _ -> None


Expand Down Expand Up @@ -261,9 +268,9 @@ let tryParseReferenceColumnHeader (str : string) =
let tryParseTermAnnotationShort (str:string) =
match str.Trim() with
| Regex TermAnnotationShortPattern value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localid = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localid|}
|> Some
| _ -> None

Expand All @@ -279,17 +286,17 @@ let tryParseTermAnnotation (str:string) =
| Regex TermAnnotationShortPattern value
| Regex TermAnnotationURIPattern value
| Regex TermAnnotationURIPattern_lessRestrictive value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localid = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localid|}
|> Some
| _ ->
None

/// Tries to parse 'str' to a term accession and returns it in the format `Some "idspace:localid"`. Example: `Some "MS:000001"`
let tryGetTermAnnotationShortString (str:string) =
tryParseTermAnnotation str
|> Option.map (fun r -> r.TermSourceREF + ":" + r.LocalTAN)
|> Option.map (fun r -> r.IDSpace + ":" + r.LocalID)

/// Parses 'str' to a term accession and returns it in the format "idspace:localid". Example: "MS:000001"
let getTermAnnotationShortString (str:string) =
Expand Down Expand Up @@ -320,7 +327,7 @@ let tryParseIOTypeHeader (headerStr: string) =
match headerStr.Trim() with
| Regex IOTypePattern value ->
// the `iotype` capture group holds the text inside the square brackets
let numberFormat = value.Groups.["iotype"].Value
let numberFormat = value.Groups.[Pattern.MatchGroups.iotype].Value
Some numberFormat
| _ ->
None
Expand Down
Loading