Skip to content

Commit

Permalink
Merge pull request #227 from nfdi4plants/developer_updateIO
Browse files Browse the repository at this point in the history
Add function for updating IO types against each other in ARC
  • Loading branch information
HLWeil authored Oct 6, 2023
2 parents 9f89e84 + a90e89a commit 5fa2559
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 7 deletions.
49 changes: 43 additions & 6 deletions src/ISA/ISA/ArcTypes/ArcTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -268,22 +268,59 @@ type ArcTable(name: string, headers: ResizeArray<CompositeHeader>, values: Syste
fun (table:ArcTable) ->
table.GetColumn(index)

/// Looks up the first header equal to `header` and returns the result of `GetColumn` for its index.
///
/// Returns None when no header of this table equals `header`.
member this.TryGetColumnByHeader (header:CompositeHeader) =
    match this.Headers |> Seq.tryFindIndex (fun candidate -> candidate = header) with
    | Some position -> Some (this.GetColumn(position))
    | None -> None

/// Function-style form of `TryGetColumnByHeader`: takes the header first and returns a function that awaits the table.
static member tryGetColumnByHeader (header:CompositeHeader) =
    fun (arcTable:ArcTable) -> arcTable.TryGetColumnByHeader(header)

/// Returns the column whose header equals `header`.
///
/// Fails with a message naming the table and the requested header when no such column exists.
member this.GetColumnByHeader (header:CompositeHeader) =
    // Delegate to the Try-variant so a missing header produces the descriptive error below
    // instead of the bare exception raised by Seq.findIndex.
    match this.TryGetColumnByHeader(header) with
    | Some c -> c
    | None -> failwithf "Unable to find column with header in table %s: %O" this.Name header

/// Function-style form of `GetColumnByHeader`: takes the header first and returns a function that awaits the table.
static member getColumnByHeader (header:CompositeHeader) =
    fun (arcTable:ArcTable) -> arcTable.GetColumnByHeader(header)

/// Looks up the first header for which `isInput` holds and returns the result of `GetColumn` for its index.
///
/// Returns None when the table has no input header.
member this.TryGetInputColumn() =
    let index = this.Headers |> Seq.tryFindIndex (fun x -> x.isInput)
    index
    |> Option.map (fun i -> this.GetColumn(i))

/// Function-style form of `TryGetInputColumn`: returns a function that awaits the table.
static member tryGetInputColumn () =
    fun (table:ArcTable) ->
        table.TryGetInputColumn()

/// Returns the input column found by `TryGetInputColumn`.
///
/// Fails with a message naming the table when there is no input column.
member this.GetInputColumn() =
    this.TryGetInputColumn()
    |> Option.defaultWith (fun () ->
        failwithf "Unable to find input column in table %s" this.Name
    )

/// Function-style form of `GetInputColumn`: returns a function that awaits the table.
static member getInputColumn () =
    fun (arcTable:ArcTable) -> arcTable.GetInputColumn()

/// Looks up the first header for which `isOutput` holds and returns the result of `GetColumn` for its index.
///
/// Returns None when the table has no such header.
member this.TryGetOutputColumn() =
    match this.Headers |> Seq.tryFindIndex (fun candidate -> candidate.isOutput) with
    | Some position -> Some (this.GetColumn(position))
    | None -> None

/// Function-style form of `TryGetOutputColumn`: returns a function that awaits the table.
static member tryGetOutputColumn () =
    fun (arcTable:ArcTable) -> arcTable.TryGetOutputColumn()

/// Returns the output column found by `TryGetOutputColumn`.
///
/// Fails with a message naming the table when there is no output column.
member this.GetOutputColumn() =
    this.TryGetOutputColumn()
    |> Option.defaultWith (fun () ->
        failwithf "Unable to find output column in table %s" this.Name
    )

/// Function-style form of `GetOutputColumn`: returns a function that awaits the table.
static member getOutputColumn () =
    fun (arcTable:ArcTable) -> arcTable.GetOutputColumn()

// - Row API - //
member this.AddRow (?cells: CompositeCell [], ?index: int) : unit =
Expand Down
60 changes: 60 additions & 0 deletions src/ISA/ISA/ArcTypes/ArcTables.fs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,66 @@ module ArcTablesAux =
| Some index -> index
| None -> failwith $"Unable to find table with name '{name}'!"

/// Builds a dictionary from entity name to IOType over all given tables.
///
/// Every cell of each table's input column and output column is read as free text and registered under the IOType
/// of that column's header. When a name is seen more than once, the stored IOType is replaced by the result of
/// `IOType.Merge` of the stored and the new type, so a failing merge aborts the whole collection. Cells whose text is empty are skipped.
let getIOMap (tables: ResizeArray<ArcTable>) =
    let ioByEntity : Dictionary<string,IOType> = Dictionary()
    // Registers a single entity name; an already known name keeps the merged IOType.
    let register (ioType : IOType) (entity : string) =
        if entity <> "" then
            match Aux.Dict.tryFind entity ioByEntity with
            | Some known -> ioByEntity.[entity] <- known.Merge(ioType)
            | None -> ioByEntity.Add(entity, ioType)
    for table in tables do
        // The input column is processed before the output column, table by table.
        table.TryGetInputColumn()
        |> Option.iter (fun inputColumn ->
            let inputType = inputColumn.Header.tryInput().Value
            for cell in inputColumn.Cells do
                register inputType (cell.ToFreeTextCell().AsFreeText)
        )
        table.TryGetOutputColumn()
        |> Option.iter (fun outputColumn ->
            let outputType = outputColumn.Header.tryOutput().Value
            for cell in outputColumn.Cells do
                register outputType (cell.ToFreeTextCell().AsFreeText)
        )
    ioByEntity

/// Rewrites the input and output headers of every table according to the given entity-name-to-IOType map.
///
/// For each IO column, the header's current IOType is merged (`IOType.Merge`) with the mapped IOType of every cell
/// whose free text is a key of `map`; cells without an entry leave the type untouched. The header at the column's
/// index is then replaced via `UpdateHeader`. A failing merge propagates before the header is touched.
let applyIOMap (map : Dictionary<string,IOType>) (tables : ResizeArray<ArcTable>) =
    // Folds the mapped IOType of one entity into the running IOType.
    let mergeEntity (current : IOType) (entity : string) =
        match Aux.Dict.tryFind entity map with
        | Some mapped -> current.Merge(mapped)
        | None -> current
    for table in tables do
        table.TryGetInputColumn()
        |> Option.iter (fun inputColumn ->
            let position = table.Headers |> Seq.findIndex (fun h -> h.isInput)
            let startType = inputColumn.Header.tryInput().Value
            let mergedType =
                inputColumn.Cells
                |> Array.fold (fun io cell -> mergeEntity io (cell.ToFreeTextCell().AsFreeText)) startType
            table.UpdateHeader(position, CompositeHeader.Input(mergedType))
        )
        table.TryGetOutputColumn()
        |> Option.iter (fun outputColumn ->
            let position = table.Headers |> Seq.findIndex (fun h -> h.isOutput)
            let startType = outputColumn.Header.tryOutput().Value
            let mergedType =
                outputColumn.Cells
                |> Array.fold (fun io cell -> mergeEntity io (cell.ToFreeTextCell().AsFreeText)) startType
            table.UpdateHeader(position, CompositeHeader.Output(mergedType))
        )

module SanityChecks =

/// Fails, if the index is out of range of the Tables collection. When allowAppend is set to true, it may be out of range by at most 1.
Expand Down
24 changes: 24 additions & 0 deletions src/ISA/ISA/ArcTypes/ArcTypes.fs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,30 @@ type ArcInvestigation(identifier : string, ?title : string, ?description : strin
let copy = inv.Copy()
copy.DeregisterMissingAssays()
copy

/// Aligns the IOTypes of the IO columns (Input, Output) across all study and assay tables of the investigation.
///
/// Entities are the row values of the IO columns. When the same entity name occurs in columns of differing IOType
/// specificity, the less specific column header is raised to the more specific IOType.
///
/// E.g. Table1 has a column "Output [Sample Name]" containing "Sample1" and Table2 has a column "Input [Source Name]"
/// containing the same "Sample1". The Input column of Table2 is then inferred to be Sample, giving "Input [Sample Name]".
///
/// E.g. RawDataFile is more specific than Source, but less specific than DerivedDataFile.
///
/// E.g. Sample is equally specific to RawDataFile, so an entity shared between such columns makes the update fail.
member this.UpdateIOTypeByEntityID() =
    // Study tables first, then assay tables: the map is built over the whole investigation.
    let allTables = ResizeArray()
    for study in this.Studies do
        allTables.AddRange(study.Tables)
    for assay in this.Assays do
        allTables.AddRange(assay.Tables)
    let ioMap = ArcTablesAux.getIOMap allTables
    for study in this.Studies do
        ArcTablesAux.applyIOMap ioMap study.Tables
    for assay in this.Assays do
        ArcTablesAux.applyIOMap ioMap assay.Tables

member this.Copy() : ArcInvestigation =
let nextAssays = ResizeArray()
Expand Down
47 changes: 46 additions & 1 deletion src/ISA/ISA/ArcTypes/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,41 @@ type IOType =
| FreeText s -> stringCreate s
| anyelse -> stringCreate anyelse

/// Merges this IOType with `other` and returns the more specific of the two; fails if they cannot be reconciled.
///
/// - Identical types merge to themselves (FreeText only when both texts are equal).
/// - Source gives way to whatever it is merged with.
/// - The file types are ordered RawDataFile, DerivedDataFile, ImageFile from least to most specific.
/// - Sample and Material merge only with themselves or with Source.
///
/// Note that the operation is not symmetric for FreeText: Source merged with a FreeText yields that FreeText,
/// while a FreeText merged with Source fails.
///
/// E.g. RawDataFile is more specific than Source, but less specific than DerivedDataFile.
///
/// E.g. Sample and RawDataFile can not be merged.
member this.Merge(other) =
    match this, other with
    | FreeText ownText, FreeText otherText when ownText = otherText -> FreeText ownText
    | FreeText ownText, FreeText otherText -> failwith $"FreeText IO column names {ownText} and {otherText} do differ"
    | FreeText _, _ -> failwith $"FreeText IO column and {other} can not be merged"
    | ImageFile, (Source | RawDataFile | DerivedDataFile | ImageFile) -> ImageFile
    | ImageFile, _ -> failwith $"ImageFile IO column and {other} can not be merged"
    | DerivedDataFile, (Source | RawDataFile | DerivedDataFile) -> DerivedDataFile
    | DerivedDataFile, ImageFile -> ImageFile
    | DerivedDataFile, _ -> failwith $"DerivedDataFile IO column and {other} can not be merged"
    | RawDataFile, (Source | RawDataFile) -> RawDataFile
    | RawDataFile, DerivedDataFile -> DerivedDataFile
    | RawDataFile, ImageFile -> ImageFile
    | RawDataFile, _ -> failwith $"RawDataFile IO column and {other} can not be merged"
    | Sample, (Source | Sample) -> Sample
    | Sample, _ -> failwith $"Sample IO column and {other} can not be merged"
    // Source is the least specific type, so the other side always wins (Source itself included).
    | Source, _ -> other
    | Material, (Source | Material) -> Material
    | Material, _ -> failwith $"Material IO column and {other} can not be merged"



override this.ToString() =
match this with
| Source -> "Source Name"
Expand Down Expand Up @@ -314,7 +349,7 @@ type CompositeHeader =

/// True if this header is an Output header, false for every other header (Input headers included).
member this.isOutput =
    match this with
    | Output io -> true
    | anythingElse -> false

member this.isParameter =
Expand Down Expand Up @@ -382,6 +417,16 @@ type CompositeHeader =
| FreeText _ -> true
| anythingElse -> false

/// Returns the IOType carried by an Input header, or None for any other header.
member this.tryInput() =
    match this with
    | Input ioType -> Some ioType
    | anythingElse -> None

/// Returns the IOType carried by an Output header, or None for any other header.
member this.tryOutput() =
    match this with
    | Output ioType -> Some ioType
    | anythingElse -> None

member this.TryParameter() =
match this with
| Parameter oa -> Some (ProtocolParameter.create(ParameterName = oa))
Expand Down
106 changes: 106 additions & 0 deletions tests/ISA/ISA.Tests/ArcInvestigation.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,111 @@ let tests_Assay = testList "CRUD Assay" [
]
]

// Tests for ArcInvestigation.UpdateIOTypeByEntityID, once with both tables in the same assay and once with
// one table in a study and one in an assay. Each scenario uses two tables with an Input and an Output column.
let tests_UpdateIOTypeByEntityIDTypes =
    // Three free-text cells named "<prefix> 0", "<prefix> 1", "<prefix> 2".
    let entityCells (prefix : string) =
        Array.init 3 (fun n -> CompositeCell.createFreeText (sprintf "%s %i" prefix n))
    // An Input column of the given IOType followed by an Output column of IOType.Sample.
    // Built fresh on every call so no cell array is shared between tables.
    let ioColumns (inputType : IOType) (inputPrefix : string) (outputPrefix : string) =
        [|
            CompositeColumn.create (CompositeHeader.Input inputType, entityCells inputPrefix)
            CompositeColumn.create (CompositeHeader.Output IOType.Sample, entityCells outputPrefix)
        |]
    testList "UpdateIOTypeByEntityIDType" [
        testList "SameAssay" [
            testCase "nothingToUpdate" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let assay = investigation.InitAssay("MyAssay")
                let firstTable = assay.InitTable("MyTable")
                let secondTable = assay.InitTable("MyTable2")
                firstTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // No entity name is shared between the two tables.
                secondTable.AddColumns(ioColumns IOType.Source "Source_Alt" "Sample_Alt")
                let assayBefore = assay.Copy()
                investigation.UpdateIOTypeByEntityID()
                Expect.sequenceEqual assay.Tables assayBefore.Tables "Tables should be unchanged"
            testCase "updateOutputByNextInput" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let assay = investigation.InitAssay("MyAssay")
                let firstTable = assay.InitTable("MyTable")
                let secondTable = assay.InitTable("MyTable2")
                firstTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // The Source-typed input of the second table reuses the sample names of the first table's output.
                secondTable.AddColumns(ioColumns IOType.Source "Sample" "Sample_Alt")
                investigation.UpdateIOTypeByEntityID()
                Expect.sequenceEqual firstTable.Headers [CompositeHeader.Input IOType.Source; CompositeHeader.Output IOType.Sample] "Headers should be updated"
                Expect.sequenceEqual secondTable.Headers [CompositeHeader.Input IOType.Sample; CompositeHeader.Output IOType.Sample] "Headers should be updated"
            testCase "failBecauseClashing" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let assay = investigation.InitAssay("MyAssay")
                let firstTable = assay.InitTable("MyTable")
                let secondTable = assay.InitTable("MyTable2")
                firstTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // Same names as the Sample output above, but typed as DerivedDataFile.
                secondTable.AddColumns(ioColumns IOType.DerivedDataFile "Sample" "Sample_Alt")
                Expect.throws (fun () -> investigation.UpdateIOTypeByEntityID()) "Update should fail as sample and data can not be updated against each other."
        ]
        testList "AssayAndStudy" [
            testCase "nothingToUpdate" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let study = investigation.InitStudy("MyStudy")
                let assay = investigation.InitAssay("MyAssay")
                let studyTable = study.InitTable("MyTable")
                let assayTable = assay.InitTable("MyTable2")
                studyTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // No entity name is shared between the two tables.
                assayTable.AddColumns(ioColumns IOType.Source "Source_Alt" "Sample_Alt")
                let assayBefore = assay.Copy()
                let studyBefore = study.Copy()
                investigation.UpdateIOTypeByEntityID()
                Expect.sequenceEqual assay.Tables assayBefore.Tables "Tables should be unchanged"
                Expect.sequenceEqual study.Tables studyBefore.Tables "Tables should be unchanged"
            testCase "updateOutputByNextInput" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let study = investigation.InitStudy("MyStudy")
                let assay = investigation.InitAssay("MyAssay")
                let studyTable = study.InitTable("MyTable")
                let assayTable = assay.InitTable("MyTable2")
                studyTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // The Source-typed input of the assay table reuses the sample names of the study table's output.
                assayTable.AddColumns(ioColumns IOType.Source "Sample" "Sample_Alt")
                investigation.UpdateIOTypeByEntityID()
                Expect.sequenceEqual studyTable.Headers [CompositeHeader.Input IOType.Source; CompositeHeader.Output IOType.Sample] "Headers should be updated"
                Expect.sequenceEqual assayTable.Headers [CompositeHeader.Input IOType.Sample; CompositeHeader.Output IOType.Sample] "Headers should be updated"
            testCase "failBecauseClashing" <| fun _ ->
                let investigation = ArcInvestigation.init("MyInvestigation")
                let study = investigation.InitStudy("MyStudy")
                let assay = investigation.InitAssay("MyAssay")
                let studyTable = study.InitTable("MyTable")
                let assayTable = assay.InitTable("MyTable2")
                studyTable.AddColumns(ioColumns IOType.Source "Source" "Sample")
                // Same names as the Sample output above, but typed as DerivedDataFile.
                assayTable.AddColumns(ioColumns IOType.DerivedDataFile "Sample" "Sample_Alt")
                Expect.throws (fun () -> investigation.UpdateIOTypeByEntityID()) "Update should fail as sample and data can not be updated against each other."
        ]
    ]


let private tests_GetHashCode = testList "GetHashCode" [
testCase "passing" <| fun _ ->
let actual = ArcInvestigation.init("Test")
Expand Down Expand Up @@ -700,5 +805,6 @@ let main =
tests_Study
tests_Assay
tests_GetHashCode
tests_UpdateIOTypeByEntityIDTypes
// tests_UpdateBy
]

0 comments on commit 5fa2559

Please sign in to comment.