diff --git a/src/ISA/ISA/ArcTypes/ArcTable.fs b/src/ISA/ISA/ArcTypes/ArcTable.fs
index f0716a64..133f3368 100644
--- a/src/ISA/ISA/ArcTypes/ArcTable.fs
+++ b/src/ISA/ISA/ArcTypes/ArcTable.fs
@@ -268,22 +268,65 @@ type ArcTable(name: string, headers: ResizeArray, values: Syste
         fun (table:ArcTable) ->
             table.GetColumn(index)
 
+    /// Returns the first column whose header equals the given header, or None if no header matches.
+    member this.TryGetColumnByHeader (header:CompositeHeader) =
+        let index = this.Headers |> Seq.tryFindIndex (fun x -> x = header)
+        index
+        |> Option.map (fun i -> this.GetColumn(i))
+
+    static member tryGetColumnByHeader (header:CompositeHeader) =
+        fun (table:ArcTable) ->
+            table.TryGetColumnByHeader(header)
+
+    /// Returns the first column whose header equals the given header. Fails if no header matches.
     member this.GetColumnByHeader (header:CompositeHeader) =
-        let index = this.Headers |> Seq.findIndex (fun x -> x = header)
-        this.GetColumn(index)
+        match this.TryGetColumnByHeader(header) with
+        | Some c -> c
+        | None -> failwithf "Unable to find column with header in table %s: %O" this.Name header
 
     static member getColumnByHeader (header:CompositeHeader) =
         fun (table:ArcTable) ->
             table.GetColumnByHeader(header)
 
-    member this.TryGetColumnByHeader (header:CompositeHeader) =
-        let index = this.Headers |> Seq.tryFindIndex (fun x -> x = header)
+    /// Returns the first column whose header satisfies `isInput`, or None if there is no such column.
+    member this.TryGetInputColumn() =
+        let index = this.Headers |> Seq.tryFindIndex (fun x -> x.isInput)
         index
         |> Option.map (fun i -> this.GetColumn(i))
 
-    static member tryGetColumnByHeader (header:CompositeHeader) =
+    static member tryGetInputColumn () =
         fun (table:ArcTable) ->
-            table.TryGetColumnByHeader(header)
+            table.TryGetInputColumn()
+
+    /// Returns the first column whose header satisfies `isInput`. Fails if there is no such column.
+    member this.GetInputColumn() =
+        match this.TryGetInputColumn() with
+        | Some c -> c
+        | None -> failwithf "Unable to find input column in table %s" this.Name
+
+    static member getInputColumn () =
+        fun (table:ArcTable) ->
+            table.GetInputColumn()
+
+    /// Returns the first column whose header satisfies `isOutput`, or None if there is no such column.
+    member this.TryGetOutputColumn() =
+        let index = this.Headers |> Seq.tryFindIndex (fun x -> x.isOutput)
+        index
+        |> Option.map (fun i -> this.GetColumn(i))
+
+    static member tryGetOutputColumn () =
+        fun (table:ArcTable) ->
+            table.TryGetOutputColumn()
+
+    /// Returns the first column whose header satisfies `isOutput`. Fails if there is no such column.
+    member this.GetOutputColumn() =
+        match this.TryGetOutputColumn() with
+        | Some c -> c
+        | None -> failwithf "Unable to find output column in table %s" this.Name
+
+    static member getOutputColumn () =
+        fun (table:ArcTable) ->
+            table.GetOutputColumn()
 
     // - Row API - //
     member this.AddRow (?cells: CompositeCell [], ?index: int) : unit =
diff --git a/src/ISA/ISA/ArcTypes/ArcTables.fs b/src/ISA/ISA/ArcTypes/ArcTables.fs
index f310fa35..edd5934e 100644
--- a/src/ISA/ISA/ArcTypes/ArcTables.fs
+++ b/src/ISA/ISA/ArcTypes/ArcTables.fs
@@ -36,6 +36,64 @@ module ArcTablesAux =
         | Some index -> index
         | None -> failwith $"Unable to find table with name '{name}'!"
 
+    /// Collects the IOType of each distinct entity (cell text of the Input and Output columns) in the tables and merges the IOTypes of each entity according to the IOType.Merge function.
+    ///
+    /// Cells with empty text are ignored. Fails if IOType.Merge fails for an entity.
+    let getIOMap (tables: ResizeArray<ArcTable>) =
+        let mappings : Dictionary<string,IOType> = Dictionary()
+        let includeInMap (name : string) (ioType:IOType) =
+            if name <> "" then
+                match Aux.Dict.tryFind name mappings with
+                | Some oldIOType ->
+                    let newIOType = oldIOType.Merge ioType
+                    mappings.[name] <- newIOType
+                | None ->
+                    mappings.Add(name, ioType)
+        for table in tables do
+            match table.TryGetInputColumn() with
+            | Some ic ->
+                let ioType = ic.Header.tryInput().Value
+                ic.Cells
+                |> Array.iter (fun c ->
+                    includeInMap (c.ToFreeTextCell().AsFreeText) ioType
+                )
+            | None -> ()
+            match table.TryGetOutputColumn() with
+            | Some oc ->
+                let ioType = oc.Header.tryOutput().Value
+                oc.Cells
+                |> Array.iter (fun c ->
+                    includeInMap (c.ToFreeTextCell().AsFreeText) ioType
+                )
+            | None -> ()
+        mappings
+
+    /// Merges the IOTypes stored in the map into the Input and Output headers of each table: the IOType of a column is merged with the mapped IOType of each of its cells.
+    ///
+    /// Fails if IOType.Merge fails for a column.
+    let applyIOMap (map : Dictionary<string,IOType>) (tables : ResizeArray<ArcTable>) =
+        // Folds the mapped IOType of every cell whose text is a key of the map into the given IOType.
+        let mergeWithMap (current : IOType) (cells : CompositeCell []) =
+            cells
+            |> Array.fold (fun (io : IOType) c ->
+                match Aux.Dict.tryFind (c.ToFreeTextCell().AsFreeText) map with
+                | Some newIO -> io.Merge newIO
+                | None -> io
+            ) current
+        for table in tables do
+            match table.TryGetInputColumn() with
+            | Some ic ->
+                let index = table.Headers |> Seq.findIndex (fun x -> x.isInput)
+                let newIOType = mergeWithMap (ic.Header.tryInput().Value) ic.Cells
+                table.UpdateHeader(index, CompositeHeader.Input(newIOType))
+            | None -> ()
+            match table.TryGetOutputColumn() with
+            | Some oc ->
+                let index = table.Headers |> Seq.findIndex (fun x -> x.isOutput)
+                let newIOType = mergeWithMap (oc.Header.tryOutput().Value) oc.Cells
+                table.UpdateHeader(index, CompositeHeader.Output(newIOType))
+            | None -> ()
+
     module SanityChecks =
 
         /// Fails, if the index is out of range of the Tables collection. When allowAppend is set to true, it may be out of range by at most 1.
diff --git a/src/ISA/ISA/ArcTypes/ArcTypes.fs b/src/ISA/ISA/ArcTypes/ArcTypes.fs
index 6edefbc8..ff773493 100644
--- a/src/ISA/ISA/ArcTypes/ArcTypes.fs
+++ b/src/ISA/ISA/ArcTypes/ArcTypes.fs
@@ -1683,6 +1683,32 @@ type ArcInvestigation(identifier : string, ?title : string, ?description : strin
             let copy = inv.Copy()
             copy.DeregisterMissingAssays()
             copy
+
+    /// Updates the IOTypes of the IO columns (Input, Output) across all tables in the investigation if possible.
+    ///
+    /// If an entity (Row Value of IO Column) with the same name as an entity with a higher IOType specificity is found, the IOType of the entity with the lower IOType specificity is updated.
+    ///
+    /// E.g. In Table1, there is a column "Output [Sample Name]" with an entity "Sample1". In Table2, there is a column "Input [Source Name]" with the same entity "Sample1". By equality of the entities, the IOType of the Input column in Table2 is inferred to be Sample, resulting in "Input [Sample Name]".
+    ///
+    /// E.g. RawDataFile is more specific than Source, but less specific than DerivedDataFile.
+    ///
+    /// E.g. Sample is equally specific to RawDataFile.
+    ///
+    /// Fails if the IOTypes found for the same entity can not be merged (see IOType.Merge).
+    member this.UpdateIOTypeByEntityID() =
+        let ioMap =
+            [
+                for study in this.Studies do
+                    yield! study.Tables
+                for assay in this.Assays do
+                    yield! assay.Tables
+            ]
+            |> ResizeArray
+            |> ArcTablesAux.getIOMap
+        for study in this.Studies do
+            ArcTablesAux.applyIOMap ioMap study.Tables
+        for assay in this.Assays do
+            ArcTablesAux.applyIOMap ioMap assay.Tables
 
     member this.Copy() : ArcInvestigation =
         let nextAssays = ResizeArray()
diff --git a/src/ISA/ISA/ArcTypes/CompositeHeader.fs b/src/ISA/ISA/ArcTypes/CompositeHeader.fs
index 6e5d8503..6e66b3fe 100644
--- a/src/ISA/ISA/ArcTypes/CompositeHeader.fs
+++ b/src/ISA/ISA/ArcTypes/CompositeHeader.fs
@@ -41,6 +41,46 @@ type IOType =
         | FreeText s -> stringCreate s
         | anyelse -> stringCreate anyelse
 
+    /// Given two IOTypes, returns the one with the higher specificity. Identical IOTypes merge to themselves.
+    ///
+    /// Source is the least specific IOType: merging it with any other IOType, in either order, returns the other IOType.
+    ///
+    /// Fails if the two IOTypes differ and neither is more specific than the other.
+    ///
+    /// E.g. RawDataFile is more specific than Source, but less specific than DerivedDataFile.
+    ///
+    /// E.g. Sample is equally specific to RawDataFile, so merging them fails.
+    member this.Merge(other) =
+        match this, other with
+        | FreeText s1, FreeText s2 when s1 = s2 -> FreeText (s1)
+        | FreeText s1, FreeText s2 -> failwith $"FreeText IO column names {s1} and {s2} do differ"
+        | FreeText s, Source -> FreeText s
+        | FreeText s, _ -> failwith $"FreeText IO column and {other} can not be merged"
+        | ImageFile, Source -> ImageFile
+        | ImageFile, RawDataFile -> ImageFile
+        | ImageFile, DerivedDataFile -> ImageFile
+        | ImageFile, ImageFile -> ImageFile
+        | ImageFile, _ -> failwith $"ImageFile IO column and {other} can not be merged"
+        | DerivedDataFile, Source -> DerivedDataFile
+        | DerivedDataFile, RawDataFile -> DerivedDataFile
+        | DerivedDataFile, DerivedDataFile -> DerivedDataFile
+        | DerivedDataFile, ImageFile -> ImageFile
+        | DerivedDataFile, _ -> failwith $"DerivedDataFile IO column and {other} can not be merged"
+        | RawDataFile, Source -> RawDataFile
+        | RawDataFile, RawDataFile -> RawDataFile
+        | RawDataFile, DerivedDataFile -> DerivedDataFile
+        | RawDataFile, ImageFile -> ImageFile
+        | RawDataFile, _ -> failwith $"RawDataFile IO column and {other} can not be merged"
+        | Sample, Source -> Sample
+        | Sample, Sample -> Sample
+        | Sample, _ -> failwith $"Sample IO column and {other} can not be merged"
+        | Source, Source -> Source
+        | Source, _ -> other
+        | Material, Source -> Material
+        | Material, Material -> Material
+        | Material, _ -> failwith $"Material IO column and {other} can not be merged"
+
+
     override this.ToString() =
         match this with
         | Source -> "Source Name"
@@ -314,7 +354,7 @@ type CompositeHeader =
 
     member this.isOutput =
         match this with
-        | Input io -> true
+        | Output io -> true
         | anythingElse -> false
 
     member this.isParameter =
@@ -382,6 +422,18 @@ type CompositeHeader =
         | FreeText _ -> true
         | anythingElse -> false
 
+    /// Returns the IOType if the header is an Input header, otherwise None.
+    member this.tryInput() =
+        match this with
+        | Input io -> Some io
+        | _ -> None
+
+    /// Returns the IOType if the header is an Output header, otherwise None.
+    member this.tryOutput() =
+        match this with
+        | Output io -> Some io
+        | _ -> None
+
     member this.TryParameter() =
         match this with
         | Parameter oa -> Some (ProtocolParameter.create(ParameterName = oa))
diff --git a/tests/ISA/ISA.Tests/ArcInvestigation.Tests.fs b/tests/ISA/ISA.Tests/ArcInvestigation.Tests.fs
index 5f347a65..22305930 100644
--- a/tests/ISA/ISA.Tests/ArcInvestigation.Tests.fs
+++ b/tests/ISA/ISA.Tests/ArcInvestigation.Tests.fs
@@ -541,6 +541,127 @@ let tests_Assay = testList "CRUD Assay" [
     ]
 ]
 
+let tests_UpdateIOTypeByEntityIDTypes = testList "UpdateIOTypeByEntityIDType" [
+    testList "SameAssay" [
+        testCase "nothingToUpdate" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let a = i.InitAssay("MyAssay")
+            let t1 = a.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source_Alt %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            let a_Copy = a.Copy()
+            i.UpdateIOTypeByEntityID()
+            Expect.sequenceEqual a.Tables a_Copy.Tables "Tables should be unchanged"
+        testCase "updateOutputByNextInput" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let a = i.InitAssay("MyAssay")
+            let t1 = a.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            i.UpdateIOTypeByEntityID()
+            Expect.sequenceEqual t1.Headers [CompositeHeader.Input IOType.Source; CompositeHeader.Output IOType.Sample] "Headers should be updated"
+            Expect.sequenceEqual t2.Headers [CompositeHeader.Input IOType.Sample; CompositeHeader.Output IOType.Sample] "Headers should be updated"
+        testCase "failBecauseClashing" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let a = i.InitAssay("MyAssay")
+            let t1 = a.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.DerivedDataFile, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            Expect.throws (fun () -> i.UpdateIOTypeByEntityID()) "Update should fail as sample and data can not be updated against each other."
+        testCase "freeTextOutputUpdatesNextSourceInput" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let a = i.InitAssay("MyAssay")
+            let t1 = a.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output (IOType.FreeText "MyType"), Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            i.UpdateIOTypeByEntityID()
+            Expect.sequenceEqual t1.Headers [CompositeHeader.Input IOType.Source; CompositeHeader.Output (IOType.FreeText "MyType")] "Headers should be unchanged"
+            Expect.sequenceEqual t2.Headers [CompositeHeader.Input (IOType.FreeText "MyType"); CompositeHeader.Output IOType.Sample] "Headers should be updated"
+    ]
+    testList "AssayAndStudy" [
+        testCase "nothingToUpdate" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let s = i.InitStudy("MyStudy")
+            let a = i.InitAssay("MyAssay")
+            let t1 = s.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source_Alt %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            let a_Copy = a.Copy()
+            let s_Copy = s.Copy()
+            i.UpdateIOTypeByEntityID()
+            Expect.sequenceEqual a.Tables a_Copy.Tables "Tables should be unchanged"
+            Expect.sequenceEqual s.Tables s_Copy.Tables "Tables should be unchanged"
+        testCase "updateOutputByNextInput" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let s = i.InitStudy("MyStudy")
+            let a = i.InitAssay("MyAssay")
+            let t1 = s.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            i.UpdateIOTypeByEntityID()
+            Expect.sequenceEqual t1.Headers [CompositeHeader.Input IOType.Source; CompositeHeader.Output IOType.Sample] "Headers should be updated"
+            Expect.sequenceEqual t2.Headers [CompositeHeader.Input IOType.Sample; CompositeHeader.Output IOType.Sample] "Headers should be updated"
+        testCase "failBecauseClashing" <| fun _ ->
+            let i = ArcInvestigation.init("MyInvestigation")
+            let s = i.InitStudy("MyStudy")
+            let a = i.InitAssay("MyAssay")
+            let t1 = s.InitTable("MyTable")
+            let t2 = a.InitTable("MyTable2")
+            t1.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.Source, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Source %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+            |]
+            t2.AddColumns [|
+                CompositeColumn.create (CompositeHeader.Input IOType.DerivedDataFile, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample %i" i)))
+                CompositeColumn.create (CompositeHeader.Output IOType.Sample, Array.init 3 (fun i -> CompositeCell.createFreeText (sprintf "Sample_Alt %i" i)))
+            |]
+            Expect.throws (fun () -> i.UpdateIOTypeByEntityID()) "Update should fail as sample and data can not be updated against each other."
+    ]
+]
+
+
 let private tests_GetHashCode = testList "GetHashCode" [
     testCase "passing" <| fun _ ->
         let actual = ArcInvestigation.init("Test")
@@ -700,5 +821,6 @@ let main =
         tests_Study
         tests_Assay
         tests_GetHashCode
+        tests_UpdateIOTypeByEntityIDTypes
         // tests_UpdateBy
     ]
\ No newline at end of file