From 7f2c4e202fee44fce069eccfe400c9732e8bd2bc Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Tue, 5 Sep 2023 09:56:59 +0200 Subject: [PATCH 1/2] restructure isa.spreadsheet project structure --- .../ARCtrl.ISA.Spreadsheet.fsproj | 36 +++++++-------- .../{ => AnnotationTable}/ArcTable.fs | 0 .../{ => AnnotationTable}/CompositeCell.fs | 0 .../{ => AnnotationTable}/CompositeColumn.fs | 0 .../{ => AnnotationTable}/CompositeHeader.fs | 0 .../Investigation.fs => ArcInvestigation.fs} | 1 + src/ISA/ISA.Spreadsheet/CollectionAux.fs | 45 ------------------- .../InvestigationFile/SparseRow.fs | 24 ---------- .../{InvestigationFile => Metadata}/Assays.fs | 0 .../ISA.Spreadsheet/{ => Metadata}/Comment.fs | 0 .../Contacts.fs | 0 .../{ => Metadata}/Conversions.fs | 0 .../DesignDescriptors.fs | 0 .../Factors.fs | 0 .../OntologySourceReference.fs | 0 .../Protocols.fs | 0 .../Publication.fs | 0 .../{ => Metadata}/SparseTable.fs | 0 .../{InvestigationFile => Metadata}/Study.fs | 0 19 files changed, 18 insertions(+), 88 deletions(-) rename src/ISA/ISA.Spreadsheet/{ => AnnotationTable}/ArcTable.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => AnnotationTable}/CompositeCell.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => AnnotationTable}/CompositeColumn.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => AnnotationTable}/CompositeHeader.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile/Investigation.fs => ArcInvestigation.fs} (99%) delete mode 100644 src/ISA/ISA.Spreadsheet/InvestigationFile/SparseRow.fs rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Assays.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => Metadata}/Comment.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Contacts.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => Metadata}/Conversions.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/DesignDescriptors.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Factors.fs (100%) rename 
src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/OntologySourceReference.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Protocols.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Publication.fs (100%) rename src/ISA/ISA.Spreadsheet/{ => Metadata}/SparseTable.fs (100%) rename src/ISA/ISA.Spreadsheet/{InvestigationFile => Metadata}/Study.fs (100%) diff --git a/src/ISA/ISA.Spreadsheet/ARCtrl.ISA.Spreadsheet.fsproj b/src/ISA/ISA.Spreadsheet/ARCtrl.ISA.Spreadsheet.fsproj index a70c4771..d10120b3 100644 --- a/src/ISA/ISA.Spreadsheet/ARCtrl.ISA.Spreadsheet.fsproj +++ b/src/ISA/ISA.Spreadsheet/ARCtrl.ISA.Spreadsheet.fsproj @@ -11,33 +11,31 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - + nfdi4plants, Lukas Weil ARC and ISA xlsx compliant parser for experimental metadata toolkit in F#. This project is meant as an easy means to open, manipulate and save ISA (Investigation,Study,Assay) metadata files in isa-xlsx format. 
diff --git a/src/ISA/ISA.Spreadsheet/ArcTable.fs b/src/ISA/ISA.Spreadsheet/AnnotationTable/ArcTable.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/ArcTable.fs rename to src/ISA/ISA.Spreadsheet/AnnotationTable/ArcTable.fs diff --git a/src/ISA/ISA.Spreadsheet/CompositeCell.fs b/src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeCell.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/CompositeCell.fs rename to src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeCell.fs diff --git a/src/ISA/ISA.Spreadsheet/CompositeColumn.fs b/src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeColumn.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/CompositeColumn.fs rename to src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeColumn.fs diff --git a/src/ISA/ISA.Spreadsheet/CompositeHeader.fs b/src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeHeader.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/CompositeHeader.fs rename to src/ISA/ISA.Spreadsheet/AnnotationTable/CompositeHeader.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Investigation.fs b/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs similarity index 99% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Investigation.fs rename to src/ISA/ISA.Spreadsheet/ArcInvestigation.fs index d6c8d397..3a157402 100644 --- a/src/ISA/ISA.Spreadsheet/InvestigationFile/Investigation.fs +++ b/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs @@ -23,6 +23,7 @@ module ArcInvestigation = let [<Literal>] publicationsLabelPrefix = "Investigation Publication" let [<Literal>] contactsLabelPrefix = "Investigation Person" + let [<Literal>] metaDataSheetName = "isa_study" type InvestigationInfo = { diff --git a/src/ISA/ISA.Spreadsheet/CollectionAux.fs b/src/ISA/ISA.Spreadsheet/CollectionAux.fs index 642c081f..7d324f2d 100644 --- a/src/ISA/ISA.Spreadsheet/CollectionAux.fs +++ b/src/ISA/ISA.Spreadsheet/CollectionAux.fs @@ -11,51 +11,6 @@ module Seq = with | _ -> None - /// Iterates over elements of the input sequence and groups adjacent 
elements. - /// A new group is started when the specified predicate holds about the element - /// of the sequence (and at the beginning of the iteration). - /// - /// For example: - /// Seq.groupWhen isOdd [3;3;2;4;1;2] = seq [[3]; [3; 2; 4]; [1; 2]] - let private groupWhen (withOverlap : bool) predicate (input:seq<'a>) = - use en = input.GetEnumerator() - - let rec loop cont = - if en.MoveNext() then - let temp = en.Current - if predicate temp then - - loop (fun y -> - cont - ( match y with - | h::t when withOverlap -> [temp]::(temp::h)::t - | h::t -> []::(temp::h)::t - //| h::t -> [temp]::(h)::t - | [] -> [[temp]] - ) - ) - else - loop (fun y -> - cont - ( match y with - | h::t -> (temp::h)::t - | [] -> [[temp]] - ) - ) - else - cont [] - // Remove when first element is empty due to "[]::(temp::h)::t" - let tmp:seq> = - match (loop id) with - | h::t -> match h with - | [x] when predicate x && withOverlap -> t - | [] -> t - | _ -> h::t - | [] -> [] - |> Seq.cast - - tmp - module internal Array = let ofIndexedSeq (s : seq) = diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/SparseRow.fs b/src/ISA/ISA.Spreadsheet/InvestigationFile/SparseRow.fs deleted file mode 100644 index 58a3271f..00000000 --- a/src/ISA/ISA.Spreadsheet/InvestigationFile/SparseRow.fs +++ /dev/null @@ -1,24 +0,0 @@ -namespace ISADotNet.XLSX - -open ISADotNet -open System.Collections.Generic -open FSharpSpreadsheetML -open DocumentFormat.OpenXml.Spreadsheet - - -type SparseRow = (int * string) seq - -module SparseRow = - - let fromValues (v : string seq) : SparseRow = Seq.indexed v - - let getValues (i : SparseRow) = i |> Seq.map snd - - let fromAllValues (v : string option seq) : SparseRow = - Seq.indexed v - |> Seq.choose (fun (i,o) -> Option.map (fun v -> i,v) o) - - let getAllValues (i : SparseRow) = - let m = i |> Map.ofSeq - let max = i |> Seq.maxBy fst |> fst - Seq.init (max + 1) (fun i -> Map.tryFind i m) diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Assays.fs 
b/src/ISA/ISA.Spreadsheet/Metadata/Assays.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Assays.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Assays.fs diff --git a/src/ISA/ISA.Spreadsheet/Comment.fs b/src/ISA/ISA.Spreadsheet/Metadata/Comment.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/Comment.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Comment.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Contacts.fs b/src/ISA/ISA.Spreadsheet/Metadata/Contacts.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Contacts.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Contacts.fs diff --git a/src/ISA/ISA.Spreadsheet/Conversions.fs b/src/ISA/ISA.Spreadsheet/Metadata/Conversions.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/Conversions.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Conversions.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/DesignDescriptors.fs b/src/ISA/ISA.Spreadsheet/Metadata/DesignDescriptors.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/DesignDescriptors.fs rename to src/ISA/ISA.Spreadsheet/Metadata/DesignDescriptors.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Factors.fs b/src/ISA/ISA.Spreadsheet/Metadata/Factors.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Factors.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Factors.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/OntologySourceReference.fs b/src/ISA/ISA.Spreadsheet/Metadata/OntologySourceReference.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/OntologySourceReference.fs rename to src/ISA/ISA.Spreadsheet/Metadata/OntologySourceReference.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Protocols.fs b/src/ISA/ISA.Spreadsheet/Metadata/Protocols.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Protocols.fs rename to 
src/ISA/ISA.Spreadsheet/Metadata/Protocols.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Publication.fs b/src/ISA/ISA.Spreadsheet/Metadata/Publication.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Publication.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Publication.fs diff --git a/src/ISA/ISA.Spreadsheet/SparseTable.fs b/src/ISA/ISA.Spreadsheet/Metadata/SparseTable.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/SparseTable.fs rename to src/ISA/ISA.Spreadsheet/Metadata/SparseTable.fs diff --git a/src/ISA/ISA.Spreadsheet/InvestigationFile/Study.fs b/src/ISA/ISA.Spreadsheet/Metadata/Study.fs similarity index 100% rename from src/ISA/ISA.Spreadsheet/InvestigationFile/Study.fs rename to src/ISA/ISA.Spreadsheet/Metadata/Study.fs From a4d36d5da8ffac3b43d7ff0f3622a7cc411dddc1 Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Tue, 5 Sep 2023 10:23:06 +0200 Subject: [PATCH 2/2] standardize and test investigation sheet name --- src/ISA/ISA.Spreadsheet/ArcInvestigation.fs | 16 +++-- .../InvestigationFileTests.fs | 58 +++++++++++++------ .../TestObjects/InvestigationFile.fs | 18 +++++- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs b/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs index 3a157402..fafd86ed 100644 --- a/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs +++ b/src/ISA/ISA.Spreadsheet/ArcInvestigation.fs @@ -23,8 +23,10 @@ module ArcInvestigation = let [<Literal>] publicationsLabelPrefix = "Investigation Publication" let [<Literal>] contactsLabelPrefix = "Investigation Person" - let [<Literal>] metaDataSheetName = "isa_study" - + let [<Literal>] metaDataSheetName = "isa_investigation" + let [<Literal>] metaDataSheetName_deprecated = "Investigation" + + type InvestigationInfo = { Identifier : string @@ -192,8 +194,12 @@ module ArcInvestigation = let fromFsWorkbook (doc:FsWorkbook) = try - doc.GetWorksheets() - |> Seq.head + match doc.TryGetWorksheetByName metaDataSheetName with + | Some sheet -> 
sheet + | None -> + match doc.TryGetWorksheetByName metaDataSheetName_deprecated with + | Some sheet -> sheet + | None -> failwith "Could not find metadata sheet with sheetname \"isa_investigation\" or deprecated sheetname \"Investigation\"" |> FsWorksheet.getRows |> Seq.map SparseRow.fromFsRow |> fromRows @@ -203,7 +209,7 @@ module ArcInvestigation = let toFsWorkbook (investigation:ArcInvestigation) : FsWorkbook = try let wb = new FsWorkbook() - let sheet = FsWorksheet("Investigation") + let sheet = FsWorksheet(metaDataSheetName) investigation |> toRows |> Seq.iteri (fun rowI r -> SparseRow.writeToSheet (rowI + 1) r sheet) diff --git a/tests/ISA/ISA.Spreadsheet.Tests/InvestigationFileTests.fs b/tests/ISA/ISA.Spreadsheet.Tests/InvestigationFileTests.fs index d522c556..d79c6ac6 100644 --- a/tests/ISA/ISA.Spreadsheet.Tests/InvestigationFileTests.fs +++ b/tests/ISA/ISA.Spreadsheet.Tests/InvestigationFileTests.fs @@ -57,8 +57,6 @@ let private testInvestigationWriterComponents = let private testInvestigationFile = - - testList "InvestigationXLSXTests" [ testCase "ReaderSuccess" (fun () -> @@ -72,7 +70,29 @@ let private testInvestigationFile = Expect.isOk readingSuccess (Result.getMessage readingSuccess) ) + testCase "ReaderSuccessDeprecatedSheetName" (fun () -> + + let readingSuccess = + try + ArcInvestigation.fromFsWorkbook TestObjects.Investigation.fullInvestigationObsoleteSheetName |> ignore + Result.Ok "DidRun" + with + | err -> Result.Error(sprintf "Reading the test file failed: %s" err.Message) + + Expect.isOk readingSuccess (Result.getMessage readingSuccess) + ) + testCase "ReaderFailureWrongSheetName" (fun () -> + + let readingSuccess = + try + ArcInvestigation.fromFsWorkbook TestObjects.Investigation.fullInvestigationWrongSheetName |> ignore + Result.Ok "DidRun" + with + | err -> Result.Error(sprintf "Reading the test file failed: %s" err.Message) + + Expect.isError readingSuccess "Reading the investigation file should fail if the sheet name is wrong" + ) 
testCase "WriterSuccess" (fun () -> let i = ArcInvestigation.fromFsWorkbook TestObjects.Investigation.fullInvestigation @@ -90,13 +110,13 @@ let private testInvestigationFile = testCase "OutputMatchesInput" (fun () -> let i = - TestObjects.Investigation.fullInvestigation.GetWorksheetByName "Investigation" + TestObjects.Investigation.fullInvestigation.GetWorksheetByName "isa_investigation" let o = TestObjects.Investigation.fullInvestigation |> ArcInvestigation.fromFsWorkbook |> ArcInvestigation.toFsWorkbook - |> fun wb -> wb.GetWorksheetByName "Investigation" + |> fun wb -> wb.GetWorksheetByName "isa_investigation" Expect.workSheetEqual o i "Written investigation file does not match read investigation file" ) @@ -133,23 +153,23 @@ let private testInvestigationFile = Expect.isOk writingSuccess (Result.getMessage writingSuccess) ) - testCase "OutputMatchesInputEmpty" (fun () -> + //testCase "OutputMatchesInputEmpty" (fun () -> - let i = - TestObjects.Investigation.emptyInvestigation.GetWorksheetByName "Investigation" - |> fun ws -> ws.Rows - |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b)) - let o = - TestObjects.Investigation.emptyInvestigation - |> ArcInvestigation.fromFsWorkbook - |> ArcInvestigation.toFsWorkbook - |> fun wb -> wb.GetWorksheetByName "Investigation" - |> fun ws -> ws.Rows - |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b)) + // let i = + // TestObjects.Investigation.emptyInvestigation.GetWorksheetByName "isa_investigation" + // |> fun ws -> ws.Rows + // |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b)) + // let o = + // TestObjects.Investigation.emptyInvestigation + // |> ArcInvestigation.fromFsWorkbook + // |> ArcInvestigation.toFsWorkbook + // |> fun wb -> wb.GetWorksheetByName "isa_investigation" + // |> fun ws -> ws.Rows + // |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b)) 
- mySequenceEqual o i "Written empty investigation file does not match read empty investigation file" - ) + // mySequenceEqual o i "Written empty investigation file does not match read empty investigation file" + //) ] |> testSequenced @@ -529,5 +549,5 @@ let private testInvestigationFile = let main = testList "InvestigationFile" [ testInvestigationWriterComponents - //testInvestigationFile + testInvestigationFile ] \ No newline at end of file diff --git a/tests/ISA/ISA.Spreadsheet.Tests/TestObjects/InvestigationFile.fs b/tests/ISA/ISA.Spreadsheet.Tests/TestObjects/InvestigationFile.fs index 4bb9af13..0e4ab9d8 100644 --- a/tests/ISA/ISA.Spreadsheet.Tests/TestObjects/InvestigationFile.fs +++ b/tests/ISA/ISA.Spreadsheet.Tests/TestObjects/InvestigationFile.fs @@ -5,7 +5,7 @@ open FsSpreadsheet let emptyInvestigation = let wb = new FsWorkbook() - let ws = wb.InitWorksheet("Investigation") + let ws = wb.InitWorksheet("isa_investigation") let row1 = ws.Row(1) row1.[1].Value <- "ONTOLOGY SOURCE REFERENCE" let row2 = ws.Row(2) @@ -195,7 +195,7 @@ let investigationIdentifier = "BII-I-1" let fullInvestigation = let wb = new FsWorkbook() - let ws = wb.InitWorksheet("Investigation") + let ws = wb.InitWorksheet("isa_investigation") let row1 = ws.Row(1) row1.[1].Value <- "ONTOLOGY SOURCE REFERENCE" let row2 = ws.Row(2) @@ -721,4 +721,18 @@ let fullInvestigation = row164.[1].Value <- "#TestRemark4" let row165 = ws.Row(165) row165.[1].Value <- "#TestRemark5" + wb + +let fullInvestigationObsoleteSheetName = + let cp = (fullInvestigation.GetWorksheetByName "isa_investigation").Copy() + cp.Name <- "Investigation" + let wb = new FsWorkbook() + wb.AddWorksheet cp + wb + +let fullInvestigationWrongSheetName = + let cp = (fullInvestigation.GetWorksheetByName "isa_investigation").Copy() + cp.Name <- "Gibberish" + let wb = new FsWorkbook() + wb.AddWorksheet cp wb \ No newline at end of file