Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize isa sheet name parsing #180

Merged
merged 2 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 17 additions & 19 deletions src/ISA/ISA.Spreadsheet/ARCtrl.ISA.Spreadsheet.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,31 @@
<None Include="paket.references" />
<Compile Include="AssemblyInfo.fs" />
<Compile Include="CollectionAux.fs" />
<Compile Include="Conversions.fs" />
<Compile Include="Comment.fs" />
<Compile Include="SparseTable.fs" />
<Compile Include="InvestigationFile\Contacts.fs" />
<Compile Include="InvestigationFile\DesignDescriptors.fs" />
<Compile Include="InvestigationFile\Factors.fs" />
<Compile Include="InvestigationFile\Protocols.fs" />
<Compile Include="InvestigationFile\Publication.fs" />
<Compile Include="InvestigationFile\Assays.fs" />
<Compile Include="InvestigationFile\Study.fs" />
<Compile Include="InvestigationFile\OntologySourceReference.fs" />
<Compile Include="InvestigationFile\Investigation.fs" />
<Compile Include="CompositeHeader.fs" />
<Compile Include="CompositeCell.fs" />
<Compile Include="CompositeColumn.fs" />
<Compile Include="ArcTable.fs" />
<Compile Include="Metadata\Conversions.fs" />
<Compile Include="Metadata\Comment.fs" />
<Compile Include="Metadata\SparseTable.fs" />
<Compile Include="Metadata\Contacts.fs" />
<Compile Include="Metadata\DesignDescriptors.fs" />
<Compile Include="Metadata\Factors.fs" />
<Compile Include="Metadata\Protocols.fs" />
<Compile Include="Metadata\Publication.fs" />
<Compile Include="Metadata\Assays.fs" />
<Compile Include="Metadata\Study.fs" />
<Compile Include="Metadata\OntologySourceReference.fs" />
<Compile Include="AnnotationTable\CompositeHeader.fs" />
<Compile Include="AnnotationTable\CompositeCell.fs" />
<Compile Include="AnnotationTable\CompositeColumn.fs" />
<Compile Include="AnnotationTable\ArcTable.fs" />
<Compile Include="ArcAssay.fs" />
<Compile Include="ArcStudy.fs" />
<Compile Include="ArcInvestigation.fs" />
<None Include="../../../build/logo.png" Pack="true" PackagePath="\" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\ISA\ARCtrl.ISA.fsproj" />
<ProjectReference Include="..\..\FileSystem\ARCtrl.FileSystem.fsproj" />
</ItemGroup>
<ItemGroup>
<Content Include="*.fsproj; **\*.fs; **\*.fsi" PackagePath="fable\" />
</ItemGroup>
<ItemGroup />
<PropertyGroup>
<Authors>nfdi4plants, Lukas Weil</Authors>
<Description>ARC and ISA xlsx compliant parser for experimental metadata toolkit in F#. This project is meant as an easy means to open, manipulate and save ISA (Investigation,Study,Assay) metadata files in isa-xlsx format.</Description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ module ArcInvestigation =
let [<Literal>] publicationsLabelPrefix = "Investigation Publication"
let [<Literal>] contactsLabelPrefix = "Investigation Person"


/// Name of the worksheet holding the investigation metadata.
/// `fromFsWorkbook` looks this sheet up first, and `toFsWorkbook` creates its sheet with this name.
let [<Literal>] metaDataSheetName = "isa_investigation"
/// Older worksheet name; `fromFsWorkbook` falls back to it when no sheet named `metaDataSheetName` exists.
let [<Literal>] metaDataSheetName_deprecated = "Investigation"


type InvestigationInfo =
{
Identifier : string
Expand Down Expand Up @@ -191,8 +194,12 @@ module ArcInvestigation =

let fromFsWorkbook (doc:FsWorkbook) =
try
doc.GetWorksheets()
|> Seq.head
match doc.TryGetWorksheetByName metaDataSheetName with
| Some sheet -> sheet
| None ->
match doc.TryGetWorksheetByName metaDataSheetName_deprecated with
| Some sheet -> sheet
| None -> failwith "Could not find metadata sheet with sheetname \"isa_investigation\" or deprecated sheetname \"Investigation\""
|> FsWorksheet.getRows
|> Seq.map SparseRow.fromFsRow
|> fromRows
Expand All @@ -202,7 +209,7 @@ module ArcInvestigation =
let toFsWorkbook (investigation:ArcInvestigation) : FsWorkbook =
try
let wb = new FsWorkbook()
let sheet = FsWorksheet("Investigation")
let sheet = FsWorksheet(metaDataSheetName)
investigation
|> toRows
|> Seq.iteri (fun rowI r -> SparseRow.writeToSheet (rowI + 1) r sheet)
Expand Down
45 changes: 0 additions & 45 deletions src/ISA/ISA.Spreadsheet/CollectionAux.fs
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,6 @@ module Seq =
with
| _ -> None

/// Iterates over elements of the input sequence and groups adjacent elements.
/// A new group is started when the specified predicate holds about the element
/// of the sequence (and at the beginning of the iteration).
///
/// With `withOverlap = false` every element ends up in exactly one group. For example:
/// groupWhen false isOdd [3;3;2;4;1;2] = seq [[3]; [3; 2; 4]; [1; 2]]
///
/// With `withOverlap = true` the element that starts a group is additionally
/// appended to the end of the preceding group. For example:
/// groupWhen true isOdd [3;3;2;4;1;2] = seq [[3; 3]; [3; 2; 4; 1]; [1; 2]]
///
/// NOTE(review): with `withOverlap = true`, an input made of a single element that
/// satisfies `predicate` yields an empty result because of the final clean-up step
/// below - confirm whether that is intended.
let private groupWhen (withOverlap : bool) predicate (input:seq<'a>) =
use en = input.GetEnumerator()

// Continuation-passing loop: the enumerator is read front to back, while the
// continuations assemble the list of groups from the last element to the first.
let rec loop cont =
if en.MoveNext() then
let temp = en.Current
if predicate temp then

// `temp` opens a group: put it at the head of the group built so far (`h`) and
// prepend a fresh head group for the elements that precede it.
loop (fun y ->
cont
( match y with
// Overlap mode: the fresh head group already contains `temp`, so `temp` also closes the preceding group.
| h::t when withOverlap -> [temp]::(temp::h)::t
// Non-overlap mode: the fresh head group starts out empty.
| h::t -> []::(temp::h)::t
//| h::t -> [temp]::(h)::t
// `temp` is the last element of the input.
| [] -> [[temp]]
)
)
else
// `temp` does not open a group: add it to the front of the current head group.
loop (fun y ->
cont
( match y with
| h::t -> (temp::h)::t
| [] -> [[temp]]
)
)
else
// Input exhausted: run the accumulated continuations, starting from an empty list of groups.
cont []
// Drop the artificial head group left over when the first element satisfied the predicate:
// an empty list from "[]::(temp::h)::t", or the singleton "[temp]" in overlap mode.
let tmp:seq<seq<'a>> =
match (loop id) with
| h::t -> match h with
| [x] when predicate x && withOverlap -> t
| [] -> t
| _ -> h::t
| [] -> []
|> Seq.cast

tmp

module internal Array =

let ofIndexedSeq (s : seq<int*string>) =
Expand Down
24 changes: 0 additions & 24 deletions src/ISA/ISA.Spreadsheet/InvestigationFile/SparseRow.fs

This file was deleted.

58 changes: 39 additions & 19 deletions tests/ISA/ISA.Spreadsheet.Tests/InvestigationFileTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ let private testInvestigationWriterComponents =

let private testInvestigationFile =



testList "InvestigationXLSXTests" [
testCase "ReaderSuccess" (fun () ->

Expand All @@ -72,7 +70,29 @@ let private testInvestigationFile =
Expect.isOk readingSuccess (Result.getMessage readingSuccess)

)
// Reading must succeed for the workbook fixture whose metadata sheet uses the deprecated sheet name.
testCase "ReaderSuccessDeprecatedSheetName" <| fun () ->
    let outcome =
        try
            TestObjects.Investigation.fullInvestigationObsoleteSheetName
            |> ArcInvestigation.fromFsWorkbook
            |> ignore
            Ok "DidRun"
        with
        | ex -> Error (sprintf "Reading the test file failed: %s" ex.Message)
    Expect.isOk outcome (Result.getMessage outcome)
// Reading must fail for the workbook fixture whose only sheet has an unrecognised name.
testCase "ReaderFailureWrongSheetName" <| fun () ->
    let outcome =
        try
            TestObjects.Investigation.fullInvestigationWrongSheetName
            |> ArcInvestigation.fromFsWorkbook
            |> ignore
            Ok "DidRun"
        with
        | ex -> Error (sprintf "Reading the test file failed: %s" ex.Message)
    Expect.isError outcome "Reading the investigation file should fail if the sheet name is wrong"
testCase "WriterSuccess" (fun () ->

let i = ArcInvestigation.fromFsWorkbook TestObjects.Investigation.fullInvestigation
Expand All @@ -90,13 +110,13 @@ let private testInvestigationFile =
testCase "OutputMatchesInput" (fun () ->

let i =
TestObjects.Investigation.fullInvestigation.GetWorksheetByName "Investigation"
TestObjects.Investigation.fullInvestigation.GetWorksheetByName "isa_investigation"

let o =
TestObjects.Investigation.fullInvestigation
|> ArcInvestigation.fromFsWorkbook
|> ArcInvestigation.toFsWorkbook
|> fun wb -> wb.GetWorksheetByName "Investigation"
|> fun wb -> wb.GetWorksheetByName "isa_investigation"

Expect.workSheetEqual o i "Written investigation file does not match read investigation file"
)
Expand Down Expand Up @@ -133,23 +153,23 @@ let private testInvestigationFile =
Expect.isOk writingSuccess (Result.getMessage writingSuccess)
)

testCase "OutputMatchesInputEmpty" (fun () ->
//testCase "OutputMatchesInputEmpty" (fun () ->

let i =
TestObjects.Investigation.emptyInvestigation.GetWorksheetByName "Investigation"
|> fun ws -> ws.Rows
|> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b))
let o =
TestObjects.Investigation.emptyInvestigation
|> ArcInvestigation.fromFsWorkbook
|> ArcInvestigation.toFsWorkbook
|> fun wb -> wb.GetWorksheetByName "Investigation"
|> fun ws -> ws.Rows
|> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b))
// let i =
// TestObjects.Investigation.emptyInvestigation.GetWorksheetByName "isa_investigation"
// |> fun ws -> ws.Rows
// |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b))
// let o =
// TestObjects.Investigation.emptyInvestigation
// |> ArcInvestigation.fromFsWorkbook
// |> ArcInvestigation.toFsWorkbook
// |> fun wb -> wb.GetWorksheetByName "isa_investigation"
// |> fun ws -> ws.Rows
// |> Seq.map (fun r -> r.Cells |> Seq.map (fun c -> c.Value) |> Seq.reduce (fun a b -> a + b))


mySequenceEqual o i "Written empty investigation file does not match read empty investigation file"
)
// mySequenceEqual o i "Written empty investigation file does not match read empty investigation file"
//)
]
|> testSequenced

Expand Down Expand Up @@ -529,5 +549,5 @@ let private testInvestigationFile =
let main =
testList "InvestigationFile" [
testInvestigationWriterComponents
//testInvestigationFile
testInvestigationFile
]
18 changes: 16 additions & 2 deletions tests/ISA/ISA.Spreadsheet.Tests/TestObjects/InvestigationFile.fs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ open FsSpreadsheet

let emptyInvestigation =
let wb = new FsWorkbook()
let ws = wb.InitWorksheet("Investigation")
let ws = wb.InitWorksheet("isa_investigation")
let row1 = ws.Row(1)
row1.[1].Value <- "ONTOLOGY SOURCE REFERENCE"
let row2 = ws.Row(2)
Expand Down Expand Up @@ -195,7 +195,7 @@ let investigationIdentifier = "BII-I-1"

let fullInvestigation =
let wb = new FsWorkbook()
let ws = wb.InitWorksheet("Investigation")
let ws = wb.InitWorksheet("isa_investigation")
let row1 = ws.Row(1)
row1.[1].Value <- "ONTOLOGY SOURCE REFERENCE"
let row2 = ws.Row(2)
Expand Down Expand Up @@ -721,4 +721,18 @@ let fullInvestigation =
row164.[1].Value <- "#TestRemark4"
let row165 = ws.Row(165)
row165.[1].Value <- "#TestRemark5"
wb

/// Builds a new workbook whose only worksheet is a copy of the
/// "isa_investigation" sheet of `fullInvestigation`, renamed to `sheetName`.
/// The copy is renamed, so the sheet inside `fullInvestigation` keeps its name.
let private fullInvestigationWithSheetName (sheetName : string) =
    let cp = (fullInvestigation.GetWorksheetByName "isa_investigation").Copy()
    cp.Name <- sheetName
    let wb = new FsWorkbook()
    wb.AddWorksheet cp
    wb

/// Copy of the full investigation workbook whose metadata sheet is named "Investigation".
let fullInvestigationObsoleteSheetName =
    fullInvestigationWithSheetName "Investigation"

/// Copy of the full investigation workbook whose metadata sheet is named "Gibberish".
let fullInvestigationWrongSheetName =
    fullInvestigationWithSheetName "Gibberish"