Skip to content

Commit

Permalink
Merge pull request #451 from nfdi4plants/Feature_Enable_Sequence_Base…
Browse files Browse the repository at this point in the history
…d_Metadata_Creation

Feature enable sequence based metadata creation
  • Loading branch information
HLWeil authored Oct 15, 2024
2 parents 8050b1b + 36f78d4 commit 0f9028c
Show file tree
Hide file tree
Showing 13 changed files with 311 additions and 197 deletions.
111 changes: 65 additions & 46 deletions src/Spreadsheet/ArcAssay.fs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,51 @@ module ArcAssay =
let [<Literal>] obsoleteMetadataSheetName = "Assay"
let [<Literal>] metadataSheetName = "isa_assay"

/// Parses the rows of an assay metadata section into an `ArcAssay`.
///
/// The rows are scanned section by section: a row whose first column holds the assays label
/// (current or obsolete) hands the following rows to `Assays.fromRows`, a row holding the
/// contacts label hands them to `Contacts.fromRows`. Scanning stops at the first row that
/// starts neither section. The first parsed assay (or a fresh assay with a missing identifier
/// when none was parsed) is returned with the parsed contacts set as its performers.
///
/// Fails with "empty assay metadata sheet" when `rows` yields no row at all.
///
/// Note: `rows` is enumerated twice (once for prefix detection, once for parsing),
/// so it has to be a re-enumerable sequence.
let fromRows (rows : seq<SparseRow>) =
    // Prefixed keys are expected as soon as the first entry of any row starts with the assays prefix.
    // `Seq.tryHead` is used instead of `Seq.head` so that a row without any entry
    // simply counts as "no prefix" instead of raising an ArgumentException.
    let hasPrefix =
        rows
        |> Seq.exists (fun row ->
            match Seq.tryHead row with
            | Some (_, value) -> value.StartsWith(assaysPrefix)
            | None -> false
        )
    let aPrefix, cPrefix =
        if hasPrefix then
            Some assaysPrefix, Some contactsPrefix
        else None, None
    // `use` releases the enumerator once parsing is done; the result is built eagerly inside `loop`.
    use en = rows.GetEnumerator()
    let rec loop lastRow assays contacts rowNumber =

        match lastRow with
        | Some label when label = assaysLabel || label = obsoleteAssaysLabel ->
            let currentRow, rowNumber, _, assays = Assays.fromRows aPrefix (rowNumber + 1) en
            loop currentRow assays contacts rowNumber

        | Some label when label = contactsLabel ->
            let currentRow, rowNumber, _, contacts = Contacts.fromRows cPrefix (rowNumber + 1) en
            loop currentRow assays contacts rowNumber
        | _ ->
            match assays, contacts with
            | [], [] -> ArcAssay.create(Identifier.createMissingIdentifier())
            | assays, contacts ->
                assays
                |> Seq.tryHead
                |> Option.defaultValue (ArcAssay.create(Identifier.createMissingIdentifier()))
                |> ArcAssay.setPerformers (ResizeArray contacts)

    if en.MoveNext () then
        let currentRow = en.Current |> SparseRow.tryGetValueAt 0
        loop currentRow [] [] 1

    else
        failwith "empty assay metadata sheet"

/// Writes an assay as metadata rows: the assays label row followed by the rows produced by
/// `Assays.toRows`, then the contacts label row followed by the rows produced by
/// `Contacts.toRows` for the assay's performers.
let toRows (assay : ArcAssay) =
    // Both sections are sequence expressions, so nothing is computed before enumeration.
    let assaySection =
        seq {
            yield SparseRow.fromValues [assaysLabel]
            yield! Assays.toRows (Some assaysPrefix) [assay]
        }
    let performerSection =
        seq {
            yield SparseRow.fromValues [contactsLabel]
            yield! Contacts.toRows (Some contactsPrefix) (List.ofSeq assay.Performers)
        }
    Seq.append assaySection performerSection

let toMetadataSheet (assay : ArcAssay) : FsWorksheet =
let toRows (assay:ArcAssay) =
seq {
yield SparseRow.fromValues [assaysLabel]
yield! Assays.toRows (Some assaysPrefix) [assay]

yield SparseRow.fromValues [contactsLabel]
yield! Contacts.toRows (Some contactsPrefix) (List.ofSeq assay.Performers)
}
let sheet = FsWorksheet(metadataSheetName)
assay
|> toRows
Expand All @@ -33,56 +69,39 @@ module ArcAssay =

let fromMetadataSheet (sheet : FsWorksheet) : ArcAssay =
try
let fromRows (usePrefixes : bool) (rows: seq<SparseRow>) =
let aPrefix,cPrefix =
if usePrefixes then
Some assaysPrefix,Some contactsPrefix
else None,None
let en = rows.GetEnumerator()
let rec loop lastLine assays contacts lineNumber =

match lastLine with

| Some k when k = assaysLabel || k = obsoleteAssaysLabel ->
let currentLine,lineNumber,_,assays = Assays.fromRows aPrefix (lineNumber + 1) en
loop currentLine assays contacts lineNumber

| Some k when k = contactsLabel ->
let currentLine,lineNumber,_,contacts = Contacts.fromRows cPrefix (lineNumber + 1) en
loop currentLine assays contacts lineNumber
| k ->
match assays, contacts with
| [], [] -> ArcAssay.create(Identifier.createMissingIdentifier())
| assays, contacts ->
assays
|> Seq.tryHead
|> Option.defaultValue (ArcAssay.create(Identifier.createMissingIdentifier()))
|> ArcAssay.setPerformers (ResizeArray contacts)

if en.MoveNext () then
let currentLine = en.Current |> SparseRow.tryGetValueAt 0
loop currentLine [] [] 1

else
failwith "empty assay metadata sheet"
let rows =
sheet.Rows
|> Seq.map SparseRow.fromFsRow
let hasPrefix =
rows
|> Seq.exists (fun row -> row |> Seq.head |> snd |> fun s -> s.StartsWith(assaysPrefix))
rows
|> fromRows hasPrefix
|> fromRows
with
| err -> failwithf "Failed while parsing metadatasheet: %s" err.Message

/// Converts an assay into a metadata collection by turning it into rows (`toRows`)
/// and mapping every row through `SparseRow.getAllValues`.
let toMetadataCollection (assay : ArcAssay) =
    Seq.map SparseRow.getAllValues (toRows assay)

/// Reads an assay from a metadata collection: every inner sequence is converted with
/// `SparseRow.fromAllValues` and the resulting rows are parsed by `fromRows`.
///
/// Any exception raised while parsing is re-raised with the message prefix
/// "Failed while parsing metadatasheet: ".
let fromMetadataCollection (collection : seq<seq<string option>>) : ArcAssay =
    try
        fromRows (Seq.map SparseRow.fromAllValues collection)
    with
    | ex -> failwithf "Failed while parsing metadatasheet: %s" ex.Message

let isMetadataSheetName (name:string) =
let isMetadataSheetName (name : string) =
name = metadataSheetName || name = obsoleteMetadataSheetName

/// Returns true when the name of the given worksheet is accepted by `isMetadataSheetName`.
let isMetadataSheet (sheet : FsWorksheet) =
    sheet.Name |> isMetadataSheetName

let tryGetMetadataSheet (doc:FsWorkbook) =
let tryGetMetadataSheet (doc : FsWorkbook) =
doc.GetWorksheets()
|> Seq.tryFind isMetadataSheet

Expand All @@ -92,7 +111,7 @@ module ArcAssayExtensions =
type ArcAssay with

/// Reads an assay from a spreadsheet
static member fromFsWorkbook (doc:FsWorkbook) : ArcAssay =
static member fromFsWorkbook (doc : FsWorkbook) : ArcAssay =
try
// Reading the "Assay" metadata sheet. Here metadata
let assayMetadata =
Expand Down Expand Up @@ -122,7 +141,7 @@ module ArcAssayExtensions =
/// </summary>
/// <param name="assay"></param>
/// <param name="datamapSheet">Default: true</param>
static member toFsWorkbook (assay : ArcAssay, ?datamapSheet: bool) =
static member toFsWorkbook (assay : ArcAssay, ?datamapSheet : bool) =
let datamapSheet = defaultArg datamapSheet true
let doc = new FsWorkbook()
let metadataSheet = ArcAssay.toMetadataSheet (assay)
Expand All @@ -140,5 +159,5 @@ module ArcAssayExtensions =
/// Write an assay to a spreadsheet
///
/// If datamapSheet is true, the datamap will be written to a worksheet inside assay workbook.
member this.ToFsWorkbook (?datamapSheet: bool) =
member this.ToFsWorkbook (?datamapSheet : bool) =
ArcAssay.toFsWorkbook (this, ?datamapSheet = datamapSheet)
97 changes: 52 additions & 45 deletions src/Spreadsheet/ArcInvestigation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,27 @@ module ArcInvestigation =
let [<Literal>] metadataSheetName = "isa_investigation"
let [<Literal>] obsoleteMetadataSheetName = "Investigation"


type InvestigationInfo =
{
Identifier : string
Title : string
Description : string
SubmissionDate : string
PublicReleaseDate : string
Comments : Comment list
Identifier : string
Title : string
Description : string
SubmissionDate : string
PublicReleaseDate : string
Comments : Comment list
}

static member create identifier title description submissionDate publicReleaseDate comments =
{
Identifier = identifier
Title = title
Description = description
SubmissionDate = submissionDate
PublicReleaseDate = publicReleaseDate
Comments = comments
Identifier = identifier
Title = title
Description = description
SubmissionDate = submissionDate
PublicReleaseDate = publicReleaseDate
Comments = comments
}

static member Labels = [identifierLabel;titleLabel;descriptionLabel;submissionDateLabel;publicReleaseDateLabel]
static member Labels = [identifierLabel; titleLabel; descriptionLabel; submissionDateLabel; publicReleaseDateLabel]

static member FromSparseTable (matrix : SparseTable) =

Expand All @@ -57,51 +56,51 @@ module ArcInvestigation =
let comments =
matrix.CommentKeys
|> List.map (fun k ->
Comment.fromString k (matrix.TryGetValueDefault("",(k,i))))
Comment.fromString k (matrix.TryGetValueDefault("", (k, i))))

InvestigationInfo.create
(matrix.TryGetValueDefault("",(identifierLabel,i)))
(matrix.TryGetValueDefault("",(titleLabel,i)))
(matrix.TryGetValueDefault("",(descriptionLabel,i)))
(matrix.TryGetValueDefault("",(submissionDateLabel,i)))
(matrix.TryGetValueDefault("",(publicReleaseDateLabel,i)))
(matrix.TryGetValueDefault("", (identifierLabel, i)))
(matrix.TryGetValueDefault("", (titleLabel, i)))
(matrix.TryGetValueDefault("", (descriptionLabel, i)))
(matrix.TryGetValueDefault("", (submissionDateLabel, i)))
(matrix.TryGetValueDefault("", (publicReleaseDateLabel, i)))
comments


static member ToSparseTable (investigation: ArcInvestigation) =
static member ToSparseTable (investigation : ArcInvestigation) =
let i = 1
let matrix = SparseTable.Create (keys = InvestigationInfo.Labels,length=2)
let mutable commentKeys = []

do matrix.Matrix.Add ((identifierLabel,i), (investigation.Identifier))
do matrix.Matrix.Add ((titleLabel,i), (Option.defaultValue "" investigation.Title))
do matrix.Matrix.Add ((descriptionLabel,i), (Option.defaultValue "" investigation.Description))
do matrix.Matrix.Add ((submissionDateLabel,i), (Option.defaultValue "" investigation.SubmissionDate))
do matrix.Matrix.Add ((publicReleaseDateLabel,i), (Option.defaultValue "" investigation.PublicReleaseDate))
do matrix.Matrix.Add ((identifierLabel, i), (investigation.Identifier))
do matrix.Matrix.Add ((titleLabel, i), (Option.defaultValue "" investigation.Title))
do matrix.Matrix.Add ((descriptionLabel, i), (Option.defaultValue "" investigation.Description))
do matrix.Matrix.Add ((submissionDateLabel, i), (Option.defaultValue "" investigation.SubmissionDate))
do matrix.Matrix.Add ((publicReleaseDateLabel, i), (Option.defaultValue "" investigation.PublicReleaseDate))

investigation.Comments
|> ResizeArray.iter (fun comment ->
let n,v = comment |> Comment.toString
let n, v = comment |> Comment.toString
commentKeys <- n :: commentKeys
matrix.Matrix.Add((n,i),v)
matrix.Matrix.Add((n, i), v)
)

{matrix with CommentKeys = commentKeys |> List.distinct |> List.rev}


static member fromRows lineNumber (rows : IEnumerator<SparseRow>) =
SparseTable.FromRows(rows,InvestigationInfo.Labels,lineNumber)
|> fun (s,ln,rs,sm) -> (s,ln,rs, InvestigationInfo.FromSparseTable sm)
SparseTable.FromRows(rows, InvestigationInfo.Labels, lineNumber)
|> fun (s, ln, rs, sm) -> (s, ln, rs, InvestigationInfo.FromSparseTable sm)

static member toRows (investigation : ArcInvestigation) =
investigation
|> InvestigationInfo.ToSparseTable
|> SparseTable.ToRows

let fromParts (investigationInfo:InvestigationInfo) (ontologySourceReference:OntologySourceReference list) (publications: Publication list) (contacts: Person list) (studies: ArcStudy list) (assays: ArcAssay list) (remarks: Remark list) =
let fromParts (investigationInfo : InvestigationInfo) (ontologySourceReference : OntologySourceReference list) (publications : Publication list) (contacts : Person list) (studies : ArcStudy list) (assays : ArcAssay list) (remarks : Remark list) =
let studyIdentifiers = studies |> List.map (fun s -> s.Identifier)
ArcInvestigation.make
(investigationInfo.Identifier)
investigationInfo.Identifier
(Option.fromValueWithDefault "" investigationInfo.Title)
(Option.fromValueWithDefault "" investigationInfo.Description)
(Option.fromValueWithDefault "" investigationInfo.SubmissionDate)
Expand All @@ -115,8 +114,7 @@ module ArcInvestigation =
(ResizeArray investigationInfo.Comments)
(ResizeArray remarks)


let fromRows (rows:seq<SparseRow>) =
let fromRows (rows : seq<SparseRow>) =
let en = rows.GetEnumerator()

let emptyInvestigationInfo = InvestigationInfo.create "" "" "" "" "" []
Expand All @@ -125,7 +123,7 @@ module ArcInvestigation =
match lastLine with

| Some k when k = ontologySourceReferenceLabel ->
let currentLine,lineNumber,newRemarks,ontologySourceReferences = OntologySourceReference.fromRows (lineNumber + 1) en
let currentLine, lineNumber, newRemarks, ontologySourceReferences = OntologySourceReference.fromRows (lineNumber + 1) en
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| Some k when k = investigationLabel ->
Expand All @@ -137,21 +135,21 @@ module ArcInvestigation =
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| Some k when k = contactsLabel ->
let currentLine,lineNumber,newRemarks,contacts = Contacts.fromRows (Some contactsLabelPrefix) (lineNumber + 1) en
let currentLine,lineNumber, newRemarks, contacts = Contacts.fromRows (Some contactsLabelPrefix) (lineNumber + 1) en
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| Some k when k = studyLabel ->
let currentLine,lineNumber,newRemarks,study = Studies.fromRows (lineNumber + 1) en
let currentLine, lineNumber, newRemarks, study = Studies.fromRows (lineNumber + 1) en
if study.IsSome then
loop currentLine ontologySourceReferences investigationInfo publications contacts (study.Value::studies) (List.append remarks newRemarks) lineNumber
else
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| k ->
let studies,assays =
| _ ->
let studies, assays =
studies
|> List.unzip
|> fun (s,a) ->
|> fun (s, a) ->
s |> List.rev,
a |> List.concat |> List.distinctBy (fun a -> a.Identifier)
fromParts investigationInfo ontologySourceReferences publications contacts studies assays remarks
Expand All @@ -164,8 +162,8 @@ module ArcInvestigation =
failwith "emptyInvestigationFile"


let toRows (investigation:ArcInvestigation) : seq<SparseRow> =
let insertRemarks (remarks:Remark list) (rows:seq<SparseRow>) =
let toRows (investigation : ArcInvestigation) : seq<SparseRow> =
let insertRemarks (remarks : Remark list) (rows : seq<SparseRow>) =
try
let rm = remarks |> List.map Remark.toTuple |> Map.ofList
let rec loop i l nl =
Expand Down Expand Up @@ -202,13 +200,22 @@ module ArcInvestigation =
|> insertRemarks (List.ofSeq investigation.Remarks)
|> seq

/// Converts an investigation into a metadata collection by turning it into rows (`toRows`)
/// and mapping every row through `SparseRow.getAllValues`.
let toMetadataCollection (investigation : ArcInvestigation) =
    investigation
    |> toRows
    |> Seq.map SparseRow.getAllValues

/// Reads an investigation from a metadata collection: every inner sequence is converted with
/// `SparseRow.fromAllValues` and the resulting rows are parsed by `fromRows`.
let fromMetadataCollection (collection : seq<seq<string option>>) =
    let rows = Seq.map SparseRow.fromAllValues collection
    fromRows rows

/// Returns true when `name` equals the current or the obsolete metadata sheet name.
let isMetadataSheetName (name : string) =
    [ metadataSheetName; obsoleteMetadataSheetName ]
    |> List.contains name

/// Returns true when the name of the given worksheet is accepted by `isMetadataSheetName`.
let isMetadataSheet (sheet : FsWorksheet) =
    sheet.Name |> isMetadataSheetName

let tryGetMetadataSheet (doc:FsWorkbook) =
let tryGetMetadataSheet (doc : FsWorkbook) =
doc.GetWorksheets()
|> Seq.tryFind isMetadataSheet

Expand All @@ -220,7 +227,7 @@ module ArcInvestigationExtensions =

type ArcInvestigation with

static member fromFsWorkbook (doc:FsWorkbook) =
static member fromFsWorkbook (doc : FsWorkbook) =
try
match ArcInvestigation.tryGetMetadataSheet doc with
| Some sheet -> sheet
Expand All @@ -231,7 +238,7 @@ module ArcInvestigationExtensions =
with
| err -> failwithf "Could not read investigation from spreadsheet: %s" err.Message

static member toFsWorkbook (investigation:ArcInvestigation) : FsWorkbook =
static member toFsWorkbook (investigation : ArcInvestigation) : FsWorkbook =
try
let wb = new FsWorkbook()
let sheet = FsWorksheet(metadataSheetName)
Expand Down
Loading

0 comments on commit 0f9028c

Please sign in to comment.