Skip to content

Commit

Permalink
Conversion from paired FASTQ to unmapped BAM
Browse files Browse the repository at this point in the history
  • Loading branch information
Geraldine Van der Auwera committed Oct 6, 2017
1 parent ea4463c commit e5eb6e3
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 1 deletion.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2017, GATK workflows
Copyright (c) 2017, Broad Institute
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
6 changes: 6 additions & 0 deletions generic.google-papi.options.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"read_from_cache":false,
"default_runtime_attributes": {
"zones": "us-central1-a us-central1-b us-central1-c us-central1-f"
}
}
32 changes: 32 additions & 0 deletions paired-fastq-to-unmapped-bam.inputs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"ConvertPairedFastQsToUnmappedBamWf.readgroup_list": [
"NA12878_A", "NA12878_B", "NA12878_C"
],
"ConvertPairedFastQsToUnmappedBamWf.metadata": {
"NA12878_A": [
"NA12878", "Solexa-NA12878", "H06HDADXX130110.1.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI"
],
"NA12878_B": [
"NA12878", "Solexa-NA12878", "H06HDADXX130110.2.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI"
],
"NA12878_C": [
"NA12878", "Solexa-NA12878", "H06JUADXX130110.1.ATCACGAT", "2016-09-01T02:00:00+0200", "illumina", "BI"
]
},
"ConvertPairedFastQsToUnmappedBamWf.fastq_pairs": {
"NA12878_A": [
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.1.ATCACGAT.20k_reads_1.fastq",
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.1.ATCACGAT.20k_reads_2.fastq"
],
"NA12878_B": [
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.2.ATCACGAT.20k_reads_1.fastq",
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06HDADXX130110.2.ATCACGAT.20k_reads_2.fastq"
],
"NA12878_C": [
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06JUADXX130110.1.ATCACGAT.20k_reads_1.fastq",
"gs://gatk-test-data/wgs_fastq/NA12878_20k/H06JUADXX130110.1.ATCACGAT.20k_reads_2.fastq"
]
},
"ConvertPairedFastQsToUnmappedBamWf.PairedFastQsToUnmappedBAM.mem_size": "1 GB",
"ConvertPairedFastQsToUnmappedBamWf.PairedFastQsToUnmappedBAM.disk_size": 200
}
97 changes: 97 additions & 0 deletions paired-fastq-to-unmapped-bam.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
## Copyright Broad Institute, 2017
##
## This WDL converts paired FASTQ to uBAM and adds read group information
##
## Requirements/expectations :
## - Paired-end sequencing data in FASTQ format (one file per orientation)
## - One or more read groups, one per pair of FASTQ files
##
## Outputs :
## - Set of unmapped BAMs, one per read group
##
## Cromwell version support
## - Successfully tested on v24
## - Does not work on versions < v23 due to output syntax
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
# TASK DEFINITIONS
# Convert a pair of FASTQs to an unmapped BAM (uBAM) with Picard FastqToSam,
# passing the supplied read group metadata through to the tool.
#
# Inputs:
#   fastq_1, fastq_2   - the two FASTQ files of the pair (FASTQ / FASTQ2)
#   readgroup_name     - read group name; also used as the output BAM basename
#   sample_name        - forwarded as SAMPLE_NAME
#   library_name       - forwarded as LIBRARY_NAME
#   platform_unit      - forwarded as PLATFORM_UNIT
#   run_date           - forwarded as RUN_DATE
#   platform_name      - forwarded as PLATFORM
#   sequencing_center  - forwarded as SEQUENCING_CENTER
#   disk_size          - size of the local HDD requested via the `disks` runtime attribute
#   mem_size           - memory request string for the `memory` runtime attribute (e.g. "4 GB")
#
# Output:
#   output_bam         - <readgroup_name>.bam written to the task's working directory
#
# Every interpolated value is double-quoted in the command so that paths or
# metadata containing whitespace reach Picard as a single KEY=VALUE argument.
#
# NOTE(review): the JVM heap cap is hard-coded to 3000 MB below, independently of
# mem_size. Callers should request mem_size comfortably above 3000 MB; a smaller
# request lets the JVM grow past the memory that was asked for.
task PairedFastQsToUnmappedBAM {
  File fastq_1
  File fastq_2
  String readgroup_name
  String sample_name
  String library_name
  String platform_unit
  String run_date
  String platform_name
  String sequencing_center
  Int disk_size
  String mem_size

  command {
    java -Xmx3000m -jar /usr/gitc/picard.jar \
      FastqToSam \
      FASTQ="${fastq_1}" \
      FASTQ2="${fastq_2}" \
      OUTPUT="${readgroup_name}.bam" \
      READ_GROUP_NAME="${readgroup_name}" \
      SAMPLE_NAME="${sample_name}" \
      LIBRARY_NAME="${library_name}" \
      PLATFORM_UNIT="${platform_unit}" \
      RUN_DATE="${run_date}" \
      PLATFORM="${platform_name}" \
      SEQUENCING_CENTER="${sequencing_center}"
  }
  runtime {
    docker: "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282"
    memory: mem_size
    cpu: "1"
    disks: "local-disk " + disk_size + " HDD"
  }
  output {
    File output_bam = "${readgroup_name}.bam"
  }
}

# WORKFLOW DEFINITION
# Scatters over the listed read groups and converts each FASTQ pair to a uBAM.
#
# Inputs:
#   readgroup_list - read group names; each one is used as a key into both maps below
#   fastq_pairs    - read group name -> [first FASTQ, second FASTQ]
#   metadata       - read group name -> [sample name, library name, platform unit,
#                    run date, platform name, sequencing center] (in that order)
workflow ConvertPairedFastQsToUnmappedBamWf {
  Map[String, Array[String]] metadata
  Map[String, Array[File]] fastq_pairs
  Array[String] readgroup_list

  # One shard per read group; the shards are independent of one another
  scatter (rg_id in readgroup_list) {

    # Look up this read group's files and metadata fields by position
    call PairedFastQsToUnmappedBAM {
      input:
        readgroup_name = rg_id,
        fastq_1 = fastq_pairs[rg_id][0],
        fastq_2 = fastq_pairs[rg_id][1],
        sample_name = metadata[rg_id][0],
        library_name = metadata[rg_id][1],
        platform_unit = metadata[rg_id][2],
        run_date = metadata[rg_id][3],
        platform_name = metadata[rg_id][4],
        sequencing_center = metadata[rg_id][5]
    }
  }

  # Gathered uBAMs, one per read group, kept once the run finishes
  output {
    Array[File] output_bams = PairedFastQsToUnmappedBAM.output_bam
  }
}

0 comments on commit e5eb6e3

Please sign in to comment.