Skip to content

Commit

Permalink
Merged thanhleviet's changes from PR galaxyproject#2575 here
Browse files Browse the repository at this point in the history
  • Loading branch information
Slugger70 committed Sep 11, 2019
2 parents ae13a8c + 31d02e0 commit ced4edf
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 33 deletions.
34 changes: 31 additions & 3 deletions tools/snippy/snippy-core.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,39 @@
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
    ## Expose the selected reference under the fixed name 'ref' in the job
    ## directory, whether it is a history dataset or the 'path' column of the
    ## chosen all_fasta data table entry.
    #if $reference_source.reference_source_selector == 'history'
        ln -sf '$reference_source.ref_file' 'ref' &&
    #elif $reference_source.reference_source_selector == 'cached'
        ln -sf '$reference_source.ref_file.fields.path' 'ref' &&
    #end if
    ## Unpack every input archive into a directory named after the dataset
    ## name with its last extension removed.
    #for $indir in $indirs
        #set $sample_name = os.path.splitext(os.path.basename(str($indir.name)))[0]
        mkdir '$sample_name' && tar -xf '$indir' -C '$sample_name' --strip-components=1 &&
    #end for
    ## Quoted, space-separated list of the directories created above.
    #set snippy_dirs = " ".join(["'{0}'".format(os.path.splitext(os.path.basename(str($indir.name)))[0]) for $indir in $indirs])
    ## Pass the reference exactly once: the symlink created above. The former
    ## top-level 'ref' input is superseded by the reference_source conditional.
    snippy-core
        --ref 'ref'
        ${snippy_dirs}
]]></command>

<inputs>
<param name="indirs" type="data" multiple="true" format="zip" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" />
<param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" />
<!-- Reference selection: either an entry of the all_fasta data table ("cached")
     or a FASTA/GenBank dataset from the history ("history"). Both branches
     expose the choice as reference_source.ref_file. -->
<conditional name="reference_source">
    <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. If you would like to perform self-mapping select `history` here, then choose your input file as reference.">
        <option value="cached">Use a built-in genome index</option>
        <option value="history">Use a genome from history and build index</option>
    </param>
    <when value="cached">
        <!-- Options come from the all_fasta data table; the validator rejects
             the form when that table has no entries. -->
        <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
            <options from_data_table="all_fasta">
                <validator type="no_options" message="No reference genomes are available" />
            </options>
        </param>
    </when>
    <when value="history">
        <!-- Help text now names the formats actually accepted (it previously said FASTQ). -->
        <param name="ref_file" type="data" format="fasta,genbank" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or GenBank file to the history and use it as the reference" />
    </when>
</conditional>
<param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection">
<option value="outaln" selected="True">A core SNP alignment in the fasta format</option>
<option value="outfull" selected="False">A whole genome SNP alignment (includes invariant sites)</option>
Expand Down Expand Up @@ -48,7 +68,15 @@
<tests>
<test><!-- Test #1 - test with 3 zipped directories -->
<param name="indirs" value="a.tgz,b.tgz,c.tgz" />
<param name="ref" value="reference.fasta" />
<param name="reference_source|reference_source_selector" value="history"/>
<param name="reference_source|ref_file" value="reference.fasta" ftype="fasta"/>
<param name="outputs" value="outtxt" />
<output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" />
</test>
<test><!-- Test #2 - same 3 input archives as test #1, but the reference is selected from the all_fasta data table (cached entry "test_id") instead of the history -->
<param name="indirs" value="a.tgz,b.tgz,c.tgz" />
<param name="reference_source|reference_source_selector" value="cached"/>
<param name="reference_source|ref_file" value="test_id"/>
<param name="outputs" value="outtxt" />
<output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" />
</test>
Expand Down
96 changes: 66 additions & 30 deletions tools/snippy/snippy.xml
Original file line number Diff line number Diff line change
@@ -1,31 +1,26 @@
<tool id="snippy" name="snippy" version="@VERSION@+galaxy3">
<description>
Snippy finds SNPs between a haploid reference genome and your NGS sequence reads.
</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<expand macro="version_command" />
</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<expand macro="version_command" />

<command detect_errors="exit_code"><![CDATA[
#if $ref.is_of_type("fasta")
cp '$ref' 'ref.fna' &&
#end if
#if $ref.is_of_type("genbank")
cp '$ref' 'ref.gbk' &&
#if $reference_source.reference_source_selector == 'history'
ln -sf '$reference_source.ref_file' 'ref' &&
#elif $reference_source.reference_source_selector == 'cached'
ln -sf '$reference_source.ref_file.fields.path' 'ref' &&
#end if
snippy
--outdir 'out'
--cpus \${GALAXY_SLOTS:-1}
--ram \$((\${GALAXY_MEMORY_MB:-4096}/1024))
#if $ref.is_of_type("fasta")
--ref 'ref.fna'
#end if
#if $ref.is_of_type("genbank")
--ref 'ref.gbk'
#end if
--ref 'ref'
--mapqual $adv.mapqual
--mincov $adv.mincov
--minfrac $adv.minfrac
Expand Down Expand Up @@ -69,12 +64,26 @@
#end if
]]></command>
]]> </command>

<inputs>

<param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" />

<!-- Reference selection: either an entry of the all_fasta data table ("cached")
     or a dataset from the history ("history"). Both branches expose the choice
     as reference_source.ref_file. -->
<conditional name="reference_source">
    <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. If you would like to perform self-mapping select `history` here, then choose your input file as reference.">
        <option value="cached">Use a built-in genome index</option>
        <option value="history">Use a genome from history and build index</option>
    </param>
    <when value="cached">
        <!-- Options come from the all_fasta data table; the validator rejects
             the form when that table has no entries. -->
        <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
            <options from_data_table="all_fasta">
                <validator type="no_options" message="No reference genomes are available" />
            </options>
        </param>
    </when>
    <when value="history">
        <!-- GenBank is accepted again: the input this conditional replaces took
             fasta,genbank and the tool help still describes GenBank references.
             Help text corrected from "FASTQ", which is never a valid reference. -->
        <param name="ref_file" type="data" format="fasta,genbank" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or GenBank file to the history and use it as the reference" />
    </when>
</conditional>
<conditional name="fastq_input">
<param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
<option value="paired">Paired</option>
Expand Down Expand Up @@ -155,8 +164,12 @@

<tests>

<test> <!-- test 0 - fasta ref no snps -->
<param name="ref" value="reference.fasta" ftype="fasta" />
<test> <!-- test 0 - fasta ref no snps -->
<!-- <param name="ref" value="reference.fasta" ftype="fasta" /> -->
<conditional name="reference_source">
<param name="reference_source_selector" value="history"/>
<param name="ref_file" value="reference.fasta" ftype="fasta"/>
</conditional>
<param name="fastq_input_selector" value="paired" />
<param name="fastq_input1" ftype="fastqsanger" value="a_1.fastq" />
<param name="fastq_input2" ftype="fastqsanger" value="a_2.fastq" />
Expand All @@ -167,8 +180,11 @@
<output name="snpgff" ftype="gff3" file="a_fna_ref_mincov_2_minqual_60.snps.gff" />
</test>

<test> <!-- test 1 - fasta ref one snp -->
<param name="ref" value="reference.fasta" ftype="fasta" />
<test> <!-- test 1 - fasta ref one snp -->
<conditional name="reference_source">
<param name="reference_source_selector" value="history"/>
<param name="ref_file" value="reference.fasta" ftype="fasta"/>
</conditional>
<param name="fastq_input_selector" value="paired" />
<param name="fastq_input1" ftype="fastqsanger" value="b_1.fastq" />
<param name="fastq_input2" ftype="fastqsanger" value="b_2.fastq" />
Expand All @@ -179,8 +195,11 @@
<output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" />
</test>

<test> <!-- test 2 - fasta ref one snp paired_collection -->
<param name="ref" value="reference.fasta" ftype="fasta" />
<test> <!-- test 2 - fasta ref one snp paired_collection -->
<conditional name="reference_source">
<param name="reference_source_selector" value="history"/>
<param name="ref_file" value="reference.fasta" ftype="fasta"/>
</conditional>
<param name="fastq_input_selector" value="paired_collection" />
<param name="fastq_input">
<collection type="paired">
Expand All @@ -195,8 +214,25 @@
<output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" />
</test>

<test> <!-- test 3 - fasta ref one snp single -->
<param name="ref" value="reference.fasta" ftype="fasta" />
<test> <!-- test 3 - fasta ref one snp single -->
<conditional name="reference_source">
<param name="reference_source_selector" value="history"/>
<param name="ref_file" value="reference.fasta" ftype="fasta"/>
</conditional>
<param name="fastq_input_selector" value="single" />
<param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
<param name="mincov" value="2" />
<param name="minqual" value="60" />
<param name="outputs" value="outgff,outsum" />
<output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="6" />
<output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" />
</test>

<test> <!-- test 4 - reference source as cached -->
<conditional name="reference_source">
<param name="reference_source_selector" value="cached"/>
<param name="ref_file" value="test_id"/>
</conditional>
<param name="fastq_input_selector" value="single" />
<param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
<param name="mincov" value="2" />
Expand Down Expand Up @@ -243,7 +279,7 @@ If the reference file is supplied in genbank format, snpeff will be called to de
For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy
]]></help>
<expand macro="citations"/>
]]> </help>
<expand macro="citations"/>

</tool>
20 changes: 20 additions & 0 deletions tools/snippy/test-data/all_fasta.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#This file lists the locations and dbkeys of all the fasta files
#under the "genome" directory (a directory that contains a directory
#for each build). The script extract_fasta.py will generate the file
#all_fasta.loc. This file has the format (white space characters are
#TAB characters):
#
#<unique_build_id> <dbkey> <display_name> <file_path>
#
#So, all_fasta.loc could look something like this:
#
#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
#
#Your all_fasta.loc file should contain an entry for each individual
#fasta file. So there will be multiple fasta files for each build,
#such as with hg19 above.
#
test_id test_dbkey test display name ${__HERE__}/ref.fna

18 changes: 18 additions & 0 deletions tools/snippy/tool-data/all_fasta.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#This file lists the locations and dbkeys of all the fasta files
#under the "genome" directory (a directory that contains a directory
#for each build). The script extract_fasta.py will generate the file
#all_fasta.loc. This file has the format (white space characters are
#TAB characters):
#
#<unique_build_id> <dbkey> <display_name> <file_path>
#
#So, all_fasta.loc could look something like this:
#
#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
#
#Your all_fasta.loc file should contain an entry for each individual
#fasta file. So there will be multiple fasta files for each build,
#such as with hg19 above.
#
8 changes: 8 additions & 0 deletions tools/snippy/tool_data_table_conf.xml.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
<!-- Sample data table configuration: declares the "all_fasta" table that a tool select parameter can read through options from_data_table="all_fasta". -->
<tables>
<!-- Locations of all fasta files under genome directory -->
<!-- Lines of the loc file starting with "#" are treated as comments (comment_char). -->
<table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
<!-- Column names, in the order the fields appear on each loc file line: unique id, dbkey, display name, file path. -->
<columns>value, dbkey, name, path</columns>
<!-- Loc file that supplies the table entries. -->
<file path="tool-data/all_fasta.loc" />
</table>
</tables>
8 changes: 8 additions & 0 deletions tools/snippy/tool_data_table_conf.xml.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
<!-- Test data table configuration: declares the "all_fasta" table backed by the loc file under test-data. -->
<tables>
<!-- Locations of all fasta files under genome directory -->
<!-- Lines of the loc file starting with "#" are treated as comments (comment_char). -->
<table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
<!-- Column names, in the order the fields appear on each loc file line: unique id, dbkey, display name, file path. -->
<columns>value, dbkey, name, path</columns>
<!-- NOTE(review): ${__HERE__} presumably expands to the directory containing this file; confirm against the test runner. -->
<file path="${__HERE__}/test-data/all_fasta.loc" />
</table>
</tables>

0 comments on commit ced4edf

Please sign in to comment.