<!-- tools/cemitool/cemitool.xml -->

<tool id="cemitool" name="CEMiTool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>gene co-expression network analyses</description>
    <!-- Shared definitions are imported from macros.xml; the macros expanded
         below are presumably declared there (verify against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Run the R driver shipped next to this wrapper on the expression matrix.
        Rscript '$__tool_directory__/CEMiTool.R'
            -M '$expression_matrix'
        ## Optional datasets are only passed when the user supplied them.
        #if $annotation
            -A '$annotation'
        #end if
        #if $pathways
            -P '$pathways'
        #end if
        #if $interactions
            -I '$interactions'
        #end if
        ## Values from the "Advanced parameters" section (booleans, numbers, selects).
        -f $advanced_parameters.filter
        -i $advanced_parameters.filter_pval
        -a $advanced_parameters.apply_vst
        -n $advanced_parameters.n_genes
        -e $advanced_parameters.eps
        -c $advanced_parameters.cor_method
        -y $advanced_parameters.cor_function
        -x $advanced_parameters.network_type
        -t $advanced_parameters.tom_type
        -m $advanced_parameters.merge_similar
        -r $advanced_parameters.rank_method
        -g $advanced_parameters.min_ngen
        -d $advanced_parameters.diss_thresh
        -h $advanced_parameters.center_func
        -o $advanced_parameters.ora_pval
        -l $advanced_parameters.gsea_scale
        -w $advanced_parameters.gsea_min_size
        -z $advanced_parameters.gsea_max_size
        ## Free-text value: quoted so the shell always receives exactly one argument.
        -v '$advanced_parameters.sample_column_name'
    ]]></command>
    <inputs>
        <!-- Input datasets: only the expression matrix is mandatory. -->
        <param name="expression_matrix" type="data" format="tabular" label="Expression matrix"/>
        <param name="annotation" type="data" format="tabular" optional="true" label="Sample annotation"
            help="It allows to build a more complete object and generate richer reports about the 
                expression data. Sample annotation can be supplied in a data.frame that specifies a 
                class for each sample. Classes can represent different conditions, phenotypes, cell lines, 
                time points, etc. " />
        <param name="pathways" type="data" format="tabular,txt" label="Pathways list" optional="true" help="CEMiTool can 
            determine which biological functions are associated with the modules by performing an over 
            representation analysis (ORA). To do this you must provide a pathway list in the form of GMT 
            file. CEMiTool will then analyze how these pathways are represented in the modules." />
        <param name="interactions" type="data" format="tabular" optional="true" label="Interactions data" help="Interaction data, 
            such as protein-protein interactions can be added in order to generate annotated module graphs. 
            Interaction files contain two columns for interacting pairs of genes"/>
        <!-- The output filters test membership in this selection, so at least
             one group has to be chosen (optional="false"). -->
        <param name="outputs" type="select" multiple="true" optional="false" display="checkboxes" label="Outputs selector">
            <option value="report" selected="true">Report</option>
            <option value="tables">Tables</option>
            <option value="plots">Plots</option>
        </param>
        <!-- Every parameter of this section is forwarded to the R script on the command line. -->
        <section name="advanced_parameters" title="Advanced parameters">
            <param argument="filter" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Filter" help="If enabled, it will filter expression data" />
            <param argument="filter_pval" type="float" min="0" max="1" value="0.1" label="Filter P-value" help="P-value threshold for filtering" />
            <param argument="apply_vst" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="VST" help="If enabled, it applies Variance Stabilizing Transform before filtering genes" />
            <param argument="n_genes" type="integer" min="0" value="1000" label="Number of genes" help="Number of genes left after filtering" />
            <param argument="eps" type="float" min="0" max="1" value="0.1" label="EPS" help="A value for accepted R-squared interval between subsequent beta values" />
            <param name="cor_method" type="select" label="Correlation method" help="Correlation coefficient to be computed">
                <option value="pearson">Pearson</option>
                <option value="spearman">Spearman</option>
            </param>
            <param name="cor_function" type="select" label="Correlation function" help="Correlation function to be used">
                <option value="cor">Correlation (cor)</option>
                <option value="bicor">Biweight Midcorrelation (bicor)</option>
            </param>
            <param name="network_type" type="select" label="Network type" help="Indicates if network type should be computed as 'signed' or 'unsigned'">
                <option value="signed">Signed</option>
                <option value="unsigned" selected="true">Unsigned</option>
            </param>
            <param name="tom_type" type="select" label="TOM type" help="Indicates if TOM type should be computed as 'signed' or 'unsigned'">
                <option value="signed">Signed</option>
                <option value="unsigned">Unsigned</option>
            </param>
            <param argument="merge_similar" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Merge similar" help="If enabled, merge similar modules" />
            <param argument="rank_method" type="select" label="Rank method" help="Indicate how to rank genes">
                <option value="mean">Mean</option>
                <option value="median">Median</option>
            </param>
            <param argument="min_ngen" type="integer" min="0" value="30" label="Minimal number of genes per module"/>
            <param argument="diss_thresh" type="float" min="0" max="1" value="0.8" label="Merging correlation threshold" help="Module merging correlation threshold for eigengene similarity" />
            <param argument="center_func" type="select" label="Centrality measure to show in the plot">
                <option value="mean">Mean</option>
                <option value="median">Median</option>
            </param>
            <param argument="ora_pval" type="float" min="0" max="1" value="0.05" label="P-value for overrepresentation analysis"/>
            <param argument="gsea_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Z-score transformation for GSEA analysis" help="If enabled, it applies z-score transformation" />
            <param argument="gsea_min_size" type="integer" min="0" value="15" label="Minimum size of gene sets for GSEA analysis"/>
            <param argument="gsea_max_size" type="integer" min="0" value="1000" label="Maximum size of gene sets for GSEA analysis"/>
            <param argument="sample_column_name" type="text" value="SampleName" label="Sample column name" help="This is only required if an annotation file is provided">
                <sanitizer invalid_char="">
                    <valid initial="string.letters,string.digits">
                        <add value="_" />
                        <add value="-" />
                        <add value=":" />
                    </valid>
                </sanitizer>
                <!-- The hyphen is placed last so it is a literal rather than a range
                     operator, and the pattern is anchored so the whole value is
                     checked against the same characters the sanitizer allows. -->
                <validator type="regex" message="Only letters, digits, '_', '-' and ':' are allowed">^[0-9a-zA-Z:_-]+$</validator>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each output is created only when its group is ticked in the
             "outputs" selector. -->

        <!-- Plots: PDF files discovered in the Plots directory. -->
        <collection name="plots" type="list" label="${tool.name} on ${on_string}: Plots">
            <discover_datasets pattern="__designation_and_ext__" format="pdf" directory="Plots" />
            <filter>"plots" in outputs</filter>
        </collection>

        <!-- Tables: files collected from the Tables directory. -->
        <data name="module" format="tabular" from_work_dir="Tables/module.tsv" label="${tool.name} on ${on_string}: Module">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="modules_genes" format="tabular" from_work_dir="Tables/modules_genes.gmt" label="${tool.name} on ${on_string}: Module genes">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="parameters" format="tabular" from_work_dir="Tables/parameters.tsv" label="${tool.name} on ${on_string}: Parameters">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="selected_genes" format="txt" from_work_dir="Tables/selected_genes.txt" label="${tool.name} on ${on_string}: Selected genes">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="summary_eigengene" format="tabular" from_work_dir="Tables/summary_eigengene.tsv" label="${tool.name} on ${on_string}: Summary eigengenes">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="summary_mean" format="tabular" from_work_dir="Tables/summary_mean.tsv" label="${tool.name} on ${on_string}: Summary mean">
            <filter>"tables" in outputs</filter>
        </data>
        <data name="summary_median" format="tabular" from_work_dir="Tables/summary_median.tsv" label="${tool.name} on ${on_string}: Summary median">
            <filter>"tables" in outputs</filter>
        </data>
        <!-- Additionally requires that an interactions dataset was supplied. -->
        <data name="interactions_output" format="tabular" from_work_dir="Tables/interactions.tsv" label="${tool.name} on ${on_string}: Interactions">
            <filter>"tables" in outputs</filter>
            <filter>interactions</filter>
        </data>

        <!-- Report: the HTML report file. -->
        <data name="output_html" format="html" from_work_dir="Reports/Report/report.html" label="${tool.name} on ${on_string}: HTML report">
            <filter>"report" in outputs</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- Expression matrix only, default advanced parameters, HTML report
                 as the single selected output. -->
            <param name="expression_matrix" value="expression_matrix.tab"/>
            <param name="outputs" value="report"/>
            <section name="advanced_parameters">
                <param name="filter" value="true"/>
                <param name="filter_pval" value="0.1"/>
                <param name="apply_vst" value="false"/>
                <param name="n_genes" value="1000"/>
                <param name="eps" value="0.1"/>
                <param name="cor_method" value="pearson"/>
                <param name="cor_function" value="cor"/>
                <param name="network_type" value="signed"/>
                <param name="tom_type" value="signed"/>
                <param name="merge_similar" value="false"/>
                <param name="rank_method" value="mean"/>
                <param name="min_ngen" value="30"/>
                <param name="diss_thresh" value="0.8"/>
                <param name="center_func" value="mean"/>
                <param name="ora_pval" value="0.05"/>
                <param name="gsea_scale" value="true"/>
                <param name="gsea_min_size" value="15"/>
                <param name="gsea_max_size" value="1000"/>
            </section>
            <!-- The report must mention these strings. -->
            <output name="output_html">
                <assert_contents>
                    <has_text text="EPSTI1"/>
                    <has_text text="RSAD2"/>
                    <has_text text="XAF1"/>
                    <has_text text="OAS2"/>
                    <has_text text="Gene Set Enrichment Analysis"/>
                    <has_text text="SPARC"/>
                </assert_contents>
            </output>
        </test>
        
        <test expect_num_outputs="8">
            <!-- All four input datasets with only the "tables" group selected:
                 the eight table outputs (including the interactions table). -->
            <param name="expression_matrix" value="expression_matrix.tab"/>
            <param name="annotation" value="sample_annotation.tab"/>
            <param name="pathways" value="pathways.gmt"/>
            <param name="interactions" value="interactions.tab"/>
            <param name="outputs" value="tables"/>
            <section name="advanced_parameters">
                <param name="filter" value="true"/>
                <param name="filter_pval" value="0.1"/>
                <param name="apply_vst" value="false"/>
                <param name="n_genes" value="1000"/>
                <param name="eps" value="0.1"/>
                <param name="cor_method" value="pearson"/>
                <param name="cor_function" value="cor"/>
                <param name="network_type" value="signed"/>
                <param name="tom_type" value="signed"/>
                <param name="merge_similar" value="false"/>
                <param name="rank_method" value="mean"/>
                <param name="min_ngen" value="30"/>
                <param name="diss_thresh" value="0.8"/>
                <param name="center_func" value="mean"/>
                <param name="ora_pval" value="0.05"/>
                <param name="gsea_scale" value="true"/>
                <param name="gsea_min_size" value="15"/>
                <param name="gsea_max_size" value="1000"/>
            </section>
            <!-- Tables are checked by line count and approximate byte size. -->
            <output name="module">
                <assert_contents>
                    <has_n_lines n="110"/>
                    <has_size value="2250" delta="200"/>
                </assert_contents>
            </output>
            <output name="modules_genes">
                <assert_contents>
                    <has_n_lines n="1"/>
                    <has_size value="216" delta="10"/>
                </assert_contents>
            </output>
            <output name="parameters">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_size value="234" delta="10"/>
                </assert_contents>
            </output>
            <output name="selected_genes">
                <assert_contents>
                    <has_n_lines n="1000"/>
                    <has_size value="6496" delta="10"/>
                </assert_contents>
            </output>
            <output name="summary_eigengene">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_size value="2241" delta="10"/>
                </assert_contents>
            </output>
            <output name="summary_mean">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_size value="2044" delta="50"/>
                </assert_contents>
            </output>
            <output name="summary_median">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_size value="1621" delta="10"/>
                </assert_contents>
            </output>
            <output name="interactions_output">
                <assert_contents>
                    <has_n_lines n="277"/>
                    <has_size value="7736" delta="200"/>
                </assert_contents>
            </output>
        </test>

        <test expect_num_outputs="10">
            <!--Test custom inputs and plots-->
            <!-- All three output groups are selected, so ten outputs are expected:
                 the HTML report, the eight tables and the plots collection. -->
            <param name="expression_matrix" value="expression_matrix.tab"/>
            <param name="annotation" value="sample_annotation.tab"/>
            <param name="pathways" value="pathways.gmt"/>
            <param name="interactions" value="interactions.tab"/>
            <param name="outputs" value="report,tables,plots"/>
            <!-- Advanced parameters set explicitly for this run. -->
            <section name="advanced_parameters">
                <param name="filter" value="false"/>
                <param name="filter_pval" value="0.2"/>
                <param name="apply_vst" value="true"/>
                <param name="n_genes" value="2000"/>
                <param name="eps" value="0.1"/>
                <param name="cor_method" value="spearman"/>
                <param name="cor_function" value="bicor"/>
                <param name="network_type" value="unsigned"/>
                <param name="tom_type" value="unsigned"/>
                <param name="merge_similar" value="true"/>
                <param name="rank_method" value="median"/>
                <param name="min_ngen" value="35"/>
                <param name="diss_thresh" value="0.7"/>
                <param name="center_func" value="median"/>
                <param name="ora_pval" value="0.07"/>
                <param name="gsea_scale" value="false"/>
                <param name="gsea_min_size" value="10"/>
                <param name="gsea_max_size" value="1100"/>
            </section>
            <!-- Tables are checked by line count and approximate byte size. -->
            <output name="module">
                <assert_contents>
                    <has_n_lines n="1929"/>
                    <has_size value="26528" delta="200"/>
                </assert_contents>
            </output>
            <output name="modules_genes">
                <assert_contents>
                    <has_n_lines n="10"/>
                    <has_size value="15814" delta="200"/>
                </assert_contents>
            </output>
            <output name="parameters">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_size value="234" delta="10"/>
                </assert_contents>
            </output>
            <output name="selected_genes">
                <assert_contents>
                    <has_n_lines n="1928"/>
                    <has_size value="12570" delta="200"/>
                </assert_contents>
            </output>
            <output name="summary_eigengene">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_size value="9963" delta="200"/>
                </assert_contents>
            </output>
            <output name="summary_mean">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_size value="8898" delta="200"/>
                </assert_contents>
            </output>
            <output name="summary_median">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_size value="6574" delta="200"/>
                </assert_contents>
            </output>
            <output name="interactions_output">
                <assert_contents>
                    <has_n_lines n="2579"/>
                    <has_size value="55341" delta="200"/>
                </assert_contents>
            </output>
            <!-- Ten plot elements are expected; only "profile" is compared,
                 by size, against profiles.pdf. -->
            <output_collection name="plots" type="list" count="10">
                <element name="profile" file="profiles.pdf" compare="sim_size" delta="100"/>
            </output_collection>
        </test>

    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

The CEMiTool R package provides users with an easy-to-use method to automatically implement gene co-expression network analyses, obtain key information about the
discovered gene modules using additional downstream analyses and retrieve publication-ready results via a high-quality interactive report.

    ]]></help>
    <expand macro="citations" />
</tool>