diff --git a/bin/evidence_string_generation.py b/bin/evidence_string_generation.py
index c3c2fd45..c24e8f20 100755
--- a/bin/evidence_string_generation.py
+++ b/bin/evidence_string_generation.py
@@ -9,12 +9,10 @@
 parser.add_argument('--gene-mapping', help='Variant to gene & consequence mappings', required=True)
 parser.add_argument('--ot-schema',    help='OpenTargets schema JSON',                required=True)
 parser.add_argument('--out',          help='Output directory',                       required=True)
-parser.add_argument('--include-structural', help='Use structural variants consequence prediction pipeline',
-                    action='store_true', default=False, required=False)
 
 
 if __name__ == '__main__':
     args = parser.parse_args()
     clinvar_to_evidence_strings.launch_pipeline(
         clinvar_xml_file=args.clinvar_xml, efo_mapping_file=args.efo_mapping, gene_mapping_file=args.gene_mapping,
-        ot_schema_file=args.ot_schema, dir_out=args.out, include_structural=args.include_structural)
+        ot_schema_file=args.ot_schema, dir_out=args.out)
diff --git a/consequence_prediction/run_repeat_expansion_variants.py b/consequence_prediction/run_repeat_expansion_variants.py
index abc04d4b..e9e2ecaa 100755
--- a/consequence_prediction/run_repeat_expansion_variants.py
+++ b/consequence_prediction/run_repeat_expansion_variants.py
@@ -14,7 +14,7 @@
     help='File to output functional consequences to. Format is compatible with the main VEP mapping pipeline.'
 )
 parser.add_argument(
-    '--output-dataframe', required=True,
+    '--output-dataframe', required=False,
     help='File to output full dataframe for subsequent analysis and debugging.'
 )
 args = parser.parse_args()
diff --git a/consequence_prediction/run_structural_variants.py b/consequence_prediction/run_structural_variants.py
new file mode 100755
index 00000000..cd2117c2
--- /dev/null
+++ b/consequence_prediction/run_structural_variants.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""A wrapper script for running the structural variants pipeline."""
+
+import argparse
+import structural_variants.pipeline
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    '--clinvar-xml', required=True,
+    help='ClinVar XML dump file (ClinVarFullRelease_00-latest.xml.gz)'
+)
+parser.add_argument(
+    '--output-consequences', required=True,
+    help='File to output functional consequences to. Format is compatible with the main VEP mapping pipeline.'
+)
+
+args = parser.parse_args()
+structural_variants.pipeline.main(args.clinvar_xml, args.output_consequences)
diff --git a/consequence_prediction/structural_variants/pipeline.py b/consequence_prediction/structural_variants/pipeline.py
index 232ec150..214f7ca7 100644
--- a/consequence_prediction/structural_variants/pipeline.py
+++ b/consequence_prediction/structural_variants/pipeline.py
@@ -68,7 +68,16 @@ def get_vep_results(clinvar_xml):
     return vep_results
 
 
-def main(clinvar_xml):
+def generate_consequences_file(consequences, output_consequences):
+    """Write the consequences table to a TSV file; log and skip if it is empty."""
+    if consequences.empty:
+        logger.info('There are no records ready for output')
+        return
+    # Write the consequences table. This is used by the main evidence string generation pipeline.
+    consequences.to_csv(output_consequences, sep='\t', index=False, header=False)
+
+
+def main(clinvar_xml, output_consequences=None):
     vep_results = get_vep_results(clinvar_xml)
     results_by_variant = extract_consequences(vep_results=vep_results, acceptable_biotypes={'protein_coding', 'miRNA'})
     variant_data = []
@@ -82,4 +91,6 @@ def main(clinvar_xml):
     # Return as a dataframe to be compatible with repeat expansion pipeline
     consequences = pd.DataFrame(variant_data, columns=('VariantID', 'EnsemblGeneID',
                                                        'EnsemblGeneName', 'ConsequenceTerm'))
+    if output_consequences is not None:
+        generate_consequences_file(consequences, output_consequences)
     return consequences
diff --git a/docs/build.md b/docs/build.md
index 5900aa84..9003b71d 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -52,6 +52,14 @@ export PYTHONPATH=${INSTALL_PATH}
 
 The installed Python version can then be called with either `python` or `python3`. You can also use either `pip` or `pip3` to install packages into this local distribution.
 
+## Nextflow installation
+
+The evidence string generation pipeline uses Nextflow, which itself relies on Java. You can install it in the current directory as follows:
+```bash
+wget -qO- https://get.nextflow.io | bash
+```
+You can then include this in your `$PATH` variable if necessary, or invoke the executable directly. For more details on installing Nextflow, see the [documentation](https://www.nextflow.io/docs/latest/getstarted.html).
+
 ## Deploying local OLS installation
 During the preparation of 2019_04 release, which had to be synchronized with EFO v3, OLS had to be deployed locally because the production deployment of OLS on www.ebi.ac.uk/ols only supported EFO v2 at the time. This can be done using the following command (substitute the image version as appropriate):
 
diff --git a/docs/environment.md b/docs/environment.md
index e7125a35..788b9acd 100644
--- a/docs/environment.md
+++ b/docs/environment.md
@@ -1,9 +1,9 @@
 # Setting up the common environment
 
-1. Log in to the LSF cluster, where all data processing must take place.
+1. Log in to the LSF cluster (currently `codon`), where all data processing must take place.
 1. Using a `become` command, switch to a common EVA production user instead of your personal account.
 1. Adjust and execute the commands below. They will set up the environment, fetch and build the code. Notes:
-    - The first five variables are installation-specific and are blanked in this repository. You can get the values for the EVA installation from the [private repository](https://github.com/EBIvariation/configuration/blob/master/open-targets-configuration.md).
+    - The first six variables are installation-specific and are blanked in this repository. You can get the values for the EVA installation from the [private repository](https://github.com/EBIvariation/configuration/blob/master/open-targets-configuration.md).
     - By modifying the `GIT_REMOTE` and `GIT_BRANCH` variables, you can run an arbitrary version of the pipeline. This can be used for development and debugging. By default it will fetch the master branch from the main pipeline repository.
     - Running these commands will overwrite any local changes you had in the repository copy on the cluster.
 
@@ -17,6 +17,9 @@ export PYTHON_INSTALL_PATH=
 # Location of bcftools installation path
 export BCFTOOLS_INSTALL_PATH=
 
+# Location of Nextflow installation path
+export NEXTFLOW_INSTALL_PATH=
+
 # The directory where subdirectories for each batch will be created
 export BATCH_ROOT_BASE=
 
@@ -26,8 +29,8 @@ export FTP_PATH_BASE=
 # Base bsub command line for all commands.
 export BSUB_CMDLINE="bsub"
 
-# Setting up Python paths
-export PATH=${PYTHON_INSTALL_PATH}:${BCFTOOLS_INSTALL_PATH}:$PATH
+# Setting up paths
+export PATH=${PYTHON_INSTALL_PATH}:${BCFTOOLS_INSTALL_PATH}:${NEXTFLOW_INSTALL_PATH}:$PATH
 export PYTHONPATH=${PYTHON_INSTALL_PATH}
 
 # External service paths
@@ -44,4 +47,7 @@ source env/bin/activate
 python3 -m pip -q install --upgrade pip setuptools
 python3 -m pip -q install -r requirements.txt
 python3 setup.py install
+
+# Location of Python executable, pointing to the virtualenv
+export PYTHON_BIN=${CODE_ROOT}/env/bin/python
 ```
diff --git a/docs/generate-evidence-strings.md b/docs/generate-evidence-strings.md
index 49bcd3b0..e69e6e86 100644
--- a/docs/generate-evidence-strings.md
+++ b/docs/generate-evidence-strings.md
@@ -18,7 +18,7 @@ Next, set up the protocol-specific environment:
 export OT_RELEASE=YYYY-MM
 
 # Open Targets JSON schema version.
-export OT_SCHEMA_VERSION=2.2.6
+export OT_SCHEMA_VERSION=2.2.8
 ```
 
 ## 1. Process data
@@ -31,58 +31,15 @@ mkdir -p ${BATCH_ROOT}
 cd ${BATCH_ROOT}
 mkdir -p clinvar gene_mapping evidence_strings logs
 
-# Download ClinVar data. We always use the most recent XML dump, which contains all data for the release.
-wget \
-  -O ${BATCH_ROOT}/clinvar/ClinVarFullRelease_00-latest.xml.gz \
-  https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz
-
-# Download the Open Targets JSON schema.
-wget \
-  -O ${BATCH_ROOT}/evidence_strings/opentargets-${OT_SCHEMA_VERSION}.json \
-  https://raw.githubusercontent.com/opentargets/json_schema/${OT_SCHEMA_VERSION}/opentargets.json
-
-# Run ClinVar variants through VEP and map them to genes and functional consequences.
-${BSUB_CMDLINE} -K -M 10G \
-  -o ${BATCH_ROOT}/logs/consequence_vep.out \
-  -e ${BATCH_ROOT}/logs/consequence_vep.err \
-  bash ${CODE_ROOT}/consequence_prediction/run_consequence_mapping.sh \
-    ${BATCH_ROOT}/clinvar/ClinVarFullRelease_00-latest.xml.gz \
-    ${BATCH_ROOT}/gene_mapping/consequences_vep.tsv
-
-# Generate the evidence strings for submission to Open Targets.
-${BSUB_CMDLINE} -K -M 10G \
-  -o ${BATCH_ROOT}/logs/evidence_string_generation.out \
-  -e ${BATCH_ROOT}/logs/evidence_string_generation.err \
-  python3 ${CODE_ROOT}/bin/evidence_string_generation.py \
-    --clinvar-xml  ${BATCH_ROOT}/clinvar/ClinVarFullRelease_00-latest.xml.gz \
-    --efo-mapping  ${BATCH_ROOT_BASE}/manual_curation/latest_mappings.tsv \
-    --gene-mapping ${BATCH_ROOT}/gene_mapping/consequences_vep.tsv \
-    --ot-schema    ${BATCH_ROOT}/evidence_strings/opentargets-${OT_SCHEMA_VERSION}.json \
-    --out          ${BATCH_ROOT}/evidence_strings/ \
-    --include-structural
-
-# Check that the generated evidence strings do not contain any duplicated evidence strings. 
-#    For every evidence string, we group the value of fields datatypeId, studyId, 
-#    targetFromSourceId, variantId, variantFunctionalConsequenceId and diseaseFromSourceMappedId, 
-#    all separated by tabs, sorted and saved at duplicates.tsv if found duplicated. 
-jq --arg sep $'\t' -jr \
-  '.datatypeId,$sep,.studyId,$sep,.targetFromSourceId,$sep,.variantId,$sep,.variantFunctionalConsequenceId,$sep,.diseaseFromSourceMappedId,$sep,.diseaseFromSource,"\n"' \
-  ${BATCH_ROOT}/evidence_strings/evidence_strings.json \
-  | sort | uniq -d > ${BATCH_ROOT}/evidence_strings/duplicates.tsv
-
-# Convert MedGen and OMIM cross-references into ZOOMA format.
-${BSUB_CMDLINE} -K \
-  -o ${BATCH_ROOT}/logs/traits_to_zooma_format.out \
-  -e ${BATCH_ROOT}/logs/traits_to_zooma_format.err \
-  python3 ${CODE_ROOT}/bin/traits_to_zooma_format.py \
-    --clinvar-xml    ${BATCH_ROOT}/clinvar/ClinVarFullRelease_00-latest.xml.gz \
-    --zooma-feedback ${BATCH_ROOT}/clinvar/clinvar_xrefs.txt
+# Run the Nextflow pipeline, resuming execution of a previous attempt if possible.
+nextflow run ${CODE_ROOT}/eva_cttv_pipeline/evidence_string_generation/pipeline.nf \
+  --batch_root ${BATCH_ROOT} \
+  --schema ${OT_SCHEMA_VERSION} \
+  -resume
 ```
 
-## 2. Manual follow-up actions
-
-### Check that generated evidence strings do not contain any duplicates
-The algorithm used for generating the evidence strings should not allow any duplicate values to be emitted, and the file `${BATCH_ROOT}/evidence_strings/duplicates.tsv` should be empty. Check that this is the case.
+### Note on duplication checks
+The algorithm used for generating the evidence strings should not allow any duplicate values to be emitted, and the automated pipeline should fail with an error if duplicates are detected.
 
 A repeated evidence string will have identical values for these five fields:
 * **datatypeId** - Identifier of the type of data we are associating, varying between somatic and non-somatic ClinVar records (*e.g.* ``somatic_mutation`` or ``genetic_association`` respectively). 
@@ -94,6 +51,8 @@ A repeated evidence string will have identical values for these five fields:
 
 Nevertheless, we also report evidence strings in which  ``diseaseFromSourceMappedId`` may be empty (``diseaseFromSourceMappedId: null``) - i.e. the phenotype has not been mapped to an ontology yet. Therefore, to check for duplicates we also take into account the field ``diseaseFromSource``, which is the string describing the phenotype within ClinVar records (and is never missing in any evidence string).
 
+## 2. Manual follow-up actions
+
 ### Update summary metrics
 After the evidence strings have been generated, summary metrics need to be updated in the Google Sheets [table](https://docs.google.com/spreadsheets/d/1g_4tHNWP4VIikH7Jb0ui5aNr0PiFgvscZYOe69g191k/) on the “Raw statistics” sheet.
 
diff --git a/eva_cttv_pipeline/evidence_string_generation/clinvar_to_evidence_strings.py b/eva_cttv_pipeline/evidence_string_generation/clinvar_to_evidence_strings.py
index 00ad502a..5f4ca7e4 100644
--- a/eva_cttv_pipeline/evidence_string_generation/clinvar_to_evidence_strings.py
+++ b/eva_cttv_pipeline/evidence_string_generation/clinvar_to_evidence_strings.py
@@ -8,8 +8,6 @@
 
 import jsonschema
 
-from consequence_prediction.repeat_expansion_variants import pipeline as repeat_pipeline
-from consequence_prediction.structural_variants import pipeline as structural_pipeline
 from eva_cttv_pipeline.clinvar_xml_io import clinvar_xml_io
 from eva_cttv_pipeline.evidence_string_generation import consequence_type as CT
 
@@ -113,29 +111,20 @@ def validate_evidence_string(ev_string, ot_schema_contents):
         sys.exit(1)
 
 
-def launch_pipeline(clinvar_xml_file, efo_mapping_file, gene_mapping_file, ot_schema_file, dir_out,
-                    include_structural=False):
+def launch_pipeline(clinvar_xml_file, efo_mapping_file, gene_mapping_file, ot_schema_file, dir_out):
     os.makedirs(dir_out, exist_ok=True)
     string_to_efo_mappings = load_efo_mapping(efo_mapping_file)
-
-    repeat_consequences = repeat_pipeline.main(clinvar_xml_file)
-    if include_structural:
-        structural_consequences = structural_pipeline.main(clinvar_xml_file)
-        complex_consequences = CT.process_consequence_type_dataframes(repeat_consequences, structural_consequences)
-    else:
-        complex_consequences = CT.process_consequence_type_dataframes(repeat_consequences)
-    variant_to_gene_mappings = CT.process_consequence_type_file(gene_mapping_file, complex_consequences)
+    variant_to_gene_mappings = CT.process_consequence_type_file(gene_mapping_file)
 
     report = clinvar_to_evidence_strings(
         string_to_efo_mappings, variant_to_gene_mappings, clinvar_xml_file, ot_schema_file,
-        output_evidence_strings=os.path.join(dir_out, EVIDENCE_STRINGS_FILE_NAME),
-        include_structural=include_structural)
+        output_evidence_strings=os.path.join(dir_out, EVIDENCE_STRINGS_FILE_NAME))
     print(report.collate_report())
     report.write_unmapped_terms(dir_out)
 
 
 def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings, clinvar_xml, ot_schema,
-                                output_evidence_strings, include_structural):
+                                output_evidence_strings):
     report = Report(trait_mappings=string_to_efo_mappings, consequence_mappings=variant_to_gene_mappings)
     ot_schema_contents = json.loads(open(ot_schema).read())
     output_evidence_strings_file = open(output_evidence_strings, 'wt')
@@ -160,7 +149,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
         # Within each ClinVar record, an evidence string is generated for all possible permutations of (1) valid allele
         # origins, (2) EFO mappings, and (3) genes where the variant has effect.
         grouped_allele_origins = convert_allele_origins(clinvar_record.valid_allele_origins)
-        consequence_types = get_consequence_types(clinvar_record.measure, variant_to_gene_mappings, include_structural)
+        consequence_types = get_consequence_types(clinvar_record.measure, variant_to_gene_mappings)
         grouped_diseases = group_diseases_by_efo_mapping(clinvar_record.traits_with_valid_names,
                                                          string_to_efo_mappings)
 
@@ -192,8 +181,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
                 grouped_allele_origins, grouped_diseases, consequence_types):
             disease_name, disease_source_id, disease_mapped_efo_id = disease_attributes
             evidence_string = generate_evidence_string(clinvar_record, allele_origins, disease_name, disease_source_id,
-                                                       disease_mapped_efo_id, consequence_attributes,
-                                                       include_structural=include_structural)
+                                                       disease_mapped_efo_id, consequence_attributes)
 
             # Validate and immediately output the evidence string (not keeping everything in memory).
             validate_evidence_string(evidence_string, ot_schema_contents)
@@ -219,7 +207,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
 
 
 def generate_evidence_string(clinvar_record, allele_origins, disease_name, disease_source_id, disease_mapped_efo_id,
-                             consequence_attributes, include_structural=False):
+                             consequence_attributes):
     """Generates an evidence string based on ClinVar record and some additional attributes."""
     is_somatic = allele_origins == ['somatic']
     evidence_string = {
@@ -268,7 +256,7 @@ def generate_evidence_string(clinvar_record, allele_origins, disease_name, disea
         # required by the Open Targets JSON schema.
         'diseaseFromSourceMappedId': disease_mapped_efo_id.split('/')[-1] if disease_mapped_efo_id else None,
     }
-    if include_structural and clinvar_record.measure.preferred_current_hgvs:
+    if clinvar_record.measure.preferred_current_hgvs:
         evidence_string['variantHgvsId'] = clinvar_record.measure.preferred_current_hgvs.text
 
     # Remove the attributes with empty values (either None or empty lists).
@@ -276,7 +264,7 @@ def generate_evidence_string(clinvar_record, allele_origins, disease_name, disea
     return evidence_string
 
 
-def get_consequence_types(clinvar_record_measure, consequence_type_dict, include_structural=False):
+def get_consequence_types(clinvar_record_measure, consequence_type_dict):
     """Returns the list of functional consequences for a given ClinVar record measure.
 
     This is the place where ClinVar records are paired with the information about gene and functional consequences.
@@ -317,7 +305,7 @@ def get_consequence_types(clinvar_record_measure, consequence_type_dict, include
             return consequence_type_dict[coord_id]
 
     # If there's also no complete coordinates, pair using HGVS
-    if include_structural and clinvar_record_measure.preferred_current_hgvs:
+    if clinvar_record_measure.preferred_current_hgvs:
         hgvs_id = clinvar_record_measure.preferred_current_hgvs.text
         if hgvs_id in consequence_type_dict:
             consequences = consequence_type_dict[hgvs_id]
diff --git a/eva_cttv_pipeline/evidence_string_generation/consequence_type.py b/eva_cttv_pipeline/evidence_string_generation/consequence_type.py
index d2b10379..f224f1d9 100644
--- a/eva_cttv_pipeline/evidence_string_generation/consequence_type.py
+++ b/eva_cttv_pipeline/evidence_string_generation/consequence_type.py
@@ -13,23 +13,6 @@ def process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term):
     consequence_type_dict[variant_id].append(ConsequenceType(ensembl_gene_id, SoTerm(so_term)))
 
 
-def process_consequence_type_dataframes(*dataframes):
-    """
-    Return a dictionary of consequence information extracted from one or more dataframes.
-    Assumes all dataframes are in the same format.
-    """
-    consequence_type_dict = defaultdict(list)
-    for consequences_dataframe in dataframes:
-        for row in consequences_dataframe.itertuples():
-            variant_id = row[1]
-            ensembl_gene_id = row[2]
-            so_term = row[4]
-
-            process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term)
-
-    return consequence_type_dict
-
-
 def process_consequence_type_file(snp_2_gene_file, consequence_type_dict=None):
     """
     Return a dictionary of consequence information extracted from the given file.
@@ -88,7 +71,6 @@ class SoTerm(object):
     Represents a sequence ontology term belonging to a consequence type object.
     Holds information on accession and rank.
     """
-
     so_accession_name_dict = get_so_accession_dict()
 
     ranked_so_names_list = get_severity_ranking()
diff --git a/eva_cttv_pipeline/evidence_string_generation/pipeline.nf b/eva_cttv_pipeline/evidence_string_generation/pipeline.nf
new file mode 100644
index 00000000..bda65227
--- /dev/null
+++ b/eva_cttv_pipeline/evidence_string_generation/pipeline.nf
@@ -0,0 +1,266 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl=2
+
+
+def helpMessage() {
+    log.info"""
+    Generate ClinVar evidence strings for Open Targets.
+    
+    Params:
+        --batch_root    Directory for current batch
+        --schema        Open Targets JSON schema version
+        --clinvar       ClinVar XML file (optional, will download latest if omitted)
+    """
+}
+
+params.help = null
+params.batch_root = null
+params.schema = null
+params.clinvar = null
+
+if (params.help) {
+    exit 0, helpMessage()
+}
+if (!params.batch_root || !params.schema) {
+    exit 1, helpMessage()
+}
+batchRoot = params.batch_root
+
+
+/*
+ * Main workflow.
+ */
+workflow {
+    if (params.clinvar != null) {
+        clinvarXml = Channel.fromPath(params.clinvar)
+    } else {
+        clinvarXml = downloadClinvar()
+    }
+    downloadJsonSchema()
+
+    runSnpIndel(clinvarXml)
+    runRepeat(clinvarXml)
+    runStructural(clinvarXml)
+    combineConsequences(runSnpIndel.out.consequencesSnp,
+                        runRepeat.out.consequencesRepeat,
+                        runStructural.out.consequencesStructural)
+    
+    generateEvidence(clinvarXml,
+                     downloadJsonSchema.out.jsonSchema,
+                     combineConsequences.out.consequencesCombined)
+    checkDuplicates(generateEvidence.out.evidenceStrings)
+
+    convertXrefs(clinvarXml)
+}
+
+/*
+ * Download ClinVar data, using the most recent XML dump.
+ */
+process downloadClinvar {
+    output:
+    path "clinvar.xml.gz", emit: clinvarXml
+
+    script:
+    """
+    wget -O clinvar.xml.gz \
+        https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz
+    """
+}
+
+/*
+ * Download the Open Targets JSON schema.
+ */
+process downloadJsonSchema {
+    output:
+    path "opentargets-${params.schema}.json", emit: jsonSchema
+
+    script:
+    """
+    wget -O opentargets-${params.schema}.json \
+        https://raw.githubusercontent.com/opentargets/json_schema/${params.schema}/opentargets.json
+    """
+}
+
+/*
+ * Run simple variants (SNPs and other variants with complete coordinates) through VEP and map them
+ * to genes and functional consequences.
+ */
+process runSnpIndel {
+    clusterOptions "-o ${batchRoot}/logs/consequence_snp.out \
+                    -e ${batchRoot}/logs/consequence_snp.err"
+
+    publishDir "${batchRoot}/gene_mapping",
+        overwrite: true,
+        mode: "copy",
+        pattern: "*.tsv"
+
+    input:
+    path clinvarXml
+
+    output:
+    path "consequences_snp.tsv", emit: consequencesSnp
+
+    script:
+    """
+    \${PYTHON_BIN} "\${CODE_ROOT}/consequence_prediction/extract_variants_for_vep.py" --clinvar-xml ${clinvarXml} \
+    | sort -u \
+    | parallel \
+        --halt now,fail=1    `# If any job fails, kill the remaining ones immediately and report failure` \
+        --pipe               `# Input is read from STDIN and split by chunks`                             \
+        -j 20                `# Number of concurrent workers`                                             \
+        -N 200               `# Number of records (lines) per worker`                                     \
+        --tmpdir .           `# Store temporary files in the current directory to avoid /tmp overflow`    \
+        \${PYTHON_BIN} "\${CODE_ROOT}/consequence_prediction/vep_mapping_pipeline/consequence_mapping.py" \
+    | sort -u > consequences_snp.tsv
+    """
+}
+
+/*
+ * Extract repeat expansion variants from ClinVar and map them to genes.
+ */
+process runRepeat {
+   clusterOptions "-o ${batchRoot}/logs/consequence_repeat.out \
+                   -e ${batchRoot}/logs/consequence_repeat.err"
+
+   publishDir "${batchRoot}/gene_mapping",
+       overwrite: true,
+       mode: "copy",
+       pattern: "*.tsv"
+
+   input:
+   path clinvarXml
+
+   output:
+   path "consequences_repeat.tsv", emit: consequencesRepeat
+
+   script:
+   """
+   \${PYTHON_BIN} \${CODE_ROOT}/consequence_prediction/run_repeat_expansion_variants.py \
+        --clinvar-xml ${clinvarXml} \
+        --output-consequences consequences_repeat.tsv
+
+    # create an empty file if nothing generated
+    [[ -f consequences_repeat.tsv ]] || touch consequences_repeat.tsv
+   """
+}
+
+/*
+ * Run consequence and gene mapping for structural variants (i.e. no complete coordinates and not
+ * known repeat expansions).
+ */
+process runStructural {
+   clusterOptions "-o ${batchRoot}/logs/consequence_structural.out \
+                   -e ${batchRoot}/logs/consequence_structural.err"
+
+   publishDir "${batchRoot}/gene_mapping",
+       overwrite: true,
+       mode: "copy",
+       pattern: "*.tsv"
+
+   input:
+   path clinvarXml
+
+   output:
+   path "consequences_structural.tsv", emit: consequencesStructural
+
+   script:
+   """
+   \${PYTHON_BIN} \${CODE_ROOT}/consequence_prediction/run_structural_variants.py \
+        --clinvar-xml ${clinvarXml} \
+        --output-consequences consequences_structural.tsv
+
+    # create an empty file if nothing generated
+    [[ -f consequences_structural.tsv ]] || touch consequences_structural.tsv
+   """
+}
+
+/*
+ * Unite results of consequence mapping.
+ */
+process combineConsequences {
+    input:
+    path consequencesSnp
+    path consequencesRepeat
+    path consequencesStructural
+
+    output:
+    path "consequences_combined.tsv", emit: consequencesCombined
+
+    script:
+    """
+    cat ${consequencesRepeat} ${consequencesSnp} ${consequencesStructural} > consequences_combined.tsv
+    """
+}
+
+/*
+ * Generate the evidence strings for submission to Open Targets.
+ */
+process generateEvidence {
+    clusterOptions "-o ${batchRoot}/logs/evidence_string_generation.out \
+                    -e ${batchRoot}/logs/evidence_string_generation.err"
+
+    publishDir "${batchRoot}/evidence_strings",
+        overwrite: true,
+        mode: "copy",
+        pattern: "*.json"
+
+    input:
+    path clinvarXml
+    path jsonSchema
+    path consequenceMappings
+
+    output:
+    path "evidence_strings.json", emit: evidenceStrings
+
+    script:
+    """
+    \${PYTHON_BIN} \${CODE_ROOT}/bin/evidence_string_generation.py \
+        --clinvar-xml ${clinvarXml} \
+        --efo-mapping \${BATCH_ROOT_BASE}/manual_curation/latest_mappings.tsv \
+        --gene-mapping ${consequenceMappings} \
+        --ot-schema ${jsonSchema} \
+        --out .
+    """
+}
+
+/*
+ * Check that the generated evidence strings do not contain any duplicated evidence strings.
+ */
+process checkDuplicates {
+    input:
+    path evidenceStrings
+
+    script:
+    """
+    jq --arg sep \$'\t' -jr \
+        '.datatypeId,\$sep,.studyId,\$sep,.targetFromSourceId,\$sep,.variantId,\$sep,.variantFunctionalConsequenceId,\$sep,.diseaseFromSourceMappedId,\$sep,.diseaseFromSource,"\n"' \
+        ${evidenceStrings} | sort | uniq -d > duplicates.tsv
+    [[ ! -s duplicates.tsv ]]
+    """
+}
+
+/*
+ * Convert MedGen and OMIM cross-references into ZOOMA format.
+ */
+process convertXrefs {
+    clusterOptions "-o ${batchRoot}/logs/traits_to_zooma_format.out \
+                    -e ${batchRoot}/logs/traits_to_zooma_format.err"
+
+    publishDir "${batchRoot}/clinvar",
+        overwrite: true,
+        mode: "copy",
+        pattern: "*.txt"
+
+    input:
+    path clinvarXml
+
+    output:
+    path "clinvar_xrefs.txt", emit: clinvarXrefs
+
+    """
+    \${PYTHON_BIN} \${CODE_ROOT}/bin/traits_to_zooma_format.py \
+        --clinvar-xml ${clinvarXml} \
+        --zooma-feedback clinvar_xrefs.txt
+    """
+}
diff --git a/setup.py b/setup.py
index 8ad34e3b..5d136262 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ def get_requires():
 
 
 setup(name='eva_cttv_pipeline',
-      version='2.5.4',
+      version='2.6.0',
       packages=find_packages(),
       install_requires=get_requires(),
       #! TBD: list as a dependency subpackage 'clinvar_xml_utils.clinvar_xml_utils.clinvar_xml_utils'
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/config.py b/tests/eva_cttv_pipeline/evidence_string_generation/config.py
index c7d7593e..40192d6b 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/config.py
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/config.py
@@ -2,7 +2,7 @@
 
 from eva_cttv_pipeline.clinvar_xml_io import clinvar_xml_io
 
-OT_SCHEMA_VERSION = "2.2.6"
+OT_SCHEMA_VERSION = "2.2.8"
 
 test_dir = os.path.dirname(__file__)
 efo_mapping_file = os.path.join(test_dir, 'resources', 'string_to_ontology_mappings.tsv')
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_genetics_evidence_string.json b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_genetics_evidence_string.json
index 47ea7401..8245a994 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_genetics_evidence_string.json
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_genetics_evidence_string.json
@@ -22,6 +22,7 @@
   "studyId": "RCV000002127",
   "targetFromSourceId": "ENSG00000139988",
   "variantFunctionalConsequenceId": "SO_0001583",
+  "variantHgvsId": "NC_000014.9:g.67729209A>G",
   "variantId": "14_67729209_A_G",
   "variantRsId": "rs28940313"
 }
\ No newline at end of file
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_multiple_names_evidence_string.json b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_multiple_names_evidence_string.json
index 5740864d..a7a398f2 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_multiple_names_evidence_string.json
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_multiple_names_evidence_string.json
@@ -23,6 +23,7 @@
   "studyId": "RCV000415158",
   "targetFromSourceId": "ENSG00000139988",
   "variantFunctionalConsequenceId": "SO_0001583",
+  "variantHgvsId": "NC_000007.14:g.94423102G>A",
   "variantId": "7_94423102_G_A",
   "variantRsId": "rs1057518967"
 }
\ No newline at end of file
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_somatic_evidence_string.json b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_somatic_evidence_string.json
index a42b3944..8dee46cf 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_somatic_evidence_string.json
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/resources/expected_somatic_evidence_string.json
@@ -22,6 +22,7 @@
   "studyId": "RCV000002127",
   "targetFromSourceId": "ENSG00000139988",
   "variantFunctionalConsequenceId": "SO_0001583",
+  "variantHgvsId": "NC_000014.9:g.67729209A>G",
   "variantId": "14_67729209_A_G",
   "variantRsId": "rs28940313"
 }
\ No newline at end of file
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/test_clinvar_to_evidence_strings.py b/tests/eva_cttv_pipeline/evidence_string_generation/test_clinvar_to_evidence_strings.py
index 39e73329..37dd45a5 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/test_clinvar_to_evidence_strings.py
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/test_clinvar_to_evidence_strings.py
@@ -130,14 +130,11 @@ def test_structural_variant_consequences(self):
         structural_crm = config.get_test_clinvar_record('test_structural_record.xml.gz').measure
         consequences = [CT.ConsequenceType('ENSG00000075151', CT.SoTerm('splice_polypyrimidine_tract_variant'))]
         consequence_dict = {structural_crm.preferred_current_hgvs.text: consequences}
-
-        # only get consequences from HGVS if include_structural is True
-        assert clinvar_to_evidence_strings.get_consequence_types(structural_crm, consequence_dict, False) == []
-        assert clinvar_to_evidence_strings.get_consequence_types(structural_crm, consequence_dict, True) == consequences
+        assert clinvar_to_evidence_strings.get_consequence_types(structural_crm, consequence_dict) == consequences
 
         # don't get consequences if there are more than MAX_TARGET_GENES
         long_consequence_dict = {structural_crm.preferred_current_hgvs.text: consequences * (MAX_TARGET_GENES+1)}
-        assert clinvar_to_evidence_strings.get_consequence_types(structural_crm, long_consequence_dict, True) == []
+        assert clinvar_to_evidence_strings.get_consequence_types(structural_crm, long_consequence_dict) == []
 
 
 class TestGenerateEvidenceStringTest:
diff --git a/tests/eva_cttv_pipeline/evidence_string_generation/test_consequence_type.py b/tests/eva_cttv_pipeline/evidence_string_generation/test_consequence_type.py
index 0cfbd92e..7ecc591d 100644
--- a/tests/eva_cttv_pipeline/evidence_string_generation/test_consequence_type.py
+++ b/tests/eva_cttv_pipeline/evidence_string_generation/test_consequence_type.py
@@ -25,18 +25,6 @@ def test_process_consequence_type_file_tsv():
     assert consequence_type_dict["14:67729241:C:T"][0] == test_consequence_type
 
 
-def test_process_consequence_type_dataframes():
-    dataframe_1 = pd.DataFrame(
-        [('NC_000011.10:g.5226797_5226798insGCC', 'ENSG00000244734', 'HBB', 'coding_sequence_variant')],
-        columns=('VariantID', 'EnsemblGeneID', 'EnsemblGeneName', 'ConsequenceTerm'))
-    dataframe_2 = pd.DataFrame(
-        [('RCV001051772', 'ENSG00000130711', 'PRDM12', 'trinucleotide_repeat_expansion')],
-        columns=('1', '2', '3', '4'))  # column names can be anything
-    consequence_type_dict = CT.process_consequence_type_dataframes(dataframe_1, dataframe_2)
-    assert consequence_type_dict['NC_000011.10:g.5226797_5226798insGCC'][0].ensembl_gene_id == 'ENSG00000244734'
-    assert consequence_type_dict['RCV001051772'][0].ensembl_gene_id == 'ENSG00000130711'
-
-
 def test_ensembl_so_term():
     so_term = CT.SoTerm('stop_gained')
     assert so_term.accession == 'SO_0001587'