From 8a20b4fbb420180f35bd4305a931e2cfe487421b Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 23 Dec 2020 16:20:01 -0800 Subject: [PATCH 1/8] Add initial I/O interface and tests Adds tests and code for new `open_file`, `read_sequences`, and `write_sequences` functions loosely based on a proposed API [1]. These functions transparently handle compressed inputs and outputs using the xopen library. The `open_file` function is a context manager that lightly wraps the `xopen` function and also supports either path strings or existing IO buffers. Both the read and write functions use this context manager to open files. This manager enables the common use case of writing to the same handle many times inside a for loop, by replacing the standard `open` call with `open_file`. Doing so, we maintain a Pythonic interface that also supports compressed file formats and path-or-buffer inputs. This context manager also enables input and output of any other file type in compressed formats (e.g., metadata, sequence indices, etc.). Note that the `read_sequences` and `write_sequences` functions do not infer the format of sequence files (e.g., FASTA, GenBank, etc.). Inferring file formats requires peeking at the first record in each given input, but peeking is not supported by piped inputs that we want to support (e.g., piped gzip inputs from xopen). There are also no internal use cases for Augur to read multiple sequences of different formats, so I can't currently justify the complexity required to support type inference. Instead, I opted for the same approach used by BioPython where the calling code must know the type of input file being passed. This isn't an unreasonable expectation for Augur's internal code. I also considered inferring file type by filename extensions like xopen infers compression modes. Filename extensions are less standardized across bioinformatics than we would like for this type of inference to work robustly. 
Tests ignore BioPython and pycov warnings to minimize warning fatigue
for issues we cannot address during test-driven development.

[1] https://github.com/nextstrain/augur/issues/645
---
 augur/io.py        | 116 +++++++++++++++++++++++++++
 pytest.python3.ini |   5 ++
 setup.py           |   3 +-
 tests/test_io.py   | 179 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 augur/io.py
 create mode 100644 tests/test_io.py

diff --git a/augur/io.py b/augur/io.py
new file mode 100644
index 000000000..d972800ef
--- /dev/null
+++ b/augur/io.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+"""Interfaces for reading and writing data also known as input/output (I/O)
+"""
+import Bio.SeqIO
+import Bio.SeqRecord
+from contextlib import contextmanager
+from pathlib import Path
+from xopen import xopen
+
+
+@contextmanager
+def open_file(path_or_buffer, mode="r", **kwargs):
+    """Opens a given file path and returns the handle.
+
+    Transparently handles compressed inputs and outputs.
+
+    Parameters
+    ----------
+    path_or_buffer : str or Path-like or IO buffer
+        Name of the file to open or an existing IO buffer
+
+    mode : str
+        Mode to open file (read or write)
+
+    Returns
+    -------
+    IO
+        File handle object
+
+    """
+    # Only guard the call that opens the path. Keeping the `yield` outside of
+    # the `try` lets a TypeError raised by the caller's `with` body propagate
+    # instead of being mistaken for "not a path" and triggering a second yield.
+    try:
+        handle = xopen(path_or_buffer, mode, **kwargs)
+    except TypeError:
+        # xopen raised a TypeError, which we take to mean the input is not a
+        # path but an existing buffer that the caller owns and will close.
+        handle = None
+
+    if handle is None:
+        yield path_or_buffer
+    else:
+        with handle as opened_handle:
+            yield opened_handle
+
+
+def read_sequences(*paths, format="fasta"):
+    """Read sequences from one or more paths.
+
+    Automatically infer compression mode (e.g., gzip, etc.) and return a stream
+    of sequence records in the requested format (e.g., "fasta", "genbank", etc.).
+
+    Parameters
+    ----------
+    paths : list of str or Path-like objects
+        One or more paths to sequence files of any type supported by BioPython.
+
+    format : str
+        Format of input sequences matching any of those supported by BioPython
+        (e.g., "fasta", "genbank", etc.).
+
+    Yields
+    ------
+    Bio.SeqRecord.SeqRecord
+        Sequence record from the given path(s).
+ + """ + for path in paths: + # Open the given path as a handle, inferring the file's compression. + # This way we can pass a handle to BioPython's SeqIO interface + # regardless of the compression mode. + with open_file(path) as handle: + sequences = Bio.SeqIO.parse(handle, format) + + for sequence in sequences: + yield sequence + + +def write_sequences(sequences, path_or_buffer, format="fasta"): + """Write sequences to a given path in the given format. + + Automatically infer compression mode (e.g., gzip, etc.) based on the path's + filename extension. + + Parameters + ---------- + sequences : iterable of Bio.SeqRecord.SeqRecord objects + A list-like collection of sequences to write + + path_or_buffer : str or Path-like object or IO buffer + A path to a file to write the given sequences in the given format. + + format : str + Format of input sequences matching any of those supported by BioPython + (e.g., "fasta", "genbank", etc.) + + Returns + ------- + int : + Number of sequences written out to the given path. + + """ + with open_file(path_or_buffer, "wt") as handle: + # Bio.SeqIO supports writing to the same handle multiple times for specific + # file formats. For the formats we use, this function call should work for + # both a newly opened file handle or one that is provided by the caller. 
+ # For more details see: + # https://github.com/biopython/biopython/blob/25f5152f4aeefe184a323db25694fbfe0593f0e2/Bio/SeqIO/__init__.py#L233-L251 + sequences_written = Bio.SeqIO.write( + sequences, + handle, + format + ) + + return sequences_written diff --git a/pytest.python3.ini b/pytest.python3.ini index d62edcdea..5845e427e 100644 --- a/pytest.python3.ini +++ b/pytest.python3.ini @@ -1,4 +1,9 @@ [pytest] +# ignore biopython's deprecation warnings about alphabet that are outside of our control +filterwarnings = + ignore::PendingDeprecationWarning:Bio.Alphabet + ignore:Coverage disabled + addopts = # do not capture any output---necessary for interactive breakpoints -s diff --git a/setup.py b/setup.py index ee1f0a6fa..3f57b1555 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,8 @@ "jsonschema >=3.0.0, ==3.*", "packaging >=19.2", "pandas >=1.0.0, ==1.*", - "phylo-treetime ==0.8.*" + "phylo-treetime ==0.8.*", + "xopen >=1.0.1, ==1.*" ], extras_require = { 'full': [ diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 000000000..d65fa5f1e --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +import bz2 +import gzip +import lzma +from pathlib import Path +import pytest +import random +import sys + +from augur.io import open_file, read_sequences, write_sequences + + +def random_seq(k): + """Generate a single random sequence of nucleotides of length k. + """ + return "".join(random.choices(("A","T","G","C"), k=k)) + +def generate_sequences(n, k=10): + """Generate n random sequences of length k. 
+ """ + return ( + SeqRecord(Seq(random_seq(k)), id=f"SEQ_{i}") + for i in range(1, n + 1) + ) + +@pytest.fixture +def sequences(): + return list(generate_sequences(3)) + +@pytest.fixture +def sequences_generator(): + return generate_sequences(3) + +@pytest.fixture +def fasta_filename(tmpdir, sequences): + filename = str(tmpdir / "sequences.fasta") + SeqIO.write(sequences, filename, "fasta") + return filename + +@pytest.fixture +def additional_fasta_filename(tmpdir, sequences): + filename = str(tmpdir / "additional_sequences.fasta") + SeqIO.write(sequences, filename, "fasta") + return filename + +@pytest.fixture +def gzip_fasta_filename(tmpdir, sequences): + filename = str(tmpdir / "sequences.fasta.gz") + + with gzip.open(filename, "wt") as oh: + SeqIO.write(sequences, oh, "fasta") + + return filename + +@pytest.fixture +def bzip2_fasta_filename(tmpdir, sequences): + filename = str(tmpdir / "sequences.fasta.bz2") + + with bz2.open(filename, "wt") as oh: + SeqIO.write(sequences, oh, "fasta") + + return filename + +@pytest.fixture +def lzma_fasta_filename(tmpdir, sequences): + filename = str(tmpdir / "sequences.fasta.xz") + + with lzma.open(filename, "wt") as oh: + SeqIO.write(sequences, oh, "fasta") + + return filename + +@pytest.fixture +def genbank_reference(): + return "tests/builds/zika/config/zika_outgroup.gb" + + +class TestReadSequences: + def test_read_sequences_from_single_file(self, fasta_filename): + sequences = read_sequences(fasta_filename, format="fasta") + assert len(list(sequences)) == 3 + + def test_read_sequences_from_multiple_files(self, fasta_filename, additional_fasta_filename): + sequences = read_sequences(fasta_filename, additional_fasta_filename, format="fasta") + assert len(list(sequences)) == 6 + + def test_read_sequences_from_multiple_files_or_buffers(self, fasta_filename, additional_fasta_filename): + with open(fasta_filename) as fasta_handle: + sequences = read_sequences(fasta_handle, additional_fasta_filename, format="fasta") + assert 
len(list(sequences)) == 6 + + def test_read_single_fasta_record(self, fasta_filename): + record = next(read_sequences(fasta_filename, format="fasta")) + assert record.id == "SEQ_1" + + def test_read_single_genbank_record(self, genbank_reference): + reference = next(read_sequences(genbank_reference, format="genbank")) + assert reference.id == "KX369547.1" + + def test_read_single_genbank_record_from_a_path(self, genbank_reference): + reference = next(read_sequences(Path(genbank_reference), format="genbank")) + assert reference.id == "KX369547.1" + + def test_read_sequences_from_single_gzip_file(self, gzip_fasta_filename): + sequences = read_sequences(gzip_fasta_filename, format="fasta") + assert len(list(sequences)) == 3 + + def test_read_sequences_from_single_lzma_file(self, lzma_fasta_filename): + sequences = read_sequences(lzma_fasta_filename, format="fasta") + assert len(list(sequences)) == 3 + + def test_read_sequences_from_single_bzip2_file(self, bzip2_fasta_filename): + sequences = read_sequences(bzip2_fasta_filename, format="fasta") + assert len(list(sequences)) == 3 + + def test_read_sequences_from_multiple_files_with_different_compression(self, fasta_filename, gzip_fasta_filename, lzma_fasta_filename): + sequences = read_sequences(fasta_filename, gzip_fasta_filename, lzma_fasta_filename, format="fasta") + assert len(list(sequences)) == 9 + + +class TestWriteSequences: + def test_write_sequences(self, tmpdir, sequences): + output_filename = Path(tmpdir) / Path("new_sequences.fasta") + sequences_written = write_sequences(sequences, output_filename, "fasta") + assert sequences_written == len(sequences) + + def test_write_genbank_sequence(self, tmpdir, genbank_reference): + output_filename = Path(tmpdir) / Path("new_sequences.fasta") + + reference = SeqIO.read(genbank_reference, "genbank") + sequences_written = write_sequences([reference], output_filename, "genbank") + assert sequences_written == 1 + + def test_write_sequences_from_generator(self, tmpdir, 
sequences_generator): + output_filename = Path(tmpdir) / Path("new_sequences.fasta") + sequences_written = write_sequences(sequences_generator, output_filename, "fasta") + assert sequences_written == 3 + + def test_write_single_set_of_sequences_to_gzip_file(self, tmpdir, sequences): + output_filename = Path(tmpdir) / Path("new_sequences.fasta.gz") + sequences_written = write_sequences(sequences, output_filename, "fasta") + assert sequences_written == len(sequences) + + with gzip.open(output_filename, "rt") as handle: + assert sequences_written == len([line for line in handle if line.startswith(">")]) + + def test_write_single_set_of_sequences_to_bzip2_file(self, tmpdir, sequences): + output_filename = Path(tmpdir) / Path("new_sequences.fasta.bz2") + sequences_written = write_sequences(sequences, output_filename, "fasta") + assert sequences_written == len(sequences) + + with bz2.open(output_filename, "rt") as handle: + assert sequences_written == len([line for line in handle if line.startswith(">")]) + + def test_write_single_set_of_sequences_to_lzma_file(self, tmpdir, sequences): + output_filename = Path(tmpdir) / Path("new_sequences.fasta.xz") + sequences_written = write_sequences(sequences, output_filename, "fasta") + assert sequences_written == len(sequences) + + with lzma.open(output_filename, "rt") as handle: + assert sequences_written == len([line for line in handle if line.startswith(">")]) + + def test_write_sequences_by_external_handle(self, tmpdir, sequences): + output_filename = Path(tmpdir) / Path("new_sequences.fasta") + + with open_file(output_filename, "w") as handle: + total_sequences_written = 0 + for sequence in sequences: + sequences_written = write_sequences( + sequence, + handle + ) + total_sequences_written += sequences_written + + with open(output_filename, "r") as handle: + assert total_sequences_written == len([line for line in handle if line.startswith(">")]) From 0a9d742f47c1a49332b54b6b13b1046a041eea92 Mon Sep 17 00:00:00 2001 From: John 
Huddleston Date: Tue, 9 Mar 2021 22:36:15 -0800 Subject: [PATCH 2/8] Support compressed inputs/outputs for index Adds support to augur index for compressed sequence inputs and index outputs. --- augur/index.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/augur/index.py b/augur/index.py index 3b0d883c6..34f339cab 100644 --- a/augur/index.py +++ b/augur/index.py @@ -8,11 +8,15 @@ import sys import csv +from .io import open_file, read_sequences + + def register_arguments(parser): parser.add_argument('--sequences', '-s', required=True, help="sequences in fasta format") parser.add_argument('--output', '-o', help="tab-delimited file containing the number of bases per sequence in the given file. Output columns include strain, length, and counts for A, C, G, T, N, other valid IUPAC characters, ambiguous characters ('?' and '-'), and other invalid characters.", required=True) parser.add_argument('--verbose', '-v', action="store_true", help="print index statistics to stdout") + def index_sequence(sequence, values): """Count the number of nucleotides for a given sequence record. @@ -127,13 +131,7 @@ def index_sequences(sequences_path, sequence_index_path): total length of sequences indexed """ - #read in files - try: - seqs = SeqIO.parse(sequences_path, 'fasta') - except ValueError as error: - print("ERROR: Problem reading in {}:".format(sequences_path), file=sys.stderr) - print(error, file=sys.stderr) - return 1 + seqs = read_sequences(sequences_path) other_IUPAC = {'r', 'y', 's', 'w', 'k', 'm', 'd', 'h', 'b', 'v'} values = [{'a'},{'c'},{'g'},{'t'},{'n'},other_IUPAC,{'-'},{'?'}] @@ -142,7 +140,7 @@ def index_sequences(sequences_path, sequence_index_path): tot_length = 0 num_of_seqs = 0 - with open(sequence_index_path, 'wt') as out_file: + with open_file(sequence_index_path, 'wt') as out_file: tsv_writer = csv.writer(out_file, delimiter = '\t') #write header i output file @@ -166,7 +164,12 @@ def run(args): ("?" 
and "-"), and other invalid characters in a set of sequences and write
     the composition as a data frame to the given sequence index path.
     '''
-    num_of_seqs, tot_length = index_sequences(args.sequences, args.output)
+    try:
+        num_of_seqs, tot_length = index_sequences(args.sequences, args.output)
+    except ValueError as error:
+        print("ERROR: Problem reading in {}:".format(args.sequences), file=sys.stderr)
+        print(error, file=sys.stderr)
+        return 1
 
     if args.verbose:
         print("Analysed %i sequences with an average length of %i nucleotides." % (num_of_seqs, int(tot_length / num_of_seqs)))

From c77bcb77d5585ba3df99150b3dbb86bf3b90f452 Mon Sep 17 00:00:00 2001
From: John Huddleston
Date: Wed, 30 Dec 2020 12:58:50 -0800
Subject: [PATCH 3/8] Support compress inputs/outputs for parse and mask

Adds tests for augur parse and mask and then refactors these modules to
use the new read/write interface.

For augur parse, the refactor moves from an original for loop into its
own `parse_sequence` function, adds tests for this new function, and
updates the body of the `run` function to use this function inside the
for loop. This commit also replaces the Bio.SeqIO read and write
functions with the new `read_sequences` and `write_sequences` functions.
These functions support compressed input and output files based on the
filename extensions.

For augur mask, the refactor moves logic for masking individual
sequences into its own function and replaces Bio.SeqIO calls with new
`read_sequences` and `write_sequences` functions. The refactoring of the
`mask_sequence` function allows us to easily define a generator for the
output sequences to write and make a single call to `write_sequences`.
--- augur/mask.py | 69 +- augur/parse.py | 115 +- tests/functional/parse.t | 32 + tests/functional/parse/metadata.tsv | 13 + tests/functional/parse/sequences.fasta | 2138 ++++++++++++++++++++++++ tests/functional/parse/zika.fasta | 24 + tests/functional/parse/zika.fasta.gz | Bin 0 -> 10908 bytes tests/test_parse.py | 25 +- 8 files changed, 2361 insertions(+), 55 deletions(-) create mode 100644 tests/functional/parse.t create mode 100644 tests/functional/parse/metadata.tsv create mode 100644 tests/functional/parse/sequences.fasta create mode 100644 tests/functional/parse/zika.fasta create mode 100644 tests/functional/parse/zika.fasta.gz diff --git a/augur/mask.py b/augur/mask.py index 838529e12..ff2cd22e8 100644 --- a/augur/mask.py +++ b/augur/mask.py @@ -10,13 +10,14 @@ from Bio import SeqIO from Bio.Seq import MutableSeq +from .io import read_sequences, write_sequences from .utils import run_shell_command, shquote, open_file, is_vcf, load_mask_sites, VALID_NUCLEOTIDES def get_chrom_name(vcf_file): """Read the CHROM field from the first non-header line of a vcf file. - + Returns: - str or None: Either the CHROM field or None if no non-comment line could be found. + str or None: Either the CHROM field or None if no non-comment line could be found. """ with open_file(vcf_file, mode='r') as f: for line in f: @@ -73,6 +74,32 @@ def mask_vcf(mask_sites, in_file, out_file, cleanup=True): except OSError: pass + +def mask_sequence(sequence, mask_sites, mask_from_beginning, mask_from_end, mask_invalid): + # Convert to a mutable sequence to enable masking with Ns. 
+ sequence_length = len(sequence.seq) + beginning, end = mask_from_beginning, mask_from_end + + if beginning + end > sequence_length: + beginning, end = sequence_length, 0 + + seq = str(sequence.seq)[beginning:-end or None] + + if mask_invalid: + seq = "".join(nuc if nuc in VALID_NUCLEOTIDES else "N" for nuc in seq) + + masked_sequence = MutableSeq("N" * beginning + seq + "N" * end) + + # Replace all excluded sites with Ns. + for site in mask_sites: + if site < sequence_length: + masked_sequence[site] = "N" + + sequence.seq = masked_sequence + + return sequence + + def mask_fasta(mask_sites, in_file, out_file, mask_from_beginning=0, mask_from_end=0, mask_invalid=False): """Mask the provided site list from a FASTA file and write to a new file. @@ -95,27 +122,27 @@ def mask_fasta(mask_sites, in_file, out_file, mask_from_beginning=0, mask_from_e """ # Load alignment as FASTA generator to prevent loading the whole alignment # into memory. - alignment = SeqIO.parse(in_file, "fasta") + alignment = read_sequences(in_file) # Write the masked alignment to disk one record at a time. print("Removing masked sites from FASTA file.") - with open_file(out_file, "w") as oh: - for record in alignment: - # Convert to a mutable sequence to enable masking with Ns. - sequence_length = len(record.seq) - beginning, end = mask_from_beginning, mask_from_end - if beginning + end > sequence_length: - beginning, end = sequence_length, 0 - seq = str(record.seq)[beginning:-end or None] - if mask_invalid: - seq = "".join(nuc if nuc in VALID_NUCLEOTIDES else "N" for nuc in seq) - sequence = MutableSeq("N" * beginning + seq + "N" * end) - # Replace all excluded sites with Ns. 
- for site in mask_sites: - if site < sequence_length: - sequence[site] = "N" - record.seq = sequence - SeqIO.write(record, oh, "fasta") + + masked_sequences = ( + mask_sequence( + sequence, + mask_sites, + mask_from_beginning, + mask_from_end, + mask_invalid, + ) + for sequence in alignment + ) + sequences_written = write_sequences( + masked_sequences, + out_file, + "fasta" + ) + def register_arguments(parser): parser.add_argument('--sequences', '-s', required=True, help="sequences in VCF or FASTA format") @@ -179,7 +206,7 @@ def run(args): sys.exit(1) mask_vcf(mask_sites, args.sequences, out_file, args.cleanup) else: - mask_fasta(mask_sites, args.sequences, out_file, + mask_fasta(mask_sites, args.sequences, out_file, mask_from_beginning=args.mask_from_beginning, mask_from_end=args.mask_from_end, mask_invalid=args.mask_invalid) diff --git a/augur/parse.py b/augur/parse.py index 02e1b82f5..ef61c7b0a 100644 --- a/augur/parse.py +++ b/augur/parse.py @@ -1,10 +1,10 @@ """ Parse delimited fields from FASTA sequence names into a TSV and FASTA file. """ - -from Bio import SeqIO import pandas as pd +from .io import open_file, read_sequences, write_sequences + forbidden_chactacters = str.maketrans( {' ': None, '(': '_', @@ -68,6 +68,63 @@ def prettify(x, trim=0, camelCase=False, etal=None, removeComma=False): return res +def parse_sequence(sequence, fields, strain_key="strain", separator="|", prettify_fields=None, fix_dates=None): + """Parse a single sequence record into a sequence record and associated metadata. + + Parameters + ---------- + sequence : Bio.SeqRecord.SeqRecord + a BioPython sequence record to parse with metadata stored in its description field. + + fields : list or tuple + a list of names for fields expected in the given record's description. + + strain_key : str + name of the field to use as the given sequence's unique id + + separator : str + delimiter to split record description by. 
+
+    prettify_fields : list or tuple
+        a list of field names for which the values in those fields should be prettified.
+
+    fix_dates : str
+        parse "date" field into the requested canonical format ("dayfirst" or "monthfirst").
+
+    Returns
+    -------
+    Bio.SeqRecord.SeqRecord :
+        a BioPython sequence record with the given sequence's name as the record
+        id and all other metadata stripped.
+
+    dict :
+        metadata associated with the given record indexed by the given field names.
+    """
+    sequence_fields = map(str.strip, sequence.description.split(separator))
+    metadata = dict(zip(fields, sequence_fields))
+
+    tmp_name = metadata[strain_key].translate(forbidden_chactacters)
+    sequence.name = sequence.id = tmp_name
+    sequence.description = ''
+
+    if prettify_fields:
+        for field in metadata.keys() & prettify_fields:
+            if isinstance(metadata[field], str):
+                metadata[field] = prettify(metadata[field], camelCase=(not field.startswith('author')),
+                                           etal='lower' if field.startswith('author') else None)
+
+    # The `fix_dates` argument shadows the module-level `fix_dates()` helper,
+    # so fetch the helper from the module namespace before calling it.
+    if fix_dates and 'date' in metadata:
+        metadata['date'] = globals()['fix_dates'](
+            metadata['date'], dayfirst=(fix_dates == 'dayfirst')
+        )
+
+    metadata["strain"] = sequence.id
+
+    return sequence, metadata
+
+
 def register_arguments(parser):
     parser.add_argument('--sequences', '-s', required=True, help="sequences in fasta or VCF format")
     parser.add_argument('--output-sequences', help="output sequences file")
@@ -84,7 +141,7 @@ def run(args):
     parse a fasta file and turn information in the header into a tsv or csv file.
''' - seqs = SeqIO.parse(args.sequences, 'fasta') + sequences = read_sequences(args.sequences) # if strain or name are found in specified fields, use this # field to index the dictionary and the data frame @@ -98,32 +155,26 @@ def run(args): strain_key = args.fields[0] # loop over sequences, parse fasta header of each sequence - with open(args.output_sequences, 'w', encoding='utf-8') as output: - for seq in seqs: - fields = map(str.strip, seq.description.split(args.separator)) - tmp_meta = dict(zip(args.fields, fields)) - - tmp_name = tmp_meta[strain_key].translate(forbidden_chactacters) - seq.name = seq.id = tmp_name - seq.description = '' - - if args.prettify_fields: - for field in tmp_meta.keys() & args.prettify_fields: - if isinstance(tmp_meta[field], str): - tmp_meta[field] = prettify(tmp_meta[field], camelCase=(not field.startswith('author')), - etal='lower' if field.startswith('author') else None) - - # parse dates and convert to a canonical format - if args.fix_dates and 'date' in tmp_meta: - tmp_meta['date'] = fix_dates(tmp_meta['date'], - dayfirst=args.fix_dates=='dayfirst') - - if 'strain' in tmp_meta: - del tmp_meta['strain'] - meta_data[seq.id] = tmp_meta - - SeqIO.write(seq, output, 'fasta') - - df = pd.DataFrame.from_dict(meta_data, orient='index') - df.to_csv(args.output_metadata, index_label='strain', - sep='\t' if args.output_metadata.endswith('tsv') else ',') + with open_file(args.output_sequences, "wt") as handle: + for sequence in sequences: + sequence_record, sequence_metadata = parse_sequence( + sequence, + args.fields, + strain_key, + args.separator, + args.prettify_fields, + args.fix_dates + ) + meta_data[sequence_record.id] = sequence_metadata + + sequences_written = write_sequences( + sequence_record, + handle + ) + + df = pd.DataFrame(meta_data.values()) + df.to_csv( + args.output_metadata, + index=False, + sep='\t' if args.output_metadata.endswith('tsv') else ',' + ) diff --git a/tests/functional/parse.t b/tests/functional/parse.t new 
file mode 100644 index 000000000..e9e65ca44 --- /dev/null +++ b/tests/functional/parse.t @@ -0,0 +1,32 @@ +Integration tests for augur parse. + + $ pushd "$TESTDIR" > /dev/null + $ export AUGUR="../../bin/augur" + +Parse Zika sequences into sequences and metadata. + + $ ${AUGUR} parse \ + > --sequences parse/zika.fasta \ + > --output-sequences "$TMP/sequences.fasta" \ + > --output-metadata "$TMP/metadata.tsv" \ + > --fields strain virus accession date region country division city db segment authors url title journal paper_url \ + > --prettify-fields region country division city + + $ diff -u "parse/sequences.fasta" "$TMP/sequences.fasta" + $ diff -u "parse/metadata.tsv" "$TMP/metadata.tsv" + $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv" + +Parse compressed Zika sequences into sequences and metadata. + + $ ${AUGUR} parse \ + > --sequences parse/zika.fasta.gz \ + > --output-sequences "$TMP/sequences.fasta" \ + > --output-metadata "$TMP/metadata.tsv" \ + > --fields strain virus accession date region country division city db segment authors url title journal paper_url \ + > --prettify-fields region country division city + + $ diff -u "parse/sequences.fasta" "$TMP/sequences.fasta" + $ diff -u "parse/metadata.tsv" "$TMP/metadata.tsv" + $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv" + + $ popd > /dev/null diff --git a/tests/functional/parse/metadata.tsv b/tests/functional/parse/metadata.tsv new file mode 100644 index 000000000..319d519b6 --- /dev/null +++ b/tests/functional/parse/metadata.tsv @@ -0,0 +1,13 @@ +strain virus accession date region country division city db segment authors url title journal paper_url +PAN/CDC_259359_V1_V3/2015 zika KX156774 2015-12-18 North America Panama Panama Panama genbank genome Shabman et al https://www.ncbi.nlm.nih.gov/nuccore/KX156774 Direct Submission Submitted (29-APR-2016) J. 
Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ +COL/FLR_00024/2015 zika MF574569 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574569 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ +PRVABC59 zika KU501215 2015-12-XX North America Puerto Rico Puerto Rico Puerto Rico genbank genome Lanciotti et al https://www.ncbi.nlm.nih.gov/nuccore/KU501215 Phylogeny of Zika Virus in Western Hemisphere, 2015 Emerging Infect. Dis. 22 (5), 933-935 (2016) https://www.ncbi.nlm.nih.gov/pubmed/27088323 +COL/FLR_00008/2015 zika MF574562 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574562 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ +Colombia/2016/ZC204Se zika KY317939 2016-01-06 South America Colombia Colombia Colombia genbank genome Quick et al https://www.ncbi.nlm.nih.gov/nuccore/KY317939 Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples Nat Protoc 12 (6), 1261-1276 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538739 +ZKC2/2016 zika KX253996 2016-02-16 Oceania American Samoa American Samoa American Samoa genbank genome Wu et al https://www.ncbi.nlm.nih.gov/nuccore/KX253996 Direct Submission Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China https://www.ncbi.nlm.nih.gov/pubmed/ +VEN/UF_1/2016 zika KX702400 2016-03-25 South America Venezuela Venezuela Venezuela genbank genome Blohm et al https://www.ncbi.nlm.nih.gov/nuccore/KX702400 Complete Genome Sequences of Identical Zika virus 
Isolates in a Nursing Mother and Her Infant Genome Announc 5 (17), e00231-17 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28450510 +DOM/2016/BB_0059 zika KY785425 2016-04-04 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785425 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 +BRA/2016/FC_6706 zika KY785433 2016-04-08 South America Brazil Brazil Brazil genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785433 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 +DOM/2016/BB_0183 zika KY785420 2016-04-18 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785420 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 +EcEs062_16 zika KX879603 2016-04-XX South America Ecuador Ecuador Ecuador genbank genome Marquez et al https://www.ncbi.nlm.nih.gov/nuccore/KX879603 First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador Genome Announc 5 (8), e01673-16 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28232448 +HND/2016/HU_ME59 zika KY785418 2016-05-13 North America Honduras Honduras Honduras genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785418 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 diff --git a/tests/functional/parse/sequences.fasta b/tests/functional/parse/sequences.fasta new file mode 100644 index 000000000..3e37aa57f --- /dev/null +++ b/tests/functional/parse/sequences.fasta @@ -0,0 +1,2138 @@ +>PAN/CDC_259359_V1_V3/2015 +gaatttgaagcgaatgctaacaacagtatcaacaggttttattttggatttggaaacgag 
+agtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgc +taaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggac +ttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattca +cggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagagg +ctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatg +ctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctcctgc +tgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttgg +acagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgtt +atatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgcccta +tgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaactt +gggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccg +tgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaat +caagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtt +tcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtca +tatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtca +gcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgtcgtcttggaac +atggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggtta +caacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcag +acatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcag +acactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggac +tttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccg +ggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctccc +agcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaagg +ttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctag +gacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatga +ataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacg +ctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaagg +acgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcaca +cggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctg +gccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactcct +tgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtca +cagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcgg 
+tggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactg +aaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttaca +ttgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcacca +ttggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagaca +cagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatc +aaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattc +tcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttccctta +tgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtgg +ggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctaca +acgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattgg +cagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaa +tggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatg +gagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacaga +gattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtact +tcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaat +gcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtat +ttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccg +ttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattg +agagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacat +gtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatca +tacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggaccc +aaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggca +ctaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactg +caagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgt +tctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaa +gcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctccc +ttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaa +agatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaa +tgagtgacctggctaagcttgcaatcttgatgggtgccaccttcgcggaaatgaacactg +gaggagatgtggctcatctggcgctgatagcagcattcaaagtcagaccagcgttgctgg +tatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcct +cgtgtcttttgcaaactgcgatctccgccttggagggcgacctgatggttctcatcaatg 
+gttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatca +ccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtgga +gagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtg +tgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgacc +ccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggcccccta +gcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcag +atatagagatggctgggcccatagccgcggtcggtctgctaattgtcagttacgtggtct +caggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatg +cggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttct +ccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatga +ccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacg +tgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaa +agggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacac +aagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaag +gatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcagg +atctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgagg +tgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaa +tatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaactt +caggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcg +tgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactc +ctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgc +atcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaa +caagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaag +cccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaa +cagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatca +gagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagta +tagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttca +tgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatgg +acaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatc +attctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagctt +gtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagt +tccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgg 
+gcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatac +ttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgccc +agaggagggggcgcataggcaggaatcccaataaacctggagatgagtatctgtatggag +gtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttg +acaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaag +tagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaac +tcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataa +cctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagaca +gtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtgga +tggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctg +ggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatga +cagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaa +gcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttt +tggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggca +tagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctct +cggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtgg +tgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatca +tcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggaga +gaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggat +tctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaa +ctttcattaccccagctgtccaacatgcagtgaccacttcatacaacaactactccttaa +tggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacg +catgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctga +ccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgc +aggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctg +ttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggaga +aaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcgga +ccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtggg +aaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacattttta +ggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttgg +tcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttga +accagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgca 
+gagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtccc +gaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaagg +tcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaag +ttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgc +aaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcgg +ctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtgg +aagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccag +gagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagc +gactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacac +atgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacga +gccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatg +tgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaaga +tcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacg +agaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggt +cagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtga +ctggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttca +aggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagca +tggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtacca +aagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagagg +aaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtgg +acaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgg +gaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggt +atatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatc +actggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcg +gatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactg +ctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaacc +aaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaaca +aagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcga +gacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacc +tagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgt +ggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggc +tcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttg 
+cacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtgga +aaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaaca +agctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactga +ttggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctag +caaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactga +tggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacct +ggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaaca +gagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaaatggacag +acattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagac +cgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcatag +gtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaag +ggtccacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagcc +acagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcc +tatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcag +aggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggc +gaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaag +agggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctggga +aagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatag +cggcggccggtgtggggaaatccatgggtct +>COL/FLR_00024/2015 +tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttatt +ttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggatt +ccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaa +gaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattct +agcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttc +agtggggaaaaaagaggctatggaaacaataaagaagttcaagaaagatctggctgccat +gctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcgg +aattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgc +atactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacatt +ggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccat +gagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtg +caacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacg +gagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtc 
+gcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggat +attcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctc +aacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcat +caggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggt +tgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgt +cgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgcta +tgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagccta +ccttgagaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctg +gggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatg +ctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgtt +gtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactga +tgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggg +gggctttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagattt +gtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacat +tccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagc +actggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtca +agaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaa +gggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaa +gggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaac +actgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggt +tccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgc +taaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccacc +atttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggca +caggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaat +ggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcatt +gggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtc +ctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaa +tggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagc +cgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtac +aggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctga +ctccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggat +ctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgc 
+aatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccat +gtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggc +ttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatgg +tgacacactgaaagaatgcccactcaaacatagagcatggaacagctttcttgtggagga +tcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcatt +agagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtga +tctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatct +gatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaataga +agagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccag +agagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtt +tgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatc +tctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcac +aatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcc +caggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatca +catggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaa +gaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgat +cctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccacctt +cgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagt +cagaccagcgttgctggtatccttcatcttcagagctaattggacaccccgtgaaagcat +gctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacct +gatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttcc +acgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcac +actgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctct +gaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgc +tgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaa +gcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctgg +agggttcgccaaggcagatatagagatggctgggcccatggccgcggttggtctgctaat +tgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacat +cacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctaga +tgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatact +caaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctgg +agcgtggtacgtatacgtgaagactggaaaaaggagtggtgcgctatgggatgtgcctgc 
+tcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtag +actgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactat +gtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactg +gggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctg +ggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacat +ccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctgga +ttacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggact +ttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggag +gagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagct +aactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagt +ccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgc +tgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaa +tgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacg +tctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccactt +cacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcga +ggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactc +caactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctt +tgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacgg +caatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaa +gacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaac +tgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatg +cctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcac +acatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggaga +tgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttga +agcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcg +acctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaag +gaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggt +tgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaa +caccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagt +gctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcatt +caaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaac +actgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcat 
+gcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccct +agagaccattatgcttttggggttgctgggaacagtctcgttgggaatctttttcgtctt +gatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgc +atggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgt +gttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaa +ccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatga +actcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagagga +gggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccat +ctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcata +caacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaa +agggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactc +acaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgta +cttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctgg +catcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaat +tgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccag +cgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgc +aacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttc +actgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaac +aagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaa +atggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcagg +catcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacggg +aggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacct +gcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgc +cgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatga +agaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtgga +cgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatc +atctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattg +gcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactat +gatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccact +ctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaa +aagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagt +gaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctga 
+agctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcgga +aacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatga +ggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaa +accctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatgg +tcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcac +tcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacg +gccacgagtctgtaccaaagaagagttcatcaacaaggtgcgtagcaatgcagcattagg +ggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaag +gttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttg +tgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaaggg +cagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttgg +attcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggct +gggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggat +gtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatga +agctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaa +gtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagt +tatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctct +taacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttct +agagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagag +caacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagcc +aattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaa +ggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttg +ctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccg +ccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccg +ggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacag +aagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttcc +aactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacat +gcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagacccc +agttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatc +tctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacat +ggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcg +ctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtc 
+aggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggaga +agctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgct +gcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcagga +tgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcag +ctgtggatctccagaagagggactagtggttagaggaga +>PRVABC59 +gttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaaca +gtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaa +aaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgag +cccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcag +gatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtct +catcaatagatggggttcagtggggaaaaaagaggctatggaaacaataaagaagttcaa +gaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgagg +cgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggt +cactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccat +atcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggaca +catgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccaga +tgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatca +caaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccaccag +gaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgat +tagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgc +ttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgat +tgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtat +gtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggc +acaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcgga +ggtaagatcctactgctatgaggcatcaatatcagacatggcttctgacagccgctgccc +aacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaac +gttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgac +atgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatct +ggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatga +cacaggacatgaaactgatgagaatagagcgaaagttgagataacgcccaattcaccgag +agccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacagg +ccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaa 
+ggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccaca +ctggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgt +cgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggc +tgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaat +ggataaacttagattgaagggcgtgtcatactccttgtgtactgcagcgttcacattcac +caagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggac +agatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagt +tgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgat +gctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaa +gatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgt +gagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttgg +aggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatc +attgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggtt +gggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgtt +gatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaa +ggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacag +gtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctggga +agatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagt +agaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtggg +atctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagct +gccccacggctggaaggcttgggggaaatcgtatttcgtcagagcagcaaagacaaataa +cagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaa +cagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggt +tagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaa +ggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggag +gctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacatt +gtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccact +cagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtga +agagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatg +tggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatg +gtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggta +tggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgac 
+tgcaggatcaactgatcacatggaccacttctcccttggagtgcttgtgatcctgctcat +ggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggc +agtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaat +tttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgct +gatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattg +gacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctc +cgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaat +acgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctgac +accactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggg +gtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcat +ggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgtt +gctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcct +gatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggc +cgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacat +tgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccg +gctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccc +catgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagc +cataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgc +tctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgta +cagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaaga +gggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagg +gagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatg +gaagctagatgccgcctgggatgggcacagcgaggtgcagctcttggccgtgccccccgg +agagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacat +tggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtg +tgggagagtgataggactttatggcaatggggtcgtgatcaaaaacgggagttatgttag +tgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccctcgat +gctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggag +agttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagc +tccaaccagggttgtcgctgctgaaatggaggaggcccttagagggcttccagtgcgtta +tatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgcca +tgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatat 
+tatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaac +aagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccg +tgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagag +agcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgt +tccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggt +catacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtg +ggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgt +catagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggc +tggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaa +tcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaaga +ccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcct +catagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaa +gcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgt +ttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctt +tgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccag +acacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatca +tgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagt +gatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattga +caacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggc +ccaattgccggagaccctagagaccataatgcttttggggttgctgggaacagtctcgct +gggaatcttcttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggt +gactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgc +atgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagca +aagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctggg +cttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatct +aatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggcc +agcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaaca +tgcagtgaccacctcatacaacaactactccttaatggcgatggccacgcaagctggagt +gttgtttggcatgggcaaagggatgccattctacgcatgggactttggagtcccgctgct +aatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgct +cgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgccca +gaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactga 
+cattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcat +agcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctgg +ggctctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaa +ctcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttc +tctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacagg +agagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttcta +ctcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaa +ggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggtt +ggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagg +gggctggagttactacgtcgccaccatccgcaaagttcaagaagtgaaaggatacacaaa +aggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccg +tcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtg +tgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcct +ctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtg +cccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggagg +actggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagc +gaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatgga +cgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggc +tgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggat +ccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggc +ttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggt +tgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgac +cgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgcc +agacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaaga +gctaggcaaacacaaacggccacgagtctgcaccaaagaagagttcatcaacaaggttcg +tagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtgga +agctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgag +aggagagtgccagagctgtgtgtacaacatgatgggaaaaagagaaaagaaacaagggga +atttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttct +agagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcagg +aggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcg +tataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcag 
+gtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggcctt +ggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagc +tgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggaca +agttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatat +ggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagt +gaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggaga +tgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatga +tatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactg +ggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtc +cattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccagg +ggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggca +gctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgt +gccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatg +gatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgacca +catggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaaggga +agacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacat +taaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggacta +cctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagc +accaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcc +tgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatgg +cacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccac +gcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggg +gcctgaactggagatcagctgtggatctccagaagagggactagtggttagagga +>COL/FLR_00008/2015 +tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttatt +ttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggatt +ccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaa +gaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattct +agcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttc +agtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccat +gctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcgg +aattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgc +atactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacatt 
+ggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccat +gagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtg +caacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacg +gagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtc +gcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggat +attcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctc +aacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcat +caggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggt +tgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgt +cgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgcta +tgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagccta +ccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctg +gggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatg +ctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgtt +gtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactga +tgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggg +gggctttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagattt +gtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacat +tccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagc +actggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtca +agaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaa +gggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaa +gggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaac +actgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggt +tccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgc +taaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccacc +atttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggca +caggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaat +ggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcatt +gggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtc +ctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaa +tggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagc 
+cgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtac +aggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctga +ctccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggat +ctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgc +aatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccat +gtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggc +ttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatgg +tgacacactgaaagaatgcccactcaaacatagagcatggaacagctttcttgtggagga +tcatgggttcggggtatttcacactagtgtctggatcaaggttagagaagattattcatt +agagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtga +tctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatct +gatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaataga +agagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccag +agagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtt +tgaggaatgcccaggcactaaggtccacgtggaggaaacatgtgaaacaagaggaccatc +tctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcac +aatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcc +caggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatca +catggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaa +gaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgat +cctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccacctt +cgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagt +cagaccagcgttgctggtatccttcatcttcagagctaattggacaccccgtgaaagcat +gctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacct +gatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttcc +acgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcac +actgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctct +gaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgc +tgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaa +gcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctgg +agggttcgccaaggcagatatagagatggctgggcccatggccgcggttggtctgctaat +tgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacat 
+cacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctaga +tgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatact +caaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctgg +agcgtggtacgtatacgtgaagactggaaaaaggagtggtgcgctatgggatgtgcctgc +tcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtag +actgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactat +gtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactg +gggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctg +ggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacat +ccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctgga +ttacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggact +ttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggag +gagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagct +aactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagt +ccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgc +tgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaa +tgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacg +tctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccactt +cacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcga +ggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactc +caactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctt +tgattgggtgacagatcattctggaaaaacagtttggtttgttccaagcgtgaggaacgg +caatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaa +gacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaac +tgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatg +cctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcac +acatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggaga +tgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttga +agcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcg +acctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaag +gaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggt +tgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaa 
+caccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagt +gctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcatt +caaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaac +actgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcat +gcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccct +agagaccattatgcttttggggttgctgggaacagtctcgttgggaatctttttcgtctt +gatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgc +atggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgt +gttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaa +ccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatga +actcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagagga +gggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccat +ctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcata +caacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaa +agggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactc +acaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgta +cttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctgg +catcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaat +tgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccag +cgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgc +aacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttc +actgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaac +aagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaa +atggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcagg +catcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacggg +aggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacct +gcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgc +cgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatga +agaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtgga +cgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatc +atctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattg +gcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactat 
+gatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccact +ctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaa +aagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagt +gaaatatgaggaggatgtgaatctcggctctggcacgcgggccgtggtaagctgcgctga +agctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcgga +aacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatga +ggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaa +accctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatgg +tcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcac +tcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacg +gccacgagtctgtaccaaagaagagttcatcaacaaggtgcgtagcaatgcagcattagg +ggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaag +gttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttg +tgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaaggg +cagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttgg +attcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggct +gggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggat +gtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatga +agctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaa +gtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagt +tatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctct +taacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttct +agagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagag +caacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagcc +aattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaa +ggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttg +ctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccg +ccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccg +ggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacag +aagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttcc +aactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacat +gcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagacccc 
+agttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatc +tctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacat +ggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcg +ctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtc +aggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggaga +agctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgct +gcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcagga +tgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcag +ctgtggatctccagaagagggactagtggttagaggaga +>Colombia/2016/ZC204Se +gacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttg +gaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgt +caatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgcc +agccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagccttttt +gagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaa +aaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaat +aatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttgg +cctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatat +gtacttggacagaaaagatgctggggaggccatatcttttccaaccacattggggatgaa +taagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatga +atgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgac +gtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctag +aagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctg +gttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaa +ccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagcca +aaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcat +aggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgt +cttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacataga +gctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatc +aatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaa +gcaatcagacactcaatatgtttgcaaaagaacgttagtggacagaggctggggaaatgg +atgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaa +aatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttca 
+tggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatag +agcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttgg +aagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattactt +gactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattacc +ttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtaga +gttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagc +agttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggct +gtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtc +atactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgg +gacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctca +gatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgt +aatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttgggga +ctcttacattgtcataggagtcggggagaagaagatcacccaccactggcataggagtgg +cagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtctt +gggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaaggg +catccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctc +acaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctat +ttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgc +tgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgtt +cgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccg +tagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgt +ttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctgga +agagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagagg +tccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaa +atcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacact +gaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggtt +cggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtga +tccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggcta +ctggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagat +gaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtga +tctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggcta +caggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatg 
+cccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatc +aaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgccccc +actgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaaga +accagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatca +cttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaat +gaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggagg +attttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaat +gaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagc +gttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggc +cttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacctgatggttct +catcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactga +caacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgt +ggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaa +aggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggct +ggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctg +gccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgc +caaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagtta +cgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatggga +aaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtgg +tgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggt +cctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggta +cgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaagga +agtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctagg +ttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgt +cacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgt +caagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggca +cagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactct +gcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagc +aggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaa +tggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggagga +agagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtctt +agacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagc 
+cataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaat +ggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcaccca +ctctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactaca +gccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatcc +ctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgc +catcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcacc +aattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggt +gacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagat +cgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttga +gacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttc +agagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagcc +ggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccag +cgctgcccagaggagggggcgcataggcaggaatcccaataaacctggagatgagtatct +gtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaat +gctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggc +cgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctt +tgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgc +cggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataat +ggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaacc +gaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtt +tgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccagg +acacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcaga +gactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccat +tatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaa +caagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcat +gtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctatt +gctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggc +aatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatg +gttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaac +cataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgc +cttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaacta +ctccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgcc 
+attctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaac +acccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatccc +agggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaa +gaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgaccccca +agtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatact +gtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccac +tttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaa +catttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgc +tggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggc +ccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccga +ggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgc +tgtgtcccgaggaagtgcaaagctgagatggctggtggagcggggatacctgcagcccta +tggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccat +ccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgt +gttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttca +tatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcc +tgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaa +aagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaac +cctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaa +ctctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtc +caccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatga +ggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaa +catgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggtt +ctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccac +acaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctggga +tgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaag +agttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggt +tatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagt +ctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatt +tgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggc +tctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaa +catgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgc 
+catctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaa +cgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattaca +aagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcaga +tgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaat +caccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacata +ccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacat +tatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatt +taccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgca +agacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatg +ggataggctcaaacgaatggcagtcagtggagatgaytgcgttgtgaagccaattgatga +taggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacaca +agagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccacca +cttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaaga +tgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgc +ttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacct +ccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggag +aactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggt +gtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaa +atggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagg +gcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcag +gatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttggg +cgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgct +agtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaa +accaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgag +cccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaag +aaggtggcgaccttccccacccttcaatctggggcctgaactggagat +>ZKC2/2016 +agttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaac +agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa +aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga +gcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatca +ggatggtcttggcgattctagccttcttgagattcacggcaatcaagccatcactgggtc +tcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttca 
+agaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgag +gcgcagatactaatgtcggaattgttggcctcctgctgaccacagctatggcagcggagg +tcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggcca +tatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggac +acatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccag +atgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatc +acaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccacta +ggaagctgcaaacgcggtcgcaaacttggttggaatcaagagaatacacaaagcacttga +ttagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcg +cttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctga +ttgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggta +tgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatgg +cacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcgg +aggtaagatcctactgctatgaggcatcaatatcggacatggcttcggacagccgctgcc +caacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaa +cgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtga +catgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatc +tggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatg +acacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaa +gagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacag +gccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcaca +aggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccac +actggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactg +tcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggagg +ctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaa +tggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattca +ccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcaggga +cagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccag +ttgggaggctgataaccgctaaccccgtaatcactgaaagcactgagaactccaagatga +tgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaaga +agatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactg +tgagaggtgccaggagaatggcagtcttgggagacacagcctgggactttggatcagttg 
+gaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaat +cattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggt +tgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgt +tgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaaga +aggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggaca +ggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctggg +aagatggtatctgtgggatctcctctgtttcaagaatggaaaacatcatgtggagatcag +tagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgg +gatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagc +tgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaata +acagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatgga +acagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaagg +ttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaa +aggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatgga +ggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacat +tgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccac +tcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtg +aagagcttgaaattcggtttgaggaatgcccaggcaccaaggtccacgtggaggaaacat +gtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaat +ggtgctgcagggagtgcacaatgcccccactgtcgttccaggctaaagatggctgttggt +atggaatggagataaggcccaggaaagaaccagaaagtaacttagtaaggtcaatggtga +ctgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctca +tggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatgg +cagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaa +ttttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgc +tgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaatt +ggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatct +ccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaa +tacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctga +caccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcgggg +ggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtca +tggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgt 
+tgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcc +tgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatgg +ccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtaca +ttgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtcccc +ggcttgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtcccc +ccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatag +ccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggag +ctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgt +acagagtgatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaag +agggggtctttcacaccatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaag +ggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccat +ggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccg +gagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggaca +ttggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagt +gtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgtta +gtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcga +tgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccagga +gagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttag +ctccaaccagggttgtcgctgccgaaatggaggaagcccttagagggcttccagtgcgtt +atatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgcc +atgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtata +ttatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaa +caagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaaccc +gtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagaga +gagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtctggtttg +ttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacggg +tcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagt +gggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtg +tcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctgg +ctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcagga +atcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaag +accatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcc 
+tcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttca +agcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctg +tttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgct +ttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggacca +gacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatc +acgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggag +tgatggaagccttgggaacactgccaggacacatgacagagagattccaggaagccattg +acaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcgg +cccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgc +tgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatgg +tgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattg +catgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagc +aaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgg +gcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatc +taatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggc +cagcctcagcttgggccatctacgctgccttgacaactttcattaccccagccgtccaac +atgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggag +tgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgc +taatgataggttgctactcacaattaacacccctgaccctaatagtagccatcattttgc +tcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgccc +agaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactg +acattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactca +tagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctg +gggccctgatcacagctgcaacttccactttgtgggaaggctctccgaacaagtactgga +actcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagctt +ctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacag +gagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttct +actcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctca +aggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggt +tggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagag +ggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaa +aaggaggccctggtcatgaagaacccatgttggtgcaaagctatgggtggaacatagtcc 
+gtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgt +gtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcc +tttccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgt +gcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggag +gactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggag +cgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatgg +acgggcccaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcggg +ctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaagga +tccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatggg +cttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacgggg +ttgtcaggctcctgtcaaaaccctgggacgtggtgactggagtcacaggaatagccatga +ccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgc +cagatccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaag +agctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttc +gtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtgg +aagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctga +gaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaagggg +aatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttc +tagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcag +gaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtc +gcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcatcagca +ggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggcct +tggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccag +ctgaaaaagggaagacagttatggacattatttcgagacaagaccaaagggggagcggac +aagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggagta +tggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaag +tgaccaactggctgcagagcaacggatgggataggctcaaacgaatggcagtcagtggag +atgattgcgttgtgaggccaattgatgataggtttgcacatgccctcaggttcttgaatg +atatggggaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaact +gggaggaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggt +ccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccag +gggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggc 
+agctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctg +tgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaat +ggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgacc +acatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaaggg +aagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaaca +ttaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggact +acctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaag +caccaatcttagtgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagc +ctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatg +gcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaacccca +cgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccaccctttaatctgg +ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc +ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc +caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca +tgggtct +>VEN/UF_1/2016 +agttgttactgttgctgactcagactgcgacagttcgagtttgaagcgaaagctagcaac +agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa +aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga +gcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatca +ggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtc +tcatcaatagatggggttcagtggggaaaaaagatgctatggaaataataaagaagttca +agaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgag +gcgcagaaactagtgtcggaattgttggcctccttctgaccacagctatggcagcggagg +tcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggcca +tatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggac +acatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccag +atgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatc +acaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccacta +ggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttga +ttagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcg +cttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctga +ttgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggta 
+tgtcaggtgggacttgggttgatgtcgtcttggaacatggaggttgtgtcaccgtaatgg +cacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcgg +aggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcc +caacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtttgcaaaagaa +cgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtga +catgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatc +tggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgattgttaatg +acacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaa +gagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacag +gccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcaca +aggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccac +actggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactg +tcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggagg +ctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaa +tggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattca +ccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcaggga +cagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccag +ttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatga +tgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaaga +agatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactg +tgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttg +gaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaat +cattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggt +tgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgt +tgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaaga +aggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggaca +ggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctggg +aagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcag +tagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgg +gatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagc +tgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaata +acagctttgtcgtggatggtgacacactgaaagaatgcccactcaaacatagagcatgga 
+acagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaagg +ttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaa +aggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatgga +ggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacat +tgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccac +tcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtg +aagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacat +gtggaacaagaggaccatctctgagatcacccactgcaagcggaagggtgatcgaggaat +ggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggt +atggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtga +ctgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctca +tggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatgg +cagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaa +tcttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgc +tgatagcggcattcaaagtcagaccagcgttgctggtatccttcatcttcagagctaatt +ggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatct +ccgccttggagggcgacctgatggttctcatcaatggttttgctttggcctggttggcaa +tacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctga +caccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcgggg +ggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtca +tggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgt +tgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcc +tgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatgg +ccgcggttggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtaca +ttgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtcccc +ggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtcccc +ccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatag +ccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtg +cgctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgt +acagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaag +agggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaag +ggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccat 
+ggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccg +gagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggaca +ttggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagt +gtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgtta +gtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcga +tgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccagga +gagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttag +ctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgtt +atatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgcc +atgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtata +ttatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaa +caagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaaccc +gtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagaga +gagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttg +ttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacggg +tcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagt +gggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtg +tcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctgg +ctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcagga +atcccaataaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaag +accatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcc +tcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttca +agcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctg +tttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgct +ttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggacca +gacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatc +atgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggag +tgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattg +acaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcgg +cccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgt +tgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatgg +tgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattg 
+catgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagc +aaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgg +gcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatc +taatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggc +cagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaac +atgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggag +tgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgc +taatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgc +tcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgccc +agaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactg +acattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactca +tagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctg +gggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactgga +actcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagctt +ctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacag +gagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttct +actcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctca +aggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggt +tggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagag +ggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaa +aaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaatatagtcc +gtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgt +gtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcc +tctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgt +gcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggag +gactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggag +cgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatgg +acgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcggg +ctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaagga +tccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatggg +cttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacgggg +ttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatga 
+ccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgc +cagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaag +agctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggtgc +gtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtgg +aagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctga +gaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaagggg +aatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttc +tagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcag +gaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtc +gcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagca +ggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggcct +tggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccag +ctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggac +aagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaata +tggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaag +tgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggag +atgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatg +atatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaact +gggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggt +ccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccag +gggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggc +agctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctg +tgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaat +ggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgacc +acatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaaggg +aagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaaca +ttaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggact +acctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaag +caccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagc +ctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatg +gcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaacccca +cgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctgg 
+ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc +ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc +caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca +tgggtctt +>DOM/2016/BB_0059 +tggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagata +ctagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagac +gtgggagtgcatactacatgtacttggacagaaacgatgctggggaggccatatctttcc +caaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtg +atgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcg +attgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaag +gtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgc +aaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcg +aaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttt +tgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccgg +catacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtg +ggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggaca +aaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagat +cctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaag +gtkaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtgg +acagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgcta +agtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtacc +ggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggac +atgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaag +ccaccctggggggttttggaagcctaggactggattgtgaaccgaggacaggccttgact +tttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggt +tccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaaca +acaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttc +tagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatgg +atggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaac +ttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcc +cggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggac +cttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggt 
+tgataaccgccaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaac +ttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcaccc +accactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtg +ccaagagaatggcagtcttgggagatacagcctgggactttggatcagttggaggcgctc +tcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttg +gaggaatgtcctggttctcacaaatcctcattggaacgttgctgatgtggttgggtctga +acacaaagaatggatctatttccctcatgtgcttggccttagggggagtgttgatcttct +tatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacga +gatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagt +accatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggta +tctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaagggg +agctcaatgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaa +aaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacg +gctggaaggcttgggggaaatcgtacttcgttagagcagcaaagacaaataacagctttg +tcgtgnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntggaacagctgttaagggaaaggaggctg +tacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaaga +gggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacag +atggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatc +acaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttg +aaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaa +gaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgca +gggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatgg +agataaggcccaggaaagaaccagaaagcaacttagtaaggtcagtggtgactgcaggat +caactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcagg +aagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctgg +tagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgg +gtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcgg +cattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacacccc +gtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttgg +aaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcga 
+tggttgttccacgcactgataacatcaccttggcaatcctggctgctctgacaccactgg +cccggggtacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgc +tcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgg +gactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaa +ggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcg +cattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcg +gtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagag +caggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatg +tggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagag +agatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccct +ttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatggg +atgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaa +tgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtct +ttcacactatgtggcatgtcacaaaaggatccgcgctgagaagcggtgaagggagacttg +atccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctag +atgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagag +cgaggaacatccagactctgcccggaatatttaagacaaaggannnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncttcgagccttcgatgctgaaga +agaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttc +ctgaaatagtccgtgaagctataaaaacaagactccgtactgtgatcttagctccaacca +gggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaa +cagcagtcaatgtcacccattctggaacagaaatcgtcgacttaatgtgccatgccacct +tcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatg +aggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttg +agatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcat +ttccggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncgggtcatacagc +tcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttg +tcgtgacaaccgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagatt 
+ccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggaccca +tgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaaca +aacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcac +actggcttgarkcaagaatgctccttgacaatatttgcctccaagatggcctcatagcct +cgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttagga +cggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctgg +cctatcaggttgcatctgccggaataacttacacagatagaagatggtgctttgatggca +cgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggag +agaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccc +tgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaag +ccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcg +ctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgc +cggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatct +ttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttg +gggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcc +tcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctc +cccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgatca +ccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaa +ggagagaggagggagcaaccataggattctcaatggacattgacctgcggccagcctcag +cttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtga +ccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttg +gtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgatag +gttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgc +actacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaa +crgcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgaca +caatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtag +ccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctga +tcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctcta +cagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatct +acacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccc +tgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctaca +aaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtg 
+tggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagc +ggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctgga +gttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggcc +ctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtctcaaga +gtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacatag +gtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatgg +tgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccataca +ccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtca +gagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagca +acaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggccta +ggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaa +gctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtg +agcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatg +gaagctatgaggcccccacacaagggtcagcatcctctctagtaaacggggttgtcaggc +tcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacacca +caccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagaccccc +aagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggca +aacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatg +cagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtga +acgatccaaggttctgggctctagtggacaasgaaagagagcaccacctgagaggagagt +gccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaa +aggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcg +aagcccttggattcttgaacgaggatcactggwtggggagagagaactcaggaggtggtg +ttgaagggctgggattannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnagggcacagggccttggcattgg +ccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaag +ggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtca +cttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctg +aggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaact +ggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcg +ttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaa 
+aagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaag +ttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtgg +ttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggat +ggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctccttt +atttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttg +actgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgacca +ttgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaag +acaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgt +ggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaaca +cagtcaacatggtgcgcaggatcataggtgaggaagaaaagtacatggactacctatcca +cccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatct +taatgttgtcaggcc +>BRA/2016/FC_6706 +agtttgaagcgaaagctagcaacagtatcaacaggttttatttyggatttggaaacgaga +gtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgct +aaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggact +tctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcac +ggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggc +tatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgc +taggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgct +gaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttgga +cagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgtta +tatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctat +gctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttg +ggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgt +gacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatc +aagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggctt +cgcgttagcagcagccgccatcgcttggcttttgggaagctcaacgagccaaaaagtcat +atacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcag +caatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaaca +tggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttac +aacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcaga +catggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcaga 
+cactcaatatgtctgyaaaagaacgttagtggacagaggctggggaaatggatgtggact +ttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgg +gaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctccca +gcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggt +tgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctagg +acttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaa +taacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgc +tggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaagga +cgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacac +ggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctgg +ccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactcctt +gtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcac +agtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggt +ggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactga +aagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacat +tgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccat +tggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacac +agcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatca +aatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattct +cattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttat +gtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggg +gtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataa +cgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggc +agcagcagtcaagcaagcctgggaagatggtatctgcgggatctcttctgtttcaagaat +ggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatgg +agttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagag +attgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtactt +cgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatg +cccactcaaacatagagcatggaacagctttctwgtggaggatcatgggttcggggtatt +tcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgt +tattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattga +gagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatg 
+tgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcat +acccaagtctttagctgggccactcagccatcacaataccagagagggctacaggaccca +aatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcac +taaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgc +aagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgtt +ccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaag +caacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctccct +tggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaa +gatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaat +gagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactgg +aggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggt +atctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctc +gtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatgg +ttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcac +cttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggag +agcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgt +gaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccc +catcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctag +cgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcaga +tatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctc +gggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgc +ggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctc +cctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgac +catctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgt +gaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaa +gggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacaca +agttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaagg +atccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcagga +tctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggt +gcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaat +atttaagacaaaggatggggannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn 
+nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnagccttcgatgctgaagaagaagcagctaactgtcttagacttgca +tcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaac +aagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagc +ccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaac +agaaatcgtcgacttaatgtgcyatgccaccttcacttcacgtctactacagccaatcag +agtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtat +agcagcaagaggatacatttcaacaagggttgagatgggcgaggcrgctgccatcttcat +gaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatgga +caccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatca +ttctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttg +tctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagtt +ccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatggg +cgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatact +tgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgccca +gaggagggggcgcataggcaggaatcctaacaaacctggagatgagtatctgtatggagg +tgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttga +caatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagt +agcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaact +catgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataac +ctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacag +tgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggat +ggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgg +gaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgac +agagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaag +caggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgctctt +ggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcat +agggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctc +ggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggt +gctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcat +catggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagag +aacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggatt 
+ctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaac +tttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaat +ggcgatggccacgcaggctggagtgttgtttggtatgggcaaagggatgccattctacgc +atgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgac +cctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgca +ggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgt +tgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaa +aaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggac +cgcctgggggtggggggaggctggggctctgatcacagccgcaacctccactttgtggga +aggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttag +gggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggt +caagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaa +ccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcag +agaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccg +aggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggt +cattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagt +tcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgca +aagctatgggtggaacatagtccgtcttaagagtggggtagacgtctttcatatggcggc +tgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtgga +agaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccagg +agccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcg +actgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacaca +tgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgag +ccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgt +gaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagat +cattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacga +gaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtc +agcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgac +tggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaa +ggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcat +ggtctcttcctggttgtggaaagagctaggcaaacacaagcggccacgagtctgtaccaa +agaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagagga 
+aaaagrktggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtgga +caaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatggg +aaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagtcgcgccatctggta +tatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatca +ctggatggggagagagaactcaggaggtggtgttgaagnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn +nnnnnnnnnnnnnnacagggccttggcattggccataatcaagtacacataccaaaacaa +agtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgag +acaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacct +agtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtg +gctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggct +caaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgc +acatgccctcaggttcttgaatgatatgggaaaagtcaggaaggacacacaagagtggaa +accctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaa +gctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgat +tggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagc +aaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgat +ggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctg +gtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacag +agtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacaga +cattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagacc +gcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcatagg +tgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagg +gtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc +>DOM/2016/BB_0183 +gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagag +tttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgcta +aaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggactt +ctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacg +gcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggct +atggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgct +aggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctg 
+accacagctatggcagcggaggtcactagacgtgggagtgcatactacatgtacttggac +agaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttat +atacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatg +ctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgg +gttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtg +acgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatca +agagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttc +gcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcata +tacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagc +aatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacat +ggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttaca +acaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagac +atggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagac +actcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactt +tttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccggg +aagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccag +cacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggtt +gagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctagga +cttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaat +aacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgct +ggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggac +gcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacg +gcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggc +cacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttg +tgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcaca +gtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtg +gacatgcaaactctgaccccagttgggaggttgataaccgccaaccccgtaatcactgaa +agcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacatt +gtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccatt +ggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacaca +gcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaa +atttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaatcctc 
+attggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttccctcatg +tgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtgggg +tgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataac +gacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggca +gcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatg +gaaaacatcatgtggagatcagtagaaggggagctcaatgcaatcctggaagagaatgga +gtccaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagaga +ttgcccgtgcctgtgaacgagctgccccacggctggaaggcctgggggaaatcgtacttc +gttagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgc +ccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtattt +cacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgtt +attggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgag +agtgagaagaatggcacatggaggctgaagagggcccatctgatcgagatgaaaacatgt +gaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcata +cccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaa +atgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcact +aaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgca +agcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttc +cgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagc +aacttagtaaggtcagtggtgactgcaggatcaactgatcacatggatcacttctccctt +ggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaag +atcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatg +agtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactgga +ggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggta +tctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcg +tgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggt +tttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcacc +atggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggaga +gcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtg +aagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgacccc +atcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagc +gaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagat 
+atagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctca +ggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcg +gaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctcc +ctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgacc +atctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtg +aagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaag +ggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaa +gttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaagga +tccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggat +ctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtg +cagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaata +tttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttca +ggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtg +atcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcct +gttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcat +cctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagctataaaaaca +agactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcc +cttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccattctggaaca +gaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcaga +gtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtata +gcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatg +accgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggac +accgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcat +tctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgt +ctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttc +cagaaaacaaaacatcaagagtgggactttgtcgtgacaaccgacatttcagagatgggc +gccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatactt +gatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccag +aggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggt +gggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgac +aatatttacctccaagatggccttatagcctcgctctatcgacctgaggccgacaaagta +gcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactc 
+atgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataact +tacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagt +gtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatg +gacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctggg +aaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgaca +gagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagc +aggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttg +gggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcata +gggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcg +gaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtg +ctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatc +atggtagcagtaggtcttctgggcttgatcaccgccaatgaactcggatggttggagaga +acaaagagtgacctaagccatctaatgggaaggagagaggagggagcaaccataggattc +tcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaact +ttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatg +gcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgca +tgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgacc +ctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcag +gcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgtt +gtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaa +aagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggacc +gcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaa +ggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttagg +ggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtc +aagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaac +cagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcaga +gaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccga +ggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtc +attgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagtt +caagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaa +agctatgggtggaacatagtccgtctcaagagtggggtggacgtctttcatatggcggct +gagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaa 
+gaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccagga +gccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcga +ctgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacat +gagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagc +cagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtg +aatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatc +attggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgag +aaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtca +gcatcctctctagtaaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgact +ggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaag +gaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatg +gtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaa +gaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaa +aaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggac +aaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatggga +aaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtat +atgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcac +tggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcgga +tatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgct +ggctgggatacccgcatcagcaggtttgatctagagaatgaagctctaatcaccaaccaa +atggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaa +gtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgaga +caagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaaccta +gtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtgg +ctgctgcggaggtcagagaaagtgaccaactggttgcggagcaacggatgggataggctc +aaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgca +catgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaa +ccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaag +ctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgatt +ggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagca +aaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatg +gccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctgg 
+tcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacaga +gtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagac +attccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccg +cgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggt +gaggaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaaggg +tctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccac +agcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagccta +tagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagag +gacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcga +ccttccccacccttcaatctggggcctgaactggagatcagctgtggatccccagaagag +g +>EcEs062_16 +agtagttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaac +agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa +aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga +gcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatca +ggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtc +tcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttca +agaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgag +gcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggagg +tcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggcca +tatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggac +acatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccag +atgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatc +acaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccacta +ggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttga +ttagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcg +cttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctga +ttgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggta +tgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatgg +cacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcgg +aggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcc +caacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaa +cgttagtggacagaggctggggaaatggatgcggactttttggcaaagggagcctggtga 
+catgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatc +tggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatg +acacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaa +gagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacag +gccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcaca +aggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccac +actggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactg +tcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggagg +ctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaa +tggataaacttagactgaagggcgtgtcatactccttgtgtaccgcagcgttcacattca +ccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcaggga +cagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccag +ttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatga +tgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaaga +agatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactg +tgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttg +gaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaat +cattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggt +tgggtctgaacacaaagaatggatccatttcccttatgtgcttggccttagggggagtgt +tgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaaga +aggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggaca +ggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctggg +aagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcag +tagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgg +gatctgtaaagaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagc +tgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaata +acagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatgga +acagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaagg +ttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaa +aggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatgga +ggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacat +tgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccac 
+tcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtg +aagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacat +gtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaat +ggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggt +atggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcagtggtga +ctgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctca +tggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatgg +cagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaa +ttttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgc +tgatagcggcattcaaggtcagaccagcgttgctggtatctttcatcttcagagctaatt +ggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatct +ccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaa +tacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctga +caccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcgggg +ggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtca +tggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgt +tgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcc +tgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatgg +ccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtaca +ttgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtcccc +ggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtcccc +ccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatag +ccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtg +ctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgt +acagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaag +agggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaag +ggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccat +ggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccg +gagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggaca +ttggagctgttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagt +gtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgtta +gtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcga 
+tgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccagga +gagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttag +ctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgtt +atatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgcc +atgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtata +ttatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaa +caagggttgagatgggcgaggcggctgctatcttcatgaccgccacgccaccaggaaccc +gtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagaga +gagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttg +ttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacggg +tcatacaactcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagt +gggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtg +tcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctgg +ctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcagga +atcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaag +accatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcc +tcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttca +agcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctg +tttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgct +ttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggacca +gacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatc +atgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagccgcttttggag +tgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattg +acaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcgg +cccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgc +tgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatgg +tgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattg +catgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagc +aaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtgggtcttctgg +gcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatc +taatgggaaggagagaggagggagcaaccataggattctcaatggacattgacctgcggc +cagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaac 
+atgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggag +tgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgc +taatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgc +tcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgccc +agaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactg +acattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactca +tagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctg +gggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactgga +actcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagctt +ctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacag +gagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttct +actcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctca +aggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggt +tggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagag +ggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaa +aaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtcc +gtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgt +gtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcc +tctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgt +gcccatacaccagcactatgatggaaacactggagcgactgcagcgtaggtatgggggag +gactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggag +cgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatgg +acgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcggg +ctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaagga +tccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatacaggacatggg +cttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacgggg +ttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatga +ccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgc +cagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaag +agctaggcaaacacaaacggccacgagtctgtaccaaagaagagtttatcaacaaggttc +gtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtgg +aagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctga 
+gaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaagggg +aatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttc +tagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcag +gaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtc +gcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcatcagca +ggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggcct +tggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccag +ctgaaaaagggaaaacggttatggacattatttcgagacaagaccaaagggggagcggac +aagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaata +tggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaag +tgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggag +atgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatg +atatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaact +gggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggt +ccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccag +gggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgctcaaatgtggc +agctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctg +tgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaat +ggatgaccactgaagacatgcttgtggtgtggaatagagtgtggattgaggagaacgacc +acatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaaggg +aagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaaca +ttaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggact +acctatccacccaagttcgctacttgggtgaagaagggtctacgcctggagtgctgtaag +caccaatcctaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagc +ctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatg +gcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaacccca +cgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctgg +ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc +ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc +caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca +tgggagatcgga +>HND/2016/HU_ME59 +gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagag 
+tttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgcta +aaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggactt +ctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacg +gcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggct +atggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgct +aggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctg +accacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggac +agaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttat +atacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatg +ctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgg +gttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtg +acgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatca +agagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttc +gcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcata +tacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagc +aatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacat +ggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttaca +acaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagac +atggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagac +actcaataygtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactt +tttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccggg +aagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccag +cacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggtt +gagataacgcccawttcaccaagagccgaagccaccctggggggttttggaagcctagga +cttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaat +aacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgct +ggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggac +gcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacg +gcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggc +cacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttg +tgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcaca +gtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtg 
+gacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaa +agcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacatt +gtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccatt +ggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacaca +gcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaa +atttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctc +attggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatg +tgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtgggg +tgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataac +gacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggca +gcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatg +gaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatgga +gttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagagctccacagaga +ttgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttc +gtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgc +ccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtattt +cacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgtt +attggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgag +agtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgt +gaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcata +cccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaa +atgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcact +aaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgca +agcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttc +cgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagc +aacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctccctt +ggagtgcttgtgattctgctcatggtgcaggaagggctaaagaagagaatgaccacaaag +atcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatg +agtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactgga +ggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggta +tctttcatcttcagagctaattggacaccccgtgaaagcatgctactggccttggcctcg +tgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggt 
+tttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcacc +ttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggaga +gcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtg +aagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgacccc +atcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagc +gaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagat +atagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctca +ggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcg +gaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctcc +ctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgacc +atctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtg +aagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaag +ggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaa +gttggagtgggagtcatgcaagagggggtctttcacactatgtggcacgtcacaaaagga +tccgcactgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggat +ctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtg +cagctcctggccgtgccccccggagagagagcgaggaacatccagactctgcccggaata +tttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttca +ggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtg +atcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcct +gttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttacat +cctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaaca +agactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaggcc +cttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaaca +gaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcaga +gtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtata +gcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatg +accgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggac +accgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcat +tctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgt +ctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttc +cagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggc 
+gccaactttaaagctgaccgtgtcatagattccaggagrtgcctaaagccggtcatactt +gatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccag +aggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggt +gggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgac +aatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagta +gcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactc +atgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacc +tacacagatagaagatggtgctttgatggcacgaccaacaacaccatactggaagacagt +gtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatg +gacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctggg +aaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgaca +gagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagc +aggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttg +gggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcata +gggaagatgggctttggaatggtgacccttggggccagtgcatggctcatgtggctctcg +gaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtg +ctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatc +atggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagaga +acaaagagtgacctaagccatctgatgggaaggagagaggagggggcaaccataggattc +tcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaact +ttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatg +gcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgca +tgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgacc +ctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcag +gcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgtt +gtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaa +aagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggacc +gcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaa +ggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttagg +ggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtc +aagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaac +cagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcaga 
+gaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccga +ggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtc +attgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagtt +caagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaa +agctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggct +gagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaa +gaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccagga +gccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcga +ctgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacat +gagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagc +cagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtg +aatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatc +attggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgag +aaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtca +gcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgact +ggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaag +gaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatg +gtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaa +gaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaa +aaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggac +aaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatggga +aaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtat +atgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcac +tggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcgga +tatgtcctagaagagatgagttgcataccaggaggaaggatgtatgcagatgacactgct +ggctgggacacccgcatcagcaggtttgatctggagaatgaagctctaatcaccaaccaa +atggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaa +gtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgaga +caagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaaccta +gtggtgcaactcatccggaatatggaggctgaggaagttctagagatgcaagacttgtgg +ctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctc +aaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgca 
+catgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaa +ccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaag +ctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgatt +ggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagca +aaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatg +gccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctgg +tcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacaga +gtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagac +attccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccg +cgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggt +gatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaaggg +tctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc diff --git a/tests/functional/parse/zika.fasta b/tests/functional/parse/zika.fasta new file mode 100644 index 000000000..2bedf720e --- /dev/null +++ b/tests/functional/parse/zika.fasta @@ -0,0 +1,24 @@ +>PAN/CDC_259359_V1_V3/2015|zika|KX156774|2015-12-18|north_america|panama|panama|panama|genbank|genome|Shabman et al|https://www.ncbi.nlm.nih.gov/nuccore/KX156774|Direct Submission|Submitted (29-APR-2016) J. 
Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA|https://www.ncbi.nlm.nih.gov/pubmed/ +gaatttgaagcgaatgctaacaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcacc
aagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctacaacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccaccttcgcggaaatgaacactggaggagatgtggctcatctggcgctgatagcagcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacctgat
ggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatagccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatg
gcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagctgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagaggg
ggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccggga
gactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtccacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatccatgggtct +>COL/FLR_00024/2015|zika|MF574569|2015-12-XX|south_america|colombia|colombia|colombia|genbank|genome|Pickett et al|https://www.ncbi.nlm.nih.gov/nuccore/MF574569|Direct Submission|Submitted (28-JUL-2017) J. 
Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA|https://www.ncbi.nlm.nih.gov/pubmed/ +tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaacaataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgagaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggctttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccg
cagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaagaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatccttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgcc
ttggagggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggttggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgcgctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaa
gccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgttgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatc
ttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggtgcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcg
ggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggaga +>PRVABC59|zika|KU501215|2015-12-XX|north_america|puerto_rico|puerto_rico|puerto_rico|genbank|genome|Lanciotti et al|https://www.ncbi.nlm.nih.gov/nuccore/KU501215|Phylogeny of Zika Virus in Western Hemisphere, 2015|Emerging Infect. Dis. 
22 (5), 933-935 (2016)|https://www.ncbi.nlm.nih.gov/pubmed/27088323 +gttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaacaataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccaccaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttctgacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaagttgagataacgcccaattcaccgagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtactgcagcgttcacattcaccaagatcccgg
ctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtatttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggaccacttctcccttggagtgcttgtgatcctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatc
aatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggatgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaacgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccctcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaggcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagt
cattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccataatgcttttggggttgctgggaacagtctcgctgggaatcttcttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacctcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggcatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggctctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagtt
actacgtcgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgcaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagctgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgtataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgc
ctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagagga +>COL/FLR_00008/2015|zika|MF574562|2015-12-XX|south_america|colombia|colombia|colombia|genbank|genome|Pickett et al|https://www.ncbi.nlm.nih.gov/nuccore/MF574562|Direct Submission|Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA|https://www.ncbi.nlm.nih.gov/pubmed/ 
+tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggctttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtg
gacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaagaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggatcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtgaaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatccttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctct
gacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggttggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgcgctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacagatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggag
atgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgttgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaa
agctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggccgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggtgcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatc
tgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggaga +>Colombia/2016/ZC204Se|zika|KY317939|2016-01-06|south_america|colombia|colombia|colombia|genbank|genome|Quick et al|https://www.ncbi.nlm.nih.gov/nuccore/KY317939|Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples|Nat Protoc 12 (6), 1261-1276 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28538739 
+gacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaaagatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtttgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaa
actctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcataggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctgacaccact
ggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggagatgagtatc
tgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggctggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatggg
tggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgaytgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagt
tgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttgcgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggcgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagat +>ZKC2/2016|zika|KX253996|2016-02-16|oceania|american_samoa|american_samoa|american_samoa|genbank|genome|Wu et al|https://www.ncbi.nlm.nih.gov/nuccore/KX253996|Direct Submission|Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China|https://www.ncbi.nlm.nih.gov/pubmed/ +agttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagccttcttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactaatgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacttggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaagg
tatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcggacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggctgataaccgctaaccccgtaatcactgaaagcactgagaactccaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaggagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgtgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcat
tagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcaccaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccaggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagtaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggcttgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggagctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtgatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacaccatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttca
ggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgccgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtctggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcacgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccttgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcg
gccagcctcagcttgggccatctacgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtagccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagctgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccatgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctttccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcccaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggacgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagatccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagtt
gtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcatcagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaagacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggagtatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggctgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaggccaattgatgataggtttgcacatgccctcaggttcttgaatgatatggggaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaggaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttagtgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccaccctttaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatccatgggtct +>VEN/UF_1/2016|zika|KX702400|2016-03-25|south_america|venezuela|venezuela|venezuela|genbank|genome|Blohm et al|https://www.ncbi.nlm.nih.gov/nuccore/KX702400|Complete Genome Sequences of Identical Zika virus 
Isolates in a Nursing Mother and Her Infant|Genome Announc 5 (17), e00231-17 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28450510 +agttgttactgttgctgactcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagatgctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagaaactagtgtcggaattgttggcctccttctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagccgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggtttcgctttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgtcgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtttgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgttgtcagttcatggctcccagcacagtgggatgattgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaact
tagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaagaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcacccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttctgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaatcttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatccttcatcttcagagctaattggacaccccgtgaaagcatgctgctggcct
tggcctcgtgtcttttgcaaactgcgatctccgccttggagggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggttggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgcgctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaa
gctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaataaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgttgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcg
gggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaatatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggtgcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcattagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatg
aactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatccatgggtctt +>DOM/2016/BB_0059|zika|KY785425|2016-04-04|north_america|dominican_republic|dominican_republic|dominican_republic|genbank|genome|Metsky et al|https://www.ncbi.nlm.nih.gov/nuccore/KY785425|Zika virus evolution and spread in the Americas|Nature 546 (7658), 411-415 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28538734 
+tggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactacatgtacttggacagaaacgatgctggggaggccatatctttcccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtkaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggactggattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgccaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagatacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctc
acaaatcctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttccctcatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaatgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgttagagcagcaaagacaaataacagctttgtcgtgnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcagtggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctgacaccactggcccggggtacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtg
tggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcatgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagctataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccattctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaaccgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgarkcaagaatgctccttgacaatatttgcctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacttacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacgga
gagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgatcaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggagcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacrgcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtctcaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtc
caccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcatcctctctagtaaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaasgaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggwtggggagagagaactcaggaggtggtgttgaagggctgggattannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccattgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgaggaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctg
gagtgctgtaagcaccaatcttaatgttgtcaggcc +>BRA/2016/FC_6706|zika|KY785433|2016-04-08|south_america|brazil|brazil|brazil|genbank|genome|Metsky et al|https://www.ncbi.nlm.nih.gov/nuccore/KY785433|Zika virus evolution and spread in the Americas|Nature 546 (7658), 411-415 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28538734 +agtttgaagcgaaagctagcaacagtatcaacaggttttatttyggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagccgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgyaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacg
cacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgtggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcttctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttctwgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgca
attttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgacaacatcaccttggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcgggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgcyatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcrgctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacgg
caatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcctaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgctcttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaggctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggctctgatcacagccgcaacctccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaacc
agatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtagacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaagcggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagrktggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagtcgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaat
gatatgggaaaagtcaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc +>DOM/2016/BB_0183|zika|KY785420|2016-04-18|north_america|dominican_republic|dominican_republic|dominican_republic|genbank|genome|Metsky et al|https://www.ncbi.nlm.nih.gov/nuccore/KY785420|Zika virus evolution and spread in the Americas|Nature 546 (7658), 411-415 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28538734 +gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactacatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttg
ggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgccaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaatcctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttccctcatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaatgcaatcctggaagagaatggagtccaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcctgggggaaatcgtacttcgttagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccg
ttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatggcacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcagtggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcaccatggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagac
aagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagctataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccattctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaaccgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggccttatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacttacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgatcaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggagcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggc
catctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatagtccgtctcaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcatcctctctagtaaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgg
gaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggatacccgcatcagcaggtttgatctagagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcggagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgaggaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatccccagaagagg +>EcEs062_16|zika|KX879603|2016-04-XX|south_america|ecuador|ecuador|ecuador|genbank|genome|Marquez et al|https://www.ncbi.nlm.nih.gov/nuccore/KX879603|First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador|Genome Announc 5 (8), e01673-16 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28232448 
+agtagttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaatatgtctgcaaaagaacgttagtggacagaggctggggaaatggatgcggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccaattcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagactgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggt
tccagctcagatggcggtggacatgcaaactctgaccccagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatccatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaagaaccccatgtggagaggtccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcagtggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctgaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaggtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctgctggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcacct
tggcaatcctggctgctctgacaccactggcccggggcacactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagttatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcgctgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcttggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagctgttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttgcatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaagcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgctatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacaactcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagatgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcagg
aatcccaacaaacctggagatgagtatctgtatggaggtgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccataatggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagccgcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgactcttggggccagcgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtgggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctaatgggaaggagagaggagggagcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatga
agaacccgtgttggtgcaaagctatgggtggaacatagtccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaacactggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatacaggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagtttatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagtcgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcatcagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacggttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcattcggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgctcaaatgtggcagctcctttatttccacagaagggacctccgactgatgg
ccaatgccatttgttcatctgtgccagttgactgggttccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaatagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacgcctggagtgctgtaagcaccaatcctaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatccatgggagatcgga +>HND/2016/HU_ME59|zika|KY785418|2016-05-13|north_america|honduras|honduras|honduras|genbank|genome|Metsky et al|https://www.ncbi.nlm.nih.gov/nuccore/KY785418|Zika virus evolution and spread in the Americas|Nature 546 (7658), 411-415 (2017)|https://www.ncbi.nlm.nih.gov/pubmed/28538734 
+gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcatcaatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaaagatctggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcatactatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattggggatgaataagtgttatatacagatcatggatcttggacacatgtgtgatgccaccatgagctatgaatgccctatgctggatgagggggtggaaccagatgacgtcgattgttggtgcaacacgacgtcaacttgggttgtgtacggaacctgccatcacaaaaaaggtgaagcacggagatctagaagagctgtgacgctcccctcccattccactaggaagctgcaaacgcggtcgcaaacctggttggaatcaagagaatacacaaagcacttgattagagtcgaaaattggatattcaggaaccctggcttcgcgttagcagcagctgccatcgcttggcttttgggaagctcaacgagccaaaaagtcatatacttggtcatgatactgctgattgccccggcatacagcatcaggtgcataggagtcagcaatagggactttgtggaaggtatgtcaggtgggacttgggttgatgttgtcttggaacatggaggttgtgtcaccgtaatggcacaggacaaaccgactgtcgacatagagctggttacaacaacagtcagcaacatggcggaggtaagatcctactgctatgaggcatcaatatcagacatggcttcggacagccgctgcccaacacaaggtgaagcctaccttgacaagcaatcagacactcaataygtctgcaaaagaacgttagtggacagaggctggggaaatggatgtggactttttggcaaagggagcctggtgacatgcgctaagtttgcatgctccaagaaaatgaccgggaagagcatccagccagagaatctggagtaccggataatgctgtcagttcatggctcccagcacagtgggatgatcgttaatgacacaggacatgaaactgatgagaatagagcgaaggttgagataacgcccawttcaccaagagccgaagccaccctggggggttttggaagcctaggacttgattgtgaaccgaggacaggccttgacttttcagatttgtattacttgactatgaataacaagcactggttggttcacaaggagtggttccacgacattccattaccttggcacgctggggcagacaccggaactccacactggaacaacaaagaagcactggtagagttcaaggacgcacatgccaaaaggcaaactgtcgtggttctagggagtcaagaaggagcagttcacacggcccttgctggagctctggaggctgagatggatggtgcaaagggaaggctgtcctctggccacttgaaatgtcgcctgaaaatggataaacttagattgaagggcgtgtcatactccttgtgtaccgcagcgttcacattcaccaagatcccggctgaaacactgcacgggacagtcacagtggaggtacagtacgcagggacagatggaccttgcaaggttccagctcagatggcggtggacatgcaaactctgaccc
cagttgggaggttgataaccgctaaccccgtaatcactgaaagcactgagaactctaagatgatgctggaacttgatccaccatttggggactcttacattgtcataggagtcggggagaagaagatcacccaccactggcacaggagtggcagcaccattggaaaagcatttgaagccactgtgagaggtgccaagagaatggcagtcttgggagacacagcctgggactttggatcagttggaggcgctctcaactcattgggcaagggcatccatcaaatttttggagcagctttcaaatcattgtttggaggaatgtcctggttctcacaaattctcattggaacgttgctgatgtggttgggtctgaacacaaagaatggatctatttcccttatgtgcttggccttagggggagtgttgatcttcttatccacagccgtctctgctgatgtggggtgctcggtggacttctcaaagaaggagacgagatgcggtacaggggtgttcgtctataacgacgttgaagcctggagggacaggtacaagtaccatcctgactccccccgtagattggcagcagcagtcaagcaagcctgggaagatggtatctgcgggatctcctctgtttcaagaatggaaaacatcatgtggagatcagtagaaggggagctcaacgcaatcctggaagagaatggagttcaactgacggtcgttgtgggatctgtaaaaaaccccatgtggagagctccacagagattgcccgtgcctgtgaacgagctgccccacggctggaaggcttgggggaaatcgtacttcgtcagagcagcaaagacaaataacagctttgtcgtggatggtgacacactgaaggaatgcccactcaaacatagagcatggaacagctttcttgtggaggatcatgggttcggggtatttcacactagtgtctggctcaaggttagagaagattattcattagagtgtgatccagccgttattggaacagctgttaagggaaaggaggctgtacacagtgatctaggctactggattgagagtgagaagaatgacacatggaggctgaagagggcccatctgatcgagatgaaaacatgtgaatggccaaagtcccacacattgtggacagatggaatagaagagagtgatctgatcatacccaagtctttagctgggccactcagccatcacaataccagagagggctacaggacccaaatgaaagggccatggcacagtgaagagcttgaaattcggtttgaggaatgcccaggcactaaggtccacgtggaggaaacatgtggaacaagaggaccatctctgagatcaaccactgcaagcggaagggtgatcgaggaatggtgctgcagggagtgcacaatgcccccactgtcgttccgggctaaagatggctgttggtatggaatggagataaggcccaggaaagaaccagaaagcaacttagtaaggtcaatggtgactgcaggatcaactgatcacatggatcacttctcccttggagtgcttgtgattctgctcatggtgcaggaagggctaaagaagagaatgaccacaaagatcatcataagcacatcaatggcagtgctggtagctatgatcctgggaggattttcaatgagtgacctggctaagcttgcaattttgatgggtgccaccttcgcggaaatgaacactggaggagatgtagctcatctggcgctgatagcggcattcaaagtcagaccagcgttgctggtatctttcatcttcagagctaattggacaccccgtgaaagcatgctactggccttggcctcgtgtcttttgcaaactgcgatctccgccttggaaggcgacctgatggttctcatcaatggttttgctttggcctggttggcaatacgagcgatggttgttccacgcactgataacatcaccttggcaatcctggctgctctgacaccactggcccggggc
acactgcttgtggcgtggagagcaggccttgctacttgcggggggtttatgctcctctctctgaagggaaaaggcagtgtgaagaagaacttaccatttgtcatggccctgggactaaccgctgtgaggctggtcgaccccatcaacgtggtgggactgctgttgctcacaaggagtgggaagcggagctggccccctagcgaagtactcacagctgttggcctgatatgcgcattggctggagggttcgccaaggcagatatagagatggctgggcccatggccgcggtcggtctgctaattgtcagttacgtggtctcaggaaagagtgtggacatgtacattgaaagagcaggtgacatcacatgggaaaaagatgcggaagtcactggaaacagtccccggctcgatgtggcgctagatgagagtggtgatttctccctggtggaggatgacggtccccccatgagagagatcatactcaaggtggtcctgatgaccatctgtggcatgaacccaatagccataccctttgcagctggagcgtggtacgtatacgtgaagactggaaaaaggagtggtgctctatgggatgtgcctgctcccaaggaagtaaaaaagggggagaccacagatggagtgtacagagtaatgactcgtagactgctaggttcaacacaagttggagtgggagtcatgcaagagggggtctttcacactatgtggcacgtcacaaaaggatccgcactgagaagcggtgaagggagacttgatccatactggggagatgtcaagcaggatctggtgtcatactgtggtccatggaagctagatgccgcctgggacgggcacagcgaggtgcagctcctggccgtgccccccggagagagagcgaggaacatccagactctgcccggaatatttaagacaaaggatggggacattggagcggttgcgctggattacccagcaggaacttcaggatctccaatcctagacaagtgtgggagagtgataggactttatggcaatggggtcgtgatcaaaaatgggagttatgttagtgccatcacccaagggaggagggaggaagagactcctgttgagtgcttcgagccttcgatgctgaagaagaagcagctaactgtcttagacttacatcctggagctgggaaaaccaggagagttcttcctgaaatagtccgtgaagccataaaaacaagactccgtactgtgatcttagctccaaccagggttgtcgctgctgaaatggaggaggcccttagagggcttccagtgcgttatatgacaacagcagtcaatgtcacccactctggaacagaaatcgtcgacttaatgtgccatgccaccttcacttcacgtctactacagccaatcagagtccccaactataatctgtatattatggatgaggcccacttcacagatccctcaagtatagcagcaagaggatacatttcaacaagggttgagatgggcgaggcggctgccatcttcatgaccgccacgccaccaggaacccgtgacgcatttccggactccaactcaccaattatggacaccgaagtggaagtcccagagagagcctggagctcaggctttgattgggtgacggatcattctggaaaaacagtttggtttgttccaagcgtgaggaacggcaatgagatcgcagcttgtctgacaaaggctggaaaacgggtcatacagctcagcagaaagacttttgagacagagttccagaaaacaaaacatcaagagtgggactttgtcgtgacaactgacatttcagagatgggcgccaactttaaagctgaccgtgtcatagattccaggagrtgcctaaagccggtcatacttgatggcgagagagtcattctggctggacccatgcctgtcacacatgccagcgctgcccagaggagggggcgcataggcaggaatcccaacaaacctggagatgagtatctgtatggagg
tgggtgcgcagagactgacgaagaccatgcacactggcttgaagcaagaatgctccttgacaatatttacctccaagatggcctcatagcctcgctctatcgacctgaggccgacaaagtagcagccattgagggagagttcaagcttaggacggagcaaaggaagacctttgtggaactcatgaaaagaggagatcttcctgtttggctggcctatcaggttgcatctgccggaataacctacacagatagaagatggtgctttgatggcacgaccaacaacaccatactggaagacagtgtgccggcagaggtgtggaccagacacggagagaaaagagtgctcaaaccgaggtggatggacgccagagtttgttcagatcatgcggccctgaagtcattcaaggagtttgccgctgggaaaagaggagcggcttttggagtgatggaagccctgggaacactgccaggacacatgacagagagattccaggaagccattgacaacctcgctgtgctcatgcgggcagagactggaagcaggccttacaaagccgcggcggcccaattgccggagaccctagagaccattatgcttttggggttgctgggaacagtctcgctgggaatctttttcgtcttgatgaggaacaagggcatagggaagatgggctttggaatggtgacccttggggccagtgcatggctcatgtggctctcggaaattgagccagccagaattgcatgtgtcctcattgttgtgttcctattgctggtggtgctcatacctgagccagaaaagcaaagatctccccaggacaaccaaatggcaatcatcatcatggtagcagtaggtcttctgggcttgattaccgccaatgaactcggatggttggagagaacaaagagtgacctaagccatctgatgggaaggagagaggagggggcaaccataggattctcaatggacattgacctgcggccagcctcagcttgggccatctatgctgccttgacaactttcattaccccagccgtccaacatgcagtgaccacttcatacaacaactactccttaatggcgatggccacgcaagctggagtgttgtttggtatgggcaaagggatgccattctacgcatgggactttggagtcccgctgctaatgataggttgctactcacaattaacacccctgaccctaatagtggccatcattttgctcgtggcgcactacatgtacttgatcccagggctgcaggcagcagctgcgcgtgctgcccagaagagaacggcagctggcatcatgaagaaccctgttgtggatggaatagtggtgactgacattgacacaatgacaattgacccccaagtggagaaaaagatgggacaggtgctactcatagcagtagccgtctccagcgccatactgtcgcggaccgcctgggggtggggggaggctggggccctgatcacagccgcaacttccactttgtgggaaggctctccgaacaagtactggaactcctctacagccacttcactgtgtaacatttttaggggaagttacttggctggagcttctctaatctacacagtaacaagaaacgctggcttggtcaagagacgtgggggtggaacaggagagaccctgggagagaaatggaaggcccgcttgaaccagatgtcggccctggagttctactcctacaaaaagtcaggcatcaccgaggtgtgcagagaagaggcccgccgcgccctcaaggacggtgtggcaacgggaggccatgctgtgtcccgaggaagtgcaaagctgagatggttggtggagcggggatacctgcagccctatggaaaggtcattgatcttggatgtggcagagggggctggagttactacgccgccaccatccgcaaagttcaagaagtgaaaggatacacaaaaggaggccctggtcatgaagaacccgtgttggtgcaaagctatgggtggaacatag
tccgtcttaagagtggggtggacgtctttcatatggcggctgagccgtgtgacacgttgctgtgtgacataggtgagtcatcatctagtcctgaagtggaagaagcacggacgctcagagtcctctccatggtgggggattggcttgaaaaaagaccaggagccttttgtataaaagtgttgtgcccatacaccagcactatgatggaaaccctggagcgactgcagcgtaggtatgggggaggactggtcagagtgccactctcccgcaactctacacatgagatgtactgggtctctggagcgaaaagcaacaccataaaaagtgtgtccaccacgagccagctcctcttggggcgcatggacgggcctaggaggccagtgaaatatgaggaggatgtgaatctcggctctggcacgcgggctgtggtaagctgcgctgaagctcccaacatgaagatcattggtaaccgcattgaaaggatccgcagtgagcacgcggaaacgtggttctttgacgagaaccacccatataggacatgggcttaccatggaagctatgaggcccccacacaagggtcagcgtcctctctaataaacggggttgtcaggctcctgtcaaaaccctgggatgtggtgactggagtcacaggaatagccatgaccgacaccacaccgtatggtcagcaaagagttttcaaggaaaaagtggacactagggtgccagacccccaagaaggcactcgtcaggttatgagcatggtctcttcctggttgtggaaagagctaggcaaacacaaacggccacgagtctgtaccaaagaagagttcatcaacaaggttcgtagcaatgcagcattaggggcaatatttgaagaggaaaaagagtggaagactgcagtggaagctgtgaacgatccaaggttctgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtgtacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagccgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattcttgaacgaggatcactggatggggagagagaactcaggaggtggtgttgaagggctgggattacaaagactcggatatgtcctagaagagatgagttgcataccaggaggaaggatgtatgcagatgacactgctggctgggacacccgcatcagcaggtttgatctggagaatgaagctctaatcaccaaccaaatggagaaagggcacagggccttggcattggccataatcaagtacacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagttatggacattatttcgagacaagaccaaagggggagcggacaagttgtcacttacgctcttaacacatttaccaacctagtggtgcaactcatccggaatatggaggctgaggaagttctagagatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagcaacggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaattgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaaggacacacaagagtggaaaccctcaactggatgggacaactgggaagaagttccgttttgctcccaccacttcaacaagctccatctcaaggacgggaggtccattgtggttccctgccgccaccaagatgaactgattggccgggcccgcgtctctccaggggcgggatggagcatccgggagactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaagggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggtt
ccaactgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgcttgtggtgtggaacagagtgtggattgaggagaacgaccacatggaagacaagaccccagttacgaaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc diff --git a/tests/functional/parse/zika.fasta.gz b/tests/functional/parse/zika.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..4478e81df193960315700b78beb897a09a3b41dd GIT binary patch literal 10908 zcmZ{KRahMDmM-oz5JGSb5Zv9}-CcsyxVyUtcL?s@Snv)OJU~NZf#B|vV8g#>_L(zB zZt7}1&-zx?TdQ6v@^}=K(8Ci`7p)wzfP|3&Z^8pwGC2f$@5$ z`qx^tDtVy){$*Bjk>jb*@`mrl$>@}gsM9a#%JQ1;^EtHf?ys|`1+{(tU7*$ZABu1%wZ@mVVy`-9Cz``i9l=HjmUKT}i$lmAkK%~B8$8)VDPF#-PAX5>?+y9k|zN4G<-3!d)zDH@n96-f^klWBpC4h zqIGNCq{u*bny7mbE0dfMj2+Oq<%iptc01+f;F-8p7)?*m_wyMeoNX-t@@tFi*weFh z;vO?=NDSx$PR1?L<`wGUKC0}j&IRv^^=^pvm}M7L;l*}G?^d}~PWY1Qz`+yrRr%J> zEqO+T0-Q~Qt}aTqtibqSTi;tY75%`yi|)EuLP#x35`CkmQtj6T&!gvTYei$%hjyJ> zr}nd~@LEGp)gXiCk_z`u=0@P6kFZA?xHwwz_h^(lR)4kOz^Vpt7=(i0@Z23WOju7r zRKma*$G{XNxUl0NNHD}gYO4DLxoArDe0`|e!i^dmuhY9nLp%wKqrm{+6R_|)q#2NE z5-eabCLK4}qa;m%6E!FujF?VJxAF{b>bmM;>OIX;EOLN@$I-XSj`81xWVK*6K@XS} zZ9zXUKSxO~8ett1NOl6qb?>7N2{%mcO~?(rc_N}ZxI)<%h6ys4F{U5IFl_xXS@+{x z!$`k%@1cjXHJrOd8icIWSKN1}%DD+gb6=aQeGS1H?xu9PM5hk(Q#^%dB8}S4NR5ib za4_BPYY6dVBFik6nUt%hAcq58tzLYl?)ri5nP$?+Yn^8BiwJ0}i(4=Tv&=dYJEB14 zu00EotRdNp^Ri_BPSNrqvwbg#(Uh3EDLuPzvkvmYyK_TXk|?V4&czAVAw`KrWgYcS zJ5d;zXI^cu`I*az$zjXS(>TsGoO-ObG=1|o*%<3GxgN6=g$UjtnbJ2a;!9AKRLNggC1i3Tnx01e1N{ zzD;)8Gznk8C(|ozJxuBhZc5@+^i1}iOSiP-xbjW5TLx8CTgu)GKp+K3fEj%$FQsm% zQy=?q%=6Z6Y+YLhw~JeUf`n0h~q`2ZEXqG@O-j9wAqpX7esKpaUGO>D# z=##R#cB9OX8fxQ-BaEatmkc&1jt-vFn4VOCI(z=~Y1&V5Ba>)rxeo_FeL!S$3T8AO z2zih^{bD3-JR&^IpA5=q?mbr*RI(#BpgNS#L+B85S$nxtU+M{iz_Bt7GK_>=b6(m3 zG41*ti<%E>Vw9lpxCOK__6f={y>v;28VH^Jha3>ylNxi=e4-!&B^RAom|=?q4PKgO 
znxy!kF#Wdp_f2NhOEl@Ig3Ogxdhtpn{dooXMEl6e5W$2Uh(`?mj_0;912E@(4pPWB zS${Qufv?nIt-NSGTpyMOvTJD7$|YQPcym)+qrIsa!KAZIeKVf6J9Bs ziBba*wb?ZqKe|Y?h7!t+jaT#(kdFuGL{hlouJNf1Ptc?3+--`{TT(28_1*crG;g*z zl^>}=J{b_8sKf^QIrZOSG-e&q4a_BjJS<}vcOT{tmgu4@u7eB^HI(tPkP2+pT0GZ? z7IS0!JV}3!UjI|RN&(KpFxx*EQ+ia@Seg-uu?dOwM@%3R4XPqt8EG-KD!~b|%J;w# zhXf>af2{7tiiI4A1g`#6*+<4!He2B%$8p84R1^_ke_ec3ps~cXaY{G%&Gk953144O z(F3U!b3TUgoEvXn&2i&~d>AMYo?|!I8R0m`*sZr#t!?O9ZXJ`J%JMwJLYx=VXsPr_ zyu#LL!#BT(B{7|8W>1gaiX=**W}#ipAdz3%fJDDS_SsG1mzss-rx5OF+D!`K)e$r+ zx-uC{jA?2=n6omo{-U*e`gOEn8SeeK#iJ8tdA!HORW^e?8)5ektUJ7EVN#>;yMqOK zh>Z)1vs&^ya;3sy{B>;Ma4|vbM+Bsirt*?F!o9EcrDNq5&hVC1YHo778}|*akJ93fUqf8jG@Ky64nyL3#n0H zl9(AH={#m5%f@bvLD z264q@Sq}dq8Inp)D{K1`{<5z=;1DDG)bk-19htn~zb1Bq0K~Jm>w~DpKy?E6+uY#2Q^#QSoQ^LC^ z%T7(k)=zmm{GNUqBQ|5+@w} zq}*7kw;Y#dkcEl<&R05u$BS3&Aett{cl8`y#<%Vj-We; zp0e~nEiQrZsA;Q{@Yz|(z}e}qxg0Ae0z48TF+47u)tm(YHbdIz!)dB*74do`>@a|Z zjJC`SX?m8z@R-quZUcHBdx{%D?7uGGN=f5s;ZhQj;UML)8p^m1Ry#<%1@uF(5(;y! z)S@!}8Qx|w0o7IC!xHyFZ3rc#cXND_t6p3mw#WEsdXssEQ@g$eaCp zHq_re^^ao-D&{Va5w9#WGAFH1r@Utzfr>N(v2D7&LR|3=)hb<1D~!9@ldrpy(r5qY$BQUcTt1j!7>+-=xzEDYWg2I1HVaS95Z9FpG5`)Dm7t+hwC zdVKZ*-jaI3vc%F{cyX<*QGi|FmqimnEAYc*{Sq+chtypagd(}AXl?ZnC*Zlo5PTB` zCJ~HLNvt5$Z6aB4;odluB9UidZRi*X3RPv=LrukA>3i(9Y}!UU*)H8DN<+()V+Ug@ zT=M!71*@Ufgzl^g^>;W}ACl#`Z~SizbGPHDci}92lm?s3YBdaJ&0=V-vnw&`ZJF)U z%(?Rp=r6CVt@w3G4Xb%Rl8{Gy@}u{l|4NND&X6Sw7jhd{OY5!J$5e)?6B(%D{@rJd9w!`7AZsRL3b!KMg?6M=V2JxgQZ_?biRVlXHF|YeuhuZ2c~qy1 zzDV34xw@;1fKEu(Uf5~P7Z3rSY5x1$%f&=}f_wpEj8ZK@NFAdLp8}uqocMzq)MNw0 z0tg~!$Qf<82s}zE9u_Lkr-@JNiVjk9q*|!H;lYH3oi?zU(y@GGA|LZ4iCNsl;KHHZ z&KcEMUTM(GW4GqV$dZ%Hi_hWOl3amdzVLC(542fzib5#Ez;Dq3wi38=x-jzWoNC2t zMEs2wY(vs=7pH~s|Ka&02F~2Ykd8i;rwOz+d%%o|~hA;(j&@ z-$dyw9PpIf|Lf1+^IyWN%gf8%{sjnN;L`skIs*PA5qGintOi!3iUWELY z@SQ0^sx~jjT-Vfcj*kymA3^uduS;0LpAu=Wk{tS$EL0j zuFg?p;SOF%pF;@^HT#=K$A(V;_ZlaX5#$v!p?Uvk3p;6^9@;kv%i1V;lK z2i@uA+=Zk$`oo0!79AT0K?|TBz5t_b?J{dXO0#s>K#&+q1mlJI2t?4ruxodZt)1;l 
zm9j8sZx*4TdDqcai>#nIaeCRhhQs5ZlK5IGTq{K)r#V8yJDl_mmm(`oJ9lXthw=(r zWhzZej}b3n0>hPQwa$WOW$8plCk8QSkO`N?aY1uN(r#D_{M~y{q*{854(fnYDU96T zLZ>0kvR~m*S!eZ}?FtQB1WzD6S=HAZb}y&5jLK zV}?l}jVHH0gTu!i zME(>VimTl3r}e9tP6LAc+1HDLc#5o50xf2%8ceJZa<9=4L9qd|IkiSqblGDJTGu7 zUr*$rJ8<{>aVtcpo2=3`#=$>0~q#Q6eUvAj?dL?O?h|)2dgL?S?l$ zz=>lDyPL~cYNTv}l)Pn7OTYo^ORt6S%C?(3nW&)%pwMxVN z-tPSh-d-Xv{nD*(D%1V$ZMjjaW&_HMN>|e5Fqz?Qg=WLQRw}+kmqDe`m*e!AIo(LE zK9YjD=m=p?N4h0mvh8ly*O_G3_3pLNPN&<^P6ZQFivjK#QWww&zZj?I03I5yGccOh z2tD&j6uv7A+;r>`jb}U+};K}Fkt*dQ)DCMKO?!Mg!{;y0#n6O0**g71UD5biAmLFe;$6~Dd1kZkm@;bTNeV9K{gTvbiQ4EB@8ZQ z-vkY8^Y@cLZJ4ExxPg8H+)HowW3XsJ4B1E*wv*#ui;L8S`@a|0upsPGdi_}UEUfPL z?Ej?IA)034H-{m$MJO(j8~&`D3af-@9qSI<*BAUS1Sl5L3V}w#G|a*3?g;F<-)K`NvdY@rP@1b6 z6Ayr}bw5|bult=(_meq0u003+u<3qr_jkV^ybBL?>-F}xxV{_d)zoZw4dlBvRlOn5 z4SYxg_vSyh^FH^F&yW66Hc`1A{{3esYxren+~H+Pw69?K>CaAHZ!b8jwM!XL@#@c4 z@OXisD}3Ym@JwZ@bpNS;Y1@>&IDGv2v(3GUauC$CC;wr?!35?cwAUzTdGOeo`IG8B ze%HnL9%LuYzbL5v>3(DH&AnaszlB*|w-AB3dzrp_UOz?dFyrt<@me@2yQ$)9AVdHP z9T7aL6IrA)Z&2#}IbR|Pn*t&X(n4^#8zq~{IJmrx{s0Df;-}J`>+~~8!!QMO9eO1C zm`NO6ImSK)X`t}nv8s?N{ej7nTql*To(p5YCE)vEE-P`(Z`cstMj~zBQhf~=^g_uL zd3O}oh!BQZO3G$>g<@P(trw)on-r5!NHfrlJ_8%wE2i6z8w zi6_f?qwQUNT@X=iL78YXPHjQ~ zgk#dMVeK!mEi{-N2iQ!HC_fB4XIkFGVTojuhGrY>7kUl>gl}ZnO#P8Vr`Qe*_C~In zus`lvC7TXD3nHM;5iNi@+IqRy<|y^7Sm#0fQAQ zh$8`~dxluAw;iK~k)U@r&vrnY$H8#_@A@51tijC})l)Wixnjpp(6@&haI)Gl3wh-i z!da?O54eEV=iNJt`rgcJLF?~D7CH5i_io=D^aVf#a@uIT?-}b|oKs-8=yTwo)Lj~+ zgIm^Up5*;_UyO&N^VTlaSNE%W%zjYsWpg8#F*yH}HTb(?-+o+?GFgB2)Ts|vfHcIW zIhDgwA+*;+@>fWJ} zBzIJ81+BZ?FuKqkyOx4$;>dnKBfv|X!yaj{r%ss;g>pfo_meifnaxxml$9XZQ?tyL z*@Ja(yKKPx-yq}MY-DRpBp$s+cmTHSc|6^wUh1Zeha#}G$GH+capw@1XVz^u>b?us zPV6IkU{s}3Q`Dz=E_+}c@sfGUuv-#*IU)@X{PeN{fN6>lf(s`;ppzsGwwl;G;g}&0 zmgb*G>NjZpm=xbX#9P{fe*D15`+ph-KYX_vgIFFIPmOFjbA6V3YEDUm6MHTn7&)UL zuL|3ZM{|NNKLo=vue~lLyvGG!;@l*@22(w8FLKpmz1Yi6(;o)5(f3(CaXVAL92qdM z{7(FC8mJWeHN1>Z_9p^JbIdSt)UP1t(>PCYDF*c72oUotC#{7p+HOGRI?KVBX@U&FrypxcJTHI93|04JDdbL8zU>0g?Gv@B- 
z#c#J4qm6yN*WjfYo?peD-n{K+P7W!M^}b%vH3V!ETr^$zDw2QJ`&ipSvCew;fz_li zVSaaC)C2=7V(M^*)w6v3-sk(tjhUEjzpsc{3)tP_SijY0=!6$JUU*^`-TFFKxleda zl%P=d5drHf>BUMv%j>iCOZ?O|H|t+p5mS=sz!uIwmhSg&`O$o0V*TmVYTma`{tGAT z1&IYyn690B^g#&bx-{_RC)O^V9zioBTh?_cU{I8W*veu0PuJE8>S4Ea{xwK8(uz(S zj6ee)Z1e;V)%5o*Ym=PO*`M*4nP&4wt)>=-CC_$@n=bTvKP)8z3Uk5Ab)SBm<+^mz zzZWayP6QM()4=z;ce0xpEAvYQ3O-1<1BGxi&qG0J1s^VSTunYW*zA7(`-LSgltL8q z@w=}UOoj>;dLa|B=*gXe;s@OXaQ$#}^^H1!7s}*%CG!*-h<+7giV_X9ceju8;az6J z#FHTMnj`1U0#9|40Pu}lf0+T6%DfVs6!u**K#&A5&BW2CEtlqon3rPk05rzJy)!m% zbHtUwL`A)+rK1%QC`=AZ*#Q<;Om!&Mzy(^^WR+0iFi6O$#3o8I~x$8Y5E z#AE{DH$nldXssAsssMPc7+Y980Qkh;4dAO$q(}$MLE?Yw^1)N?TP}YX{{4a+4{rVR ziW)j4ar*bhzJB62B7I~CH?{&~@ho|5O=GDT)vwDEnwYOrWJgIbDK>m2ME5yF!-|^Q{WR!=CMVhjP$3c|0Xj`X}1xR7$L?aO?pX7SgTq)herH zbgx-veE+SMQe^YC2@iM1Q!1s{&K1;$Ql-M4ZJF{W zso#3ip_RLoW>6H9>Ar(iruMKQf1i~YlDbhQtApA9pE@g;-Tq8nNJZX;iJh+$HiZd!BW-jukllBR*XXmc8Tb&wk>Gvc|brH0X zUxe5$@Fk405UXuEY7;7gD^&i?B!Ax`}Shb zPC(#3J#OQNh3wH#7MBpJ%Qw=%`9WcC=0q9A#145bVlZ)I5Di!n+MscD=#|o9_rcnV zw>)rYHhMK~y*RTcCkCuV$@}REl{LE zO>uO%zQv*(3=75GhLrC7KYY01NQ&ZMOS*8)$=BAtr(d z=}=U-=p212r4i2L@+OdRC6C2qcL2~39Up4yLRznnEdkO`|Cw`)&IE2T>>?$$bX!U@(HEX3@xf3tQ^*@D)Uqu$Kqs9B1DtV!F&F*}je*)MMpFrmB@@fsMW& zZuR$eh=aXtLIjC_j@-wB_k29R(Sb?Ql9~l~VlB(&;L`v{qWdUIt7s1HS)s8&ChDqG z<+~KA%I_S65)22~kvrqJ|%RF;zg6f%ettSSnKGxf=j`mFUT%G^xs zm^jNUo*B$?uGpKr)@C;VCI>9Oqa5lgbT`gOnZ7sU`Fd#QdbIO!L<3&xU4{^%2s15$ z>GUvUQj5#4)qaW1O7;7m{v0-S2or0s+Eic4aa#>*`vIzZlh?x>hb(g6sPDL`U^v~O zCPUFL1PHL0Gi50UdV9lf4Ih>N@mAjso(4KX;XI^oR$8&a=9`>+XUD8L>*s8%5^D30}Kv3%ZJ# z=FZcRxx3D87I75IHC9v>`h%iBzxJ`E@*X*1YA4@{}yk2B#xsJUzK9-T_hjyLzLLh?%0 zYd@?S?aRG^VsHwC%saYoM`XG-sQlOEAYH9`8f(o#r*Q~uw&`-4lGMDtn5 zez>?sI5_f=`A`ZHAB~qnYTVP&$WI)*$}l8`tSH)33lx!9*I}KRB1&$ceHp=%k_)J&|wQFszni1=|{mi zYUN!s*{f*%k+t|iTk=b@%NQ-me%bzV{svxxwe<8IKNIXuXy->75-0p?fRn&;F~I2s zbO&O>#P`tGgCj^kE{ZO zYQG0_#hU-YqXr<0BVS}gOiMoID|(Q!05@D8ZTd$SnfSiF(D6vTM0EXEpo_CbK+1ut zXzwk>Ts8U1_3%s4OJJTH==oJR|K(9iu14hX82TcvLdI(H($4!YYCAlBKfjB(FCv`% 
zW@)|${k2f9{YAob$5c7acy5icX5BLiy>Xs3ZhVRc+#0uCK zf#w8oVAsv>uAe#m+UW7-i1GybAD^v1cGT2b7^B6CS!40ZYm*Bc`mM_?IZR8=o}HI5 zww5xU2pk@zyJiEuq?iC)1gG684U@zL9^74r`B<_`i}Jer)~^(YNA`@R1@+$;b?Z3~ zSIE`uE$iBy0a;@vHOg0K#WPQ*QS`7jR^TQRv_gVO=baGHAQ~G{M1fBO^8tG9wAC?z*~@eVVlVISGNZxv*dat*2;Dhl~XUh z4QfMWjBB2mo^p09*WK>^t<%1Y@fOS57W06*tQO-_42ea1%`ZPzrE=Zppg5hO>8P6Py%C`v?$sxtwMTB0vE1Zt?T(J~$D&>GrVtN)lPopO5)PNC#_Ota=`C_hBtt77@4d+CZiv1=L zXDfq=3$YqozPpm!b`3}CgVqnCPQbxFw%|jgFOXPC-*RE2qG3~O@Z|-0J0rb;O)}b4^ z6|y2AC$mDrb3lCL);v|O{}s*8<>;SJGEE6E(Pc6%-kGU-ShsVwwoo^@htmT{I~#d9 z8Jp=ksySOfSz1$8X{nsriq?#T_En41{?f9}j7lApJD7AZLf}LX3K^$7Ffrn`hUiRA z^8733WDw+>_-^N02H75YhRcD9Wm|FKI%DSOXUaJ^ROAp3PX2%i9b^BmMDW|~t2Z!a K;oub(=05;i&O- Date: Tue, 9 Mar 2021 23:02:38 -0800 Subject: [PATCH 4/8] Update filter to use new IO interface --- augur/filter.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/augur/filter.py b/augur/filter.py index 6e08f453e..6b3723d32 100644 --- a/augur/filter.py +++ b/augur/filter.py @@ -14,6 +14,7 @@ import treetime.utils from .index import index_sequences +from .io import open_file, read_sequences, write_sequences from .utils import read_metadata, read_strains, get_numerical_dates, run_shell_command, shquote, is_date_ambiguous comment_char = '#' @@ -545,19 +546,19 @@ def run(args): dropped_samps = list(available_strains - seq_keep) write_vcf(args.sequences, args.output, dropped_samps) elif args.sequences and args.output: - sequences = SeqIO.parse(args.sequences, "fasta") + sequences = read_sequences(args.sequences) # Stream to disk all sequences that passed all filters to avoid reading # sequences into memory first. Track the observed strain names in the # sequence file as part of the single pass to allow comparison with the # provided sequence index. 
observed_sequence_strains = set() - with open(args.output, "w") as output_handle: + with open_file(args.output, "wt") as output_handle: for sequence in sequences: observed_sequence_strains.add(sequence.id) if sequence.id in seq_keep: - SeqIO.write(sequence, output_handle, 'fasta') + write_sequences(sequence, output_handle, 'fasta') if sequence_strains != observed_sequence_strains: # Warn the user if the expected strains from the sequence index are From f6c61f194e7c2ba8008d255c55a218871a27d160 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Mon, 15 Mar 2021 16:54:49 -0700 Subject: [PATCH 5/8] Add docstring for mask sequence function --- augur/mask.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/augur/mask.py b/augur/mask.py index ff2cd22e8..f45c5ba52 100644 --- a/augur/mask.py +++ b/augur/mask.py @@ -30,8 +30,8 @@ def mask_vcf(mask_sites, in_file, out_file, cleanup=True): This function relies on 'vcftools --exclude-positions' to mask the requested sites. - Parameters: - ----------- + Parameters + ---------- mask_sites: list[int] A list of site indexes to exclude from the vcf. in_file: str @@ -76,6 +76,28 @@ def mask_vcf(mask_sites, in_file, out_file, cleanup=True): def mask_sequence(sequence, mask_sites, mask_from_beginning, mask_from_end, mask_invalid): + """Mask characters at the given sites in a single sequence record, modifying the + record in place. + + Parameters + ---------- + sequence : Bio.SeqIO.SeqRecord + A sequence to be masked + mask_sites: list[int] + A list of site indexes to exclude from the FASTA. 
+ mask_from_beginning: int + Number of sites to mask from the beginning of each sequence (default 0) + mask_from_end: int + Number of sites to mask from the end of each sequence (default 0) + mask_invalid: bool + Mask invalid nucleotides (default False) + + Returns + ------- + Bio.SeqIO.SeqRecord + Masked sequence in its original record object + + """ # Convert to a mutable sequence to enable masking with Ns. sequence_length = len(sequence.seq) beginning, end = mask_from_beginning, mask_from_end @@ -105,8 +127,8 @@ def mask_fasta(mask_sites, in_file, out_file, mask_from_beginning=0, mask_from_e Masked sites are overwritten as "N"s. - Parameters: - ----------- + Parameters + ---------- mask_sites: list[int] A list of site indexes to exclude from the FASTA. in_file: str From 46b8a65bed1b5545fa2988ff79794c9890cd000c Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 16 Mar 2021 14:13:06 -0700 Subject: [PATCH 6/8] Add Zika build test for compressed inputs/outputs Documents which steps of a standard build support compressed inputs/outputs by adding a copy of the Zika build test and corresponding expected compressed inputs/outputs. 
--- tests/builds/zika/data/zika.fasta.gz | Bin 0 -> 10908 bytes tests/builds/zika/results/filtered.fasta.gz | Bin 0 -> 18641 bytes tests/builds/zika/results/metadata.tsv.gz | Bin 0 -> 1180 bytes .../builds/zika/results/sequence_index.tsv.gz | Bin 0 -> 409 bytes tests/builds/zika/results/sequences.fasta.gz | Bin 0 -> 22414 bytes tests/builds/zika_compressed.t | 193 ++++++++++++++++++ 6 files changed, 193 insertions(+) create mode 100644 tests/builds/zika/data/zika.fasta.gz create mode 100644 tests/builds/zika/results/filtered.fasta.gz create mode 100644 tests/builds/zika/results/metadata.tsv.gz create mode 100644 tests/builds/zika/results/sequence_index.tsv.gz create mode 100644 tests/builds/zika/results/sequences.fasta.gz create mode 100644 tests/builds/zika_compressed.t diff --git a/tests/builds/zika/data/zika.fasta.gz b/tests/builds/zika/data/zika.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..8068f895f82500ef1e3d11dd439031add506bdf2 GIT binary patch literal 10908 zcmZ{KRahMDmM-oz5JGSb5Zv9}-CcsyxVyUtcL?s@Snv)OJU~NZf#B|vV8g#>_L(zB zZt7}1&-zx?TdQ6v@^}=KdP`L!7p)wzfP|3&Z^8pwGC2f$@5$ z`qx^tDtVy){$*Bjk>jb*@`mrl$>@}gsM9a#%JQ1;^EtHf?ys|`1+{(tU7*$ZABu1%wZ@mVVy`-9Cz``i9l=HjmUKT}i$lmAkK%~B8$8)VDPF#-PAX5>?+y9k|zN4G<-3!d)zDH@n96-f^klWBpC4h zqIGNCq{u*bny7mbE0dfMj2+Oq<%iptc01+f;F-8p7)?*m_wyMeoNX-t@@tFi*weFh z;vO?=NDSx$PR1?L<`wGUKC0}j&IRv^^=^pvm}M7L;l*}G?^d}~PWY1Qz`+yrRr%J> zEqO+T0-Q~Qt}aTqtibqSTi;tY75%`yi|)EuLP#x35`CkmQtj6T&!gvTYei$%hjyJ> zr}nd~@LEGp)gXiCk_z`u=0@P6kFZA?xHwwz_h^(lR)4kOz^Vpt7=(i0@Z23WOju7r zRKma*$G{XNxUl0NNHD}gYO4DLxoArDe0`|e!i^dmuhY9nLp%wKqrm{+6R_|)q#2NE z5-eabCLK4}qa;m%6E!FujF?VJxAF{b>bmM;>OIX;EOLN@$I-XSj`81xWVK*6K@XS} zZ9zXUKSxO~8ett1NOl6qb?>7N2{%mcO~?(rc_N}ZxI)<%h6ys4F{U5IFl_xXS@+{x z!$`k%@1cjXHJrOd8icIWSKN1}%DD+gb6=aQeGS1H?xu9PM5hk(Q#^%dB8}S4NR5ib za4_BPYY6dVBFik6nUt%hAcq58tzLYl?)ri5nP$?+Yn^8BiwJ0}i(4=Tv&=dYJEB14 zu00EotRdNp^Ri_BPSNrqvwbg#(Uh3EDLuPzvkvmYyK_TXk|?V4&czAVAw`KrWgYcS 
zJ5d;zXI^cu`I*az$zjXS(>TsGoO-ObG=1|o*%<3GxgN6=g$UjtnbJ2a;!9AKRLNggC1i3Tnx01e1N{ zzD;)8Gznk8C(|ozJxuBhZc5@+^i1}iOSiP-xbjW5TLx8CTgu)GKp+K3fEj%$FQsm% zQy=?q%=6Z6Y+YLhw~JeUf`n0h~q`2ZEXqG@O-j9wAqpX7esKpaUGO>D# z=##R#cB9OX8fxQ-BaEatmkc&1jt-vFn4VOCI(z=~Y1&V5Ba>)rxeo_FeL!S$3T8AO z2zih^{bD3-JR&^IpA5=q?mbr*RI(#BpgNS#L+B85S$nxtU+M{iz_Bt7GK_>=b6(m3 zG41*ti<%E>Vw9lpxCOK__6f={y>v;28VH^Jha3>ylNxi=e4-!&B^RAom|=?q4PKgO znxy!kF#Wdp_f2NhOEl@Ig3Ogxdhtpn{dooXMEl6e5W$2Uh(`?mj_0;912E@(4pPWB zS${Qufv?nIt-NSGTpyMOvTJD7$|YQPcym)+qrIsa!KAZIeKVf6J9Bs ziBba*wb?ZqKe|Y?h7!t+jaT#(kdFuGL{hlouJNf1Ptc?3+--`{TT(28_1*crG;g*z zl^>}=J{b_8sKf^QIrZOSG-e&q4a_BjJS<}vcOT{tmgu4@u7eB^HI(tPkP2+pT0GZ? z7IS0!JV}3!UjI|RN&(KpFxx*EQ+ia@Seg-uu?dOwM@%3R4XPqt8EG-KD!~b|%J;w# zhXf>af2{7tiiI4A1g`#6*+<4!He2B%$8p84R1^_ke_ec3ps~cXaY{G%&Gk953144O z(F3U!b3TUgoEvXn&2i&~d>AMYo?|!I8R0m`*sZr#t!?O9ZXJ`J%JMwJLYx=VXsPr_ zyu#LL!#BT(B{7|8W>1gaiX=**W}#ipAdz3%fJDDS_SsG1mzss-rx5OF+D!`K)e$r+ zx-uC{jA?2=n6omo{-U*e`gOEn8SeeK#iJ8tdA!HORW^e?8)5ektUJ7EVN#>;yMqOK zh>Z)1vs&^ya;3sy{B>;Ma4|vbM+Bsirt*?F!o9EcrDNq5&hVC1YHo778}|*akJ93fUqf8jG@Ky64nyL3#n0H zl9(AH={#m5%f@bvLD z264q@Sq}dq8Inp)D{K1`{<5z=;1DDG)bk-19htn~zb1Bq0K~Jm>w~DpKy?E6+uY#2Q^#QSoQ^LC^ z%T7(k)=zmm{GNUqBQ|5+@w} zq}*7kw;Y#dkcEl<&R05u$BS3&Aett{cl8`y#<%Vj-We; zp0e~nEiQrZsA;Q{@Yz|(z}e}qxg0Ae0z48TF+47u)tm(YHbdIz!)dB*74do`>@a|Z zjJC`SX?m8z@R-quZUcHBdx{%D?7uGGN=f5s;ZhQj;UML)8p^m1Ry#<%1@uF(5(;y! 
z)S@!}8Qx|w0o7IC!xHyFZ3rc#cXND_t6p3mw#WEsdXssEQ@g$eaCp zHq_re^^ao-D&{Va5w9#WGAFH1r@Utzfr>N(v2D7&LR|3=)hb<1D~!9@ldrpy(r5qY$BQUcTt1j!7>+-=xzEDYWg2I1HVaS95Z9FpG5`)Dm7t+hwC zdVKZ*-jaI3vc%F{cyX<*QGi|FmqimnEAYc*{Sq+chtypagd(}AXl?ZnC*Zlo5PTB` zCJ~HLNvt5$Z6aB4;odluB9UidZRi*X3RPv=LrukA>3i(9Y}!UU*)H8DN<+()V+Ug@ zT=M!71*@Ufgzl^g^>;W}ACl#`Z~SizbGPHDci}92lm?s3YBdaJ&0=V-vnw&`ZJF)U z%(?Rp=r6CVt@w3G4Xb%Rl8{Gy@}u{l|4NND&X6Sw7jhd{OY5!J$5e)?6B(%D{@rJd9w!`7AZsRL3b!KMg?6M=V2JxgQZ_?biRVlXHF|YeuhuZ2c~qy1 zzDV34xw@;1fKEu(Uf5~P7Z3rSY5x1$%f&=}f_wpEj8ZK@NFAdLp8}uqocMzq)MNw0 z0tg~!$Qf<82s}zE9u_Lkr-@JNiVjk9q*|!H;lYH3oi?zU(y@GGA|LZ4iCNsl;KHHZ z&KcEMUTM(GW4GqV$dZ%Hi_hWOl3amdzVLC(542fzib5#Ez;Dq3wi38=x-jzWoNC2t zMEs2wY(vs=7pH~s|Ka&02F~2Ykd8i;rwOz+d%%o|~hA;(j&@ z-$dyw9PpIf|Lf1+^IyWN%gf8%{sjnN;L`skIs*PA5qGintOi!3iUWELY z@SQ0^sx~jjT-Vfcj*kymA3^uduS;0LpAu=Wk{tS$EL0j zuFg?p;SOF%pF;@^HT#=K$A(V;_ZlaX5#$v!p?Uvk3p;6^9@;kv%i1V;lK z2i@uA+=Zk$`oo0!79AT0K?|TBz5t_b?J{dXO0#s>K#&+q1mlJI2t?4ruxodZt)1;l zm9j8sZx*4TdDqcai>#nIaeCRhhQs5ZlK5IGTq{K)r#V8yJDl_mmm(`oJ9lXthw=(r zWhzZej}b3n0>hPQwa$WOW$8plCk8QSkO`N?aY1uN(r#D_{M~y{q*{854(fnYDU96T zLZ>0kvR~m*S!eZ}?FtQB1WzD6S=HAZb}y&5jLK zV}?l}jVHH0gTu!i zME(>VimTl3r}e9tP6LAc+1HDLc#5o50xf2%8ceJZa<9=4L9qd|IkiSqblGDJTGu7 zUr*$rJ8<{>aVtcpo2=3`#=$>0~q#Q6eUvAj?dL?O?h|)2dgL?S?l$ zz=>lDyPL~cYNTv}l)Pn7OTYo^ORt6S%C?(3nW&)%pwMxVN z-tPSh-d-Xv{nD*(D%1V$ZMjjaW&_HMN>|e5Fqz?Qg=WLQRw}+kmqDe`m*e!AIo(LE zK9YjD=m=p?N4h0mvh8ly*O_G3_3pLNPN&<^P6ZQFivjK#QWww&zZj?I03I5yGccOh z2tD&j6uv7A+;r>`jb}U+};K}Fkt*dQ)DCMKO?!Mg!{;y0#n6O0**g71UD5biAmLFe;$6~Dd1kZkm@;bTNeV9K{gTvbiQ4EB@8ZQ z-vkY8^Y@cLZJ4ExxPg8H+)HowW3XsJ4B1E*wv*#ui;L8S`@a|0upsPGdi_}UEUfPL z?Ej?IA)034H-{m$MJO(j8~&`D3af-@9qSI<*BAUS1Sl5L3V}w#G|a*3?g;F<-)K`NvdY@rP@1b6 z6Ayr}bw5|bult=(_meq0u003+u<3qr_jkV^ybBL?>-F}xxV{_d)zoZw4dlBvRlOn5 z4SYxg_vSyh^FH^F&yW66Hc`1A{{3esYxren+~H+Pw69?K>CaAHZ!b8jwM!XL@#@c4 z@OXisD}3Ym@JwZ@bpNS;Y1@>&IDGv2v(3GUauC$CC;wr?!35?cwAUzTdGOeo`IG8B ze%HnL9%LuYzbL5v>3(DH&AnaszlB*|w-AB3dzrp_UOz?dFyrt<@me@2yQ$)9AVdHP 
z9T7aL6IrA)Z&2#}IbR|Pn*t&X(n4^#8zq~{IJmrx{s0Df;-}J`>+~~8!!QMO9eO1C zm`NO6ImSK)X`t}nv8s?N{ej7nTql*To(p5YCE)vEE-P`(Z`cstMj~zBQhf~=^g_uL zd3O}oh!BQZO3G$>g<@P(trw)on-r5!NHfrlJ_8%wE2i6z8w zi6_f?qwQUNT@X=iL78YXPHjQ~ zgk#dMVeK!mEi{-N2iQ!HC_fB4XIkFGVTojuhGrY>7kUl>gl}ZnO#P8Vr`Qe*_C~In zus`lvC7TXD3nHM;5iNi@+IqRy<|y^7Sm#0fQAQ zh$8`~dxluAw;iK~k)U@r&vrnY$H8#_@A@51tijC})l)Wixnjpp(6@&haI)Gl3wh-i z!da?O54eEV=iNJt`rgcJLF?~D7CH5i_io=D^aVf#a@uIT?-}b|oKs-8=yTwo)Lj~+ zgIm^Up5*;_UyO&N^VTlaSNE%W%zjYsWpg8#F*yH}HTb(?-+o+?GFgB2)Ts|vfHcIW zIhDgwA+*;+@>fWJ} zBzIJ81+BZ?FuKqkyOx4$;>dnKBfv|X!yaj{r%ss;g>pfo_meifnaxxml$9XZQ?tyL z*@Ja(yKKPx-yq}MY-DRpBp$s+cmTHSc|6^wUh1Zeha#}G$GH+capw@1XVz^u>b?us zPV6IkU{s}3Q`Dz=E_+}c@sfGUuv-#*IU)@X{PeN{fN6>lf(s`;ppzsGwwl;G;g}&0 zmgb*G>NjZpm=xbX#9P{fe*D15`+ph-KYX_vgIFFIPmOFjbA6V3YEDUm6MHTn7&)UL zuL|3ZM{|NNKLo=vue~lLyvGG!;@l*@22(w8FLKpmz1Yi6(;o)5(f3(CaXVAL92qdM z{7(FC8mJWeHN1>Z_9p^JbIdSt)UP1t(>PCYDF*c72oUotC#{7p+HOGRI?KVBX@U&FrypxcJTHI93|04JDdbL8zU>0g?Gv@B- z#c#J4qm6yN*WjfYo?peD-n{K+P7W!M^}b%vH3V!ETr^$zDw2QJ`&ipSvCew;fz_li zVSaaC)C2=7V(M^*)w6v3-sk(tjhUEjzpsc{3)tP_SijY0=!6$JUU*^`-TFFKxleda zl%P=d5drHf>BUMv%j>iCOZ?O|H|t+p5mS=sz!uIwmhSg&`O$o0V*TmVYTma`{tGAT z1&IYyn690B^g#&bx-{_RC)O^V9zioBTh?_cU{I8W*veu0PuJE8>S4Ea{xwK8(uz(S zj6ee)Z1e;V)%5o*Ym=PO*`M*4nP&4wt)>=-CC_$@n=bTvKP)8z3Uk5Ab)SBm<+^mz zzZWayP6QM()4=z;ce0xpEAvYQ3O-1<1BGxi&qG0J1s^VSTunYW*zA7(`-LSgltL8q z@w=}UOoj>;dLa|B=*gXe;s@OXaQ$#}^^H1!7s}*%CG!*-h<+7giV_X9ceju8;az6J z#FHTMnj`1U0#9|40Pu}lf0+T6%DfVs6!u**K#&A5&BW2CEtlqon3rPk05rzJy)!m% zbHtUwL`A)+rK1%QC`=AZ*#Q<;Om!&Mzy(^^WR+0iFi6O$#3o8I~x$8Y5E z#AE{DH$nldXssAsssMPc7+Y980Qkh;4dAO$q(}$MLE?Yw^1)N?TP}YX{{4a+4{rVR ziW)j4ar*bhzJB62B7I~CH?{&~@ho|5O=GDT)vwDEnwYOrWJgIbDK>m2ME5yF!-|^Q{WR!=CMVhjP$3c|0Xj`X}1xR7$L?aO?pX7SgTq)herH zbgx-veE+SMQe^YC2@iM1Q!1s{&K1;$Ql-M4ZJF{W zso#3ip_RLoW>6H9>Ar(iruMKQf1i~YlDbhQtApA9pE@g;-Tq8nNJZX;iJh+$HiZd!BW-jukllBR*XXmc8Tb&wk>Gvc|brH0X zUxe5$@Fk405UXuEY7;7gD^&i?B!Ax`}Shb zPC(#3J#OQNh3wH#7MBpJ%Qw=%`9WcC=0q9A#145bVlZ)I5Di!n+MscD=#|o9_rcnV 
zw>)rYHhMK~y*RTcCkCuV$@}REl{LE zO>uO%zQv*(3=75GhLrC7KYY01NQ&ZMOS*8)$=BAtr(d z=}=U-=p212r4i2L@+OdRC6C2qcL2~39Up4yLRznnEdkO`|Cw`)&IE2T>>?$$bX!U@(HEX3@xf3tQ^*@D)Uqu$Kqs9B1DtV!F&F*}je*)MMpFrmB@@fsMW& zZuR$eh=aXtLIjC_j@-wB_k29R(Sb?Ql9~l~VlB(&;L`v{qWdUIt7s1HS)s8&ChDqG z<+~KA%I_S65)22~kvrqJ|%RF;zg6f%ettSSnKGxf=j`mFUT%G^xs zm^jNUo*B$?uGpKr)@C;VCI>9Oqa5lgbT`gOnZ7sU`Fd#QdbIO!L<3&xU4{^%2s15$ z>GUvUQj5#4)qaW1O7;7m{v0-S2or0s+Eic4aa#>*`vIzZlh?x>hb(g6sPDL`U^v~O zCPUFL1PHL0Gi50UdV9lf4Ih>N@mAjso(4KX;XI^oR$8&a=9`>+XUD8L>*s8%5^D30}Kv3%ZJ# z=FZcRxx3D87I75IHC9v>`h%iBzxJ`E@*X*1YA4@{}yk2B#xsJUzK9-T_hjyLzLLh?%0 zYd@?S?aRG^VsHwC%saYoM`XG-sQlOEAYH9`8f(o#r*Q~uw&`-4lGMDtn5 zez>?sI5_f=`A`ZHAB~qnYTVP&$WI)*$}l8`tSH)33lx!9*I}KRB1&$ceHp=%k_)J&|wQFszni1=|{mi zYUN!s*{f*%k+t|iTk=b@%NQ-me%bzV{svxxwe<8IKNIXuXy->75-0p?fRn&;F~I2s zbO&O>#P`tGgCj^kE{ZO zYQG0_#hU-YqXr<0BVS}gOiMoID|(Q!05@D8ZTd$SnfSiF(D6vTM0EXEpo_CbK+1ut zXzwk>Ts8U1_3%s4OJJTH==oJR|K(9iu14hX82TcvLdI(H($4!YYCAlBKfjB(FCv`% zW@)|${k2f9{YAob$5c7acy5icX5BLiy>Xs3ZhVRc+#0uCK zf#w8oVAsv>uAe#m+UW7-i1GybAD^v1cGT2b7^B6CS!40ZYm*Bc`mM_?IZR8=o}HI5 zww5xU2pk@zyJiEuq?iC)1gG684U@zL9^74r`B<_`i}Jer)~^(YNA`@R1@+$;b?Z3~ zSIE`uE$iBy0a;@vHOg0K#WPQ*QS`7jR^TQRv_gVO=baGHAQ~G{M1fBO^8tG9wAC?z*~@eVVlVISGNZxv*dat*2;Dhl~XUh z4QfMWjBB2mo^p09*WK>^t<%1Y@fOS57W06*tQO-_42ea1%`ZPzrE=Zppg5hO>8P6Py%C`v?$sxtwMTB0vE1Zt?T(J~$D&>GrVtN)lPopO5)PNC#_Ota=`C_hBtt77@4d+CZiv1=L zXDfq=3$YqozPpm!b`3}CgVqnCPQbxFw%|jgFOXPC-*RE2qG3~O@Z|-0J0rb;O)}b4^ z6|y2AC$mDrb3lCL);v|O{}s*8<>;SJGEE6E(Pc6%-kGU-ShsVwwoo^@htmT{I~#d9 z8Jp=ksySOfSz1$8X{nsriq?#T_En41{?f9}j7lApJD7AZLf}LX3K^$7Ffrn`hUiRA z^8733WDw+>_-^N02H75YhRcD9Wm|FKI%DSOXUaJ^ROAp3PX2%i9b^BmMDW|~t2Z!a K;oub(=05=KNJ0+) literal 0 HcmV?d00001 diff --git a/tests/builds/zika/results/filtered.fasta.gz b/tests/builds/zika/results/filtered.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a1030f31855f5e2d8ade2fe729d16fb7cfa49ae GIT binary patch literal 18641 
zcmYg%bzD?m^sRJvcgN7(ASpF~4Bg#GgM^@TccN)qLSmD!pX|ML+2uh7n$M(m?1)cLpxYqdAj2c>K_LKs_Z{E< z4fSls6RDnuI-H{A1Q_1d9TBP3PX6O|s*=Ch)-PH&y$iKVXxL}C7m%o7_5C6dAP7b2 zV*r0Vm^mdo5X`Fa^nDz$QG1;g@a}G4B!2SmzeDoYfAfb#mGU-OEdlS*Kd9OGn1ENV z3?31Q9Y=0`pF|j|o!ibc)0Z3$w>y6P@_FPGFPp2D>(w)HD|YheI6D;oy=})f?n~I< z3|GslLQ+a!?{)wAzF(+{>bMw*t#sy?CdN3l1{Cp6i0upNLwmZLiK(8c*WGaG#~cX@ zeHng#X_J;4{9QgFU7xwF^Y?X!7^Nz6$vgU3j|0YY;-mgJfz^XXygyp6O_!Fn=1hDU zwBl_!f97Fuc!S@oJ&Z)-p?PQJ9wi>{F0#V(#(eB+`=i!wAW<{m>&Cw|_hGNeqZI4p zet=k|_Wb@l{xtjEJJcZN%goZEgA6OaD4jQ}F50T&TrJthPTyUN?3;Wuxrs}$ZaFm> zXJaZ(%%Rk+E>#VpIZp;{78&F}Jc}0GNmq_n_%`2UIfr`k4_t&0+6TQb&n}HSsaVIr5lb7WIyJBsOSUG@h%HGK5{{zRpLxPvD`>fr1IWp&&K_^4sZ(TdLMSeYL zntr{j<18|vd#*l!pmI(@OmQwXqro6ftR5AL#%=PIF<%mlUzy_MNMw;LfHSWf-jcR>l3e>QSIg4g*G-#o#qw_vvNXn@$4T z&c(;#7TpjuD?YiID5=dNPd&20*G-j(o9g~S{`BaBe3Wa2M$?*c0E@SHqi#kq`?GIk}Ff2UgzGge5T;Gs9OK%2*icX4&NZZA-eQGEQbqu zgi5YMpQ`#sXdOKhyYpDvGL`JdgeGD1)F(Eo5&iVt=ttwObiH!kYZx9JAK}M&Afae; z@Qd_bUnz?RQRyX(H7(&5newQ;zuF=Vf2Fk;p{=nfvu{WvJ_RE@N-*HPGfuZGt1GlP zLwZ^CaER`&>jYi})h}qIil>Av)5J!|234PbH$mW*PE7h<@K&Z|cSTTSm-KsjKXnSG zvBo2v>~2<&R4>Jii{c$9;Lipmh%T8%bAypR+JK+h75i82`wE4{TZMMY`ULtG)@9#PDm;!kl&l8!J0mP}hoqnD zN8D;IK|zydBlkg!QV}xDEc0sJqZwpAGCA(2a`>1|Y}kdh>^ZPlG$NUkG_+Cbun4C04dXnhOFDkcT>i*Xr465K*{J- zDLX1~)R<#Rm55BiYQixZZatC!@$s!PHoU)y1O)M`)B(>j-y1q|# z5G`^r7{7&4!vTJf&=R4AEUkgYmjOY2|AH8j2x$#M%XaM-P8P84T_Tx#oq9UjokE}o z`Dw%xV)o5k`L|Ac&$iSXMnUWmvS#{uXS7P3VJ;6Y0$h-;URAo%uYUERy=8TD-7@~W zi??E?X9c9*nR8+epGyh}VHwSl!||-(ZnS23GlHbnRTOun*VJ!9)7~M*AeF|PRdf6E zu;QK&GIp9;%S^kMtKciWWny0>N%AX@JT8(r5iR`|6nKJw6q{faXrJdZ+1PXLxyNRZ zAgOLbCq{jp#fL7AW@8!{pTOpU3V|KS10QJ6u#AN<2QrND!THH&CfqE7NVEbyy*JPH zS!QXqH-pzWjm;Fh!Tg91Y=`gT7-4Izk^MTCIBs;jyanSJf@OHB2C@_gokuBhQwzp^ zqZ$2quNwBI7(q@3Q7|y5$--KV9`0f%nXBP_;A*GYIDtM_qJ5 zB^zs9Fh?O9FAX9ux?h4Yoc5bkN5&zP7_{hSmZWR`*mGT+{KDjf(((*-xA87l}m4A*d%A+2FZg{pmx z2aW%JjlELmV5-yPf+0kP;7}LsIMOvuhSCkgIv9j2XvJ%b1yAir`jg#De^!%xO5;9{ zi#x~oxG(2v|Sh_iR~kvc?%Yd~L8=)}qn8yA@lMyBKmsmB6| z(h!}3l! 
z06|A+3W76yhZ6ruKYH4e#7fyEK|eJ-NyDYy^xfFJLc8(aszY5E8mOw9LDr=sb%MA= zi|=tT$sQEk+2kM@vXw-c&`gI)Dhm=5B?s-7BjSE&`5EW12OOi$&+( zN6ui&@|GGXmVL~*SjqKtt~-Ms74EJC7-A8ihHJ}Aoo*G~&6~@H{YsFFB<^=}TTNQ- z;etX)@;C^JT9aNow0_}atXR&!8R4Id$+^nOc(AjWQ9CJbG5qd=?m)ar3X;F(lO>Ps zlXTPhvoAtUx4v1++$){V!x=7{DcMNV=0fMw7k8AgB1)ZQx@)F@QC{R`XkdVQ$9h$l zW3}`Xx?sNBC?)w@bcwmDwVlM!H?O}ZX1sUSUAIQ{V_E>kJxT_cSW%Wn86YX)yGj>}%K=MOrH#BdR`G z2W~1(inhz^n56DjdnSena=tto(H3&Izh1~y%)87|Wh(lSbU#$i=*Vk|=U%g21f3d0 zReSuK5wJj2v{15oQm2rpwng9?d<)+^2sv<)h9_u?l2>?bD9|8K@uwJHDFOHpIqG_4@Q!Kzf006 z*?g|B&W8B@j0{v`JuH?OgcPtsVU7`KCt-GBv7s=eN*NmPcKDkKMO6nv6Z)8DJhiZa zOUl^;!jLh2gr)w+KrM%qfX0>}t-+@|+g=&6SXG%VNhK!%=U|Xg|KW9dHY7oG#_Qd>)H+c}dP6m5 zr5*v9hh=9N7dqb~mHnh9dnhy950O2E(@;a;f6&8(Qp|Aglw;WlUlCFH8W4>m67NJFZP2hM$I5afalE~nxqTg3b=7xB zk5Om2m9Lbsx3Zh}tLE+OtAYc+(uWgdBoLBr32TRGQi_XIUq;H%P#ErIy0Z13LA?g8 z*R~k=++`lR8VoAfy02Y&HmcHpHW3MxA~?X%v~!L%S1!*u%2{2!TE=s+MHDHQtCkW# zR&S?9pooSSoi(c`2sTJTZy-lFOK=b`u6`BVH*FOg=;`8A^)uTgKIbuU)=D`%L_fXC zPYE}$Jtmo{wMyKiNKkA}y6eeYPLr+dZKisgbiSwhVk0Iyx`C#ZO&t?to{wl%DuUNVAjF_q6jed)9CD4zda-H;X z4TLD2QXP<9<0i>HY#qNPdhDF3R3JdV7g9uxVFt3uj^#SJej7hJa_4{?xe(U?Wf381_pTy!{dQDK^5j0{n>I4loGoiW!bDi0BLeV zF}$3HZDnOey`|ENhe=kDvbqg2XpcdIOFuKi%{jBvpQg3%edS)l^a+ z*zsyPvdbexrT^Kbh$@0HEd4WmF{lqNT&Bk+UiDjJ7Yax5Kdzc9r*9t~IX~WAu0Q?B zHhCO=o5|RUK~H>ROUsAEPF#w3q9?c9_Nx1i^3opMCqI46acSRY4IY2%^2ST^Z&^`r zoU;040oU1wET)%ZQkd~8AJBX$Qq{z`YttH3!xpMA#PXMn_7+UvF1{koH=xX4f;=5$ z`tXkq$Q%%GdjuZkIoYYf;F%LY`fp01vqHRf31xx0i!w6`C9Gj}XX&j_N{3g3d z-;2s*bd6;d=nUo=a@Xq8mkBn(3g+t&UO4P6s21q7?6zj;SV)b|Cm6iMVq0i6eo_ht z*7&X^=g3prg4Df;-VKp48`R6Wg7Bsj9du4tqVE!1+WTO3U$jP_#*$A|wpVboRAL-7 z^GZ5$)q27B{;)E}DBQ*NRjJ`mlr!XEmwbTn#b}<{-EJ7Rxi1NB%Fsc>c#pJ@@7(pt z*yxn~qor{p-W=#E%W#}+)Ov40sO1H+D8DJ0gVaL;_ZCazSj60}&ENkjWD5-C{g8Lb z-yrRbpnHfb9>ObS@z2FC?+aPCN#Y#3yrb$rsY8Vh&U|nmSW2MGr`q@ zt3?yG-n=Z$-sZ*_+OG&d!Hbh}%s=skI>1Ygw*IlEK{!VCy%yhF;E=2>Vf|=rjB!M3 z)lr76CtZ7>O0f2pEvnKaW^WzmN(q~RUf65% z3o#a+bQGO1?~J)Nje{p)JtgOHd)wvm0Uc)ztv 
z=j>9G*6Q-ePdnh|xlkSAZRZm;zR@&7B3O0b<2_8i9_?C1cqT`)rIXkvr^qURjL5I7nT1zX^A6^yYlO<;SM2+u!;>lrJQ0(u#kP2jbPL&qySV zgk){!L?wk}?N9_refQf$W$b(hRyXbQKZjDc7e-PfpC1)){m3TuswVKq^R>z|;;l{F zf1XQ;>=E1dhqZC(t`Cea#ahgnQGU}|;e^rlN}~Eb%OQy-khTA}qs%>lt8qeRLFK85 zN`#Lq1k9NjZehkUd3J>v)ZD3$=D;6-#1n+2HK}P<_SxCogAPWf2YlbuumYRDm|Qu{ zePUm^z#`8&Q)7$;Fu7Fiq3A?qpR$Dd?o6Hev={vc0yw!GD|DR z4n|>C5r$JmT)cXVSheQ{=bKta2CWk@-rJvMT@(3F^NT^VXv#07RcIocsGI~vd5n?z zs;rwmlk!UOn4?{{0N8SQETW{5z@57_>yJpK>HnW9ojVO19^q&qC$5Awl4`o^7+A03QL7J zPc6+J!7g>QU>sV@2zobdfEJHkTZs9}C*xpJWP zz!ezfy|j15?C#$HK&=MQhtBg~+Sj~Fq6;Ie)tqKEj0QN?=2-mhH%twqd=ghaQedkA zS8W)0VAYh&w&m=sMpY7)Gxh|FUja^c?KfR6Dwe@S2JSznW@9#`|A z8A1w(FSA8HC=Me5F`BWs&gLN16^s7^TZRs6%cXtefC30_&J1$j=|3 zEYR`7Tb!sg`ea;hPt8O1a@;)M;=h6$-k%%+s{%trl;K6#00bnE^h<^B(O;%W3!i3< zB7c(vIR7)Wt8c>;H#qnBzF_9-EWI0`!hVozQ9Ks;t9p&x9Tz|Rq-b$HCR?IQfNPAJ zcqQbOsk5!15I`u0>2Lx#D!9LANGHFHq6uz<34zslpE2MCjtJ)(koh$AqMzY>J z$=a*1CBWusE53&ttDo#-`S9ca4WF(&THR0`!p$n4M>rs;4_#tXyh0t7PB zl~rX*>XslCvkYIqXt}qd8#W^o+us3SI7pp^?wvfDVh&oBLJu}Bz!eq zEnToT(a*i93K$O?(SyVXB^!e5x4t~7K{xaUne7~pkv`3}y?^&VJ{(~^-Ih9k{PX*B z?|@>3ZNmb>gojEd=_Q86EAe7|1#=rJ8AWHQ%vH|`zU4>Bv+SQs#L9Y1OdW-aE66aR z`uKFpMO0%m^e)k96B5>>@aq|`OL?LTjc!te?PD*Cv{d}|$ayhVi+Qc=m$-qe0*05* zE8X-IP>-#i4-9YUlSUJf%eSOjFxP_^@~S$SqM#TADwKDLYh9r=jK3eq2${?%SB>!= z1`8wf^>B&Aqz$x{oDQw}#AeojH2nI(l}_Z<_5mcka%-rn6q>a&U)w%CA0e*$+NiY) zDVo0^;q}aP<`|!^v4FN0f-x2_-WzbM7i(BJ4nS-mht$L&Eeo%g_ z#Xz&@5SY7{lGkdrD7Q$BqTrT*PYG4s%XUroEi{bu8r6C{8go{t+`=+nw(eT@&?V>K zr*w^dQ8Z8UaR$iGzsQ8jY>9hLb4g_R*o_=bt_V}H7=k%XXT6#P=PCF;tNwPALipV> zDGYXy-JjMLz` zeqYLM36a*cAR2B4xapQB6`W>19_o^&$MQ?B%XI49&*(C8;_rS_`R{Hw`)~uKGP8Wu ze4;X=mPP}AEdvtJNh$AJ`z1fY*TSVinu#_(50NM}zvi$OVV=`;{pKt=JnLf4 zP#v0kP-6y_3Uh{nSKhVP1eg=NM-6bG$nwhVcy~Ko>nH>v+D>) zuq+#G)}JTRjrSGL-6>=?q{rsC#bml=5vG(QXb0stx;bs3yAoeUTP_2(K-ODV5c2x{IK5AE(InK28RZ|ih*eioMJLcwf^_9>i3YWg&4G-mCw z?+AmU5tP5~*ymV4hmg!xh-oeI3D}tzG>LW9Uqj>lQ1~18Z zC%vDe_kiKmQi_+#BsG`c6}BGN19V7OtfZ?7nR7IQ{)(k`uXp-oMVx8PcG_oW 
z^wCIV%CKYEEz5&O6!At(R-HV>WP08DT(d1BzELYtSn2&MG{ytg`XWh- z72;X^B6BL<691I4MV>Eld;AmgX8N4W8l7>Og&ukk=I#oppIRHv^O)m?sn)J+I{4F@eeg;2=Jdh)F+qDWzO- z4#7bq<=noDjawtN3Yg55gm>X*6p$jtf=eF}DpCK`4TA0GD92;9D(~A=iIT@qXV|sZ}JZHt|J_!;{3YA*m zwI*GLud~#ham{F^{KKuWufX6yKwG=SHHY(1g5>ROZq)ajQ2_+RBDj)gkO4Y&9AY}E z`!CoP5P{7L=TdIUXkp5H3K+wKhD>teiQg43Y^OnFg=Yuh%0$~rZd1L>RG{04pktYhTie5O4{s>Sn zhF~_W4FH*Wz)ApFG2mo;RWi@@$`ms}2JR=!L7P5z+&)^!v=q08cmEW%j?F9_Hi1C@ zgK4;daW8iVyk}7&{L~NT(|G<_Yt0Vu@;9s!D_B*hXGCaeshFRkS zKe49(JtJYhrpIIk2!g)NN(Reew#$@Jo&#KuoLF#~D_4&#b1-txOJDR#SlPts~FSe3=#U2u08jm9TM;r-6;C@6zA`C|bD4cR4VUnY}9&w-x!Wj-YxA&q@v*bhbqZ>6Fo2sqf5} zLw*lqP;;%Mw}V6wXHF_Z|GWJ20JVtnxqCImZHz&DPw78!yKhmxOM=}MbQp;OD2s2( zqKio=aWqCBPw1F83N>mZgviT#14z+_5Q$`iy&+G!Em#)lZ&BJ%6&Td%sZIk ziRGfczetjS*6$vn-aQQ_u1)---%5^G|V>>2hMB#s!_sYDz+EuITBM3P)`U(?4}bh&|o8&&K%&72JxZURC5zAa zwAT`TyXXqcV42^_JQBpwAXGvL${wa=8<*vml8l^ucgsdRYV6WYc7o4cP4HZq%limW zW&e&lyG&0fVLy#|Ru$Wq?M)s|I%s*FSpugfmYxpwoMjPCc!EY|VFU9Y98fx+3fdt4 zyT+}RLxqp&&$`<333o+%rsL&LeYvG+k-O3=WS1r8G`=k=g~sD|8~)h`46Tzps18?RvF2HSJ2e*AiDub z+`9<;2{9`CS;!Fg`sfT=kE_+GuILQJ$fT`Ap?SU~0F2YH($|d>olzM|KQ=No-KPgI zK2t+;83ulT(Q&l4dUAi^Q;5VBmGv1iw6t-y7=I{ z-u6W_{f6fQ?QH3$Ht`jc1k2YqOM(;U zrPY!Gt7#nsw7!&!&8P@@^}YDoe9835i@O2CQSs_vN3PVb`&Fz z5LVG5?A+WcL8LprvPtrTO)c0OA|AJl6p%mI;XGjJkC8KmpVBCQa;Um~@yB$CeB>O( zTj-O~XC#Z%r-#HhifK_~0LG6OKHk?Ant|Cr4+szkFK2&fMEn_2vFhM|th|oobWkn} z^7T982$A`y6U1qRR8Qg;5j&IBe;IT5f0hq`GQJtJ3rZqxMvDL?m24l+l|~$z9K!(8 zDhKjs6t!})U=mLLt7}&^n;L07$S0fOMHTGbuQkXwA1TOu(-oY-h<3tAzQt%BMSwRm z{wW7Z89DQa;i*Lf=<4tAjw-BGjI#PTX`mjXIouR1z8Le% zGsww6!vfr|)}@Xt(9fu$0}EXGKM$(S#9c=IFubFI26HuUz>Wj7v)^?L%FZ28e_z5n zT|}*%ir^L=)*Xol*|boY&!d6*%1dnIPGK`%;&W4UR@v=An2uo0?MgH_xPp_7}lMWVkJ+f%>@mS=rUL~S>1 zS-rfg5KC*0N1wXhIFd*j)cBQkp68(Y!*C+0N^gz0t`D;VP2Oh(gO= zp8AkPKB{(kpkJps1-ER1`aw)#QT+j2pI@|wso)EYWCh#AD+HKe*!4f=kdc=wZiQ;+ zOG0kfGLgv3DFTc8(wxZP$jCu!-`1$hS@y)^@XPIy=sm~ZM||I0r$(}D&$E8}z=U4a 
zZ&^++*CNNg8%&o=HFMSjtEldC?q2LKE8#UvN{=94A;P3h4#KL<-Sjf276~9O>|qqb*C8byAQ15Hz9~cI%A9E?2c-Z)5kppcJCRf zpNvG*ATnLUzcKD|eqLENAyglZcojNE61U6^oWACVd}K|6Ph`LphREO+oRqc~hT^5T zXcCuw*D8jmTniTQWaiTn?4l{1(WiUGI6Bx*qwGfw5^$+(P>DMHnI-3;P+U$hYn6IVHnv3WU0^Y?{IqOO zUt%0l<5{6Wu$cdF;wsK_@sGF-n-P+vI|wM)E3|)Vnh4P8ygYx2AZSz8q-0L??@Me5 zu2T4eoOBWEB@D3;9$`%3{o9W%b65OM_M3v|>ebmp1rFUEoMuyokMXnnxXL zRAstrb*prP{%%C^@mkstbKWogUj^PYLXg|$DI@&(5wXKyQ3OV_(a=F;p$>6?cE2@@ zI-z`#T=f8;7EU`9u2aoK>Ow+jA>KeWbc>w>i)~)$IF_P#T$Cp~8<*Du_YA?q9}mZ% z;;02b^72XQ^2!hLvXX@nbkC)pH6KhVyI_i&-<$71RJ;tKYZiN@NHHcbveS8ot8k-j zAJnXUg_OXjCIlIf4xUy{l^?q+Y#ou1x4&W(l$zZLJ%0fhSR;(M|M$sgnZKMUMkWy} zmLkF|N0uH7+rI@<1p+}txFTW~^-UnKPe~v!crhgXiFoxRh%%ynLb74a$ z1$SWy_nD>DJHdUf(&TSAy!0hHfszoekkViK;B#ZEk%J_JTcu{jHWhyVVB4V-*a@#p z1WGCl%bp{I+UK#L$Meh0!w}oBFOhVM;rdgR=YlyCIp|ziKnjSZ3F~+nPVo?O=Jw2a zAYiE7GOVHx#^@m#x!v3-pe!U(KMTHl2bW?*2NL*SX$ zOnSnTU)}RQ`BbcQAoFClhbV8~gy)X-9JUW1D*SI=1(7PkJBMX6UU_@DCua2;@lVB( zuTR{HhgD?(Q7R$~)o(OtS=wsPXG-vY2^Nk3FdDgd7yLiWbnQ>vx~ex%0VDd_po8R^ z-^De#rU~NFq z{9?PwCR0T0jj*}FjV`Xc=H?X6|j!kS|yPk!<3!nDT_2hq5e&?$N=!|Wd_q`D0>oSre zPQ7g}&wzQQ1%)2vr{XfplEb$wv+Z^Sj&EV#ihQHC3qe(IGWdoRH($7u5F2Fv>6r$7 z$3bSl(32?LipS3RxvJ;_IVB-kY;w-tLy24Yo;K_e522Qd&fheKRBME0q3RH8-|L~hs%f4$g53E{>KfTbUoGw*AeQ|Z&bKJy{grQu zxHXh6jPZeZyCw_%nEbA6uWXRx*8tN3dXo9gUk-WTY5_!XUK~I1O31zV5uackM$t-n z4^v+0h}_9sJn-4WXxod}5=8m#vt2rYhEUYyMsUftn{e8m*rvripQGo!rHCmBk=HeL zE#{-Xe{}}zVfowdmsCArm|kP5d12Q<07Dm6?VD8^3TyZMLxo~=cVyG+o<_J}B+T#+ zYHcki8)h1~-Y!)h^#p&eKYx50sr`42wf^q`#@}<^9mXb5G!^EqG`MFL`%(6)`NB`m z#NP!YQ|*9tjNwFxu~6{t2;I_++2M_eC(j_oRcfK|YYhMDRZ;0{cUhC|aCwu=xEV3! 
zQiXGCNEyMnkA$2F+MI~WkNuFIw5&rncfG^LPJ{38&Q}Xs;TMlu3DRHU7;&ORVy4La zwbUt2;M|Vu@3w?y)Z-BDRx4lajf3tg=rmvBU8NetB9J&Ei`q;JMog8F{5){j#R}~y zG=3;As^w7GalUFcd1^1J6_qceEUL{gaSXK#oWvPBK5*ESXp1g9%dhS8xGKr74N_SC z9&rxmAA64DuOf^YfqWf#;J{_Br1_W3|IulBX`Bk)|A(kd^TnmC2^!MlCT29rmBd|C z+wby&$__#2D=pOH*dAcEl71@Qu|)#5VaYJxqS}viR|GYEWd01&T!yh<503XpDx9wf z^dz)C>|sUsEWQu!xmcp!vPC!NW&Qh+6}?)$3G2CgAJIcN2TEe-7FQz>^@-}4{rPFNH>$_Hwl}Cpbx$A-zUpkZOa~r@bwCRe0p{bqL*ja@5~luSs!#LBVSWZzXEX(wXM{7wu$|O`bCb zpRrYLtJ=QBlMZsaU$QN<26d(S7J6QeB&L<83Dq7n4sX#i)l40Ry^(Ds5~~p~ywO|q z>!>~BvX#VF-+~|NQCe0pSB{#J4{Tl9B?7C5t>fWj``UhLoEqIMCN|&PVQA;EJNJdz;uUBbV zq+HfVt4ozep_@VtY2kX5SVuC5t@+Kna^wT!$nTr+&C;Q426Wwo(4v0}ZVBi*sf~G^ z{zV(ee-rWZr|RJ$#t#w7VVScwb^P-z={jpY!bt+HYeMr2BQEd37`_AXrmW1p{41#Y zu!-zD$>PD)(2|%6hlJ`282~+CmU{S0V#gsk5KUN`CF|**1Q@ZO#=F);*P@ok40Q`h zT3L>rx|#@vIBs!uc7=$`_I*E|WLwPdgMW>R*DYZ8YFo1IdXNWI`<~46u8;z|n_i_D zm{z(HnB~|K%g{I!odXLhnl7+3q|yn|!D?$a76BRntf9T4S4X4azpALJ78Jw z-&u|&7sZX~YcFT|AUN`uw3$mTZdl*|bz sas$UGpc4~IZ`&Q{421Mb@F>E%EURB zciCRll9lHK<>k|XVu((MQe7^xFfRoDY;AWemPr1QNlHS9yqCZNEP`W;q|f`8qB7Dy;czWLe=D>!^jK6u6@yr1|PACkAWtPCCgw#l^=t_@M=aee#+yfhKJ^hHN6v6{Tp z@<`2?bwPEy4t1}HsUh|A(JEhwa7Z~~NSw)7!Ef&5>WSE{f&{_*g->6G^(3r z`?PDgSZqhG;J3Js>N8381FL0kk24)Zzk0#8WK}-lVOcuNii*PLZ{wM>~-epQo{ z&GVof><+U?PVw(jfiAv#zO5O$0#P&)*j86D7Z!u!iHxcURGAt%1=Bk?KY|Gv#YT}N;^ z;gEMN-hrUd49>4dG>9b)%M0bL!#C0#j0IRozk2T4o~&by*J&x#7^1&`?dj~Mt~7fJ$Q9m-$*|& z8gK?aXJ8C#@SFV)!OES^XAOLmB#gR+58000@GVNtGJX@5tVbCTcICQjPB&U94wB7G zn+E>P*HppP?<8;jS#u?Ck!lUVJWuM$~WQ)w{zn-aG_^*F)yhZ&(%QH?J|9@}e z4br^?ptOcpm+G<=<#=i!dG!;?U}fp-LFFUX+yX}yR%;T;HjX2IMTsUAjZ{m2e(IXE zLvFJ!s$*zi8>4~iH&^r)H=kkNl~ugM|MvQ$)h*1+Dqym}eE|aS8r6;(5dw$5mnmCm>(UZfMrNE;ovI{UQAz`}48)|)h9|t0 zRY4!bmaHqPAMX;YJ@DMA9N#IU%~%_fjcWfR@bxBg)8gDvU^lAX+1YzM}D+~A7mp2T;A|iVR(Jf zwrCF;B1=*&F+aw33|*>x!~9yoFI{lc&$O~!O3K;(ir8)_5|Wv2HAMm0aQmNa^6Y)u zUaaE>8!qdH)KpfbWq`b8JDpeg7BQLzyI>~F3#!L~D zN6jQb70>&k?KQ6bLZxMmOgy~DRaVo=U4fPHhFOBo`2){~@%}r!fn@GTWLfz@s@2FM 
za4_nHF_ZU?a79T6exM$yWmeMCz~+VBjHNK`P6^e&POxn^_XbH{N~GR0kn65VHntLK zoA*;YaVOC+R2;{~loiwM{_9gxzNPgLF;g}gkmih_8Hiy}nO3*87o&x=l~}7-R@*7K z`WpG!-vGB9y18T_SWUF8`-4USJ-lA;-TbA<>Gj|4b0CUBqE1GGBFltbW|we7yk&cZ zT&eOw^kA@&4ZsK=jbq4qXqv1kMgk{&6qcvpEJE z5I96wvm>H)E^95u9FeO*Y#If^i`~+QyVi{A7B*lPq%k)37Q94Y4P2#VAleg_4&fh_ zWUhc4kA-?OReH{fd*2yW#`)ZUUf&RsP;PwTEIK3^v4{DrLx+A;Z8BW$ML^QIXq)tll7UjgUG)HG;tig153iFb}t6_6ZXGM`> z@)B264=UOB_p)nBb<5xf%^`WfuwR)5Mx-4QYaCMNzPYyeu*pA$uRe3-^t3*RblJ%p~r{W9t~dPAL#+4n1Buioi@X9wwTt`_8{)ix$S zE+C}TdS>T3I{ee}nsr4q>KRo1vt(_gDL-H342`y2Ac}qATt%CZjF;v2kx%v^NSWfwPr6-DEk!NsIpHZ*-D&(FD4^ohr-ecrDxdv2r&LPZ{NCClc=DahnJ(;@d~Vr#pUTr zm(z?@p6txa9g=MdM0ND3{z=zOJ1KUYB-JVnC*?*@pz*>*ecODk;?+=I0uZ65ZfRL@ zB=X}mu4rVi{BIT^Wx?e!)wAEUq9~^%t;=KPU(l8IfKvufKg8yS9EGyAO{#_g=#j); zc8{uh5oBA)bDLTjn=>6Am$ES~E&`{17^x_BD>$ubNPZOGPQ<&Q6kRD&xy00eUDKF+ zr`_r#M}+A0hCP|W3m0j}`?*f7ZX?odW2zS$`0boG%VS%%?y|yJS6JVc#}FY~a$at; z^aiCtkl6kWRclKmQQ_Uif3JrW#CsQ`Rycv|sA(tW#>oBXQArNrm zN<@h`pkE?a%_>GWXqiDl6~E;_(Pm`|}S@9L6i$W9V+LtH{Vx+hhi z`)y8r@<1d!`5YoGcgBXoy_Ofx-B1m5L);5@-BWF94Zy9H{(b1rT^>=AhgqQ@8S?t% zD>iBvuBfp7TLv#TNOvk{L-K=w!oKsJUj~QgvQgZ}_(eL=6-H$MuYSqzrcB4MqGP)x zob}04^>NL)K*EQWX5Tp^D%9BDSR(I_BB-OJCC~W|{vAm`Il6>5VtM1t@Nv;8T1M(O zfqzXaz+UL2;D_v7kG#$sTv3uYkKyH=eX6SIwh;o`t7mn|HSYc_BpjN=e`Cr_d8gT| z($2q09-K-9vn_o+v7{n?WD!N_?*6J8=GfG4u3G+nB5~%<3}}w4W=xx(!@d`@T7;Dlav8A_AnfitE$wTejM zIHXEQMXKY$2n0!d>J8A6{fSnzoN2XdP_;qh52+|g)d4`rdzWE=-(GltDP=F%%(wyO zfCF&5Yee!%bY5kgzm zQ$nFlLS8&|ZcO(5$FW-uw4lc1-=nGmW3zq)?>(b);pq*^r{iRUn@tY5FlvQ1b?YRu zwb|+>iJK6kh&4s3%99w8tEk%axS|}p$}MUe{{=^Kewc2qq{Ko%=uQUq(DK3E&&gHS zkEj+B-2=JbvNV zTeC)U1s0pgzRn+3z>y3*OQJ=7S zC2{bR#S85bz`PBlM9`~vzHze#+_)*k;p}D|A)Q0?+^yKv8>d-wr`qK5_(an5F4xEb z);iDP|9cY&L*JQ&yA5We`&Zgqy>hpPrrM8Gy-y!s-(Pd~3|Z_OMZn6^2UeX0AvBH( znBE1PhzamqcMQ7iG4tlVnmgaAzhnvlW%H|jI=rvHWbvct%x5{wu7#O92fm&JI=_N` z-nQ6xP|TpkC>p396t`B2YVrYQ?*_BuL9 z6EJ;0cHVP-f0ccZiK7>T+6klc**tbWR8-~YP)8_!tpL;aW9R9*XyF949Tf&jdQ~s+ z1wM8@M3@4U5Vf=fOuxg&&hHYYxe1v5_#Zo;RNi{;To9(nr8)FF?L=hQ2f~y>)E9&) 
ztMp-z^#VA3?0iZSIh&MA!1Vpt`CY=4HDWvhrl0F$=NZ>}D_{ksMr3;Ti;tb(225Xj z?0ibl)Tqs!gDW}Dpg}h=WkAe;=@0qX`AdW;hU6JAT|IU_HwjbqVd4QW{jMH6-+9N$ z{5=t-+koleW9MhWl)wE1n7$u7uO44M6Q8!-JTKX$&32~#PoN5J&`*!kKcOk)!;{SqHLU(9UqERafqBkl2{f9$*; z6Q=kF#D6L=0g$Nlmi>nEs6)J0Eg}f2@z4 zkDttA=P5#ZKPSypKO{_9AhE{bT%zWlc!ZdXqq+Z}gydOJH6LU?NGD%9eunhY_9y{N2^BFyMJ~s)|U@RQgSO?Pg zW9N4XQ`#P5r9qtjm>xS{Sa8$(eb$eiAKk)y226hfkDZr&1u5i<$IizgVXCq#U2%T; z(fhIUH2Hc9z*yu}q?)wUi=A%2gU8O}S3gqb*Z=Ux-{JJsfB4sL|LOO?`px_F)R@~Z0q~<)B(4K3M81iuwzgRBW`~#r>Fk)0dD;X zou2xif?Hu;AhLz5vK8Cd$3$cbP;2d13dB%(>^jthW)kpav-=ys_HGz@8 zS@po`Wj5buTYn^{ry^@9RR5r-r|P#uH76+`rPd?1^?iCO0~Q>WJY`$or>9EaqhY|Z zrRez~NMm;J+u!BssT8rY5aO6^W&e-Tb-=B-8mZPjw)ML>J(WHk!@B6G^kcU5r*?X3 z8xDhu8Xg`4{Ynj(^LO~ z#+JjLX6X3RPEU<1Z0iqpdTQK0Jyqr8v>xB5rw+Ik3V}o03w4Wa{k~354KYN)K1TBT z@rTd&^wbl!b?@}lq*Rnf(581R(oXj8(^KKVp+=xs_9M3SeR`@3ZuPjrw!TkKm2@f< zTgPncXL)++IotX^J$1mX@6%KF*jC$yTj`3hz(vKV-CZM2zE4jLW>$=GvbnI!w!TkK zRS^S{fi+dq1X;7KpVBZn3#_QxKnFYbK0S4@8^#*so5u#*`l+XuYb^y(mU31zv6hNQNueguxOaOn|bf3UL zlQs|v6Po0O5w-%_*bA&X$dZST}hV zhCDGUc#5me6g`8M zr`s@~5e6JXu<7nfw#Kfp2p`rgjAG&( zd)x+n$Fm)`wc~bkqfJD%$1M=CNapNkjS9UH3m>Rb$I#nN|293p{^j~Ir{|r9-e^2+ zcLJwXx_0g8hKU6Cm{nZMs8j;REEKAr87*1l}Ie<+)ITG`D1xoq1N=iuDtU0~|=gqG&rT87uMiqq=Wb)&AkzYXwPh6In2 zZ9DlOixpeN^dA@nV@N0_?+EUNgaP9hS3@8?f{QrL62=Ln^hZWfZS-ubX5KFpng~VJ zfK5OtM8&qmPjD|q0%6Q}!8RocPL^szqF^M25+N8Usf&^trfZ-_o?B?@y;D=)v3$>J z)$YCqW&wFyLQKbMwcHXemJnCF3Mt{puEfh>LK5*`Un`)$XUEl(GKx1PHT?RZeOuO( zQoei#@CZ|qBFjKv$rW+IdL-!@e}Y5gTkts}d=UxU4}`g9JfwdCqQzchz4JT&C8iL3 zRSx-!5zio4;x@m}>}hYk7^Hzm*Cl<@(INas@ zfygbxg?}zag2~2PM2RFWqI|WvjD^4(Z7)(0<1MF1vIK^iOmoQx8?$pt=S9M}CwRjt z2W@kXccBn4t)i`GK(QKIXs~yjzqX@0hF}QgKW8+Ep zT*pUtwqew5!}*PqZ`V5G_J&+Pn7N+r?vt~u7jk~Z#f~J;EUrBM5EY#F#8h7SWcb!A zpY$D-gPZlqebP@&t{3)F-8E+~SbfaEDca} z1jJLk&Lx^$`*4LhT1 zr)vL?3x*#q_R|G_xuorS`|{W<6!FD85!(A&Q4E4&DJT;#Mhz%25KN2CZxbCbkq1>q zC%HK24rNcV1d%+={gvBMO|;^DlcK{2Az%7j|h z%a*?7I+N*55GOt^{9Jvxt``>@6d`R35CPwkq^Z>5XtbO=a%{atT(TA!vX*R Du-(Lx literal 0 HcmV?d00001 diff --git 
a/tests/builds/zika/results/sequences.fasta.gz b/tests/builds/zika/results/sequences.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..07cde2d369a22b99ffacf4be85afdfb8f3fc6eb2 GIT binary patch literal 22414 zcmYIvbyOSg^L24|cbDMqQrz9$p=fbhq__q5;OOp-hH=u zmGERcM7P^J)C10k6 zERU7z*XiwJ+@Qz^<`( zZ}h~|sE9wimVx|GcKar{`ggj;h?@Yc~s zMNK(4IE9z_Uw4oaUKWp%N}|2+Et-$gJ^8&7bTg*cNtGD62b`tFf?Z~!WW%4l{Y3qH z97Weh7}w3AW4l|7iS+ZA3HoIE^rx*ezx{exTVNzx>x4sLG|xFa#uP^B`s&xohphv= z5l}Kxyid_BN2XXF*Nq#Joo$%W$P&it!hQ-X7}&<%!(8KU{poZ5t*Umho2RS7X`KH4PST%-8}x3H-w6pmavKbV}yfl zY=k9fhqO0tZBhL~EhhHkOAhfGDtxS|XtmQ}TV^!PmbV4kX5viSyTsvo57{NVhdLLMs1UkR<(m#&+i3K}U2=hVA1sqNKz6l#o#< zb4>Zg3;R(0o-_wj{Dwq!(i4N>Ojp2Df_Jbf3hpayiB!K^+-5V_mIRe zzOuwdd*Za1w`&!nfj`Jl9QkR2aPGSjuiX$vuRHlKw%xm=`$}_E=Ft$Imt}f(Zxgl5 zTh{yPTk4pt_3z>zP7PO~!mQ#xEMJD_ZRjdNG4X4qeb)iwa927*+V@)(|XSI~4HnChg4j($(#1O>2G!>?u zW5Xlu$W=4QyFvO~3@7;1?w4ZQ3v$LWdBVUzsqpf+gT4pKu$*hnRthsG?*Jk6g5#u? zPbhtab(}d<<`Z2qpy~@ieFi9h8uw)5NsjBz_%2IYr|%a4#K)+ZRfcx%-M6<2!a-zu ziKC56n1!YsDqgocgyA2w79+GZ79|c1DTJpW_dYL`VizU#^?Lu!|=ouIH;t6z{K~3G5RJrS+31qZ(_xP)Y7*1c{l% z`m|ENV2k=V1Nst1(HVZnrw&yU#`D2HOwnmkyUFqX5|0bOkq2(Hgitb`i{77+0pd*;}*>x95u{)nPC%LM%=AyaKSB^zbUM3!562}k*_=OAIZ#dy& zMvIGiqn4BUY7Do=-PiYP6m)(RR$Er;gjM&yHZJ$f`)vT>iM)N@flIlu&}1v`wtUr{ zCx^a+4n(}uVC6WF7C}EW^uY)XCPE51@U(NhY2kvniiMb=ZfBr{M8PdOm|Av`vC(7XGfOJG(!^y4ht)^dUT!; zq7Ftc@I0*8A;MTrY`#q>OtWaddaK_$%Xz9OX64dj_*TVA(3RCnDmB?F4cTnDVOjlI ztLE}ce8MQo;FUBp!rfZGT?jieiDIZEqBibnF=nTWW*xsLHWaR1Ot&yL8px*%cH@BS z=c-M4S!h5ce*d=QO0QqaEpmzJw2Fp$+qCDrU%pi)#BLv;(8pf!asmk55jx5OT0y@x zw8IL;oR^F82-`EJpMESudm24%FeUft6Qi=!$;+_WxT(7I$>tx&rBqD7JT z*-xOmMMaI29hBT#ONaHS86jU!A#WA=!wr7~P6TKCPuq93pa+v9bE(`YfLIlAV-~VJkKh z-zSkrehyl}8VwNoOC5b?l;@crgP0DC04xB2*)C8PD>sI}$#4f^X4469lcMOy6(jnP z2S_ca0#ejyGUZ$#cl*t&fj4XfsQIqZM6d`bJ2gNL?YnOYFk6OZfeQrjR@f-n=AW!* zgb<}27w!FSU{{p-rW@3=g+HGQ$z_`je7iIrN$;aYh zhKr|RxENQh?U++PLHVvFEN=&It&0R7-o=C>4rxYhT6X)|>vh%}oVne9R%#7nJ=!>& 
zK|CHZ^}TX$D0#Z$BY+R35yuK62+B2XXN+mlC^Tg5?~z*8EQ)l2J}^H(+c}_;c^gC3 z9;2PL&;bEez4BVs6_EwEiaDJSurzMygRiW$~A$QKQgW+-bh}_O#uPHX~onT>a=D=%n@Z7!tb5$_AKgc*B|u19QdZx7yb> z=eQ3;k~l=S|5{7Z6OwYUl0+Su9wUVpQlsEv%}}TesrJiq!XgUU@ntW@{uzc_>O z?CTt9AMD4|V}0U*#&e?nysSpT=>sQ;RV9R6PT`>$>tnAV{)!;XWV6!LV%wzq*8yGi zd5RnH)X@nxEvK}Wweb4;Epck)TyC++$T!FW1-ve7)#1Lab}dU&j5tJDX|JV<*(G{O zH5dwJK27-3Mss=T=y0P{^-F|-z(q8B;wdd`w;BbiPjiB}$L=`6xf<$FM?ucf=+9eL z+d=f|mWf0PnFV)C7~vPbzn$XkfEV`DElhk|eJ^NuVe%MILUyy;8I)`8QJ*4Q{;j&$!cuu@gEg-&J-*KT-gPO1y{2xG`dR-)%SQO+nGq&(n&`w2OP3n^@0QzUQvHdrBnq;{$l&#Xk zPosZO;R`W~_Q?{Ji_MX0xPzp~(Rcrl{A9A{(dsQ1nvu9goQz*)m{?1uS;))d<@Szl z+)@3_oMsh^3UK&aK#}N&=suI60Iw7WyFu)h6^(Q(te4fP(jyuw%m!7!uyd)(>{pB_MKmMR zKA*95w2{n7h(hEDQg7m{mDFVHWB=*DBs-Bs3F4$}$eg#B#O zxBg|^8co;*$nK{gswTV={9)b84grqmn-$*K3lz*wl`;t|PDn6RX;VA+gnzvOYN4(838DW!0U znZES?4QeoM|HSI7jI;O*fBsdowLzKiRQ)uQ; z8*J9=2UeU{Su=Xqu^m!wE0BEDYB)YcFEp~j%_lr^3?a%mp?PiAKn|fdcwsTx4=|6O zBpvcv+~i0Rca)t#kP##7S0CmfGFv{)CryakA16X6ZzJ#0K$k6!)JnMjt&A`KK^{M- zHXuW0S)r}D%Oi`Oh!0=kab}1?mn2bcz0OZXm^=ilc0k+_HKQ*HRYO9zzM-&*6MjJP zHe-&|$c~G2Qs!4m&BwSPCN10_BpK&Qtwu8uX*FYY!;NA_ZFXN%U4Q&pml^I7FFsHl zK!>v9w@q#wCpK#<)-07pRx%r^9o&KqxbK%qtXzi*yBjmrlvf+r^K3q`&A~;a7U`5j z6hIjk-$+{w>VpcG=(dSd{@Kupz*6*$v*y<6r|;ce&%Ya+o~J`m-~TF81&Wff)7i|c*H&OA2niY5hI#U zuy&xx&d{b9-G=U!g&~LQL1Xn;VCH_nC6iE4vC|ZC8Eenrxd<@&TwBu|WTfC%=(f0^ zU0Zqc=pa6^K$Z=lUO;6p^JTPIzJoor17J)tjob%Uk-}umk5io@8oMHNsGFOZ8KL;N zLv*;7Qpom(D0W9yENADq3Rpt-xI7ui5ReufikV?w+XfgIp@!$@VOR*sZ=LpVpw28JL zLLsbeF0HDv+EizxwO;#{YH1F;T#SiANVFV}YA(rsG}=Jg37Q zXc}!_Mf9WzNCbX@t761YCaksp`<{b6{g9Z_Uwq%HKAKZI#^sc`3C{th`u|WtMCz)z ziTB_(HfTa|C&#gw@Wj?o7OJh0ra6J~?CNVLEaY!VQXR9eV(hR-daJ3tK#D=*!nDWY z!U=)r&_Zjt!mP?w!$>M-X9jlxQBdi`SY?&{CXarXTGc1Lnqk1HU1uDD=iO;lWy&AV zvW@nscAjkD!g%kZ0fYj)N?~PT5o(&|=lV)ZK8vd#XdX_5Wk!o0VZX^M>X3E`Pwf(K zkO>Po_d|*6Yj`fDG*OW+gl(pId~fpNYE6xlNNTaq^vgxqa)i>C=UHvmbDd_uC9AyX z7o48V*3-dH*Y(4#YJ)plO}GQE)revhS8xYJl((}?^Wgq7YyHkq#$~O7v7dT6NhCUQ 
z%Q!QPfCz!K%etCWMC_^ZWW5j%S_9N4`#s}PAd@sgf6YXL&j0wir0kAXd&+Z)7#=Fl zvkYwU4l^3zQIM)$wJ@^1GXL>-?o?Y-q8oKSVw8^fQQi|C@4-kXYY>jhWndbX^jaQk z0a^~wys`}*)k`6Kf{biqQgbBlaoWs{9AP@-2c{cLhc_&Z+Jwx)E4MVL#S6UJ?{y36 zOpQ>`S^|t#(31bU4z}wvIDPC^6`r`x4wmb=Pw;4(pj%F{7+sE!KFO@AgvsKQMJ?(X zf2aE|??G@W7?*KI1MZ^3d@?<_%OsB3nuQhkcR?Q9+V-4h3~Ia+rXKCSLvJjy5AvlT_V>6o}o4nHvD|E)PNi?ArqkO z`%;j6d^kZf=kZqkV{U{>!7Tw_;Qd^(J(|9Ry@^Qr2C|r38{^-wMKWPcD@W8_j&sBv z&6#(whvV|BnF^;T_PDdWK8m=gxO1JUZMtC$M2Gn|v%tBH0rzS>9bb9barz_{6C-n@ zm?${_nS@J%2e;?GTko5f?UGE^o_?Z~og890wTmE0f%1$2GY@73H#KRVYe`pU+-AAS z-|GW{$6$NLo)Qkpl%ddXgi|1soDF)W-pXs1M-$GOz%ALYbErieDT1m_!ar}6Hz4u` z4cD_50i;7WO^sr#0W_Gs6g@xQfZ8{8USZ%Ul1~Fmi;47@{U*0_xIJS8>nwwVM{n1; zrdd~KpvH@7R#9Iex^IZkHvDY|LX|4}vxXXh8uKtCj2?z`E_A4*;d!c4pfr8QZQiqQ zY`vm4ATT?wiV^;a=4{pV6-?e)_r6u^{SHFbk*Z1;b3HMM$xjBkbv4p5+=kE**#5I( z<9`3=T6vG=?q5x{^MAYl^#oS#{W~(JN}R{^4DIuX^nk%_4l!C_=;#G2TR+THrwNi3 zToC38^q5Zv<>MsvHIt%nf27Tr>-GEopvjf0*ENADr{@UKK{-~2F~+Oje;~o<8CY$) zL{!CCJ_K_H%q)Hz|626kEH>JwY zimr~r*Ej}BwrQ78kEXgVd(`CeVJ*^z8ts9PbY~|_7)Q0lbu|JmU_2RF8*2DEN-nXg z`8^e$BRwXW7Z*OwOx9K^5SJh)P5TdiV)>}BhUn=b%p(sFGr{chem)L#w?oB?oZn?q zDXw!3_TaWgJ&B!Y?|T=54LQy#U`y-Ue2@n>Gz=m+1J!r?cSS9zy4&zG>mr^JQ8N%; zRM4C;AO#3Rgj?n%>CY49i8{b^n41~DtC{u%2&%3V{=f3@<+#i64McDq15>}?8j^Gr z`#bzd<+`t3@)u~A6Ox?o&ds7~1nyJPj3O%p#~$&? 
zIN?>5G>jF^4e&pWtc*|;Xu+KlNtK~JR##_Eml|7;GavpV%gDO5k z?!^W1B-&-1@w;@7rtkmG(fpqmzW4mQP7AA#XZ~_Rc7gW51lL<$9jOsvZ(T33H4l2l z9X$5fnh(m3*W|uLuB-xKz04E^gd~e=Wt^Q*gy!j{%`<0kKJ^0RGvIcTAB|S|-SGID zl0UOM7@z03flp+vOXurC%_3}ENqm2Qp`f)aG8!xce|L0KROK1bW8Vuv6%`gSN)hlv z$s%ka90-r@%Ltq+i2H|}1v@!q8ihXy_&2eaDf1TC9Nku-4E;f)QH&DZkz=dV!R>0Y=jZ)c6!>s{F0~Z!m z`s9PM5{m37;Z77f4&8goXZz}rdv!=z8B3fl-Xwh7l1=G+C{4A0^Um-%x^IQhw&ukL zeu?48L))u`UZXl^DE$(`DwV4jgZhC}3z3C+5Cije+7RdAe~JELCurDxWPqX|qA{zx zgDMQBnj($VumSCZMns(L>N5-}euVpbA!ZRCEbVT&YYnsN(T0)>;!O5AvSXD#G%@y; zUzG%#qK*i&LX@IYwoxm8hbm2V<=DPK9f@|U>Ss{c#Uqz5|6TvF&!p`?o3D7XWD(#~QzjcPrBAk~RyHdq*M&RT2+=x}y zv0|jbnSpg4^NygQ$5ZP^kXk~jcK5mwD7?&|r7Qf(C{ngrGB5OZ;v9cf(guzVJR43>Hqc28lphX6dYthi)e=)&2L zW+l3KBWGQ^VRw@&&qzUWc_3#I9$JgNQKI}dL0#|fIaDb-+I_N^q7xDwo>^)+da#^i zFh7(s?BcB|Y}J9P`8@a5ysy;T31BASA~6*s&IipRDZB$@1gq?*tOmk~IJ1A(ofC-} zHe4?t1Fu-0r$t#O)Fx09llb^z<;p%Hbr4`rt&60J(2bI{N>2pqg7_I?J6@n!8Xv*r zLuoUu@-A5W*L_=j6sRLoU=%4xvufIrvh=GW=30@ncKDPZ25LjYDqT>sGHRi9a-qlM zz>^BCiQ|XTFy3P8f~Fe%Gi0!0M-ut0?1g>?&Rsb7^~-51$+9#kw4r5XsCy&tVAmC1 z6Z3H^4+Qa{K+-bNmD@H4fGuUbo&ASdx^>lcbD%~gjthf_Mhe(P$}%a+=yyILn{zV$ z6@@DO#ry4Dwqe4#NIUcCBgd8(kfk9=mb$;`p_VpM7sUNh1RWr<2ww^%+h|)437zdw zm%QI$Mrj%c$8;qdm;Snk>ny+JBA10&`9tFkKY(YKMX%t>B}6HFS9IbiW4-K*ajftb z_C9$OZ!A*t2+Y-`;4BgNgxTUfjm1y~hvndx}}E;HgCh zTuNNVo#a@%RyJh7zmQxJ((iFyS2FL0Il5Y7`N7yNAvq%urJI9D0tfpymVCIBg%0*px^k2V9PG+CR9)T4PlEPA7w%ST2YZgPuMErsm_-0$z zi{zu^iPNF}u*%G}NL!<|BOzvs-pg>xU+Lx8I3@DmN{{^#PQK-61o6hVg15_0D>(+e z<@q)4F!V>YQSUqh4=*;oobB+Gbv#nx-F2h!Vfg zf+#&;SPw3-tWWwOMR~_psbO374{<`h3K4-dRMwjqY#=Vw)S^+8A!tEB77%r@0r}R9 zHKRMol-SkJ5K{SSsJ@S6&?ux&W-R4p44LR%EP!hp1Yte3rfd)R zEd81YjVe&h>JXD{uqaaW3?uo!=yeTXUgG zdA^Golf2w3yd%zCw}t-!ueW>%_&x*pvhw@L`?!}!iR)YGsEex8>_nsR^|*(FCl{wB zaC5AqR9?uxcpqNURNlLO9r()3S28}B<*{Fj15q}K)f0wk|Ddx(vg{xKigy3xX4dC; z0@xYhfla8f_OHw|i zdi&S;7WO&sT>G)G5%70yVch+)50IQj8gJ(&NsNEGk4VR*YOUnI7FoK*=iFyvqMrk& zGa8TFK+)3GwREKA`I>0 zf&d&Sr#ZMt|AlJK`#n?_t^>SYIOsToqsyfRaOyBr3XaA*DE*Px#_XUIj9xML1_U5x zAyGCT*z^RB(aTmq8VC 
zcSOtu;5B^26|*<|b7~E-yq-+{y=r zeQ1ha(g{~R4MXFZ|1qz;TVi2S2YpTqL|8c>7*SrC$c{6|i>|SX|01?>n;BuNLey+o zu0Iqcp1v;=aZIl0QnM{Qh~4eJRCkXtXO=_Y#SY0b0Q7!|SbKs?-wD1vlf`C0KkBBx z1Lk6YS6xMVJE@xElCmV4ijN1H6WG7PN?szDoX;!0E~yA-w)!ixyYTdVW!xd~%4wH4 zo@DGE(t1bv--0c>s9%|IAF~7$aZ!1cxNv>(*d0j_wOq;ArM?oTPyEy;?+O>7BO@;f zX5;?T+DU-Q)g;~ z?RUx4%@&Ngt;AsAZ&>VBnCBLc(`VA0Wy^PFC_wQSg14$1GcOCtNi{+Z&R?xf+WwLT z2W|&WBbcoT?+v?E(3hTQ(DkgNU4D%k$7Eu_rj#N;a*ebn2E+l1nrjaSsjvLev~r>> zsmcR`yAbUJKAEN;`%r;u$@FQaQ|);4M~VnFA4htN;NVK=67Xat4nt!}kl?#J9Q(c_ zcKKaip`@T3O;*ETGLo?hTsoor0I5woCgUJ>Iif9=G;F;r-oNObZ#ZwU0@6oAi;PFy z_i4=uujx=E0*vsN&^5~cUd;ZIQq*8y)=gt;z0VR``#l-F{{+ddwOKwnZZW8t{Bi0z z7G6Uk&0Q9jKLptUbtgYUcCw*)A{-CWopAZI#r)M{%er zp{ts^5s4td*5eso!U{?$+K0Z0;910ctisv*fDk5^5(Beqj&lSv2F5(2im7*Zn}?OE zq`cNFo>db~56p7QLX7Zujr4+g`ac+eG%h8iL2So{ot0x*|Fk#WWN1X5E>bDKeI!iY zWgg_ZV>Op)LD#^we|SdffAV+>+^>K#8Rl|}n0V}1OALAv{h@|3Z z2(?|H*ikS_{y>_`UsC=$`nxWsh|xvVmmZEm5S#lOir&8|P6AZczBa|I3Ayk4cZ(Ej9#1QaywAqkS43?C; zk|nmwu#d2<_e9$LXz>{tH7^#-3?g5;1>)P^y>t<_Hlv?LqhWSe1Kw$!f%!P?4{Dm& ze~ktkgHb;W_r@N3c%iVT6b+1&>KzP!>LjKr$Qg4#<|d43&C<897@SpADPW%oR6-9(>pO> za{?68cN3!NXJV~lKH{@`2z-2)-i0t109M181B-AUX$h7Y>a8_kvc|B4LoD!+`{#{6 zDjFe9f?xl005%9lXGJnCGT-b8&Oyx$!q|usd2ynDV0vt1Lgt;QXXt>DL^rd`w;E6H z3INrcpQz~ON2Z*WPeew3J11n9%AZ|LD)!z-HJ9f!zCsW{9-HDiKcfYgdcVY-ss#*5 z2EF0xYH;h}3#D~ybkf|=U02=(5lAlQ{W$JU3HU1@LXW_61hu_hyIBiPPG zV6Q?dBW%mCKC51Qg=1I3T~^{7R2sx)q%BB+;73bE=U7nq=U9qAtg}ixuQi#`&J}DT zuDG%(m(wlr9lE^*|Enj42A-Ln;a>c3K$l&tJTOcV#n0t-Q|!b7`GUM;V1Z44j+vm?cuYi~i0okjrc_8o#Uvq#jtiW#R1$rVyyT*Jev*REet*rr=&rKy61Zs*Y>w5l-POdD{)3ua=`QX1XS#N?8jMCg_f8=l-T6+L&tbc zoEuUZpk@+w|-wEKPHd?va%Zn z73D8BcB(B-I@r3$)5;*`f-H8Dc?_ef{AI>}Lg%3tdh=`OXQcgZ$t6NED}PniP7Ss z>a9Y_)dd=)IJy=+lfnQS>Eh?F>WyOO^H{tqZ9O=n3Q8ith#zSW$dN{a*(*$|iSL&$m$C+sjDsLLEt+rz&fBMB@L0Tw#O&eMD0K^iPclSGoo7-mS6 zP$3X;obkw>fXPtTSjHTWy$zr7gO_ji%6!qt-9Enb=Q|eQS3eXTFx%ptq-3HhQqUG! 
zCc&cw7oR$TIZO^_3Y@&9iF45zIb))5P*LYJi;Nx9;c|#8tqH!lY;I%B5ka|fX3bO_ zA$x&nU{TrY5sQO!QQ;qJIk=93R&RnOZ$|1$tytN15l6k~9fa>NpMiKmRG}uP>r!HV z6Gf^T#gQ&AbAm95b28cA41PHZ3Zl7yxP^6ki;!&5U$D@HQ-@5nkT zSKG&Psqm?fL&eW0IJhZxSF#4We=ergwU_e$Aa@f&#m&S8-=+CAxr)`>=gh50`m-w* zHOj^Vsf?Y>+a7ii6vRJcqgQnr=iLiT4Sr71u;@#9QnG~D+AoEzykF&9~RfBH9RNP8)xUxXU_MZua`=6WwiKZ92G%l8vYoTpj@0BU~BVzZpF^YSA2#B_sZd*-G44dV%` z4hhv_>bWu)P*;NqJK1hT)RS1T*twJ97iJRQS_!`nCuC4f_eU0@E9zv^K(We4Zyk&8y>; zZs>uPv8zoL3$Xo{E>(FA0vn&CUgGW&`Tt#yaw&3*9YK|?2obp@vvzVWk``od`3Md> zS*GwKx@d6GfL(r17zvMCk@jsI=Kd;_Y{tU?c*`@%L0s7kO;utf76Yj;YpLvG4JaW{ zFyF51SEfy#|1lHnyZ~h~M(I(@Jg?z+bdkztirqo33 ze>uFXzPSGfy$@R*s7<{o*?_qGFT}g+gS|kJqJtQq^_@W+S>;L@yxj)RY_s=8d;#K2 zgmq5)`a}XDCyITrOnkDL`F)wXMpF|^qWHHx7MayUrxmjb6y2&0 zf5f^Mf_w|yVjqb42Z6d;@2+oY)OSl3<=5_Ph2ICcY$qfnsM{pA;;uPlEk0Kco+kBN zdtq>!dRB$Lq%NP9$h!mULxvn6giVvqEn^2QC&4*9YH2^$M&Hb@o&(X0l4!TMqNM9m z1P0@cpBxUFlNHN(G;7?@umak|=%<)_YAJgp&_rWMp#m|FD+t(&s*zp@WaS^<#W}oiTF<7k5Nzve^nTqOu3%3BZ zD7K5y<>D;j-c|*dXknz%4vE^TFQ;uFL4R0878oJ*ltO@OKyv!y`UABT4Wm-Hdcu@a zrD-r`!0cC8IV#PMu4~>R&PBL1;SSi?P;WFvF~&m6KF<);0uvBYYFfk zV2s6vs}{5u+he6x@b<}J@N^wY;OWxf{;1BxS@8na4SXVPeg1(ydxYSqtxt3ctc%@1 z!0HFNU9dT6%EG7SMS`;H`nPKDIb_Fx=~P24@#;kVodrV(Uc#~9t}5Myl9=OC6h^L! 
zTpLFYz!t>H|IV+@=Hh^8>KE%2VD}kWfJXod4bht1*f)KyH=O;VLaufyKMp+aoI#3q zYD|&?JaaEy--3*i(BYgGZhB%T!=x zB|QEH(y z9jw(-xoK1-j|o@dwyon$_%mYTY5Kc&JN^=3DV0RXK^_tSZ|gsCg)O)kFW02I)nwAn10pI#xpu(h7t!@kMgbvg0YcDG2H@C>BM6hGdkJG zE^vOupQb*CcWa-ME6~> zb-VJ5*(r$k@uWiHs~=7gdTpUxNC9f$jkJlDCfrlNx_4DKpR#+hMPT!7WMwr(GEDqyArs{O@IW{wGJ33Rr)M{LYuaA5W(+Dn^bN5AX~q z^i16(uLU0Uzo1?elnEZ$vyr^$PV0zKJ2(yA2X+tNM|77E!%2obrh>mRm)8WZ@ZvPR zG)4yP_e)Ts>3UAm1PShC8zG9|R^$=bdgVBy zowUiT^bw6+1*qGP$YkT!9fq43Z$F)R=jdL$IpyKUmK$ePGy zdenEzQ{=9>7L+o<`ILQdw)w$tlJ}88f1;St#N5ApHDca`@Mn0=Ut{7>4XwB>nfTIV|%FL)o++orS1&F9k| zu0M|}-~C@E!B3R^RjAzlW~Y|z3c8(mRFIsu{OP;Hb_{9$NPr__8G6Ms*A+jNYGpSo zXzT(SWRvTpc$82)?IB`wU67QCQ)Ml z8#h9AGXLRHdhL=P_y z;JchyWq_8SVWBV=ArN^7K~Si3N%$p0eyE)DsZXFoQT7m&s7O+R0`>R-N<@%R+0dU-fh_Xkl>lMb9)+klkDCJPwz^GCXQ{m!0$=&R0PP~Gs^4iLpe z9I81{u{t)Nh(d)vyMnO&XEsN|kP4<-`^_i=l%qmA`&763?c7swD~;Ql^W-Q_!PEBevc|{1||dtw-`Z5+6^mMA#WLQUA~wy1Bol?$c!=$c&_IP;(99R!J@JpFy3P;Piil_6gWc-Pw!`o zp`~1Di$w8Y>hb3NTKR+F0{{ZlAu zwZd4_GC&ZHfJ{e`9`z5qfU87Cgm_N|xo(U{d`VjQYzH@74k`aLkydbAcoJY5!ZB)K z@R$2En4k6fUiu^w7Go+xdu(%mUp<2dp4S+HFSmxSdd(K!ZhT(Dh9kHv(UJgJOq@X; z{bs1hb4R4Vw%OpqY#VBrT|fm037?jTOAKo{O6CRMFo^m3f*KylQL@A{u_Q=L8^dvF zba$0;>6YLt!y2pTq^9zXdc^k5c$ZBk@Dpda+|QtFhG@!zLH>QTGC~lZKKS7mxO!k@FcH>yTZ1i7v9xvg;=HDD7CLvu zm$%sjF%`Wm>bx)XuwbYfvJQpq?=755C`1QNu%qrdjx4?R$mLK)Y} zdHKPs5gxqahDTDayD&)A7wodVjewvZRyYsqxQ>05Z&h}Lhul~Mdyy%qBqfDO6Uc7Z zd2RS9q9Xk$65*HC_2HG=_gL8@PmsyEpmU6p46H7NuHO9ITO zWgV8TBn>{7O;^n6`A{E3nnh*eMbqy|VD!iSgjilUdZrJA2+nMi?r#aBVwu6jmG8i0DkngTq- z{>>ozIH$p8)GA`<_{rC6e+!A#lb?5B;{3&$2?Zo3%;F5{Q(ba^Iqw=Lrmd>!U#i{Q zXH&rJ15I2wgNB02eukL}NYxL{3hc4@Ks4{_bA{;fs;-f|$sRfc=p&x9Hqd8ixW&?#<3Wc!w=+-jl<`CUJ z-w3U+t<6J`uLXU&jCUfncwK9+r_|(LX{oM$&Q=ogugu$`eFTwmm{utjLaR|0k>hR^ zl~pVextT<$TuaR5BD!x_~PgUm?>-me?zLu6rmsne}`goh`fhxx;pn%xw|HxAWb%ceR>} zows)v9J8z=dGT=D!l)d>TyxtD=u~M&-^7xAFhnQp6xs)ryEtQyz0hve3TA(XkP&e2 zDvJ0K)iiAkkL7+YZl~_BC7mS}WzOK|ibmx)|=pLvS zj(m@^-fP)5Y>UomAD`2hZNZeyNZX=PH`ve4H9X+Ar=kNhH(vd(A4R3^{r90qV`wqD 
zRZg6&V5E!p5j-0E-*$gUS|6Y}E`3-D&g{4?(jwhWD%26fLTCxVsO9vXwa9%#br$XD zjW!897D+j0N9jznu28we>cWff;mW?G8{ASK{eTfE57jNaa7V3~7qXDv+U?A#sNwxB z$8TQfYi28=R1GH1S4kDuP%z8@9;@J-y{}zOQ7Xe=QrJ9DuMt-r6@+c$zTl{gJXoq| zIL#$_hNX4O}tJ&#!h5deqj z_p0A`JIo}_u1%8?Jy3fR9*>HSM?#ZYd<{~WE!qE2FS+lRe|~&|q%G?bQvsobiD@sp zd^FVPBHAz8tBPT_D5%2ng*>04s18!WLf=9u)=K&7=)TcxB$;c0;!?@T@suHFT+jbhi+o9me?m50RYhRVR+1BQ;5SmSrZVHLTXQY;) zXE}z@JFOa0zokYV)F-isw29>t8cH1J^n4fmyre9uozOeE!4d!yf$$pNGDtvJEDbng zFzPR*(mcwczuq)V7d7f1mD4x`_4!?efTzptBoZ92Be|r1g&ke}bY91!&5h`e!(GDf zYJLBOpvX>A*<$mt+!xrc&anN*0F|-Iz|kwPcYr-h=9Zf2#K^b)8B7?0%qkZ?9ZnHI zOrFvu(mVdhgsc&iQndWtCV$~`H7iyM=$Of_wY#x5={Z~}XYSLtz7{LQGfAG{Fwo3C4-#bK zh+d^|hd)?OOEY_yD|jc$NPs2K81~8M0ZQ1`-2!ZgV;B-qFjskW3AtbfX&OXp+ZemR1+ovTB1 z3F-OMxyN#zV}GtjDd4~8D0t5HPt7E%Cm%sTrgQ{oJxsyLTRVW0{Z!}y>Z%zZYonk#{S)&nt%vaGGLd}1QSq7qz)S$&^a?^Zq&CWP zj#877x7_cgtK*~SjJNqT`~PaV_IRfM?~kQS?zh};VJ=V^4$LIO7P6ick#S4gw#KQ{rPFWhUNWjK1z9I zV$HL|)%KPv4w{nu+XFbr+b~9WfHGoRD5JlYHR1jycJ5%)M2*T%t zfB3R_Dc;^uvwYR3&ia7+l|%ckSPny)I|8-gaq=_;5>oEOJg}sqB}eZ6?x1-u9TcI# zdvRRY&TI!wu}S}U%=4s(^94??6|Uf+D=dw@ zDiiyk^&(3kg(!=iaf7|7XSGC8Cr*jX_}N(UTFCFhSuaR$Y!)2e7d}Qm>$7T$zdZYT zLjdkbG3@xe`tm*Mi<*n^pzcm8Ip4B3b9P%C4$ikuA5a#ns{Z8k)X>aF0CwJI`tuE%sjeQ&oAR46IXlqc@1$_ zJty)$`NU?7_uQ-}EqR%7`m4oUGVX`+`!Tw42`}MBdSR(*@-`E9n{z=NPUoA~go|Z&U$B<0 zCbDu#px!IN>62MwPBlbNBTMR1EfJOt8UAn*FHkSM0c~8A1STzn=8)r4l4}Nj)eyhF z_E=_vGxS?^8Tn-ed^C2}sEu#tp_O7*;KC%}e2Av@I6b|JNZ^=NG7NcGZgxVtQ!K0UgqzlniMv?HL#K!=@;A;~Y2b$0~kRL?g+4G7AguihWhS#{s#xk1gFU@?8#%t{B^+9L$E)!>eFpj|P zyCVL5O1Eq~QX}F_gK)Uh;;@)R5cnV0>4F`c()m(*C7cKMo*D0EF z%&2pD5Ig1FOzaCl$(i#)T?7fy-cHB-_YtNC69KY!kCo|U@+>H04F7PBz^57jR>q_7 zHJzx2bAkXjX$6*07aWA0MSG&51C{7sbq+5;6CPaGhO4x4FNVB9@jwKEkrK(B?y<)g z?Z@eRH8C@@V{?sT=8>Za-Jmn$^qB`!!p%LNc#X|RA%~K5u#hvT0xh98r>lvc6|bp2 z_0e1W?*S`|m#m)IMIdwZn5VkiE-c(9`x8rnXfU#YRm8A6b;al` zu4AgHk)}3PV&2&ry&Jb=*YJf#*$V(RZgLB-{Le*KyaujVSR~Y<)gyf^OK-oY0nG2L z@&Te-s!&#-myV6o=~{Dp-4F+=&;lns<(C(cJDf=N7u~YOYs^`!8&2aOz29bD 
zwxhHs5>-r$f|JNxjyX?Ki|S&xF_}yiZ6*+&`x@}&zl>+VzEYYifU9;~fTHleR&7NX z2%_(QZHZ^x{6;GfWr-E}mGKM~GVT31^-jS!-EmSju5ygNB;MMC!q$C-;RPCY0EZEs ze*3$~9kD1lo>gJnFa|UpHxEdh@U$h6U4pAXHE96@o4lK^^?vaV)sQT~!~tyj$ktSb z63Mu8hKR{qOofXi9ZyKqU9yeB%@neNdZ9pOUxnybTqp)89Y5(3o*#2B2K@Wg%Zsy|1iD4&HPl1E3qh6wBpVsWE}{BagZFx$aHch*5? zj+)+2yQ}v{Z^3}HNju`&LXfiZ)f<4*3u4Y`RvEYsivt)Us)3kea3Si6&li{qv$-LQ zO7L7Yo{-zqehMhv)t|IY$2yZglPBav%U|m+Hc+t8&lyy)07=|KJo%B4ZO3e#nqLLI_8# zFYctO^{5X5sKf64)R*B?arp2IN|&_0~` zD;>!dbGw4Lnu-( zS*yTUr?qg{)qlU401e}I={Zjg1ii43=^(54Rp+Jz>R`#I1RFpR#7 z##VA(As@x8U@8Cvo!dxOxNBv>TVW#dra0$Ye~gOMq4Kae?o@p;nSd=^$HtY{{q+3 zZv5gj8a5?qMu4Vvtz%TmPaN^NZxj4N9_c3tojY^Ake@=g0w)2VMCq=1U#|D7ntoH+ zEuZvNNb!z0hrtU4WYiJF z_0k)*dl9DLaoC4E{%I^6l{PzeE1~-g*Xt-EEpyhq+J(GI4PW_$pL>Pou6kHxE8^WoV=!TH7h+<}(zt+Qi#RHH<3tt`z+nV~~@%bnu`VW%Xk zKvgXwF^n7PM>T{mU3`S_I_m**1Yo-!=-W{9xn6F@&~y%j;j|{Mepl5`yRR1I9bBRz z!bBxBfk`W(TqZRLS|$vu&qjG;oH4E9wUU5NToY6N<)cK6Sv?VTDxN|OHf zD!X#ynfQ!LF0HA4t-@8Ymyc_>PFLBBbqVf9zf^JK*k@%Vyd4q>3_PkIB^yaFeuAN; z=KT*VBo!pTMBRtZxf)4uwGWdd{+@p%@7im0C#@A+!Xtu6bFJ=u`o;Y&)OJ0xaah@+4~9o0B4pR%M~eCoG$CQIwk1#dutNYG)X#&l zScP0d7TESWEO`ZNfsCcAPE0?3(vVmB_tU& zhiy9Q%6-)VPu)L*k=pv^@_Ppsin5@PG8CGAs8IoVB>(MhY~Me^i!9Pn!|Ywizez=!4d1Y!yKi2i2Td!4TGSRJ zcJ2DL-=fc$ZGbTZp)iF#zQ>wEPYn1ds~}LbaorF4GcKE4fo3BQ;Bqtl9~jyN*|i3v z=MDO%SdeLWth6A7L;LF}2Z>DucL7rfEe&#_d*8`>L)g3Y*kw`-2gNY=E8wpa+$Rm1eM%H%mxh8!eE007cBKC5!h@~_$I!k9K_WS;QA=dr=gH~7v zYB7)tLUFoFt>@TmpRc{k^;X}bINX;pCl z;Xvzajs}|PW>F{fl=hoJeql%1Z&zC8E;s@`ug(S=($PQB>vG+U@hJ69qK-C)pGZbs z?8kVO732q!kh^b%%O%>90L4tAKZZJ@NXvBH1XA&0!f^;hO^>IxLh8CCX1qxrl_aO2 zIb(kLQMV?hM}T+Tjk@Xqq?^rk;8*|EC=XW#=fwd`|Nm#=<~_*?+-;auVp_SQutXiM zC9&ObXpi+9OHlGNGeExN^&21m^P{i81Jm&5H{-mL3>UP#KJG8TDf{r9F^AW8%_{;F z>bC4U+Zh_bjK!h`QkFkBJ@9ngaoTWFednHikgjxpf7o5^VI2(3M|N79`5!e<1gsAavSgji*2hE&jtr za;lbB6|u=A=iBpBb`90q>loy6O`1^HA|MigVM5xY)F^-ct4}m?zw;+@f;$AX_-i%o zL?!>Y9X5AV2T>|tKbYYAE_@Qzv|w}tu-DO}fz1W#8Tova3RzrvZ4QQmt&64Q0?E#w zmE!(oOvtbU-prc%pxBuN^kcr$ulm^eDh`UNh~GW?S^0e*0Dm(mHUek-XD{C_on@zG 
zCU6q%j`($V1g%_Z#e>mVt_;HJP{zHf`<35#-t8$(J=5N6U_lqZye7PQV(#AJi~CKs z%ES4NI$JG%M5&h`THUm{e-4Qi{dzT|Q4u?@?ny@nq?_rxe{Az<@KxvAYh`7DRh7Sn(C32vT(Hhp&LZk- zaiuX-#VLy;u=Md!8DQy2|EB@3E9u(5PHcrSQgZ`Zqa#!ko}3=Dn@n_eTvC>Pv64)% z@O~`Q#h_+J6AG1Y#Wv)&SzEueW~|wq>=b_%6B=ClT}+cQ5W2i(BQtk5lGxBYe8_a~ z>Imp*&Uj>Dz%E=DA?#@RXzK_9VcRBEu4~S!QZHS-4@<1-)01y12e`MSxk1?{P5OC_ z#B1Yb_fA<`mnLT=@OAsEZQss(1`6z&HmhH%uABv6tT9AKO)iwm)K(hQ51}W6MO_rm zr0#rD^IzyKD?`<;(il|rvHw`h@}vz4j{A2t>>=_C2tRk*8{G5}>;2yTx=f$MbeTy8 zH6$Tmw*zIJHd$6yo}WMZ6%d29(O53n`_`jCV7reDs$8P-Shv}X!QO9hVL=3~VToLj zmq{4pm{?0UnV|1QMW{;Z408zeMt3# /dev/null + $ export AUGUR="../../../bin/augur" + +Parse a FASTA whose defline contains metadata into separate sequence and metadata files. + + $ ${AUGUR} parse \ + > --sequences "data/zika.fasta.gz" \ + > --output-sequences "$TMP/out/sequences.fasta.gz" \ + > --output-metadata "$TMP/out/metadata.tsv.gz" \ + > --fields strain virus accession date region country division city db segment authors url title journal paper_url \ + > --prettify-fields region country division city + + $ diff -u <(gzip -c -d "results/sequences.fasta.gz") <(gzip -c -d "$TMP/out/sequences.fasta.gz") + +Index sequence composition to speed up filters. + + $ ${AUGUR} index \ + > --sequences "results/sequences.fasta.gz" \ + > --output "$TMP/out/sequence_index.tsv.gz" \ + > --verbose + Analysed 12 sequences with an average length of 10598 nucleotides. + + $ diff -u <(gzip -c -d "results/sequence_index.tsv.gz") <(gzip -c -d "$TMP/out/sequence_index.tsv.gz") + +Filter sequences by a minimum date and an exclusion list and only keep one sequence per country, year, and month. 
+ + $ ${AUGUR} filter \ + > --sequences "results/sequences.fasta.gz" \ + > --sequence-index "results/sequence_index.tsv.gz" \ + > --metadata "results/metadata.tsv.gz" \ + > --exclude "config/dropped_strains.txt" \ + > --output "$TMP/out/filtered.fasta.gz" \ + > --group-by country year month \ + > --sequences-per-group 1 \ + > --subsample-seed 314159 \ + > --no-probabilistic-sampling \ + > --min-date 2012 > /dev/null + $ gzip -c -d "$TMP/out/filtered.fasta.gz" | grep "^>" | wc -l + \s*10 (re) + +Align filtered sequences to a specific reference sequence and fill any gaps. + + $ ${AUGUR} align \ + > --sequences "results/filtered.fasta" \ + > --reference-sequence "config/zika_outgroup.gb" \ + > --output "$TMP/out/aligned.fasta" \ + > --fill-gaps > /dev/null + + $ diff -u "results/aligned.fasta" "$TMP/out/aligned.fasta" + +Build a tree from the multiple sequence alignment. + + $ ${AUGUR} tree \ + > --alignment "results/aligned.fasta" \ + > --output "$TMP/out/tree_raw.nwk" \ + > --method iqtree \ + > --tree-builder-args "-seed 314159" > /dev/null + + $ python3 "$TESTDIR/../../scripts/diff_trees.py" "results/tree_raw.nwk" "$TMP/out/tree_raw.nwk" --significant-digits 5 + {} + +Confirm that tree log file exists. + + $ test -e "results/aligned-delim.iqtree.log" + $ echo $? + 0 + +Build a time tree from the existing tree topology, the multiple sequence alignment, and the strain metadata. + + $ ${AUGUR} refine \ + > --tree "results/tree_raw.nwk" \ + > --alignment "results/aligned.fasta" \ + > --metadata "results/metadata.tsv.gz" \ + > --output-tree "$TMP/out/tree.nwk" \ + > --output-node-data "$TMP/out/branch_lengths.json" \ + > --timetree \ + > --coalescent opt \ + > --date-confidence \ + > --date-inference marginal \ + > --clock-filter-iqd 4 \ + > --seed 314159 > /dev/null + +Confirm that TreeTime trees match expected topology and branch lengths. 
+ + $ python3 "$TESTDIR/../../scripts/diff_trees.py" "results/tree.nwk" "$TMP/out/tree.nwk" --significant-digits 2 + {} + +Branch lengths and other annotations like dates are too stochastic across runs to consistently compare with reasonable precision. + +#$ python3 "$TESTDIR/../../scripts/diff_jsons.py" "results/branch_lengths.json" "$TMP/out/branch_lengths.json" --significant-digits 0 +#{} + +Calculate tip frequencies from the tree. + + $ ${AUGUR} frequencies \ + > --method kde \ + > --tree "results/tree.nwk" \ + > --metadata "results/metadata.tsv.gz" \ + > --pivot-interval 3 \ + > --output "$TMP/out/zika_tip-frequencies.json" > /dev/null + + $ diff -u --ignore-matching-lines version "auspice/zika_tip-frequencies.json" "$TMP/out/zika_tip-frequencies.json" + +Infer ancestral sequences from the tree. + + $ ${AUGUR} ancestral \ + > --tree "results/tree.nwk" \ + > --alignment "results/aligned.fasta" \ + > --infer-ambiguous \ + > --output-node-data "$TMP/out/nt_muts.json" \ + > --inference joint > /dev/null + + $ diff -u --ignore-matching-lines version "results/nt_muts.json" "$TMP/out/nt_muts.json" + +Infer ancestral traits from the tree. + + $ ${AUGUR} traits \ + > --tree "results/tree.nwk" \ + > --weights "config/trait_weights.csv" \ + > --metadata "results/metadata.tsv.gz" \ + > --output-node-data "$TMP/out/traits.json" \ + > --columns country region \ + > --sampling-bias-correction 3 \ + > --confidence > /dev/null + + $ python3 "$TESTDIR/../../scripts/diff_jsons.py" "results/traits.json" "$TMP/out/traits.json" --significant-digits 5 + {} + +Implicit mugration model outputs are not written to the same directory as the traits output, so we cannot test for matching mugration models here. +See augur issue 541 (https://github.com/nextstrain/augur/issues/541) for more details. 
+ +#$ diff -u "results/treecountry.mugration_model.txt" "$TMP/out/treecountry.mugration_model.txt" +#$ diff -u "results/treeregion.mugration_model.txt" "$TMP/out/treeregion.mugration_model.txt" + +Translate inferred ancestral and observed nucleotide sequences to amino acid mutations. + + $ ${AUGUR} translate \ + > --tree "results/tree.nwk" \ + > --ancestral-sequences "$TMP/out/nt_muts.json" \ + > --reference-sequence "config/zika_outgroup.gb" \ + > --output-node-data "$TMP/out/aa_muts.json" > /dev/null + + $ diff -u --ignore-matching-lines version "results/aa_muts.json" "$TMP/out/aa_muts.json" + +Export JSON files as v1 auspice outputs. + + $ ${AUGUR} export v1 \ + > --tree "results/tree.nwk" \ + > --metadata "results/metadata.tsv.gz" \ + > --node-data "$TMP/out/branch_lengths.json" \ + > "$TMP/out/traits.json" \ + > "$TMP/out/nt_muts.json" \ + > "$TMP/out/aa_muts.json" \ + > --colors "config/colors.tsv" \ + > --auspice-config "config/auspice_config_v1.json" \ + > --output-tree "$TMP/out/v1_zika_tree.json" \ + > --output-meta "$TMP/out/v1_zika_meta.json" \ + > --output-sequence "$TMP/out/v1_zika_seq.json" > /dev/null + + $ ${AUGUR} validate export-v1 "$TMP/out/v1_zika_meta.json" "$TMP/out/v1_zika_tree.json" > /dev/null + +Compare auspice metadata files, but ignore the "updated" field since this changes with the date the export command is run. + + $ diff -u --ignore-matching-lines updated "auspice/v1_zika_meta.json" "$TMP/out/v1_zika_meta.json" + +Export JSON files as v2 auspice outputs. 
+ + $ ${AUGUR} export v2 \ + > --tree "results/tree.nwk" \ + > --metadata "results/metadata.tsv.gz" \ + > --node-data "$TMP/out/branch_lengths.json" \ + > "$TMP/out/traits.json" \ + > "$TMP/out/nt_muts.json" \ + > "$TMP/out/aa_muts.json" \ + > --colors "config/colors.tsv" \ + > --auspice-config "config/auspice_config_v2.json" \ + > --output "$TMP/out/v2_zika.json" \ + > --title 'Real-time tracking of Zika virus evolution -- v2 JSON' \ + > --panels tree map entropy frequencies > /dev/null + + $ ${AUGUR} validate export-v2 "$TMP/out/v2_zika.json" > /dev/null + +Switch back to the original directory where testing started. + + $ popd > /dev/null From 6a719285982c0d0ce0d1ad825ba59a16b3695b7c Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 16 Mar 2021 14:17:38 -0700 Subject: [PATCH 7/8] Support compressed inputs in augur align Adds support for compressed inputs (reference files and alignment sequences) in augur align by refactoring existing code to use Augur's `io` module. This is a work in progress and still requires focused work to add support for compressed output files. --- augur/align.py | 129 ++++++++++--------- tests/builds/zika/config/zika_outgroup.gb.gz | Bin 0 -> 8883 bytes tests/builds/zika_compressed.t | 4 +- 3 files changed, 73 insertions(+), 60 deletions(-) create mode 100644 tests/builds/zika/config/zika_outgroup.gb.gz diff --git a/augur/align.py b/augur/align.py index 30cd8ea2f..2106b1f0a 100644 --- a/augur/align.py +++ b/augur/align.py @@ -1,14 +1,19 @@ """ Align multiple sequences from FASTA. 
""" - +import hashlib +from itertools import chain import os +from pathlib import Path from shutil import copyfile import numpy as np from Bio import AlignIO, SeqIO, Seq, Align from .utils import run_shell_command, nthreads_value, shquote from collections import defaultdict +from .io import open_file, read_sequences as io_read_sequences, write_sequences + + class AlignmentError(Exception): # TODO: this exception should potentially be renamed and made augur-wide # thus allowing any module to raise it and have the message printed & augur @@ -58,11 +63,12 @@ def prepare(sequences, existing_aln_fname, output, ref_name, ref_seq_fname): seqs = read_sequences(*sequences) seqs_to_align_fname = output + ".to_align.fasta" + existing_aln = None + existing_aln_sequence_names = set() + if existing_aln_fname: existing_aln = read_alignment(existing_aln_fname) seqs = prune_seqs_matching_alignment(seqs, existing_aln) - else: - existing_aln = None if ref_seq_fname: ref_seq = read_reference(ref_seq_fname) @@ -72,18 +78,22 @@ def prepare(sequences, existing_aln_fname, output, ref_name, ref_seq_fname): raise AlignmentError("ERROR: Provided existing alignment ({}bp) is not the same length as the reference sequence ({}bp)".format(existing_aln.get_alignment_length(), len(ref_seq))) existing_aln_fname = existing_aln_fname + ".ref.fasta" existing_aln.append(ref_seq) - write_seqs(existing_aln, existing_aln_fname) + existing_aln_sequence_names = write_seqs(existing_aln, existing_aln_fname) else: # reference sequence needs to be the first one for auto direction # adjustment (auto reverse-complement) - seqs.insert(0, ref_seq) - elif ref_name: - ensure_reference_strain_present(ref_name, existing_aln, seqs) + seqs = chain((ref_seq,), seqs) + + alignment_sequence_names = write_seqs(seqs, seqs_to_align_fname, ref_name) - write_seqs(seqs, seqs_to_align_fname) + # Check for duplicates in the intersection of existing and new alignment + # sequences. 
+ duplicate_sequence_names = existing_aln_sequence_names & alignment_sequence_names + if len(duplicate_sequence_names) > 0: + raise AlignmentError( + f"Duplicate strains detected: {', '.join(duplicate_sequence_names)}" + ) - # 90% sure this is only ever going to catch ref_seq was a dupe - check_duplicates(existing_aln, seqs) return existing_aln_fname, seqs_to_align_fname, ref_name def run(args): @@ -178,19 +188,34 @@ def postprocess(output_file, ref_name, keep_reference, fill_gaps): def read_sequences(*fnames): """return list of sequences from all fnames""" - seqs = {} + sequence_hash_by_name = {} + try: - for fname in fnames: - for record in SeqIO.parse(fname, 'fasta'): - if record.name in seqs and record.seq != seqs[record.name].seq: + # Stream sequences from all input files into a single output file, + # skipping duplicate records (same strain and sequence) and noting + # mismatched sequences for the same strain name. + for record in io_read_sequences(*fnames): + # Hash each sequence and check whether another sequence with the + # same name already exists and if the hash is different. + sequence_hash = hashlib.sha256(str(record.seq).encode("utf-8")).hexdigest() + if record.name in sequence_hash_by_name: + # If the hashes differ (multiple entries with the same strain + # name but different sequences), we keep the first sequence and + # add the strain to a list of duplicates to report at the end. + if sequence_hash_by_name.get(record.name) != sequence_hash: raise AlignmentError("Detected duplicate input strains \"%s\" but the sequences are different." % record.name) - # if the same sequence then we can proceed (and we only take one) - seqs[record.name] = record + + # If the current strain has been seen before, don't use its + # sequence again. 
+ continue + + sequence_hash_by_name[record.name] = sequence_hash + yield record + except FileNotFoundError: raise AlignmentError("\nCannot read sequences -- make sure the file %s exists and contains sequences in fasta format" % fname) except ValueError as error: raise AlignmentError("\nERROR: Problem reading in {}: {}".format(fname, str(error))) - return list(seqs.values()) def check_arguments(args): # Simple error checking related to a reference name/sequence @@ -201,31 +226,29 @@ def check_arguments(args): def read_alignment(fname): try: - return AlignIO.read(fname, 'fasta') + with open_file(fname) as handle: + alignment = AlignIO.read(handle, "fasta") + + return alignment except Exception as error: raise AlignmentError("\nERROR: Problem reading in {}: {}".format(fname, str(error))) -def ensure_reference_strain_present(ref_name, existing_alignment, seqs): - if existing_alignment: - if ref_name not in {x.name for x in existing_alignment}: - raise AlignmentError("ERROR: Specified reference name %s (via --reference-name) is not in the supplied alignment."%ref_name) - else: - if ref_name not in {x.name for x in seqs}: - raise AlignmentError("ERROR: Specified reference name %s (via --reference-name) is not in the sequence sample."%ref_name) - - - # align - # if args.method=='mafft': - # shoutput = shquote(output) - # shname = shquote(seq_fname) - # cmd = "mafft --reorder --anysymbol --thread %d %s 1> %s 2> %s.log"%(args.nthreads, shname, shoutput, shoutput) - def read_reference(ref_fname): if not os.path.isfile(ref_fname): raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file \"%s\" exists"%ref_fname) + + genbank_suffixes = {".gb", ".genbank"} + ref_fname_path = Path(ref_fname) + + # Check for GenBank suffixes, while allowing for compression suffixes. 
+ if len(set(ref_fname_path.suffixes) & genbank_suffixes) > 0: + format = "genbank" + else: + format = "fasta" + try: - ref_seq = SeqIO.read(ref_fname, 'genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta') + ref_seq = next(io_read_sequences(ref_fname, format=format)) except: raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) @@ -388,43 +411,33 @@ def make_gaps_ambiguous(aln): seq.seq = Seq.Seq(_seq) -def check_duplicates(*values): - names = set() - def add(name): - if name in names: - raise AlignmentError("Duplicate strains of \"{}\" detected".format(name)) - names.add(name) - for sample in values: - if not sample: - # allows false-like values (e.g. always provide existing_alignment, allowing - # the default which is `False`) - continue - elif isinstance(sample, (list, Align.MultipleSeqAlignment)): - for s in sample: - add(s.name) - elif isinstance(sample, str): - add(sample) - else: - raise TypeError() - -def write_seqs(seqs, fname): +def write_seqs(seqs, fname, ref_name=None): """A wrapper around SeqIO.write with error handling""" + sequences_written = set() + try: - SeqIO.write(seqs, fname, 'fasta') + with open_file(fname, "wt") as handle: + for sequence in seqs: + sequences_written.add(sequence.id) + write_sequences(sequence, handle) + except FileNotFoundError: raise AlignmentError('ERROR: Couldn\'t write "{}" -- perhaps the directory doesn\'t exist?'.format(fname)) + if ref_name is not None and ref_name not in sequences_written: + raise AlignmentError(f"ERROR: Specified reference name {ref_name} (via --reference-name) is not in the sequence sample.") + + return sequences_written + def prune_seqs_matching_alignment(seqs, aln): """ Return a set of seqs excluding those already in the alignment & print a warning message for each sequence which is exluded. 
""" - ret = [] aln_names = {s.name for s in aln} for seq in seqs: if seq.name in aln_names: print("Excluding {} as it is already present in the alignment".format(seq.name)) else: - ret.append(seq) - return ret + yield seq diff --git a/tests/builds/zika/config/zika_outgroup.gb.gz b/tests/builds/zika/config/zika_outgroup.gb.gz new file mode 100644 index 0000000000000000000000000000000000000000..c98f39b02d018711a54a01b8cdfb4327fe3bad3b GIT binary patch literal 8883 zcmV;kB23*MiwFp^5wlD+#^Al!$YiLlio zMN!gB_<$xr5Q7hc02*BErK7scK`U04yi&PidVl?9y_ZEv6UKN>>u|Is63D8`kC!hq ztILm@pSy^En(gA^$K~bXd~xy1;yQZ$XK{YDzKMSQ;(t!+`1O}VTSn)X@Bj95`F?r6 zSiecqZC>Yn{;`gt|GNEe6a9Ys`1L7za*yWr{^dIF7uWA1`|$b8-O(MR>3IL}`FQpw z-fYsY!`o5upBL+!%hlDJA#L@e=MT>oQIzF>S)8xVSL?-Uaq*@|$HT`q!I941bRU0i zH!1h{ypK0g^s&w2I`67~`uN3({3pk!zeZ0_t(^Av*S~&wc)E4BzaQWG6WxCbU%!j) zAE2BkXQty{UkTjancr{ax!X_X_}9~KU!Tt2wCOf&(;EAf^Ze;oBckLd_duopS>(h& z_q&g+KUM$uaC|y`KHeT5-~H|E{qF2tRDFGVdU%(}Pe0v#m9OsPi^y;5C-cYM?bEyN zEItz%ecqSUMFu@bPsjiHdc3#C=;7~?iLAUKn-en}9v<&LMZ2T9bH7C&A18Bv`@hEB zKHNu-$G;yR{VT7Ire^*7!{bY1`(N+B+PmYejf}nh6uIM*yS<;})xU{Z>bR6wf4=|n z^;g;BZ^uus^sxMY(L-|kc(g9+zW)08_UTCu_-b$X*tz4U=uhX@;@dTR`_EBzYabte zy?wZQnErSdHRkKx`}DWRyIcF)`}|&x=)St+yH^T&^-Sj;&!Wr4`^%emQEBeQ=iiC0 zpPr8Q(fVS!ynH9-dHihd|9G?cSXF7=L&Oh1#PZ1JU+?~SpBQJ}cYWOUKm71&neaqZ z9o=sapY+qe{3WXLR$TZjTAiJrE&m$*bii-tXA(abuOw_9em_2n>%NX|k+4{vzk0Bv z>xem{{%C)@hcf(w`Q6>u&$svH)zkmH`Ls@C>)SN$e{R$6)mr6)g{%Fz|7z}T|1Q?m zxO{r}`eCcb$;qFdsE0TWkKK%0js-p;=B_1-=H@!{>( z`}l8B^Kkda{qgD6%)RjG*I)i2A@Sj@GyizF|I2!Hu{@u9_P2*8_u=jC;qycEWWL-U z@1N$LynTAOgARV-nLoUJIzE2=JootLhr3_gA7A7#z^6o;`0w1q4~XWc&mZ1CN9x?O z4C@bXU&FONefb|h_w9?P#T+;4BAev3NpE1Pi}i9=?VtQvoq2R07T?qKOSE6j?flE* z!>6xS)cr*)D8IPl?ft)a)_?uW?WYfKZ&Z-Y>3;TYqnV?zm!d!m1UXjvL3wdFPg5*%Diq5SvO=wk;YY; z=S8tCilmI=s%-PTj@u$Figrd|7AL*Dn-6~FzK^RoPKUm2vo0RTYAlnsPU0+$^KRSp zeVJ8tu^IDyUM1y>z+JUT;=by}bkkI2l4e8NB*~C$4x75~yZ$f?W7cnWyP^=eZQHeN zJoIJL?Y=Fr>)WE-_ieZD4`tf5>DU*$GH-{pKIGfBZn83#!^UwQA4GXw*LVA3$j5T1 
zzAZ2>%RFx4vCG>{o()~yCfgy+DzS@fE*XY0-DTNk=!YuaCq>f_NuHKN-}l4J4(m#k z)^_7&Oye%^k}B?!uH7{KUQQ@FEcRW}@5fblq+%3)WehwU!Oo3c6V;(52p`et{Kcha~V(ruB%ySQlg{U8dI z-KAMP?0T_G7ME$9R!J5YSzC79oZFPerY(yqDN6ZA%rbW6rjO%ozwL{>iPOH^WxW_B z9rk0K^-0!jilK@3Gggj=q}umo-wd*9-p6$_+szoI>+`ILeG(dk3>Iw_%Q=%iJ|tToeR9lN)LLZuZl7;6*?7^3fyJ#3+ZloT zyv(z{EUSZw{w`_VFQ2vV4=}T{m>C5QV%tw4FG9TGlgG&dW5dvteI)qsk!#zvP{) z*$Y$+%}}Rhv&#-ed=UQ}w?iApMPJw5b{LWwD<^$f2rfv3wedlo6wgU^;*i63Cops< zi=j;uv3$Bonpg}Y=Pxqxuwf1yk}?|)dD^9g1j0c)r;P<1(yHh;>7gmQU0)u=SMoHK zNAp}h5sz%jZZ{*a?%OmQ+kRganYe$Kre&J9+f+84^~JCo(zF)~h!VQ8?F2BptQ|!9 zVYB(Rz&73VRU<2IB|ejEa~LxL>9!G6ZqlwU1vjdpkF%~%c2%<5iVqFtAQ)B6pp8UU zDR^G>5*B$bXy+Yqh^yR3c2~(U zCHaa64`tO$oKHlT){K zE?C)Q`L^iANt0Z_TTVR+A?auFu*oH}ZM^$uHs6 z>)RyW3b=}Yh@3u^oYaUG4_%jSlfxj9EH2d#hwadGLtAHcIwLTSH)UVQu|`CF`d%vD`|CN*IsjK9T5(yK-!sG)@XJMlPviE7`hF_kQC; zT2?}66S0jT!?>UEL)lK2_hX;5?P0S`vR*PqpI2$pNl>K2p>KDRjKv9xrae@N(Bn-f zTa3$MhB<7D)W?`CEjT72A`X&yU*1)H+HUvaMoBG>URFYX`mAf4vG8nhFJ3m|Hf1Us zlBkov{UCUpZ@2#ST5@|I+n zsUONinswsGqwojeN4?O*T9TFkkLQ|QnUBH}rKHWuL8=~c^BF4(^p@gjLSNcaY~j<} zcI-x9p9xVMHlkyxPQ>p7iDSv3qR-BkMa7H`TcOUW;E15im`MR5%tXL3%MRTrcIorB z81@4BnUpVCChnQ0QXUQaZp;odR75~fygrkXWV`cqS+f!PRBi;F(@y?0f;+OszLa1Q z4v{6|Iz=lex}BjSc`=lmW)vM(l^i~8OTh&X1VTc@gJi_G=p-y9FAB#I zFZi}Vaobjiy>QHCD8@8RhFWr!z+)^)RnR4F>PkpTR=1K`^R`QO5`u^AR*IEx3*0qA zy$7hUi7?XaD}!?YL@f%QhbE|q=l zhe|Znb%*j$NURk*(U$;BU!=YPEhMO`C4{m#7s59uFg+BKcZ8DGNz-l*Qv4)E+NQgm z0ihD+#Aoyj<$PtrQKCsoS}nau_}RkK3)dpNFn(Sqmf|s;retMQkGkUBE#Y zcQ#64npRya;U{Vz<2foKWbz<~&bxM-lxdy%$Poumi%dwjXLNa9jQzNm^A6$>0v_^} zq^oL1ppeXVpX|l=C1!_yltM@HsW4Yz5P4e*;HE;q(n`386yQ?Oi_-i0Q0}`KE6esg zQ5W7StSZ^aE}A?^+N~6jrRbqfM0-P9NXaa^Zn|EcuMe_DGw0aC=C^fG?tHAaoq$cV zX?Oc1mGY`Ze?^$ZLCXEKmwYRpJxbMh5c)H2XQ)Wq3aAQd)^Wa(a;X_4t2cet_PvPN z2~?&BVIOHC0X!V)c904rt%bj*$&4Qg59$jkMDk`Vy=#c7Hd2M>Vv@#7jinqb>VBxJ zp=~8%k~A&GDjB5$oT&nY0112v6!wk4`rG+d+Uj!sy|$M%U!HD1Mc?UsO~?D=hqs$} zX06rb)!EtA#pQQy9Lj$hpEr{lj~sA)gWtaNp;K0CWvpMP(q&&SWdKAQXEzjv^z znN#9gw76Vd&GjQs>mBcZKi>~gc@YtR 
zBhIlr|E_n`-Ey`+99CJ*dd2em%KPBWzq8Cw-(BXXS?9ZvW!9_J)%TZ~?dOM8E@l=H z@z=7*^}l16)pu34np2r9aO0P``R-Yk#lK!ezW?UebL-y7@vhdF-wiC;{C7>YnlqIw zzy|;2+$`qBI+x#DCOmNQ&+~=zi}iUJ`FxbxCx1>xi)b>&Su+{yqRGlXW^&HTr}B@J z@1{sTlz*)AKN@^5FGTpsKjVU|^&&FSQASl$Fo@TVAC9!g^W$kNhs5 zS}V`XUzN33E+WSdMPMZRGqR}cA;Qa(|3bvduH+g2z=R*ItSjrtANgqtvaV%c#!Hty z_+=-^6e^jp~=QTqWO%6(cB@8x>)7T&jy7wPJpN?B` zkmW^rXqfg z#XoAj4Mao)hk%~3E@DT-!e)LEm32eE2R3+h!0&yOL`J0kzLJUOw!r-%kP zv)8c~7i4+AcR*9b1-!~Z;YKmk`cDl!bfV3@)CpL~ldG(S_j`DxKRpiyz$R$PHZ7gq z;~E^n(iva)SCF;zy7%tjo!M{)L6%jN1!%!-;D9tQ@Z1ugf-Ly`M5OXNpq6QJD8~z! zU}Xn|#OxCMesQrx3?b%;h5l)eE*b*fZE-@lu2&L_A~C`w;a>={u02M>%1#jh zfddCsyrqBPX!wzajIcp(%V5XVY7r3)fX03&K+^#IUOf@?%`9NKTaN>VfMZYlx;$S1 zQjjxwFmGur>*(YT3b&Iw6(Y=I9kLBDD9Bm^3w=at=mN+H{gx3OIfguxV3`B_xG*vbpf7( z>}27q>VU8>jDsv6?0CIaS$^YS4h9+02ad;jODxpgB5%{Jo%a-OGVJTd>mK^@8rK+u zC&5pF2{^eywhlpv<5C48ILHF#csqKHM10InA$0MqKaE9bSt=A7SJ*SB{J1>#esA%K zY+3n)YKHa&pYXy+NG7t6$pmM%LHEmr=pN1mcj68lH3Qf;ZuE>(1kBs?1r+CvycUxf*$>A0%}D_Oh6BnwS?cp!JQ>HwHQ*0pjIcsnQ$S)ivDgu{>@oqlRbBQ&`@FwS zZPrP2MbdIo*C}v0gJ>YprsqZhtwSDKUVGh7nm|F-)R&PIy;C4dIL+at!79rtEerd) zVXkE*2A}Z%8b<_Wl|pU;J%ijh6ClHpooWGm?s*Q$tk;YoDth|vLJS#DedlOALtkZX zARTZ))|&frNH7s|AJQ_!!~Bs6EP^SCAdfN&bS%j7b*&}!P}rnXfgJ&i01gxQiZzUQ zuqwrKPeXz%_&pS$XLM8_=($l;bL5^Ve7)L~o*Qtz;9nQMuB9YBqt}MzW*n8n;*6X( zBCSN-fv-IWRXbjwu60gPjdXM(Apv!=NZ|qY$YQn3VgYPUx#PmuwZt0@H_ZnO=t(0l zD3oRZ8i6k`ON2A7h&e`U3CKa{fkGnNnTSafzzpY`LhvI$p3dm&TAs_$`j+)9QY8q3 zGg=s%zX-pM2tS#hgZzjub$x)zko-<{2I9Asfg|Lr&rz(wvP=jX zSr$D7QZXB9m`RzG({eb-Vr^vuM=&IzULEjz{|!k3&;*4E><);1Rc~Khv7bYe(eJfG z!W7|yE}GaYVLb#5iDcG%e%;^`SFEiJ&gLIwF7&YJm|mUFX+$#cdjt?#PIjUq?s{3aiec|g`CPh$g?fby`sC8WErZ8c%kV6q1=}S}S zGhO()7A)N|OE{*UXL^o$NSi>;vbI8KkQ{@!Q`YMInPY02A%;)Pk9r!*+S^X28imae zWWtbZSLdv)luAeB@sk!V{0lyzd{2GVhO}#6aLm<$9CLC3cUdqaG}#|OwZiuZ%s4D@ zVlprC_G(GLw@kv+fn~=M%`+4(%si*2)`XTK3KO;}f~@s|7(Su029$a`p&Q85 zU}2_-2v3&%1BVF#nI)q*6oo`-I5S}Al+#wIYtfq^ z-g6%WakoGkvS!>J{NJ$)K1+4D15>!+pU1Z|N9x=qQq+sEU zOY|`{)oDu|??py8IFZG&(nCD5*-p;no@W7<%WGn^4IL7ic>+#2I=NSY5 
zzIm4A>srJ1t3~9ohevA5D#WsWG^}f_)vi5OfXTxdeO=3%(K;>I*~pB@opBlzlv+@s zQwH>4y76LzEcDWBm>^hUnNNROKVpQ1b~FZCYxTXlEj& zYgdB}G@8i*A$8z&l7;@g_8Bbc6_0=xv?1n%VnKmtUs7zCyD&(Rcv3Pt_1~Q4P&*hftLBi_JfX5;B|;FXpamJxirXPFU=^^GTLJX zCpTIgMOt?o4QTLzQ!RSxPh4Ju=djBRGX>*BP(0R%I4lA5Poez;C9tP-l68%KG-?9x z?BNK6V9-7)9Bmws7aW-61x7gey{~IcXnvwI$l9FcA=3m|2zUl(Z%masyE)<4+Sj$r z+nB*REm3rw0o;UG>VcrdFj5NW4HaaqSwFJFq3w6Jn=uE_Zm{+)br48SZZY5Sc~@nj zu4PHUF`oKCh#{|~NTKpH%LIZ%bVa|5AnTgAimlTXiTIIr=QSSLrveRjI%@)%1rDez zU)Lg^81}4~gaCC~b%yWL!tOJv2PzaO;o4Yz5(Wd4%0zhXhVkS z569E&1Yo|63{Fm@6fwwJGe5FZPHuv38Y>9xbUE!VaMEK~kQ?n;2A{Y(CKXzgy8 z_>pp1VQAmal8~U<2p1Wi`NWm4YaJ)EhCMz@9uo2X2m0hF$IWCz44wM-?j;AfT0r-P z_Y{y1z*ATRF?q40`ua=yZGxsy`BmMycm~a!AIZmL{SXNwU9pK18 z)*5Hj=?uA^)ix5dGH(Fc&_JW3siCKrb=uX%k{kfA#8y8x<`@EsF4*400p_HoE$108 z^7a+$S`MXPCT(*g@iT*S7)s4VnuIjDa2^r#wL%}$YJsT}Jvf!snU1E)aNk5D+G+vt z@{E_-zqjG4h10r%lSU9}h5-5tgdg-vBHmGWuKBJouVu#(ok7c~5HFb+!>a);?Trnk0-4 zX>mrQ*SDVY(3P)i)hD1CCPIaNL)Hy5z&BFc(ddTj1)dXy-uE^gCZ8} zM$3iBhF%49>qsMmtkAzV+D3Art!uOqOJ6m5iNw*@ozcMq3`|+qnAaM9j|r8|ey!e& z(-xiOYz=`y?(FoA$g>@<7sMP4JV>uJB(*6)Mq|n4xi1#sFatx&1zGGVgbBM*J_<_2 zJ9I40phHCubef2DoHeT~o!6@UqTrMkBV}Gj<7u>~z)^w*#-w$8kcB>`B?9YIoc$aZ z+R;pkI_#Wy@xz2GkoLL%bgOn7Yejt9qy^vAZv|U z)P}oeyuYO(f>xRh_j9zR$zjE$(+2ZA@3_9^oxtE!XlQ`i1zcsshqeS-PDqfb>9Fd_ zS2?d`^=?orgaScJ1*e@XM%2l8dNb+ey^d?twY;RjE)egLvVPR=b~Nb)4ND|qDvBo6 zUFfggU|u`vWy#>HFWcNYZ7@6}vnB{S(bhiSgSkOnOSqqG=)vG3jZ=(wYyqR7aOi-9 zrUUDmQ;mGH;9XefSQs0&IUR47oSK(RudGm~DfGd4i{aESyjgH|GO?GbSm*$zyp{NT z;}fI`+=YE|6Z&pJ)>3=OMiVUOKSXvdeQ;;XG9O2rCEyHtbMD0NZ!XSHIDxvB*OkKT zmz665nin?0kPLMiiIxeq)y)O_IZj96tU!2+U?Sw`?FBRsd8N;3_h||RwaQxYJ|>F- z)_m-yD*Mq8AeGZAkcPK3LGH~n7T#c9>zvVj5t>j?DMdv!!e)+ASN2utwxnn(>vDCf zMMJBfR~bU5^!ZXWoZaZOl-J$ZjRRSPvQPM^7Q02VXq(kZ*7gS;wNhp*`uoh)sI&Tl+bVgceP6 zy&5nD|5}{;x|aP)s>6VQf`vF z;YKG5CW>V-bn45AEZ&Vebw71FIq9{36LJTOVX#HRHsNWebJG0N$9 z#-O!3lcLe|#y*oa1%j+={{I~lTN#c)Ov)4PUxj|Oo5HO_E$%}@M`dwdt5-7C77C}5;cC+fzeE=AMybuQ zMUI+zJyjW=Wyrg5qu4T+I&I>rEY!71XQ9u~e+X9xIPi7KA|mz+tdbYTY0#IiYe{{8 
zmFCnBjI;oO+d7?TI}QMJIzuE0JZEtp>RO!-;db}wY6?;3&5u96`9IJJrUDIA006PG Bh>`#R literal 0 HcmV?d00001 diff --git a/tests/builds/zika_compressed.t b/tests/builds/zika_compressed.t index 995fb6ef8..5abd118cf 100644 --- a/tests/builds/zika_compressed.t +++ b/tests/builds/zika_compressed.t @@ -49,8 +49,8 @@ Filter sequences by a minimum date and an exclusion list and only keep one seque Align filtered sequences to a specific reference sequence and fill any gaps. $ ${AUGUR} align \ - > --sequences "results/filtered.fasta" \ - > --reference-sequence "config/zika_outgroup.gb" \ + > --sequences "results/filtered.fasta.gz" \ + > --reference-sequence "config/zika_outgroup.gb.gz" \ > --output "$TMP/out/aligned.fasta" \ > --fill-gaps > /dev/null From f53f921ae2d0991dd51ae331989f199037eaa761 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 17 Mar 2021 09:29:11 -0700 Subject: [PATCH 8/8] Support multiple inputs to filter Work in progress prototyping how we could add support for multiple metadata, sequence, and sequence index inputs to augur filter to simplify workflows that aggregate filters across multiple input datasets (e.g., the ncov workflow). 
--- augur/filter.py | 28 ++++++++---------------- augur/util_support/metadata_file.py | 33 ++++++++++++++++++++--------- augur/utils.py | 19 +++++++++++++++-- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/augur/filter.py b/augur/filter.py index 6b3723d32..95b52d5ec 100644 --- a/augur/filter.py +++ b/augur/filter.py @@ -15,7 +15,7 @@ from .index import index_sequences from .io import open_file, read_sequences, write_sequences -from .utils import read_metadata, read_strains, get_numerical_dates, run_shell_command, shquote, is_date_ambiguous +from .utils import read_metadata, read_sequence_index, read_strains, get_numerical_dates, run_shell_command, shquote, is_date_ambiguous comment_char = '#' MAX_NUMBER_OF_PROBABILISTIC_SAMPLING_ATTEMPTS = 10 @@ -94,9 +94,9 @@ def filter_by_query(sequences, metadata_file, query): def register_arguments(parser): input_group = parser.add_argument_group("inputs", "metadata and sequences to be filtered") - input_group.add_argument('--metadata', required=True, metavar="FILE", help="sequence metadata, as CSV or TSV") - input_group.add_argument('--sequences', '-s', help="sequences in FASTA or VCF format") - input_group.add_argument('--sequence-index', help="sequence composition report generated by augur index. If not provided, an index will be created on the fly.") + input_group.add_argument('--metadata', nargs="+", required=True, metavar="FILE", help="sequence metadata, as CSV or TSV") + input_group.add_argument('--sequences', '-s', nargs="*", help="sequences in FASTA or VCF format") + input_group.add_argument('--sequence-index', nargs="*", help="sequence composition report generated by augur index. If not provided, an index will be created on the fly.") metadata_filter_group = parser.add_argument_group("metadata filters", "filters to apply to metadata") metadata_filter_group.add_argument( @@ -171,20 +171,13 @@ def run(args): return 1 # Load inputs, starting with metadata. 
- try: - # Metadata are the source of truth for which sequences we want to keep - # in filtered output. - meta_dict, meta_columns = read_metadata(args.metadata) - metadata_strains = set(meta_dict.keys()) - except ValueError as error: - print("ERROR: Problem reading in {}:".format(args.metadata)) - print(error) - return 1 + meta_dict, meta_columns = read_metadata(*args.metadata) + metadata_strains = set(meta_dict.keys()) #Set flags if VCF is_vcf = False is_compressed = False - if args.sequences and any([args.sequences.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]): + if args.sequences and len(args.sequences) == 1 and any([args.sequences[0].lower().endswith(x) for x in ['.vcf', '.vcf.gz']]): is_vcf = True if args.sequences.lower().endswith('.gz'): is_compressed = True @@ -226,10 +219,7 @@ def run(args): ) index_sequences(args.sequences, sequence_index_path) - sequence_index = pd.read_csv( - sequence_index_path, - sep="\t" - ) + sequence_index = read_sequence_index(*sequence_index_path) # Remove temporary index file, if it exists. if index_is_autogenerated: @@ -546,7 +536,7 @@ def run(args): dropped_samps = list(available_strains - seq_keep) write_vcf(args.sequences, args.output, dropped_samps) elif args.sequences and args.output: - sequences = read_sequences(args.sequences) + sequences = read_sequences(*args.sequences) # Stream to disk all sequences that passed all filters to avoid reading # sequences into memory first. Track the observed strain names in the diff --git a/augur/util_support/metadata_file.py b/augur/util_support/metadata_file.py index 7c7508148..9e3d40c84 100644 --- a/augur/util_support/metadata_file.py +++ b/augur/util_support/metadata_file.py @@ -1,5 +1,5 @@ import functools -import pandas +import pandas as pd import sys @@ -11,15 +11,19 @@ class MetadataFile: which is used to match metadata with samples. 
""" - def __init__(self, fname, query=None): - self.fname = fname + def __init__(self, *fnames, query=None, as_data_frame=False): + self.fname = fnames self.query = query + self.as_data_frame = as_data_frame self.key_type = self.find_key_type() def read(self): self.check_metadata_duplicates() + if self.as_data_frame: + return self.metadata + # augur assumes the metadata dict will contain either "strain" or "name" (the # indexed column), but DataFrame.to_dict("index") does not place the indexed # column in the dict. So let's make a copy of the indexed column so that the @@ -88,10 +92,19 @@ def find_key_type(self): @functools.lru_cache() def parse_file(self): - return pandas.read_csv( - self.fname, - sep=None, # csv.Sniffer will automatically detect sep - engine="python", - skipinitialspace=True, - dtype={"strain":"string", "name":"string"} - ).fillna("") + return pd.concat( + [ + pd.read_csv( + fname, + sep=None, # csv.Sniffer will automatically detect sep + engine="python", + skipinitialspace=True, + dtype={"strain":"string", "name":"string"} + ).fillna("") + for fname in self.fname + ], + ignore_index=True + ).drop_duplicates( + subset=("strain",), + ignore_index=True + ) diff --git a/augur/utils.py b/augur/utils.py index b68c484e3..adc2dc5cd 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -70,8 +70,23 @@ def get_json_name(args, default=None): def ambiguous_date_to_date_range(uncertain_date, fmt, min_max_year=None): return DateDisambiguator(uncertain_date, fmt=fmt, min_max_year=min_max_year).range() -def read_metadata(fname, query=None): - return MetadataFile(fname, query).read() +def read_metadata(*fnames, query=None, as_data_frame=False): + return MetadataFile(*fnames, query=query, as_data_frame=as_data_frame).read() + +def read_sequence_index(*sequence_index_paths): + return pd.concat( + [ + pd.read_csv( + sequence_index_path, + sep="\t" + ) + for sequence_index_path in sequence_index_paths + ], + ignore_index=True + ).drop_duplicates( + subset=("strain",), 
+ ignore_index=True + ) def is_date_ambiguous(date, ambiguous_by="any"): """