diff --git a/augur/align.py b/augur/align.py
index 83de03991..f7b2914e8 100644
--- a/augur/align.py
+++ b/augur/align.py
@@ -28,6 +28,64 @@ def register_arguments(parser):
     parser.add_argument('--existing-alignment', metavar="FASTA", default=False, help="An existing alignment to which the sequences will be added. The ouput alignment will be the same length as this existing alignment.")
     parser.add_argument('--debug', action="store_true", default=False, help="Produce extra files (e.g. pre- and post-aligner files) which can help with debugging poor alignments.")
 
+def prepare(sequences, existing_aln_fname, output, ref_name, ref_seq_fname):
+    """Prepare the sequences, existing alignment, and reference sequence for alignment.
+
+    This function:
+        1. Reads all given input sequence files into a single list of sequences.
+        2. Drops any input sequences that are already present in the existing alignment, if one exists.
+        3. If given a reference name (and no reference file), checks that sequence exists in either the existing alignment, if given, or the input sequences.
+        4. If given a reference sequence file, either appends it to the existing alignment or prepends it to the input sequences.
+        5. Writes the input sequences to a single file, and writes the alignment out to a new ".ref.fasta" file if the reference sequence was added to it.
+
+    Parameters
+    ----------
+    sequences : list[str]
+        List of paths to FASTA-formatted sequences to align.
+    existing_aln_fname : str
+        Path of an existing alignment to use, or a false-like value (e.g. None) if there is none.
+    output: str
+        Path the aligned sequences will be written out to; also used as the prefix of the ".to_align.fasta" file.
+    ref_name: str
+        The name of the reference sequence, if provided
+    ref_seq_fname: str
+        The path to the reference sequence file. If this is provided, it overrides ref_name.
+
+    Returns
+    -------
+        tuple: The existing alignment filename (the new ".ref.fasta" path if the reference was added), the new sequences filename, and the name of the reference sequence.
+    """
+    seqs = read_sequences(*sequences)
+    seqs_to_align_fname = output + ".to_align.fasta"
+
+    if existing_aln_fname:
+        existing_aln = read_alignment(existing_aln_fname)
+        seqs = prune_seqs_matching_alignment(seqs, existing_aln)
+    else:
+        existing_aln = None
+
+    if ref_seq_fname:
+        ref_seq = read_reference(ref_seq_fname)
+        ref_name = ref_seq.id
+        if existing_aln:
+            if len(ref_seq) != existing_aln.get_alignment_length():
+                raise AlignmentError("ERROR: Provided existing alignment ({}bp) is not the same length as the reference sequence ({}bp)".format(existing_aln.get_alignment_length(), len(ref_seq)))
+            existing_aln_fname = existing_aln_fname + ".ref.fasta"
+            existing_aln.append(ref_seq)
+            write_seqs(existing_aln, existing_aln_fname)
+        else:
+            # reference sequence needs to be the first one for auto direction
+            # adjustment (auto reverse-complement)
+            seqs.insert(0, ref_seq)
+    elif ref_name:
+        ensure_reference_strain_present(ref_name, existing_aln, seqs)
+
+    write_seqs(seqs, seqs_to_align_fname)
+
+    # NOTE: seqs were already pruned against the alignment above, so this most likely only fires when the added reference sequence is a duplicate.
+    check_duplicates(existing_aln, seqs)
+    return existing_aln_fname, seqs_to_align_fname, ref_name
+
 def run(args):
     '''
     Parameters
@@ -44,48 +102,14 @@ def run(args):
 
     try:
         check_arguments(args)
-        seqs = read_sequences(*args.sequences)
-        existing_aln = read_alignment(args.existing_alignment) if args.existing_alignment else None
-
-        # if we have been given a reference (strain) name, make sure it is present
-        ref_name = args.reference_name
-        if args.reference_name:
-            ensure_reference_strain_present(ref_name, existing_aln, seqs)
-
-        # If given an existing alignment, then add the reference sequence to this if desired (and if it is the same length)
-        if existing_aln and args.reference_sequence:
-            existing_aln_fname = args.existing_alignment + ".ref.fasta"
-            ref_seq = read_reference(args.reference_sequence)
-            if len(ref_seq) != existing_aln.get_alignment_length():
-                raise AlignmentError("ERROR: Provided existing alignment ({}bp) is not the same length as the reference sequence ({}bp)".format(existing_aln.get_alignment_length(), len(ref_seq)))
-            existing_aln.append(ref_seq)
-            write_seqs(existing_aln, existing_aln_fname)
-            temp_files_to_remove.append(existing_aln_fname)
-            ref_name = ref_seq.id
-        else:
-            existing_aln_fname = args.existing_alignment # may be False
-
-        ## Create a single file of sequences for alignment (or to be added to the alignment).
-        ## Add in the reference file to the sequences _if_ we don't have an existing alignment
-        if args.reference_sequence and not existing_aln:
-            seqs_to_align_fname = args.output+".to_align.fasta"
-            ref_seq = read_reference(args.reference_sequence)
-            # reference sequence needs to be the first one for auto direction adjustment (auto reverse-complement)
-            write_seqs([ref_seq] + list(seqs.values()), seqs_to_align_fname)
-            ref_name = ref_seq.id
-        elif existing_aln:
-            seqs_to_align_fname = args.output+".new_seqs_to_align.fasta"
-            seqs = prune_seqs_matching_alignment(seqs, existing_aln)
-            write_seqs(list(seqs.values()), seqs_to_align_fname)
-        else:
-            seqs_to_align_fname = args.output+".to_align.fasta"
-            write_seqs(list(seqs.values()), seqs_to_align_fname)
+        existing_aln_fname, seqs_to_align_fname, ref_name = prepare(args.sequences, args.existing_alignment, args.output, args.reference_name, args.reference_sequence)
         temp_files_to_remove.append(seqs_to_align_fname)
-
-        check_duplicates(existing_aln, ref_name, seqs)
+        if existing_aln_fname != args.existing_alignment:
+            temp_files_to_remove.append(existing_aln_fname)
+        # -- existing_aln_fname, seqs_to_align_fname, ref_name --
 
         # before aligning, make a copy of the data that the aligner receives as input (very useful for debugging purposes)
-        if args.debug and not existing_aln:
+        if args.debug and not existing_aln_fname:
             copyfile(seqs_to_align_fname, args.output+".pre_aligner.fasta")
 
         # generate alignment command & run
@@ -98,24 +122,7 @@ def run(args):
         if args.debug:
             copyfile(args.output, args.output+".post_aligner.fasta")
 
-        # reads the new alignment
-        seqs = read_alignment(args.output)
-
-        # convert the aligner output to upper case and remove auto reverse-complement prefix
-        prettify_alignment(seqs)
-
-        # if we've specified a reference, strip out all the columns not present in the reference
-        # this will overwrite the alignment file
-        if ref_name:
-            seqs = strip_non_reference(seqs, ref_name, insertion_csv=args.output+".insertions.csv")
-            if args.remove_reference:
-                seqs = remove_reference_sequence(seqs, ref_name)
-            write_seqs(seqs, args.output)
-        if args.fill_gaps:
-            make_gaps_ambiguous(seqs)
-
-        # write the modified sequences back to the alignment file
-        write_seqs(seqs, args.output)
+        postprocess(args.output, ref_name, not args.remove_reference, args.fill_gaps)
 
 
     except AlignmentError as e:
@@ -126,9 +133,50 @@ def run(args):
     for fname in temp_files_to_remove:
         os.remove(fname)
 
+
+def postprocess(output_file, ref_name, keep_reference, fill_gaps):
+    """Postprocessing of the combined alignment file.
+
+    Parameters
+    ----------
+    output_file: str
+        The file the new alignment was written to
+    ref_name: str
+        If provided, the name of the reference strain used in the alignment
+    keep_reference: bool
+        If the reference was provided, whether it should be kept in the alignment
+    fill_gaps: bool
+        Replace all gaps in the alignment with "N" to indicate ambiguous sites.
+
+    Returns
+    -------
+        None - the modified alignment is written directly to output_file
+    """
+    # output_file is read here, modified in memory, and rewritten once at the end
+    # reads the new alignment
+    seqs = read_alignment(output_file)
+    # convert the aligner output to upper case and remove auto reverse-complement prefix
+    prettify_alignment(seqs)
+
+    # if we've specified a reference, strip out all the columns not present in the reference
+    # (the alignment file itself is only rewritten by the final write_seqs call below)
+    if ref_name:
+        seqs = strip_non_reference(seqs, ref_name, insertion_csv=output_file+".insertions.csv")
+        if not keep_reference:
+            seqs = remove_reference_sequence(seqs, ref_name)
+
+    if fill_gaps:
+        make_gaps_ambiguous(seqs)
+
+    # write the modified sequences back to the alignment file
+    write_seqs(seqs, output_file)
+
+
+
 #####################################################################################################
 
 def read_sequences(*fnames):
+    """return list of sequences from all fnames"""
     seqs = {}
     try:
         for fname in fnames:
@@ -141,7 +189,7 @@ def read_sequences(*fnames):
         raise AlignmentError("\nCannot read sequences -- make sure the file %s exists and contains sequences in fasta format" % fname)
     except ValueError as error:
         raise AlignmentError("\nERROR: Problem reading in {}: {}".format(fname, str(error)))
-    return seqs
+    return list(seqs.values())
 
 def check_arguments(args):
     # Simple error checking related to a reference name/sequence
@@ -161,7 +209,7 @@ def ensure_reference_strain_present(ref_name, existing_alignment, seqs):
         if ref_name not in {x.name for x in existing_alignment}:
             raise AlignmentError("ERROR: Specified reference name %s (via --reference-name) is not in the supplied alignment."%ref_name)
     else:
-        if ref_name not in seqs:
+        if ref_name not in {x.name for x in seqs}:
             raise AlignmentError("ERROR: Specified reference name %s (via --reference-name) is not in the sequence sample."%ref_name)
 
 
@@ -345,19 +393,15 @@ def add(name):
         if name in names:
             raise AlignmentError("Duplicate strains of \"{}\" detected".format(name))
         names.add(name)
-
     for sample in values:
         if not sample:
             # allows false-like values (e.g. always provide existing_alignment, allowing
             # the default which is `False`)
             continue
-        elif type(sample) == dict:
-            for s in sample:
-                add(s)
-        elif type(sample) == Align.MultipleSeqAlignment:
+        elif isinstance(sample, (list, Align.MultipleSeqAlignment)):
             for s in sample:
                 add(s.name)
-        elif type(sample) == str:
+        elif isinstance(sample, str):
             add(sample)
         else:
             raise TypeError()
@@ -372,14 +416,14 @@ def write_seqs(seqs, fname):
 
 def prune_seqs_matching_alignment(seqs, aln):
     """
-    Return a set of seqs excluding those set via `exclude` & print a warning
+    Return a set of seqs excluding those already in the alignment & print a warning
     message for each sequence which is exluded.
     """
-    ret = {}
-    exclude_names = {s.name for s in aln}
-    for name, seq in seqs.items():
-        if name in exclude_names:
-            print("Excluding {} as it is already present in the alignment".format(name))
+    ret = []
+    aln_names = {s.name for s in aln}
+    for seq in seqs:
+        if seq.name in aln_names:
+            print("Excluding {} as it is already present in the alignment".format(seq.name))
         else:
-            ret[name] = seq
+            ret.append(seq)
     return ret
diff --git a/augur/filter.py b/augur/filter.py
index 700bdff99..efcaab8bb 100644
--- a/augur/filter.py
+++ b/augur/filter.py
@@ -8,6 +8,8 @@
 import random, os, re
 import numpy as np
 import sys
+import datetime
+import treetime.utils
 from .utils import read_metadata, get_numerical_dates, run_shell_command, shquote
 
 comment_char = '#'
@@ -87,8 +89,8 @@ def filter_by_query(sequences, metadata_file, query):
 def register_arguments(parser):
     parser.add_argument('--sequences', '-s', required=True, help="sequences in fasta or VCF format")
     parser.add_argument('--metadata', required=True, help="metadata associated with sequences")
-    parser.add_argument('--min-date', type=float, help="minimal cutoff for numerical date")
-    parser.add_argument('--max-date', type=float, help="maximal cutoff for numerical date")
+    parser.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
+    parser.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
     parser.add_argument('--min-length', type=int, help="minimal length of the sequences")
     parser.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")
     parser.add_argument('--exclude', type=str, help="file with list of strains that are to be excluded")
@@ -410,3 +412,21 @@ def run(args):
 
 def _filename_gz(filename):
     return filename.lower().endswith(".gz")
+
+
+def numeric_date(date):
+    """
+    Converts the given *date* string to a :py:class:`float`.
+
+    *date* is first tried as a number (a float) with year as the integer part;
+    failing that, it is parsed as YYYY-MM-DD (ISO 8601) and converted by treetime.
+
+    >>> numeric_date("2020.42")
+    2020.42
+    >>> numeric_date("2020-06-04")
+    2020.42486...
+    """
+    try:
+        return float(date)
+    except ValueError:
+        return treetime.utils.numeric_date(datetime.date(*map(int, date.split("-", 2))))
diff --git a/augur/util_support/__init__.py b/augur/util_support/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/augur/util_support/date_disambiguator.py b/augur/util_support/date_disambiguator.py
new file mode 100644
index 000000000..713295eac
--- /dev/null
+++ b/augur/util_support/date_disambiguator.py
@@ -0,0 +1,123 @@
+import calendar
+import datetime
+import functools
+import re
+
+
+def tuple_to_date(year, month, day):
+    """Build a date, clamping an over-large month or day to its largest valid value."""
+    month = min(month, 12)
+    return datetime.date(year, month, min(day, max_day_for_year_month(year, month)))
+
+
+def max_day_for_year_month(year, month):
+    """Return the number of days in *month* of *year* (leap years included)."""
+    return calendar.monthrange(year, month)[1]
+
+
+def resolve_uncertain_int(uncertain_string, min_or_max):
+    """
+    Takes a string representation of an integer with uncertain places
+    occupied by the character `X`. Returns the minimum ("min") or maximum
+    ("max") possible integer; any other *min_or_max* raises ValueError.
+    """
+    if min_or_max == "min":
+        result = int(uncertain_string.replace("X", "0"))
+    elif min_or_max == "max":
+        result = int(uncertain_string.replace("X", "9"))
+    else:
+        raise ValueError("Tried to resolve an uncertain integer to something other than `min` or `max`.")
+
+    if result == 0:
+        # A date component cannot be 0. Well, year can, but...
+        result = 1
+
+    return result
+
+
+class DateDisambiguator:
+    """Transforms a date string with uncertainty (`X` digits) into the range of possible dates."""
+
+    def __init__(self, uncertain_date, fmt="%Y-%m-%d", min_max_year=None):
+        self.uncertain_date = uncertain_date
+        self.fmt = fmt
+        self.min_max_year = min_max_year  # NOTE(review): stored but never read within this class
+
+        self.assert_only_less_significant_uncertainty()  # validates eagerly; raises ValueError
+
+    def range(self):  # returns (min_date, max_date) as datetime.date objects
+        min_date = tuple_to_date(
+            resolve_uncertain_int(self.uncertain_date_components["Y"], "min"),
+            resolve_uncertain_int(self.uncertain_date_components["m"], "min"),
+            resolve_uncertain_int(self.uncertain_date_components["d"], "min"),
+        )
+
+        max_date = tuple_to_date(
+            resolve_uncertain_int(self.uncertain_date_components["Y"], "max"),
+            resolve_uncertain_int(self.uncertain_date_components["m"], "max"),
+            resolve_uncertain_int(self.uncertain_date_components["d"], "max"),
+        )
+        max_date = min(max_date, datetime.date.today())  # the upper bound is capped at today
+
+        return (min_date, max_date)
+
+    @property
+    @functools.lru_cache()  # NOTE(review): keyed on self, so the cache keeps instances referenced
+    def uncertain_date_components(self):
+        matches = re.search(self.regex, self.uncertain_date)
+
+        if matches is None:
+            raise ValueError(
+                f"Malformed uncertain date `{self.uncertain_date}` for format `{self.fmt}`"
+            )
+
+        return dict(zip(self.fmt_components, matches.groups()))
+
+    @property
+    @functools.lru_cache()  # NOTE(review): keyed on self, so the cache keeps instances referenced
+    def fmt_components(self):
+        # The `re` module doesn't capture repeated groups, so we'll do it without regexes
+        return [component[0] for component in self.fmt.split("%") if len(component) > 0]
+
+    @property
+    def regex(self):
+        """
+        Returns regex defined by the format string.
+        Currently only supports %Y, %m, and %d.
+        """
+        return re.compile(
+            "^"
+            + self.fmt.replace("%Y", "(....)")
+            .replace("%m", "(..?)")
+            .replace("%d", "(..?)")
+            + "$"
+        )
+
+    def assert_only_less_significant_uncertainty(self):
+        """
+        Raise a ValueError if a constrained component appears in a less-significant
+        place than an uncertain component.
+
+        Assuming %Y-%m-%d, these patterns are valid:
+            2000-01-01
+            2000-01-XX
+            2000-XX-XX
+
+        but this is invalid, because month is uncertain but day is constrained:
+            2000-XX-01
+
+        These invalid cases are assumed to be unintended use of the tool.
+        """
+        if "X" in self.uncertain_date_components["Y"]:
+            if (
+                self.uncertain_date_components["m"] != "XX"
+                or self.uncertain_date_components["d"] != "XX"
+            ):
+                raise ValueError(
+                    "Invalid date: Year contains uncertainty, so month and day must also be uncertain."
+                )
+        elif "X" in self.uncertain_date_components["m"]:
+            if self.uncertain_date_components["d"] != "XX":
+                raise ValueError(
+                    "Invalid date: Month contains uncertainty, so day must also be uncertain."
+                )
diff --git a/augur/utils.py b/augur/utils.py
index 6f8531c84..0449da24e 100644
--- a/augur/utils.py
+++ b/augur/utils.py
@@ -16,6 +16,8 @@
 import packaging.version as packaging_version
 from .validate import validate, ValidateError, load_json_schema
 
+from augur.util_support.date_disambiguator import DateDisambiguator
+
 class AugurException(Exception):
     pass
 
@@ -62,38 +64,8 @@ def get_json_name(args, default=None):
             raise ValueError("Please specify a name for the JSON file containing the results.")
 
 
-def ambiguous_date_to_date_range(mydate, fmt, min_max_year=None):
-    from datetime import datetime
-    sep = fmt.split('%')[1][-1]
-    min_date, max_date = {}, {}
-    today = datetime.today().date()
-
-    for val, field  in zip(mydate.split(sep), fmt.split(sep+'%')):
-        f = 'year' if 'y' in field.lower() else ('day' if 'd' in field.lower() else 'month')
-        if 'XX' in val:
-            if f=='year':
-                if min_max_year:
-                    min_date[f]=min_max_year[0]
-                    if len(min_max_year)>1:
-                        max_date[f]=min_max_year[1]
-                    elif len(min_max_year)==1:
-                        max_date[f]=4000 #will be replaced by 'today' below.
-                else:
-                    return None, None
-            elif f=='month':
-                min_date[f]=1
-                max_date[f]=12
-            elif f=='day':
-                min_date[f]=1
-                max_date[f]=31
-        else:
-            min_date[f]=int(val)
-            max_date[f]=int(val)
-    max_date['day'] = min(max_date['day'], 31 if max_date['month'] in [1,3,5,7,8,10,12]
-                                           else 28 if max_date['month']==2 else 30)
-    lower_bound = datetime(year=min_date['year'], month=min_date['month'], day=min_date['day']).date()
-    upper_bound = datetime(year=max_date['year'], month=max_date['month'], day=max_date['day']).date()
-    return (lower_bound, upper_bound if upper_bound<today else today)
+def ambiguous_date_to_date_range(uncertain_date, fmt, min_max_year=None):
+    return DateDisambiguator(uncertain_date, fmt, min_max_year).range()  # (min_date, max_date)
 
 def read_metadata(fname, query=None):
     if not fname:
diff --git a/docs/usage/cli/traits.rst b/docs/usage/cli/traits.rst
index 7b7a39d7b..772fc5592 100644
--- a/docs/usage/cli/traits.rst
+++ b/docs/usage/cli/traits.rst
@@ -15,8 +15,11 @@ augur traits
 What about missing data?
 ========================
 
-If you have strains with missing data then you must give them the value ``?``.
-For example, if you are running a reconstruction of ``country`` and you don't know the country for a particular strain, you must set country to ``?`` in the metadata file for that strain.
-Note that anything else -- empty strings, ``NA``, ``unknown``-- will be interpretted as a valid value!
+If you have strains with missing data and you want them to be reconstructed, then you must give them the value ``?``.
+For example, if you are running a reconstruction of ``country`` and you don't know the country for a particular strain, you should set country to ``?`` in the metadata file for that strain.
+Then, ``traits`` will estimate the most likely ``country`` value for any strains where you have provided ``?``. 
 
-Currently there is no way to *not* infer the value of these missing data, but we are working on making this option available in the future.
+If you do not want these traits to be reconstructed (you would like it to remain clear that the ``country`` is unknown for this sample), then simply leave this field blank in the metadata file.
+
+Note that each value -- empty strings, ``NA``, ``unknown`` -- will be interpreted as a valid value!
+So, it's best to be consistent with whatever you use for missing values, or those with ``NA`` will be shown as different from those with ``unknown``!
diff --git a/setup.py b/setup.py
index 4cfdbd47e..9cd1238c2 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 from pathlib    import Path
-from setuptools import setup
+import setuptools
 import sys
 
 min_version = (3, 6)
@@ -26,7 +26,9 @@
 with readme_file.open(encoding = "utf-8") as f:
     long_description = f.read()
 
-setup(
+
+
+setuptools.setup(
     name = "nextstrain-augur",
     version = __version__,
     author = "Nextstrain developers",
@@ -41,7 +43,7 @@
         "Change Log": "https://github.com/nextstrain/augur/blob/master/CHANGES.md#next",
         "Source": "https://github.com/nextstrain/augur",
     },
-    packages = ['augur'],
+    packages = setuptools.find_packages(),
     package_data = {'augur': ['data/*']},
     data_files = [("", ["LICENSE.txt"])],
     python_requires = '>={}'.format('.'.join(str(n) for n in min_version)),
diff --git a/tests/test_align.py b/tests/test_align.py
index 5583f615f..5c73fe555 100644
--- a/tests/test_align.py
+++ b/tests/test_align.py
@@ -1,14 +1,93 @@
+import argparse
+import functools
+import os
+
+import pytest
+import pathlib
+
+from shlex import quote
+
+from Bio import SeqIO
 from Bio.Align import MultipleSeqAlignment
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
 
-from shlex import quote
-
 from augur import align
 
-import pytest
-import pathlib
 
+def write_strains(tmpdir, name, strains):
+    fasta_path = str(tmpdir / name + ".fasta")  # <tmpdir>/<name>.fasta
+    with open(fasta_path, "w") as handle:
+        SeqIO.write(strains, handle, "fasta")
+    return fasta_path
+
+@pytest.fixture
+def ref_seq():
+    return SeqRecord(Seq("aaaaTT-Tg-ggCCCC"), "REF")
+
+@pytest.fixture
+def test_seqs(ref_seq):
+    return {
+        "PREFIX": SeqRecord(ref_seq.seq[3:], "PREFIX"),
+        "SUFFIX": SeqRecord(ref_seq.seq[:-3], "SUFFIX"),
+        "LONGER": SeqRecord("CCC" + ref_seq.seq + "AAA", "LONGER")
+    }
+
+@pytest.fixture
+def existing_aln(ref_seq):
+    return {
+        "EXISTING1": SeqRecord("NN" + ref_seq.seq[2:-3] + "NNN", "EXISTING1"),
+        "EXISTING2": SeqRecord("NNN" + ref_seq.seq[3:-1] + "N", "EXISTING2")
+    }
+
+@pytest.fixture
+def ref_file(tmpdir, ref_seq):
+    return write_strains(tmpdir, "ref", ref_seq)
+
+@pytest.fixture
+def test_file(tmpdir, test_seqs):
+    return write_strains(tmpdir, "test", test_seqs.values())
+
+@pytest.fixture
+def test_with_ref(tmpdir, test_seqs, ref_seq):
+    return write_strains(tmpdir, "test_w_ref", [ref_seq,] + list(test_seqs.values()))
+
+@pytest.fixture
+def existing_file(tmpdir, existing_aln):
+    return write_strains(tmpdir, "existing", existing_aln.values())
+
+@pytest.fixture
+def existing_with_ref(tmpdir, existing_aln, ref_seq):
+    return write_strains(tmpdir, "existing_w_ref", [ref_seq,] + list(existing_aln.values()))
+
+@pytest.fixture
+def out_file(tmpdir):
+    path = str(tmpdir / "out")
+    open(path, "w").close()  # create an empty file at the output path
+    return path
+
+@pytest.fixture
+def argparser():
+    """Return a callable that parses an argument string using align's registered CLI arguments."""
+    cli = argparse.ArgumentParser()
+    align.register_arguments(cli)
+    # The string is split on single spaces, so argument values that
+    # themselves contain spaces are not supported by this helper.
+    return lambda args: cli.parse_args(args.split(" "))
+
+@pytest.fixture
+def run(argparser, out_file):
+    def run_align(cli_args):
+        parsed = argparser("{} -o {}".format(cli_args, out_file))
+        align.run(parsed)
+        return SeqIO.to_dict(SeqIO.parse(out_file, "fasta"))
+    return run_align
+
+@pytest.fixture
+def mp_context(monkeypatch):
+    #This should be moved to conftest once #512 is merged
+    with monkeypatch.context() as mp:
+        yield mp
 
 class TestAlign:
     def test_make_gaps_ambiguous(self):
@@ -90,9 +169,10 @@ def test_prune_seqs_matching_alignment(self):
             ]
         )
         
-        result = align.prune_seqs_matching_alignment(sequence, alignment)
-        assert list(result.keys()) == ["seq2"]
-        assert result["seq2"].seq == sequence["seq2"].seq
+        result = align.prune_seqs_matching_alignment(sequence.values(), alignment)
+        assert [r.name for r in result] == ["seq2"]
+        for r in result:
+            assert r.seq == sequence[r.name].seq
 
     def test_prettify_alignment(self):
         data_file = pathlib.Path('tests/data/align/test_aligned_sequences.fasta')
@@ -148,9 +228,274 @@ def test_read_alignment(self):
     def test_read_sequences(self):
         data_file = pathlib.Path('tests/data/align/test_aligned_sequences.fasta')
         result = align.read_sequences(data_file)
-        assert len(result.keys()) == 4
+        assert len(result) == 4
 
     def test_read_seq_compare(self):
         data_file = pathlib.Path("tests/data/align/aa-seq_h3n2_ha_2y_2HA1_dup.fasta")
         with pytest.raises(align.AlignmentError):
             assert align.read_sequences(data_file)
+
+    def test_prepare_no_alignment_or_ref(self, test_file, test_seqs, out_file):
+        _, seqs_fname, _ = align.prepare([test_file], None, out_file, None, None)
+        assert os.path.isfile(seqs_fname), "Didn't write sequences where it said"
+        written = SeqIO.to_dict(SeqIO.parse(seqs_fname, "fasta"))
+        assert all(record.seq == test_seqs[name].seq for name, record in written.items())
+    
+    def test_prepare_no_alignment_with_named_ref_missing(self, test_file, ref_seq):
+        """A ref_name that is absent from the input sequences must raise AlignmentError"""
+        with pytest.raises(align.AlignmentError):
+            align.prepare([test_file], existing_aln_fname=None, output="dontcare", ref_name=ref_seq.id, ref_seq_fname=None)
+
+    def test_prepare_with_alignment_with_named_ref_missing(self, test_with_ref, existing_file, ref_seq):
+        """A ref_name that is absent from the given existing alignment must raise AlignmentError, even though the input sequences contain it."""
+        with pytest.raises(align.AlignmentError):
+            align.prepare([test_with_ref], existing_aln_fname=existing_file, output="dontcare", ref_name=ref_seq.id, ref_seq_fname=None)
+
+    def test_prepare_no_alignment_with_ref_file(self, test_file, test_seqs, ref_file, ref_seq, out_file):
+        _, seqs_fname, returned_ref = align.prepare([test_file], None, out_file, None, ref_file)
+        assert returned_ref == ref_seq.id, "Didn't return strain name from refrence file"
+        assert os.path.isfile(seqs_fname), "Didn't write sequences where it said"
+        first, *rest = SeqIO.parse(seqs_fname, "fasta")  # order matters: the reference must be written first
+        assert first.id == ref_seq.id, "Reference sequence is not the first sequence in ouput file!"
+        written_names = {first.name} | {record.name for record in rest}
+        assert set(test_seqs) <= written_names, "Some test sequences dropped unexpectedly"
+        for record in rest:
+            assert record.seq == test_seqs[record.id].seq, "Some test sequences changed unexpectedly"
+    
+    def test_prepare_no_alignment_with_ref_name(self, test_with_ref, test_seqs, ref_seq, out_file):
+        _, seqs_fname, _ = align.prepare([test_with_ref], None, out_file, ref_seq.id, None)
+        assert os.path.isfile(seqs_fname), "Didn't write sequences where it said"
+        written = SeqIO.to_dict(SeqIO.parse(seqs_fname, "fasta"))
+        assert written[ref_seq.id].seq == ref_seq.seq, "Reference sequence was not added to test sequences"
+        for name, expected in test_seqs.items():
+            assert name in written, "Some test sequences dropped unexpectedly"
+            assert written[name].seq == expected.seq, "Some test sequences changed unexpectedly"
+
+    def test_prepare_with_alignment_with_ref_name(self, test_file, test_seqs, existing_with_ref, existing_aln, ref_seq, out_file):
+        """With input sequences, an existing alignment and a reference name, prepare() must leave both sets of sequences unchanged."""
+        aln_fname, seqs_fname, _ = align.prepare([test_file], existing_with_ref, out_file, ref_seq.id, None)
+        assert os.path.isfile(aln_fname), "Didn't write existing alignment where it said"
+        assert aln_fname == existing_with_ref, "Rewrote the alignment file unexpectedly"
+        # The alignment (reference included) must come back exactly as it went in.
+        aln_records = SeqIO.to_dict(SeqIO.parse(aln_fname, "fasta"))
+        assert aln_records[ref_seq.id].seq == ref_seq.seq, "Reference sequence dropped from alignment"
+        for name, expected in existing_aln.items():
+            assert name in aln_records, "Some existing alignment sequences dropped unexpectedly"
+            assert aln_records[name].seq == expected.seq, "Some existing alignment sequences changed unexpectedly"
+        # The new sequences must be written out unmodified, with nothing added.
+        assert os.path.isfile(seqs_fname), "Didn't write test sequences where it said"
+        seq_records = SeqIO.to_dict(SeqIO.parse(seqs_fname, "fasta"))
+        for name, expected in test_seqs.items():
+            assert name in seq_records, "Some test sequences unexpectedly dropped"
+            assert seq_records[name].seq == expected.seq, "Some test sequences changed unexpectedly"
+        assert seq_records.keys() == test_seqs.keys()
+
+    def test_prepare_with_alignment_with_ref_seq(self, test_file, test_seqs, existing_file, existing_aln, ref_seq, ref_file, out_file):
+        """Test that, given a set of test sequences, an existing alignment, and a reference sequence, the reference
+        is added to the existing alignment and no other changes are made."""
+        aln_outfile, seqs_outfile, ref_name = align.prepare([test_file,], existing_file, out_file, None, ref_file)
+        assert ref_name == ref_seq.id, "Didn't return strain name from refrence file"
+        assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
+        assert aln_outfile != existing_file, "Unexpectedly overwrote existing alignment"  # compare paths; existing_aln is the record dict and can never equal a filename
+        # Alignment file should have the reference added
+        aln_output = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
+        assert aln_output[ref_seq.id].seq == ref_seq.seq, "Reference sequence not added to alignment"
+        for seq in existing_aln:
+            assert seq in aln_output, "Some existing alignment sequences dropped unexpectedly"
+            assert aln_output[seq].seq == existing_aln[seq].seq, "Some existing alignment sequences changed unexpectedly"
+        # test sequences should be unchanged
+        assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
+        seq_output = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
+        for seq in test_seqs:
+            assert seq in seq_output, "Some test sequences unexpectedly dropped"
+            assert seq_output[seq].seq == test_seqs[seq].seq, "Some test sequences changed unexpectedly"
+        assert seq_output.keys() == test_seqs.keys()
+    
+    def test_prepare_no_alignment_multiple_test_seqs(self, test_file, test_seqs, ref_file, ref_seq, out_file):
+        """Several input files handed to prepare() must come back merged into a single sequence file"""
+        # A bit of a kludge: the reference file doubles as the second input file, so the named reference is present.
+        _, merged_fname, _ = align.prepare([test_file, ref_file], None, out_file, ref_seq.id, None)
+        merged = SeqIO.to_dict(SeqIO.parse(merged_fname, "fasta"))
+        assert merged.keys() == {*test_seqs.keys(), ref_seq.id}, "Did not combine the two files"
+        assert merged[ref_seq.id].seq == ref_seq.seq, "Missing sequence from second file"
+        for name, expected in test_seqs.items():
+            assert name in merged, "Some test sequences unexpectedly dropped"
+            assert merged[name].seq == expected.seq, "Some test sequences changed unexpectedly"
+    
+    def test_prepare_with_alignment_with_duplicate_sequences(self, test_file, test_seqs, existing_file, existing_aln, out_file):
+        """Input sequences that already appear in the existing alignment must be dropped from the sequences to align"""
+        _, kept_fname, _ = align.prepare([existing_file, test_file], existing_aln_fname=existing_file, output=out_file, ref_name=None, ref_seq_fname=None)
+        kept = SeqIO.to_dict(SeqIO.parse(kept_fname, "fasta"))
+        assert kept.keys() == test_seqs.keys(), "Did not strip duplicate sequences from test input!"
+    
+    def test_prepare_with_alignment_ref_sequence_wrong_length(self, test_file, existing_file, ref_seq, ref_file):
+        """A reference file whose sequence has been truncated by three sites must be rejected with AlignmentError."""
+        ref_seq.seq = ref_seq.seq[:-3]
+        with open(ref_file, "w") as handle:
+            SeqIO.write(ref_seq, handle, "fasta")
+        with pytest.raises(align.AlignmentError):
+            align.prepare([test_file], existing_aln_fname=existing_file, output="out", ref_name=None, ref_seq_fname=ref_file)
+    
+    def test_postprocess_prettify_alignment(self, tmpdir, existing_aln, ref_seq):
+        """Postprocess should strip _R_ from reverse-complemented strains and convert all sites to uppercase"""
+        ref_seq.name = "_R_" + ref_seq.name  # NOTE(review): Biopython's FASTA writer emits record.id, not record.name - confirm write_strains really serializes this prefix
+        post_align = write_strains(tmpdir, "post_align", [ref_seq] + list(existing_aln.values()))
+        align.postprocess(post_align, None, True, False)
+        output = SeqIO.to_dict(SeqIO.parse(post_align, "fasta"))
+        for name, record in output.items():
+            assert record.seq == record.seq.upper(), "Sequence was not made uppercase"
+            assert "_R_" not in name, "Reverse-complement prefix not removed"
+
+    @pytest.mark.parametrize("keep_ref", [True, False])
+    def test_postprocess_remove_reference(self, existing_with_ref, ref_seq, keep_ref):
+        """The reference strain must still be in the file after postprocess exactly when keep_ref is set"""
+        align.postprocess(existing_with_ref, ref_seq.id, keep_ref, False)
+        remaining = SeqIO.to_dict(SeqIO.parse(existing_with_ref, "fasta"))
+        assert (ref_seq.id not in remaining) != keep_ref
+    
+    @pytest.mark.parametrize("fill_gaps", [True, False])
+    def test_postprocess_fill_gaps(self, existing_file, existing_aln, ref_seq, fill_gaps):
+        """Sites that were gaps before postprocess must read "N" afterwards exactly when fill_gaps is set"""
+        align.postprocess(existing_file, None, True, fill_gaps)
+        processed = SeqIO.to_dict(SeqIO.parse(existing_file, "fasta"))
+        for name, record in processed.items():
+            # Positions of the gaps in the record as it was before postprocessing.
+            gap_sites = [idx for idx, site in enumerate(existing_aln[name].seq) if site == "-"]
+            assert all((record.seq[idx] == "N") == fill_gaps for idx in gap_sites)
+
+    def test_postprocess_strip_non_reference(self, tmpdir, ref_seq, ref_file):
+        """Columns that are gaps in the reference must be removed from every sequence, while gaps that only occur in other sequences stay"""
+        ungapped_ref_len = len(ref_seq.seq) - ref_seq.seq.count("-")
+        # Build a second record: a copy of the reference with the site at index 1 replaced by a gap.
+        with_gap = ref_seq.seq.tomutable()
+        with_gap[1] = "-"
+        strains = [ref_seq, SeqRecord(with_gap, "GAP")]
+        gap_file = write_strains(tmpdir, "gaps", strains)
+        align.postprocess(gap_file, ref_seq.id, True, False)
+        processed = SeqIO.to_dict(SeqIO.parse(gap_file, "fasta"))
+        assert "-" not in processed[ref_seq.id].seq
+        assert processed["GAP"].seq.count("-") == 1
+        assert all(len(record.seq) == ungapped_ref_len for record in processed.values())
+
+    def test_run_no_ref_or_alignment(self, test_file, test_seqs, ref_seq, out_file, argparser, run):
+        """Without a reference or existing alignment, every output sequence should have the length of the longest input minus the reference's gap count"""
+        longest_input = max(len(record.seq) for record in test_seqs.values())
+        expected_length = longest_input - ref_seq.seq.count("-")
+        aligned = run("-s %s" % test_file)
+        assert aligned.keys() == test_seqs.keys()
+        assert all(len(record.seq) == expected_length for record in aligned.values())
+    
+    def test_run_fill_gaps_no_ref(self, test_file, run):
+        """All gaps should be filled when --fill-gaps is passed without a reference (distinct name so the parametrized test_run_fill_gaps does not shadow this test)"""
+        output = run("-s %s --fill-gaps" % test_file)
+        assert all("-" not in r.seq for r in output.values())
+    
+    def test_run_with_ref_name_no_alignment(self, test_with_ref, test_seqs, ref_seq, run):
+        ungapped_ref_len = len(ref_seq.seq) - ref_seq.seq.count("-")
+        aligned = run("-s %s --reference-name %s" % (test_with_ref, ref_seq.id))
+        assert list(aligned) == [ref_seq.id, *test_seqs]
+        assert all(len(record.seq) == ungapped_ref_len for record in aligned.values())
+        assert aligned["PREFIX"].seq.startswith("---")
+        assert aligned["SUFFIX"].seq.endswith("---")
+
+    def test_run_with_ref_seq_no_alignment(self, test_file, test_seqs, ref_file, ref_seq, run):
+        ungapped_ref_len = len(ref_seq.seq) - ref_seq.seq.count("-")
+        aligned = run("-s %s --reference-sequence %s" % (test_file, ref_file))
+        assert list(aligned) == [ref_seq.id, *test_seqs]
+        assert all(len(record.seq) == ungapped_ref_len for record in aligned.values())
+        assert aligned["PREFIX"].seq.startswith("---")
+        assert aligned["SUFFIX"].seq.endswith("---")
+    
+    def test_run_with_ref_name_remove_reference(self, test_with_ref, ref_seq, run):
+        """--remove-reference should drop a reference that was selected by name from the input sequences"""
+        output = run("-s %s --reference-name %s --remove-reference" % (test_with_ref, ref_seq.id))
+        assert ref_seq.id not in output
+
+    def test_run_with_ref_seq_remove_reference(self, test_file, ref_file, ref_seq, run):
+        """--remove-reference should drop a reference that was supplied via --reference-sequence"""
+        output = run("-s %s --reference-sequence %s --remove-reference" % (test_file, ref_file))
+        assert ref_seq.id not in output
+    
+    def test_run_no_ref_with_alignment(self, test_seqs, test_file, existing_aln, existing_file, run):
+        aligned = run("-s %s --existing-alignment %s" % (test_file, existing_file))
+        assert sorted(aligned) == sorted([*test_seqs, *existing_aln]), "Missing some sequences from input or alignment"
+        assert len({len(record.seq) for record in aligned.values()}) == 1, "Not all sequences are the same length"
+    
+    def test_run_multiple_sequences_concatenated(self, test_file, test_seqs, ref_file, ref_seq, run):
+        aligned = run("-s {} {}".format(test_file, ref_file))
+        assert ref_seq.id in aligned
+        assert set(test_seqs) <= set(aligned)
+    
+    def test_run_with_ref_file_with_alignment_file(self, test_file, test_seqs, ref_file, ref_seq, existing_aln, existing_file, run):
+        ungapped_ref_len = len(ref_seq.seq) - ref_seq.seq.count("-")
+        aligned = run("-s %s --existing-alignment %s --reference-sequence %s" % (test_file, existing_file, ref_file))
+        assert set(test_seqs) <= set(aligned)
+        assert set(existing_aln) <= set(aligned)
+        assert ref_seq.id in aligned
+        assert all(len(record.seq) == ungapped_ref_len for record in aligned.values())
+    
+    @pytest.mark.parametrize("remove_ref", [True, False])
+    def test_run_remove_reference(self, test_file, existing_file, ref_file, ref_seq, run, remove_ref):
+        extra_flag = " --remove-reference" if remove_ref else ""
+        aligned = run("-s %s --existing-alignment %s --reference-sequence %s%s" % (test_file, existing_file, ref_file, extra_flag))
+        assert (ref_seq.id in aligned) != remove_ref
+    
+    @pytest.mark.parametrize("fill_gaps" , [True, False])
+    def test_run_fill_gaps(self, test_file, ref_file, run, fill_gaps):
+        flag, padding = (" --fill-gaps", "NNN") if fill_gaps else ("", "---")
+        aligned = run("-s %s --reference-sequence %s%s" % (test_file, ref_file, flag))
+        assert aligned["PREFIX"].seq.startswith(padding)
+        assert aligned["SUFFIX"].seq.endswith(padding)
+        # With gaps filled, no sequence may contain a gap character anywhere.
+        assert not fill_gaps or all("-" not in record.seq for record in aligned.values())
+    
+    def test_run_error_during_alignment(self, test_file, argparser, mp_context):
+        """align.run should return 1 when run_shell_command (stubbed here to return False) fails. Not a great test - some other check along the way could also cause the failure, but it's all we can do"""
+        mp_context.setattr(align, "run_shell_command", lambda i: False)
+        args = argparser("-s %s" % test_file)
+        assert align.run(args) == 1
+    
+    def test_run_debug_files(self, test_file, out_file, run):
+        run("-s %s --debug" % test_file)
+        for suffix in (".pre_aligner.fasta", ".post_aligner.fasta"):
+            assert os.path.isfile(out_file + suffix)
+
+    def test_run_check_files_are_cleaned_up_including_alignment(self, test_file, existing_file, ref_file, run, mp_context):
+        """Check we clean up the correct files after run is done.
+
+        Note: This test is complicated because we need to test that everything is actually deleted correctly, so we need
+        to go through the entire run cycle. This means the files all have to be prepared exactly as though they were real.
+        So, below, we monkeypatch "align.prepare" with a wrapper that records the generated file names, using three tricks:
+        first, the wrapper is handed a dictionary. The dictionary object is shared (not copied), so entries added inside the
+        wrapper are visible to the test afterwards, with no need for a return value. Second, the real "align.prepare" is
+        imported under a second name before patching, so the wrapper calls the original rather than recursing into itself.
+        Finally, functools.partial binds our dictionary as the wrapper's first argument before align.run() calls it.
+
+        All of this is not Good, but it does Work.
+        """
+        out_files = {}
+        from augur.align import prepare as prpr # Keep a handle on the real prepare before it is patched, so the wrapper below does not call itself.
+        def catch_filenames(out_files, *args, **kwargs):
+            existing_aln_fname, seqs_to_align_fname, ref_name = prpr(*args, **kwargs)
+            out_files["aln"] = existing_aln_fname
+            out_files["seq"] = seqs_to_align_fname
+            return existing_aln_fname, seqs_to_align_fname, ref_name
+        mp_context.setattr(align, "prepare", functools.partial(catch_filenames, out_files))
+        run("-s %s --existing-alignment %s --reference-sequence %s" % (test_file, existing_file, ref_file))
+        assert not os.path.exists(out_files["seq"])
+        assert not os.path.exists(out_files["aln"])
+
+    def test_run_check_alignment_not_deleted_if_unchanged(self, test_file, existing_with_ref, ref_seq, run, mp_context):
+        """Check we don't clean up our original alignment by accident. align.prepare is wrapped to capture the file names it returns, as in test_run_check_files_are_cleaned_up_including_alignment."""
+        out_files = {}
+        from augur.align import prepare as prpr # Keep a handle on the real prepare before it is patched, so the wrapper below does not call itself.
+        def catch_filenames(out_files, *args, **kwargs):
+            existing_aln_fname, seqs_to_align_fname, ref_name = prpr(*args, **kwargs)
+            out_files["aln"] = existing_aln_fname
+            out_files["seq"] = seqs_to_align_fname
+            return existing_aln_fname, seqs_to_align_fname, ref_name
+        mp_context.setattr(align, "prepare", functools.partial(catch_filenames, out_files))
+        run("-s %s --existing-alignment %s --reference-name %s" % (test_file, existing_with_ref, ref_seq.id))
+        assert out_files["aln"] == existing_with_ref
+        assert os.path.exists(out_files["aln"]), "Deleted our existing alignment file by accident"
+        assert not os.path.exists(out_files["seq"])
diff --git a/tests/test_utils.py b/tests/test_utils.py
index da6a2960a..80871e6d8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -19,12 +19,6 @@ def test_ambiguous_date_to_date_range_ambiguous_day(self):
             datetime.date(year=2000, month=1, day=31),
         )
 
-    def test_ambiguous_date_to_date_range_ambiguous_month(self):
-        assert utils.ambiguous_date_to_date_range("2000-XX-5", "%Y-%m-%d") == (
-            datetime.date(year=2000, month=1, day=5),
-            datetime.date(year=2000, month=12, day=5),
-        )
-
     def test_ambiguous_date_to_date_range_ambiguous_month_and_day(self):
         assert utils.ambiguous_date_to_date_range("2000-XX-XX", "%Y-%m-%d") == (
             datetime.date(year=2000, month=1, day=1),
@@ -145,8 +139,7 @@ def test_read_metadata_bad_query(self, tmpdir):
         with open(meta_fn, "w") as fh:
             fh.write("\n".join(meta_lines))
         with pytest.raises(SystemExit):
-            utils.read_metadata(meta_fn, query='badcol=="goodval"')
-    
+            utils.read_metadata(meta_fn, query='badcol=="goodval"')    
     def test_read_metadata_duplicate_strain(self, tmpdir):
         meta_fn = str(tmpdir / "metadata.tsv")
         meta_lines = ["strain\tlocation\tquality",
@@ -185,4 +178,3 @@ def test_read_metadata_no_strain_or_name(self, tmpdir):
             fh.write("\n".join(meta_lines))
         
         assert utils.read_metadata('') == expected_result
-
diff --git a/tests/util_support/test_date_disambiguator.py b/tests/util_support/test_date_disambiguator.py
new file mode 100644
index 000000000..65be5f4b4
--- /dev/null
+++ b/tests/util_support/test_date_disambiguator.py
@@ -0,0 +1,82 @@
+import datetime
+
+from augur.util_support import date_disambiguator
+from augur.util_support.date_disambiguator import DateDisambiguator
+
+from freezegun import freeze_time
+import pytest
+
+
+class TestDateDisambiguator:
+    @freeze_time("2111-05-05")  # pins "today" for this test; presumably so the ranges cannot depend on the real date - confirm against DateDisambiguator
+    @pytest.mark.parametrize(
+        "date_str, expected_range",
+        [
+            ("2000-01-01", (datetime.date(2000, 1, 1), datetime.date(2000, 1, 1))),  # fully specified date: range is a single day
+            ("2000-02-XX", (datetime.date(2000, 2, 1), datetime.date(2000, 2, 29))),  # 2000 is a leap year
+            ("2000-XX-XX", (datetime.date(2000, 1, 1), datetime.date(2000, 12, 31))),  # only the year is known
+        ],
+    )
+    def test_range(self, date_str, expected_range):
+        assert DateDisambiguator(date_str).range() == expected_range
+
+    @pytest.mark.parametrize(
+        "date_str, fmt",
+        [
+            ("2005-02-XX", "%Y-%m-%d"),
+            ("2005/02/XX", "%Y/%m/%d"),
+            ("2005-XX-02", "%Y-%d-%m"),  # day comes before month in this format
+            ("200502XX", "%Y%m%d"),  # no separator at all
+        ],
+    )
+    def test_range_separators(self, date_str, fmt):
+        assert DateDisambiguator(date_str, fmt=fmt).range() == (
+            datetime.date(2005, 2, 1),
+            datetime.date(2005, 2, 28),  # 2005 is not a leap year
+        )
+
+    @pytest.mark.parametrize(
+        "date_str, expected_components",
+        [
+            ("2000-01-01", {"Y": "2000", "m": "01", "d": "01"}),
+            ("2000-01-XX", {"Y": "2000", "m": "01", "d": "XX"}),  # uncertain parts are expected back as the raw "XX" text
+            ("2000-XX-XX", {"Y": "2000", "m": "XX", "d": "XX"}),
+        ],
+    )
+    def test_uncertain_date_components(self, date_str, expected_components):
+        assert (
+            DateDisambiguator(date_str).uncertain_date_components == expected_components
+        )
+
+    def test_uncertain_date_components_error(self):
+        with pytest.raises(ValueError, match="Malformed uncertain date"):
+            DateDisambiguator("5-5-5-5-5").uncertain_date_components  # construction or the attribute access is expected to raise
+
+    @pytest.mark.parametrize(
+        "date_str, min_or_max, expected",
+        [
+            ("2000", "min", 2000),
+            ("2000", "max", 2000),
+            ("200X", "min", 2000),
+            ("200X", "max", 2009),
+            ("20X0", "max", 2090),
+            ("X000", "max", 9000),
+            ("XXXX", "min", 1),  # the expected minimum here is 1, not 0
+            ("XXXX", "max", 9999),
+        ],
+    )
+    def test_resolve_uncertain_int(self, date_str, min_or_max, expected):
+        assert (
+            date_disambiguator.resolve_uncertain_int(date_str, min_or_max) == expected
+        )
+
+    @pytest.mark.parametrize(
+        "date_str, expected_error",
+        [
+            ("200X-01-01", "so month and day must also be uncertain"),  # uncertain year with exact month and day
+            ("2000-XX-01", "so day must also be uncertain"),  # uncertain month with exact day
+        ],
+    )
+    def test_assert_only_less_significant_uncertainty(self, date_str, expected_error):
+        with pytest.raises(ValueError, match=expected_error):
+            DateDisambiguator(date_str)